path: root/examples/l4fwd
author    Konstantin Ananyev <konstantin.ananyev@intel.com>  2017-02-21 18:12:20 +0000
committer Konstantin Ananyev <konstantin.ananyev@intel.com>  2017-02-24 16:37:08 +0000
commit    aa97dd1ce910b839fed46ad55d1e70e403f5a930 (patch)
tree      f6f0fd494eaf499859bff9f20f5ddfac9ab99233 /examples/l4fwd
parent    f5f10013ffef8e4ac1071087b8492fe6380d98fe (diff)
Introduce first version of TCP code.
Supported functionality:
- open/close
- listen/accept/connect
- send/recv

In order to achieve that libtle_udp library was reworked into libtle_l4p
library that supports both TCP and UDP protocols.
New libtle_timer library was introduced (thanks to Cisco guys and
Dave Barach <dbarach@cisco.com> for sharing their timer code with us).
Sample application was also reworked significantly to support both TCP
and UDP traffic handling.
New UT were introduced.

Change-Id: I806b05011f521e89b58db403cfdd484a37beb775
Signed-off-by: Mohammad Abdul Awal <mohammad.abdul.awal@intel.com>
Signed-off-by: Karol Latecki <karolx.latecki@intel.com>
Signed-off-by: Daniel Mrzyglod <danielx.t.mrzyglod@intel.com>
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Diffstat (limited to 'examples/l4fwd')
-rw-r--r--  examples/l4fwd/Makefile              47
-rw-r--r--  examples/l4fwd/README               346
-rw-r--r--  examples/l4fwd/be.cfg                 5
-rw-r--r--  examples/l4fwd/common.h             662
-rw-r--r--  examples/l4fwd/dpdk_legacy.h        160
-rw-r--r--  examples/l4fwd/dpdk_version.h        33
-rw-r--r--  examples/l4fwd/fe.cfg                24
-rw-r--r--  examples/l4fwd/fwdtbl.h             117
-rwxr-xr-x  examples/l4fwd/gen_fe_cfg.py        200
-rw-r--r--  examples/l4fwd/lcore.h              370
-rw-r--r--  examples/l4fwd/main.c               313
-rw-r--r--  examples/l4fwd/netbe.h              330
-rw-r--r--  examples/l4fwd/parse.c              839
-rw-r--r--  examples/l4fwd/parse.h               69
-rw-r--r--  examples/l4fwd/pkt.c                872
-rw-r--r--  examples/l4fwd/pkt_dpdk_legacy.h    197
-rw-r--r--  examples/l4fwd/port.h               453
-rw-r--r--  examples/l4fwd/tcp.h                701
-rw-r--r--  examples/l4fwd/udp.h                588
19 files changed, 6326 insertions, 0 deletions
diff --git a/examples/l4fwd/Makefile b/examples/l4fwd/Makefile
new file mode 100644
index 0000000..f18b622
--- /dev/null
+++ b/examples/l4fwd/Makefile
@@ -0,0 +1,47 @@
+# Copyright (c) 2016 Intel Corporation.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+ifeq ($(RTE_TARGET),)
+$(error "Please define RTE_TARGET environment variable")
+endif
+
+ifeq ($(TLDK_ROOT),)
+$(error "Please define TLDK_ROOT environment variable")
+endif
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = l4fwd
+
+# all sources are stored in SRCS-y
+SRCS-y += parse.c
+SRCS-y += pkt.c
+SRCS-y += main.c
+
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -I$(RTE_OUTPUT)/include
+
+LDLIBS += -L$(RTE_OUTPUT)/lib
+LDLIBS += -ltle_l4p
+LDLIBS += -ltle_timer
+
+EXTRA_CFLAGS += -O3
+CFLAGS_parse.o += -D_GNU_SOURCE
+CFLAGS_main.o += -D_GNU_SOURCE
+
+include $(TLDK_ROOT)/mk/tle.app.mk
diff --git a/examples/l4fwd/README b/examples/l4fwd/README
new file mode 100644
index 0000000..658fe3a
--- /dev/null
+++ b/examples/l4fwd/README
@@ -0,0 +1,346 @@
+1. INTRODUCTION
+
+ l4fwd is a sample application to demonstrate and test TLDK TCP/UDP
+ functionality. Depending on the configuration it can do simple send, recv,
+ or both over opened TCP/UDP streams. It also implements TCP/UDP packet
+ forwarding between different streams, so it is possible to use the l4fwd
+ application as a TCP/UDP proxy.
+
+ The l4fwd application logically is divided into two parts, Back End (BE) and
+ Front End (FE).
+
+1.1 Back End (BE)
+
+ BE is responsible for:
+ - RX over DPDK ports and feeding received packets into TCP/UDP TLDK
+ context(s) (via tle_*_rx_bulk).
+
+ - retrieving packets ready to be sent out from TCP/UDP TLDK context(s) and
+ TX-ing them over the destined DPDK port.
+
+ - Multiple RX/TX queues per port are supported through RSS. Right now the
+ number of TX queues is the same as the number of RX queues.
+
+ - Each BE lcore can serve multiple DPDK ports and TLDK TCP/UDP contexts.
+
+ BE configuration record format:
+
+ port=<uint>,addr=<ipv4/ipv6>,masklen=<uint>,mac=<ether>[,mtu=<uint>]
+
+ port - DPDK port id to be used to send packets to the destination.
+ It is a mandatory option.
+ addr - destination network address. It is a mandatory option.
+ masklen - destination network prefix length. It is a mandatory option.
+ mac - destination Ethernet address. It is a mandatory option.
+ mtu - MTU to be used on that port (= application data size + L2/L3/L4
+ headers sizes, default=1514). It is optional.
+
+ Below are some examples of BE entries:
+
+ port=0,masklen=16,addr=192.168.0.0,mac=01:de:ad:be:ef:01
+ port=0,addr=2001:4860:b002::,masklen=64,mac=01:de:ad:be:ef:01
+
+ These examples are also available in the be.cfg file.
+
+1.2 Front End (FE)
+
+ FE is responsible for:
+ - opening configured TCP/UDP streams and performing send/recv over them.
+ These streams can belong to different TCP/UDP contexts.
+
+ Each lcore can act as BE and/or FE.
+
+ In UDP mode the application can reassemble fragmented input IP packets and
+ fragment outgoing IP packets (if the destination MTU is less than the
+ packet size).
+
+ FE configuration record format:
+
+ lcore=<uint>,op=<"rx|tx|echo|fwd">,\
+ laddr=<ip>,lport=<uint16>,raddr=<ip>,rport=<uint16>,\
+ [txlen=<uint>,fwladdr=<ip>,fwlport=<uint16>,fwraddr=<ip>,fwrport=<uint16>,\
+ belcore=<uint>]
+
+ lcore - EAL lcore to manage that stream(s) in the FE. It is a mandatory
+ option.
+ belcore - EAL lcore to manage that stream(s) in the BE. It is optional;
+ lcore and belcore can specify the same cpu core.
+ op - operation to perform on that stream:
+ "rx" - do receive only on that stream.
+ "tx" - do send only on that stream.
+ "echo" - mimic recvfrom(..., &addr);sendto(..., &addr);
+ on that stream.
+ "fwd" - forward packets between streams.
+ It is a mandatory option.
+ laddr - local address for the stream to open. It is a mandatory option.
+ lport - local port for the stream to open. It is a mandatory option.
+ raddr - remote address for the stream to open. It is a mandatory option.
+ rport - remote port for the stream to open. It is a mandatory option.
+ txlen - data length to send in each packet (mandatory for "tx" mode only).
+ fwladdr - local address for the forwarding stream(s) to open
+ (mandatory for "fwd" mode only).
+ fwlport - local port for the forwarding stream(s) to open
+ (mandatory for "fwd" mode only).
+ fwraddr - remote address for the forwarding stream(s) to open
+ (mandatory for "fwd" mode only).
+ fwrport - remote port for the forwarding stream(s) to open
+ (mandatory for "fwd" mode only).
+
+ Below are some examples of FE entries:
+
+ lcore=3,op=echo,laddr=192.168.1.233,lport=0x8000,raddr=0.0.0.0,rport=0
+
+ lcore=3,op=tx,laddr=192.168.1.233,lport=0x8001,raddr=192.168.1.56,\
+ rport=0x200,txlen=72
+
+ lcore=3,op=rx,laddr=::,lport=0x200,raddr=::,rport=0,txlen=72
+
+ lcore=3,op=fwd,laddr=0.0.0.0,lport=11211,raddr=0.0.0.0,rport=0,\
+ fwladdr=::,fwlport=0,fwraddr=2001:4860:b002::56,fwrport=11211
+
+ These examples are also available in the fe.cfg file with some more explanation.
+
+1.3 Configuration files format
+
+ - each record is on a separate line.
+ - lines starting with '#' are treated as comments.
+ - empty lines (containing whitespace chars only) are ignored.
+ - kvargs style format for each record.
+ - each FE record corresponds to at least one stream to be opened
+ (could be multiple streams in case of op="fwd").
+ - each BE record defines an ipv4/ipv6 destination.
+
+2. REQUIREMENTS
+
+ DPDK libraries (16.11 or higher)
+ TLDK libraries (1.0)
+ Back-End (BE) configuration file
+ Front-End (FE) configuration file
+
+3. USAGE
+
+ l4fwd <DPDK EAL parameters> -- \
+ -P | --promisc /* promiscuous mode enabled. */ \
+ -R | --rbufs <num> /* max recv buffers per stream. */ \
+ -S | --sbufs <num> /* max send buffers per stream. */ \
+ -s | --streams <num> /* streams to open per context. */ \
+ -b | --becfg <filename> /* backend configuration file. */ \
+ -f | --fecfg <filename> /* frontend configuration file. */ \
+ -U | --udp /* run the app to handle UDP streams only. */ \
+ -T | --tcp /* run the app to handle TCP streams only. */ \
+ -L | --listen /* open TCP streams in server mode (listen). */ \
+ -a | --enable-arp /* enable arp responses (request not supported) */ \
+ -v | --verbose /* different level of verbose mode */ \
+ <port0_params> <port1_params> ... <portN_params>
+
+ Note that options -U and -T cannot be used together.
+ Option -L can be used only with option -T.
+
+ portX_params: port=<uint>,lcore=<uint>[-<uint>],[lcore=<uint>[-<uint>],]\
+ [rx_offload=<uint>,tx_offload=<uint>,mtu=<uint>,ipv4=<ipv4>,ipv6=<ipv6>]
+
+ portX_params are used to configure a particular DPDK device
+ (rte_ethdev port), and to specify the BE lcore that will handle RX/TX
+ from/to the device and manage the BE part of the corresponding TCP/UDP
+ context. Multiple BE lcores can be specified.
+
+ port - DPDK port id (RSS is supported when multiple lcores are
+ specified for a port). It is a mandatory option.
+ lcore - EAL lcore id to handle IO over that port (rx_burst/tx_burst).
+ Several ports can be managed by the same lcore, and the same
+ port can be managed by more than one lcore.
+ It is a mandatory option. At least one lcore option has to be
+ specified. An lcore range can be given in one lcore option,
+ e.g. lcore=2-3,lcore=6 will enable lcores 2, 3, and 6 to
+ handle BE.
+ rx_offload - RX HW offload capabilities to enable/use on this port
+ (bitmask of DEV_RX_OFFLOAD_* values). It is optional.
+ tx_offload - TX HW offload capabilities to enable/use on this port
+ (bitmask of DEV_TX_OFFLOAD_* values). It is optional.
+ mtu - MTU to be used on that port (= application data size + L2/L3/L4
+ headers sizes, default=1514).
+ ipv4 - ipv4 address to assign to that port.
+ ipv6 - ipv6 address to assign to that port.
+
+ At least one of the ipv4/ipv6 values has to be specified for each port.
+
+3.1 RSS
+
+ If multiple lcores are specified per DPDK port, the following RSS hash
+ will be enabled on that port:
+ ETH_RSS_UDP or ETH_RSS_TCP.
+
+ The RSS queue qid will handle a stream according to the TCP/UDP source
+ port of the stream. The qid can be calculated as below:
+
+ qid = (src_port % power_of_2(n)) % n
+
+ where n is the number of lcores used to manage the DPDK port.
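+
+ As an illustration only, a minimal C sketch of that mapping (mirroring the
+ application's verify_queue_for_port() helper; rss_qid() is a hypothetical
+ name, and rte_align32pow2() comes from DPDK's <rte_common.h>):
+
+ static inline uint32_t
+ rss_qid(uint16_t src_port, uint32_t n) /* n = BE lcores on the port */
+ {
+     uint32_t align_n = rte_align32pow2(n);
+     return (src_port % align_n) % n;
+ }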
+
+4. EXAMPLES
+
+4.1 Sample testbed
+
++----------------------------+ +-------------------------------+
+| TLDK Box | | Linux Box |
+| | | |
+| port 0 +----------------+ port 0 |
+| 192.168.1.1 | | 192.168.1.2 |
+| 2001:4860:b002::1 | | 2001:4860:b002::2 |
+| AA:BB:CC:DD:EE:F1 | | AA:BB:CC:DD:EE:F2 |
++----------------------------+ +-------------------------------+
+
+4.2 UDP, "rx" mode, IPv4-only, Single core
+
+ This example shows receiving data from an IPv4 stream. The TLDK UDP server
+ runs on a single core, where both BE and FE run on cpu core 3.
+
+ be.cfg file contains:
+
+ port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (udp server listening to port 6000):
+
+ lcore=3,op=rx,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -U port=0,lcore=3,ipv4=192.168.1.1
+
+ This will create TLDK UDP context on lcore=3 (BE lcore) to manage
+ DPDK port 0. The port 0 will have IPv4 address 192.168.1.1.
+ All the streams will be in server mode and also managed by lcore 3.
+
+4.3 UDP, "echo" mode, IPv6-only, Multicore
+
+ This example shows receiving data from an IPv6 stream and sending the data
+ back through the same IPv6 stream. The TLDK UDP server runs on multicore
+ where BE runs on cpu core 2 and FE runs on cpu core 3.
+
+ be.cfg file contains:
+
+ port=0,masklen=64,addr=2001:4860:b002::,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (udp server listening to port 6000):
+
+ lcore=3,op=echo,laddr=2001:4860:b002::1,lport=6000,raddr=::,rport=0
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='2,3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -U port=0,lcore=2,ipv6=2001:4860:b002::1
+
+ This will create TLDK UDP context on lcore=2 (BE lcore) to manage
+ DPDK port 0. The port 0 will have IPv6 address 2001:4860:b002::1.
+ All the streams will be in server mode and managed by lcore 3 (FE lcore).
+ In this case, the UDP server will send the incoming data back to the sender.
+
+4.4 TCP, "echo" mode, IPv4-only, Multicore, RX-Offload
+
+ This example shows receiving data from an IPv4 stream and sending the data
+ back through the same IPv4 stream. The TLDK TCP server runs on multicore
+ where BE runs on cpu core 2 and FE runs on cpu core 3. The BE also uses
+ receive offload features of the NIC.
+
+ be.cfg file contains:
+
+ port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (tcp server listening to port 6000):
+
+ lcore=3,op=echo,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='2,3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -T -L port=0,lcore=2,rx_offload=0xf,tx_offload=0,\
+ ipv4=192.168.1.1
+
+ This will create TLDK TCP context on lcore=2 (BE lcore) to manage
+ DPDK port 0. The port 0 will have IPv4 address 192.168.1.1. The following
+ DPDK RX HW offloads will be enabled on that port.
+ DEV_RX_OFFLOAD_VLAN_STRIP,
+ DEV_RX_OFFLOAD_IPV4_CKSUM,
+ DEV_RX_OFFLOAD_UDP_CKSUM,
+ DEV_RX_OFFLOAD_TCP_CKSUM
+ No HW TX offloads will be enabled.
+ All the streams will be in server mode and managed by lcore 3 (FE core).
+ In this case, the TCP server will send the incoming data back to the sender.
+
+4.5 TCP, "fwd" (proxy) mode, IPv4-to-IPv6, Multi-core, RX-Offload
+
+ This example shows receiving data from an IPv4 stream and forwarding the
+ data to an IPv6 stream. The TLDK TCP server runs on multicore
+ where BE runs on cpu core 2 and FE runs on cpu core 3. The BE also uses
+ receive offload features of the NIC.
+
+ be.cfg file contains:
+
+ port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (tcp server listening to port 6000):
+
+ lcore=3,op=fwd,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0,\
+ fwladdr=::,fwlport=0,fwraddr=2001:4860:b002::2,fwrport=7000
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='2,3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -T -L port=0,lcore=2,rx_offload=0xf,tx_offload=0,\
+ ipv4=192.168.1.1,ipv6=2001:4860:b002::1
+
+ This will create TLDK TCP context on lcore=2 (BE lcore) to manage
+ DPDK port 0. The port 0 will have IPv4 address 192.168.1.1. The following
+ DPDK RX HW offloads will be enabled on that port.
+ DEV_RX_OFFLOAD_VLAN_STRIP,
+ DEV_RX_OFFLOAD_IPV4_CKSUM,
+ DEV_RX_OFFLOAD_UDP_CKSUM,
+ DEV_RX_OFFLOAD_TCP_CKSUM
+ No HW TX offloads will be enabled.
+ All the streams will be in server mode and managed by lcore 3 (FE core).
+ In this case, the IPv4 TCP server will forward the incoming data to the IPv6
+ TCP server 2001:4860:b002::2 listening to port 7000.
+
+4.6 TCP, "echo" mode, RSS, IPv4-only, Multicore, RX-Offload
+
+ This example shows receiving data from an IPv4 stream and sending the data
+ back through the same IPv4 stream. The TLDK TCP server runs on multicore
+ where BE runs on cpu cores 1-2 and FE runs on cpu core 3. As BE runs on
+ multiple cores, the Receive Side Scaling (RSS) feature will be enabled
+ automatically.
+ The BE also uses receive offload features of the NIC.
+
+ be.cfg file contains:
+
+ port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (tcp server listening to port 6000):
+
+ lcore=3,op=echo,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0
+ lcore=3,op=echo,laddr=192.168.1.1,lport=6001,raddr=0.0.0.0,rport=0
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='1,2,3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -T -L port=0,lcore="1-2",rx_offload=0xf,tx_offload=0,\
+ ipv4=192.168.1.1
+
+ This will create TLDK TCP contexts on lcores 1-2 (BE lcores) to manage
+ DPDK port 0. The port 0 will have IPv4 address 192.168.1.1. The following
+ DPDK RX HW offloads will be enabled on that port.
+ DEV_RX_OFFLOAD_VLAN_STRIP,
+ DEV_RX_OFFLOAD_IPV4_CKSUM,
+ DEV_RX_OFFLOAD_UDP_CKSUM,
+ DEV_RX_OFFLOAD_TCP_CKSUM
+ No HW TX offloads will be enabled.
+ All the streams will be in server mode and managed by lcore 3 (FE core).
+ In this case, the TCP server will send the incoming data back to the sender.
+
+ As RSS is enabled, all packets with destination ports 6000 and 6001 will
+ be managed by HW queue 0 and queue 1 respectively. Please note that RSS
+ is not supported on the interface when both IPv4 and IPv6 are enabled;
+ only one of IPv4 or IPv6 may be enabled on the port.
diff --git a/examples/l4fwd/be.cfg b/examples/l4fwd/be.cfg
new file mode 100644
index 0000000..8e6d983
--- /dev/null
+++ b/examples/l4fwd/be.cfg
@@ -0,0 +1,5 @@
+#
+# l4fwd BE config file example
+#
+port=0,masklen=16,addr=192.168.0.0,mac=01:de:ad:be:ef:01
+port=0,addr=2001:4860:b002::,masklen=64,mac=01:de:ad:be:ef:01
diff --git a/examples/l4fwd/common.h b/examples/l4fwd/common.h
new file mode 100644
index 0000000..ff8ee7a
--- /dev/null
+++ b/examples/l4fwd/common.h
@@ -0,0 +1,662 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_H_
+#define COMMON_H_
+
+#include <rte_arp.h>
+
+static void
+sig_handle(int signum)
+{
+ RTE_LOG(ERR, USER1, "%s(%d)\n", __func__, signum);
+ force_quit = 1;
+}
+
+static void
+netfe_stream_dump(const struct netfe_stream *fes, struct sockaddr_storage *la,
+ struct sockaddr_storage *ra)
+{
+ struct sockaddr_in *l4, *r4;
+ struct sockaddr_in6 *l6, *r6;
+ uint16_t lport, rport;
+ char laddr[INET6_ADDRSTRLEN];
+ char raddr[INET6_ADDRSTRLEN];
+
+ if (la->ss_family == AF_INET) {
+
+ l4 = (struct sockaddr_in *)la;
+ r4 = (struct sockaddr_in *)ra;
+
+ lport = l4->sin_port;
+ rport = r4->sin_port;
+
+ } else if (la->ss_family == AF_INET6) {
+
+ l6 = (struct sockaddr_in6 *)la;
+ r6 = (struct sockaddr_in6 *)ra;
+
+ lport = l6->sin6_port;
+ rport = r6->sin6_port;
+
+ } else {
+ RTE_LOG(ERR, USER1, "stream@%p - unknown family=%hu\n",
+ fes->s, la->ss_family);
+ return;
+ }
+
+ format_addr(la, laddr, sizeof(laddr));
+ format_addr(ra, raddr, sizeof(raddr));
+
+ RTE_LOG(INFO, USER1, "stream@%p={s=%p,"
+ "family=%hu,proto=%s,laddr=%s,lport=%hu,raddr=%s,rport=%hu;"
+ "stats={"
+ "rxp=%" PRIu64 ",rxb=%" PRIu64
+ ",txp=%" PRIu64 ",txb=%" PRIu64
+ ",drops=%" PRIu64 ","
+ "rxev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "],"
+ "txev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "]"
+ "};};\n",
+ fes, fes->s, la->ss_family, proto_name[fes->proto],
+ laddr, ntohs(lport), raddr, ntohs(rport),
+ fes->stat.rxp, fes->stat.rxb,
+ fes->stat.txp, fes->stat.txb,
+ fes->stat.drops,
+ fes->stat.rxev[TLE_SEV_IDLE],
+ fes->stat.rxev[TLE_SEV_DOWN],
+ fes->stat.rxev[TLE_SEV_UP],
+ fes->stat.txev[TLE_SEV_IDLE],
+ fes->stat.txev[TLE_SEV_DOWN],
+ fes->stat.txev[TLE_SEV_UP]);
+}
+
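+/*
+ * Take up to num streams from the head of the given list.
+ * Returns the number of streams actually retrieved.
+ */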
+static inline uint32_t
+netfe_get_streams(struct netfe_stream_list *list, struct netfe_stream *rs[],
+ uint32_t num)
+{
+ struct netfe_stream *s;
+ uint32_t i, n;
+
+ n = RTE_MIN(list->num, num);
+ for (i = 0, s = LIST_FIRST(&list->head);
+ i != n;
+ i++, s = LIST_NEXT(s, link)) {
+ rs[i] = s;
+ }
+
+ if (s == NULL)
+ /* we retrieved all free entries */
+ LIST_INIT(&list->head);
+ else
+ LIST_FIRST(&list->head) = s;
+
+ list->num -= n;
+
+ return n;
+}
+
+static inline struct netfe_stream *
+netfe_get_stream(struct netfe_stream_list *list)
+{
+ struct netfe_stream *s;
+
+ s = NULL;
+ if (list->num == 0)
+ return s;
+
+ netfe_get_streams(list, &s, 1);
+
+ return s;
+}
+
+static inline void
+netfe_put_streams(struct netfe_lcore *fe, struct netfe_stream_list *list,
+ struct netfe_stream *fs[], uint32_t num)
+{
+ uint32_t i, n;
+
+ n = RTE_MIN(fe->snum - list->num, num);
+ if (n != num)
+ RTE_LOG(ERR, USER1, "%s: list overflow by %u\n", __func__,
+ num - n);
+
+ for (i = 0; i != n; i++)
+ LIST_INSERT_HEAD(&list->head, fs[i], link);
+ list->num += n;
+}
+
+static inline void
+netfe_put_stream(struct netfe_lcore *fe, struct netfe_stream_list *list,
+ struct netfe_stream *s)
+{
+ if (list->num == fe->snum) {
+ RTE_LOG(ERR, USER1, "%s: list is full\n", __func__);
+ return;
+ }
+
+ netfe_put_streams(fe, list, &s, 1);
+}
+
+static inline void
+netfe_rem_stream(struct netfe_stream_list *list, struct netfe_stream *s)
+{
+ LIST_REMOVE(s, link);
+ list->num--;
+}
+
+static void
+netfe_stream_close(struct netfe_lcore *fe, struct netfe_stream *fes)
+{
+ tle_stream_close(fes->s);
+ tle_event_free(fes->txev);
+ tle_event_free(fes->rxev);
+ tle_event_free(fes->erev);
+ memset(fes, 0, sizeof(*fes));
+ netfe_put_stream(fe, &fe->free, fes);
+}
+
+/*
+ * Helper function: verify that the queue handles the given local port.
+ */
+static uint8_t
+verify_queue_for_port(const struct netbe_dev *prtq, const uint16_t lport)
+{
+ uint32_t align_nb_q, qid;
+
+ align_nb_q = rte_align32pow2(prtq->port.nb_lcore);
+ qid = (lport % align_nb_q) % prtq->port.nb_lcore;
+ if (prtq->rxqid == qid)
+ return 1;
+
+ return 0;
+}
+
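+/*
+ * Free all packets held in the buffer; returns the total number of
+ * payload bytes released.
+ */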
+static inline size_t
+pkt_buf_empty(struct pkt_buf *pb)
+{
+ uint32_t i;
+ size_t x;
+
+ x = 0;
+ for (i = 0; i != pb->num; i++) {
+ x += pb->pkt[i]->pkt_len;
+ NETFE_PKT_DUMP(pb->pkt[i]);
+ rte_pktmbuf_free(pb->pkt[i]);
+ }
+
+ pb->num = 0;
+ return x;
+}
+
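+/*
+ * Refill the packet buffer with mbufs of dlen bytes each, allocated from
+ * the mempool serving the lcore's NUMA socket.
+ */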
+static inline void
+pkt_buf_fill(uint32_t lcore, struct pkt_buf *pb, uint32_t dlen)
+{
+ uint32_t i;
+ int32_t sid;
+
+ sid = rte_lcore_to_socket_id(lcore) + 1;
+
+ for (i = pb->num; i != RTE_DIM(pb->pkt); i++) {
+ pb->pkt[i] = rte_pktmbuf_alloc(mpool[sid]);
+ if (pb->pkt[i] == NULL)
+ break;
+ rte_pktmbuf_append(pb->pkt[i], dlen);
+ }
+
+ pb->num = i;
+}
+
+static int
+netbe_lcore_setup(struct netbe_lcore *lc)
+{
+ uint32_t i;
+ int32_t rc;
+
+ RTE_LOG(NOTICE, USER1, "%s:(lcore=%u, proto=%s, ctx=%p) start\n",
+ __func__, lc->id, proto_name[lc->proto], lc->ctx);
+
+ /*
+ * TODO: find a better way, or make this timeout a cmdline parameter.
+ * Wait for FE lcores to start, so the BE doesn't drop any packets
+ * just because the corresponding streams are not opened yet by the FE.
+ * Useful when used with pcap PMDs.
+ */
+ rte_delay_ms(10);
+
+ rc = 0;
+ for (i = 0; i != lc->prtq_num && rc == 0; i++) {
+ RTE_LOG(NOTICE, USER1,
+ "%s:%u(port=%u, q=%u, proto=%s, dev=%p)\n",
+ __func__, i, lc->prtq[i].port.id, lc->prtq[i].rxqid,
+ proto_name[lc->proto], lc->prtq[i].dev);
+
+ rc = setup_rx_cb(&lc->prtq[i].port, lc, lc->prtq[i].rxqid,
+ becfg.arp);
+ if (rc < 0)
+ return rc;
+ }
+
+ if (rc == 0)
+ RTE_PER_LCORE(_be) = lc;
+ return rc;
+}
+
+static void
+netbe_lcore_clear(void)
+{
+ uint32_t i, j;
+ struct netbe_lcore *lc;
+
+ lc = RTE_PER_LCORE(_be);
+ if (lc == NULL)
+ return;
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u, proto=%s, ctx: %p) finish\n",
+ __func__, lc->id, proto_name[lc->proto], lc->ctx);
+ for (i = 0; i != lc->prtq_num; i++) {
+ RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, q=%u, lcore=%u, dev=%p) "
+ "rx_stats={"
+ "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "}, "
+ "tx_stats={"
+ "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "};\n",
+ __func__, i, lc->prtq[i].port.id, lc->prtq[i].rxqid,
+ lc->id,
+ lc->prtq[i].dev,
+ lc->prtq[i].rx_stat.in,
+ lc->prtq[i].rx_stat.up,
+ lc->prtq[i].rx_stat.drop,
+ lc->prtq[i].tx_stat.down,
+ lc->prtq[i].tx_stat.out,
+ lc->prtq[i].tx_stat.drop);
+ }
+
+ RTE_LOG(NOTICE, USER1, "tcp_stat={\n");
+ for (i = 0; i != RTE_DIM(lc->tcp_stat.flags); i++) {
+ if (lc->tcp_stat.flags[i] != 0)
+ RTE_LOG(NOTICE, USER1, "[flag=%#x]==%" PRIu64 ";\n",
+ i, lc->tcp_stat.flags[i]);
+ }
+ RTE_LOG(NOTICE, USER1, "};\n");
+
+ for (i = 0; i != lc->prtq_num; i++)
+ for (j = 0; j != lc->prtq[i].tx_buf.num; j++)
+ rte_pktmbuf_free(lc->prtq[i].tx_buf.pkt[j]);
+
+ RTE_PER_LCORE(_be) = NULL;
+}
+
+static int
+netbe_add_ipv4_route(struct netbe_lcore *lc, const struct netbe_dest *dst,
+ uint8_t idx)
+{
+ int32_t rc;
+ uint32_t addr, depth;
+ char str[INET_ADDRSTRLEN];
+
+ depth = dst->prfx;
+ addr = rte_be_to_cpu_32(dst->ipv4.s_addr);
+
+ inet_ntop(AF_INET, &dst->ipv4, str, sizeof(str));
+ rc = rte_lpm_add(lc->lpm4, addr, depth, idx);
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p,"
+ "ipv4=%s/%u,mtu=%u,"
+ "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) "
+ "returns %d;\n",
+ __func__, lc->id, dst->port, lc->dst4[idx].dev,
+ str, depth, lc->dst4[idx].mtu,
+ dst->mac.addr_bytes[0], dst->mac.addr_bytes[1],
+ dst->mac.addr_bytes[2], dst->mac.addr_bytes[3],
+ dst->mac.addr_bytes[4], dst->mac.addr_bytes[5],
+ rc);
+ return rc;
+}
+
+static int
+netbe_add_ipv6_route(struct netbe_lcore *lc, const struct netbe_dest *dst,
+ uint8_t idx)
+{
+ int32_t rc;
+ uint32_t depth;
+ char str[INET6_ADDRSTRLEN];
+
+ depth = dst->prfx;
+
+ rc = rte_lpm6_add(lc->lpm6, (uint8_t *)(uintptr_t)dst->ipv6.s6_addr,
+ depth, idx);
+
+ inet_ntop(AF_INET6, &dst->ipv6, str, sizeof(str));
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p,"
+ "ipv6=%s/%u,mtu=%u,"
+ "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) "
+ "returns %d;\n",
+ __func__, lc->id, dst->port, lc->dst6[idx].dev,
+ str, depth, lc->dst6[idx].mtu,
+ dst->mac.addr_bytes[0], dst->mac.addr_bytes[1],
+ dst->mac.addr_bytes[2], dst->mac.addr_bytes[3],
+ dst->mac.addr_bytes[4], dst->mac.addr_bytes[5],
+ rc);
+ return rc;
+}
+
+static void
+fill_dst(struct tle_dest *dst, struct netbe_dev *bed,
+ const struct netbe_dest *bdp, uint16_t l3_type, int32_t sid,
+ uint8_t proto_id)
+{
+ struct ether_hdr *eth;
+ struct ipv4_hdr *ip4h;
+ struct ipv6_hdr *ip6h;
+
+ dst->dev = bed->dev;
+ dst->head_mp = frag_mpool[sid + 1];
+ dst->mtu = RTE_MIN(bdp->mtu, bed->port.mtu);
+ dst->l2_len = sizeof(*eth);
+
+ eth = (struct ether_hdr *)dst->hdr;
+
+ ether_addr_copy(&bed->port.mac, &eth->s_addr);
+ ether_addr_copy(&bdp->mac, &eth->d_addr);
+ eth->ether_type = rte_cpu_to_be_16(l3_type);
+
+ if (l3_type == ETHER_TYPE_IPv4) {
+ dst->l3_len = sizeof(*ip4h);
+ ip4h = (struct ipv4_hdr *)(eth + 1);
+ ip4h->version_ihl = 4 << 4 |
+ sizeof(*ip4h) / IPV4_IHL_MULTIPLIER;
+ ip4h->time_to_live = 64;
+ ip4h->next_proto_id = proto_id;
+ } else if (l3_type == ETHER_TYPE_IPv6) {
+ dst->l3_len = sizeof(*ip6h);
+ ip6h = (struct ipv6_hdr *)(eth + 1);
+ ip6h->vtc_flow = rte_cpu_to_be_32(6 << 28); /* version=6 in top nibble */
+ ip6h->proto = proto_id;
+ ip6h->hop_limits = 64;
+ }
+}
+
+static int
+netbe_add_dest(struct netbe_lcore *lc, uint32_t dev_idx, uint16_t family,
+ const struct netbe_dest *dst, uint32_t dnum)
+{
+ int32_t rc, sid;
+ uint8_t proto;
+ uint16_t l3_type;
+ uint32_t i, n, m;
+ struct tle_dest *dp;
+
+ if (family == AF_INET) {
+ n = lc->dst4_num;
+ dp = lc->dst4 + n;
+ m = RTE_DIM(lc->dst4);
+ l3_type = ETHER_TYPE_IPv4;
+ } else {
+ n = lc->dst6_num;
+ dp = lc->dst6 + n;
+ m = RTE_DIM(lc->dst6);
+ l3_type = ETHER_TYPE_IPv6;
+ }
+
+ if (n + dnum >= m) {
+ RTE_LOG(ERR, USER1, "%s(lcore=%u, family=%hu, dnum=%u) exceeds "
+ "maximum allowed number of destinations(%u);\n",
+ __func__, lc->id, family, dnum, m);
+ return -ENOSPC;
+ }
+
+ sid = rte_lcore_to_socket_id(lc->id);
+ proto = (becfg.proto == TLE_PROTO_UDP) ? IPPROTO_UDP : IPPROTO_TCP;
+ rc = 0;
+
+ for (i = 0; i != dnum && rc == 0; i++) {
+ fill_dst(dp + i, lc->prtq + dev_idx, dst + i, l3_type, sid,
+ proto);
+ if (family == AF_INET)
+ rc = netbe_add_ipv4_route(lc, dst + i, n + i);
+ else
+ rc = netbe_add_ipv6_route(lc, dst + i, n + i);
+ }
+
+ if (family == AF_INET)
+ lc->dst4_num = n + i;
+ else
+ lc->dst6_num = n + i;
+
+ return rc;
+}
+
+static inline void
+fill_arp_reply(struct netbe_dev *dev, struct rte_mbuf *m)
+{
+ struct ether_hdr *eth;
+ struct arp_hdr *ahdr;
+ struct arp_ipv4 *adata;
+ uint32_t tip;
+
+ /* set up the ethernet data */
+ eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ eth->d_addr = eth->s_addr;
+ eth->s_addr = dev->port.mac;
+
+ /* set up the arp data */
+ ahdr = rte_pktmbuf_mtod_offset(m, struct arp_hdr *, m->l2_len);
+ adata = &ahdr->arp_data;
+
+ ahdr->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY);
+
+ tip = adata->arp_tip;
+ adata->arp_tip = adata->arp_sip;
+ adata->arp_sip = tip;
+
+ adata->arp_tha = adata->arp_sha;
+ adata->arp_sha = dev->port.mac;
+}
+
+/* this is a semi ARP response implementation of RFC 826
+ * in RFC, it algo is as below
+ *
+ * ?Do I have the hardware type in ar$hrd?
+ * Yes: (almost definitely)
+ * [optionally check the hardware length ar$hln]
+ * ?Do I speak the protocol in ar$pro?
+ * Yes:
+ * [optionally check the protocol length ar$pln]
+ * Merge_flag := false
+ * If the pair <protocol type, sender protocol address> is
+ * already in my translation table, update the sender
+ * hardware address field of the entry with the new
+ * information in the packet and set Merge_flag to true.
+ * ?Am I the target protocol address?
+ * Yes:
+ * If Merge_flag is false, add the triplet <protocol type,
+ * sender protocol address, sender hardware address> to
+ * the translation table.
+ * ?Is the opcode ares_op$REQUEST? (NOW look at the opcode!!)
+ * Yes:
+ * Swap hardware and protocol fields, putting the local
+ * hardware and protocol addresses in the sender fields.
+ * Set the ar$op field to ares_op$REPLY
+ * Send the packet to the (new) target hardware address on
+ * the same hardware on which the request was received.
+ *
+ * So, in our implementation we skip updating the local cache,
+ * we assume that local cache is ok, so we just reply the packet.
+ */
+
+static inline void
+send_arp_reply(struct netbe_dev *dev, struct pkt_buf *pb)
+{
+ uint32_t i, n, num;
+ struct rte_mbuf **m;
+
+ m = pb->pkt;
+ num = pb->num;
+ for (i = 0; i != num; i++) {
+ fill_arp_reply(dev, m[i]);
+ NETBE_PKT_DUMP(m[i]);
+ }
+
+ n = rte_eth_tx_burst(dev->port.id, dev->txqid, m, num);
+ NETBE_TRACE("%s: sent n=%u arp replies\n", __func__, n);
+
+ /* free mbufs with unsent arp response */
+ for (i = n; i != num; i++)
+ rte_pktmbuf_free(m[i]);
+
+ pb->num = 0;
+}
+
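+/*
+ * BE RX path: read a burst of packets from the port RX queue, feed them
+ * into the TLDK context via tle_rx_bulk(), free any rejected packets and
+ * answer pending ARP requests.
+ */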
+static inline void
+netbe_rx(struct netbe_lcore *lc, uint32_t pidx)
+{
+ uint32_t j, k, n;
+ struct rte_mbuf *pkt[MAX_PKT_BURST];
+ struct rte_mbuf *rp[MAX_PKT_BURST];
+ int32_t rc[MAX_PKT_BURST];
+ struct pkt_buf *abuf;
+
+ n = rte_eth_rx_burst(lc->prtq[pidx].port.id,
+ lc->prtq[pidx].rxqid, pkt, RTE_DIM(pkt));
+
+ if (n != 0) {
+ lc->prtq[pidx].rx_stat.in += n;
+ NETBE_TRACE("%s(%u): rte_eth_rx_burst(%u, %u) returns %u\n",
+ __func__, lc->id, lc->prtq[pidx].port.id,
+ lc->prtq[pidx].rxqid, n);
+
+ k = tle_rx_bulk(lc->prtq[pidx].dev, pkt, rp, rc, n);
+
+ lc->prtq[pidx].rx_stat.up += k;
+ lc->prtq[pidx].rx_stat.drop += n - k;
+ NETBE_TRACE("%s(%u): tle_%s_rx_bulk(%p, %u) returns %u\n",
+ __func__, lc->id, proto_name[lc->proto],
+ lc->prtq[pidx].dev, n, k);
+
+ for (j = 0; j != n - k; j++) {
+ NETBE_TRACE("%s:%d(port=%u) rp[%u]={%p, %d};\n",
+ __func__, __LINE__, lc->prtq[pidx].port.id,
+ j, rp[j], rc[j]);
+ rte_pktmbuf_free(rp[j]);
+ }
+ }
+
+ /* respond to incoming arp requests */
+ abuf = &lc->prtq[pidx].arp_buf;
+ if (abuf->num == 0)
+ return;
+
+ send_arp_reply(&lc->prtq[pidx], abuf);
+}
+
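+/*
+ * BE TX path: when at least half of tx_buf is free, pull packets ready to
+ * be sent from the TLDK device via tle_tx_bulk(), then transmit the
+ * buffered packets with rte_eth_tx_burst(); unsent packets stay in tx_buf
+ * for the next iteration.
+ */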
+static inline void
+netbe_tx(struct netbe_lcore *lc, uint32_t pidx)
+{
+ uint32_t j, k, n;
+ struct rte_mbuf **mb;
+
+ j = 0;
+ n = lc->prtq[pidx].tx_buf.num;
+ k = RTE_DIM(lc->prtq[pidx].tx_buf.pkt) - n;
+ mb = lc->prtq[pidx].tx_buf.pkt;
+
+ if (k >= RTE_DIM(lc->prtq[pidx].tx_buf.pkt) / 2) {
+ j = tle_tx_bulk(lc->prtq[pidx].dev, mb + n, k);
+ n += j;
+ lc->prtq[pidx].tx_stat.down += j;
+ }
+
+ if (n == 0)
+ return;
+
+ NETBE_TRACE("%s(%u): tle_%s_tx_bulk(%p) returns %u,\n"
+ "total pkts to send: %u\n",
+ __func__, lc->id, proto_name[lc->proto],
+ lc->prtq[pidx].dev, j, n);
+
+ for (j = 0; j != n; j++)
+ NETBE_PKT_DUMP(mb[j]);
+
+ k = rte_eth_tx_burst(lc->prtq[pidx].port.id,
+ lc->prtq[pidx].txqid, mb, n);
+
+ lc->prtq[pidx].tx_stat.out += k;
+ lc->prtq[pidx].tx_stat.drop += n - k;
+ NETBE_TRACE("%s(%u): rte_eth_tx_burst(%u, %u, %u) returns %u\n",
+ __func__, lc->id, lc->prtq[pidx].port.id, lc->prtq[pidx].txqid,
+ n, k);
+
+ lc->prtq[pidx].tx_buf.num = n - k;
+ if (k != 0)
+ for (j = k; j != n; j++)
+ mb[j - k] = mb[j];
+}
+
+static inline void
+netbe_lcore(void)
+{
+ uint32_t i;
+ struct netbe_lcore *lc;
+
+ lc = RTE_PER_LCORE(_be);
+ if (lc == NULL)
+ return;
+
+ for (i = 0; i != lc->prtq_num; i++) {
+ netbe_rx(lc, i);
+ netbe_tx(lc, i);
+ }
+}
+
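+/*
+ * FE RX path: pull received packets from the stream into its packet
+ * buffer; idle the RX event when the buffer is full and, for echo/fwd
+ * streams, wake the TX event when data arrives into an empty buffer.
+ */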
+static inline void
+netfe_rx_process(__rte_unused uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t k, n;
+
+ n = fes->pbuf.num;
+ k = RTE_DIM(fes->pbuf.pkt) - n;
+
+ /* packet buffer is full, can't receive any new packets. */
+ if (k == 0) {
+ tle_event_idle(fes->rxev);
+ fes->stat.rxev[TLE_SEV_IDLE]++;
+ return;
+ }
+
+ n = tle_stream_recv(fes->s, fes->pbuf.pkt + n, k);
+ if (n == 0)
+ return;
+
+ NETFE_TRACE("%s(%u): tle_%s_stream_recv(%p, %u) returns %u\n",
+ __func__, lcore, proto_name[fes->proto], fes->s, k, n);
+
+ fes->pbuf.num += n;
+ fes->stat.rxp += n;
+
+ /* free all received mbufs. */
+ if (fes->op == RXONLY)
+ fes->stat.rxb += pkt_buf_empty(&fes->pbuf);
+ /* mark stream as writable */
+ else if (k == RTE_DIM(fes->pbuf.pkt)) {
+ if (fes->op == RXTX) {
+ tle_event_active(fes->txev, TLE_SEV_UP);
+ fes->stat.txev[TLE_SEV_UP]++;
+ } else if (fes->op == FWD) {
+ tle_event_raise(fes->txev);
+ fes->stat.txev[TLE_SEV_UP]++;
+ }
+ }
+}
+
+#endif /* COMMON_H_ */
diff --git a/examples/l4fwd/dpdk_legacy.h b/examples/l4fwd/dpdk_legacy.h
new file mode 100644
index 0000000..84fab17
--- /dev/null
+++ b/examples/l4fwd/dpdk_legacy.h
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MAIN_DPDK_LEGACY_H_
+#define MAIN_DPDK_LEGACY_H_
+
+#include "dpdk_version.h"
+
+/*
+ * IPv4 destination lookup callback (used for both TCP and UDP contexts).
+ */
+static int
+lpm4_dst_lookup(void *data, const struct in_addr *addr,
+ struct tle_dest *res)
+{
+ int32_t rc;
+#ifdef DPDK_VERSION_GE_1604
+ uint32_t idx;
+#else
+ uint8_t idx;
+#endif
+ struct netbe_lcore *lc;
+ struct tle_dest *dst;
+
+ lc = data;
+
+ rc = rte_lpm_lookup(lc->lpm4, rte_be_to_cpu_32(addr->s_addr), &idx);
+ if (rc == 0) {
+ dst = &lc->dst4[idx];
+ rte_memcpy(res, dst, dst->l2_len + dst->l3_len +
+ offsetof(struct tle_dest, hdr));
+ }
+ return rc;
+}
+
+static int
+lcore_lpm_init(struct netbe_lcore *lc)
+{
+ int32_t sid;
+ char str[RTE_LPM_NAMESIZE];
+#ifdef DPDK_VERSION_GE_1604
+ const struct rte_lpm_config lpm4_cfg = {
+ .max_rules = MAX_RULES,
+ .number_tbl8s = MAX_TBL8,
+ };
+#endif
+ const struct rte_lpm6_config lpm6_cfg = {
+ .max_rules = MAX_RULES,
+ .number_tbl8s = MAX_TBL8,
+ };
+
+ sid = rte_lcore_to_socket_id(lc->id);
+
+ snprintf(str, sizeof(str), "LPM4%u\n", lc->id);
+#ifdef DPDK_VERSION_GE_1604
+ lc->lpm4 = rte_lpm_create(str, sid, &lpm4_cfg);
+#else
+ lc->lpm4 = rte_lpm_create(str, sid, MAX_RULES, 0);
+#endif
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u): lpm4=%p;\n",
+ __func__, lc->id, lc->lpm4);
+ if (lc->lpm4 == NULL)
+ return -ENOMEM;
+
+ snprintf(str, sizeof(str), "LPM6%u\n", lc->id);
+ lc->lpm6 = rte_lpm6_create(str, sid, &lpm6_cfg);
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u): lpm6=%p;\n",
+ __func__, lc->id, lc->lpm6);
+ if (lc->lpm6 == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
+ * Helper function: finds the BE by given local and remote addresses.
+ */
+static int
+netbe_find4(const struct in_addr *laddr, const uint16_t lport,
+ const struct in_addr *raddr, const uint32_t belc)
+{
+ uint32_t i, j;
+#ifdef DPDK_VERSION_GE_1604
+ uint32_t idx;
+#else
+ uint8_t idx;
+#endif
+ struct netbe_lcore *bc;
+
+ /* we have exactly one BE, use it for all traffic */
+ if (becfg.cpu_num == 1)
+ return 0;
+
+ /* search by provided be_lcore */
+ if (belc != LCORE_ID_ANY) {
+ for (i = 0; i != becfg.cpu_num; i++) {
+ bc = becfg.cpu + i;
+ if (belc == bc->id)
+ return i;
+ }
+ RTE_LOG(NOTICE, USER1, "%s: no stream with be_lcore=%u\n",
+ __func__, belc);
+ return -ENOENT;
+ }
+
+ /* search by local address */
+ if (laddr->s_addr != INADDR_ANY) {
+ for (i = 0; i != becfg.cpu_num; i++) {
+ bc = becfg.cpu + i;
+ /* search by queue for the local port */
+ for (j = 0; j != bc->prtq_num; j++) {
+ if (laddr->s_addr == bc->prtq[j].port.ipv4) {
+
+ if (lport == 0)
+ return i;
+
+ if (verify_queue_for_port(bc->prtq + j,
+ lport) != 0)
+ return i;
+ }
+ }
+ }
+ }
+
+ /* search by remote address */
+ if (raddr->s_addr != INADDR_ANY) {
+ for (i = 0; i != becfg.cpu_num; i++) {
+ bc = becfg.cpu + i;
+ if (rte_lpm_lookup(bc->lpm4,
+ rte_be_to_cpu_32(raddr->s_addr),
+ &idx) == 0) {
+
+ if (lport == 0)
+ return i;
+
+ /* search by queue for the local port */
+ for (j = 0; j != bc->prtq_num; j++)
+ if (verify_queue_for_port(bc->prtq + j,
+ lport) != 0)
+ return i;
+ }
+ }
+ }
+
+ return -ENOENT;
+}
+
+#endif /* MAIN_DPDK_LEGACY_H_ */
diff --git a/examples/l4fwd/dpdk_version.h b/examples/l4fwd/dpdk_version.h
new file mode 100644
index 0000000..43235c8
--- /dev/null
+++ b/examples/l4fwd/dpdk_version.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DPDK_VERSION_H_
+#define DPDK_VERSION_H_
+
+#include <rte_version.h>
+
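+/*
+ * Define DPDK_VERSION_GE_1604 when building against DPDK 16.04 or newer,
+ * covering both the old (RTE_VER_MAJOR/RTE_VER_MINOR) and the newer
+ * calendar-based (RTE_VER_YEAR) version macros.
+ */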
+#ifdef RTE_VER_MAJOR
+#if RTE_VER_MAJOR >= 16 && RTE_VER_MINOR >= 4
+#define DPDK_VERSION_GE_1604
+#endif
+#elif defined(RTE_VER_YEAR)
+#if RTE_VERSION_NUM(16, 4, 0, 0) <= RTE_VERSION
+#define DPDK_VERSION_GE_1604
+#endif
+#else
+#error "RTE_VER_MAJOR and RTE_VER_YEAR are undefined!"
+#endif
+
+#endif /* DPDK_VERSION_H_ */
diff --git a/examples/l4fwd/fe.cfg b/examples/l4fwd/fe.cfg
new file mode 100644
index 0000000..2706323
--- /dev/null
+++ b/examples/l4fwd/fe.cfg
@@ -0,0 +1,24 @@
+#
+# l4fwd FE config file example
+#
+
+# open IPv4 stream with local_addr=192.168.1.233:32768,
+# and remote_addr as wildcard (any remote address/port allowed).
+# use it in echo mode - for any received packet, send it back to the source
+lcore=3,op=echo,laddr=192.168.1.233,lport=0x8000,raddr=0.0.0.0,rport=0
+
+# open IPv4 stream with specified local/remote address/port and
+# do send only over that stream.
+lcore=3,op=tx,laddr=192.168.1.233,lport=0x8001,raddr=192.168.1.56,rport=0x200,txlen=72
+
+# open IPv6 stream with specified local port (512) probably over multiple
+# eth ports, and do recv only over that stream.
+lcore=3,op=rx,laddr=::,lport=0x200,raddr=::,rport=0,txlen=72
+
+# fwd mode example.
+# open IPv4 stream on local port 11211 (memcached) over all possible ports.
+# for each new flow, a sort of tunnel will be created, i.e.:
+# a new stream will be opened to communicate with the forwarding remote
+# address, so all packets with <laddr=A:11211,raddr=X:N> will be forwarded
+# to <laddr=[B]:M,raddr=[2001:4860:b002::56]:11211> and vice-versa.
+lcore=3,op=fwd,laddr=0.0.0.0,lport=11211,raddr=0.0.0.0,rport=0,fwladdr=::,fwlport=0,fwraddr=2001:4860:b002::56,fwrport=11211
diff --git a/examples/l4fwd/fwdtbl.h b/examples/l4fwd/fwdtbl.h
new file mode 100644
index 0000000..1c4265e
--- /dev/null
+++ b/examples/l4fwd/fwdtbl.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __FWDTBL_H__
+#define __FWDTBL_H__
+
+struct fwd4_key {
+ uint32_t port;
+ struct in_addr addr;
+} __attribute__((__packed__));
+
+struct fwd6_key {
+ uint32_t port;
+ struct in6_addr addr;
+} __attribute__((__packed__));
+
+union fwd_key {
+ struct fwd4_key k4;
+ struct fwd6_key k6;
+};
+
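+/*
+ * Select the IPv4 or IPv6 forwarding hash of the lcore and fill the
+ * lookup key from the given sockaddr.
+ */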
+static struct rte_hash *
+fwd_tbl_key_prep(const struct netfe_lcore *fe, uint16_t family,
+ const struct sockaddr *sa, union fwd_key *key)
+{
+ struct rte_hash *h;
+ const struct sockaddr_in *sin4;
+ const struct sockaddr_in6 *sin6;
+
+ if (family == AF_INET) {
+ h = fe->fw4h;
+ sin4 = (const struct sockaddr_in *)sa;
+ key->k4.port = sin4->sin_port;
+ key->k4.addr = sin4->sin_addr;
+ } else {
+ h = fe->fw6h;
+ sin6 = (const struct sockaddr_in6 *)sa;
+ key->k6.port = sin6->sin6_port;
+ key->k6.addr = sin6->sin6_addr;
+ }
+
+ return h;
+}
+
+static int
+fwd_tbl_add(struct netfe_lcore *fe, uint16_t family, const struct sockaddr *sa,
+ struct netfe_stream *data)
+{
+ int32_t rc;
+ struct rte_hash *h;
+ union fwd_key key;
+
+ h = fwd_tbl_key_prep(fe, family, sa, &key);
+ rc = rte_hash_add_key_data(h, &key, data);
+ return rc;
+}
+
+static struct netfe_stream *
+fwd_tbl_lkp(struct netfe_lcore *fe, uint16_t family, const struct sockaddr *sa)
+{
+ int rc;
+ void *d;
+ struct rte_hash *h;
+ union fwd_key key;
+
+ h = fwd_tbl_key_prep(fe, family, sa, &key);
+ rc = rte_hash_lookup_data(h, &key, &d);
+ if (rc < 0)
+ d = NULL;
+ return d;
+}
+
+static int
+fwd_tbl_init(struct netfe_lcore *fe, uint16_t family, uint32_t lcore)
+{
+ int32_t rc;
+ struct rte_hash **h;
+ struct rte_hash_parameters hprm;
+ char buf[RTE_HASH_NAMESIZE];
+
+ memset(&hprm, 0, sizeof(hprm));
+
+ if (family == AF_INET) {
+ snprintf(buf, sizeof(buf), "fwd4tbl@%u", lcore);
+ h = &fe->fw4h;
+ hprm.key_len = sizeof(struct fwd4_key);
+ } else {
+ snprintf(buf, sizeof(buf), "fwd6tbl@%u", lcore);
+ h = &fe->fw6h;
+ hprm.key_len = sizeof(struct fwd6_key);
+ }
+
+ hprm.name = buf;
+ hprm.entries = RTE_MAX(2 * fe->snum, 0x10U);
+ hprm.socket_id = rte_lcore_to_socket_id(lcore);
+ hprm.hash_func = NULL;
+ hprm.hash_func_init_val = 0;
+
+ *h = rte_hash_create(&hprm);
+ if (*h == NULL)
+ rc = (rte_errno != 0) ? -rte_errno : -ENOMEM;
+ else
+ rc = 0;
+ return rc;
+}
+
+#endif /* __FWDTBL_H__ */
diff --git a/examples/l4fwd/gen_fe_cfg.py b/examples/l4fwd/gen_fe_cfg.py
new file mode 100755
index 0000000..67a8d5c
--- /dev/null
+++ b/examples/l4fwd/gen_fe_cfg.py
@@ -0,0 +1,200 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2016 Intel Corporation.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+# generate FE config script
+#
+
+import sys, optparse, math
+
+def print_usage ():
+ print "Usage: " + sys.argv[0] + " [-f fe_lcore_list] [-b be_lcore_list]"
+ print " [-p start_port] [-n number_of_streams]"
+ print " [-m mode] [-q local_address] [-r remote_address]"
+ print " [-s fwd_local_address] [-t fwd_remote_address]"
+ print " [-l txlen] [-4/-6] [-H]"
+ print
+ print "Options"
+ print " -f, --fe_lcore_list: list of lcores used for FE. Multiple " \
+ "lcores are comma-separated, within double quote"
+ print " -b, --be_lcore_list: list of lcores used for BE. Multiple " \
+ "lcores are comma-separated, within double quote"
+ print " -p, --start_port: starting TCP/UDP port number"
+ print " -n, --number_of_streams: number of streams to be generated"
+ print " -m, --mode: mode of the application. [echo, rx, tx, fwd]"
+ print " -q, --local_address: local address of the stream"
+ print " -r, --remote_address: remote address of the stream"
+ print " -s, --fwd_local_address: forwarding local address of the stream"
+ print " -t, --fwd_remote_address: forwarding remote address of the " \
+ "stream"
+ print " -l, --txlen: transmission length for rx/tx mode"
+ print " -H: if port number to printed in hex format"
+ print " -4/-6: if default ip addresses to be printed in ipv4/ipv6 format"
+
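+# Example invocation (illustrative only - addresses, ports and lcores are
+# assumptions):
+#   ./gen_fe_cfg.py -f "3,4" -b "1,2" -p 32768 -n 4 -m echo \
+#       -q 192.168.1.1 -r 0.0.0.0
+# prints 4 echo-mode FE records spread over FE lcores 3 and 4.
+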
+def align_power_of_2(x):
+ return 2**(x-1).bit_length()
+
+def print_stream(mode, la, ra, fwd_la, fwd_ra, lcore, belcore, lport,
+ fwrport, txlen):
+ if support_hex != 0:
+ lport_str = str(format(lport, '#x'))
+ fwrport_str = str(format(fwrport, '#x'))
+ else:
+ lport_str = str(lport)
+ fwrport_str = str(fwrport)
+
+ stream = "lcore=" + str(lcore) + ",belcore=" + str(belcore)
+ stream += ",op=" + mode
+ stream += ",laddr=" + la + ",lport=" + lport_str
+ stream += ",raddr=" + ra + ",rport=0"
+
+ if mode == 'fwd':
+ stream += ",fwladdr=" + fwd_la + ",fwlport=0,fwraddr=" + fwd_ra
+ stream += ",fwrport=" + fwrport_str
+
+ if mode == 'rx' or mode == 'tx':
+ stream += ",txlen=" + str(txlen)
+
+ print stream
+
+parser = optparse.OptionParser()
+parser.add_option("-b", "--be_lcore_list", dest = "be_lcore_list",
+ help = "BE lcore lists.")
+parser.add_option("-f", "--fe_lcore_list", dest = "fe_lcore_list",
+ help = "FE lcore lists.")
+parser.add_option("-p", "--start_port", dest = "start_port",
+ help = "start port.")
+parser.add_option("-n", "--number_of_streams", dest = "number_of_streams",
+ help = "number of streams.")
+parser.add_option("-m", "--mode", dest = "mode",
+ help = "mode (op, rx, tx, fwd).")
+parser.add_option("-q", "--local_address", dest = "local_address",
+ help = "local_address.")
+parser.add_option("-r", "--remote_address", dest = "remote_address",
+ help = "remote_address.")
+parser.add_option("-s", "--fwd_local_address", dest = "fwd_local_address",
+ help = "fwd_local_address.")
+parser.add_option("-t", "--fwd_remote_address", dest = "fwd_remote_address",
+ help = "fwd_remote_address.")
+parser.add_option("-l", "--txlen", dest = "txlen", help = "txlen.")
+parser.add_option("-4", action = "store_false", dest = "ipv6",
+ help = "IPv4/IPv6")
+parser.add_option("-6", action = "store_true", dest = "ipv6",
+ help = "IPv4/IPv6")
+parser.add_option("-H", action = "store_true", dest = "support_hex",
+ help = "print ports in hexa format")
+
+(options, args) = parser.parse_args()
+
+if len(sys.argv) == 1:
+ print
+ print_usage()
+ print
+ sys.exit()
+
+supported_modes = ['echo', 'rx', 'tx', 'fwd']
+support_hex = 0
+txlen = 72
+
+if options.ipv6 == True:
+ la = '::'
+ ra = '::'
+ fwd_la = '::'
+ fwd_ra = '::'
+else:
+ la = '0.0.0.0'
+ ra = '0.0.0.0'
+ fwd_la = '0.0.0.0'
+ fwd_ra = '0.0.0.0'
+
+if options.support_hex == True:
+ support_hex = 1
+
+if options.txlen != None:
+ txlen = options.txlen
+
+if options.fe_lcore_list != None:
+ felcore_list = list(map(int, options.fe_lcore_list.split(",")))
+ felcore_list.sort()
+else:
+ felcore_list = []
+
+if options.be_lcore_list != None:
+ belcore_list = list(map(int, options.be_lcore_list.split(",")))
+ belcore_list.sort()
+else:
+ print "default BE lcore list = [ 1 ]"
+ belcore_list = [1]
+
+if options.mode != None:
+ mode = options.mode
+ if mode not in supported_modes:
+ print "Supported modes: " + str(supported_modes)
+ print "Provided mode \"" + mode + "\" is not supported. Terminating..."
+ sys.exit()
+else:
+ print "default mode = echo"
+ mode = "echo"
+
+if options.start_port != None:
+ start_port = int(options.start_port)
+else:
+ print "default start_port = 32768"
+ start_port = 32768
+
+if options.number_of_streams != None:
+ number_of_streams = int(options.number_of_streams)
+else:
+ print "default number_of_streams = 1"
+ number_of_streams = 1
+
+fwd_start_port = 53248
+
+if options.local_address != None:
+ la = options.local_address
+
+if options.remote_address != None:
+ ra = options.remote_address
+
+if options.fwd_local_address != None:
+ fwd_la = options.fwd_local_address
+
+if options.fwd_remote_address != None:
+ fwd_ra = options.fwd_remote_address
+
+belcore_count = len(belcore_list)
+align_belcore_count = align_power_of_2(belcore_count)
+nb_bits = int(math.log(align_belcore_count, 2))
+
+felcore_count = len(felcore_list)
+if felcore_count != 0:
+ if number_of_streams % felcore_count == 0:
+ nb_stream_per_flc = number_of_streams / felcore_count
+ else:
+ nb_stream_per_flc = (number_of_streams / felcore_count) + 1
+
+for i in range(start_port, start_port + number_of_streams):
+ k = i - start_port
+ align_belcore_count = align_power_of_2(belcore_count)
+ blc_indx = (i % align_belcore_count) % belcore_count
+ belcore = belcore_list[blc_indx]
+ if felcore_count != 0:
+ flc_indx = k / nb_stream_per_flc
+ felcore = felcore_list[flc_indx]
+ else:
+ felcore = belcore
+
+ print_stream(mode, la, ra, fwd_la, fwd_ra, felcore, belcore, i,
+ fwd_start_port + k, txlen)
diff --git a/examples/l4fwd/lcore.h b/examples/l4fwd/lcore.h
new file mode 100644
index 0000000..d88e434
--- /dev/null
+++ b/examples/l4fwd/lcore.h
@@ -0,0 +1,370 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LCORE_H_
+#define LCORE_H_
+
+#include "dpdk_legacy.h"
+
+/*
+ * IPv6 destination lookup callback.
+ */
+static int
+lpm6_dst_lookup(void *data, const struct in6_addr *addr,
+ struct tle_dest *res)
+{
+ int32_t rc;
+ uint8_t idx;
+ struct netbe_lcore *lc;
+ struct tle_dest *dst;
+ uintptr_t p;
+
+ lc = data;
+ p = (uintptr_t)addr->s6_addr;
+
+ rc = rte_lpm6_lookup(lc->lpm6, (uint8_t *)p, &idx);
+ if (rc == 0) {
+ dst = &lc->dst6[idx];
+ rte_memcpy(res, dst, dst->l2_len + dst->l3_len +
+ offsetof(struct tle_dest, hdr));
+ }
+ return rc;
+}
+
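+/*
+ * Create the per-lcore TLDK context (if not created yet): LPM tables,
+ * the IP fragmentation table and the tle_ctx itself, all allocated on
+ * the lcore's NUMA socket.
+ */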
+static int
+create_context(struct netbe_lcore *lc, const struct tle_ctx_param *ctx_prm)
+{
+ uint32_t rc = 0, sid;
+ uint64_t frag_cycles;
+ struct tle_ctx_param cprm;
+
+ if (lc->ctx == NULL) {
+ sid = rte_lcore_to_socket_id(lc->id);
+
+ rc = lcore_lpm_init(lc);
+ if (rc != 0)
+ return rc;
+
+ cprm = *ctx_prm;
+ cprm.socket_id = sid;
+ cprm.proto = lc->proto;
+ cprm.lookup4 = lpm4_dst_lookup;
+ cprm.lookup4_data = lc;
+ cprm.lookup6 = lpm6_dst_lookup;
+ cprm.lookup6_data = lc;
+
+ frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) /
+ MS_PER_S * FRAG_TTL;
+
+ lc->ftbl = rte_ip_frag_table_create(cprm.max_streams,
+ FRAG_TBL_BUCKET_ENTRIES, cprm.max_streams,
+ frag_cycles, sid);
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u): frag_tbl=%p;\n",
+ __func__, lc->id, lc->ftbl);
+
+ lc->ctx = tle_ctx_create(&cprm);
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u): proto=%s, ctx=%p;\n",
+ __func__, lc->id, proto_name[lc->proto], lc->ctx);
+
+ if (lc->ctx == NULL || lc->ftbl == NULL)
+ rc = ENOMEM;
+ }
+
+ return rc;
+}
+
+/*
+ * BE lcore setup routine.
+ */
+static int
+lcore_init(struct netbe_lcore *lc, const struct tle_ctx_param *ctx_prm,
+ const uint32_t prtqid, const uint16_t *bl_ports, uint32_t nb_bl_ports)
+{
+ int32_t rc = 0;
+ struct tle_dev_param dprm;
+
+ rc = create_context(lc, ctx_prm);
+
+ if (rc == 0 && lc->ctx != NULL) {
+ memset(&dprm, 0, sizeof(dprm));
+ dprm.rx_offload = lc->prtq[prtqid].port.rx_offload;
+ dprm.tx_offload = lc->prtq[prtqid].port.tx_offload;
+ dprm.local_addr4.s_addr = lc->prtq[prtqid].port.ipv4;
+ memcpy(&dprm.local_addr6, &lc->prtq[prtqid].port.ipv6,
+ sizeof(lc->prtq[prtqid].port.ipv6));
+ dprm.bl4.nb_port = nb_bl_ports;
+ dprm.bl4.port = bl_ports;
+ dprm.bl6.nb_port = nb_bl_ports;
+ dprm.bl6.port = bl_ports;
+
+ lc->prtq[prtqid].dev = tle_add_dev(lc->ctx, &dprm);
+
+ RTE_LOG(NOTICE, USER1,
+ "%s(lcore=%u, port=%u, qid=%u), dev: %p\n",
+ __func__, lc->id, lc->prtq[prtqid].port.id,
+ lc->prtq[prtqid].rxqid, lc->prtq[prtqid].dev);
+
+ if (lc->prtq[prtqid].dev == NULL)
+ rc = -rte_errno;
+
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s(lcore=%u) failed with error code: %d\n",
+ __func__, lc->id, rc);
+ tle_ctx_destroy(lc->ctx);
+ rte_ip_frag_table_destroy(lc->ftbl);
+ rte_lpm_free(lc->lpm4);
+ rte_lpm6_free(lc->lpm6);
+ rte_free(lc->prtq[prtqid].port.lcore_id);
+ lc->prtq[prtqid].port.nb_lcore = 0;
+ rte_free(lc->prtq);
+ lc->prtq_num = 0;
+ return rc;
+ }
+ }
+
+ return rc;
+}
+
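+/*
+ * Build the list of all 16-bit port numbers whose RSS mapping does not
+ * point to queue q, so they can be block-listed on the device for that
+ * queue. Returns the number of ports collected.
+ */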
+static uint16_t
+create_blocklist(const struct netbe_port *beprt, uint16_t *bl_ports,
+ uint32_t q)
+{
+ uint32_t i, j, qid, align_nb_q;
+
+ align_nb_q = rte_align32pow2(beprt->nb_lcore);
+ for (i = 0, j = 0; i < (UINT16_MAX + 1); i++) {
+ qid = (i % align_nb_q) % beprt->nb_lcore;
+ if (qid != q)
+ bl_ports[j++] = i;
+ }
+
+ return j;
+}
+
+static int
+netbe_lcore_init(struct netbe_cfg *cfg, const struct tle_ctx_param *ctx_prm)
+{
+ int32_t rc;
+ uint32_t i, j, nb_bl_ports = 0, sz;
+ struct netbe_lcore *lc;
+ static uint16_t *bl_ports;
+
+ /* Create the context and attached queue for each lcore. */
+ rc = 0;
+ sz = sizeof(uint16_t) * UINT16_MAX;
+ bl_ports = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE);
+ for (i = 0; i < cfg->cpu_num; i++) {
+ lc = &cfg->cpu[i];
+ for (j = 0; j < lc->prtq_num; j++) {
+ memset((uint8_t *)bl_ports, 0, sz);
+ /* create list of blocked ports based on q */
+ nb_bl_ports = create_blocklist(&lc->prtq[j].port,
+ bl_ports, lc->prtq[j].rxqid);
+ RTE_LOG(NOTICE, USER1,
+ "lc=%u, q=%u, nb_bl_ports=%u\n",
+ lc->id, lc->prtq[j].rxqid, nb_bl_ports);
+
+ rc = lcore_init(lc, ctx_prm, j, bl_ports, nb_bl_ports);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: failed with error code: %d\n",
+ __func__, rc);
+ rte_free(bl_ports);
+ return rc;
+ }
+ }
+ }
+ rte_free(bl_ports);
+
+ return 0;
+}
+
+static int
+netfe_lcore_cmp(const void *s1, const void *s2)
+{
+ const struct netfe_stream_prm *p1, *p2;
+
+ p1 = s1;
+ p2 = s2;
+ return p1->lcore - p2->lcore;
+}
+
+static int
+netbe_find6(const struct in6_addr *laddr, uint16_t lport,
+ const struct in6_addr *raddr, uint32_t belc)
+{
+ uint32_t i, j;
+ uint8_t idx;
+ struct netbe_lcore *bc;
+
+ /* we have exactly one BE, use it for all traffic */
+ if (becfg.cpu_num == 1)
+ return 0;
+
+ /* search by provided be_lcore */
+ if (belc != LCORE_ID_ANY) {
+ for (i = 0; i != becfg.cpu_num; i++) {
+ bc = becfg.cpu + i;
+ if (belc == bc->id)
+ return i;
+ }
+ RTE_LOG(NOTICE, USER1, "%s: no stream with belcore=%u\n",
+ __func__, belc);
+ return -ENOENT;
+ }
+
+ /* search by local address */
+ if (memcmp(laddr, &in6addr_any, sizeof(*laddr)) != 0) {
+ for (i = 0; i != becfg.cpu_num; i++) {
+ bc = becfg.cpu + i;
+ /* search by queue for the local port */
+ for (j = 0; j != bc->prtq_num; j++) {
+ if (memcmp(laddr, &bc->prtq[j].port.ipv6,
+ sizeof(*laddr)) == 0) {
+
+ if (lport == 0)
+ return i;
+
+ if (verify_queue_for_port(bc->prtq + j,
+ lport) != 0)
+ return i;
+ }
+ }
+ }
+ }
+
+ /* search by remote address */
+	if (memcmp(raddr, &in6addr_any, sizeof(*raddr)) != 0) {
+ for (i = 0; i != becfg.cpu_num; i++) {
+ bc = becfg.cpu + i;
+ if (rte_lpm6_lookup(bc->lpm6,
+ (uint8_t *)(uintptr_t)raddr->s6_addr,
+ &idx) == 0) {
+
+ if (lport == 0)
+ return i;
+
+ /* search by queue for the local port */
+ for (j = 0; j != bc->prtq_num; j++)
+ if (verify_queue_for_port(bc->prtq + j,
+ lport) != 0)
+ return i;
+ }
+ }
+ }
+
+ return -ENOENT;
+}
+
+static int
+netbe_find(const struct sockaddr_storage *la,
+ const struct sockaddr_storage *ra,
+ uint32_t belc)
+{
+ const struct sockaddr_in *l4, *r4;
+ const struct sockaddr_in6 *l6, *r6;
+
+ if (la->ss_family == AF_INET) {
+ l4 = (const struct sockaddr_in *)la;
+ r4 = (const struct sockaddr_in *)ra;
+ return netbe_find4(&l4->sin_addr, ntohs(l4->sin_port),
+ &r4->sin_addr, belc);
+ } else if (la->ss_family == AF_INET6) {
+ l6 = (const struct sockaddr_in6 *)la;
+ r6 = (const struct sockaddr_in6 *)ra;
+ return netbe_find6(&l6->sin6_addr, ntohs(l6->sin6_port),
+ &r6->sin6_addr, belc);
+ }
+ return -EINVAL;
+}
+
+static int
+netfe_sprm_fill_be(struct netfe_sprm *sp, uint32_t line, uint32_t belc)
+{
+ int32_t bidx;
+
+ bidx = netbe_find(&sp->local_addr, &sp->remote_addr, belc);
+
+ if (bidx < 0) {
+ RTE_LOG(ERR, USER1, "%s(line=%u): no BE for that stream\n",
+ __func__, line);
+ return -EINVAL;
+ }
+ sp->bidx = bidx;
+ return 0;
+}
+
+/* prepare front-end per-lcore parameters. */
+static int
+netfe_lcore_fill(struct lcore_prm prm[RTE_MAX_LCORE],
+ struct netfe_lcore_prm *lprm)
+{
+ uint32_t belc;
+ uint32_t i, j, lc, ln;
+ struct netfe_stream_prm *s;
+
+	/* determine on which BE each stream should be opened. */
+ for (i = 0; i != lprm->nb_streams; i++) {
+ s = lprm->stream + i;
+ ln = s->line;
+ belc = s->belcore;
+		if (netfe_sprm_fill_be(&s->sprm, ln, belc) != 0 ||
+			(s->op == FWD &&
+			netfe_sprm_fill_be(&s->fprm, ln, belc) != 0))
+ return -EINVAL;
+ }
+
+ /* group all fe parameters by lcore. */
+
+ qsort(lprm->stream, lprm->nb_streams, sizeof(lprm->stream[0]),
+ netfe_lcore_cmp);
+
+ for (i = 0; i != lprm->nb_streams; i = j) {
+
+ lc = lprm->stream[i].lcore;
+ ln = lprm->stream[i].line;
+
+ if (rte_lcore_is_enabled(lc) == 0) {
+ RTE_LOG(ERR, USER1,
+ "%s(line=%u): lcore %u is not enabled\n",
+ __func__, ln, lc);
+ return -EINVAL;
+ }
+
+ if (rte_get_master_lcore() != lc &&
+ rte_eal_get_lcore_state(lc) == RUNNING) {
+ RTE_LOG(ERR, USER1,
+ "%s(line=%u): lcore %u already in use\n",
+ __func__, ln, lc);
+ return -EINVAL;
+ }
+
+ for (j = i + 1; j != lprm->nb_streams &&
+ lc == lprm->stream[j].lcore;
+ j++)
+ ;
+
+ prm[lc].fe.max_streams = lprm->max_streams;
+ prm[lc].fe.nb_streams = j - i;
+ prm[lc].fe.stream = lprm->stream + i;
+ }
+
+ return 0;
+}
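+
+/*
+ * A minimal sketch of the grouping pass above, with plain integers
+ * standing in for stream parameters: once the array is sorted by
+ * lcore, every run [i, j) belongs to a single front-end lcore.
+ */
+static inline void
+group_runs_sketch(void)
+{
+	uint32_t i, j, runs;
+	const uint32_t cores[6] = {1, 1, 2, 2, 2, 3}; /* already sorted */
+
+	runs = 0;
+	for (i = 0; i != RTE_DIM(cores); i = j) {
+		for (j = i + 1; j != RTE_DIM(cores) && cores[j] == cores[i];
+				j++)
+			;
+		runs++; /* entries i..j-1 would go to lcore cores[i] */
+	}
+	assert(runs == 3);
+}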
+
+#endif /* LCORE_H_ */
diff --git a/examples/l4fwd/main.c b/examples/l4fwd/main.c
new file mode 100644
index 0000000..37bd03e
--- /dev/null
+++ b/examples/l4fwd/main.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <time.h>
+
+#include "netbe.h"
+#include "parse.h"
+
+#define MAX_RULES 0x100
+#define MAX_TBL8 0x800
+
+#define RX_RING_SIZE 0x400
+#define TX_RING_SIZE 0x800
+
+#define MPOOL_CACHE_SIZE 0x100
+#define MPOOL_NB_BUF 0x20000
+
+#define FRAG_MBUF_BUF_SIZE (RTE_PKTMBUF_HEADROOM + TLE_DST_MAX_HDR)
+#define FRAG_TTL MS_PER_S
+#define FRAG_TBL_BUCKET_ENTRIES 16
+
+#define FIRST_PORT 0x8000
+
+#define RX_CSUM_OFFLOAD (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM)
+#define TX_CSUM_OFFLOAD (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM)
+
+RTE_DEFINE_PER_LCORE(struct netbe_lcore *, _be);
+RTE_DEFINE_PER_LCORE(struct netfe_lcore *, _fe);
+
+#include "fwdtbl.h"
+
+/**
+ * Byte offset within the RSS hash key that has to be modified so the
+ * hash distributes packets based on the destination TCP/UDP port
+ * (IPv4 input tuple).
+ */
+#define RSS_HASH_KEY_DEST_PORT_LOC_IPV4 15
+
+/**
+ * Byte offset within the RSS hash key that has to be modified so the
+ * hash distributes packets based on the destination TCP/UDP port
+ * (IPv6 input tuple).
+ */
+#define RSS_HASH_KEY_DEST_PORT_LOC_IPV6 39
+
+/**
+ * Size of the rte_eth_rss_reta_entry64 array to update through
+ * rte_eth_dev_rss_reta_update.
+ */
+#define RSS_RETA_CONF_ARRAY_SIZE (ETH_RSS_RETA_SIZE_512/RTE_RETA_GROUP_SIZE)
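+
+/*
+ * A minimal sketch, assuming the offsets above are consumed the
+ * obvious way (the actual consumer lives in pkt.c): start from an
+ * all-zero key and raise only the byte that lines up with the
+ * destination L4 port, so the RSS hash is driven by that port alone.
+ * The 0xff fill value is illustrative.
+ */
+static inline void
+rss_key_sketch(uint8_t key[RSS_HASH_KEY_LENGTH], uint32_t ipv6)
+{
+	uint32_t i;
+
+	for (i = 0; i != RSS_HASH_KEY_LENGTH; i++)
+		key[i] = 0;
+	key[ipv6 ? RSS_HASH_KEY_DEST_PORT_LOC_IPV6 :
+		RSS_HASH_KEY_DEST_PORT_LOC_IPV4] = 0xff;
+}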
+
+static volatile int force_quit;
+
+static struct netbe_cfg becfg;
+static struct rte_mempool *mpool[RTE_MAX_NUMA_NODES + 1];
+static struct rte_mempool *frag_mpool[RTE_MAX_NUMA_NODES + 1];
+static char proto_name[3][10] = {"udp", "tcp", ""};
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = {
+ .max_rx_pkt_len = ETHER_MAX_VLAN_FRAME_LEN,
+ .hw_vlan_strip = 1,
+ .jumbo_frame = 1,
+ },
+};
+
+/* function pointers */
+static TLE_RX_BULK_FUNCTYPE tle_rx_bulk;
+static TLE_TX_BULK_FUNCTYPE tle_tx_bulk;
+static TLE_STREAM_RECV_FUNCTYPE tle_stream_recv;
+static TLE_STREAM_CLOSE_FUNCTYPE tle_stream_close;
+
+static LCORE_MAIN_FUNCTYPE lcore_main;
+
+#include "common.h"
+#include "parse.h"
+#include "lcore.h"
+#include "port.h"
+#include "tcp.h"
+#include "udp.h"
+
+int verbose = VERBOSE_NONE;
+
+static void
+netbe_lcore_fini(struct netbe_cfg *cfg)
+{
+ uint32_t i;
+
+ for (i = 0; i != cfg->cpu_num; i++) {
+ tle_ctx_destroy(cfg->cpu[i].ctx);
+ rte_ip_frag_table_destroy(cfg->cpu[i].ftbl);
+ rte_lpm_free(cfg->cpu[i].lpm4);
+ rte_lpm6_free(cfg->cpu[i].lpm6);
+
+ rte_free(cfg->cpu[i].prtq);
+ cfg->cpu[i].prtq_num = 0;
+ }
+
+ rte_free(cfg->cpu);
+ cfg->cpu_num = 0;
+ for (i = 0; i != cfg->prt_num; i++) {
+ rte_free(cfg->prt[i].lcore_id);
+ cfg->prt[i].nb_lcore = 0;
+ }
+ rte_free(cfg->prt);
+ cfg->prt_num = 0;
+}
+
+static int
+netbe_dest_init(const char *fname, struct netbe_cfg *cfg)
+{
+ int32_t rc;
+ uint32_t f, i, p;
+ uint32_t k, l, cnt;
+ struct netbe_lcore *lc;
+ struct netbe_dest_prm prm;
+
+ rc = netbe_parse_dest(fname, &prm);
+ if (rc != 0)
+ return rc;
+
+ rc = 0;
+ for (i = 0; i != prm.nb_dest; i++) {
+
+ p = prm.dest[i].port;
+ f = prm.dest[i].family;
+
+ cnt = 0;
+ for (k = 0; k != cfg->cpu_num; k++) {
+ lc = cfg->cpu + k;
+ for (l = 0; l != lc->prtq_num; l++)
+ if (lc->prtq[l].port.id == p) {
+ rc = netbe_add_dest(lc, l, f,
+ prm.dest + i, 1);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s(lc=%u, family=%u) "
+ "could not add "
+ "destinations(%u)\n",
+ __func__, lc->id, f, i);
+ return -ENOSPC;
+ }
+ cnt++;
+ }
+ }
+
+ if (cnt == 0) {
+ RTE_LOG(ERR, USER1, "%s(%s) error at line %u: "
+ "port %u not managed by any lcore;\n",
+ __func__, fname, prm.dest[i].line, p);
+ break;
+ }
+ }
+
+ free(prm.dest);
+ return rc;
+}
+
+static void
+func_ptrs_init(uint32_t proto)
+{
+ if (proto == TLE_PROTO_TCP) {
+ tle_rx_bulk = tle_tcp_rx_bulk;
+ tle_tx_bulk = tle_tcp_tx_bulk;
+ tle_stream_recv = tle_tcp_stream_recv;
+ tle_stream_close = tle_tcp_stream_close;
+
+ lcore_main = lcore_main_tcp;
+
+ } else {
+ tle_rx_bulk = tle_udp_rx_bulk;
+ tle_tx_bulk = tle_udp_tx_bulk;
+ tle_stream_recv = tle_udp_stream_recv;
+ tle_stream_close = tle_udp_stream_close;
+
+ lcore_main = lcore_main_udp;
+ }
+}
+
+int
+main(int argc, char *argv[])
+{
+ int32_t rc;
+ uint32_t i;
+ struct tle_ctx_param ctx_prm;
+ struct netfe_lcore_prm feprm;
+ struct rte_eth_stats stats;
+ char fecfg_fname[PATH_MAX + 1];
+ char becfg_fname[PATH_MAX + 1];
+ struct lcore_prm prm[RTE_MAX_LCORE];
+ struct rte_eth_dev_info dev_info;
+
+ fecfg_fname[0] = 0;
+ becfg_fname[0] = 0;
+ memset(prm, 0, sizeof(prm));
+
+ rc = rte_eal_init(argc, argv);
+ if (rc < 0)
+ rte_exit(EXIT_FAILURE,
+ "%s: rte_eal_init failed with error code: %d\n",
+ __func__, rc);
+
+ memset(&ctx_prm, 0, sizeof(ctx_prm));
+
+ signal(SIGINT, sig_handle);
+
+ argc -= rc;
+ argv += rc;
+
+ rc = parse_app_options(argc, argv, &becfg, &ctx_prm,
+ fecfg_fname, becfg_fname);
+ if (rc != 0)
+ rte_exit(EXIT_FAILURE,
+ "%s: parse_app_options failed with error code: %d\n",
+ __func__, rc);
+
+	/* init all the function pointers */
+ func_ptrs_init(becfg.proto);
+
+ rc = netbe_port_init(&becfg);
+ if (rc != 0)
+ rte_exit(EXIT_FAILURE,
+ "%s: netbe_port_init failed with error code: %d\n",
+ __func__, rc);
+
+ rc = netbe_lcore_init(&becfg, &ctx_prm);
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+
+ rc = netbe_dest_init(becfg_fname, &becfg);
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+
+ for (i = 0; i != becfg.prt_num && rc == 0; i++) {
+ RTE_LOG(NOTICE, USER1, "%s: starting port %u\n",
+ __func__, becfg.prt[i].id);
+ rc = rte_eth_dev_start(becfg.prt[i].id);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: rte_eth_dev_start(%u) returned "
+ "error code: %d\n",
+ __func__, becfg.prt[i].id, rc);
+ sig_handle(SIGQUIT);
+ }
+ rte_eth_dev_info_get(becfg.prt[i].id, &dev_info);
+ rc = update_rss_reta(&becfg.prt[i], &dev_info);
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+ }
+
+ feprm.max_streams = ctx_prm.max_streams * becfg.cpu_num;
+
+ rc = (rc != 0) ? rc : netfe_parse_cfg(fecfg_fname, &feprm);
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+
+ for (i = 0; rc == 0 && i != becfg.cpu_num; i++)
+ prm[becfg.cpu[i].id].be.lc = becfg.cpu + i;
+
+ rc = (rc != 0) ? rc : netfe_lcore_fill(prm, &feprm);
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+
+ /* launch all slave lcores. */
+ RTE_LCORE_FOREACH_SLAVE(i) {
+ if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0)
+ rte_eal_remote_launch(lcore_main, prm + i, i);
+ }
+
+ /* launch master lcore. */
+ i = rte_get_master_lcore();
+ if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0)
+ lcore_main(prm + i);
+
+ rte_eal_mp_wait_lcore();
+
+ for (i = 0; i != becfg.prt_num; i++) {
+		RTE_LOG(NOTICE, USER1, "%s: stopping port %u\n",
+ __func__, becfg.prt[i].id);
+ rte_eth_stats_get(becfg.prt[i].id, &stats);
+ RTE_LOG(NOTICE, USER1, "port %u stats={\n"
+ "ipackets=%" PRIu64 ";"
+ "ibytes=%" PRIu64 ";"
+ "ierrors=%" PRIu64 ";"
+ "imissed=%" PRIu64 ";\n"
+ "opackets=%" PRIu64 ";"
+ "obytes=%" PRIu64 ";"
+ "oerrors=%" PRIu64 ";\n"
+ "}\n",
+ becfg.prt[i].id,
+ stats.ipackets,
+ stats.ibytes,
+ stats.ierrors,
+ stats.imissed,
+ stats.opackets,
+ stats.obytes,
+ stats.oerrors);
+ rte_eth_dev_stop(becfg.prt[i].id);
+ }
+
+ netbe_lcore_fini(&becfg);
+
+ return 0;
+}
diff --git a/examples/l4fwd/netbe.h b/examples/l4fwd/netbe.h
new file mode 100644
index 0000000..6d25603
--- /dev/null
+++ b/examples/l4fwd/netbe.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NETBE_H__
+#define __NETBE_H__
+
+#include <stdint.h>
+#include <stddef.h>
+#include <inttypes.h>
+#include <getopt.h>
+#include <arpa/inet.h>
+#include <assert.h>
+#include <signal.h>
+
+#include <rte_config.h>
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_ethdev.h>
+#include <rte_kvargs.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_lpm.h>
+#include <rte_lpm6.h>
+#include <rte_hash.h>
+#include <rte_ip.h>
+#include <rte_ip_frag.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <tle_tcp.h>
+#include <tle_udp.h>
+#include <tle_event.h>
+
+#define MAX_PKT_BURST 0x20
+
+/* Used to allocate the memory for hash key. */
+#define RSS_HASH_KEY_LENGTH 64
+
+/*
+ * global variables
+ */
+
+enum {
+ VERBOSE_NONE = 0,
+ VERBOSE_NUM = 9
+};
+
+extern int verbose;
+
+/*
+ * BE related structures.
+ */
+
+struct netbe_port {
+ uint32_t id;
+ uint32_t nb_lcore;
+ uint32_t *lcore_id;
+ uint32_t mtu;
+ uint32_t rx_offload;
+ uint32_t tx_offload;
+ uint32_t ipv4;
+ struct in6_addr ipv6;
+ struct ether_addr mac;
+ uint32_t hash_key_size;
+ uint8_t hash_key[RSS_HASH_KEY_LENGTH];
+};
+
+struct netbe_dest {
+ uint32_t line;
+ uint32_t port;
+ uint32_t mtu;
+ uint32_t prfx;
+ uint16_t family;
+ union {
+ struct in_addr ipv4;
+ struct in6_addr ipv6;
+ };
+ struct ether_addr mac;
+};
+
+struct netbe_dest_prm {
+ uint32_t nb_dest;
+ struct netbe_dest *dest;
+};
+
+struct pkt_buf {
+ uint32_t num;
+ struct rte_mbuf *pkt[2 * MAX_PKT_BURST];
+};
+
+struct netbe_dev {
+ uint16_t rxqid;
+ uint16_t txqid;
+ struct netbe_port port;
+ struct tle_dev *dev;
+ struct {
+ uint64_t in;
+ uint64_t up;
+ uint64_t drop;
+ } rx_stat;
+ struct {
+ uint64_t down;
+ uint64_t out;
+ uint64_t drop;
+ } tx_stat;
+ struct pkt_buf tx_buf;
+ struct pkt_buf arp_buf;
+};
+
+/* 8 bit LPM user data. */
+#define LCORE_MAX_DST (UINT8_MAX + 1)
+
+struct netbe_lcore {
+ uint32_t id;
+ uint32_t proto; /**< L4 proto to handle. */
+ struct rte_lpm *lpm4;
+ struct rte_lpm6 *lpm6;
+ struct rte_ip_frag_tbl *ftbl;
+ struct tle_ctx *ctx;
+ uint32_t prtq_num;
+ uint32_t dst4_num;
+ uint32_t dst6_num;
+ struct netbe_dev *prtq;
+ struct tle_dest dst4[LCORE_MAX_DST];
+ struct tle_dest dst6[LCORE_MAX_DST];
+ struct rte_ip_frag_death_row death_row;
+ struct {
+ uint64_t flags[UINT8_MAX + 1];
+ } tcp_stat;
+};
+
+struct netbe_cfg {
+ uint32_t promisc;
+ uint32_t proto;
+ uint32_t server;
+ uint32_t arp;
+ uint32_t prt_num;
+ uint32_t cpu_num;
+ struct netbe_port *prt;
+ struct netbe_lcore *cpu;
+};
+
+/*
+ * FE related structures.
+ */
+
+enum {
+ RXONLY,
+ TXONLY,
+ RXTX,
+ FWD,
+};
+
+struct netfe_sprm {
+ uint32_t bidx; /* BE index to use. */
+ struct sockaddr_storage local_addr; /**< stream local address. */
+ struct sockaddr_storage remote_addr; /**< stream remote address. */
+};
+
+struct netfe_stream_prm {
+ uint32_t lcore;
+ uint32_t belcore;
+ uint16_t line;
+ uint16_t op;
+ uint16_t txlen; /* valid/used only for TXONLY op. */
+ struct netfe_sprm sprm;
+ struct netfe_sprm fprm; /* valid/used only for FWD op. */
+};
+
+struct netfe_lcore_prm {
+ uint32_t max_streams;
+ uint32_t nb_streams;
+ struct netfe_stream_prm *stream;
+};
+
+struct netfe_stream {
+ struct tle_stream *s;
+ struct tle_event *erev;
+ struct tle_event *rxev;
+ struct tle_event *txev;
+ uint16_t op;
+ uint16_t proto;
+ uint16_t family;
+ uint16_t txlen;
+ struct {
+ uint64_t rxp;
+ uint64_t rxb;
+ uint64_t txp;
+ uint64_t txb;
+ uint64_t fwp;
+ uint64_t drops;
+ uint64_t rxev[TLE_SEV_NUM];
+ uint64_t txev[TLE_SEV_NUM];
+ uint64_t erev[TLE_SEV_NUM];
+ } stat;
+ struct pkt_buf pbuf;
+ struct sockaddr_storage laddr;
+ struct sockaddr_storage raddr;
+ struct netfe_sprm fwdprm;
+ struct netfe_stream *fwds;
+ LIST_ENTRY(netfe_stream) link;
+};
+
+struct netfe_stream_list {
+ uint32_t num;
+ LIST_HEAD(, netfe_stream) head;
+};
+
+struct netfe_lcore {
+ uint32_t snum; /* max number of streams */
+ struct tle_evq *syneq;
+ struct tle_evq *ereq;
+ struct tle_evq *rxeq;
+ struct tle_evq *txeq;
+ struct rte_hash *fw4h;
+ struct rte_hash *fw6h;
+ struct {
+ uint64_t acc;
+ uint64_t rej;
+ uint64_t ter;
+ } tcp_stat;
+ struct netfe_stream_list free;
+ struct netfe_stream_list use;
+};
+
+struct lcore_prm {
+ struct {
+ struct netbe_lcore *lc;
+ } be;
+ struct netfe_lcore_prm fe;
+};
+
+/*
+ * debug/trace macros.
+ */
+
+#define DUMMY_MACRO do {} while (0)
+
+#ifdef NETFE_DEBUG
+#define NETFE_TRACE(fmt, arg...) printf(fmt, ##arg)
+#define NETFE_PKT_DUMP(p) rte_pktmbuf_dump(stdout, (p), 64)
+#else
+#define NETFE_TRACE(fmt, arg...) DUMMY_MACRO
+#define NETFE_PKT_DUMP(p) DUMMY_MACRO
+#endif
+
+#ifdef NETBE_DEBUG
+#define NETBE_TRACE(fmt, arg...) printf(fmt, ##arg)
+#define NETBE_PKT_DUMP(p) rte_pktmbuf_dump(stdout, (p), 64)
+#else
+#define NETBE_TRACE(fmt, arg...) DUMMY_MACRO
+#define NETBE_PKT_DUMP(p) DUMMY_MACRO
+#endif
+
+#define FUNC_STAT(v, c) do { \
+ static uint64_t nb_call, nb_data; \
+ nb_call++; \
+ nb_data += (v); \
+ if ((nb_call & ((c) - 1)) == 0) { \
+		printf("%s#%d@%u: nb_call=%" PRIu64 ", avg(" #v ")=%#Lf\n", \
+ __func__, __LINE__, rte_lcore_id(), nb_call, \
+ (long double)nb_data / nb_call); \
+ nb_call = 0; \
+ nb_data = 0; \
+ } \
+} while (0)
+
+#define FUNC_TM_STAT(v, c) do { \
+ static uint64_t nb_call, nb_data; \
+ static uint64_t cts, pts, sts; \
+ cts = rte_rdtsc(); \
+ if (pts != 0) \
+ sts += cts - pts; \
+ pts = cts; \
+ nb_call++; \
+ nb_data += (v); \
+ if ((nb_call & ((c) - 1)) == 0) { \
+		printf("%s#%d@%u: nb_call=%" PRIu64 ", " \
+ "avg(" #v ")=%#Lf, " \
+ "avg(cycles)=%#Lf, " \
+ "avg(cycles/" #v ")=%#Lf\n", \
+ __func__, __LINE__, rte_lcore_id(), nb_call, \
+ (long double)nb_data / nb_call, \
+ (long double)sts / nb_call, \
+ (long double)sts / nb_data); \
+ nb_call = 0; \
+ nb_data = 0; \
+ sts = 0; \
+ } \
+} while (0)
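+
+/*
+ * Usage sketch for the macros above (the wrapper name is illustrative):
+ * sample the average burst size every 1024 calls. The counters inside
+ * the macro are static per call-site, so each instrumented line keeps
+ * its own statistics.
+ */
+static inline uint16_t
+rx_burst_stat_sketch(uint8_t port, uint16_t qid, struct rte_mbuf *pkt[],
+	uint16_t num)
+{
+	uint16_t n;
+
+	n = rte_eth_rx_burst(port, qid, pkt, num);
+	FUNC_STAT(n, 0x400);
+	return n;
+}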
+
+int setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc,
+ uint16_t qid, uint32_t arp);
+
+/*
+ * application function pointers
+ */
+
+typedef int (*LCORE_MAIN_FUNCTYPE)(void *arg);
+
+/*
+ * tle_l4p lib function pointers
+ */
+
+typedef uint16_t (*TLE_RX_BULK_FUNCTYPE)
+ (struct tle_dev *dev, struct rte_mbuf *pkt[],
+ struct rte_mbuf *rp[], int32_t rc[], uint16_t num);
+
+typedef uint16_t (*TLE_TX_BULK_FUNCTYPE)
+ (struct tle_dev *dev, struct rte_mbuf *pkt[], uint16_t num);
+
+typedef uint16_t (*TLE_STREAM_RECV_FUNCTYPE)
+ (struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num);
+
+typedef int (*TLE_STREAM_CLOSE_FUNCTYPE)(struct tle_stream *s);
+
+#endif /* __NETBE_H__ */
diff --git a/examples/l4fwd/parse.c b/examples/l4fwd/parse.c
new file mode 100644
index 0000000..6593221
--- /dev/null
+++ b/examples/l4fwd/parse.c
@@ -0,0 +1,839 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "netbe.h"
+#include "parse.h"
+
+#define DEF_LINE_NUM 0x400
+
+static const struct {
+ const char *name;
+ uint16_t op;
+} name2feop[] = {
+ { .name = "rx", .op = RXONLY,},
+ { .name = "tx", .op = TXONLY,},
+ { .name = "echo", .op = RXTX,},
+ { .name = "fwd", .op = FWD,},
+};
+
+#define OPT_SHORT_ARP 'a'
+#define OPT_LONG_ARP "enable-arp"
+
+#define OPT_SHORT_SBULK 'B'
+#define OPT_LONG_SBULK "sburst"
+
+#define OPT_SHORT_PROMISC 'P'
+#define OPT_LONG_PROMISC "promisc"
+
+#define OPT_SHORT_RBUFS 'R'
+#define OPT_LONG_RBUFS "rbufs"
+
+#define OPT_SHORT_SBUFS 'S'
+#define OPT_LONG_SBUFS "sbufs"
+
+#define OPT_SHORT_BECFG 'b'
+#define OPT_LONG_BECFG "becfg"
+
+#define OPT_SHORT_FECFG 'f'
+#define OPT_LONG_FECFG "fecfg"
+
+#define OPT_SHORT_STREAMS 's'
+#define OPT_LONG_STREAMS "streams"
+
+#define OPT_SHORT_UDP 'U'
+#define OPT_LONG_UDP "udp"
+
+#define OPT_SHORT_TCP 'T'
+#define OPT_LONG_TCP "tcp"
+
+#define OPT_SHORT_LISTEN 'L'
+#define OPT_LONG_LISTEN "listen"
+
+#define OPT_SHORT_VERBOSE 'v'
+#define OPT_LONG_VERBOSE "verbose"
+
+static const struct option long_opt[] = {
+	{OPT_LONG_ARP, 0, 0, OPT_SHORT_ARP},
+ {OPT_LONG_SBULK, 1, 0, OPT_SHORT_SBULK},
+ {OPT_LONG_PROMISC, 0, 0, OPT_SHORT_PROMISC},
+ {OPT_LONG_RBUFS, 1, 0, OPT_SHORT_RBUFS},
+ {OPT_LONG_SBUFS, 1, 0, OPT_SHORT_SBUFS},
+ {OPT_LONG_BECFG, 1, 0, OPT_SHORT_BECFG},
+ {OPT_LONG_FECFG, 1, 0, OPT_SHORT_FECFG},
+ {OPT_LONG_STREAMS, 1, 0, OPT_SHORT_STREAMS},
+ {OPT_LONG_UDP, 0, 0, OPT_SHORT_UDP},
+ {OPT_LONG_TCP, 0, 0, OPT_SHORT_TCP},
+ {OPT_LONG_LISTEN, 0, 0, OPT_SHORT_LISTEN},
+ {OPT_LONG_VERBOSE, 1, 0, OPT_SHORT_VERBOSE},
+ {NULL, 0, 0, 0}
+};
+
+static int
+parse_uint_val(__rte_unused const char *key, const char *val, void *prm)
+{
+ union parse_val *rv;
+ unsigned long v;
+ char *end;
+
+ rv = prm;
+ errno = 0;
+ v = strtoul(val, &end, 0);
+ if (errno != 0 || end[0] != 0 || v > UINT32_MAX)
+ return -EINVAL;
+
+ rv->u64 = v;
+ return 0;
+}
+
+static int
+parse_ipv4_val(__rte_unused const char *key, const char *val, void *prm)
+{
+ union parse_val *rv;
+
+ rv = prm;
+ if (inet_pton(AF_INET, val, &rv->in.addr4) != 1)
+ return -EINVAL;
+ rv->in.family = AF_INET;
+ return 0;
+}
+
+static int
+parse_ipv6_val(__rte_unused const char *key, const char *val, void *prm)
+{
+ union parse_val *rv;
+
+ rv = prm;
+ if (inet_pton(AF_INET6, val, &rv->in.addr6) != 1)
+ return -EINVAL;
+ rv->in.family = AF_INET6;
+ return 0;
+}
+
+static int
+parse_ip_val(__rte_unused const char *key, const char *val, void *prm)
+{
+ if (parse_ipv6_val(key, val, prm) != 0 &&
+ parse_ipv4_val(key, val, prm) != 0)
+ return -EINVAL;
+ return 0;
+}
+
+#define PARSE_UINT8x16(s, v, l) \
+do { \
+ char *end; \
+ unsigned long t; \
+ errno = 0; \
+ t = strtoul((s), &end, 16); \
+ if (errno != 0 || end[0] != (l) || t > UINT8_MAX) \
+ return -EINVAL; \
+ (s) = end + 1; \
+ (v) = t; \
+} while (0)
+
+static int
+parse_mac_val(__rte_unused const char *key, const char *val, void *prm)
+{
+ union parse_val *rv;
+ const char *s;
+
+ rv = prm;
+ s = val;
+
+ PARSE_UINT8x16(s, rv->mac.addr_bytes[0], ':');
+ PARSE_UINT8x16(s, rv->mac.addr_bytes[1], ':');
+ PARSE_UINT8x16(s, rv->mac.addr_bytes[2], ':');
+ PARSE_UINT8x16(s, rv->mac.addr_bytes[3], ':');
+ PARSE_UINT8x16(s, rv->mac.addr_bytes[4], ':');
+ PARSE_UINT8x16(s, rv->mac.addr_bytes[5], 0);
+ return 0;
+}
+
+static int
+parse_feop_val(__rte_unused const char *key, const char *val, void *prm)
+{
+ uint32_t i;
+ union parse_val *rv;
+
+ rv = prm;
+ for (i = 0; i != RTE_DIM(name2feop); i++) {
+ if (strcmp(val, name2feop[i].name) == 0) {
+ rv->u64 = name2feop[i].op;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int
+parse_lcore_list_val(__rte_unused const char *key, const char *val, void *prm)
+{
+ union parse_val *rv;
+ unsigned long a, b;
+ uint32_t i;
+ char *end;
+
+ rv = prm;
+
+ errno = 0;
+ a = strtoul(val, &end, 0);
+ if (errno != 0 || (end[0] != 0 && end[0] != '-') || a > UINT32_MAX)
+ return -EINVAL;
+
+ if (end[0] == '-') {
+ val = end + 1;
+ errno = 0;
+ b = strtoul(val, &end, 0);
+ if (errno != 0 || end[0] != 0 || b > UINT32_MAX)
+ return -EINVAL;
+ } else
+ b = a;
+
+ if (a <= b) {
+ for (i = a; i <= b; i++)
+ CPU_SET(i, &rv->cpuset);
+ } else {
+ RTE_LOG(ERR, USER1,
+ "%s: lcores not in ascending order\n", __func__);
+ return -EINVAL;
+ }
+
+ return 0;
+}
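+
+/*
+ * Examples this parser accepts for the "lcore" key: "3" selects lcore 3
+ * alone, "2-5" selects lcores 2 through 5. Per the check above, ranges
+ * must be ascending.
+ */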
+
+static int
+parse_kvargs(const char *arg, const char *keys_man[], uint32_t nb_man,
+ const char *keys_opt[], uint32_t nb_opt,
+ const arg_handler_t hndl[], union parse_val val[])
+{
+ uint32_t j, k;
+ struct rte_kvargs *kvl;
+
+ kvl = rte_kvargs_parse(arg, NULL);
+ if (kvl == NULL) {
+ RTE_LOG(ERR, USER1,
+ "%s: invalid parameter: %s\n",
+ __func__, arg);
+ return -EINVAL;
+ }
+
+ for (j = 0; j != nb_man; j++) {
+ if (rte_kvargs_count(kvl, keys_man[j]) == 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: %s missing mandatory key: %s\n",
+ __func__, arg, keys_man[j]);
+ rte_kvargs_free(kvl);
+ return -EINVAL;
+ }
+ }
+
+ for (j = 0; j != nb_man; j++) {
+ if (rte_kvargs_process(kvl, keys_man[j], hndl[j],
+ val + j) != 0) {
+			RTE_LOG(ERR, USER1,
+				"%s: %s invalid value for mandatory key: %s\n",
+ __func__, arg, keys_man[j]);
+ rte_kvargs_free(kvl);
+ return -EINVAL;
+ }
+ }
+
+ for (j = 0; j != nb_opt; j++) {
+ k = j + nb_man;
+ if (rte_kvargs_process(kvl, keys_opt[j], hndl[k],
+ val + k) != 0) {
+			RTE_LOG(ERR, USER1,
+				"%s: %s invalid value for optional key: %s\n",
+ __func__, arg, keys_opt[j]);
+ rte_kvargs_free(kvl);
+ return -EINVAL;
+ }
+ }
+
+ rte_kvargs_free(kvl);
+ return 0;
+}
+
+int
+parse_netbe_arg(struct netbe_port *prt, const char *arg, rte_cpuset_t *pcpu)
+{
+ int32_t rc;
+ uint32_t i, j, nc;
+
+ static const char *keys_man[] = {
+ "port",
+ "lcore",
+ };
+
+ static const char *keys_opt[] = {
+ "mtu",
+ "rx_offload",
+ "tx_offload",
+ "ipv4",
+ "ipv6",
+ };
+
+ static const arg_handler_t hndl[] = {
+ parse_uint_val,
+ parse_lcore_list_val,
+ parse_uint_val,
+ parse_uint_val,
+ parse_uint_val,
+ parse_ipv4_val,
+ parse_ipv6_val,
+ };
+
+ union parse_val val[RTE_DIM(hndl)];
+
+ memset(val, 0, sizeof(val));
+ val[2].u64 = ETHER_MAX_VLAN_FRAME_LEN - ETHER_CRC_LEN;
+
+ rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man),
+ keys_opt, RTE_DIM(keys_opt), hndl, val);
+ if (rc != 0)
+ return rc;
+
+ prt->id = val[0].u64;
+
+ for (i = 0, nc = 0; i < RTE_MAX_LCORE; i++)
+ nc += CPU_ISSET(i, &val[1].cpuset);
+ prt->lcore_id = rte_zmalloc(NULL, nc * sizeof(prt->lcore_id[0]),
+ RTE_CACHE_LINE_SIZE);
+ prt->nb_lcore = nc;
+
+ for (i = 0, j = 0; i < RTE_MAX_LCORE; i++)
+ if (CPU_ISSET(i, &val[1].cpuset))
+ prt->lcore_id[j++] = i;
+ CPU_OR(pcpu, pcpu, &val[1].cpuset);
+
+ prt->mtu = val[2].u64;
+ prt->rx_offload = val[3].u64;
+ prt->tx_offload = val[4].u64;
+ prt->ipv4 = val[5].in.addr4.s_addr;
+ prt->ipv6 = val[6].in.addr6;
+
+ return 0;
+}
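+
+/*
+ * A hypothetical command-line port argument this parser accepts (all
+ * values invented): "port" and "lcore" are mandatory, the rest fall
+ * back to the defaults set above.
+ */
+static inline int
+parse_netbe_arg_sketch(struct netbe_port *prt, rte_cpuset_t *pcpu)
+{
+	return parse_netbe_arg(prt,
+		"port=0,lcore=1-2,mtu=1500,rx_offload=0x1,tx_offload=0x0,"
+		"ipv4=192.168.1.233", pcpu);
+}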
+
+static int
+check_netbe_dest(const struct netbe_dest *dst)
+{
+ if (dst->port >= RTE_MAX_ETHPORTS) {
+		RTE_LOG(ERR, USER1, "%s(line=%u) invalid port=%u\n",
+ __func__, dst->line, dst->port);
+ return -EINVAL;
+ } else if ((dst->family == AF_INET &&
+ dst->prfx > sizeof(struct in_addr) * CHAR_BIT) ||
+ (dst->family == AF_INET6 &&
+ dst->prfx > sizeof(struct in6_addr) * CHAR_BIT)) {
+		RTE_LOG(ERR, USER1, "%s(line=%u) invalid masklen=%u\n",
+ __func__, dst->line, dst->prfx);
+ return -EINVAL;
+ } else if (dst->mtu > ETHER_MAX_JUMBO_FRAME_LEN - ETHER_CRC_LEN) {
+		RTE_LOG(ERR, USER1, "%s(line=%u) invalid mtu=%u\n",
+ __func__, dst->line, dst->mtu);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int
+parse_netbe_dest(struct netbe_dest *dst, const char *arg)
+{
+ int32_t rc;
+
+ static const char *keys_man[] = {
+ "port",
+ "addr",
+ "masklen",
+ "mac",
+ };
+
+ static const char *keys_opt[] = {
+ "mtu",
+ };
+
+ static const arg_handler_t hndl[] = {
+ parse_uint_val,
+ parse_ip_val,
+ parse_uint_val,
+ parse_mac_val,
+ parse_uint_val,
+ };
+
+ union parse_val val[RTE_DIM(hndl)];
+
+ /* set default values. */
+ memset(val, 0, sizeof(val));
+ val[4].u64 = ETHER_MAX_JUMBO_FRAME_LEN - ETHER_CRC_LEN;
+
+ rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man),
+ keys_opt, RTE_DIM(keys_opt), hndl, val);
+ if (rc != 0)
+ return rc;
+
+ dst->port = val[0].u64;
+ dst->family = val[1].in.family;
+ if (val[1].in.family == AF_INET)
+ dst->ipv4 = val[1].in.addr4;
+ else
+ dst->ipv6 = val[1].in.addr6;
+ dst->prfx = val[2].u64;
+ memcpy(&dst->mac, &val[3].mac, sizeof(dst->mac));
+ dst->mtu = val[4].u64;
+
+ return 0;
+}
+
+int
+netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm)
+{
+ uint32_t i, ln, n, num;
+ int32_t rc;
+ size_t sz;
+ char *s;
+ FILE *f;
+	struct netbe_dest *dp, *np;
+ char line[LINE_MAX];
+
+ f = fopen(fname, "r");
+ if (f == NULL) {
+ RTE_LOG(ERR, USER1, "%s failed to open file \"%s\"\n",
+ __func__, fname);
+ return -EINVAL;
+ }
+
+ n = 0;
+ num = 0;
+ dp = NULL;
+ rc = 0;
+ for (ln = 0; fgets(line, sizeof(line), f) != NULL; ln++) {
+
+ /* skip spaces at the start. */
+ for (s = line; isspace(s[0]); s++)
+ ;
+
+ /* skip comment line. */
+ if (s[0] == '#' || s[0] == 0)
+ continue;
+
+ /* skip spaces at the end. */
+ for (i = strlen(s); i-- != 0 && isspace(s[i]); s[i] = 0)
+ ;
+
+ if (n == num) {
+ num += DEF_LINE_NUM;
+ sz = sizeof(dp[0]) * num;
+			np = realloc(dp, sz);
+			if (np == NULL) {
+				RTE_LOG(ERR, USER1,
+					"%s(%s) allocation of %zu bytes "
+					"failed\n",
+					__func__, fname, sz);
+				rc = -ENOMEM;
+				break;
+			}
+			dp = np;
+ memset(&dp[n], 0, sizeof(dp[0]) * (num - n));
+ }
+
+ dp[n].line = ln + 1;
+ rc = parse_netbe_dest(dp + n, s);
+ rc = (rc != 0) ? rc : check_netbe_dest(dp + n);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1, "%s(%s) failed to parse line %u\n",
+ __func__, fname, dp[n].line);
+ break;
+ }
+ n++;
+ }
+
+ fclose(f);
+
+ if (rc != 0) {
+ free(dp);
+ dp = NULL;
+ n = 0;
+ }
+
+ prm->dest = dp;
+ prm->nb_dest = n;
+ return rc;
+}
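+
+/*
+ * A hypothetical destination-file line the loop above accepts (values
+ * invented; '#' starts a comment and surrounding whitespace is
+ * stripped):
+ *
+ *   port=0,masklen=16,addr=192.168.0.0,mac=01:de:ad:be:ef:01
+ *
+ * On success the caller owns prm->dest and is expected to free() it.
+ */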
+
+static void
+pv2saddr(struct sockaddr_storage *ss, const union parse_val *pva,
+ const union parse_val *pvp)
+{
+ ss->ss_family = pva->in.family;
+ if (pva->in.family == AF_INET) {
+ struct sockaddr_in *si = (struct sockaddr_in *)ss;
+ si->sin_addr = pva->in.addr4;
+ si->sin_port = rte_cpu_to_be_16((uint16_t)pvp->u64);
+ } else {
+ struct sockaddr_in6 *si = (struct sockaddr_in6 *)ss;
+ si->sin6_addr = pva->in.addr6;
+ si->sin6_port = rte_cpu_to_be_16((uint16_t)pvp->u64);
+ }
+}
+
+static int
+parse_netfe_arg(struct netfe_stream_prm *sp, const char *arg)
+{
+ int32_t rc;
+
+ static const char *keys_man[] = {
+ "lcore",
+ "op",
+ "laddr",
+ "lport",
+ "raddr",
+ "rport",
+ };
+
+ static const char *keys_opt[] = {
+ "txlen",
+ "fwladdr",
+ "fwlport",
+ "fwraddr",
+ "fwrport",
+ "belcore",
+ };
+
+ static const arg_handler_t hndl[] = {
+ parse_uint_val,
+ parse_feop_val,
+ parse_ip_val,
+ parse_uint_val,
+ parse_ip_val,
+ parse_uint_val,
+ parse_uint_val,
+ parse_ip_val,
+ parse_uint_val,
+ parse_ip_val,
+ parse_uint_val,
+ parse_uint_val,
+ };
+
+ union parse_val val[RTE_DIM(hndl)];
+
+ memset(val, 0, sizeof(val));
+ val[11].u64 = LCORE_ID_ANY;
+ rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man),
+ keys_opt, RTE_DIM(keys_opt), hndl, val);
+ if (rc != 0)
+ return rc;
+ sp->lcore = val[0].u64;
+ sp->op = val[1].u64;
+ pv2saddr(&sp->sprm.local_addr, val + 2, val + 3);
+ pv2saddr(&sp->sprm.remote_addr, val + 4, val + 5);
+ sp->txlen = val[6].u64;
+ pv2saddr(&sp->fprm.local_addr, val + 7, val + 8);
+ pv2saddr(&sp->fprm.remote_addr, val + 9, val + 10);
+ sp->belcore = val[11].u64;
+
+ return 0;
+}
+
+static const char *
+format_feop(uint16_t op)
+{
+ uint32_t i;
+
+ for (i = 0; i != RTE_DIM(name2feop); i++) {
+ if (name2feop[i].op == op)
+ return name2feop[i].name;
+ }
+
+ return NULL;
+}
+
+static int
+is_addr_wc(const struct sockaddr_storage *sp)
+{
+ const struct sockaddr_in *i4;
+ const struct sockaddr_in6 *i6;
+
+ if (sp->ss_family == AF_INET) {
+ i4 = (const struct sockaddr_in *)sp;
+ return (i4->sin_addr.s_addr == INADDR_ANY);
+ } else if (sp->ss_family == AF_INET6) {
+ i6 = (const struct sockaddr_in6 *)sp;
+ return (memcmp(&i6->sin6_addr, &in6addr_any,
+ sizeof(i6->sin6_addr)) == 0);
+ }
+ return 0;
+}
+
+static int
+check_netfe_arg(const struct netfe_stream_prm *sp)
+{
+ char buf[INET6_ADDRSTRLEN];
+
+ if (sp->sprm.local_addr.ss_family !=
+ sp->sprm.remote_addr.ss_family) {
+ RTE_LOG(ERR, USER1, "invalid arg at line %u: "
+ "laddr and raddr for different protocols\n",
+ sp->line);
+ return -EINVAL;
+ }
+
+ if (sp->op == TXONLY) {
+ if (sp->txlen > RTE_MBUF_DEFAULT_DATAROOM || sp->txlen == 0) {
+			RTE_LOG(ERR, USER1, "invalid arg at line %u: txlen=%u "
+				"is outside the allowed range: (0, %u]\n",
+				sp->line, sp->txlen, RTE_MBUF_DEFAULT_DATAROOM);
+ return -EINVAL;
+ } else if (is_addr_wc(&sp->sprm.remote_addr)) {
+			RTE_LOG(ERR, USER1, "invalid arg at line %u: "
+				"raddr=%s is not allowed for op=%s;\n",
+ sp->line,
+ format_addr(&sp->sprm.remote_addr,
+ buf, sizeof(buf)),
+ format_feop(sp->op));
+ return -EINVAL;
+ }
+ } else if (sp->op == FWD) {
+ if (sp->fprm.local_addr.ss_family !=
+ sp->fprm.remote_addr.ss_family) {
+ RTE_LOG(ERR, USER1, "invalid arg at line %u: "
+ "fwladdr and fwraddr for different protocols\n",
+ sp->line);
+ return -EINVAL;
+ } else if (is_addr_wc(&sp->fprm.remote_addr)) {
+			RTE_LOG(ERR, USER1, "invalid arg at line %u: "
+				"fwraddr=%s is not allowed for op=%s;\n",
+ sp->line,
+ format_addr(&sp->fprm.remote_addr,
+ buf, sizeof(buf)),
+ format_feop(sp->op));
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+int
+netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp)
+{
+ uint32_t i, ln, n, num;
+ int32_t rc;
+ size_t sz;
+ char *s;
+ FILE *f;
+	struct netfe_stream_prm *sp, *np;
+ char line[LINE_MAX];
+
+ f = fopen(fname, "r");
+ if (f == NULL) {
+ RTE_LOG(ERR, USER1, "%s failed to open file \"%s\"\n",
+ __func__, fname);
+ return -EINVAL;
+ }
+
+ n = 0;
+ num = 0;
+ sp = NULL;
+ rc = 0;
+ for (ln = 0; fgets(line, sizeof(line), f) != NULL; ln++) {
+
+ /* skip spaces at the start. */
+ for (s = line; isspace(s[0]); s++)
+ ;
+
+ /* skip comment line. */
+ if (s[0] == '#' || s[0] == 0)
+ continue;
+
+ /* skip spaces at the end. */
+ for (i = strlen(s); i-- != 0 && isspace(s[i]); s[i] = 0)
+ ;
+
+ if (n == lp->max_streams) {
+			RTE_LOG(ERR, USER1,
+				"%s(%s) number of entries exceeds the max "
+				"streams value: %u\n",
+ __func__, fname, n);
+ rc = -EINVAL;
+ break;
+ }
+
+ if (n == num) {
+ num += DEF_LINE_NUM;
+ sz = sizeof(sp[0]) * num;
+			np = realloc(sp, sz);
+			if (np == NULL) {
+				RTE_LOG(ERR, USER1,
+					"%s(%s) allocation of %zu bytes "
+					"failed\n",
+					__func__, fname, sz);
+				rc = -ENOMEM;
+				break;
+			}
+			sp = np;
+ memset(&sp[n], 0, sizeof(sp[0]) * (num - n));
+ }
+
+ sp[n].line = ln + 1;
+ rc = parse_netfe_arg(sp + n, s);
+ rc = (rc != 0) ? rc : check_netfe_arg(sp + n);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1, "%s(%s) failed to parse line %u\n",
+ __func__, fname, sp[n].line);
+ break;
+ }
+ n++;
+ }
+
+ fclose(f);
+
+ if (rc != 0) {
+ free(sp);
+ sp = NULL;
+ n = 0;
+ }
+
+ lp->stream = sp;
+ lp->nb_streams = n;
+ return rc;
+}
+
+int
+parse_app_options(int argc, char **argv, struct netbe_cfg *cfg,
+ struct tle_ctx_param *ctx_prm,
+ char *fecfg_fname, char *becfg_fname)
+{
+ int32_t opt, opt_idx, rc;
+ uint64_t v;
+ uint32_t i, j, n, nc;
+ rte_cpuset_t cpuset;
+ uint32_t udp = 0, tcp = 0, listen = 0;
+
+ optind = 0;
+ optarg = NULL;
+ while ((opt = getopt_long(argc, argv, "aB:LPR:S:TUb:f:s:v:", long_opt,
+ &opt_idx)) != EOF) {
+ if (opt == OPT_SHORT_ARP) {
+ cfg->arp = 1;
+ } else if (opt == OPT_SHORT_SBULK) {
+ rc = parse_uint_val(NULL, optarg, &v);
+ if (rc < 0)
+ rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
+ "for option: \'%c\'\n",
+ __func__, optarg, opt);
+ ctx_prm->send_bulk_size = v;
+ } else if (opt == OPT_SHORT_PROMISC) {
+ cfg->promisc = 1;
+ } else if (opt == OPT_SHORT_RBUFS) {
+ rc = parse_uint_val(NULL, optarg, &v);
+ if (rc < 0)
+ rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
+ "for option: \'%c\'\n",
+ __func__, optarg, opt);
+ ctx_prm->max_stream_rbufs = v;
+ } else if (opt == OPT_SHORT_SBUFS) {
+ rc = parse_uint_val(NULL, optarg, &v);
+ if (rc < 0)
+ rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
+ "for option: \'%c\'\n",
+ __func__, optarg, opt);
+ ctx_prm->max_stream_sbufs = v;
+ } else if (opt == OPT_SHORT_STREAMS) {
+ rc = parse_uint_val(NULL, optarg, &v);
+ if (rc < 0)
+ rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
+ "for option: \'%c\'\n",
+ __func__, optarg, opt);
+ ctx_prm->max_streams = v;
+ } else if (opt == OPT_SHORT_VERBOSE) {
+ rc = parse_uint_val(NULL, optarg, &v);
+ if (rc < 0)
+ rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
+ "for option: \'%c\'\n",
+ __func__, optarg, opt);
+ verbose = (v > VERBOSE_NUM) ? VERBOSE_NUM : v;
+ } else if (opt == OPT_SHORT_BECFG) {
+ snprintf(becfg_fname, PATH_MAX, "%s",
+ optarg);
+ } else if (opt == OPT_SHORT_FECFG) {
+ snprintf(fecfg_fname, PATH_MAX, "%s",
+ optarg);
+ } else if (opt == OPT_SHORT_UDP) {
+ udp = 1;
+ cfg->proto = TLE_PROTO_UDP;
+ } else if (opt == OPT_SHORT_TCP) {
+ tcp = 1;
+ cfg->proto = TLE_PROTO_TCP;
+ } else if (opt == OPT_SHORT_LISTEN) {
+ listen = 1;
+ cfg->server = 1;
+ } else {
+ rte_exit(EXIT_FAILURE,
+ "%s: unknown option: \'%c\'\n",
+ __func__, opt);
+ }
+ }
+
+ if (!udp && !tcp)
+ rte_exit(EXIT_FAILURE, "%s: either UDP or TCP option has to be "
+ "provided\n", __func__);
+
+ if (udp && tcp)
+		rte_exit(EXIT_FAILURE, "%s: the UDP and TCP options cannot "
+			"be used together\n", __func__);
+
+ if (udp && listen)
+ rte_exit(EXIT_FAILURE,
+ "%s: listen mode cannot be opened with UDP\n",
+ __func__);
+
+ if (udp && cfg->arp)
+ rte_exit(EXIT_FAILURE,
+ "%s: arp cannot be enabled with UDP\n",
+ __func__);
+
+ /* parse port params */
+ argc -= optind;
+ argv += optind;
+
+ /* allocate memory for number of ports defined */
+ n = (uint32_t)argc;
+ cfg->prt = rte_zmalloc(NULL, sizeof(struct netbe_port) * n,
+ RTE_CACHE_LINE_SIZE);
+ cfg->prt_num = n;
+
+ rc = 0;
+ for (i = 0; i != n; i++) {
+ rc = parse_netbe_arg(cfg->prt + i, argv[i], &cpuset);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: processing of \"%s\" failed with error "
+ "code: %d\n", __func__, argv[i], rc);
+ for (j = 0; j != i; j++)
+ rte_free(cfg->prt[j].lcore_id);
+ rte_free(cfg->prt);
+ return rc;
+ }
+ }
+
+ /* count the number of CPU defined in ports */
+ for (i = 0, nc = 0; i < RTE_MAX_LCORE; i++)
+ nc += CPU_ISSET(i, &cpuset);
+
+ /* allocate memory for number of CPU defined */
+ cfg->cpu = rte_zmalloc(NULL, sizeof(struct netbe_lcore) * nc,
+ RTE_CACHE_LINE_SIZE);
+
+ return 0;
+}
diff --git a/examples/l4fwd/parse.h b/examples/l4fwd/parse.h
new file mode 100644
index 0000000..4303623
--- /dev/null
+++ b/examples/l4fwd/parse.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __PARSE_H__
+#define __PARSE_H__
+
+#include <sched.h>
+
+#define PARSE_LIST_DELIM "-"
+
+union parse_val {
+ uint64_t u64;
+ struct {
+ uint16_t family;
+ union {
+ struct in_addr addr4;
+ struct in6_addr addr6;
+ };
+ } in;
+ struct ether_addr mac;
+ rte_cpuset_t cpuset;
+};
+
+static const char *
+format_addr(const struct sockaddr_storage *sp, char buf[], size_t len)
+{
+ const struct sockaddr_in *i4;
+ const struct sockaddr_in6 *i6;
+ const void *addr;
+
+ if (sp->ss_family == AF_INET) {
+ i4 = (const struct sockaddr_in *)sp;
+ addr = &i4->sin_addr;
+ } else if (sp->ss_family == AF_INET6) {
+ i6 = (const struct sockaddr_in6 *)sp;
+ addr = &i6->sin6_addr;
+ } else
+ return NULL;
+
+ return inet_ntop(sp->ss_family, addr, buf, len);
+}
+
+int parse_netbe_arg(struct netbe_port *prt, const char *arg,
+ rte_cpuset_t *pcpu);
+
+int netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm);
+
+int netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp);
+
+int
+parse_app_options(int argc, char **argv, struct netbe_cfg *cfg,
+ struct tle_ctx_param *ctx_prm,
+ char *fecfg_fname, char *becfg_fname);
+
+#endif /* __PARSE_H__ */
+
diff --git a/examples/l4fwd/pkt.c b/examples/l4fwd/pkt.c
new file mode 100644
index 0000000..660e618
--- /dev/null
+++ b/examples/l4fwd/pkt.c
@@ -0,0 +1,872 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <netinet/ip6.h>
+#include <rte_arp.h>
+
+#include "netbe.h"
+
+static inline uint64_t
+_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
+ uint64_t ol3, uint64_t ol2)
+{
+ return il2 | il3 << 7 | il4 << 16 | tso << 24 | ol3 << 40 | ol2 << 49;
+}
+
+static inline void
+fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4)
+{
+ m->tx_offload = _mbuf_tx_offload(l2, l3, l4, 0, 0, 0);
+}
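+
+/*
+ * Sanity sketch of the packing above: the l2/l3/l4 lengths of a plain
+ * Ethernet/IPv4/TCP header (14/20/20) must land in the mbuf tx_offload
+ * bit-fields, which are 7/9/8 bits wide in this DPDK generation.
+ */
+static inline void
+tx_offload_pack_sketch(void)
+{
+	uint64_t v;
+
+	v = _mbuf_tx_offload(14, 20, 20, 0, 0, 0);
+	assert((v & 0x7f) == 14);          /* l2_len */
+	assert(((v >> 7) & 0x1ff) == 20);  /* l3_len */
+	assert(((v >> 16) & 0xff) == 20);  /* l4_len */
+}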
+
+static inline int
+is_ipv4_frag(const struct ipv4_hdr *iph)
+{
+ const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG);
+
+ return ((mask & iph->fragment_offset) != 0);
+}
+
+static inline uint32_t
+get_tcp_header_size(struct rte_mbuf *m, uint32_t l2_len, uint32_t l3_len)
+{
+ const struct tcp_hdr *tcp;
+
+ tcp = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len);
+ return (tcp->data_off >> 4) * 4;
+}
+
+static inline void
+adjust_ipv4_pktlen(struct rte_mbuf *m, uint32_t l2_len)
+{
+ uint32_t plen, trim;
+ const struct ipv4_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2_len);
+ plen = rte_be_to_cpu_16(iph->total_length) + l2_len;
+ if (plen < m->pkt_len) {
+ trim = m->pkt_len - plen;
+ rte_pktmbuf_trim(m, trim);
+ }
+}
+
+static inline void
+adjust_ipv6_pktlen(struct rte_mbuf *m, uint32_t l2_len)
+{
+ uint32_t plen, trim;
+ const struct ipv6_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, l2_len);
+ plen = rte_be_to_cpu_16(iph->payload_len) + sizeof(*iph) + l2_len;
+ if (plen < m->pkt_len) {
+ trim = m->pkt_len - plen;
+ rte_pktmbuf_trim(m, trim);
+ }
+}
+
+static inline void
+tcp_stat_update(struct netbe_lcore *lc, const struct rte_mbuf *m,
+ uint32_t l2_len, uint32_t l3_len)
+{
+ const struct tcp_hdr *th;
+
+ th = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len);
+ lc->tcp_stat.flags[th->tcp_flags]++;
+}
+
+static inline uint32_t
+get_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, uint32_t frag)
+{
+ const struct ipv4_hdr *iph;
+ int32_t dlen, len;
+
+ dlen = rte_pktmbuf_data_len(m);
+ dlen -= l2;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2);
+ len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER;
+
+ if (frag != 0 && is_ipv4_frag(iph)) {
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+ m->packet_type |= RTE_PTYPE_L4_FRAG;
+ }
+
+ if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto))
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+
+ return len;
+}
+
+static inline void
+fill_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto,
+ uint32_t frag, uint32_t l4_len)
+{
+ uint32_t len;
+
+ len = get_ipv4_hdr_len(m, l2, proto, frag);
+ fill_pkt_hdr_len(m, l2, len, l4_len);
+ adjust_ipv4_pktlen(m, l2);
+}
+
+static inline int
+ipv6x_hdr(uint32_t proto)
+{
+ return (proto == IPPROTO_HOPOPTS ||
+ proto == IPPROTO_ROUTING ||
+ proto == IPPROTO_FRAGMENT ||
+ proto == IPPROTO_AH ||
+ proto == IPPROTO_NONE ||
+ proto == IPPROTO_DSTOPTS);
+}
+
+static inline uint32_t
+get_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto,
+ uint32_t fproto)
+{
+ const struct ip6_ext *ipx;
+ int32_t dlen, len, ofs;
+
+ len = sizeof(struct ipv6_hdr);
+
+ dlen = rte_pktmbuf_data_len(m);
+ dlen -= l2;
+
+ ofs = l2 + len;
+ ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs);
+
+ while (ofs > 0 && len < dlen) {
+
+ switch (nproto) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ ofs = (ipx->ip6e_len + 1) << 3;
+ break;
+ case IPPROTO_AH:
+ ofs = (ipx->ip6e_len + 2) << 2;
+ break;
+ case IPPROTO_FRAGMENT:
+ /*
+			 * tso_segsz is not used by RX, so use it as a
+			 * temporary buffer to store the fragment offset.
+ */
+ m->tso_segsz = ofs;
+ ofs = sizeof(struct ip6_frag);
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+ m->packet_type |= RTE_PTYPE_L4_FRAG;
+ break;
+ default:
+ ofs = 0;
+ }
+
+ if (ofs > 0) {
+ nproto = ipx->ip6e_nxt;
+ len += ofs;
+ ipx += ofs / sizeof(*ipx);
+ }
+ }
+
+ /* unrecognized or invalid packet. */
+ if ((ofs == 0 && nproto != fproto) || len > dlen)
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+
+ return len;
+}
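+
+/*
+ * Worked example of the length rules above: a Hop-by-Hop options header
+ * with ip6e_len = 1 spans (1 + 1) << 3 = 16 bytes (8-byte units, first
+ * unit not counted), while an AH header with ip6e_len = 4 spans
+ * (4 + 2) << 2 = 24 bytes (4-byte units with a 2-unit base, RFC 4302).
+ */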
+
+static inline uint32_t
+get_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto)
+{
+ const struct ipv6_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+
+ if (iph->proto == fproto)
+ return sizeof(struct ipv6_hdr);
+ else if (ipv6x_hdr(iph->proto) != 0)
+ return get_ipv6x_hdr_len(m, l2, iph->proto, fproto);
+
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return 0;
+}
+
+static inline void
+fill_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto,
+ uint32_t l4_len)
+{
+ uint32_t len;
+
+ len = get_ipv6_hdr_len(m, l2, fproto);
+ fill_pkt_hdr_len(m, l2, len, l4_len);
+ adjust_ipv6_pktlen(m, l2);
+}
+
+static inline struct rte_mbuf *
+handle_arp(struct rte_mbuf *m, struct netbe_lcore *lc, uint8_t port,
+ uint32_t l2len)
+{
+ const struct arp_hdr *ahdr;
+ struct pkt_buf *abuf;
+
+ ahdr = rte_pktmbuf_mtod_offset(m, const struct arp_hdr *, l2len);
+
+ if (ahdr->arp_hrd != rte_be_to_cpu_16(ARP_HRD_ETHER) ||
+ ahdr->arp_pro != rte_be_to_cpu_16(ETHER_TYPE_IPv4) ||
+ ahdr->arp_op != rte_be_to_cpu_16(ARP_OP_REQUEST)) {
+
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return m;
+ }
+
+ m->l2_len = l2len;
+ abuf = &lc->prtq[port].arp_buf;
+ if (abuf->num >= RTE_DIM(abuf->pkt))
+ return m;
+
+ abuf->pkt[abuf->num++] = m;
+
+ return NULL;
+}
+
+static inline struct rte_mbuf *
+fill_eth_tcp_arp_hdr_len(struct rte_mbuf *m, struct netbe_lcore *lc,
+ uint8_t port)
+{
+ uint32_t dlen, l2_len, l3_len, l4_len;
+ uint16_t etp;
+ const struct ether_hdr *eth;
+
+ dlen = rte_pktmbuf_data_len(m);
+
+ /* check that first segment is at least 54B long. */
+ if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
+ sizeof(struct tcp_hdr)) {
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return m;
+ }
+
+ l2_len = sizeof(*eth);
+
+ eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
+ etp = eth->ether_type;
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
+ l2_len += sizeof(struct vlan_hdr);
+
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_ARP))
+ return handle_arp(m, lc, port, l2_len);
+
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
+ m->packet_type = RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_TCP, 1);
+ l4_len = get_tcp_header_size(m, l2_len, l3_len);
+ fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(m, l2_len);
+ } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
+ dlen >= l2_len + sizeof(struct ipv6_hdr) +
+ sizeof(struct tcp_hdr)) {
+ m->packet_type = RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(m, l2_len, l3_len);
+ fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(m, l2_len);
+ } else
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+
+ return m;
+}
+
+static inline void
+fill_eth_tcp_hdr_len(struct rte_mbuf *m)
+{
+ uint32_t dlen, l2_len, l3_len, l4_len;
+ uint16_t etp;
+ const struct ether_hdr *eth;
+
+ dlen = rte_pktmbuf_data_len(m);
+
+ /* check that first segment is at least 54B long. */
+ if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
+ sizeof(struct tcp_hdr)) {
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return;
+ }
+
+ l2_len = sizeof(*eth);
+
+ eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
+ etp = eth->ether_type;
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
+ l2_len += sizeof(struct vlan_hdr);
+
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
+ m->packet_type = RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_TCP, 1);
+ l4_len = get_tcp_header_size(m, l2_len, l3_len);
+ fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(m, l2_len);
+ } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
+ dlen >= l2_len + sizeof(struct ipv6_hdr) +
+ sizeof(struct tcp_hdr)) {
+ m->packet_type = RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(m, l2_len, l3_len);
+ fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(m, l2_len);
+ } else
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+}
+
+static inline void
+fill_eth_udp_hdr_len(struct rte_mbuf *m)
+{
+ uint32_t dlen, l2_len;
+ uint16_t etp;
+ const struct ether_hdr *eth;
+
+ dlen = rte_pktmbuf_data_len(m);
+
+ /* check that first segment is at least 42B long. */
+ if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
+ sizeof(struct udp_hdr)) {
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return;
+ }
+
+ l2_len = sizeof(*eth);
+
+ eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
+ etp = eth->ether_type;
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
+ l2_len += sizeof(struct vlan_hdr);
+
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
+ m->packet_type = RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ fill_ipv4_hdr_len(m, l2_len, IPPROTO_UDP, 1,
+ sizeof(struct udp_hdr));
+ } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
+ dlen >= l2_len + sizeof(struct ipv6_hdr) +
+ sizeof(struct udp_hdr)) {
+ m->packet_type = RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ fill_ipv6_hdr_len(m, l2_len, IPPROTO_UDP,
+ sizeof(struct udp_hdr));
+ } else
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+}
+
+static inline uint16_t
+ipv4x_cksum(const void *iph, size_t len)
+{
+ uint16_t cksum;
+
+ cksum = rte_raw_cksum(iph, len);
+ return (cksum == 0xffff) ? cksum : ~cksum;
+}
+
+static inline void
+fix_reassembled(struct rte_mbuf *m, int32_t hwcsum, uint32_t proto)
+{
+ struct ipv4_hdr *iph;
+
+ /* update packet type. */
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+
+ if (proto == IPPROTO_TCP)
+ m->packet_type |= RTE_PTYPE_L4_TCP;
+ else
+ m->packet_type |= RTE_PTYPE_L4_UDP;
+
+ /* fix reassemble setting TX flags. */
+ m->ol_flags &= ~PKT_TX_IP_CKSUM;
+
+ /* fix l3_len after reassemble. */
+ if (RTE_ETH_IS_IPV6_HDR(m->packet_type))
+ m->l3_len = m->l3_len - sizeof(struct ipv6_extension_fragment);
+
+ /* recalculate ipv4 cksum after reassemble. */
+ else if (hwcsum == 0 && RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
+ iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
+ iph->hdr_checksum = ipv4x_cksum(iph, m->l3_len);
+ }
+}
+
+static struct rte_mbuf *
+reassemble(struct rte_mbuf *m, struct netbe_lcore *lc, uint64_t tms,
+ uint8_t port, uint32_t proto)
+{
+ uint32_t l3cs;
+ struct rte_ip_frag_tbl *tbl;
+ struct rte_ip_frag_death_row *dr;
+
+ tbl = lc->ftbl;
+ dr = &lc->death_row;
+ l3cs = lc->prtq[port].port.rx_offload & DEV_RX_OFFLOAD_IPV4_CKSUM;
+
+ if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
+
+ struct ipv4_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
+
+ /* process this fragment. */
+ m = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, iph);
+
+ } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
+
+ struct ipv6_hdr *iph;
+ struct ipv6_extension_fragment *fhdr;
+
+ iph = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len);
+
+ /*
+		 * the fragment header offset was stored in tso_segsz
+		 * earlier (temporarily), just to avoid another scan of
+		 * the ipv6 header.
+ */
+ fhdr = rte_pktmbuf_mtod_offset(m,
+ struct ipv6_extension_fragment *, m->tso_segsz);
+ m->tso_segsz = 0;
+
+ /* process this fragment. */
+ m = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, iph, fhdr);
+
+ } else {
+ rte_pktmbuf_free(m);
+ m = NULL;
+ }
+
+ /* got reassembled packet. */
+ if (m != NULL)
+ fix_reassembled(m, l3cs, proto);
+
+ return m;
+}
+
+/* exclude NULLs from the final list of packets. */
+static inline uint32_t
+compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero)
+{
+ uint32_t i, j, k, l;
+
+ for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) {
+
+ /* found a hole. */
+ if (pkt[j] == NULL) {
+
+ /* find how big is it. */
+ for (i = j; i-- != 0 && pkt[i] == NULL; )
+ ;
+ /* fill the hole. */
+ for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++)
+ pkt[l] = pkt[k];
+
+ nb_pkt -= j - i;
+ nb_zero -= j - i;
+ j = i + 1;
+ }
+ }
+
+ return nb_pkt;
+}
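+
+/*
+ * A minimal sketch of the compaction above: three slots with the middle
+ * one NULLed out collapse to two entries with order preserved. The
+ * pointer values are dummies and are never dereferenced.
+ */
+static inline void
+compress_pkt_list_sketch(void)
+{
+	struct rte_mbuf *pkt[3];
+
+	pkt[0] = (struct rte_mbuf *)(uintptr_t)0x1;
+	pkt[1] = NULL;
+	pkt[2] = (struct rte_mbuf *)(uintptr_t)0x3;
+
+	assert(compress_pkt_list(pkt, 3, 1) == 2);
+	assert(pkt[1] == (struct rte_mbuf *)(uintptr_t)0x3);
+}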
+
+/*
+ * if it is a fragment, try to reassemble it,
+ * if by some reason it can't be done, then
+ * set pkt[] entry to NULL.
+ */
+#define DO_REASSEMBLE(proto) \
+do { \
+ if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == \
+ RTE_PTYPE_L4_FRAG) { \
+ cts = (cts == 0) ? rte_rdtsc() : cts; \
+ pkt[j] = reassemble(pkt[j], lc, cts, port, (proto)); \
+ x += (pkt[j] == NULL); \
+ } \
+} while (0)
+
+/*
+ * HW can recognize L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k)
+ */
+static uint16_t
+type0_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, tp;
+ struct netbe_lcore *lc;
+ uint32_t l4_len, l3_len, l2_len;
+ const struct ether_hdr *eth;
+
+ lc = user_param;
+ l2_len = sizeof(*eth);
+
+ RTE_SET_USED(lc);
+
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ /* non fragmented tcp packets. */
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L2_ETHER):
+ l4_len = get_tcp_header_size(pkt[j], l2_len,
+ sizeof(struct ipv4_hdr));
+ fill_pkt_hdr_len(pkt[j], l2_len,
+ sizeof(struct ipv4_hdr), l4_len);
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L2_ETHER):
+ l4_len = get_tcp_header_size(pkt[j], l2_len,
+ sizeof(struct ipv6_hdr));
+ fill_pkt_hdr_len(pkt[j], l2_len,
+ sizeof(struct ipv6_hdr), l4_len);
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv4_hdr_len(pkt[j], l2_len,
+ IPPROTO_TCP, 0);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ break;
+ default:
+ /* treat packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+ }
+
+ return nb_pkts;
+}
+
+/*
+ * HW can recognize L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k)
+ */
+static uint16_t
+type0_udp_rx_callback(uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, tp, x;
+ uint64_t cts;
+ struct netbe_lcore *lc;
+ uint32_t l2_len;
+ const struct ether_hdr *eth;
+
+ lc = user_param;
+ cts = 0;
+ l2_len = sizeof(*eth);
+
+ x = 0;
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ /* non fragmented udp packets. */
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L2_ETHER):
+ fill_pkt_hdr_len(pkt[j], l2_len,
+ sizeof(struct ipv4_hdr),
+ sizeof(struct udp_hdr));
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L2_ETHER):
+ fill_pkt_hdr_len(pkt[j], l2_len,
+ sizeof(struct ipv6_hdr),
+ sizeof(struct udp_hdr));
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv4_hdr_len(pkt[j], l2_len,
+ UINT32_MAX, 0, sizeof(struct udp_hdr));
+ break;
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv6_hdr_len(pkt[j], l2_len,
+ IPPROTO_UDP, sizeof(struct udp_hdr));
+ break;
+ /* possibly fragmented udp packets. */
+ case (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER):
+ case (RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER):
+ fill_ipv4_hdr_len(pkt[j], l2_len,
+ IPPROTO_UDP, 1, sizeof(struct udp_hdr));
+ break;
+ case (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER):
+ case (RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER):
+ fill_ipv6_hdr_len(pkt[j], l2_len,
+ IPPROTO_UDP, sizeof(struct udp_hdr));
+ break;
+ default:
+ /* treat packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+
+ DO_REASSEMBLE(IPPROTO_UDP);
+ }
+
+ /* reassemble was invoked, cleanup its death-row. */
+ if (cts != 0)
+ rte_ip_frag_free_death_row(&lc->death_row, 0);
+
+ if (x == 0)
+ return nb_pkts;
+
+ NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
+ "%u non-reassembled fragments;\n",
+ __func__, port, queue, nb_pkts, x);
+
+ return compress_pkt_list(pkt, nb_pkts, x);
+}
+
+/*
+ * HW can recognize L2/L3/L4 and fragments (i40e).
+ */
+static uint16_t
+type1_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, tp;
+ struct netbe_lcore *lc;
+ uint32_t l4_len, l3_len, l2_len;
+ const struct ether_hdr *eth;
+
+ lc = user_param;
+ l2_len = sizeof(*eth);
+
+ RTE_SET_USED(lc);
+
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv4_hdr_len(pkt[j], l2_len,
+ IPPROTO_TCP, 0);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ tcp_stat_update(lc, pkt[j], l2_len, l3_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ tcp_stat_update(lc, pkt[j], l2_len, l3_len);
+ break;
+ default:
+ /* treat packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+
+ }
+
+ return nb_pkts;
+}
+
+/*
+ * HW can recognize L2/L3/L4 and fragments (i40e).
+ */
+static uint16_t
+type1_udp_rx_callback(uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, tp, x;
+ uint64_t cts;
+ struct netbe_lcore *lc;
+ uint32_t l2_len;
+ const struct ether_hdr *eth;
+
+ lc = user_param;
+ cts = 0;
+ l2_len = sizeof(*eth);
+
+ x = 0;
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv4_hdr_len(pkt[j], l2_len,
+ UINT32_MAX, 0, sizeof(struct udp_hdr));
+ break;
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv6_hdr_len(pkt[j], l2_len,
+ IPPROTO_UDP, sizeof(struct udp_hdr));
+ break;
+ case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv4_hdr_len(pkt[j], l2_len,
+ IPPROTO_UDP, 0, sizeof(struct udp_hdr));
+ break;
+ case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv6_hdr_len(pkt[j], l2_len,
+ IPPROTO_UDP, sizeof(struct udp_hdr));
+ break;
+ default:
+ /* treat packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+
+ DO_REASSEMBLE(IPPROTO_UDP);
+ }
+
+ /* reassemble was invoked, cleanup its death-row. */
+ if (cts != 0)
+ rte_ip_frag_free_death_row(&lc->death_row, 0);
+
+ if (x == 0)
+ return nb_pkts;
+
+ NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
+ "%u non-reassembled fragments;\n",
+ __func__, port, queue, nb_pkts, x);
+
+ return compress_pkt_list(pkt, nb_pkts, x);
+}
+
+/*
+ * Generic case: assumes HW doesn't recognize any packet type.
+ */
+static uint16_t
+typen_tcp_arp_rx_callback(uint8_t port, uint16_t queue, struct rte_mbuf *pkt[],
+ uint16_t nb_pkts, uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, x;
+ struct netbe_lcore *lc;
+
+ lc = user_param;
+
+ RTE_SET_USED(queue);
+ RTE_SET_USED(max_pkts);
+
+ x = 0;
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+ pkt[j] = fill_eth_tcp_arp_hdr_len(pkt[j], lc, port);
+ x += (pkt[j] == NULL);
+ }
+
+ if (x == 0)
+ return nb_pkts;
+
+ return compress_pkt_list(pkt, nb_pkts, x);
+}
+
+static uint16_t
+typen_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j;
+ struct netbe_lcore *lc;
+
+ lc = user_param;
+
+ RTE_SET_USED(lc);
+
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+ fill_eth_tcp_hdr_len(pkt[j]);
+ }
+
+ return nb_pkts;
+}
+
+static uint16_t
+typen_udp_rx_callback(uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, x;
+ uint64_t cts;
+ struct netbe_lcore *lc;
+
+ lc = user_param;
+ cts = 0;
+
+ x = 0;
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+ fill_eth_udp_hdr_len(pkt[j]);
+
+ DO_REASSEMBLE(IPPROTO_UDP);
+ }
+
+ /* reassemble was invoked, cleanup its death-row. */
+ if (cts != 0)
+ rte_ip_frag_free_death_row(&lc->death_row, 0);
+
+ if (x == 0)
+ return nb_pkts;
+
+ NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
+ "%u non-reassembled fragments;\n",
+ __func__, port, queue, nb_pkts, x);
+
+ return compress_pkt_list(pkt, nb_pkts, x);
+}
+
+#include "pkt_dpdk_legacy.h"
diff --git a/examples/l4fwd/pkt_dpdk_legacy.h b/examples/l4fwd/pkt_dpdk_legacy.h
new file mode 100644
index 0000000..d840978
--- /dev/null
+++ b/examples/l4fwd/pkt_dpdk_legacy.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PKT_DPDK_LEGACY_H_
+#define PKT_DPDK_LEGACY_H_
+
+#include "dpdk_version.h"
+
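+/* associates a required set of HW ptype capability bits with an RX callback. */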
+struct ptype2cb {
+ uint32_t mask;
+ const char *name;
+ rte_rx_callback_fn fn;
+};
+
+enum {
+ ETHER_PTYPE = 0x1,
+ IPV4_PTYPE = 0x2,
+ IPV4_EXT_PTYPE = 0x4,
+ IPV6_PTYPE = 0x8,
+ IPV6_EXT_PTYPE = 0x10,
+ TCP_PTYPE = 0x20,
+ UDP_PTYPE = 0x40,
+};
+
+#ifdef DPDK_VERSION_GE_1604
+
+static uint32_t
+get_ptypes(const struct netbe_port *uprt)
+{
+ uint32_t smask;
+ int32_t i, rc;
+ const uint32_t pmask = RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK |
+ RTE_PTYPE_L4_MASK;
+
+ smask = 0;
+ rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, NULL, 0);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s(port=%u) failed to get supported ptypes;\n",
+ __func__, uprt->id);
+ return smask;
+ }
+
+ uint32_t ptype[rc];
+ rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, ptype, rc);
+
+ for (i = 0; i != rc; i++) {
+ switch (ptype[i]) {
+ case RTE_PTYPE_L2_ETHER:
+ smask |= ETHER_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV4:
+ case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN:
+ smask |= IPV4_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV4_EXT:
+ smask |= IPV4_EXT_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV6:
+ case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN:
+ smask |= IPV6_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV6_EXT:
+ smask |= IPV6_EXT_PTYPE;
+ break;
+ case RTE_PTYPE_L4_TCP:
+ smask |= TCP_PTYPE;
+ break;
+ case RTE_PTYPE_L4_UDP:
+ smask |= UDP_PTYPE;
+ break;
+ }
+ }
+
+ return smask;
+}
+
+#else
+
+static uint32_t
+get_ptypes(__rte_unused const struct netbe_port *uprt)
+{
+ return 0;
+}
+
+#endif /* DPDK_VERSION_GE_1604 */
+
+int
+setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc,
+ uint16_t qid, uint32_t arp)
+{
+ int32_t rc;
+ uint32_t i, n, smask;
+ void *cb;
+ const struct ptype2cb *ptype2cb;
+
+ static const struct ptype2cb tcp_ptype2cb[] = {
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE |
+ IPV6_PTYPE | IPV6_EXT_PTYPE | TCP_PTYPE,
+ .name = "HW l2/l3x/l4-tcp ptype",
+ .fn = type0_tcp_rx_callback,
+ },
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE |
+ TCP_PTYPE,
+ .name = "HW l2/l3/l4-tcp ptype",
+ .fn = type1_tcp_rx_callback,
+ },
+ {
+ .mask = 0,
+ .name = "tcp no HW ptype",
+ .fn = typen_tcp_rx_callback,
+ },
+ };
+
+ static const struct ptype2cb tcp_arp_ptype2cb[] = {
+ {
+ .mask = 0,
+ .name = "tcp with arp no HW ptype",
+ .fn = typen_tcp_arp_rx_callback,
+ },
+ };
+
+ static const struct ptype2cb udp_ptype2cb[] = {
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE |
+ IPV6_PTYPE | IPV6_EXT_PTYPE | UDP_PTYPE,
+ .name = "HW l2/l3x/l4-udp ptype",
+ .fn = type0_udp_rx_callback,
+ },
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE |
+ UDP_PTYPE,
+ .name = "HW l2/l3/l4-udp ptype",
+ .fn = type1_udp_rx_callback,
+ },
+ {
+ .mask = 0,
+ .name = "udp no HW ptype",
+ .fn = typen_udp_rx_callback,
+ },
+ };
+
+ smask = get_ptypes(uprt);
+
+ if (lc->proto == TLE_PROTO_TCP) {
+ if (arp != 0) {
+ ptype2cb = tcp_arp_ptype2cb;
+ n = RTE_DIM(tcp_arp_ptype2cb);
+ } else {
+ ptype2cb = tcp_ptype2cb;
+ n = RTE_DIM(tcp_ptype2cb);
+ }
+ } else if (lc->proto == TLE_PROTO_UDP) {
+ ptype2cb = udp_ptype2cb;
+ n = RTE_DIM(udp_ptype2cb);
+ } else {
+ RTE_LOG(ERR, USER1,
+ "%s(lc=%u) unsupported proto: %u\n",
+ __func__, lc->id, lc->proto);
+ return -EINVAL;
+ }
+
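+	/*
+	 * the tables above are ordered from the most to the least demanding
+	 * ptype mask; the first entry fully covered by the port's reported
+	 * capabilities is installed.
+	 */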
+ for (i = 0; i != n; i++) {
+ if ((smask & ptype2cb[i].mask) == ptype2cb[i].mask) {
+ cb = rte_eth_add_rx_callback(uprt->id, qid,
+ ptype2cb[i].fn, lc);
+ rc = -rte_errno;
+ RTE_LOG(ERR, USER1,
+ "%s(port=%u), setup RX callback \"%s\" "
+ "returns %p;\n",
+ __func__, uprt->id, ptype2cb[i].name, cb);
+ return ((cb == NULL) ? rc : 0);
+ }
+ }
+
+ /* no proper callback found. */
+ RTE_LOG(ERR, USER1,
+ "%s(port=%u) failed to find an appropriate callback;\n",
+ __func__, uprt->id);
+ return -ENOENT;
+}
+
+#endif /* PKT_DPDK_LEGACY_H_ */
diff --git a/examples/l4fwd/port.h b/examples/l4fwd/port.h
new file mode 100644
index 0000000..bc13dca
--- /dev/null
+++ b/examples/l4fwd/port.h
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PORT_H_
+#define PORT_H_
+
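+/*
+ * Build an RSS key that is zero except for one byte at the offset covering
+ * the destination port (RSS_HASH_KEY_DEST_PORT_LOC_*, defined elsewhere in
+ * this app). Setting that byte to the power-of-two queue count presumably
+ * makes the low bits of the RSS hash, and hence the chosen RX queue,
+ * a function of the destination port alone.
+ */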
+static void
+prepare_hash_key(struct netbe_port *uprt, uint8_t key_size, uint16_t family)
+{
+ uint32_t align_nb_q;
+
+ align_nb_q = rte_align32pow2(uprt->nb_lcore);
+ memset(uprt->hash_key, 0, RSS_HASH_KEY_LENGTH);
+ uprt->hash_key_size = key_size;
+ if (family == AF_INET)
+ uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV4] = align_nb_q;
+ else
+ uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV6] = align_nb_q;
+}
+
+static int
+update_rss_conf(struct netbe_port *uprt,
+ const struct rte_eth_dev_info *dev_info,
+ struct rte_eth_conf *port_conf, uint32_t proto)
+{
+ uint8_t hash_key_size;
+
+ if (uprt->nb_lcore > 1) {
+ if (dev_info->hash_key_size > 0)
+ hash_key_size = dev_info->hash_key_size;
+ else {
+ RTE_LOG(ERR, USER1,
+ "%s: dev_info did not provide a valid hash "
+ "key size\n", __func__);
+ return -EINVAL;
+ }
+
+ if (uprt->ipv4 != INADDR_ANY &&
+ memcmp(&uprt->ipv6, &in6addr_any,
+ sizeof(uprt->ipv6)) != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: RSS for both IPv4 and IPv6 not "
+ "supported!\n", __func__);
+ return -EINVAL;
+ } else if (uprt->ipv4 != INADDR_ANY) {
+ prepare_hash_key(uprt, hash_key_size, AF_INET);
+ } else if (memcmp(&uprt->ipv6, &in6addr_any, sizeof(uprt->ipv6))
+ != 0) {
+ prepare_hash_key(uprt, hash_key_size, AF_INET6);
+ } else {
+ RTE_LOG(ERR, USER1,
+ "%s: No IPv4 or IPv6 address is found!\n",
+ __func__);
+ return -EINVAL;
+ }
+ port_conf->rxmode.mq_mode = ETH_MQ_RX_RSS;
+ if (proto == TLE_PROTO_TCP)
+ port_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_TCP;
+ else
+ port_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_UDP;
+ port_conf->rx_adv_conf.rss_conf.rss_key_len = hash_key_size;
+ port_conf->rx_adv_conf.rss_conf.rss_key = uprt->hash_key;
+ }
+
+ return 0;
+}
+
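+/*
+ * Reverse the low log2(align_nb_q) bits of the RETA index: e.g. with
+ * align_nb_q = 4, index 0b01 maps to queue 0b10. Together with the key
+ * built by prepare_hash_key() this presumably spreads consecutive
+ * destination ports evenly across the configured queues.
+ */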
+static uint32_t
+qidx_from_hash_index(uint32_t hash, uint32_t align_nb_q)
+{
+ uint32_t i, nb_bit, q;
+
+ nb_bit = (sizeof(uint32_t) * CHAR_BIT) - __builtin_clz(align_nb_q - 1);
+ q = (hash & 1);
+ for (i = 1; i < nb_bit; i++) {
+ hash >>= 1;
+ q <<= 1;
+ q |= (hash & 1);
+ }
+
+ return q;
+}
+
+static int
+update_rss_reta(struct netbe_port *uprt,
+ const struct rte_eth_dev_info *dev_info)
+{
+ struct rte_eth_rss_reta_entry64 reta_conf[RSS_RETA_CONF_ARRAY_SIZE];
+ int32_t i, rc, align_nb_q;
+ int32_t q_index, idx, shift;
+
+ if (uprt->nb_lcore > 1) {
+ if (dev_info->reta_size == 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: Redirection table size 0 is invalid for "
+ "RSS\n", __func__);
+ return -EINVAL;
+ }
+ RTE_LOG(NOTICE, USER1,
+ "%s: The reta size of port %d is %u\n",
+ __func__, uprt->id, dev_info->reta_size);
+
+ if (dev_info->reta_size > ETH_RSS_RETA_SIZE_512) {
+ RTE_LOG(ERR, USER1,
+				"%s: RETA with more than %u entries is not supported\n",
+ __func__, ETH_RSS_RETA_SIZE_512);
+ return -EINVAL;
+ }
+
+ memset(reta_conf, 0, sizeof(reta_conf));
+ align_nb_q = rte_align32pow2(uprt->nb_lcore);
+ for (i = 0; i < align_nb_q; i++) {
+ q_index = qidx_from_hash_index(i, align_nb_q) %
+ uprt->nb_lcore;
+
+ idx = i / RTE_RETA_GROUP_SIZE;
+ shift = i % RTE_RETA_GROUP_SIZE;
+ reta_conf[idx].mask |= (1ULL << shift);
+ reta_conf[idx].reta[shift] = q_index;
+ RTE_LOG(NOTICE, USER1,
+ "%s: port=%u RSS reta conf: hash=%u, q=%u\n",
+ __func__, uprt->id, i, q_index);
+ }
+
+ rc = rte_eth_dev_rss_reta_update(uprt->id,
+ reta_conf, dev_info->reta_size);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: Bad redirection table parameter, "
+ "rc = %d\n", __func__, rc);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Initilise DPDK port.
+ * In current version, multi-queue per port is used.
+ */
+static int
+port_init(struct netbe_port *uprt, uint32_t proto)
+{
+ int32_t rc;
+ struct rte_eth_conf port_conf;
+ struct rte_eth_dev_info dev_info;
+
+ rte_eth_dev_info_get(uprt->id, &dev_info);
+ if ((dev_info.rx_offload_capa & uprt->rx_offload) != uprt->rx_offload) {
+ RTE_LOG(ERR, USER1,
+ "port#%u supported/requested RX offloads don't match, "
+ "supported: %#x, requested: %#x;\n",
+ uprt->id, dev_info.rx_offload_capa, uprt->rx_offload);
+ return -EINVAL;
+ }
+ if ((dev_info.tx_offload_capa & uprt->tx_offload) != uprt->tx_offload) {
+ RTE_LOG(ERR, USER1,
+ "port#%u supported/requested TX offloads don't match, "
+ "supported: %#x, requested: %#x;\n",
+ uprt->id, dev_info.tx_offload_capa, uprt->tx_offload);
+ return -EINVAL;
+ }
+
+ port_conf = port_conf_default;
+ if ((uprt->rx_offload & RX_CSUM_OFFLOAD) != 0) {
+ RTE_LOG(ERR, USER1, "%s(%u): enabling RX csum offload;\n",
+ __func__, uprt->id);
+ port_conf.rxmode.hw_ip_checksum = 1;
+ }
+ port_conf.rxmode.max_rx_pkt_len = uprt->mtu + ETHER_CRC_LEN;
+
+ rc = update_rss_conf(uprt, &dev_info, &port_conf, proto);
+ if (rc != 0)
+ return rc;
+
+ rc = rte_eth_dev_configure(uprt->id, uprt->nb_lcore, uprt->nb_lcore,
+ &port_conf);
+ RTE_LOG(NOTICE, USER1,
+ "%s: rte_eth_dev_configure(prt_id=%u, nb_rxq=%u, nb_txq=%u) "
+ "returns %d;\n", __func__, uprt->id, uprt->nb_lcore,
+ uprt->nb_lcore, rc);
+ if (rc != 0)
+ return rc;
+
+ return 0;
+}
+
+static int
+queue_init(struct netbe_port *uprt, struct rte_mempool *mp)
+{
+ int32_t socket, rc;
+ uint16_t q;
+ struct rte_eth_dev_info dev_info;
+
+ rte_eth_dev_info_get(uprt->id, &dev_info);
+
+ socket = rte_eth_dev_socket_id(uprt->id);
+
+ dev_info.default_rxconf.rx_drop_en = 1;
+
+ dev_info.default_txconf.tx_free_thresh = TX_RING_SIZE / 2;
+ if (uprt->tx_offload != 0) {
+ RTE_LOG(ERR, USER1, "%s(%u): enabling full featured TX;\n",
+ __func__, uprt->id);
+ dev_info.default_txconf.txq_flags = 0;
+ }
+
+ for (q = 0; q < uprt->nb_lcore; q++) {
+ rc = rte_eth_rx_queue_setup(uprt->id, q, RX_RING_SIZE,
+ socket, &dev_info.default_rxconf, mp);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: rx queue=%u setup failed with error "
+ "code: %d\n", __func__, q, rc);
+ return rc;
+ }
+ }
+
+ for (q = 0; q < uprt->nb_lcore; q++) {
+ rc = rte_eth_tx_queue_setup(uprt->id, q, TX_RING_SIZE,
+ socket, &dev_info.default_txconf);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: tx queue=%u setup failed with error "
+ "code: %d\n", __func__, q, rc);
+ return rc;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Check that the lcore is enabled and is not already in use (running).
+ */
+static int
+check_lcore(uint32_t lc)
+{
+ if (rte_lcore_is_enabled(lc) == 0) {
+ RTE_LOG(ERR, USER1, "lcore %u is not enabled\n", lc);
+ return -EINVAL;
+ }
+ if (rte_eal_get_lcore_state(lc) == RUNNING) {
+ RTE_LOG(ERR, USER1, "lcore %u already running %p\n",
+ lc, lcore_config[lc].f);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void
+log_netbe_prt(const struct netbe_port *uprt)
+{
+	uint32_t i, n;
+	char corelist[4 * RTE_MAX_LCORE + 1]; /* room for multi-digit lcore ids */
+	char hashkey[2 * RSS_HASH_KEY_LENGTH + 1]; /* +1 for the trailing NUL */
+
+ memset(corelist, 0, sizeof(corelist));
+ memset(hashkey, 0, sizeof(hashkey));
+	for (i = 0, n = 0; i < uprt->nb_lcore; i++)
+		n += sprintf(corelist + n, (i < uprt->nb_lcore - 1) ?
+			"%u," : "%u", uprt->lcore_id[i]);
+
+ for (i = 0; i < uprt->hash_key_size; i++)
+ sprintf(hashkey + (2 * i), "%02x", uprt->hash_key[i]);
+
+ RTE_LOG(NOTICE, USER1,
+ "uprt %p = <id = %u, lcore = <%s>, mtu = %u, "
+ "rx_offload = %u, tx_offload = %u,\n"
+ "ipv4 = %#x, "
+ "ipv6 = %04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx, "
+ "mac = %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx>;\n"
+ "hashkey = %s;\n",
+ uprt, uprt->id, corelist,
+ uprt->mtu, uprt->rx_offload, uprt->tx_offload,
+ uprt->ipv4,
+ uprt->ipv6.s6_addr16[0], uprt->ipv6.s6_addr16[1],
+ uprt->ipv6.s6_addr16[2], uprt->ipv6.s6_addr16[3],
+ uprt->ipv6.s6_addr16[4], uprt->ipv6.s6_addr16[5],
+ uprt->ipv6.s6_addr16[6], uprt->ipv6.s6_addr16[7],
+ uprt->mac.addr_bytes[0], uprt->mac.addr_bytes[1],
+ uprt->mac.addr_bytes[2], uprt->mac.addr_bytes[3],
+ uprt->mac.addr_bytes[4], uprt->mac.addr_bytes[5],
+ hashkey);
+}
+
+static void
+log_netbe_cfg(const struct netbe_cfg *ucfg)
+{
+ uint32_t i;
+
+ RTE_LOG(NOTICE, USER1,
+ "ucfg @ %p, prt_num = %u\n", ucfg, ucfg->prt_num);
+
+ for (i = 0; i != ucfg->prt_num; i++)
+ log_netbe_prt(ucfg->prt + i);
+}
+
+static int
+pool_init(uint32_t sid)
+{
+ int32_t rc;
+ struct rte_mempool *mp;
+ char name[RTE_MEMPOOL_NAMESIZE];
+
+ snprintf(name, sizeof(name), "MP%u", sid);
+ mp = rte_pktmbuf_pool_create(name, MPOOL_NB_BUF, MPOOL_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE, sid - 1);
+ if (mp == NULL) {
+ rc = -rte_errno;
+ RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n",
+ __func__, sid - 1, rc);
+ return rc;
+ }
+
+ mpool[sid] = mp;
+ return 0;
+}
+
+static int
+frag_pool_init(uint32_t sid)
+{
+ int32_t rc;
+ struct rte_mempool *frag_mp;
+ char frag_name[RTE_MEMPOOL_NAMESIZE];
+
+ snprintf(frag_name, sizeof(frag_name), "frag_MP%u", sid);
+ frag_mp = rte_pktmbuf_pool_create(frag_name, MPOOL_NB_BUF,
+ MPOOL_CACHE_SIZE, 0, FRAG_MBUF_BUF_SIZE, sid - 1);
+ if (frag_mp == NULL) {
+ rc = -rte_errno;
+ RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n",
+ __func__, sid - 1, rc);
+ return rc;
+ }
+
+ frag_mpool[sid] = frag_mp;
+ return 0;
+}
+
+static struct netbe_lcore *
+find_initialized_lcore(struct netbe_cfg *cfg, uint32_t lc_num)
+{
+ uint32_t i;
+
+ for (i = 0; i < cfg->cpu_num; i++)
+ if (cfg->cpu[i].id == lc_num)
+ return &cfg->cpu[i];
+
+ return NULL;
+}
+
+/*
+ * Setup all enabled ports.
+ */
+static int
+netbe_port_init(struct netbe_cfg *cfg)
+{
+ int32_t rc;
+ uint32_t i, sid, j;
+ struct netbe_port *prt;
+ struct netbe_lcore *lc;
+
+ for (i = 0; i != cfg->prt_num; i++) {
+ prt = cfg->prt + i;
+ rc = port_init(prt, cfg->proto);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: port=%u init failed with error code: %d\n",
+ __func__, prt->id, rc);
+ return rc;
+ }
+ rte_eth_macaddr_get(prt->id, &prt->mac);
+ if (cfg->promisc)
+ rte_eth_promiscuous_enable(prt->id);
+
+ for (j = 0; j < prt->nb_lcore; j++) {
+ rc = check_lcore(prt->lcore_id[j]);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: processing failed with err: %d\n",
+ __func__, rc);
+ return rc;
+ }
+
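+			/*
+			 * socket ids are kept shifted by one, presumably so
+			 * that mpool[0] == NULL can serve as "not created";
+			 * pool_init() passes sid - 1 back to DPDK.
+			 */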
+ sid = rte_lcore_to_socket_id(prt->lcore_id[j]) + 1;
+ assert(sid < RTE_DIM(mpool));
+
+ if (mpool[sid] == NULL) {
+ rc = pool_init(sid);
+ if (rc != 0)
+ return rc;
+ }
+
+ if (frag_mpool[sid] == NULL) {
+ rc = frag_pool_init(sid);
+ if (rc != 0)
+ return rc;
+ }
+
+ rc = queue_init(prt, mpool[sid]);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: lcore=%u queue init failed with "
+ "err: %d\n",
+ __func__, prt->lcore_id[j], rc);
+ return rc;
+ }
+
+ /* calculate number of queues and assign queue id
+ * per lcore. */
+			lc = find_initialized_lcore(cfg, prt->lcore_id[j]);
+ if (lc == NULL) {
+ lc = &cfg->cpu[cfg->cpu_num];
+ lc->id = prt->lcore_id[j];
+ lc->proto = becfg.proto;
+ cfg->cpu_num++;
+ }
+
+ lc->prtq = rte_realloc(lc->prtq, sizeof(*(lc->prtq)) *
+ (lc->prtq_num + 1), RTE_CACHE_LINE_SIZE);
+ if (lc->prtq == NULL) {
+ RTE_LOG(ERR, USER1,
+ "%s: failed to reallocate memory\n",
+ __func__);
+ return -ENOMEM;
+ }
+ lc->prtq[lc->prtq_num].rxqid = j;
+ lc->prtq[lc->prtq_num].txqid = j;
+ lc->prtq[lc->prtq_num].port = *prt;
+ lc->prtq_num++;
+ }
+ }
+ log_netbe_cfg(cfg);
+
+ return 0;
+}
+
+#endif /* PORT_H_ */
diff --git a/examples/l4fwd/tcp.h b/examples/l4fwd/tcp.h
new file mode 100644
index 0000000..031ad8d
--- /dev/null
+++ b/examples/l4fwd/tcp.h
@@ -0,0 +1,701 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TCP_H_
+#define TCP_H_
+
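+/* bounds the amount of work done by one tle_tcp_process() call, see netbe_lcore_tcp(). */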
+#define TCP_MAX_PROCESS 0x20
+
+static inline void
+netfe_stream_term_tcp(struct netfe_lcore *fe, struct netfe_stream *fes)
+{
+ fes->s = NULL;
+ fes->fwds = NULL;
+ memset(&fes->stat, 0, sizeof(fes->stat));
+ netfe_put_stream(fe, &fe->free, fes);
+}
+
+static inline void
+netfe_stream_close_tcp(struct netfe_lcore *fe, struct netfe_stream *fes)
+{
+ tle_tcp_stream_close(fes->s);
+ netfe_stream_term_tcp(fe, fes);
+}
+
+/*
+ * helper function: opens an IPv4 or IPv6 stream for the selected port.
+ */
+static struct netfe_stream *
+netfe_stream_open_tcp(struct netfe_lcore *fe, struct netfe_sprm *sprm,
+ uint32_t lcore, uint16_t op, uint32_t bidx, uint8_t server_mode)
+{
+ int32_t rc;
+ struct netfe_stream *fes;
+ struct sockaddr_in *l4;
+ struct sockaddr_in6 *l6;
+ uint16_t errport;
+ struct tle_tcp_stream_param tprm;
+
+ fes = netfe_get_stream(&fe->free);
+ if (fes == NULL) {
+ rte_errno = ENOBUFS;
+ return NULL;
+ }
+
+ if (server_mode != 0) {
+ tle_event_free(fes->rxev);
+ fes->rxev = tle_event_alloc(fe->syneq, fes);
+ }
+
+ if (fes->rxev == NULL) {
+ netfe_stream_close_tcp(fe, fes);
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ /* activate rx, tx and err events for the stream */
+ if (op == TXONLY || op == FWD) {
+ tle_event_active(fes->txev, TLE_SEV_DOWN);
+ fes->stat.txev[TLE_SEV_DOWN]++;
+ }
+
+ if (op != TXONLY || server_mode != 0) {
+ tle_event_active(fes->rxev, TLE_SEV_DOWN);
+ fes->stat.rxev[TLE_SEV_DOWN]++;
+ }
+ tle_event_active(fes->erev, TLE_SEV_DOWN);
+ fes->stat.erev[TLE_SEV_DOWN]++;
+
+ memset(&tprm, 0, sizeof(tprm));
+ tprm.addr.local = sprm->local_addr;
+ tprm.addr.remote = sprm->remote_addr;
+ tprm.cfg.err_ev = fes->erev;
+ tprm.cfg.recv_ev = fes->rxev;
+ if (op != FWD)
+ tprm.cfg.send_ev = fes->txev;
+
+ fes->s = tle_tcp_stream_open(becfg.cpu[bidx].ctx, &tprm);
+
+ if (fes->s == NULL) {
+ rc = rte_errno;
+ netfe_stream_close_tcp(fe, fes);
+ rte_errno = rc;
+
+ if (sprm->local_addr.ss_family == AF_INET) {
+ l4 = (struct sockaddr_in *) &sprm->local_addr;
+ errport = ntohs(l4->sin_port);
+ } else {
+ l6 = (struct sockaddr_in6 *) &sprm->local_addr;
+ errport = ntohs(l6->sin6_port);
+ }
+
+ RTE_LOG(ERR, USER1, "stream open failed for port %u with error "
+ "code=%u, bidx=%u, lc=%u\n",
+ errport, rc, bidx, becfg.cpu[bidx].id);
+ return NULL;
+ }
+
+ RTE_LOG(NOTICE, USER1,
+ "%s(%u)={s=%p, op=%hu, proto=%s, rxev=%p, txev=%p}, belc=%u\n",
+ __func__, lcore, fes->s, op, proto_name[becfg.proto],
+ fes->rxev, fes->txev, becfg.cpu[bidx].id);
+
+ fes->op = op;
+ fes->proto = becfg.proto;
+ fes->family = sprm->local_addr.ss_family;
+ fes->laddr = sprm->local_addr;
+ netfe_put_stream(fe, &fe->use, fes);
+
+ return fes;
+}
+
+static int
+netfe_lcore_init_tcp(const struct netfe_lcore_prm *prm)
+{
+ size_t sz;
+ int32_t rc;
+ uint32_t i, lcore, snum;
+ struct netfe_lcore *fe;
+ struct tle_evq_param eprm;
+ struct netfe_stream *fes;
+ struct netfe_sprm *sprm;
+
+ lcore = rte_lcore_id();
+
+ snum = prm->max_streams;
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u, nb_streams=%u, max_streams=%u)\n",
+ __func__, lcore, prm->nb_streams, snum);
+
+ memset(&eprm, 0, sizeof(eprm));
+ eprm.socket_id = rte_lcore_to_socket_id(lcore);
+ eprm.max_events = snum;
+
+ sz = sizeof(*fe) + snum * sizeof(struct netfe_stream);
+ fe = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ rte_lcore_to_socket_id(lcore));
+
+ if (fe == NULL) {
+ RTE_LOG(ERR, USER1, "%s:%d failed to allocate %zu bytes\n",
+ __func__, __LINE__, sz);
+ return -ENOMEM;
+ }
+
+ RTE_PER_LCORE(_fe) = fe;
+
+ fe->snum = snum;
+ /* initialize the stream pool */
+ LIST_INIT(&fe->free.head);
+ LIST_INIT(&fe->use.head);
+
+ /* allocate the event queues */
+ fe->syneq = tle_evq_create(&eprm);
+ fe->ereq = tle_evq_create(&eprm);
+ fe->rxeq = tle_evq_create(&eprm);
+ fe->txeq = tle_evq_create(&eprm);
+
+ RTE_LOG(INFO, USER1, "%s(%u) synevq=%p, erevq=%p, rxevq=%p, txevq=%p\n",
+ __func__, lcore, fe->syneq, fe->ereq, fe->rxeq, fe->txeq);
+ if (fe->syneq == NULL || fe->ereq == NULL || fe->rxeq == NULL ||
+ fe->txeq == NULL)
+ return -ENOMEM;
+
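+	/* the per-stream array sits right behind the lcore structure (see sz above). */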
+ fes = (struct netfe_stream *)(fe + 1);
+ for (i = 0; i != snum; i++) {
+ fes[i].rxev = tle_event_alloc(fe->rxeq, fes + i);
+ fes[i].txev = tle_event_alloc(fe->txeq, fes + i);
+ fes[i].erev = tle_event_alloc(fe->ereq, fes + i);
+ netfe_put_stream(fe, &fe->free, fes + i);
+ }
+
+	rc = 0; /* nb_streams may be zero; keep rc defined for the return below */
+ /* open all requested streams. */
+ for (i = 0; i != prm->nb_streams; i++) {
+ sprm = &prm->stream[i].sprm;
+ fes = netfe_stream_open_tcp(fe, sprm, lcore, prm->stream[i].op,
+ sprm->bidx, becfg.server);
+ if (fes == NULL) {
+ rc = -rte_errno;
+ break;
+ }
+
+ netfe_stream_dump(fes, &sprm->local_addr, &sprm->remote_addr);
+
+ if (prm->stream[i].op == FWD) {
+ fes->fwdprm = prm->stream[i].fprm;
+ } else if (prm->stream[i].op == TXONLY) {
+ fes->txlen = prm->stream[i].txlen;
+ fes->raddr = prm->stream[i].sprm.remote_addr;
+ }
+
+ if (becfg.server == 1) {
+ rc = tle_tcp_stream_listen(fes->s);
+ RTE_LOG(INFO, USER1,
+ "%s(%u) tle_tcp_stream_listen(stream=%p) "
+ "returns %d\n",
+ __func__, lcore, fes->s, rc);
+ if (rc != 0)
+ break;
+ } else {
+ rc = tle_tcp_stream_connect(fes->s,
+ (const struct sockaddr *)&sprm->remote_addr);
+ RTE_LOG(INFO, USER1,
+ "%s(%u) tle_tcp_stream_connect(stream=%p) "
+ "returns %d\n",
+ __func__, lcore, fes->s, rc);
+ if (rc != 0)
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static inline struct netfe_stream *
+netfe_create_fwd_stream(struct netfe_lcore *fe, struct netfe_stream *fes,
+ uint32_t lcore, uint32_t bidx)
+{
+	int32_t rc;
+ struct netfe_stream *fws;
+
+ fws = netfe_stream_open_tcp(fe, &fes->fwdprm, lcore, FWD, bidx, 0);
+ if (fws != NULL) {
+ rc = tle_tcp_stream_connect(fws->s,
+ (const struct sockaddr *)&fes->fwdprm.remote_addr);
+ NETFE_TRACE("%s(lc=%u, fes=%p): tle_tcp_stream_connect() "
+ "returns %d;\n",
+ __func__, rte_lcore_id(), fes, rc);
+
+ if (rc != 0) {
+ netfe_stream_term_tcp(fe, fws);
+ fws = NULL;
+ }
+ }
+
+ if (fws == NULL)
+ RTE_LOG(ERR, USER1, "%s(lc=%u fes=%p) failed to open "
+ "forwarding stream;\n",
+ __func__, rte_lcore_id(), fes);
+
+ return fws;
+}
+
+static inline void
+netfe_fwd_tcp(uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t i, k, n;
+ struct rte_mbuf **pkt;
+ struct netfe_stream *fed;
+
+ RTE_SET_USED(lcore);
+
+ n = fes->pbuf.num;
+ pkt = fes->pbuf.pkt;
+
+ if (n == 0)
+ return;
+
+ fed = fes->fwds;
+
+ if (fed != NULL) {
+
+ k = tle_tcp_stream_send(fed->s, pkt, n);
+
+ NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) "
+ "returns %u\n",
+ __func__, lcore, proto_name[fes->proto],
+ fed->s, n, k);
+
+ fed->stat.txp += k;
+ fed->stat.drops += n - k;
+ fes->stat.fwp += k;
+
+ } else {
+ NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n",
+ __func__, lcore, fes->s, n);
+ for (k = 0; k != n; k++) {
+ NETFE_TRACE("%s(%u, %p): free(%p);\n",
+ __func__, lcore, fes->s, pkt[k]);
+ rte_pktmbuf_free(pkt[k]);
+ }
+ fes->stat.drops += n;
+ }
+
+ /* copy unforwarded mbufs. */
+ for (i = 0; i != n - k; i++)
+ pkt[i] = pkt[i + k];
+
+ fes->pbuf.num = i;
+
+ if (i != 0) {
+ tle_event_raise(fes->txev);
+ fes->stat.txev[TLE_SEV_UP]++;
+ }
+
+ if (n == RTE_DIM(fes->pbuf.pkt)) {
+ tle_event_active(fes->rxev, TLE_SEV_UP);
+ fes->stat.rxev[TLE_SEV_UP]++;
+ }
+}
+
+static inline void
+netfe_new_conn_tcp(struct netfe_lcore *fe, __rte_unused uint32_t lcore,
+ struct netfe_stream *fes)
+{
+ uint32_t i, k, n, rc;
+ struct tle_tcp_stream_cfg *prm;
+ struct tle_tcp_accept_param acpt_prm[MAX_PKT_BURST];
+ struct tle_stream *rs[MAX_PKT_BURST];
+ struct tle_syn_req syn_reqs[MAX_PKT_BURST];
+ struct netfe_stream *ts;
+ struct netfe_stream *fs[MAX_PKT_BURST];
+
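+	/* accepted streams are event-driven; register no-op callbacks. */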
+ static const struct tle_stream_cb zcb = {.func = NULL, .data = NULL};
+
+ /* check if any syn requests are waiting */
+ n = tle_tcp_stream_synreqs(fes->s, syn_reqs, RTE_DIM(syn_reqs));
+ if (n == 0)
+ return;
+
+ NETFE_TRACE("%s(%u): tle_tcp_stream_synreqs(%p, %u) returns %u\n",
+ __func__, lcore, fes->s, MAX_PKT_BURST, n);
+
+ /* get n free streams */
+ k = netfe_get_streams(&fe->free, fs, n);
+
+	/* fill accept params to accept k connection requests */
+ for (i = 0; i != k; i++) {
+ acpt_prm[i].syn = syn_reqs[i];
+ prm = &acpt_prm[i].cfg;
+ prm->nb_retries = 0;
+ prm->recv_ev = fs[i]->rxev;
+ prm->send_ev = fs[i]->txev;
+ prm->err_ev = fs[i]->erev;
+ tle_event_active(fs[i]->erev, TLE_SEV_DOWN);
+ prm->err_cb = zcb;
+ prm->recv_cb = zcb;
+ prm->send_cb = zcb;
+ }
+
+ /* accept k new connections */
+ rc = tle_tcp_stream_accept(fes->s, acpt_prm, rs, k);
+
+ NETFE_TRACE("%s(%u): tle_tcp_stream_accept(%p, %u) returns %u\n",
+ __func__, lcore, fes->s, k, rc);
+
+ if (rc != n) {
+ /* n - rc connections could not be accepted */
+ tle_tcp_reject(fes->s, syn_reqs + rc, n - rc);
+
+		/* put back the k - rc unused streams to the free list */
+ netfe_put_streams(fe, &fe->free, fs + rc, k - rc);
+ }
+
+ /* update the params for accepted streams */
+ for (i = 0; i != rc; i++) {
+
+ ts = fs[i];
+
+ ts->s = rs[i];
+ ts->op = fes->op;
+ ts->proto = fes->proto;
+ ts->family = fes->family;
+ ts->txlen = fes->txlen;
+
+ if (fes->op == TXONLY) {
+ tle_event_active(ts->txev, TLE_SEV_UP);
+ ts->stat.txev[TLE_SEV_UP]++;
+ } else {
+ tle_event_active(ts->rxev, TLE_SEV_DOWN);
+ ts->stat.rxev[TLE_SEV_DOWN]++;
+ }
+
+ netfe_put_stream(fe, &fe->use, ts);
+ NETFE_TRACE("%s(%u) accept (stream=%p, s=%p)\n",
+ __func__, lcore, ts, rs[i]);
+
+ /* create a new fwd stream if needed */
+ if (fes->op == FWD) {
+ tle_event_active(ts->txev, TLE_SEV_DOWN);
+ ts->stat.txev[TLE_SEV_DOWN]++;
+
+ ts->fwds = netfe_create_fwd_stream(fe, fes, lcore,
+ fes->fwdprm.bidx);
+ if (ts->fwds != NULL)
+ ts->fwds->fwds = ts;
+ }
+ }
+ fe->tcp_stat.acc += rc;
+ fe->tcp_stat.rej += n - rc;
+}
+
+static inline void
+netfe_lcore_tcp_req(void)
+{
+ struct netfe_lcore *fe;
+ uint32_t j, n, lcore;
+ struct netfe_stream *fs[MAX_PKT_BURST];
+
+ fe = RTE_PER_LCORE(_fe);
+ if (fe == NULL)
+ return;
+
+ /* look for syn events */
+ n = tle_evq_get(fe->syneq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+ if (n == 0)
+ return;
+
+ lcore = rte_lcore_id();
+
+ NETFE_TRACE("%s(%u): tle_evq_get(synevq=%p) returns %u\n",
+ __func__, lcore, fe->syneq, n);
+
+ for (j = 0; j != n; j++)
+ netfe_new_conn_tcp(fe, lcore, fs[j]);
+}
+
+static inline void
+netfe_lcore_tcp_rst(void)
+{
+ struct netfe_lcore *fe;
+ struct netfe_stream *fwds;
+ uint32_t j, n;
+ struct tle_stream *s[MAX_PKT_BURST];
+ struct netfe_stream *fs[MAX_PKT_BURST];
+ struct tle_event *rv[MAX_PKT_BURST];
+ struct tle_event *tv[MAX_PKT_BURST];
+ struct tle_event *ev[MAX_PKT_BURST];
+
+ fe = RTE_PER_LCORE(_fe);
+ if (fe == NULL)
+ return;
+
+ /* look for err events */
+ n = tle_evq_get(fe->ereq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+ if (n == 0)
+ return;
+
+ NETFE_TRACE("%s(%u): tle_evq_get(errevq=%p) returns %u\n",
+ __func__, rte_lcore_id(), fe->ereq, n);
+
+ for (j = 0; j != n; j++) {
+ if (verbose > VERBOSE_NONE) {
+ struct tle_tcp_stream_addr addr;
+ tle_tcp_stream_get_addr(fs[j]->s, &addr);
+ netfe_stream_dump(fs[j], &addr.local, &addr.remote);
+ }
+ s[j] = fs[j]->s;
+ rv[j] = fs[j]->rxev;
+ tv[j] = fs[j]->txev;
+ ev[j] = fs[j]->erev;
+ }
+
+ tle_evq_idle(fe->rxeq, rv, n);
+ tle_evq_idle(fe->txeq, tv, n);
+ tle_evq_idle(fe->ereq, ev, n);
+
+ tle_tcp_stream_close_bulk(s, n);
+
+ for (j = 0; j != n; j++) {
+
+ /*
+ * if forwarding mode, send unsent packets and
+ * signal peer stream to terminate too.
+ */
+ fwds = fs[j]->fwds;
+ if (fwds != NULL && fwds->s != NULL) {
+
+ /* forward all unsent packets */
+ netfe_fwd_tcp(rte_lcore_id(), fs[j]);
+
+ fwds->fwds = NULL;
+ tle_event_raise(fwds->erev);
+ fs[j]->fwds = NULL;
+ }
+
+		/* now terminate the stream that received the RST event */
+ netfe_rem_stream(&fe->use, fs[j]);
+ netfe_stream_term_tcp(fe, fs[j]);
+ fe->tcp_stat.ter++;
+ }
+}
+
+static inline void
+netfe_rxtx_process_tcp(__rte_unused uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t i, k, n;
+ struct rte_mbuf **pkt;
+
+ n = fes->pbuf.num;
+ pkt = fes->pbuf.pkt;
+
+ /* there is nothing to send. */
+ if (n == 0) {
+ tle_event_idle(fes->txev);
+ fes->stat.txev[TLE_SEV_IDLE]++;
+ return;
+ }
+
+ k = tle_tcp_stream_send(fes->s, pkt, n);
+
+ NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n",
+ __func__, lcore, proto_name[fes->proto],
+ fes->s, n, k);
+ fes->stat.txp += k;
+ fes->stat.drops += n - k;
+
+ /* not able to send anything. */
+ if (k == 0)
+ return;
+
+ if (n == RTE_DIM(fes->pbuf.pkt)) {
+ /* mark stream as readable */
+ tle_event_active(fes->rxev, TLE_SEV_UP);
+ fes->stat.rxev[TLE_SEV_UP]++;
+ }
+
+ /* adjust pbuf array. */
+ fes->pbuf.num = n - k;
+ for (i = 0; i != n - k; i++)
+ pkt[i] = pkt[i + k];
+}
+
+static inline void
+netfe_tx_process_tcp(uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t i, k, n;
+
+ /* refill with new mbufs. */
+ pkt_buf_fill(lcore, &fes->pbuf, fes->txlen);
+
+ n = fes->pbuf.num;
+ if (n == 0)
+ return;
+
+	/*
+	 * TODO: cannot share a send function pointer with the UDP path:
+	 * tle_tcp_stream_send() and tle_udp_stream_send() take different
+	 * numbers of parameters.
+	 */
+ k = tle_tcp_stream_send(fes->s, fes->pbuf.pkt, n);
+
+ NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n",
+ __func__, lcore, proto_name[fes->proto], fes->s, n, k);
+ fes->stat.txp += k;
+ fes->stat.drops += n - k;
+
+ if (k == 0)
+ return;
+
+ /* adjust pbuf array. */
+ fes->pbuf.num = n - k;
+ for (i = k; i != n; i++)
+ fes->pbuf.pkt[i - k] = fes->pbuf.pkt[i];
+}
+
+static inline void
+netfe_lcore_tcp(void)
+{
+ struct netfe_lcore *fe;
+ uint32_t j, n, lcore;
+ struct netfe_stream *fs[MAX_PKT_BURST];
+
+ fe = RTE_PER_LCORE(_fe);
+ if (fe == NULL)
+ return;
+
+ lcore = rte_lcore_id();
+
+ /* look for rx events */
+ n = tle_evq_get(fe->rxeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+
+ if (n != 0) {
+ NETFE_TRACE("%s(%u): tle_evq_get(rxevq=%p) returns %u\n",
+ __func__, lcore, fe->rxeq, n);
+ for (j = 0; j != n; j++)
+ netfe_rx_process(lcore, fs[j]);
+ }
+
+ /* look for tx events */
+ n = tle_evq_get(fe->txeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+
+ if (n != 0) {
+ NETFE_TRACE("%s(%u): tle_evq_get(txevq=%p) returns %u\n",
+ __func__, lcore, fe->txeq, n);
+ for (j = 0; j != n; j++) {
+ if (fs[j]->op == RXTX)
+ netfe_rxtx_process_tcp(lcore, fs[j]);
+ else if (fs[j]->op == FWD)
+ netfe_fwd_tcp(lcore, fs[j]);
+ else if (fs[j]->op == TXONLY)
+ netfe_tx_process_tcp(lcore, fs[j]);
+ }
+ }
+}
+
+static void
+netfe_lcore_fini_tcp(void)
+{
+ struct netfe_lcore *fe;
+ uint32_t i, snum;
+ struct tle_tcp_stream_addr addr;
+ struct netfe_stream *fes;
+ uint32_t acc, rej, ter;
+
+ fe = RTE_PER_LCORE(_fe);
+ if (fe == NULL)
+ return;
+
+ snum = fe->use.num;
+ for (i = 0; i != snum; i++) {
+ fes = netfe_get_stream(&fe->use);
+ tle_tcp_stream_get_addr(fes->s, &addr);
+ netfe_stream_dump(fes, &addr.local, &addr.remote);
+ netfe_stream_close(fe, fes);
+ }
+
+ acc = fe->tcp_stat.acc;
+ rej = fe->tcp_stat.rej;
+ ter = fe->tcp_stat.ter;
+ RTE_LOG(NOTICE, USER1,
+ "tcp_stats={con_acc=%u,con_rej=%u,con_ter=%u};\n",
+ acc, rej, ter);
+
+ tle_evq_destroy(fe->txeq);
+ tle_evq_destroy(fe->rxeq);
+ tle_evq_destroy(fe->ereq);
+ tle_evq_destroy(fe->syneq);
+ RTE_PER_LCORE(_fe) = NULL;
+ rte_free(fe);
+}
+
+static inline void
+netbe_lcore_tcp(void)
+{
+ uint32_t i;
+ struct netbe_lcore *lc;
+
+ lc = RTE_PER_LCORE(_be);
+ if (lc == NULL)
+ return;
+
+ for (i = 0; i != lc->prtq_num; i++) {
+ netbe_rx(lc, i);
+ tle_tcp_process(lc->ctx, TCP_MAX_PROCESS);
+ netbe_tx(lc, i);
+ }
+}
+
+static int
+lcore_main_tcp(void *arg)
+{
+ int32_t rc;
+ uint32_t lcore;
+ struct lcore_prm *prm;
+
+ prm = arg;
+ lcore = rte_lcore_id();
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n",
+ __func__, lcore);
+
+ rc = 0;
+
+ /* lcore FE init. */
+ if (prm->fe.max_streams != 0)
+ rc = netfe_lcore_init_tcp(&prm->fe);
+
+	/* lcore BE init. */
+ if (rc == 0 && prm->be.lc != NULL)
+ rc = netbe_lcore_setup(prm->be.lc);
+
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+
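+	/*
+	 * main loop: accept new connections, handle RST/error events,
+	 * service FE rx/tx events, then drive the BE rx/process/tx path.
+	 */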
+ while (force_quit == 0) {
+ netfe_lcore_tcp_req();
+ netfe_lcore_tcp_rst();
+ netfe_lcore_tcp();
+ netbe_lcore_tcp();
+ }
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n",
+ __func__, lcore);
+
+ netfe_lcore_fini_tcp();
+ netbe_lcore_clear();
+
+ return rc;
+}
+
+#endif /* TCP_H_ */
diff --git a/examples/l4fwd/udp.h b/examples/l4fwd/udp.h
new file mode 100644
index 0000000..cdec6a5
--- /dev/null
+++ b/examples/l4fwd/udp.h
@@ -0,0 +1,588 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef UDP_H_
+#define UDP_H_
+
+/*
+ * helper function: opens an IPv4 or IPv6 stream for the selected port.
+ */
+static struct netfe_stream *
+netfe_stream_open_udp(struct netfe_lcore *fe, struct netfe_sprm *sprm,
+ uint32_t lcore, uint16_t op, uint32_t bidx)
+{
+ int32_t rc;
+ struct netfe_stream *fes;
+ struct sockaddr_in *l4;
+ struct sockaddr_in6 *l6;
+ uint16_t errport;
+ struct tle_udp_stream_param uprm;
+
+ fes = netfe_get_stream(&fe->free);
+ if (fes == NULL) {
+ rte_errno = ENOBUFS;
+ return NULL;
+ }
+
+ fes->rxev = tle_event_alloc(fe->rxeq, fes);
+ fes->txev = tle_event_alloc(fe->txeq, fes);
+
+ if (fes->rxev == NULL || fes->txev == NULL) {
+ netfe_stream_close(fe, fes);
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ if (op == TXONLY || op == FWD) {
+ tle_event_active(fes->txev, TLE_SEV_DOWN);
+ fes->stat.txev[TLE_SEV_DOWN]++;
+ }
+
+ if (op != TXONLY) {
+ tle_event_active(fes->rxev, TLE_SEV_DOWN);
+ fes->stat.rxev[TLE_SEV_DOWN]++;
+ }
+
+ memset(&uprm, 0, sizeof(uprm));
+ uprm.local_addr = sprm->local_addr;
+ uprm.remote_addr = sprm->remote_addr;
+ uprm.recv_ev = fes->rxev;
+ if (op != FWD)
+ uprm.send_ev = fes->txev;
+ fes->s = tle_udp_stream_open(becfg.cpu[bidx].ctx, &uprm);
+
+ if (fes->s == NULL) {
+ rc = rte_errno;
+ netfe_stream_close(fe, fes);
+ rte_errno = rc;
+
+ if (sprm->local_addr.ss_family == AF_INET) {
+ l4 = (struct sockaddr_in *) &sprm->local_addr;
+ errport = ntohs(l4->sin_port);
+ } else {
+ l6 = (struct sockaddr_in6 *) &sprm->local_addr;
+ errport = ntohs(l6->sin6_port);
+ }
+
+ RTE_LOG(ERR, USER1, "stream open failed for port %u with error "
+ "code=%u, bidx=%u, lc=%u\n",
+ errport, rc, bidx, becfg.cpu[bidx].id);
+ return NULL;
+ }
+
+ RTE_LOG(NOTICE, USER1,
+ "%s(%u)={s=%p, op=%hu, proto=%s, rxev=%p, txev=%p}, belc=%u\n",
+ __func__, lcore, fes->s, op, proto_name[becfg.proto],
+ fes->rxev, fes->txev, becfg.cpu[bidx].id);
+
+ fes->op = op;
+ fes->proto = becfg.proto;
+ fes->family = sprm->local_addr.ss_family;
+
+ return fes;
+}
+
+static int
+netfe_lcore_init_udp(const struct netfe_lcore_prm *prm)
+{
+ size_t sz;
+ int32_t rc;
+ uint32_t i, lcore, snum;
+ struct netfe_lcore *fe;
+ struct tle_evq_param eprm;
+ struct netfe_stream *fes;
+ struct netfe_sprm *sprm;
+
+ lcore = rte_lcore_id();
+
+ snum = prm->max_streams;
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u, nb_streams=%u, max_streams=%u)\n",
+ __func__, lcore, prm->nb_streams, snum);
+
+ memset(&eprm, 0, sizeof(eprm));
+ eprm.socket_id = rte_lcore_to_socket_id(lcore);
+ eprm.max_events = snum;
+
+ sz = sizeof(*fe) + snum * sizeof(struct netfe_stream);
+ fe = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ rte_lcore_to_socket_id(lcore));
+
+ if (fe == NULL) {
+ RTE_LOG(ERR, USER1, "%s:%d failed to allocate %zu bytes\n",
+ __func__, __LINE__, sz);
+ return -ENOMEM;
+ }
+
+ RTE_PER_LCORE(_fe) = fe;
+
+ fe->snum = snum;
+ /* initialize the stream pool */
+ LIST_INIT(&fe->free.head);
+ LIST_INIT(&fe->use.head);
+ fes = (struct netfe_stream *)(fe + 1);
+ for (i = 0; i != snum; i++, fes++)
+ netfe_put_stream(fe, &fe->free, fes);
+
+ /* allocate the event queues */
+ fe->rxeq = tle_evq_create(&eprm);
+ fe->txeq = tle_evq_create(&eprm);
+
+ RTE_LOG(INFO, USER1, "%s(%u) rx evq=%p, tx evq=%p\n",
+ __func__, lcore, fe->rxeq, fe->txeq);
+ if (fe->rxeq == NULL || fe->txeq == NULL)
+ return -ENOMEM;
+
+ rc = fwd_tbl_init(fe, AF_INET, lcore);
+ RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n",
+ __func__, lcore, AF_INET, rc);
+ if (rc != 0)
+ return rc;
+
+ rc = fwd_tbl_init(fe, AF_INET6, lcore);
+ RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n",
+ __func__, lcore, AF_INET6, rc);
+ if (rc != 0)
+ return rc;
+
+ /* open all requested streams. */
+ for (i = 0; i != prm->nb_streams; i++) {
+ sprm = &prm->stream[i].sprm;
+ fes = netfe_stream_open_udp(fe, sprm, lcore, prm->stream[i].op,
+ sprm->bidx);
+ if (fes == NULL) {
+ rc = -rte_errno;
+ break;
+ }
+
+ netfe_stream_dump(fes, &sprm->local_addr, &sprm->remote_addr);
+
+ if (prm->stream[i].op == FWD) {
+ fes->fwdprm = prm->stream[i].fprm;
+ rc = fwd_tbl_add(fe,
+ prm->stream[i].fprm.remote_addr.ss_family,
+ (const struct sockaddr *)
+ &prm->stream[i].fprm.remote_addr,
+ fes);
+ if (rc != 0) {
+ netfe_stream_close(fe, fes);
+ break;
+ }
+ } else if (prm->stream[i].op == TXONLY) {
+ fes->txlen = prm->stream[i].txlen;
+ fes->raddr = prm->stream[i].sprm.remote_addr;
+ }
+ }
+
+ return rc;
+}
+
+static struct netfe_stream *
+find_fwd_dst_udp(uint32_t lcore, struct netfe_stream *fes,
+ const struct sockaddr *sa)
+{
+ uint32_t rc;
+ struct netfe_stream *fed;
+ struct netfe_lcore *fe;
+ struct tle_udp_stream_param uprm;
+
+ fe = RTE_PER_LCORE(_fe);
+
+ fed = fwd_tbl_lkp(fe, fes->family, sa);
+ if (fed != NULL)
+ return fed;
+
+ /* create a new stream and put it into the fwd table. */
+ memset(&uprm, 0, sizeof(uprm));
+ uprm.local_addr = fes->fwdprm.local_addr;
+ uprm.remote_addr = fes->fwdprm.remote_addr;
+
+ /* open forward stream with wildcard remote addr. */
+ memset(&uprm.remote_addr.ss_family + 1, 0,
+ sizeof(uprm.remote_addr) - sizeof(uprm.remote_addr.ss_family));
+
+ fed = netfe_stream_open_udp(fe, &fes->fwdprm, lcore, FWD,
+ fes->fwdprm.bidx);
+ if (fed == NULL)
+ return NULL;
+
+ rc = fwd_tbl_add(fe, fes->family, sa, fed);
+ if (rc != 0) {
+ netfe_stream_close(fe, fed);
+		return NULL; /* avoid dereferencing a closed stream below */
+ }
+
+ fed->fwdprm.remote_addr = *(const struct sockaddr_storage *)sa;
+ return fed;
+}
+
+static inline int
+netfe_addr_eq(struct sockaddr_storage *l, struct sockaddr_storage *r,
+ uint16_t family)
+{
+ struct sockaddr_in *l4, *r4;
+ struct sockaddr_in6 *l6, *r6;
+
+ if (family == AF_INET) {
+ l4 = (struct sockaddr_in *)l;
+ r4 = (struct sockaddr_in *)r;
+ return (l4->sin_port == r4->sin_port &&
+ l4->sin_addr.s_addr == r4->sin_addr.s_addr);
+ } else {
+ l6 = (struct sockaddr_in6 *)l;
+ r6 = (struct sockaddr_in6 *)r;
+ return (l6->sin6_port == r6->sin6_port &&
+ memcmp(&l6->sin6_addr, &r6->sin6_addr,
+			sizeof(l6->sin6_addr)) == 0); /* memcmp() == 0 means equal */
+ }
+}
+
+static inline void
+netfe_pkt_addr(const struct rte_mbuf *m, struct sockaddr_storage *ps,
+ uint16_t family)
+{
+ const struct ipv4_hdr *ip4h;
+ const struct ipv6_hdr *ip6h;
+ const struct udp_hdr *udph;
+ struct sockaddr_in *in4;
+ struct sockaddr_in6 *in6;
+
+ NETFE_PKT_DUMP(m);
+
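+	/*
+	 * the mbuf data offset is assumed to sit past the L2/L3/L4 headers
+	 * here, so the UDP and IP headers are read back at negative offsets
+	 * using the l3_len/l4_len values filled in by the BE RX callbacks.
+	 */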
+ udph = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, -m->l4_len);
+
+ if (family == AF_INET) {
+ in4 = (struct sockaddr_in *)ps;
+ ip4h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+ -(m->l4_len + m->l3_len));
+ in4->sin_port = udph->src_port;
+ in4->sin_addr.s_addr = ip4h->src_addr;
+ } else {
+ in6 = (struct sockaddr_in6 *)ps;
+ ip6h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+ -(m->l4_len + m->l3_len));
+ in6->sin6_port = udph->src_port;
+ rte_memcpy(&in6->sin6_addr, ip6h->src_addr,
+ sizeof(in6->sin6_addr));
+ }
+}
+
+static inline uint32_t
+pkt_eq_addr(struct rte_mbuf *pkt[], uint32_t num, uint16_t family,
+ struct sockaddr_storage *cur, struct sockaddr_storage *nxt)
+{
+ uint32_t i;
+
+ for (i = 0; i != num; i++) {
+ netfe_pkt_addr(pkt[i], nxt, family);
+ if (netfe_addr_eq(cur, nxt, family) == 0)
+ break;
+ }
+
+ return i;
+}
+
+static inline void
+netfe_fwd_udp(uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t i, j, k, n, x;
+ uint16_t family;
+ void *pi0, *pi1, *pt;
+ struct rte_mbuf **pkt;
+ struct netfe_stream *fed;
+ struct sockaddr_storage in[2];
+
+ family = fes->family;
+ n = fes->pbuf.num;
+ pkt = fes->pbuf.pkt;
+
+ if (n == 0)
+ return;
+
+ in[0].ss_family = family;
+ in[1].ss_family = family;
+ pi0 = &in[0];
+ pi1 = &in[1];
+
+ netfe_pkt_addr(pkt[0], pi0, family);
+
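+	/*
+	 * walk the burst in runs of packets sharing one source address:
+	 * pi0 holds the current run's address, pi1 receives the first
+	 * differing one, and the two pointers swap at each run boundary.
+	 */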
+ x = 0;
+ for (i = 0; i != n; i = j) {
+
+ j = i + pkt_eq_addr(&pkt[i + 1],
+ n - i - 1, family, pi0, pi1) + 1;
+
+ fed = find_fwd_dst_udp(lcore, fes,
+ (const struct sockaddr *)pi0);
+ if (fed != NULL) {
+
+			/*
+			 * TODO: cannot share a send function pointer with the
+			 * TCP path: tle_udp_stream_send() takes an extra
+			 * destination address parameter.
+			 */
+ k = tle_udp_stream_send(fed->s, pkt + i, j - i,
+ (const struct sockaddr *)
+ &fes->fwdprm.remote_addr);
+
+ NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) "
+ "returns %u\n",
+ __func__, lcore, proto_name[fes->proto],
+ fed->s, j - i, k);
+
+ fed->stat.txp += k;
+ fed->stat.drops += j - i - k;
+ fes->stat.fwp += k;
+
+ } else {
+ NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n",
+ __func__, lcore, fes->s, j - i);
+			for (k = i; k != j; k++) {
+				NETFE_TRACE("%s(%u, %p): free(%p);\n",
+					__func__, lcore, fes->s, pkt[k]);
+				rte_pktmbuf_free(pkt[k]);
+			}
+			fes->stat.drops += j - i;
+			/* whole run consumed; let "i += k" below land on j. */
+			k = j - i;
+ }
+
+ /* copy unforwarded mbufs. */
+ for (i += k; i != j; i++, x++)
+ pkt[x] = pkt[i];
+
+ /* swap the pointers */
+ pt = pi0;
+ pi0 = pi1;
+ pi1 = pt;
+ }
+
+ fes->pbuf.num = x;
+
+ if (x != 0) {
+ tle_event_raise(fes->txev);
+ fes->stat.txev[TLE_SEV_UP]++;
+ }
+
+ if (n == RTE_DIM(fes->pbuf.pkt)) {
+ tle_event_active(fes->rxev, TLE_SEV_UP);
+ fes->stat.rxev[TLE_SEV_UP]++;
+ }
+}
+
+static inline void
+netfe_rxtx_process_udp(__rte_unused uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t i, j, k, n;
+ uint16_t family;
+ void *pi0, *pi1, *pt;
+ struct rte_mbuf **pkt;
+ struct sockaddr_storage in[2];
+
+ family = fes->family;
+ n = fes->pbuf.num;
+ pkt = fes->pbuf.pkt;
+
+ /* there is nothing to send. */
+ if (n == 0) {
+ tle_event_idle(fes->txev);
+ fes->stat.txev[TLE_SEV_IDLE]++;
+ return;
+ }
+
+ in[0].ss_family = family;
+ in[1].ss_family = family;
+ pi0 = &in[0];
+ pi1 = &in[1];
+
+ netfe_pkt_addr(pkt[0], pi0, family);
+
+ for (i = 0; i != n; i = j) {
+
+ j = i + pkt_eq_addr(&pkt[i + 1],
+ n - i - 1, family, pi0, pi1) + 1;
+
+		/*
+		 * TODO: cannot share a send function pointer with the TCP
+		 * path: tle_udp_stream_send() takes an extra address parameter.
+		 */
+ k = tle_udp_stream_send(fes->s, pkt + i, j - i,
+ (const struct sockaddr *)pi0);
+
+ NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n",
+ __func__, lcore, proto_name[fes->proto],
+ fes->s, j - i, k);
+ fes->stat.txp += k;
+ fes->stat.drops += j - i - k;
+
+ i += k;
+
+ /* stream send buffer is full */
+ if (i != j)
+ break;
+
+ /* swap the pointers */
+ pt = pi0;
+ pi0 = pi1;
+ pi1 = pt;
+ }
+
+ /* not able to send anything. */
+ if (i == 0)
+ return;
+
+ if (n == RTE_DIM(fes->pbuf.pkt)) {
+ /* mark stream as readable */
+ tle_event_active(fes->rxev, TLE_SEV_UP);
+ fes->stat.rxev[TLE_SEV_UP]++;
+ }
+
+ /* adjust pbuf array. */
+ fes->pbuf.num = n - i;
+ for (j = i; j != n; j++)
+ pkt[j - i] = pkt[j];
+}
+
+static inline void
+netfe_tx_process_udp(uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t i, k, n;
+
+ /* refill with new mbufs. */
+ pkt_buf_fill(lcore, &fes->pbuf, fes->txlen);
+
+ n = fes->pbuf.num;
+ if (n == 0)
+ return;
+
+	/*
+	 * TODO: cannot share a send function pointer with the TCP path:
+	 * tle_udp_stream_send() takes an extra address parameter.
+	 */
+ k = tle_udp_stream_send(fes->s, fes->pbuf.pkt, n, NULL);
+ NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n",
+ __func__, lcore, proto_name[fes->proto], fes->s, n, k);
+ fes->stat.txp += k;
+ fes->stat.drops += n - k;
+
+ if (k == 0)
+ return;
+
+ /* adjust pbuf array. */
+ fes->pbuf.num = n - k;
+ for (i = k; i != n; i++)
+ fes->pbuf.pkt[i - k] = fes->pbuf.pkt[i];
+}
+
+static inline void
+netfe_lcore_udp(void)
+{
+ struct netfe_lcore *fe;
+ uint32_t j, n, lcore;
+ struct netfe_stream *fs[MAX_PKT_BURST];
+
+ fe = RTE_PER_LCORE(_fe);
+ if (fe == NULL)
+ return;
+
+ lcore = rte_lcore_id();
+
+ /* look for rx events */
+ n = tle_evq_get(fe->rxeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+
+ if (n != 0) {
+ NETFE_TRACE("%s(%u): tle_evq_get(rxevq=%p) returns %u\n",
+ __func__, lcore, fe->rxeq, n);
+ for (j = 0; j != n; j++)
+ netfe_rx_process(lcore, fs[j]);
+ }
+
+ /* look for tx events */
+ n = tle_evq_get(fe->txeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+
+ if (n != 0) {
+ NETFE_TRACE("%s(%u): tle_evq_get(txevq=%p) returns %u\n",
+ __func__, lcore, fe->txeq, n);
+ for (j = 0; j != n; j++) {
+ if (fs[j]->op == RXTX)
+ netfe_rxtx_process_udp(lcore, fs[j]);
+ else if (fs[j]->op == FWD)
+ netfe_fwd_udp(lcore, fs[j]);
+ else if (fs[j]->op == TXONLY)
+ netfe_tx_process_udp(lcore, fs[j]);
+ }
+ }
+}
+
+static void
+netfe_lcore_fini_udp(void)
+{
+ struct netfe_lcore *fe;
+	uint32_t i, snum;
+ struct tle_udp_stream_param uprm;
+ struct netfe_stream *fes;
+
+ fe = RTE_PER_LCORE(_fe);
+ if (fe == NULL)
+ return;
+
+	/* snapshot the count: netfe_get_stream() removes entries from the use list. */
+	snum = fe->use.num;
+	for (i = 0; i != snum; i++) {
+ fes = netfe_get_stream(&fe->use);
+ tle_udp_stream_get_param(fes->s, &uprm);
+ netfe_stream_dump(fes, &uprm.local_addr, &uprm.remote_addr);
+ netfe_stream_close(fe, fes);
+ }
+
+ tle_evq_destroy(fe->txeq);
+ tle_evq_destroy(fe->rxeq);
+ RTE_PER_LCORE(_fe) = NULL;
+ rte_free(fe);
+}
+
+static int
+lcore_main_udp(void *arg)
+{
+ int32_t rc;
+ uint32_t lcore;
+ struct lcore_prm *prm;
+
+ prm = arg;
+ lcore = rte_lcore_id();
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n",
+ __func__, lcore);
+
+ rc = 0;
+
+ /* lcore FE init. */
+ if (prm->fe.max_streams != 0)
+ rc = netfe_lcore_init_udp(&prm->fe);
+
+	/* lcore BE init. */
+ if (rc == 0 && prm->be.lc != NULL)
+ rc = netbe_lcore_setup(prm->be.lc);
+
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+
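+	/* main loop: service FE stream events, then the BE rx/tx path. */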
+ while (force_quit == 0) {
+ netfe_lcore_udp();
+ netbe_lcore();
+ }
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n",
+ __func__, lcore);
+
+ netfe_lcore_fini_udp();
+ netbe_lcore_clear();
+
+ return rc;
+}
+
+#endif /* UDP_H_ */