author    Konstantin Ananyev <konstantin.ananyev@intel.com>  2017-02-21 18:12:20 +0000
committer Konstantin Ananyev <konstantin.ananyev@intel.com>  2017-02-24 16:37:08 +0000
commit    aa97dd1ce910b839fed46ad55d1e70e403f5a930 (patch)
tree      f6f0fd494eaf499859bff9f20f5ddfac9ab99233
parent    f5f10013ffef8e4ac1071087b8492fe6380d98fe (diff)
Introduce first version of TCP code.
Supported functionality:
- open/close
- listen/accept/connect
- send/recv

In order to achieve that libtle_udp library was reworked into
libtle_l4p library that supports both TCP and UDP protocols.
New libtle_timer library was introduced (thanks to Cisco guys and
Dave Barach <dbarach@cisco.com> for sharing their timer code with us).
Sample application was also reworked significantly to support both
TCP and UDP traffic handling.
New UT were introduced.

Change-Id: I806b05011f521e89b58db403cfdd484a37beb775
Signed-off-by: Mohammad Abdul Awal <mohammad.abdul.awal@intel.com>
Signed-off-by: Karol Latecki <karolx.latecki@intel.com>
Signed-off-by: Daniel Mrzyglod <danielx.t.mrzyglod@intel.com>
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
-rw-r--r--  .gitignore | 4
-rw-r--r--  README | 117
-rw-r--r--  examples/Makefile | 2
-rw-r--r--  examples/l4fwd/Makefile (renamed from examples/udpfwd/Makefile) | 5
-rw-r--r--  examples/l4fwd/README | 346
-rw-r--r--  examples/l4fwd/be.cfg (renamed from examples/udpfwd/be.cfg) | 2
-rw-r--r--  examples/l4fwd/common.h | 662
-rw-r--r--  examples/l4fwd/dpdk_legacy.h (renamed from examples/udpfwd/main_dpdk_legacy.h) | 30
-rw-r--r--  examples/l4fwd/dpdk_version.h (renamed from examples/udpfwd/dpdk_version.h) | 2
-rw-r--r--  examples/l4fwd/fe.cfg (renamed from examples/udpfwd/fe.cfg) | 0
-rw-r--r--  examples/l4fwd/fwdtbl.h (renamed from examples/udpfwd/fwdtbl.h) | 0
-rwxr-xr-x  examples/l4fwd/gen_fe_cfg.py (renamed from examples/udpfwd/gen_fe_cfg.py) | 5
-rw-r--r--  examples/l4fwd/lcore.h | 370
-rw-r--r--  examples/l4fwd/main.c | 313
-rw-r--r--  examples/l4fwd/netbe.h (renamed from examples/udpfwd/netbe.h) | 90
-rw-r--r--  examples/l4fwd/parse.c (renamed from examples/udpfwd/parse.c) | 247
-rw-r--r--  examples/l4fwd/parse.h (renamed from examples/udpfwd/parse.h) | 24
-rw-r--r--  examples/l4fwd/pkt.c | 872
-rw-r--r--  examples/l4fwd/pkt_dpdk_legacy.h (renamed from examples/udpfwd/pkt_dpdk_legacy.h) | 176
-rw-r--r--  examples/l4fwd/port.h | 453
-rw-r--r--  examples/l4fwd/tcp.h | 701
-rw-r--r--  examples/l4fwd/udp.h | 588
-rw-r--r--  examples/udpfwd/README | 141
-rw-r--r--  examples/udpfwd/main.c | 2134
-rw-r--r--  examples/udpfwd/pkt.c | 509
-rw-r--r--  lib/Makefile | 3
-rw-r--r--  lib/libtle_dring/tle_dring.h | 6
-rw-r--r--  lib/libtle_l4p/Makefile (renamed from lib/libtle_udp/Makefile) | 16
-rw-r--r--  lib/libtle_l4p/ctx.c | 527
-rw-r--r--  lib/libtle_l4p/ctx.h | 86
-rw-r--r--  lib/libtle_l4p/debug.h | 81
-rw-r--r--  lib/libtle_l4p/event.c (renamed from lib/libtle_udp/event.c) | 0
-rw-r--r--  lib/libtle_l4p/misc.h (renamed from lib/libtle_udp/misc.h) | 116
-rw-r--r--  lib/libtle_l4p/net_misc.h | 78
-rw-r--r--  lib/libtle_l4p/osdep.h (renamed from lib/libtle_udp/osdep.h) | 6
-rw-r--r--  lib/libtle_l4p/port_bitmap.h (renamed from lib/libtle_udp/port_bitmap.h) | 14
-rw-r--r--  lib/libtle_l4p/stream.h | 170
-rw-r--r--  lib/libtle_l4p/stream_table.c | 80
-rw-r--r--  lib/libtle_l4p/stream_table.h | 260
-rw-r--r--  lib/libtle_l4p/syncookie.h | 194
-rw-r--r--  lib/libtle_l4p/tcp_ctl.h | 120
-rw-r--r--  lib/libtle_l4p/tcp_misc.h | 462
-rw-r--r--  lib/libtle_l4p/tcp_ofo.c | 85
-rw-r--r--  lib/libtle_l4p/tcp_ofo.h | 249
-rw-r--r--  lib/libtle_l4p/tcp_rxq.h | 149
-rw-r--r--  lib/libtle_l4p/tcp_rxtx.c | 2431
-rw-r--r--  lib/libtle_l4p/tcp_stream.c | 522
-rw-r--r--  lib/libtle_l4p/tcp_stream.h | 170
-rw-r--r--  lib/libtle_l4p/tcp_timer.h | 126
-rw-r--r--  lib/libtle_l4p/tcp_txq.h | 122
-rw-r--r--  lib/libtle_l4p/tle_ctx.h | 233
-rw-r--r--  lib/libtle_l4p/tle_event.h (renamed from lib/libtle_udp/tle_event.h) | 21
-rw-r--r--  lib/libtle_l4p/tle_tcp.h | 395
-rw-r--r--  lib/libtle_l4p/tle_udp.h | 187
-rw-r--r--  lib/libtle_l4p/udp_rxtx.c (renamed from lib/libtle_udp/udp_rxtx.c) | 219
-rw-r--r--  lib/libtle_l4p/udp_stream.c | 346
-rw-r--r--  lib/libtle_l4p/udp_stream.h | 79
-rw-r--r--  lib/libtle_timer/Makefile | 38
-rw-r--r--  lib/libtle_timer/timer.c | 364
-rw-r--r--  lib/libtle_timer/tle_timer.h | 94
-rw-r--r--  lib/libtle_udp/tle_udp_impl.h | 384
-rw-r--r--  lib/libtle_udp/udp_ctl.c | 794
-rw-r--r--  lib/libtle_udp/udp_impl.h | 166
-rw-r--r--  test/Makefile | 1
-rw-r--r--  test/gtest/Makefile | 12
-rw-r--r--  test/gtest/README | 82
-rw-r--r--  test/gtest/main.cpp | 33
-rw-r--r--  test/gtest/test_common.cpp | 276
-rw-r--r--  test/gtest/test_common.h | 97
-rw-r--r--  test/gtest/test_scapy_gen.py | 96
-rw-r--r--  test/gtest/test_tle_ctx.cpp | 130
-rw-r--r--  test/gtest/test_tle_ctx.h (renamed from test/gtest/test_tle_udp_ctx.h) | 8
-rw-r--r--  test/gtest/test_tle_tcp_stream.cpp | 195
-rw-r--r--  test/gtest/test_tle_tcp_stream.h | 251
-rw-r--r--  test/gtest/test_tle_udp_ctx.cpp | 42
-rw-r--r--  test/gtest/test_tle_udp_destroy.cpp | 4
-rw-r--r--  test/gtest/test_tle_udp_destroy.h | 8
-rw-r--r--  test/gtest/test_tle_udp_dev.cpp | 27
-rw-r--r--  test/gtest/test_tle_udp_dev.h | 23
-rw-r--r--  test/gtest/test_tle_udp_event.h | 2
-rw-r--r--  test/gtest/test_tle_udp_stream.cpp | 78
-rw-r--r--  test/gtest/test_tle_udp_stream.h | 200
-rw-r--r--  test/gtest/test_tle_udp_stream_gen.cpp | 444
-rw-r--r--  test/gtest/test_tle_udp_stream_gen.h | 541
-rw-r--r--  test/timer/Makefile | 42
-rw-r--r--  test/timer/test_timer.c | 272
86 files changed, 15344 insertions, 4706 deletions
diff --git a/.gitignore b/.gitignore
index 39afbd3..1c0bae7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,7 @@ dpdk/_build/
dpdk/dpdk-*.tar.gz
x86_64-native-linuxapp-gcc/
.vagrant
+.project
+.idea
+.cproject
+.pydev*
diff --git a/README b/README
index 6131a89..f6ff9ed 100644
--- a/README
+++ b/README
@@ -1,77 +1,80 @@
-OVERVIEW
-========
+1. OVERVIEW
-TLDK project scope is:
-1) Implement a set of libraries for L4 protocol processing (UDP, TCP etc.)
+ TLDK project scope is as follows:
+
+ 1) To implement a set of libraries for L4 protocol processing (UDP, TCP etc.)
for both IPv4 and IPv6.
The goal is to provide lightweight, high performance and highly adaptable
implementation for L4(UDP, TCP etc.) protocol processing. The provided API
are not planned to be compatible with BSD socket API. These libraries are
- suppose to be built on top of DPDK.
+ supposed to be built on top of DPDK.
- Note: these libraries are not supposed to be a 'complete' network stack.
+ Note: these libraries are not supposed to be a 'complete' network stack.
- Implementation of ARP, IP, ETHER, etc layers and related routing tables,
+ Implementation of ARP, IP, ETHER, etc. layers and related routing tables,
code for setup, manage and perform actual IO over underlying devices are
all out of scope of these libraries.
- Implementation of ARP, IP etc. layers and their related routing tables
- are out of scope of these libraries. Similarly, the setup, management and
- actual IO on underlying NIC devices are out of scope too.
-
- The libraries only need to know about underlying devices plus what
- HW offloads are supported, underlying device MTU and L3/L2 addresses to
- fill into L3/L2 headers for the outgoing packets.
+ The only information these libraries need to know about the
+ underlying devices:
+ - supported HW offloads
+ - MTU and L3/L2 addresses
+ That allows the libraries to fill L2/L3 headers and mbuf metadata
+ for the outgoing packets.
- These libraries should be developed in such manner, they could be used
+ These libraries should be developed in such a manner that they could be used
independently from implementations of 2) and 3).
-2) Create VPP graph nodes, plugins etc using those libraries to implement
+ 2) To create VPP graph nodes, plugins etc using those libraries to implement
a host stack.
-3) Create such mechanisms (netlink agents, packaging, etc) necessary to make
- the resulting host stack easily usable by existing non-vpp aware software.
+ 3) To create such mechanisms (netlink agents, packaging, etc) necessary
+ to make the resulting host stack easily usable by existing non-vpp aware
+ software.
-INSTALLATION GUIDE
-==================
+2. INSTALLATION GUIDE
-1. Obtain latest DPDK and build it.
+ 1) Obtain latest DPDK and build it.
(refer to http://dpdk.org for information how to download and build it).
-2. Make sure that RTE_SDK and RTE_TARGET DPDK related environment variables
+ 2) Make sure that RTE_SDK and RTE_TARGET DPDK related environment variables
are setup correctly.
-3. Go to the TLDK root directory and type: 'make all'.
-4. Run sample applications.
-
-For enabling unit tests application using GoogleTest please see:
-./test/gtest/README
-
-As an example:
-export RTE_SDK=/opt/DPDK
-export RTE_TARGET=x86_64-native-linuxapp-gcc
-
-cd tldk
-make all
-./x86_64-native-linuxapp-gcc/app/udpfwd ...
-
-CONTENTS
-========
-
-$(TLDK_ROOT)
-|
-+----lib
-| |
-| +--libtle_dring - dring library
-| |
-| +--libtle_udp - implementation of the UDP datagram processing
-|
-+----examples
-| |
-| +--udpfwd - sample app to demonstrate and test libtle_udp usage
-| (refer to examples/udpfwd/README for more information)
-|
-+----test - unit-tests
-| |
-| +--dring - UT for libtle_dring (standalone app)
-| |
-| +--gtest - UT for libtle_dring and libtle_udp (googletest)
+ 3) Go to the TLDK root directory and type: 'make all'.
+ 4) Run sample applications.
+
+ For enabling unit tests application using GoogleTest please see:
+ ./test/gtest/README
+
+ As an example:
+ export RTE_SDK=/opt/DPDK
+ export RTE_TARGET=x86_64-native-linuxapp-gcc
+
+ cd tldk
+ make all
+ ./x86_64-native-linuxapp-gcc/app/l4fwd ...
+
+3. CONTENTS
+
+ $(TLDK_ROOT)
+ |
+ +----lib
+ | |
+ | +--libtle_dring - dring library
+ | |
+ | +--libtle_l4p - implementation of the TCP/UDP packet processing
+ | |
+ | +--libtle_timer - implementation of the timer library
+ |
+ +----examples
+ | |
+ | +--l4fwd - sample app to demonstrate and test libtle_l4p TCP/UDP
+ | usage (refer to examples/l4fwd/README for more information)
+ |
+ +----test - unit-tests
+ | |
+ | +--dring - UT for libtle_dring (standalone app)
+ | |
+ | +--gtest - UT for libtle_dring, libtle_l4p and libtle_timer
+ | | (googletest)
+ | |
+ | +--timer - UT for libtle_timer (standalone app)
diff --git a/examples/Makefile b/examples/Makefile
index bed34ac..cf13574 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -21,6 +21,6 @@ endif
include $(RTE_SDK)/mk/rte.vars.mk
-DIRS-y += udpfwd
+DIRS-y += l4fwd
include $(TLDK_ROOT)/mk/tle.subdir.mk
diff --git a/examples/udpfwd/Makefile b/examples/l4fwd/Makefile
index fae1c34..f18b622 100644
--- a/examples/udpfwd/Makefile
+++ b/examples/l4fwd/Makefile
@@ -26,7 +26,7 @@ endif
include $(RTE_SDK)/mk/rte.vars.mk
# binary name
-APP = udpfwd
+APP = l4fwd
# all source are stored in SRCS-y
SRCS-y += parse.c
@@ -37,7 +37,8 @@ CFLAGS += $(WERROR_FLAGS)
CFLAGS += -I$(RTE_OUTPUT)/include
LDLIBS += -L$(RTE_OUTPUT)/lib
-LDLIBS += -ltle_udp
+LDLIBS += -ltle_l4p
+LDLIBS += -ltle_timer
EXTRA_CFLAGS += -O3
CFLAGS_parse.o += -D_GNU_SOURCE
diff --git a/examples/l4fwd/README b/examples/l4fwd/README
new file mode 100644
index 0000000..658fe3a
--- /dev/null
+++ b/examples/l4fwd/README
@@ -0,0 +1,346 @@
+1. INTRODUCTION
+
+ l4fwd is a sample application to demonstrate and test TLDK TCP/UDP
+ functionality. Depending on configuration, it can do simple send, recv,
+ or both over opened TCP/UDP streams. It also implements the ability to
+ forward TCP/UDP packets between different streams, so it is possible to
+ use the l4fwd application as a TCP/UDP proxy.
+
+ The l4fwd application is logically divided into two parts, Back End (BE)
+ and Front End (FE).
+
+1.1 Back End (BE)
+
+ BE is responsible for:
+ - RX over DPDK ports and feeding the received packets into TCP/UDP
+ TLDK context(s) (via tle_*_rx_bulk).
+
+ - retrieving packets ready to be sent out from TCP/UDP TLDK context(s)
+ and TX them over the destined DPDK port.
+
+ - Multiple RX/TX queues per port are supported by RSS. Right now the
+ number of TX queues is the same as the number of RX queues.
+
+ - Each BE lcore can serve multiple DPDK ports and TLDK TCP/UDP contexts.
+
+ BE configuration record format:
+
+ port=<uint>,addr=<ipv4/ipv6>,masklen=<uint>,mac=<ether>[,mtu=<uint>]
+
+ port - DPDK port id to be used to send packets to the destination.
+ It is a mandatory option.
+ addr - destination network address. It is a mandatory option.
+ masklen - destination network prefix length. It is a mandatory option.
+ mac - destination Ethernet address. It is a mandatory option.
+ mtu - MTU to be used on that port (= application data size + L2/L3/L4
+ headers sizes, default=1514). It is optional.
+
+ Below are some examples of BE entries:
+
+ port=0,masklen=16,addr=192.168.0.0,mac=01:de:ad:be:ef:01
+ port=0,addr=2001:4860:b002::,masklen=64,mac=01:de:ad:be:ef:01
+
+ These examples are also available in be.cfg file.
+
+1.2 Front End (FE)
+
+ FE is responsible for:
+ - opening configured TCP/UDP streams and performing send/recv over them.
+ These streams can belong to different TCP/UDP contexts.
+
+ Each lcore can act as BE and/or FE.
+
+ In UDP mode the application can reassemble fragmented input IP packets,
+ and fragment outgoing IP packets (if the destination MTU is less than the
+ packet size).
+
+ FE configuration record format:
+
+ lcore=<uint>,op=<"rx|tx|echo|fwd">,\
+ laddr=<ip>,lport=<uint16>,raddr=<ip>,rport=<uint16>,\
+ [txlen=<uint>,fwladdr=<ip>,fwlport=<uint16>,fwraddr=<ip>,fwrport=<uint16>,\
+ belcore=<uint>]
+
+ lcore - EAL lcore to manage that stream(s) in the FE. It is a mandatory
+ option.
+ belcore - EAL lcore to manage that stream(s) in the BE. It is optional;
+ lcore and belcore can specify the same cpu core.
+ op - operation to perform on that stream:
+ "rx" - do receive only on that stream.
+ "tx" - do send only on that stream.
+ "echo" - mimic recvfrom(..., &addr);sendto(..., &addr);
+ on that stream.
+ "fwd" - forward packets between streams.
+ It is a mandatory option.
+ laddr - local address for the stream to open. It is a mandatory option.
+ lport - local port for the stream to open. It is a mandatory option.
+ raddr - remote address for the stream to open. It is a mandatory option.
+ rport - remote port for the stream to open. It is a mandatory option.
+ txlen - data length to send in each packet (mandatory for "tx" mode only).
+ fwladdr - local address for the forwarding stream(s) to open
+ (mandatory for "fwd" mode only).
+ fwlport - local port for the forwarding stream(s) to open
+ (mandatory for "fwd" mode only).
+ fwraddr - remote address for the forwarding stream(s) to open
+ (mandatory for "fwd" mode only).
+ fwrport - remote port for the forwarding stream(s) to open
+ (mandatory for "fwd" mode only).
+
+ Below are some examples of FE entries:
+
+ lcore=3,op=echo,laddr=192.168.1.233,lport=0x8000,raddr=0.0.0.0,rport=0
+
+ lcore=3,op=tx,laddr=192.168.1.233,lport=0x8001,raddr=192.168.1.56,\
+ rport=0x200,txlen=72
+
+ lcore=3,op=rx,laddr=::,lport=0x200,raddr=::,rport=0,txlen=72
+
+ lcore=3,op=fwd,laddr=0.0.0.0,lport=11211,raddr=0.0.0.0,rport=0,\
+ fwladdr=::,fwlport=0,fwraddr=2001:4860:b002::56,fwrport=11211
+
+ These examples are also available in fe.cfg file with some more explanation.
+
+1.3 Configuration files format
+
+ - each record on a separate line.
+ - lines started with '#' are treated as comments.
+ - empty lines (containing whitespace chars only) are ignored.
+ - kvargs style format for each record.
+ - each FE record corresponds to at least one stream to be opened
+ (could be multiple streams in case of op="fwd").
+ - each BE record defines an IPv4/IPv6 destination.
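+
+ As an illustration, a minimal FE config file following these rules could
+ look like (addresses and ports below are only examples):
+
+ # UDP echo server stream handled by lcore 3
+ lcore=3,op=echo,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0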
+
+2. REQUIREMENTS
+
+ DPDK libraries (16.11 or higher)
+ TLDK libraries (1.0)
+ Back-End (BE) configuration file
+ Front-End (FE) configuration file
+
+3. USAGE
+
+ l4fwd <DPDK EAL parameters> -- \
+ -P | --promisc /* promiscuous mode enabled. */ \
+ -R | --rbufs <num> /* max recv buffers per stream. */ \
+ -S | --sbufs <num> /* max send buffers per stream. */ \
+ -s | --streams <num> /* streams to open per context. */ \
+ -b | --becfg <filename> /* backend configuration file. */ \
+ -f | --fecfg <filename> /* frontend configuration file. */ \
+ -U | --udp /* run the app to handle UDP streams only. */ \
+ -T | --tcp /* run the app to handle TCP streams only. */ \
+ -L | --listen /* open TCP streams in server mode (listen). */ \
+ -a | --enable-arp /* enable arp responses (requests not supported) */ \
+ -v | --verbose /* different level of verbose mode */ \
+ <port0_params> <port1_params> ... <portN_params>
+
+ Note that options -U and -T cannot be used together.
+ Option -L can be used only with option -T.
+
+ portX_params: port=<uint>,lcore=<uint>[-<uint>],[lcore=<uint>[-<uint>],]\
+ [rx_offload=<uint>,tx_offload=<uint>,mtu=<uint>,ipv4=<ipv4>,ipv6=<ipv6>]
+
+ portX_params are used to configure the particular DPDK device
+ (rte_ethdev port), and specify the BE lcore(s) that will handle RX/TX
+ from/to the device and manage the BE part of the corresponding TCP/UDP
+ context. Multiple BE lcores can be specified.
+
+ port - DPDK port id (RSS is supported when multiple lcores are
+ specified for a port). It is a mandatory option.
+ lcore - EAL lcore id to handle IO over that port (rx_burst/tx_burst).
+ Several ports can be managed by the same lcore, and the same
+ port can be managed by more than one lcore.
+ It is a mandatory option. At least one lcore option has to be
+ specified. An lcore range can be specified in one lcore option,
+ e.g. lcore=2-3,lcore=6 will enable lcores 2, 3, and 6 to
+ handle BE.
+ rx_offload - RX HW offload capabilities to enable/use on this port
+ (bitmask of DEV_RX_OFFLOAD_* values). It is optional.
+ tx_offload - TX HW offload capabilities to enable/use on this port
+ (bitmask of DEV_TX_OFFLOAD_* values). It is optional.
+ mtu - MTU to be used on that port (= application data size + L2/L3/L4
+ headers sizes, default=1514).
+ ipv4 - ipv4 address to assign to that port.
+ ipv6 - ipv6 address to assign to that port.
+
+ At least one of the ipv4/ipv6 values has to be specified for each port.
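+
+ For illustration, a portX_params entry that binds DPDK port 0 to BE
+ lcores 2-3 with RX checksum offloads enabled could look like (the
+ address is only an example):
+
+ port=0,lcore=2-3,rx_offload=0xf,tx_offload=0,ipv4=192.168.1.1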
+
+3.1 RSS
+
+ If multiple lcores are specified per DPDK port, the following RSS hash
+ will be enabled on that port:
+ ETH_RSS_UDP or ETH_RSS_TCP
+
+ The RSS queue qid will handle the stream according to the TCP/UDP source
+ port of the stream. The qid can be calculated as below:
+
+ qid = (src_port % power_of_2(n)) % n
+
+ where n is the number of lcores used to manage the DPDK port.
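+
+ A minimal C sketch of that mapping (power_of_2(n) corresponds to DPDK's
+ rte_align32pow2(); this mirrors the verify_queue_for_port() helper the
+ application uses internally):
+
+ #include <rte_common.h>
+
+ /* RSS queue id that will serve a stream with the given TCP/UDP
+  * source port, where n is the number of BE lcores on the port. */
+ static inline uint32_t
+ rss_qid(uint16_t src_port, uint32_t n)
+ {
+     uint32_t align_nb_q = rte_align32pow2(n);
+     return (src_port % align_nb_q) % n;
+ }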
+
+4. EXAMPLES
+
+4.1 Sample testbed
+
++----------------------------+ +-------------------------------+
+| TLDK Box | | Linux Box |
+| | | |
+| port 0 +----------------+ port 0 |
+| 192.168.1.1 | | 192.168.1.2 |
+| 2001:4860:b002::1 | | 2001:4860:b002::2 |
+| AA:BB:CC:DD:EE:F1 | | AA:BB:CC:DD:EE:F2 |
++----------------------------+ +-------------------------------+
+
+4.2 UDP, "rx" mode, IPv4-only, Single core
+
+ This example shows receiving data from an IPv4 stream. The TLDK UDP
+ server runs on a single core, where both BE and FE run on cpu core 3.
+
+ be.cfg file contains:
+
+ port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (udp server listening to port 6000):
+
+ lcore=3,op=rx,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -U port=0,lcore=3,ipv4=192.168.1.1
+
+ This will create a TLDK UDP context on lcore=3 (BE lcore) to manage
+ DPDK port 0. The port 0 will have IPv4 address 192.168.1.1.
+ All the streams will be in server mode and also managed by lcore 3.
+
+4.3 UDP, "echo" mode, IPv6-only, Multicore
+
+ This example shows receiving data from an IPv6 stream and sending the
+ data back through the same IPv6 stream. The TLDK UDP server runs on
+ multiple cores, where BE runs on cpu core 2 and FE runs on cpu core 3.
+
+ be.cfg file contains:
+
+ port=0,masklen=64,addr=2001:4860:b002::,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (udp server listening to port 6000):
+
+ lcore=3,op=echo,laddr=2001:4860:b002::1,lport=6000,raddr=::,rport=0
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='2,3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -U port=0,lcore=2,ipv6=2001:4860:b002::1
+
+ This will create a TLDK UDP context on lcore=2 (BE lcore) to manage
+ DPDK port 0. The port 0 will have IPv6 address 2001:4860:b002::1.
+ All the streams will be in server mode and managed by lcore 3 (FE lcore).
+ In this case, the UDP server will send the incoming data back to the sender.
+
+4.4 TCP, "echo" mode, IPv4-only, Multicore, RX-Offload
+
+ This example shows receiving data from an IPv4 stream and sending the
+ data back through the same IPv4 stream. The TLDK TCP server runs on
+ multiple cores, where BE runs on cpu core 2 and FE runs on cpu core 3.
+ The BE also uses
+ receive offload features of the NIC.
+
+ be.cfg file contains:
+
+ port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (tcp server listening to port 6000):
+
+ lcore=3,op=echo,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='2,3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -T -L port=0,lcore=2,rx_offload=0xf,tx_offload=0,\
+ ipv4=192.168.1.1
+
+ This will create a TLDK TCP context on lcore=2 (BE lcore) to manage
+ DPDK port 0. The port 0 will have IPv4 address 192.168.1.1. The following
+ DPDK RX HW offloads will be enabled on that port:
+ DEV_RX_OFFLOAD_VLAN_STRIP,
+ DEV_RX_OFFLOAD_IPV4_CKSUM,
+ DEV_RX_OFFLOAD_UDP_CKSUM,
+ DEV_RX_OFFLOAD_TCP_CKSUM
+ No HW TX offloads will be enabled.
+ All the streams will be in server mode and managed by lcore 3 (FE core).
+ In this case, the TCP server will send the incoming data back to the sender.
+
+4.5 TCP, "fwd" (proxy) mode, IPv4-to-IPv6, Multi-core, RX-Offload
+
+ This example shows receiving data from an IPv4 stream and forwarding the
+ data to an IPv6 stream. The TLDK TCP server runs on multiple cores,
+ where BE runs on cpu core 2 and FE runs on cpu core 3. The BE also uses
+ receive offload features of the NIC.
+
+ be.cfg file contains:
+
+ port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (tcp server listening to port 6000):
+
+ lcore=3,op=fwd,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0,\
+ fwladdr=::,fwlport=0,fwraddr=2001:4860:b002::2,fwrport=7000
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='2,3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -T -L port=0,lcore=2,rx_offload=0xf,tx_offload=0,\
+ ipv4=192.168.1.1,ipv6=2001:4860:b002::1
+
+ This will create a TLDK TCP context on lcore=2 (BE lcore) to manage
+ DPDK port 0. The port 0 will have IPv4 address 192.168.1.1. The following
+ DPDK RX HW offloads will be enabled on that port:
+ DEV_RX_OFFLOAD_VLAN_STRIP,
+ DEV_RX_OFFLOAD_IPV4_CKSUM,
+ DEV_RX_OFFLOAD_UDP_CKSUM,
+ DEV_RX_OFFLOAD_TCP_CKSUM
+ No HW TX offloads will be enabled.
+ All the streams will be in server mode and managed by lcore 3 (FE core).
+ In this case, the IPv4 TCP server will forward the incoming data to the IPv6
+ TCP server 2001:4860:b002::2 listening to port 7000.
+
+4.6 TCP, "echo" mode, RSS, IPv4-only, Multicore, RX-Offload
+
+ This example shows receiving data from an IPv4 stream and sending the
+ data back through the same IPv4 stream. The TLDK TCP server runs on
+ multiple cores, where BE runs on cpu cores 1-2 and FE runs on cpu core 3.
+ As BE runs on multiple cores, the Receive Side Scaling (RSS) feature will
+ be automatically enabled.
+ The BE also uses receive offload features of the NIC.
+
+ be.cfg file contains:
+
+ port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (tcp server listening to port 6000):
+
+ lcore=3,op=echo,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0
+ lcore=3,op=echo,laddr=192.168.1.1,lport=6001,raddr=0.0.0.0,rport=0
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='1,2,3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -T -L port=0,lcore="1-2",rx_offload=0xf,tx_offload=0,\
+ ipv4=192.168.1.1
+
+ This will create a TLDK TCP context on lcores 1-2 (BE lcores) to manage
+ DPDK port 0. The port 0 will have IPv4 address 192.168.1.1. The following
+ DPDK RX HW offloads will be enabled on that port:
+ DEV_RX_OFFLOAD_VLAN_STRIP,
+ DEV_RX_OFFLOAD_IPV4_CKSUM,
+ DEV_RX_OFFLOAD_UDP_CKSUM,
+ DEV_RX_OFFLOAD_TCP_CKSUM
+ No HW TX offloads will be enabled.
+ All the streams will be in server mode and managed by lcore 3 (FE core).
+ In this case, the TCP server will send the incoming data back to the sender.
+
+ As RSS is enabled, all the packets with destination ports 6000 and 6001
+ will be managed by HW queue 0 and queue 1 respectively. Please note that
+ RSS is not supported on the interface when both IPv4 and IPv6 are
+ enabled. Only one of IPv4 or IPv6 should be enabled on the port.
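+
+ A quick check with the qid formula from section 3.1 (n = 2 lcores, so
+ power_of_2(n) = 2): qid(6000) = (6000 % 2) % 2 = 0 and
+ qid(6001) = (6001 % 2) % 2 = 1, which matches the queue assignment above.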
diff --git a/examples/udpfwd/be.cfg b/examples/l4fwd/be.cfg
index 5c1d173..8e6d983 100644
--- a/examples/udpfwd/be.cfg
+++ b/examples/l4fwd/be.cfg
@@ -1,5 +1,5 @@
#
-# udpfwd BE cconfig file exaple
+# l4fwd BE config file example
#
port=0,masklen=16,addr=192.168.0.0,mac=01:de:ad:be:ef:01
port=0,addr=2001:4860:b002::,masklen=64,mac=01:de:ad:be:ef:01
diff --git a/examples/l4fwd/common.h b/examples/l4fwd/common.h
new file mode 100644
index 0000000..ff8ee7a
--- /dev/null
+++ b/examples/l4fwd/common.h
@@ -0,0 +1,662 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef COMMON_H_
+#define COMMON_H_
+
+#include <rte_arp.h>
+
+static void
+sig_handle(int signum)
+{
+ RTE_LOG(ERR, USER1, "%s(%d)\n", __func__, signum);
+ force_quit = 1;
+}
+
+static void
+netfe_stream_dump(const struct netfe_stream *fes, struct sockaddr_storage *la,
+ struct sockaddr_storage *ra)
+{
+ struct sockaddr_in *l4, *r4;
+ struct sockaddr_in6 *l6, *r6;
+ uint16_t lport, rport;
+ char laddr[INET6_ADDRSTRLEN];
+ char raddr[INET6_ADDRSTRLEN];
+
+ if (la->ss_family == AF_INET) {
+
+ l4 = (struct sockaddr_in *)la;
+ r4 = (struct sockaddr_in *)ra;
+
+ lport = l4->sin_port;
+ rport = r4->sin_port;
+
+ } else if (la->ss_family == AF_INET6) {
+
+ l6 = (struct sockaddr_in6 *)la;
+ r6 = (struct sockaddr_in6 *)ra;
+
+ lport = l6->sin6_port;
+ rport = r6->sin6_port;
+
+ } else {
+ RTE_LOG(ERR, USER1, "stream@%p - unknown family=%hu\n",
+ fes->s, la->ss_family);
+ return;
+ }
+
+ format_addr(la, laddr, sizeof(laddr));
+ format_addr(ra, raddr, sizeof(raddr));
+
+ RTE_LOG(INFO, USER1, "stream@%p={s=%p,"
+ "family=%hu,proto=%s,laddr=%s,lport=%hu,raddr=%s,rport=%hu;"
+ "stats={"
+ "rxp=%" PRIu64 ",rxb=%" PRIu64
+ ",txp=%" PRIu64 ",txb=%" PRIu64
+ ",drops=%" PRIu64 ","
+ "rxev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "],"
+ "txev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "]"
+ "};};\n",
+ fes, fes->s, la->ss_family, proto_name[fes->proto],
+ laddr, ntohs(lport), raddr, ntohs(rport),
+ fes->stat.rxp, fes->stat.rxb,
+ fes->stat.txp, fes->stat.txb,
+ fes->stat.drops,
+ fes->stat.rxev[TLE_SEV_IDLE],
+ fes->stat.rxev[TLE_SEV_DOWN],
+ fes->stat.rxev[TLE_SEV_UP],
+ fes->stat.txev[TLE_SEV_IDLE],
+ fes->stat.txev[TLE_SEV_DOWN],
+ fes->stat.txev[TLE_SEV_UP]);
+}
+
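+/*
+ * Detach up to num streams from the head of the given stream list
+ * and store them into rs[]. Returns the number of streams retrieved.
+ */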
+static inline uint32_t
+netfe_get_streams(struct netfe_stream_list *list, struct netfe_stream *rs[],
+ uint32_t num)
+{
+ struct netfe_stream *s;
+ uint32_t i, n;
+
+ n = RTE_MIN(list->num, num);
+ for (i = 0, s = LIST_FIRST(&list->head);
+ i != n;
+ i++, s = LIST_NEXT(s, link)) {
+ rs[i] = s;
+ }
+
+ if (s == NULL)
+ /* we retrieved all free entries */
+ LIST_INIT(&list->head);
+ else
+ LIST_FIRST(&list->head) = s;
+
+ list->num -= n;
+
+ return n;
+}
+
+static inline struct netfe_stream *
+netfe_get_stream(struct netfe_stream_list *list)
+{
+ struct netfe_stream *s;
+
+ s = NULL;
+ if (list->num == 0)
+ return s;
+
+ netfe_get_streams(list, &s, 1);
+
+ return s;
+}
+
+static inline void
+netfe_put_streams(struct netfe_lcore *fe, struct netfe_stream_list *list,
+ struct netfe_stream *fs[], uint32_t num)
+{
+ uint32_t i, n;
+
+ n = RTE_MIN(fe->snum - list->num, num);
+ if (n != num)
+ RTE_LOG(ERR, USER1, "%s: list overflow by %u\n", __func__,
+ num - n);
+
+ for (i = 0; i != n; i++)
+ LIST_INSERT_HEAD(&list->head, fs[i], link);
+ list->num += n;
+}
+
+static inline void
+netfe_put_stream(struct netfe_lcore *fe, struct netfe_stream_list *list,
+ struct netfe_stream *s)
+{
+ if (list->num == fe->snum) {
+ RTE_LOG(ERR, USER1, "%s: list is full\n", __func__);
+ return;
+ }
+
+ netfe_put_streams(fe, list, &s, 1);
+}
+
+static inline void
+netfe_rem_stream(struct netfe_stream_list *list, struct netfe_stream *s)
+{
+ LIST_REMOVE(s, link);
+ list->num--;
+}
+
+static void
+netfe_stream_close(struct netfe_lcore *fe, struct netfe_stream *fes)
+{
+ tle_stream_close(fes->s);
+ tle_event_free(fes->txev);
+ tle_event_free(fes->rxev);
+ tle_event_free(fes->erev);
+ memset(fes, 0, sizeof(*fes));
+ netfe_put_stream(fe, &fe->free, fes);
+}
+
+/*
+ * Helper function: verify that the queue matches the given TCP/UDP port.
+ */
+static uint8_t
+verify_queue_for_port(const struct netbe_dev *prtq, const uint16_t lport)
+{
+ uint32_t align_nb_q, qid;
+
+ align_nb_q = rte_align32pow2(prtq->port.nb_lcore);
+ qid = (lport % align_nb_q) % prtq->port.nb_lcore;
+ if (prtq->rxqid == qid)
+ return 1;
+
+ return 0;
+}
+
+static inline size_t
+pkt_buf_empty(struct pkt_buf *pb)
+{
+ uint32_t i;
+ size_t x;
+
+ x = 0;
+ for (i = 0; i != pb->num; i++) {
+ x += pb->pkt[i]->pkt_len;
+ NETFE_PKT_DUMP(pb->pkt[i]);
+ rte_pktmbuf_free(pb->pkt[i]);
+ }
+
+ pb->num = 0;
+ return x;
+}
+
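+/*
+ * Refill the packet buffer with mbufs carrying dlen bytes of payload
+ * each, allocated from the mempool of the lcore's NUMA socket.
+ */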
+static inline void
+pkt_buf_fill(uint32_t lcore, struct pkt_buf *pb, uint32_t dlen)
+{
+ uint32_t i;
+ int32_t sid;
+
+ sid = rte_lcore_to_socket_id(lcore) + 1;
+
+ for (i = pb->num; i != RTE_DIM(pb->pkt); i++) {
+ pb->pkt[i] = rte_pktmbuf_alloc(mpool[sid]);
+ if (pb->pkt[i] == NULL)
+ break;
+ rte_pktmbuf_append(pb->pkt[i], dlen);
+ }
+
+ pb->num = i;
+}
+
+static int
+netbe_lcore_setup(struct netbe_lcore *lc)
+{
+ uint32_t i;
+ int32_t rc;
+
+ RTE_LOG(NOTICE, USER1, "%s:(lcore=%u, proto=%s, ctx=%p) start\n",
+ __func__, lc->id, proto_name[lc->proto], lc->ctx);
+
+ /*
+ * TODO: wait for FE lcores to start, so BE doesn't drop any packets
+ * because corresponding streams are not opened yet by FE.
+ * Useful when used with pcap PMDs.
+ * Think of a better way, or make this timeout a command-line
+ * parameter.
+ */
+ rte_delay_ms(10);
+
+ rc = 0;
+ for (i = 0; i != lc->prtq_num && rc == 0; i++) {
+ RTE_LOG(NOTICE, USER1,
+ "%s:%u(port=%u, q=%u, proto=%s, dev=%p)\n",
+ __func__, i, lc->prtq[i].port.id, lc->prtq[i].rxqid,
+ proto_name[lc->proto], lc->prtq[i].dev);
+
+ rc = setup_rx_cb(&lc->prtq[i].port, lc, lc->prtq[i].rxqid,
+ becfg.arp);
+ if (rc < 0)
+ return rc;
+ }
+
+ if (rc == 0)
+ RTE_PER_LCORE(_be) = lc;
+ return rc;
+}
+
+static void
+netbe_lcore_clear(void)
+{
+ uint32_t i, j;
+ struct netbe_lcore *lc;
+
+ lc = RTE_PER_LCORE(_be);
+ if (lc == NULL)
+ return;
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u, proto=%s, ctx: %p) finish\n",
+ __func__, lc->id, proto_name[lc->proto], lc->ctx);
+ for (i = 0; i != lc->prtq_num; i++) {
+ RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, q=%u, lcore=%u, dev=%p) "
+ "rx_stats={"
+ "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "}, "
+ "tx_stats={"
+ "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "};\n",
+ __func__, i, lc->prtq[i].port.id, lc->prtq[i].rxqid,
+ lc->id,
+ lc->prtq[i].dev,
+ lc->prtq[i].rx_stat.in,
+ lc->prtq[i].rx_stat.up,
+ lc->prtq[i].rx_stat.drop,
+ lc->prtq[i].tx_stat.down,
+ lc->prtq[i].tx_stat.out,
+ lc->prtq[i].tx_stat.drop);
+ }
+
+ RTE_LOG(NOTICE, USER1, "tcp_stat={\n");
+ for (i = 0; i != RTE_DIM(lc->tcp_stat.flags); i++) {
+ if (lc->tcp_stat.flags[i] != 0)
+ RTE_LOG(NOTICE, USER1, "[flag=%#x]==%" PRIu64 ";\n",
+ i, lc->tcp_stat.flags[i]);
+ }
+ RTE_LOG(NOTICE, USER1, "};\n");
+
+ for (i = 0; i != lc->prtq_num; i++)
+ for (j = 0; j != lc->prtq[i].tx_buf.num; j++)
+ rte_pktmbuf_free(lc->prtq[i].tx_buf.pkt[j]);
+
+ RTE_PER_LCORE(_be) = NULL;
+}
+
+static int
+netbe_add_ipv4_route(struct netbe_lcore *lc, const struct netbe_dest *dst,
+ uint8_t idx)
+{
+ int32_t rc;
+ uint32_t addr, depth;
+ char str[INET_ADDRSTRLEN];
+
+ depth = dst->prfx;
+ addr = rte_be_to_cpu_32(dst->ipv4.s_addr);
+
+ inet_ntop(AF_INET, &dst->ipv4, str, sizeof(str));
+ rc = rte_lpm_add(lc->lpm4, addr, depth, idx);
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p,"
+ "ipv4=%s/%u,mtu=%u,"
+ "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) "
+ "returns %d;\n",
+ __func__, lc->id, dst->port, lc->dst4[idx].dev,
+ str, depth, lc->dst4[idx].mtu,
+ dst->mac.addr_bytes[0], dst->mac.addr_bytes[1],
+ dst->mac.addr_bytes[2], dst->mac.addr_bytes[3],
+ dst->mac.addr_bytes[4], dst->mac.addr_bytes[5],
+ rc);
+ return rc;
+}
+
+static int
+netbe_add_ipv6_route(struct netbe_lcore *lc, const struct netbe_dest *dst,
+ uint8_t idx)
+{
+ int32_t rc;
+ uint32_t depth;
+ char str[INET6_ADDRSTRLEN];
+
+ depth = dst->prfx;
+
+ rc = rte_lpm6_add(lc->lpm6, (uint8_t *)(uintptr_t)dst->ipv6.s6_addr,
+ depth, idx);
+
+ inet_ntop(AF_INET6, &dst->ipv6, str, sizeof(str));
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p,"
+ "ipv6=%s/%u,mtu=%u,"
+ "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) "
+ "returns %d;\n",
+ __func__, lc->id, dst->port, lc->dst6[idx].dev,
+ str, depth, lc->dst6[idx].mtu,
+ dst->mac.addr_bytes[0], dst->mac.addr_bytes[1],
+ dst->mac.addr_bytes[2], dst->mac.addr_bytes[3],
+ dst->mac.addr_bytes[4], dst->mac.addr_bytes[5],
+ rc);
+ return rc;
+}
+
+static void
+fill_dst(struct tle_dest *dst, struct netbe_dev *bed,
+ const struct netbe_dest *bdp, uint16_t l3_type, int32_t sid,
+ uint8_t proto_id)
+{
+ struct ether_hdr *eth;
+ struct ipv4_hdr *ip4h;
+ struct ipv6_hdr *ip6h;
+
+ dst->dev = bed->dev;
+ dst->head_mp = frag_mpool[sid + 1];
+ dst->mtu = RTE_MIN(bdp->mtu, bed->port.mtu);
+ dst->l2_len = sizeof(*eth);
+
+ eth = (struct ether_hdr *)dst->hdr;
+
+ ether_addr_copy(&bed->port.mac, &eth->s_addr);
+ ether_addr_copy(&bdp->mac, &eth->d_addr);
+ eth->ether_type = rte_cpu_to_be_16(l3_type);
+
+ if (l3_type == ETHER_TYPE_IPv4) {
+ dst->l3_len = sizeof(*ip4h);
+ ip4h = (struct ipv4_hdr *)(eth + 1);
+ ip4h->version_ihl = 4 << 4 |
+ sizeof(*ip4h) / IPV4_IHL_MULTIPLIER;
+ ip4h->time_to_live = 64;
+ ip4h->next_proto_id = proto_id;
+ } else if (l3_type == ETHER_TYPE_IPv6) {
+ dst->l3_len = sizeof(*ip6h);
+ ip6h = (struct ipv6_hdr *)(eth + 1);
+ ip6h->vtc_flow = 6 << 4;
+ ip6h->proto = proto_id;
+ ip6h->hop_limits = 64;
+ }
+}
+
+static int
+netbe_add_dest(struct netbe_lcore *lc, uint32_t dev_idx, uint16_t family,
+ const struct netbe_dest *dst, uint32_t dnum)
+{
+ int32_t rc, sid;
+ uint8_t proto;
+ uint16_t l3_type;
+ uint32_t i, n, m;
+ struct tle_dest *dp;
+
+ if (family == AF_INET) {
+ n = lc->dst4_num;
+ dp = lc->dst4 + n;
+ m = RTE_DIM(lc->dst4);
+ l3_type = ETHER_TYPE_IPv4;
+ } else {
+ n = lc->dst6_num;
+ dp = lc->dst6 + n;
+ m = RTE_DIM(lc->dst6);
+ l3_type = ETHER_TYPE_IPv6;
+ }
+
+ if (n + dnum >= m) {
+ RTE_LOG(ERR, USER1, "%s(lcore=%u, family=%hu, dnum=%u) exceeds "
+ "maximum allowed number of destinations(%u);\n",
+ __func__, lc->id, family, dnum, m);
+ return -ENOSPC;
+ }
+
+ sid = rte_lcore_to_socket_id(lc->id);
+ proto = (becfg.proto == TLE_PROTO_UDP) ? IPPROTO_UDP : IPPROTO_TCP;
+ rc = 0;
+
+ for (i = 0; i != dnum && rc == 0; i++) {
+ fill_dst(dp + i, lc->prtq + dev_idx, dst + i, l3_type, sid,
+ proto);
+ if (family == AF_INET)
+ rc = netbe_add_ipv4_route(lc, dst + i, n + i);
+ else
+ rc = netbe_add_ipv6_route(lc, dst + i, n + i);
+ }
+
+ if (family == AF_INET)
+ lc->dst4_num = n + i;
+ else
+ lc->dst6_num = n + i;
+
+ return rc;
+}
+
+static inline void
+fill_arp_reply(struct netbe_dev *dev, struct rte_mbuf *m)
+{
+ struct ether_hdr *eth;
+ struct arp_hdr *ahdr;
+ struct arp_ipv4 *adata;
+ uint32_t tip;
+
+ /* set up the ethernet data */
+ eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ eth->d_addr = eth->s_addr;
+ eth->s_addr = dev->port.mac;
+
+ /* set up the arp data */
+ ahdr = rte_pktmbuf_mtod_offset(m, struct arp_hdr *, m->l2_len);
+ adata = &ahdr->arp_data;
+
+ ahdr->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY);
+
+ tip = adata->arp_tip;
+ adata->arp_tip = adata->arp_sip;
+ adata->arp_sip = tip;
+
+ adata->arp_tha = adata->arp_sha;
+ adata->arp_sha = dev->port.mac;
+}
+
+/* This is a partial ARP response implementation of RFC 826.
+ * In the RFC, the algorithm is as below:
+ *
+ * ?Do I have the hardware type in ar$hrd?
+ * Yes: (almost definitely)
+ * [optionally check the hardware length ar$hln]
+ * ?Do I speak the protocol in ar$pro?
+ * Yes:
+ * [optionally check the protocol length ar$pln]
+ * Merge_flag := false
+ * If the pair <protocol type, sender protocol address> is
+ * already in my translation table, update the sender
+ * hardware address field of the entry with the new
+ * information in the packet and set Merge_flag to true.
+ * ?Am I the target protocol address?
+ * Yes:
+ * If Merge_flag is false, add the triplet <protocol type,
+ * sender protocol address, sender hardware address> to
+ * the translation table.
+ * ?Is the opcode ares_op$REQUEST? (NOW look at the opcode!!)
+ * Yes:
+ * Swap hardware and protocol fields, putting the local
+ * hardware and protocol addresses in the sender fields.
+ * Set the ar$op field to ares_op$REPLY
+ * Send the packet to the (new) target hardware address on
+ * the same hardware on which the request was received.
+ *
+ * So, in our implementation we skip updating the local cache,
+ * we assume that the local cache is ok, so we just reply to the packet.
+ */
+
+static inline void
+send_arp_reply(struct netbe_dev *dev, struct pkt_buf *pb)
+{
+ uint32_t i, n, num;
+ struct rte_mbuf **m;
+
+ m = pb->pkt;
+ num = pb->num;
+ for (i = 0; i != num; i++) {
+ fill_arp_reply(dev, m[i]);
+ NETBE_PKT_DUMP(m[i]);
+ }
+
+ n = rte_eth_tx_burst(dev->port.id, dev->txqid, m, num);
+ NETBE_TRACE("%s: sent n=%u arp replies\n", __func__, n);
+
+ /* free mbufs with unsent arp response */
+ for (i = n; i != num; i++)
+ rte_pktmbuf_free(m[i]);
+
+ pb->num = 0;
+}
+
+static inline void
+netbe_rx(struct netbe_lcore *lc, uint32_t pidx)
+{
+ uint32_t j, k, n;
+ struct rte_mbuf *pkt[MAX_PKT_BURST];
+ struct rte_mbuf *rp[MAX_PKT_BURST];
+ int32_t rc[MAX_PKT_BURST];
+ struct pkt_buf *abuf;
+
+ n = rte_eth_rx_burst(lc->prtq[pidx].port.id,
+ lc->prtq[pidx].rxqid, pkt, RTE_DIM(pkt));
+
+ if (n != 0) {
+ lc->prtq[pidx].rx_stat.in += n;
+ NETBE_TRACE("%s(%u): rte_eth_rx_burst(%u, %u) returns %u\n",
+ __func__, lc->id, lc->prtq[pidx].port.id,
+ lc->prtq[pidx].rxqid, n);
+
+ k = tle_rx_bulk(lc->prtq[pidx].dev, pkt, rp, rc, n);
+
+ lc->prtq[pidx].rx_stat.up += k;
+ lc->prtq[pidx].rx_stat.drop += n - k;
+ NETBE_TRACE("%s(%u): tle_%s_rx_bulk(%p, %u) returns %u\n",
+ __func__, lc->id, proto_name[lc->proto],
+ lc->prtq[pidx].dev, n, k);
+
+ for (j = 0; j != n - k; j++) {
+ NETBE_TRACE("%s:%d(port=%u) rp[%u]={%p, %d};\n",
+ __func__, __LINE__, lc->prtq[pidx].port.id,
+ j, rp[j], rc[j]);
+ rte_pktmbuf_free(rp[j]);
+ }
+ }
+
+ /* respond to incoming arp requests */
+ abuf = &lc->prtq[pidx].arp_buf;
+ if (abuf->num == 0)
+ return;
+
+ send_arp_reply(&lc->prtq[pidx], abuf);
+}
+
+static inline void
+netbe_tx(struct netbe_lcore *lc, uint32_t pidx)
+{
+ uint32_t j, k, n;
+ struct rte_mbuf **mb;
+
+ n = lc->prtq[pidx].tx_buf.num;
+ k = RTE_DIM(lc->prtq[pidx].tx_buf.pkt) - n;
+ mb = lc->prtq[pidx].tx_buf.pkt;
+
+ /* retrieve more packets from the context only when at least half of
+ * the TX buffer is free; j must be zeroed for the trace below. */
+ j = 0;
+ if (k >= RTE_DIM(lc->prtq[pidx].tx_buf.pkt) / 2) {
+ j = tle_tx_bulk(lc->prtq[pidx].dev, mb + n, k);
+ n += j;
+ lc->prtq[pidx].tx_stat.down += j;
+ }
+
+ if (n == 0)
+ return;
+
+ NETBE_TRACE("%s(%u): tle_%s_tx_bulk(%p) returns %u,\n"
+ "total pkts to send: %u\n",
+ __func__, lc->id, proto_name[lc->proto],
+ lc->prtq[pidx].dev, j, n);
+
+ for (j = 0; j != n; j++)
+ NETBE_PKT_DUMP(mb[j]);
+
+ k = rte_eth_tx_burst(lc->prtq[pidx].port.id,
+ lc->prtq[pidx].txqid, mb, n);
+
+ lc->prtq[pidx].tx_stat.out += k;
+ lc->prtq[pidx].tx_stat.drop += n - k;
+ NETBE_TRACE("%s(%u): rte_eth_tx_burst(%u, %u, %u) returns %u\n",
+ __func__, lc->id, lc->prtq[pidx].port.id, lc->prtq[pidx].txqid,
+ n, k);
+
+ lc->prtq[pidx].tx_buf.num = n - k;
+ if (k != 0)
+ for (j = k; j != n; j++)
+ mb[j - k] = mb[j];
+}
+
+static inline void
+netbe_lcore(void)
+{
+ uint32_t i;
+ struct netbe_lcore *lc;
+
+ lc = RTE_PER_LCORE(_be);
+ if (lc == NULL)
+ return;
+
+ for (i = 0; i != lc->prtq_num; i++) {
+ netbe_rx(lc, i);
+ netbe_tx(lc, i);
+ }
+}
+
+static inline void
+netfe_rx_process(__rte_unused uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t k, n;
+
+ n = fes->pbuf.num;
+ k = RTE_DIM(fes->pbuf.pkt) - n;
+
+ /* packet buffer is full, can't receive any new packets. */
+ if (k == 0) {
+ tle_event_idle(fes->rxev);
+ fes->stat.rxev[TLE_SEV_IDLE]++;
+ return;
+ }
+
+ n = tle_stream_recv(fes->s, fes->pbuf.pkt + n, k);
+ if (n == 0)
+ return;
+
+ NETFE_TRACE("%s(%u): tle_%s_stream_recv(%p, %u) returns %u\n",
+ __func__, lcore, proto_name[fes->proto], fes->s, k, n);
+
+ fes->pbuf.num += n;
+ fes->stat.rxp += n;
+
+ /* free all received mbufs. */
+ if (fes->op == RXONLY)
+ fes->stat.rxb += pkt_buf_empty(&fes->pbuf);
+ /* mark stream as writable */
+ else if (k == RTE_DIM(fes->pbuf.pkt)) {
+ if (fes->op == RXTX) {
+ tle_event_active(fes->txev, TLE_SEV_UP);
+ fes->stat.txev[TLE_SEV_UP]++;
+ } else if (fes->op == FWD) {
+ tle_event_raise(fes->txev);
+ fes->stat.txev[TLE_SEV_UP]++;
+ }
+ }
+}
+
+#endif /* COMMON_H_ */
diff --git a/examples/udpfwd/main_dpdk_legacy.h b/examples/l4fwd/dpdk_legacy.h
index e4bff24..84fab17 100644
--- a/examples/udpfwd/main_dpdk_legacy.h
+++ b/examples/l4fwd/dpdk_legacy.h
@@ -19,27 +19,11 @@
#include "dpdk_version.h"
/*
- * Helper functions, verify the queue for corresponding UDP port.
- */
-static uint8_t
-verify_queue_for_port(const struct netbe_dev *prtq, const uint16_t lport)
-{
- uint32_t align_nb_q, qid;
-
- align_nb_q = rte_align32pow2(prtq->port.nb_lcore);
- qid = (lport % align_nb_q) % prtq->port.nb_lcore;
- if (prtq->rxqid == qid)
- return 1;
-
- return 0;
-}
-
-/*
* UDP IPv4 destination lookup callback.
*/
static int
lpm4_dst_lookup(void *data, const struct in_addr *addr,
- struct tle_udp_dest *res)
+ struct tle_dest *res)
{
int32_t rc;
#ifdef DPDK_VERSION_GE_1604
@@ -48,7 +32,7 @@ lpm4_dst_lookup(void *data, const struct in_addr *addr,
uint8_t idx;
#endif
struct netbe_lcore *lc;
- struct tle_udp_dest *dst;
+ struct tle_dest *dst;
lc = data;
@@ -56,7 +40,7 @@ lpm4_dst_lookup(void *data, const struct in_addr *addr,
if (rc == 0) {
dst = &lc->dst4[idx];
rte_memcpy(res, dst, dst->l2_len + dst->l3_len +
- offsetof(struct tle_udp_dest, hdr));
+ offsetof(struct tle_dest, hdr));
}
return rc;
}
@@ -105,7 +89,7 @@ lcore_lpm_init(struct netbe_lcore *lc)
*/
static int
netbe_find4(const struct in_addr *laddr, const uint16_t lport,
- const struct in_addr *raddr, const uint32_t be_lc)
+ const struct in_addr *raddr, const uint32_t belc)
{
uint32_t i, j;
#ifdef DPDK_VERSION_GE_1604
@@ -120,14 +104,14 @@ netbe_find4(const struct in_addr *laddr, const uint16_t lport,
return 0;
/* search by provided be_lcore */
- if (be_lc != LCORE_ID_ANY) {
+ if (belc != LCORE_ID_ANY) {
for (i = 0; i != becfg.cpu_num; i++) {
bc = becfg.cpu + i;
- if (be_lc == bc->id)
+ if (belc == bc->id)
return i;
}
RTE_LOG(NOTICE, USER1, "%s: no stream with be_lcore=%u\n",
- __func__, be_lc);
+ __func__, belc);
return -ENOENT;
}
diff --git a/examples/udpfwd/dpdk_version.h b/examples/l4fwd/dpdk_version.h
index 4f6bdfb..43235c8 100644
--- a/examples/udpfwd/dpdk_version.h
+++ b/examples/l4fwd/dpdk_version.h
@@ -23,7 +23,7 @@
#define DPDK_VERSION_GE_1604
#endif
#elif defined(RTE_VER_YEAR)
-#if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0)
+#if RTE_VERSION_NUM(16, 4, 0, 0) <= RTE_VERSION
#define DPDK_VERSION_GE_1604
#endif
#else
diff --git a/examples/udpfwd/fe.cfg b/examples/l4fwd/fe.cfg
index 2706323..2706323 100644
--- a/examples/udpfwd/fe.cfg
+++ b/examples/l4fwd/fe.cfg
diff --git a/examples/udpfwd/fwdtbl.h b/examples/l4fwd/fwdtbl.h
index 1c4265e..1c4265e 100644
--- a/examples/udpfwd/fwdtbl.h
+++ b/examples/l4fwd/fwdtbl.h
diff --git a/examples/udpfwd/gen_fe_cfg.py b/examples/l4fwd/gen_fe_cfg.py
index dbb500b..67a8d5c 100755
--- a/examples/udpfwd/gen_fe_cfg.py
+++ b/examples/l4fwd/gen_fe_cfg.py
@@ -31,7 +31,7 @@ def print_usage ():
"lcores are comma-separated, within double quote"
print " -b, --be_lcore_list: list of lcores used for BE. Multiple " \
"lcores are comma-separated, within double quote"
- print " -p, --start_port: starting UDP port number"
+ print " -p, --start_port: starting TCP/UDP port number"
print " -n, --number_of_streams: number of streams to be generated"
print " -m, --mode: mode of the application. [echo, rx, tx, fwd]"
print " -q, --local_address: local address of the stream"
@@ -55,7 +55,8 @@ def print_stream(mode, la, ra, fwd_la, fwd_ra, lcore, belcore, lport,
lport_str = str(lport)
fwrport_str = str(fwrport)
- stream = "lcore=" + str(lcore) + ",belcore=" + str(belcore) + ",op=" + mode
+ stream = "lcore=" + str(lcore) + ",belcore=" + str(belcore)
+ stream += ",op=" + mode
stream += ",laddr=" + la + ",lport=" + lport_str
stream += ",raddr=" + ra + ",rport=0"
diff --git a/examples/l4fwd/lcore.h b/examples/l4fwd/lcore.h
new file mode 100644
index 0000000..d88e434
--- /dev/null
+++ b/examples/l4fwd/lcore.h
@@ -0,0 +1,370 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LCORE_H_
+#define LCORE_H_
+
+#include "dpdk_legacy.h"
+
+/*
+ * IPv6 destination lookup callback.
+ */
+static int
+lpm6_dst_lookup(void *data, const struct in6_addr *addr,
+ struct tle_dest *res)
+{
+ int32_t rc;
+ uint8_t idx;
+ struct netbe_lcore *lc;
+ struct tle_dest *dst;
+ uintptr_t p;
+
+ lc = data;
+ p = (uintptr_t)addr->s6_addr;
+
+ rc = rte_lpm6_lookup(lc->lpm6, (uint8_t *)p, &idx);
+ if (rc == 0) {
+ dst = &lc->dst6[idx];
+ rte_memcpy(res, dst, dst->l2_len + dst->l3_len +
+ offsetof(struct tle_dest, hdr));
+ }
+ return rc;
+}
+
+static int
+create_context(struct netbe_lcore *lc, const struct tle_ctx_param *ctx_prm)
+{
+ uint32_t rc = 0, sid;
+ uint64_t frag_cycles;
+ struct tle_ctx_param cprm;
+
+ if (lc->ctx == NULL) {
+ sid = rte_lcore_to_socket_id(lc->id);
+
+ rc = lcore_lpm_init(lc);
+ if (rc != 0)
+ return rc;
+
+ cprm = *ctx_prm;
+ cprm.socket_id = sid;
+ cprm.proto = lc->proto;
+ cprm.lookup4 = lpm4_dst_lookup;
+ cprm.lookup4_data = lc;
+ cprm.lookup6 = lpm6_dst_lookup;
+ cprm.lookup6_data = lc;
+
+ frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) /
+ MS_PER_S * FRAG_TTL;
+
+ lc->ftbl = rte_ip_frag_table_create(cprm.max_streams,
+ FRAG_TBL_BUCKET_ENTRIES, cprm.max_streams,
+ frag_cycles, sid);
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u): frag_tbl=%p;\n",
+ __func__, lc->id, lc->ftbl);
+
+ lc->ctx = tle_ctx_create(&cprm);
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u): proto=%s, ctx=%p;\n",
+ __func__, lc->id, proto_name[lc->proto], lc->ctx);
+
+ if (lc->ctx == NULL || lc->ftbl == NULL)
+ rc = ENOMEM;
+ }
+
+ return rc;
+}
+
+/*
+ * BE lcore setup routine.
+ */
+static int
+lcore_init(struct netbe_lcore *lc, const struct tle_ctx_param *ctx_prm,
+ const uint32_t prtqid, const uint16_t *bl_ports, uint32_t nb_bl_ports)
+{
+ int32_t rc = 0;
+ struct tle_dev_param dprm;
+
+ rc = create_context(lc, ctx_prm);
+
+ if (rc == 0 && lc->ctx != NULL) {
+ memset(&dprm, 0, sizeof(dprm));
+ dprm.rx_offload = lc->prtq[prtqid].port.rx_offload;
+ dprm.tx_offload = lc->prtq[prtqid].port.tx_offload;
+ dprm.local_addr4.s_addr = lc->prtq[prtqid].port.ipv4;
+ memcpy(&dprm.local_addr6, &lc->prtq[prtqid].port.ipv6,
+ sizeof(lc->prtq[prtqid].port.ipv6));
+ dprm.bl4.nb_port = nb_bl_ports;
+ dprm.bl4.port = bl_ports;
+ dprm.bl6.nb_port = nb_bl_ports;
+ dprm.bl6.port = bl_ports;
+
+ lc->prtq[prtqid].dev = tle_add_dev(lc->ctx, &dprm);
+
+ RTE_LOG(NOTICE, USER1,
+ "%s(lcore=%u, port=%u, qid=%u), dev: %p\n",
+ __func__, lc->id, lc->prtq[prtqid].port.id,
+ lc->prtq[prtqid].rxqid, lc->prtq[prtqid].dev);
+
+ if (lc->prtq[prtqid].dev == NULL)
+ rc = -rte_errno;
+
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s(lcore=%u) failed with error code: %d\n",
+ __func__, lc->id, rc);
+ tle_ctx_destroy(lc->ctx);
+ rte_ip_frag_table_destroy(lc->ftbl);
+ rte_lpm_free(lc->lpm4);
+ rte_lpm6_free(lc->lpm6);
+ rte_free(lc->prtq[prtqid].port.lcore_id);
+ lc->prtq[prtqid].port.nb_lcore = 0;
+ rte_free(lc->prtq);
+ lc->prtq_num = 0;
+ return rc;
+ }
+ }
+
+ return rc;
+}
+
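+/*
+ * Collect all local port numbers that do NOT map to RSS queue q,
+ * so they can be block-listed on the device serving that queue.
+ */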
+static uint16_t
+create_blocklist(const struct netbe_port *beprt, uint16_t *bl_ports,
+ uint32_t q)
+{
+ uint32_t i, j, qid, align_nb_q;
+
+ align_nb_q = rte_align32pow2(beprt->nb_lcore);
+ for (i = 0, j = 0; i < (UINT16_MAX + 1); i++) {
+ qid = (i % align_nb_q) % beprt->nb_lcore;
+ if (qid != q)
+ bl_ports[j++] = i;
+ }
+
+ return j;
+}
+
+static int
+netbe_lcore_init(struct netbe_cfg *cfg, const struct tle_ctx_param *ctx_prm)
+{
+ int32_t rc;
+ uint32_t i, j, nb_bl_ports = 0, sz;
+ struct netbe_lcore *lc;
+ static uint16_t *bl_ports;
+
+ /* Create the context and attached queue for each lcore. */
+ rc = 0;
+ sz = sizeof(uint16_t) * UINT16_MAX;
+ bl_ports = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE);
+ for (i = 0; i < cfg->cpu_num; i++) {
+ lc = &cfg->cpu[i];
+ for (j = 0; j < lc->prtq_num; j++) {
+ memset((uint8_t *)bl_ports, 0, sz);
+ /* create list of blocked ports based on q */
+ nb_bl_ports = create_blocklist(&lc->prtq[j].port,
+ bl_ports, lc->prtq[j].rxqid);
+ RTE_LOG(NOTICE, USER1,
+ "lc=%u, q=%u, nb_bl_ports=%u\n",
+ lc->id, lc->prtq[j].rxqid, nb_bl_ports);
+
+ rc = lcore_init(lc, ctx_prm, j, bl_ports, nb_bl_ports);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: failed with error code: %d\n",
+ __func__, rc);
+ rte_free(bl_ports);
+ return rc;
+ }
+ }
+ }
+ rte_free(bl_ports);
+
+ return 0;
+}
+
+static int
+netfe_lcore_cmp(const void *s1, const void *s2)
+{
+ const struct netfe_stream_prm *p1, *p2;
+
+ p1 = s1;
+ p2 = s2;
+ return p1->lcore - p2->lcore;
+}
+
+static int
+netbe_find6(const struct in6_addr *laddr, uint16_t lport,
+ const struct in6_addr *raddr, uint32_t belc)
+{
+ uint32_t i, j;
+ uint8_t idx;
+ struct netbe_lcore *bc;
+
+ /* we have exactly one BE, use it for all traffic */
+ if (becfg.cpu_num == 1)
+ return 0;
+
+ /* search by provided be_lcore */
+ if (belc != LCORE_ID_ANY) {
+ for (i = 0; i != becfg.cpu_num; i++) {
+ bc = becfg.cpu + i;
+ if (belc == bc->id)
+ return i;
+ }
+ RTE_LOG(NOTICE, USER1, "%s: no stream with belcore=%u\n",
+ __func__, belc);
+ return -ENOENT;
+ }
+
+ /* search by local address */
+ if (memcmp(laddr, &in6addr_any, sizeof(*laddr)) != 0) {
+ for (i = 0; i != becfg.cpu_num; i++) {
+ bc = becfg.cpu + i;
+ /* search by queue for the local port */
+ for (j = 0; j != bc->prtq_num; j++) {
+ if (memcmp(laddr, &bc->prtq[j].port.ipv6,
+ sizeof(*laddr)) == 0) {
+
+ if (lport == 0)
+ return i;
+
+ if (verify_queue_for_port(bc->prtq + j,
+ lport) != 0)
+ return i;
+ }
+ }
+ }
+ }
+
+ /* search by remote address */
+ if (memcmp(raddr, &in6addr_any, sizeof(*raddr)) != 0) {
+ for (i = 0; i != becfg.cpu_num; i++) {
+ bc = becfg.cpu + i;
+ if (rte_lpm6_lookup(bc->lpm6,
+ (uint8_t *)(uintptr_t)raddr->s6_addr,
+ &idx) == 0) {
+
+ if (lport == 0)
+ return i;
+
+ /* search by queue for the local port */
+ for (j = 0; j != bc->prtq_num; j++)
+ if (verify_queue_for_port(bc->prtq + j,
+ lport) != 0)
+ return i;
+ }
+ }
+ }
+
+ return -ENOENT;
+}
+
+static int
+netbe_find(const struct sockaddr_storage *la,
+ const struct sockaddr_storage *ra,
+ uint32_t belc)
+{
+ const struct sockaddr_in *l4, *r4;
+ const struct sockaddr_in6 *l6, *r6;
+
+ if (la->ss_family == AF_INET) {
+ l4 = (const struct sockaddr_in *)la;
+ r4 = (const struct sockaddr_in *)ra;
+ return netbe_find4(&l4->sin_addr, ntohs(l4->sin_port),
+ &r4->sin_addr, belc);
+ } else if (la->ss_family == AF_INET6) {
+ l6 = (const struct sockaddr_in6 *)la;
+ r6 = (const struct sockaddr_in6 *)ra;
+ return netbe_find6(&l6->sin6_addr, ntohs(l6->sin6_port),
+ &r6->sin6_addr, belc);
+ }
+ return -EINVAL;
+}
+
+static int
+netfe_sprm_fill_be(struct netfe_sprm *sp, uint32_t line, uint32_t belc)
+{
+ int32_t bidx;
+
+ bidx = netbe_find(&sp->local_addr, &sp->remote_addr, belc);
+
+ if (bidx < 0) {
+ RTE_LOG(ERR, USER1, "%s(line=%u): no BE for that stream\n",
+ __func__, line);
+ return -EINVAL;
+ }
+ sp->bidx = bidx;
+ return 0;
+}
+
+/* distribute front-end streams and their parameters among lcores. */
+static int
+netfe_lcore_fill(struct lcore_prm prm[RTE_MAX_LCORE],
+ struct netfe_lcore_prm *lprm)
+{
+ uint32_t belc;
+ uint32_t i, j, lc, ln;
+ struct netfe_stream_prm *s;
+
+ /* determine on what BE each stream should be open. */
+ for (i = 0; i != lprm->nb_streams; i++) {
+ s = lprm->stream + i;
+ ln = s->line;
+ belc = s->belcore;
+ if (netfe_sprm_fill_be(&s->sprm, ln, belc) != 0 ||
+ (s->op == FWD &&
+ netfe_sprm_fill_be(&s->fprm, ln, belc) != 0))
+ return -EINVAL;
+ }
+
+ /* group all fe parameters by lcore. */
+
+ qsort(lprm->stream, lprm->nb_streams, sizeof(lprm->stream[0]),
+ netfe_lcore_cmp);
+
+ for (i = 0; i != lprm->nb_streams; i = j) {
+
+ lc = lprm->stream[i].lcore;
+ ln = lprm->stream[i].line;
+
+ if (rte_lcore_is_enabled(lc) == 0) {
+ RTE_LOG(ERR, USER1,
+ "%s(line=%u): lcore %u is not enabled\n",
+ __func__, ln, lc);
+ return -EINVAL;
+ }
+
+ if (rte_get_master_lcore() != lc &&
+ rte_eal_get_lcore_state(lc) == RUNNING) {
+ RTE_LOG(ERR, USER1,
+ "%s(line=%u): lcore %u already in use\n",
+ __func__, ln, lc);
+ return -EINVAL;
+ }
+
+ for (j = i + 1; j != lprm->nb_streams &&
+ lc == lprm->stream[j].lcore;
+ j++)
+ ;
+
+ prm[lc].fe.max_streams = lprm->max_streams;
+ prm[lc].fe.nb_streams = j - i;
+ prm[lc].fe.stream = lprm->stream + i;
+ }
+
+ return 0;
+}
+
+#endif /* LCORE_H_ */
diff --git a/examples/l4fwd/main.c b/examples/l4fwd/main.c
new file mode 100644
index 0000000..37bd03e
--- /dev/null
+++ b/examples/l4fwd/main.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <time.h>
+
+#include "netbe.h"
+#include "parse.h"
+
+#define MAX_RULES 0x100
+#define MAX_TBL8 0x800
+
+#define RX_RING_SIZE 0x400
+#define TX_RING_SIZE 0x800
+
+#define MPOOL_CACHE_SIZE 0x100
+#define MPOOL_NB_BUF 0x20000
+
+#define FRAG_MBUF_BUF_SIZE (RTE_PKTMBUF_HEADROOM + TLE_DST_MAX_HDR)
+#define FRAG_TTL MS_PER_S
+#define FRAG_TBL_BUCKET_ENTRIES 16
+
+#define FIRST_PORT 0x8000
+
+#define RX_CSUM_OFFLOAD (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM)
+#define TX_CSUM_OFFLOAD (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM)
+
+RTE_DEFINE_PER_LCORE(struct netbe_lcore *, _be);
+RTE_DEFINE_PER_LCORE(struct netfe_lcore *, _fe);
+
+#include "fwdtbl.h"
+
+/**
+ * Location to be modified to create the IPv4 hash key which helps
+ * to distribute packets based on the destination TCP/UDP port.
+ */
+#define RSS_HASH_KEY_DEST_PORT_LOC_IPV4 15
+
+/**
+ * Location to be modified to create the IPv6 hash key which helps
+ * to distribute packets based on the destination TCP/UDP port.
+ */
+#define RSS_HASH_KEY_DEST_PORT_LOC_IPV6 39
+
+/**
+ * Size of the rte_eth_rss_reta_entry64 array to update through
+ * rte_eth_dev_rss_reta_update.
+ */
+#define RSS_RETA_CONF_ARRAY_SIZE (ETH_RSS_RETA_SIZE_512/RTE_RETA_GROUP_SIZE)
+
+static volatile int force_quit;
+
+static struct netbe_cfg becfg;
+static struct rte_mempool *mpool[RTE_MAX_NUMA_NODES + 1];
+static struct rte_mempool *frag_mpool[RTE_MAX_NUMA_NODES + 1];
+static char proto_name[3][10] = {"udp", "tcp", ""};
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = {
+ .max_rx_pkt_len = ETHER_MAX_VLAN_FRAME_LEN,
+ .hw_vlan_strip = 1,
+ .jumbo_frame = 1,
+ },
+};
+
+/* function pointers */
+static TLE_RX_BULK_FUNCTYPE tle_rx_bulk;
+static TLE_TX_BULK_FUNCTYPE tle_tx_bulk;
+static TLE_STREAM_RECV_FUNCTYPE tle_stream_recv;
+static TLE_STREAM_CLOSE_FUNCTYPE tle_stream_close;
+
+static LCORE_MAIN_FUNCTYPE lcore_main;
+
+#include "common.h"
+#include "parse.h"
+#include "lcore.h"
+#include "port.h"
+#include "tcp.h"
+#include "udp.h"
+
+int verbose = VERBOSE_NONE;
+
+static void
+netbe_lcore_fini(struct netbe_cfg *cfg)
+{
+ uint32_t i;
+
+ for (i = 0; i != cfg->cpu_num; i++) {
+ tle_ctx_destroy(cfg->cpu[i].ctx);
+ rte_ip_frag_table_destroy(cfg->cpu[i].ftbl);
+ rte_lpm_free(cfg->cpu[i].lpm4);
+ rte_lpm6_free(cfg->cpu[i].lpm6);
+
+ rte_free(cfg->cpu[i].prtq);
+ cfg->cpu[i].prtq_num = 0;
+ }
+
+ rte_free(cfg->cpu);
+ cfg->cpu_num = 0;
+ for (i = 0; i != cfg->prt_num; i++) {
+ rte_free(cfg->prt[i].lcore_id);
+ cfg->prt[i].nb_lcore = 0;
+ }
+ rte_free(cfg->prt);
+ cfg->prt_num = 0;
+}
+
+static int
+netbe_dest_init(const char *fname, struct netbe_cfg *cfg)
+{
+ int32_t rc;
+ uint32_t f, i, p;
+ uint32_t k, l, cnt;
+ struct netbe_lcore *lc;
+ struct netbe_dest_prm prm;
+
+ rc = netbe_parse_dest(fname, &prm);
+ if (rc != 0)
+ return rc;
+
+ rc = 0;
+ for (i = 0; i != prm.nb_dest; i++) {
+
+ p = prm.dest[i].port;
+ f = prm.dest[i].family;
+
+ cnt = 0;
+ for (k = 0; k != cfg->cpu_num; k++) {
+ lc = cfg->cpu + k;
+ for (l = 0; l != lc->prtq_num; l++)
+ if (lc->prtq[l].port.id == p) {
+ rc = netbe_add_dest(lc, l, f,
+ prm.dest + i, 1);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s(lc=%u, family=%u) "
+ "could not add "
+ "destinations(%u)\n",
+ __func__, lc->id, f, i);
+ return -ENOSPC;
+ }
+ cnt++;
+ }
+ }
+
+ if (cnt == 0) {
+ RTE_LOG(ERR, USER1, "%s(%s) error at line %u: "
+ "port %u not managed by any lcore;\n",
+ __func__, fname, prm.dest[i].line, p);
+ break;
+ }
+ }
+
+ free(prm.dest);
+ return rc;
+}
+
+static void
+func_ptrs_init(uint32_t proto) {
+ if (proto == TLE_PROTO_TCP) {
+ tle_rx_bulk = tle_tcp_rx_bulk;
+ tle_tx_bulk = tle_tcp_tx_bulk;
+ tle_stream_recv = tle_tcp_stream_recv;
+ tle_stream_close = tle_tcp_stream_close;
+
+ lcore_main = lcore_main_tcp;
+
+ } else {
+ tle_rx_bulk = tle_udp_rx_bulk;
+ tle_tx_bulk = tle_udp_tx_bulk;
+ tle_stream_recv = tle_udp_stream_recv;
+ tle_stream_close = tle_udp_stream_close;
+
+ lcore_main = lcore_main_udp;
+ }
+}
+
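
Once func_ptrs_init() has run, the rest of the application stays protocol-agnostic: the fast path calls through the pointers above instead of branching on becfg.proto per packet. A hedged sketch of the calling convention (the real consumers are lcore_main_tcp/lcore_main_udp in tcp.h/udp.h; poll_dev below is purely illustrative, and it assumes the tle_*_rx_bulk contract of returning the number of accepted packets while handing rejects back via rp[]/rc[]):

/* illustrative only: drain one device through the indirect calls. */
static inline void
poll_dev(struct tle_dev *dev, struct rte_mbuf *pkt[], uint16_t n)
{
	struct rte_mbuf *rp[MAX_PKT_BURST];
	int32_t rc[MAX_PKT_BURST];
	uint16_t i, k;

	/* identical call whether the context was built for TCP or UDP. */
	k = tle_rx_bulk(dev, pkt, rp, rc, n);

	/* mbufs the library did not consume come back via rp[]/rc[]. */
	for (i = 0; i != n - k; i++)
		rte_pktmbuf_free(rp[i]);
}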
+int
+main(int argc, char *argv[])
+{
+ int32_t rc;
+ uint32_t i;
+ struct tle_ctx_param ctx_prm;
+ struct netfe_lcore_prm feprm;
+ struct rte_eth_stats stats;
+ char fecfg_fname[PATH_MAX + 1];
+ char becfg_fname[PATH_MAX + 1];
+ struct lcore_prm prm[RTE_MAX_LCORE];
+ struct rte_eth_dev_info dev_info;
+
+ fecfg_fname[0] = 0;
+ becfg_fname[0] = 0;
+ memset(prm, 0, sizeof(prm));
+
+ rc = rte_eal_init(argc, argv);
+ if (rc < 0)
+ rte_exit(EXIT_FAILURE,
+ "%s: rte_eal_init failed with error code: %d\n",
+ __func__, rc);
+
+ memset(&ctx_prm, 0, sizeof(ctx_prm));
+
+ signal(SIGINT, sig_handle);
+
+ argc -= rc;
+ argv += rc;
+
+ rc = parse_app_options(argc, argv, &becfg, &ctx_prm,
+ fecfg_fname, becfg_fname);
+ if (rc != 0)
+ rte_exit(EXIT_FAILURE,
+ "%s: parse_app_options failed with error code: %d\n",
+ __func__, rc);
+
+ /* initialise all the function pointers */
+ func_ptrs_init(becfg.proto);
+
+ rc = netbe_port_init(&becfg);
+ if (rc != 0)
+ rte_exit(EXIT_FAILURE,
+ "%s: netbe_port_init failed with error code: %d\n",
+ __func__, rc);
+
+ rc = netbe_lcore_init(&becfg, &ctx_prm);
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+
+ rc = netbe_dest_init(becfg_fname, &becfg);
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+
+ for (i = 0; i != becfg.prt_num && rc == 0; i++) {
+ RTE_LOG(NOTICE, USER1, "%s: starting port %u\n",
+ __func__, becfg.prt[i].id);
+ rc = rte_eth_dev_start(becfg.prt[i].id);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: rte_eth_dev_start(%u) returned "
+ "error code: %d\n",
+ __func__, becfg.prt[i].id, rc);
+ sig_handle(SIGQUIT);
+ }
+ rte_eth_dev_info_get(becfg.prt[i].id, &dev_info);
+ rc = update_rss_reta(&becfg.prt[i], &dev_info);
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+ }
+
+ feprm.max_streams = ctx_prm.max_streams * becfg.cpu_num;
+
+ rc = (rc != 0) ? rc : netfe_parse_cfg(fecfg_fname, &feprm);
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+
+ for (i = 0; rc == 0 && i != becfg.cpu_num; i++)
+ prm[becfg.cpu[i].id].be.lc = becfg.cpu + i;
+
+ rc = (rc != 0) ? rc : netfe_lcore_fill(prm, &feprm);
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+
+ /* launch all slave lcores. */
+ RTE_LCORE_FOREACH_SLAVE(i) {
+ if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0)
+ rte_eal_remote_launch(lcore_main, prm + i, i);
+ }
+
+ /* launch master lcore. */
+ i = rte_get_master_lcore();
+ if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0)
+ lcore_main(prm + i);
+
+ rte_eal_mp_wait_lcore();
+
+ for (i = 0; i != becfg.prt_num; i++) {
+ RTE_LOG(NOTICE, USER1, "%s: stoping port %u\n",
+ __func__, becfg.prt[i].id);
+ rte_eth_stats_get(becfg.prt[i].id, &stats);
+ RTE_LOG(NOTICE, USER1, "port %u stats={\n"
+ "ipackets=%" PRIu64 ";"
+ "ibytes=%" PRIu64 ";"
+ "ierrors=%" PRIu64 ";"
+ "imissed=%" PRIu64 ";\n"
+ "opackets=%" PRIu64 ";"
+ "obytes=%" PRIu64 ";"
+ "oerrors=%" PRIu64 ";\n"
+ "}\n",
+ becfg.prt[i].id,
+ stats.ipackets,
+ stats.ibytes,
+ stats.ierrors,
+ stats.imissed,
+ stats.opackets,
+ stats.obytes,
+ stats.oerrors);
+ rte_eth_dev_stop(becfg.prt[i].id);
+ }
+
+ netbe_lcore_fini(&becfg);
+
+ return 0;
+}
diff --git a/examples/udpfwd/netbe.h b/examples/l4fwd/netbe.h
index 1e5d9a7..6d25603 100644
--- a/examples/udpfwd/netbe.h
+++ b/examples/l4fwd/netbe.h
@@ -38,8 +38,10 @@
#include <rte_hash.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
+#include <rte_tcp.h>
#include <rte_udp.h>
-#include <tle_udp_impl.h>
+#include <tle_tcp.h>
+#include <tle_udp.h>
#include <tle_event.h>
#define MAX_PKT_BURST 0x20
@@ -48,13 +50,24 @@
#define RSS_HASH_KEY_LENGTH 64
/*
+ * global variables
+ */
+
+enum {
+ VERBOSE_NONE = 0,
+ VERBOSE_NUM = 9
+};
+
+extern int verbose;
+
+/*
* BE related structures.
*/
struct netbe_port {
uint32_t id;
uint32_t nb_lcore;
- uint32_t *lcore;
+ uint32_t *lcore_id;
uint32_t mtu;
uint32_t rx_offload;
uint32_t tx_offload;
@@ -92,7 +105,7 @@ struct netbe_dev {
uint16_t rxqid;
uint16_t txqid;
struct netbe_port port;
- struct tle_udp_dev *dev;
+ struct tle_dev *dev;
struct {
uint64_t in;
uint64_t up;
@@ -104,6 +117,7 @@ struct netbe_dev {
uint64_t drop;
} tx_stat;
struct pkt_buf tx_buf;
+ struct pkt_buf arp_buf;
};
/* 8 bit LPM user data. */
@@ -111,21 +125,28 @@ struct netbe_dev {
struct netbe_lcore {
uint32_t id;
+ uint32_t proto; /**< L4 proto to handle. */
struct rte_lpm *lpm4;
struct rte_lpm6 *lpm6;
struct rte_ip_frag_tbl *ftbl;
- struct tle_udp_ctx *ctx;
+ struct tle_ctx *ctx;
uint32_t prtq_num;
uint32_t dst4_num;
uint32_t dst6_num;
struct netbe_dev *prtq;
- struct tle_udp_dest dst4[LCORE_MAX_DST];
- struct tle_udp_dest dst6[LCORE_MAX_DST];
+ struct tle_dest dst4[LCORE_MAX_DST];
+ struct tle_dest dst6[LCORE_MAX_DST];
struct rte_ip_frag_death_row death_row;
+ struct {
+ uint64_t flags[UINT8_MAX + 1];
+ } tcp_stat;
};
struct netbe_cfg {
uint32_t promisc;
+ uint32_t proto;
+ uint32_t server;
+ uint32_t arp;
uint32_t prt_num;
uint32_t cpu_num;
struct netbe_port *prt;
@@ -145,13 +166,14 @@ enum {
struct netfe_sprm {
uint32_t bidx; /* BE index to use. */
- struct tle_udp_stream_param prm;
+ struct sockaddr_storage local_addr; /**< stream local address. */
+ struct sockaddr_storage remote_addr; /**< stream remote address. */
};
struct netfe_stream_prm {
uint32_t lcore;
- uint32_t be_lcore;
- uint32_t line;
+ uint32_t belcore;
+ uint16_t line;
uint16_t op;
uint16_t txlen; /* valid/used only for TXONLY op. */
struct netfe_sprm sprm;
@@ -165,33 +187,53 @@ struct netfe_lcore_prm {
};
struct netfe_stream {
- struct tle_udp_stream *s;
+ struct tle_stream *s;
+ struct tle_event *erev;
struct tle_event *rxev;
struct tle_event *txev;
uint16_t op;
+ uint16_t proto;
uint16_t family;
uint16_t txlen;
struct {
uint64_t rxp;
+ uint64_t rxb;
uint64_t txp;
+ uint64_t txb;
uint64_t fwp;
uint64_t drops;
uint64_t rxev[TLE_SEV_NUM];
uint64_t txev[TLE_SEV_NUM];
+ uint64_t erev[TLE_SEV_NUM];
} stat;
struct pkt_buf pbuf;
+ struct sockaddr_storage laddr;
struct sockaddr_storage raddr;
struct netfe_sprm fwdprm;
+ struct netfe_stream *fwds;
+ LIST_ENTRY(netfe_stream) link;
+};
+
+struct netfe_stream_list {
+ uint32_t num;
+ LIST_HEAD(, netfe_stream) head;
};
struct netfe_lcore {
uint32_t snum; /* max number of streams */
- uint32_t sidx; /* last open stream index */
+ struct tle_evq *syneq;
+ struct tle_evq *ereq;
struct tle_evq *rxeq;
struct tle_evq *txeq;
struct rte_hash *fw4h;
struct rte_hash *fw6h;
- struct netfe_stream *fs;
+ struct {
+ uint64_t acc;
+ uint64_t rej;
+ uint64_t ter;
+ } tcp_stat;
+ struct netfe_stream_list free;
+ struct netfe_stream_list use;
};
struct lcore_prm {
@@ -261,6 +303,28 @@ struct lcore_prm {
} while (0)
int setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc,
- uint16_t qid);
+ uint16_t qid, uint32_t arp);
+
+/*
+ * application function pointers
+ */
+
+typedef int (*LCORE_MAIN_FUNCTYPE)(void *arg);
+
+/*
+ * tle_l4p lib function pointers
+ */
+
+typedef uint16_t (*TLE_RX_BULK_FUNCTYPE)
+ (struct tle_dev *dev, struct rte_mbuf *pkt[],
+ struct rte_mbuf *rp[], int32_t rc[], uint16_t num);
+
+typedef uint16_t (*TLE_TX_BULK_FUNCTYPE)
+ (struct tle_dev *dev, struct rte_mbuf *pkt[], uint16_t num);
+
+typedef uint16_t (*TLE_STREAM_RECV_FUNCTYPE)
+ (struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num);
+
+typedef int (*TLE_STREAM_CLOSE_FUNCTYPE)(struct tle_stream *s);
#endif /* __NETBE_H__ */
diff --git a/examples/udpfwd/parse.c b/examples/l4fwd/parse.c
index f46c7df..6593221 100644
--- a/examples/udpfwd/parse.c
+++ b/examples/l4fwd/parse.c
@@ -28,6 +28,75 @@ static const struct {
{ .name = "fwd", .op = FWD,},
};
+#define OPT_SHORT_ARP 'a'
+#define OPT_LONG_ARP "enable-arp"
+
+#define OPT_SHORT_SBULK 'B'
+#define OPT_LONG_SBULK "sburst"
+
+#define OPT_SHORT_PROMISC 'P'
+#define OPT_LONG_PROMISC "promisc"
+
+#define OPT_SHORT_RBUFS 'R'
+#define OPT_LONG_RBUFS "rbufs"
+
+#define OPT_SHORT_SBUFS 'S'
+#define OPT_LONG_SBUFS "sbufs"
+
+#define OPT_SHORT_BECFG 'b'
+#define OPT_LONG_BECFG "becfg"
+
+#define OPT_SHORT_FECFG 'f'
+#define OPT_LONG_FECFG "fecfg"
+
+#define OPT_SHORT_STREAMS 's'
+#define OPT_LONG_STREAMS "streams"
+
+#define OPT_SHORT_UDP 'U'
+#define OPT_LONG_UDP "udp"
+
+#define OPT_SHORT_TCP 'T'
+#define OPT_LONG_TCP "tcp"
+
+#define OPT_SHORT_LISTEN 'L'
+#define OPT_LONG_LISTEN "listen"
+
+#define OPT_SHORT_VERBOSE 'v'
+#define OPT_LONG_VERBOSE "verbose"
+
+static const struct option long_opt[] = {
+ {OPT_LONG_ARP, 1, 0, OPT_SHORT_ARP},
+ {OPT_LONG_SBULK, 1, 0, OPT_SHORT_SBULK},
+ {OPT_LONG_PROMISC, 0, 0, OPT_SHORT_PROMISC},
+ {OPT_LONG_RBUFS, 1, 0, OPT_SHORT_RBUFS},
+ {OPT_LONG_SBUFS, 1, 0, OPT_SHORT_SBUFS},
+ {OPT_LONG_BECFG, 1, 0, OPT_SHORT_BECFG},
+ {OPT_LONG_FECFG, 1, 0, OPT_SHORT_FECFG},
+ {OPT_LONG_STREAMS, 1, 0, OPT_SHORT_STREAMS},
+ {OPT_LONG_UDP, 0, 0, OPT_SHORT_UDP},
+ {OPT_LONG_TCP, 0, 0, OPT_SHORT_TCP},
+ {OPT_LONG_LISTEN, 0, 0, OPT_SHORT_LISTEN},
+ {OPT_LONG_VERBOSE, 1, 0, OPT_SHORT_VERBOSE},
+ {NULL, 0, 0, 0}
+};
+
+static int
+parse_uint_val(__rte_unused const char *key, const char *val, void *prm)
+{
+ union parse_val *rv;
+ unsigned long v;
+ char *end;
+
+ rv = prm;
+ errno = 0;
+ v = strtoul(val, &end, 0);
+ if (errno != 0 || end[0] != 0 || v > UINT32_MAX)
+ return -EINVAL;
+
+ rv->u64 = v;
+ return 0;
+}
+
static int
parse_ipv4_val(__rte_unused const char *key, const char *val, void *prm)
{
@@ -117,6 +186,7 @@ parse_lcore_list_val(__rte_unused const char *key, const char *val, void *prm)
char *end;
rv = prm;
+
errno = 0;
a = strtoul(val, &end, 0);
if (errno != 0 || (end[0] != 0 && end[0] != '-') || a > UINT32_MAX)
@@ -197,7 +267,7 @@ parse_kvargs(const char *arg, const char *keys_man[], uint32_t nb_man,
}
int
-parse_netbe_arg(struct netbe_port *prt, const char *arg, rte_cpuset_t *cpuset)
+parse_netbe_arg(struct netbe_port *prt, const char *arg, rte_cpuset_t *pcpu)
{
int32_t rc;
uint32_t i, j, nc;
@@ -239,14 +309,14 @@ parse_netbe_arg(struct netbe_port *prt, const char *arg, rte_cpuset_t *cpuset)
for (i = 0, nc = 0; i < RTE_MAX_LCORE; i++)
nc += CPU_ISSET(i, &val[1].cpuset);
- prt->lcore = rte_zmalloc(NULL, nc * sizeof(prt->lcore[0]),
+ prt->lcore_id = rte_zmalloc(NULL, nc * sizeof(prt->lcore_id[0]),
RTE_CACHE_LINE_SIZE);
prt->nb_lcore = nc;
for (i = 0, j = 0; i < RTE_MAX_LCORE; i++)
if (CPU_ISSET(i, &val[1].cpuset))
- prt->lcore[j++] = i;
- CPU_OR(cpuset, cpuset, &val[1].cpuset);
+ prt->lcore_id[j++] = i;
+ CPU_OR(pcpu, pcpu, &val[1].cpuset);
prt->mtu = val[2].u64;
prt->rx_offload = val[3].u64;
@@ -348,7 +418,7 @@ netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm)
n = 0;
num = 0;
dp = NULL;
-
+ rc = 0;
for (ln = 0; fgets(line, sizeof(line), f) != NULL; ln++) {
/* skip spaces at the start. */
@@ -379,8 +449,9 @@ netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm)
}
dp[n].line = ln + 1;
- if ((rc = parse_netbe_dest(dp + n, s)) != 0 ||
- (rc = check_netbe_dest(dp + n)) != 0) {
+ rc = parse_netbe_dest(dp + n, s);
+ rc = (rc != 0) ? rc : check_netbe_dest(dp + n);
+ if (rc != 0) {
RTE_LOG(ERR, USER1, "%s(%s) failed to parse line %u\n",
__func__, fname, dp[n].line);
break;
@@ -465,12 +536,12 @@ parse_netfe_arg(struct netfe_stream_prm *sp, const char *arg)
return rc;
sp->lcore = val[0].u64;
sp->op = val[1].u64;
- pv2saddr(&sp->sprm.prm.local_addr, val + 2, val + 3);
- pv2saddr(&sp->sprm.prm.remote_addr, val + 4, val + 5);
+ pv2saddr(&sp->sprm.local_addr, val + 2, val + 3);
+ pv2saddr(&sp->sprm.remote_addr, val + 4, val + 5);
sp->txlen = val[6].u64;
- pv2saddr(&sp->fprm.prm.local_addr, val + 7, val + 8);
- pv2saddr(&sp->fprm.prm.remote_addr, val + 9, val + 10);
- sp->be_lcore = val[11].u64;
+ pv2saddr(&sp->fprm.local_addr, val + 7, val + 8);
+ pv2saddr(&sp->fprm.remote_addr, val + 9, val + 10);
+ sp->belcore = val[11].u64;
return 0;
}
@@ -510,8 +581,8 @@ check_netfe_arg(const struct netfe_stream_prm *sp)
{
char buf[INET6_ADDRSTRLEN];
- if (sp->sprm.prm.local_addr.ss_family !=
- sp->sprm.prm.remote_addr.ss_family) {
+ if (sp->sprm.local_addr.ss_family !=
+ sp->sprm.remote_addr.ss_family) {
RTE_LOG(ERR, USER1, "invalid arg at line %u: "
"laddr and raddr for different protocols\n",
sp->line);
@@ -524,27 +595,27 @@ check_netfe_arg(const struct netfe_stream_prm *sp)
"exceeds allowed values: (0, %u]\n",
sp->line, sp->txlen, RTE_MBUF_DEFAULT_DATAROOM);
return -EINVAL;
- } else if (is_addr_wc(&sp->sprm.prm.remote_addr)) {
+ } else if (is_addr_wc(&sp->sprm.remote_addr)) {
RTE_LOG(ERR, USER1, "invalid arg at line %u: "
"raddr=%s are not allowed for op=%s;\n",
sp->line,
- format_addr(&sp->sprm.prm.remote_addr,
+ format_addr(&sp->sprm.remote_addr,
buf, sizeof(buf)),
format_feop(sp->op));
return -EINVAL;
}
} else if (sp->op == FWD) {
- if (sp->fprm.prm.local_addr.ss_family !=
- sp->fprm.prm.remote_addr.ss_family) {
+ if (sp->fprm.local_addr.ss_family !=
+ sp->fprm.remote_addr.ss_family) {
RTE_LOG(ERR, USER1, "invalid arg at line %u: "
"fwladdr and fwraddr for different protocols\n",
sp->line);
return -EINVAL;
- } else if (is_addr_wc(&sp->fprm.prm.remote_addr)) {
+ } else if (is_addr_wc(&sp->fprm.remote_addr)) {
RTE_LOG(ERR, USER1, "invalid arg at line %u: "
"fwaddr=%s are not allowed for op=%s;\n",
sp->line,
- format_addr(&sp->fprm.prm.remote_addr,
+ format_addr(&sp->fprm.remote_addr,
buf, sizeof(buf)),
format_feop(sp->op));
return -EINVAL;
@@ -575,7 +646,7 @@ netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp)
n = 0;
num = 0;
sp = NULL;
-
+ rc = 0;
for (ln = 0; fgets(line, sizeof(line), f) != NULL; ln++) {
/* skip spaces at the start. */
@@ -615,8 +686,9 @@ netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp)
}
sp[n].line = ln + 1;
- if ((rc = parse_netfe_arg(sp + n, s)) != 0 ||
- (rc = check_netfe_arg(sp + n)) != 0) {
+ rc = parse_netfe_arg(sp + n, s);
+ rc = (rc != 0) ? rc : check_netfe_arg(sp + n);
+ if (rc != 0) {
RTE_LOG(ERR, USER1, "%s(%s) failed to parse line %u\n",
__func__, fname, sp[n].line);
break;
@@ -636,3 +708,132 @@ netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp)
lp->nb_streams = n;
return rc;
}
+
+int
+parse_app_options(int argc, char **argv, struct netbe_cfg *cfg,
+ struct tle_ctx_param *ctx_prm,
+ char *fecfg_fname, char *becfg_fname)
+{
+ int32_t opt, opt_idx, rc;
+ uint64_t v;
+ uint32_t i, j, n, nc;
+ rte_cpuset_t cpuset;
+ uint32_t udp = 0, tcp = 0, listen = 0;
+
+ optind = 0;
+ optarg = NULL;
+ while ((opt = getopt_long(argc, argv, "aB:LPR:S:TUb:f:s:v:", long_opt,
+ &opt_idx)) != EOF) {
+ if (opt == OPT_SHORT_ARP) {
+ cfg->arp = 1;
+ } else if (opt == OPT_SHORT_SBULK) {
+ rc = parse_uint_val(NULL, optarg, &v);
+ if (rc < 0)
+ rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
+ "for option: \'%c\'\n",
+ __func__, optarg, opt);
+ ctx_prm->send_bulk_size = v;
+ } else if (opt == OPT_SHORT_PROMISC) {
+ cfg->promisc = 1;
+ } else if (opt == OPT_SHORT_RBUFS) {
+ rc = parse_uint_val(NULL, optarg, &v);
+ if (rc < 0)
+ rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
+ "for option: \'%c\'\n",
+ __func__, optarg, opt);
+ ctx_prm->max_stream_rbufs = v;
+ } else if (opt == OPT_SHORT_SBUFS) {
+ rc = parse_uint_val(NULL, optarg, &v);
+ if (rc < 0)
+ rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
+ "for option: \'%c\'\n",
+ __func__, optarg, opt);
+ ctx_prm->max_stream_sbufs = v;
+ } else if (opt == OPT_SHORT_STREAMS) {
+ rc = parse_uint_val(NULL, optarg, &v);
+ if (rc < 0)
+ rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
+ "for option: \'%c\'\n",
+ __func__, optarg, opt);
+ ctx_prm->max_streams = v;
+ } else if (opt == OPT_SHORT_VERBOSE) {
+ rc = parse_uint_val(NULL, optarg, &v);
+ if (rc < 0)
+ rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
+ "for option: \'%c\'\n",
+ __func__, optarg, opt);
+ verbose = (v > VERBOSE_NUM) ? VERBOSE_NUM : v;
+ } else if (opt == OPT_SHORT_BECFG) {
+ snprintf(becfg_fname, PATH_MAX, "%s",
+ optarg);
+ } else if (opt == OPT_SHORT_FECFG) {
+ snprintf(fecfg_fname, PATH_MAX, "%s",
+ optarg);
+ } else if (opt == OPT_SHORT_UDP) {
+ udp = 1;
+ cfg->proto = TLE_PROTO_UDP;
+ } else if (opt == OPT_SHORT_TCP) {
+ tcp = 1;
+ cfg->proto = TLE_PROTO_TCP;
+ } else if (opt == OPT_SHORT_LISTEN) {
+ listen = 1;
+ cfg->server = 1;
+ } else {
+ rte_exit(EXIT_FAILURE,
+ "%s: unknown option: \'%c\'\n",
+ __func__, opt);
+ }
+ }
+
+ if (!udp && !tcp)
+ rte_exit(EXIT_FAILURE, "%s: either UDP or TCP option has to be "
+ "provided\n", __func__);
+
+ if (udp && tcp)
+ rte_exit(EXIT_FAILURE, "%s: both UDP and TCP options are not "
+ "allowed\n", __func__);
+
+ if (udp && listen)
+ rte_exit(EXIT_FAILURE,
+ "%s: listen mode cannot be opened with UDP\n",
+ __func__);
+
+ if (udp && cfg->arp)
+ rte_exit(EXIT_FAILURE,
+ "%s: arp cannot be enabled with UDP\n",
+ __func__);
+
+ /* parse port params */
+ argc -= optind;
+ argv += optind;
+
+ /* allocate memory for number of ports defined */
+ n = (uint32_t)argc;
+ cfg->prt = rte_zmalloc(NULL, sizeof(struct netbe_port) * n,
+ RTE_CACHE_LINE_SIZE);
+ cfg->prt_num = n;
+
+ /* cpuset is filled via CPU_OR() inside parse_netbe_arg(), clear it first. */
+ CPU_ZERO(&cpuset);
+
+ rc = 0;
+ for (i = 0; i != n; i++) {
+ rc = parse_netbe_arg(cfg->prt + i, argv[i], &cpuset);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: processing of \"%s\" failed with error "
+ "code: %d\n", __func__, argv[i], rc);
+ for (j = 0; j != i; j++)
+ rte_free(cfg->prt[j].lcore_id);
+ rte_free(cfg->prt);
+ return rc;
+ }
+ }
+
+ /* count the number of CPU defined in ports */
+ for (i = 0, nc = 0; i < RTE_MAX_LCORE; i++)
+ nc += CPU_ISSET(i, &cpuset);
+
+ /* allocate memory for number of CPU defined */
+ cfg->cpu = rte_zmalloc(NULL, sizeof(struct netbe_lcore) * nc,
+ RTE_CACHE_LINE_SIZE);
+
+ return 0;
+}
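
Taken together, the options above give l4fwd a command line of the following general shape; everything after the EAL separator "--" is handled here and by parse_netbe_arg(), and the port kvargs keys/values shown are illustrative only:

    l4fwd <EAL args> -- --tcp --listen --promisc \
        -b be.cfg -f fe.cfg \
        port=0,lcore=1,mtu=1514,rx_offload=0x0,tx_offload=0x0

The mutual-exclusion checks run before any port parsing, so an invalid combination (e.g. --udp together with --listen) fails fast without touching the NICs.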
diff --git a/examples/udpfwd/parse.h b/examples/l4fwd/parse.h
index 7df7671..4303623 100644
--- a/examples/udpfwd/parse.h
+++ b/examples/l4fwd/parse.h
@@ -33,23 +33,6 @@ union parse_val {
rte_cpuset_t cpuset;
};
-static int
-parse_uint_val(__rte_unused const char *key, const char *val, void *prm)
-{
- union parse_val *rv;
- unsigned long v;
- char *end;
-
- rv = prm;
- errno = 0;
- v = strtoul(val, &end, 0);
- if (errno != 0 || end[0] != 0 || v > UINT32_MAX)
- return -EINVAL;
-
- rv->u64 = v;
- return 0;
-}
-
static const char *
format_addr(const struct sockaddr_storage *sp, char buf[], size_t len)
{
@@ -71,11 +54,16 @@ format_addr(const struct sockaddr_storage *sp, char buf[], size_t len)
}
int parse_netbe_arg(struct netbe_port *prt, const char *arg,
- rte_cpuset_t *cpuset);
+ rte_cpuset_t *pcpu);
int netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm);
int netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp);
+int
+parse_app_options(int argc, char **argv, struct netbe_cfg *cfg,
+ struct tle_ctx_param *ctx_prm,
+ char *fecfg_fname, char *becfg_fname);
+
#endif /* __PARSE_H__ */
diff --git a/examples/l4fwd/pkt.c b/examples/l4fwd/pkt.c
new file mode 100644
index 0000000..660e618
--- /dev/null
+++ b/examples/l4fwd/pkt.c
@@ -0,0 +1,872 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <netinet/ip6.h>
+#include <rte_arp.h>
+
+#include "netbe.h"
+
+static inline uint64_t
+_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
+ uint64_t ol3, uint64_t ol2)
+{
+ return il2 | il3 << 7 | il4 << 16 | tso << 24 | ol3 << 40 | ol2 << 49;
+}
+
+static inline void
+fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4)
+{
+ m->tx_offload = _mbuf_tx_offload(l2, l3, l4, 0, 0, 0);
+}
+
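The shift constants in _mbuf_tx_offload() mirror the bit-field layout of rte_mbuf's tx_offload word (l2_len:7, l3_len:9, l4_len:8, tso_segsz:16, outer_l3_len:9, outer_l2_len:7 in the DPDK releases this patch targets), so a single 64-bit store replaces six bit-field writes. A sketch of the equivalence, assuming that layout and a TCP/IPv4 frame without options:

/* one store ... */
m->tx_offload = _mbuf_tx_offload(14, 20, 20, 0, 0, 0);

/* ... equivalent to six bit-field writes: */
m->l2_len = 14;		/* ether header */
m->l3_len = 20;		/* ipv4 header, no options */
m->l4_len = 20;		/* tcp header, no options */
m->tso_segsz = 0;
m->outer_l3_len = 0;
m->outer_l2_len = 0;
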
+static inline int
+is_ipv4_frag(const struct ipv4_hdr *iph)
+{
+ const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG);
+
+ return ((mask & iph->fragment_offset) != 0);
+}
+
+static inline uint32_t
+get_tcp_header_size(struct rte_mbuf *m, uint32_t l2_len, uint32_t l3_len)
+{
+ const struct tcp_hdr *tcp;
+
+ tcp = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len);
+ return (tcp->data_off >> 4) * 4;
+}
+
+static inline void
+adjust_ipv4_pktlen(struct rte_mbuf *m, uint32_t l2_len)
+{
+ uint32_t plen, trim;
+ const struct ipv4_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2_len);
+ plen = rte_be_to_cpu_16(iph->total_length) + l2_len;
+ if (plen < m->pkt_len) {
+ trim = m->pkt_len - plen;
+ rte_pktmbuf_trim(m, trim);
+ }
+}
+
+static inline void
+adjust_ipv6_pktlen(struct rte_mbuf *m, uint32_t l2_len)
+{
+ uint32_t plen, trim;
+ const struct ipv6_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, l2_len);
+ plen = rte_be_to_cpu_16(iph->payload_len) + sizeof(*iph) + l2_len;
+ if (plen < m->pkt_len) {
+ trim = m->pkt_len - plen;
+ rte_pktmbuf_trim(m, trim);
+ }
+}
+
+static inline void
+tcp_stat_update(struct netbe_lcore *lc, const struct rte_mbuf *m,
+ uint32_t l2_len, uint32_t l3_len)
+{
+ const struct tcp_hdr *th;
+
+ th = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len);
+ lc->tcp_stat.flags[th->tcp_flags]++;
+}
+
+static inline uint32_t
+get_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, uint32_t frag)
+{
+ const struct ipv4_hdr *iph;
+ int32_t dlen, len;
+
+ dlen = rte_pktmbuf_data_len(m);
+ dlen -= l2;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2);
+ len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER;
+
+ if (frag != 0 && is_ipv4_frag(iph)) {
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+ m->packet_type |= RTE_PTYPE_L4_FRAG;
+ }
+
+ if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto))
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+
+ return len;
+}
+
+static inline void
+fill_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto,
+ uint32_t frag, uint32_t l4_len)
+{
+ uint32_t len;
+
+ len = get_ipv4_hdr_len(m, l2, proto, frag);
+ fill_pkt_hdr_len(m, l2, len, l4_len);
+ adjust_ipv4_pktlen(m, l2);
+}
+
+static inline int
+ipv6x_hdr(uint32_t proto)
+{
+ return (proto == IPPROTO_HOPOPTS ||
+ proto == IPPROTO_ROUTING ||
+ proto == IPPROTO_FRAGMENT ||
+ proto == IPPROTO_AH ||
+ proto == IPPROTO_NONE ||
+ proto == IPPROTO_DSTOPTS);
+}
+
+static inline uint32_t
+get_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto,
+ uint32_t fproto)
+{
+ const struct ip6_ext *ipx;
+ int32_t dlen, len, ofs;
+
+ len = sizeof(struct ipv6_hdr);
+
+ dlen = rte_pktmbuf_data_len(m);
+ dlen -= l2;
+
+ ofs = l2 + len;
+ ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs);
+
+ while (ofs > 0 && len < dlen) {
+
+ switch (nproto) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ ofs = (ipx->ip6e_len + 1) << 3;
+ break;
+ case IPPROTO_AH:
+ ofs = (ipx->ip6e_len + 2) << 2;
+ break;
+ case IPPROTO_FRAGMENT:
+ /*
+ * tso_segsz is not used by RX, so use it as temporary
+ * buffer to store the fragment offset.
+ */
+ m->tso_segsz = ofs;
+ ofs = sizeof(struct ip6_frag);
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+ m->packet_type |= RTE_PTYPE_L4_FRAG;
+ break;
+ default:
+ ofs = 0;
+ }
+
+ if (ofs > 0) {
+ nproto = ipx->ip6e_nxt;
+ len += ofs;
+ ipx += ofs / sizeof(*ipx);
+ }
+ }
+
+ /* unrecognized or invalid packet. */
+ if ((ofs == 0 && nproto != fproto) || len > dlen)
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+
+ return len;
+}
+
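get_ipv6x_hdr_len() walks the IPv6 extension-header chain using each header type's own length encoding: 8-octet units (plus one) for HOPOPTS/ROUTING/DSTOPTS, 4-octet units (plus two) for AH, and a fixed-size fragment header that also re-marks the packet as RTE_PTYPE_L4_FRAG. A worked trace on an assumed frame:

/*
 * assumed frame: ether | ipv6 | hop-by-hop (ip6e_len = 0) | tcp
 *   len starts at 40 (the fixed ipv6 header size)
 *   HOPOPTS: ofs = (0 + 1) << 3 = 8, len = 48, nproto = ip6e_nxt = TCP
 *   TCP is not an extension header: ofs = 0, the loop ends
 * nproto == fproto, so packet_type stays valid and 48 is returned.
 */
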
+static inline uint32_t
+get_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto)
+{
+ const struct ipv6_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+
+ if (iph->proto == fproto)
+ return sizeof(struct ipv6_hdr);
+ else if (ipv6x_hdr(iph->proto) != 0)
+ return get_ipv6x_hdr_len(m, l2, iph->proto, fproto);
+
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return 0;
+}
+
+static inline void
+fill_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto,
+ uint32_t l4_len)
+{
+ uint32_t len;
+
+ len = get_ipv6_hdr_len(m, l2, fproto);
+ fill_pkt_hdr_len(m, l2, len, l4_len);
+ adjust_ipv6_pktlen(m, l2);
+}
+
+static inline struct rte_mbuf *
+handle_arp(struct rte_mbuf *m, struct netbe_lcore *lc, uint8_t port,
+ uint32_t l2len)
+{
+ const struct arp_hdr *ahdr;
+ struct pkt_buf *abuf;
+
+ ahdr = rte_pktmbuf_mtod_offset(m, const struct arp_hdr *, l2len);
+
+ if (ahdr->arp_hrd != rte_be_to_cpu_16(ARP_HRD_ETHER) ||
+ ahdr->arp_pro != rte_be_to_cpu_16(ETHER_TYPE_IPv4) ||
+ ahdr->arp_op != rte_be_to_cpu_16(ARP_OP_REQUEST)) {
+
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return m;
+ }
+
+ m->l2_len = l2len;
+ abuf = &lc->prtq[port].arp_buf;
+ if (abuf->num >= RTE_DIM(abuf->pkt))
+ return m;
+
+ abuf->pkt[abuf->num++] = m;
+
+ return NULL;
+}
+
+static inline struct rte_mbuf *
+fill_eth_tcp_arp_hdr_len(struct rte_mbuf *m, struct netbe_lcore *lc,
+ uint8_t port)
+{
+ uint32_t dlen, l2_len, l3_len, l4_len;
+ uint16_t etp;
+ const struct ether_hdr *eth;
+
+ dlen = rte_pktmbuf_data_len(m);
+
+ /* check that first segment is at least 54B long. */
+ if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
+ sizeof(struct tcp_hdr)) {
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return m;
+ }
+
+ l2_len = sizeof(*eth);
+
+ eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
+ etp = eth->ether_type;
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
+ l2_len += sizeof(struct vlan_hdr);
+
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_ARP))
+ return handle_arp(m, lc, port, l2_len);
+
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
+ m->packet_type = RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_TCP, 1);
+ l4_len = get_tcp_header_size(m, l2_len, l3_len);
+ fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(m, l2_len);
+ } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
+ dlen >= l2_len + sizeof(struct ipv6_hdr) +
+ sizeof(struct tcp_hdr)) {
+ m->packet_type = RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(m, l2_len, l3_len);
+ fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(m, l2_len);
+ } else
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+
+ return m;
+}
+
+static inline void
+fill_eth_tcp_hdr_len(struct rte_mbuf *m)
+{
+ uint32_t dlen, l2_len, l3_len, l4_len;
+ uint16_t etp;
+ const struct ether_hdr *eth;
+
+ dlen = rte_pktmbuf_data_len(m);
+
+ /* check that first segment is at least 54B long. */
+ if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
+ sizeof(struct tcp_hdr)) {
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return;
+ }
+
+ l2_len = sizeof(*eth);
+
+ eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
+ etp = eth->ether_type;
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
+ l2_len += sizeof(struct vlan_hdr);
+
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
+ m->packet_type = RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_TCP, 1);
+ l4_len = get_tcp_header_size(m, l2_len, l3_len);
+ fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(m, l2_len);
+ } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
+ dlen >= l2_len + sizeof(struct ipv6_hdr) +
+ sizeof(struct tcp_hdr)) {
+ m->packet_type = RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(m, l2_len, l3_len);
+ fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(m, l2_len);
+ } else
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+}
+
+static inline void
+fill_eth_udp_hdr_len(struct rte_mbuf *m)
+{
+ uint32_t dlen, l2_len;
+ uint16_t etp;
+ const struct ether_hdr *eth;
+
+ dlen = rte_pktmbuf_data_len(m);
+
+ /* check that first segment is at least 42B long. */
+ if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
+ sizeof(struct udp_hdr)) {
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return;
+ }
+
+ l2_len = sizeof(*eth);
+
+ eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
+ etp = eth->ether_type;
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
+ l2_len += sizeof(struct vlan_hdr);
+
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
+ m->packet_type = RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ fill_ipv4_hdr_len(m, l2_len, IPPROTO_UDP, 1,
+ sizeof(struct udp_hdr));
+ } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
+ dlen >= l2_len + sizeof(struct ipv6_hdr) +
+ sizeof(struct udp_hdr)) {
+ m->packet_type = RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ fill_ipv6_hdr_len(m, l2_len, IPPROTO_UDP,
+ sizeof(struct udp_hdr));
+ } else
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+}
+
+static inline uint16_t
+ipv4x_cksum(const void *iph, size_t len)
+{
+ uint16_t cksum;
+
+ cksum = rte_raw_cksum(iph, len);
+ return (cksum == 0xffff) ? cksum : ~cksum;
+}
+
+static inline void
+fix_reassembled(struct rte_mbuf *m, int32_t hwcsum, uint32_t proto)
+{
+ struct ipv4_hdr *iph;
+
+ /* update packet type. */
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+
+ if (proto == IPPROTO_TCP)
+ m->packet_type |= RTE_PTYPE_L4_TCP;
+ else
+ m->packet_type |= RTE_PTYPE_L4_UDP;
+
+ /* fix reassemble setting TX flags. */
+ m->ol_flags &= ~PKT_TX_IP_CKSUM;
+
+ /* fix l3_len after reassemble. */
+ if (RTE_ETH_IS_IPV6_HDR(m->packet_type))
+ m->l3_len = m->l3_len - sizeof(struct ipv6_extension_fragment);
+
+ /* recalculate ipv4 cksum after reassemble. */
+ else if (hwcsum == 0 && RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
+ iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
+ iph->hdr_checksum = ipv4x_cksum(iph, m->l3_len);
+ }
+}
+
+static struct rte_mbuf *
+reassemble(struct rte_mbuf *m, struct netbe_lcore *lc, uint64_t tms,
+ uint8_t port, uint32_t proto)
+{
+ uint32_t l3cs;
+ struct rte_ip_frag_tbl *tbl;
+ struct rte_ip_frag_death_row *dr;
+
+ tbl = lc->ftbl;
+ dr = &lc->death_row;
+ l3cs = lc->prtq[port].port.rx_offload & DEV_RX_OFFLOAD_IPV4_CKSUM;
+
+ if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
+
+ struct ipv4_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
+
+ /* process this fragment. */
+ m = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, iph);
+
+ } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
+
+ struct ipv6_hdr *iph;
+ struct ipv6_extension_fragment *fhdr;
+
+ iph = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len);
+
+ /*
+ * the fragment header offset was stored in tso_segsz
+ * temporarily, just to avoid another scan of the ipv6 header.
+ */
+ fhdr = rte_pktmbuf_mtod_offset(m,
+ struct ipv6_extension_fragment *, m->tso_segsz);
+ m->tso_segsz = 0;
+
+ /* process this fragment. */
+ m = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, iph, fhdr);
+
+ } else {
+ rte_pktmbuf_free(m);
+ m = NULL;
+ }
+
+ /* got reassembled packet. */
+ if (m != NULL)
+ fix_reassembled(m, l3cs, proto);
+
+ return m;
+}
+
+/* exclude NULLs from the final list of packets. */
+static inline uint32_t
+compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero)
+{
+ uint32_t i, j, k, l;
+
+ for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) {
+
+ /* found a hole. */
+ if (pkt[j] == NULL) {
+
+ /* find how big is it. */
+ for (i = j; i-- != 0 && pkt[i] == NULL; )
+ ;
+ /* fill the hole. */
+ for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++)
+ pkt[l] = pkt[k];
+
+ nb_pkt -= j - i;
+ nb_zero -= j - i;
+ j = i + 1;
+ }
+ }
+
+ return nb_pkt;
+}
+
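compress_pkt_list() back-fills holes in place, scanning from the tail so each surviving mbuf pointer is moved at most once. A worked trace on a small illustrative array:

/*
 * pkt = {A, NULL, B, NULL, NULL, C}, nb_pkt = 6, nb_zero = 3
 * 1) the tail scan finds the hole spanning indexes 3..4 (i = 2, j = 4);
 *    C is copied down: pkt = {A, NULL, B, C}, nb_pkt = 4, nb_zero = 1
 * 2) the next hole is index 1 (i = 0, j = 1); B and C are copied down:
 *    pkt = {A, B, C}, nb_pkt = 3, nb_zero = 0
 * the function returns 3.
 */
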
+/*
+ * if it is a fragment, try to reassemble it;
+ * if for some reason that can't be done, then
+ * set the pkt[] entry to NULL.
+ */
+#define DO_REASSEMBLE(proto) \
+do { \
+ if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == \
+ RTE_PTYPE_L4_FRAG) { \
+ cts = (cts == 0) ? rte_rdtsc() : cts; \
+ pkt[j] = reassemble(pkt[j], lc, cts, port, (proto)); \
+ x += (pkt[j] == NULL); \
+ } \
+} while (0)
+
+/*
+ * HW can recognize L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k)
+ */
+static uint16_t
+type0_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, tp;
+ struct netbe_lcore *lc;
+ uint32_t l4_len, l3_len, l2_len;
+ const struct ether_hdr *eth;
+
+ lc = user_param;
+ l2_len = sizeof(*eth);
+
+ RTE_SET_USED(lc);
+
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ /* non fragmented tcp packets. */
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L2_ETHER):
+ l4_len = get_tcp_header_size(pkt[j], l2_len,
+ sizeof(struct ipv4_hdr));
+ fill_pkt_hdr_len(pkt[j], l2_len,
+ sizeof(struct ipv4_hdr), l4_len);
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L2_ETHER):
+ l4_len = get_tcp_header_size(pkt[j], l2_len,
+ sizeof(struct ipv6_hdr));
+ fill_pkt_hdr_len(pkt[j], l2_len,
+ sizeof(struct ipv6_hdr), l4_len);
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv4_hdr_len(pkt[j], l2_len,
+ IPPROTO_TCP, 0);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ break;
+ default:
+ /* treat packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+ }
+
+ return nb_pkts;
+}
+
+/*
+ * HW can recognize L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k)
+ */
+static uint16_t
+type0_udp_rx_callback(uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, tp, x;
+ uint64_t cts;
+ struct netbe_lcore *lc;
+ uint32_t l2_len;
+ const struct ether_hdr *eth;
+
+ lc = user_param;
+ cts = 0;
+ l2_len = sizeof(*eth);
+
+ x = 0;
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ /* non fragmented udp packets. */
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L2_ETHER):
+ fill_pkt_hdr_len(pkt[j], l2_len,
+ sizeof(struct ipv4_hdr),
+ sizeof(struct udp_hdr));
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L2_ETHER):
+ fill_pkt_hdr_len(pkt[j], l2_len,
+ sizeof(struct ipv6_hdr),
+ sizeof(struct udp_hdr));
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv4_hdr_len(pkt[j], l2_len,
+ UINT32_MAX, 0, sizeof(struct udp_hdr));
+ break;
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv6_hdr_len(pkt[j], l2_len,
+ IPPROTO_UDP, sizeof(struct udp_hdr));
+ break;
+ /* possibly fragmented udp packets. */
+ case (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER):
+ case (RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER):
+ fill_ipv4_hdr_len(pkt[j], l2_len,
+ IPPROTO_UDP, 1, sizeof(struct udp_hdr));
+ break;
+ case (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER):
+ case (RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER):
+ fill_ipv6_hdr_len(pkt[j], l2_len,
+ IPPROTO_UDP, sizeof(struct udp_hdr));
+ break;
+ default:
+ /* treat packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+
+ DO_REASSEMBLE(IPPROTO_UDP);
+ }
+
+ /* if reassemble was invoked, clean up its death-row. */
+ if (cts != 0)
+ rte_ip_frag_free_death_row(&lc->death_row, 0);
+
+ if (x == 0)
+ return nb_pkts;
+
+ NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
+ "%u non-reassembled fragments;\n",
+ __func__, port, queue, nb_pkts, x);
+
+ return compress_pkt_list(pkt, nb_pkts, x);
+}
+
+/*
+ * HW can recognize L2/L3/L4 and fragments (i40e).
+ */
+static uint16_t
+type1_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, tp;
+ struct netbe_lcore *lc;
+ uint32_t l4_len, l3_len, l2_len;
+ const struct ether_hdr *eth;
+
+ lc = user_param;
+ l2_len = sizeof(*eth);
+
+ RTE_SET_USED(lc);
+
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv4_hdr_len(pkt[j], l2_len,
+ IPPROTO_TCP, 0);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ tcp_stat_update(lc, pkt[j], l2_len, l3_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ tcp_stat_update(lc, pkt[j], l2_len, l3_len);
+ break;
+ default:
+ /* treat packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+
+ }
+
+ return nb_pkts;
+}
+
+/*
+ * HW can recognize L2/L3/L4 and fragments (i40e).
+ */
+static uint16_t
+type1_udp_rx_callback(uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, tp, x;
+ uint64_t cts;
+ struct netbe_lcore *lc;
+ uint32_t l2_len;
+ const struct ether_hdr *eth;
+
+ lc = user_param;
+ cts = 0;
+ l2_len = sizeof(*eth);
+
+ x = 0;
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv4_hdr_len(pkt[j], l2_len,
+ UINT32_MAX, 0, sizeof(struct udp_hdr));
+ break;
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv6_hdr_len(pkt[j], l2_len,
+ IPPROTO_UDP, sizeof(struct udp_hdr));
+ break;
+ case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv4_hdr_len(pkt[j], l2_len,
+ IPPROTO_UDP, 0, sizeof(struct udp_hdr));
+ break;
+ case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv6_hdr_len(pkt[j], l2_len,
+ IPPROTO_UDP, sizeof(struct udp_hdr));
+ break;
+ default:
+ /* treat packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+
+ DO_REASSEMBLE(IPPROTO_UDP);
+ }
+
+ /* if reassemble was invoked, clean up its death-row. */
+ if (cts != 0)
+ rte_ip_frag_free_death_row(&lc->death_row, 0);
+
+ if (x == 0)
+ return nb_pkts;
+
+ NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
+ "%u non-reassembled fragments;\n",
+ __func__, port, queue, nb_pkts, x);
+
+ return compress_pkt_list(pkt, nb_pkts, x);
+}
+
+/*
+ * generic, assumes HW doesn't recognize any packet type.
+ */
+static uint16_t
+typen_tcp_arp_rx_callback(uint8_t port, uint16_t queue, struct rte_mbuf *pkt[],
+ uint16_t nb_pkts, uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, x;
+ struct netbe_lcore *lc;
+
+ lc = user_param;
+
+ RTE_SET_USED(queue);
+ RTE_SET_USED(max_pkts);
+
+ x = 0;
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+ pkt[j] = fill_eth_tcp_arp_hdr_len(pkt[j], lc, port);
+ x += (pkt[j] == NULL);
+ }
+
+ if (x == 0)
+ return nb_pkts;
+
+ return compress_pkt_list(pkt, nb_pkts, x);
+}
+
+static uint16_t
+typen_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j;
+ struct netbe_lcore *lc;
+
+ lc = user_param;
+
+ RTE_SET_USED(lc);
+
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+ fill_eth_tcp_hdr_len(pkt[j]);
+ }
+
+ return nb_pkts;
+}
+
+static uint16_t
+typen_udp_rx_callback(uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, x;
+ uint64_t cts;
+ struct netbe_lcore *lc;
+
+ lc = user_param;
+ cts = 0;
+
+ x = 0;
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+ fill_eth_udp_hdr_len(pkt[j]);
+
+ DO_REASSEMBLE(IPPROTO_UDP);
+ }
+
+ /* if reassemble was invoked, clean up its death-row. */
+ if (cts != 0)
+ rte_ip_frag_free_death_row(&lc->death_row, 0);
+
+ if (x == 0)
+ return nb_pkts;
+
+ NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
+ "%u non-reassembled fragments;\n",
+ __func__, port, queue, nb_pkts, x);
+
+ return compress_pkt_list(pkt, nb_pkts, x);
+}
+
+#include "pkt_dpdk_legacy.h"
diff --git a/examples/udpfwd/pkt_dpdk_legacy.h b/examples/l4fwd/pkt_dpdk_legacy.h
index c32f044..d840978 100644
--- a/examples/udpfwd/pkt_dpdk_legacy.h
+++ b/examples/l4fwd/pkt_dpdk_legacy.h
@@ -18,59 +18,39 @@
#include "dpdk_version.h"
+struct ptype2cb {
+ uint32_t mask;
+ const char *name;
+ rte_rx_callback_fn fn;
+};
+
+enum {
+ ETHER_PTYPE = 0x1,
+ IPV4_PTYPE = 0x2,
+ IPV4_EXT_PTYPE = 0x4,
+ IPV6_PTYPE = 0x8,
+ IPV6_EXT_PTYPE = 0x10,
+ TCP_PTYPE = 0x20,
+ UDP_PTYPE = 0x40,
+};
+
#ifdef DPDK_VERSION_GE_1604
-int
-setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc,
- uint16_t qid)
+static uint32_t
+get_ptypes(const struct netbe_port *uprt)
{
- int32_t i, rc;
uint32_t smask;
- void *cb;
-
+ int32_t i, rc;
const uint32_t pmask = RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK |
RTE_PTYPE_L4_MASK;
- enum {
- ETHER_PTYPE = 0x1,
- IPV4_PTYPE = 0x2,
- IPV4_EXT_PTYPE = 0x4,
- IPV6_PTYPE = 0x8,
- IPV6_EXT_PTYPE = 0x10,
- UDP_PTYPE = 0x20,
- };
-
- static const struct {
- uint32_t mask;
- const char *name;
- rte_rx_callback_fn fn;
- } ptype2cb[] = {
- {
- .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE |
- IPV6_PTYPE | IPV6_EXT_PTYPE | UDP_PTYPE,
- .name = "HW l2/l3x/l4 ptype",
- .fn = type0_rx_callback,
- },
- {
- .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE |
- UDP_PTYPE,
- .name = "HW l2/l3/l4 ptype",
- .fn = type1_rx_callback,
- },
- {
- .mask = 0,
- .name = "no HW ptype",
- .fn = typen_rx_callback,
- },
- };
-
smask = 0;
rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, NULL, 0);
if (rc < 0) {
RTE_LOG(ERR, USER1,
"%s(port=%u) failed to get supported ptypes;\n",
__func__, uprt->id);
- return rc;
+ return smask;
}
uint32_t ptype[rc];
@@ -95,13 +75,106 @@ setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc,
case RTE_PTYPE_L3_IPV6_EXT:
smask |= IPV6_EXT_PTYPE;
break;
+ case RTE_PTYPE_L4_TCP:
+ smask |= TCP_PTYPE;
+ break;
case RTE_PTYPE_L4_UDP:
smask |= UDP_PTYPE;
break;
}
}
- for (i = 0; i != RTE_DIM(ptype2cb); i++) {
+ return smask;
+}
+
+#else
+
+static uint32_t
+get_ptypes(__rte_unused const struct netbe_port *uprt)
+{
+ return 0;
+}
+
+#endif /* DPDK_VERSION_GE_1604 */
+
+int
+setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc,
+ uint16_t qid, uint32_t arp)
+{
+ int32_t rc;
+ uint32_t i, n, smask;
+ void *cb;
+ const struct ptype2cb *ptype2cb;
+
+ static const struct ptype2cb tcp_ptype2cb[] = {
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE |
+ IPV6_PTYPE | IPV6_EXT_PTYPE | TCP_PTYPE,
+ .name = "HW l2/l3x/l4-tcp ptype",
+ .fn = type0_tcp_rx_callback,
+ },
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE |
+ TCP_PTYPE,
+ .name = "HW l2/l3/l4-tcp ptype",
+ .fn = type1_tcp_rx_callback,
+ },
+ {
+ .mask = 0,
+ .name = "tcp no HW ptype",
+ .fn = typen_tcp_rx_callback,
+ },
+ };
+
+ static const struct ptype2cb tcp_arp_ptype2cb[] = {
+ {
+ .mask = 0,
+ .name = "tcp with arp no HW ptype",
+ .fn = typen_tcp_arp_rx_callback,
+ },
+ };
+
+ static const struct ptype2cb udp_ptype2cb[] = {
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE |
+ IPV6_PTYPE | IPV6_EXT_PTYPE | UDP_PTYPE,
+ .name = "HW l2/l3x/l4-udp ptype",
+ .fn = type0_udp_rx_callback,
+ },
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE |
+ UDP_PTYPE,
+ .name = "HW l2/l3/l4-udp ptype",
+ .fn = type1_udp_rx_callback,
+ },
+ {
+ .mask = 0,
+ .name = "udp no HW ptype",
+ .fn = typen_udp_rx_callback,
+ },
+ };
+
+ smask = get_ptypes(uprt);
+
+ if (lc->proto == TLE_PROTO_TCP) {
+ if (arp != 0) {
+ ptype2cb = tcp_arp_ptype2cb;
+ n = RTE_DIM(tcp_arp_ptype2cb);
+ } else {
+ ptype2cb = tcp_ptype2cb;
+ n = RTE_DIM(tcp_ptype2cb);
+ }
+ } else if (lc->proto == TLE_PROTO_UDP) {
+ ptype2cb = udp_ptype2cb;
+ n = RTE_DIM(udp_ptype2cb);
+ } else {
+ RTE_LOG(ERR, USER1,
+ "%s(lc=%u) unsupported proto: %u\n",
+ __func__, lc->id, lc->proto);
+ return -EINVAL;
+ }
+
+ for (i = 0; i != n; i++) {
if ((smask & ptype2cb[i].mask) == ptype2cb[i].mask) {
cb = rte_eth_add_rx_callback(uprt->id, qid,
ptype2cb[i].fn, lc);
@@ -121,25 +194,4 @@ setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc,
return -ENOENT;
}
-#else
-
-int
-setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc,
- uint16_t qid)
-{
- void *cb;
- int32_t rc;
-
- cb = rte_eth_add_rx_callback(uprt->id, qid, typen_rx_callback, lc);
- rc = -rte_errno;
- RTE_LOG(ERR, USER1,
- "%s(port=%u), setup RX callback \"%s\" "
- "returns %p;\n",
- __func__, uprt->id, "no HW ptype", cb);
-
- return ((cb == NULL) ? rc : 0);
-}
-
-#endif /* DPDK_VERSION_GE_1604 */
-
#endif /* PKT_DPDK_LEGACY_H_ */
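
Callback selection in setup_rx_cb() is a best-capability match: the first table entry whose whole mask is contained in the ptypes the port advertises wins, and the zero mask at the end makes the pure-software parser an unconditional fallback. A fragment illustrating how the TCP table above resolves for two assumed capability sets (an ixgbe-class NIC that reports extension headers and an i40e-class NIC that does not):

/* illustrative only: assumed smask values from get_ptypes(). */
uint32_t ixgbe_like = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE |
	IPV6_PTYPE | IPV6_EXT_PTYPE | TCP_PTYPE;
uint32_t i40e_like = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE | TCP_PTYPE;

/* the selection test is (smask & mask) == mask */
assert((ixgbe_like & tcp_ptype2cb[0].mask) == tcp_ptype2cb[0].mask);
assert((i40e_like & tcp_ptype2cb[0].mask) != tcp_ptype2cb[0].mask);
assert((i40e_like & tcp_ptype2cb[1].mask) == tcp_ptype2cb[1].mask);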
diff --git a/examples/l4fwd/port.h b/examples/l4fwd/port.h
new file mode 100644
index 0000000..bc13dca
--- /dev/null
+++ b/examples/l4fwd/port.h
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PORT_H_
+#define PORT_H_
+
+static void
+prepare_hash_key(struct netbe_port *uprt, uint8_t key_size, uint16_t family)
+{
+ uint32_t align_nb_q;
+
+ align_nb_q = rte_align32pow2(uprt->nb_lcore);
+ memset(uprt->hash_key, 0, RSS_HASH_KEY_LENGTH);
+ uprt->hash_key_size = key_size;
+ if (family == AF_INET)
+ uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV4] = align_nb_q;
+ else
+ uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV6] = align_nb_q;
+}
+
+static int
+update_rss_conf(struct netbe_port *uprt,
+ const struct rte_eth_dev_info *dev_info,
+ struct rte_eth_conf *port_conf, uint32_t proto)
+{
+ uint8_t hash_key_size;
+
+ if (uprt->nb_lcore > 1) {
+ if (dev_info->hash_key_size > 0)
+ hash_key_size = dev_info->hash_key_size;
+ else {
+ RTE_LOG(ERR, USER1,
+ "%s: dev_info did not provide a valid hash "
+ "key size\n", __func__);
+ return -EINVAL;
+ }
+
+ if (uprt->ipv4 != INADDR_ANY &&
+ memcmp(&uprt->ipv6, &in6addr_any,
+ sizeof(uprt->ipv6)) != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: RSS for both IPv4 and IPv6 not "
+ "supported!\n", __func__);
+ return -EINVAL;
+ } else if (uprt->ipv4 != INADDR_ANY) {
+ prepare_hash_key(uprt, hash_key_size, AF_INET);
+ } else if (memcmp(&uprt->ipv6, &in6addr_any, sizeof(uprt->ipv6))
+ != 0) {
+ prepare_hash_key(uprt, hash_key_size, AF_INET6);
+ } else {
+ RTE_LOG(ERR, USER1,
+ "%s: No IPv4 or IPv6 address is found!\n",
+ __func__);
+ return -EINVAL;
+ }
+ port_conf->rxmode.mq_mode = ETH_MQ_RX_RSS;
+ if (proto == TLE_PROTO_TCP)
+ port_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_TCP;
+ else
+ port_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_UDP;
+ port_conf->rx_adv_conf.rss_conf.rss_key_len = hash_key_size;
+ port_conf->rx_adv_conf.rss_conf.rss_key = uprt->hash_key;
+ }
+
+ return 0;
+}
+
+static uint32_t
+qidx_from_hash_index(uint32_t hash, uint32_t align_nb_q)
+{
+ uint32_t i, nb_bit, q;
+
+ nb_bit = (sizeof(uint32_t) * CHAR_BIT) - __builtin_clz(align_nb_q - 1);
+ q = (hash & 1);
+ for (i = 1; i < nb_bit; i++) {
+ hash >>= 1;
+ q <<= 1;
+ q |= (hash & 1);
+ }
+
+ return q;
+}
+
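qidx_from_hash_index() is a bit reversal over the low log2(align_nb_q) bits of the hash index; together with prepare_hash_key() above it ties the queue choice to the destination-port bytes of the packet (the bit order presumably compensates for how the Toeplitz hash folds the key into the result, though the patch does not spell this out). The resulting mappings for small queue counts:

/*
 * align_nb_q = 4: hash index 0,1,2,3   -> queue 0,2,1,3
 * align_nb_q = 8: hash index 0,1,...,7 -> queue 0,4,2,6,1,5,3,7
 * update_rss_reta() below then writes
 * reta[i] = qidx_from_hash_index(i, align_nb_q) % nb_lcore.
 */
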
+static int
+update_rss_reta(struct netbe_port *uprt,
+ const struct rte_eth_dev_info *dev_info)
+{
+ struct rte_eth_rss_reta_entry64 reta_conf[RSS_RETA_CONF_ARRAY_SIZE];
+ int32_t i, rc, align_nb_q;
+ int32_t q_index, idx, shift;
+
+ if (uprt->nb_lcore > 1) {
+ if (dev_info->reta_size == 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: Redirection table size 0 is invalid for "
+ "RSS\n", __func__);
+ return -EINVAL;
+ }
+ RTE_LOG(NOTICE, USER1,
+ "%s: The reta size of port %d is %u\n",
+ __func__, uprt->id, dev_info->reta_size);
+
+ if (dev_info->reta_size > ETH_RSS_RETA_SIZE_512) {
+ RTE_LOG(ERR, USER1,
+ "%s: More than %u entries of Reta not supported\n",
+ __func__, ETH_RSS_RETA_SIZE_512);
+ return -EINVAL;
+ }
+
+ memset(reta_conf, 0, sizeof(reta_conf));
+ align_nb_q = rte_align32pow2(uprt->nb_lcore);
+ for (i = 0; i < align_nb_q; i++) {
+ q_index = qidx_from_hash_index(i, align_nb_q) %
+ uprt->nb_lcore;
+
+ idx = i / RTE_RETA_GROUP_SIZE;
+ shift = i % RTE_RETA_GROUP_SIZE;
+ reta_conf[idx].mask |= (1ULL << shift);
+ reta_conf[idx].reta[shift] = q_index;
+ RTE_LOG(NOTICE, USER1,
+ "%s: port=%u RSS reta conf: hash=%u, q=%u\n",
+ __func__, uprt->id, i, q_index);
+ }
+
+ rc = rte_eth_dev_rss_reta_update(uprt->id,
+ reta_conf, dev_info->reta_size);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: Bad redirection table parameter, "
+ "rc = %d\n", __func__, rc);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Initialise DPDK port.
+ * In the current version, multiple queues per port are used.
+ */
+static int
+port_init(struct netbe_port *uprt, uint32_t proto)
+{
+ int32_t rc;
+ struct rte_eth_conf port_conf;
+ struct rte_eth_dev_info dev_info;
+
+ rte_eth_dev_info_get(uprt->id, &dev_info);
+ if ((dev_info.rx_offload_capa & uprt->rx_offload) != uprt->rx_offload) {
+ RTE_LOG(ERR, USER1,
+ "port#%u supported/requested RX offloads don't match, "
+ "supported: %#x, requested: %#x;\n",
+ uprt->id, dev_info.rx_offload_capa, uprt->rx_offload);
+ return -EINVAL;
+ }
+ if ((dev_info.tx_offload_capa & uprt->tx_offload) != uprt->tx_offload) {
+ RTE_LOG(ERR, USER1,
+ "port#%u supported/requested TX offloads don't match, "
+ "supported: %#x, requested: %#x;\n",
+ uprt->id, dev_info.tx_offload_capa, uprt->tx_offload);
+ return -EINVAL;
+ }
+
+ port_conf = port_conf_default;
+ if ((uprt->rx_offload & RX_CSUM_OFFLOAD) != 0) {
+ RTE_LOG(ERR, USER1, "%s(%u): enabling RX csum offload;\n",
+ __func__, uprt->id);
+ port_conf.rxmode.hw_ip_checksum = 1;
+ }
+ port_conf.rxmode.max_rx_pkt_len = uprt->mtu + ETHER_CRC_LEN;
+
+ rc = update_rss_conf(uprt, &dev_info, &port_conf, proto);
+ if (rc != 0)
+ return rc;
+
+ rc = rte_eth_dev_configure(uprt->id, uprt->nb_lcore, uprt->nb_lcore,
+ &port_conf);
+ RTE_LOG(NOTICE, USER1,
+ "%s: rte_eth_dev_configure(prt_id=%u, nb_rxq=%u, nb_txq=%u) "
+ "returns %d;\n", __func__, uprt->id, uprt->nb_lcore,
+ uprt->nb_lcore, rc);
+ if (rc != 0)
+ return rc;
+
+ return 0;
+}
+
+static int
+queue_init(struct netbe_port *uprt, struct rte_mempool *mp)
+{
+ int32_t socket, rc;
+ uint16_t q;
+ struct rte_eth_dev_info dev_info;
+
+ rte_eth_dev_info_get(uprt->id, &dev_info);
+
+ socket = rte_eth_dev_socket_id(uprt->id);
+
+ dev_info.default_rxconf.rx_drop_en = 1;
+
+ dev_info.default_txconf.tx_free_thresh = TX_RING_SIZE / 2;
+ if (uprt->tx_offload != 0) {
+ RTE_LOG(ERR, USER1, "%s(%u): enabling full featured TX;\n",
+ __func__, uprt->id);
+ dev_info.default_txconf.txq_flags = 0;
+ }
+
+ for (q = 0; q < uprt->nb_lcore; q++) {
+ rc = rte_eth_rx_queue_setup(uprt->id, q, RX_RING_SIZE,
+ socket, &dev_info.default_rxconf, mp);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: rx queue=%u setup failed with error "
+ "code: %d\n", __func__, q, rc);
+ return rc;
+ }
+ }
+
+ for (q = 0; q < uprt->nb_lcore; q++) {
+ rc = rte_eth_tx_queue_setup(uprt->id, q, TX_RING_SIZE,
+ socket, &dev_info.default_txconf);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: tx queue=%u setup failed with error "
+ "code: %d\n", __func__, q, rc);
+ return rc;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Check that lcore is enabled, not master, and not in use already.
+ */
+static int
+check_lcore(uint32_t lc)
+{
+ if (rte_lcore_is_enabled(lc) == 0) {
+ RTE_LOG(ERR, USER1, "lcore %u is not enabled\n", lc);
+ return -EINVAL;
+ }
+ if (rte_eal_get_lcore_state(lc) == RUNNING) {
+ RTE_LOG(ERR, USER1, "lcore %u already running %p\n",
+ lc, lcore_config[lc].f);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static void
+log_netbe_prt(const struct netbe_port *uprt)
+{
+ uint32_t i, n;
+ /* up to 3 digits plus ',' per lcore id, plus terminating null. */
+ char corelist[4 * RTE_MAX_LCORE + 1];
+ char hashkey[2 * RSS_HASH_KEY_LENGTH + 1];
+
+ memset(corelist, 0, sizeof(corelist));
+ memset(hashkey, 0, sizeof(hashkey));
+ for (i = 0, n = 0; i < uprt->nb_lcore; i++)
+ n += sprintf(corelist + n,
+ (i + 1 < uprt->nb_lcore) ? "%u," : "%u",
+ uprt->lcore_id[i]);
+
+ for (i = 0; i < uprt->hash_key_size; i++)
+ sprintf(hashkey + (2 * i), "%02x", uprt->hash_key[i]);
+
+ RTE_LOG(NOTICE, USER1,
+ "uprt %p = <id = %u, lcore = <%s>, mtu = %u, "
+ "rx_offload = %u, tx_offload = %u,\n"
+ "ipv4 = %#x, "
+ "ipv6 = %04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx, "
+ "mac = %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx>;\n"
+ "hashkey = %s;\n",
+ uprt, uprt->id, corelist,
+ uprt->mtu, uprt->rx_offload, uprt->tx_offload,
+ uprt->ipv4,
+ uprt->ipv6.s6_addr16[0], uprt->ipv6.s6_addr16[1],
+ uprt->ipv6.s6_addr16[2], uprt->ipv6.s6_addr16[3],
+ uprt->ipv6.s6_addr16[4], uprt->ipv6.s6_addr16[5],
+ uprt->ipv6.s6_addr16[6], uprt->ipv6.s6_addr16[7],
+ uprt->mac.addr_bytes[0], uprt->mac.addr_bytes[1],
+ uprt->mac.addr_bytes[2], uprt->mac.addr_bytes[3],
+ uprt->mac.addr_bytes[4], uprt->mac.addr_bytes[5],
+ hashkey);
+}
+
+static void
+log_netbe_cfg(const struct netbe_cfg *ucfg)
+{
+ uint32_t i;
+
+ RTE_LOG(NOTICE, USER1,
+ "ucfg @ %p, prt_num = %u\n", ucfg, ucfg->prt_num);
+
+ for (i = 0; i != ucfg->prt_num; i++)
+ log_netbe_prt(ucfg->prt + i);
+}
+
+static int
+pool_init(uint32_t sid)
+{
+ int32_t rc;
+ struct rte_mempool *mp;
+ char name[RTE_MEMPOOL_NAMESIZE];
+
+ snprintf(name, sizeof(name), "MP%u", sid);
+ mp = rte_pktmbuf_pool_create(name, MPOOL_NB_BUF, MPOOL_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE, sid - 1);
+ if (mp == NULL) {
+ rc = -rte_errno;
+ RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n",
+ __func__, sid - 1, rc);
+ return rc;
+ }
+
+ mpool[sid] = mp;
+ return 0;
+}
+
+static int
+frag_pool_init(uint32_t sid)
+{
+ int32_t rc;
+ struct rte_mempool *frag_mp;
+ char frag_name[RTE_MEMPOOL_NAMESIZE];
+
+ snprintf(frag_name, sizeof(frag_name), "frag_MP%u", sid);
+ frag_mp = rte_pktmbuf_pool_create(frag_name, MPOOL_NB_BUF,
+ MPOOL_CACHE_SIZE, 0, FRAG_MBUF_BUF_SIZE, sid - 1);
+ if (frag_mp == NULL) {
+ rc = -rte_errno;
+ RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n",
+ __func__, sid - 1, rc);
+ return rc;
+ }
+
+ frag_mpool[sid] = frag_mp;
+ return 0;
+}
+
+static struct netbe_lcore *
+find_initialized_lcore(struct netbe_cfg *cfg, uint32_t lc_num)
+{
+ uint32_t i;
+
+ for (i = 0; i < cfg->cpu_num; i++)
+ if (cfg->cpu[i].id == lc_num)
+ return &cfg->cpu[i];
+
+ return NULL;
+}
+
+/*
+ * Setup all enabled ports.
+ */
+static int
+netbe_port_init(struct netbe_cfg *cfg)
+{
+ int32_t rc;
+ uint32_t i, sid, j;
+ struct netbe_port *prt;
+ struct netbe_lcore *lc;
+
+ for (i = 0; i != cfg->prt_num; i++) {
+ prt = cfg->prt + i;
+ rc = port_init(prt, cfg->proto);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: port=%u init failed with error code: %d\n",
+ __func__, prt->id, rc);
+ return rc;
+ }
+ rte_eth_macaddr_get(prt->id, &prt->mac);
+ if (cfg->promisc)
+ rte_eth_promiscuous_enable(prt->id);
+
+ for (j = 0; j < prt->nb_lcore; j++) {
+ rc = check_lcore(prt->lcore_id[j]);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: processing failed with err: %d\n",
+ __func__, rc);
+ return rc;
+ }
+
+ sid = rte_lcore_to_socket_id(prt->lcore_id[j]) + 1;
+ assert(sid < RTE_DIM(mpool));
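+			/* mpool[] is indexed by socket_id + 1; slot 0 is reserved (presumably for SOCKET_ID_ANY) */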
+
+ if (mpool[sid] == NULL) {
+ rc = pool_init(sid);
+ if (rc != 0)
+ return rc;
+ }
+
+ if (frag_mpool[sid] == NULL) {
+ rc = frag_pool_init(sid);
+ if (rc != 0)
+ return rc;
+ }
+
+ rc = queue_init(prt, mpool[sid]);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: lcore=%u queue init failed with "
+ "err: %d\n",
+ __func__, prt->lcore_id[j], rc);
+ return rc;
+ }
+
+ /* calculate number of queues and assign queue id
+ * per lcore. */
+			lc = find_initialized_lcore(cfg, prt->lcore_id[j]);
+ if (lc == NULL) {
+ lc = &cfg->cpu[cfg->cpu_num];
+ lc->id = prt->lcore_id[j];
+ lc->proto = becfg.proto;
+ cfg->cpu_num++;
+ }
+
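+			/* grow this lcore's port/queue list by one entry */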
+ lc->prtq = rte_realloc(lc->prtq, sizeof(*(lc->prtq)) *
+ (lc->prtq_num + 1), RTE_CACHE_LINE_SIZE);
+ if (lc->prtq == NULL) {
+ RTE_LOG(ERR, USER1,
+ "%s: failed to reallocate memory\n",
+ __func__);
+ return -ENOMEM;
+ }
+ lc->prtq[lc->prtq_num].rxqid = j;
+ lc->prtq[lc->prtq_num].txqid = j;
+ lc->prtq[lc->prtq_num].port = *prt;
+ lc->prtq_num++;
+ }
+ }
+ log_netbe_cfg(cfg);
+
+ return 0;
+}
+
+#endif /* PORT_H_ */
diff --git a/examples/l4fwd/tcp.h b/examples/l4fwd/tcp.h
new file mode 100644
index 0000000..031ad8d
--- /dev/null
+++ b/examples/l4fwd/tcp.h
@@ -0,0 +1,701 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TCP_H_
+#define TCP_H_
+
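+/* max number of streams handled by one tle_tcp_process() call (see netbe_lcore_tcp() below) */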
+#define TCP_MAX_PROCESS 0x20
+
+static inline void
+netfe_stream_term_tcp(struct netfe_lcore *fe, struct netfe_stream *fes)
+{
+ fes->s = NULL;
+ fes->fwds = NULL;
+ memset(&fes->stat, 0, sizeof(fes->stat));
+ netfe_put_stream(fe, &fe->free, fes);
+}
+
+static inline void
+netfe_stream_close_tcp(struct netfe_lcore *fe, struct netfe_stream *fes)
+{
+ tle_tcp_stream_close(fes->s);
+ netfe_stream_term_tcp(fe, fes);
+}
+
+/*
+ * helper function: opens an IPv4 or IPv6 TCP stream for the selected port.
+ */
+static struct netfe_stream *
+netfe_stream_open_tcp(struct netfe_lcore *fe, struct netfe_sprm *sprm,
+ uint32_t lcore, uint16_t op, uint32_t bidx, uint8_t server_mode)
+{
+ int32_t rc;
+ struct netfe_stream *fes;
+ struct sockaddr_in *l4;
+ struct sockaddr_in6 *l6;
+ uint16_t errport;
+ struct tle_tcp_stream_param tprm;
+
+ fes = netfe_get_stream(&fe->free);
+ if (fes == NULL) {
+ rte_errno = ENOBUFS;
+ return NULL;
+ }
+
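+	/* in server mode, replace the RX event with one from the SYN event
+	 * queue, so incoming connection requests are delivered via fe->syneq. */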
+ if (server_mode != 0) {
+ tle_event_free(fes->rxev);
+ fes->rxev = tle_event_alloc(fe->syneq, fes);
+ }
+
+ if (fes->rxev == NULL) {
+ netfe_stream_close_tcp(fe, fes);
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ /* activate rx, tx and err events for the stream */
+ if (op == TXONLY || op == FWD) {
+ tle_event_active(fes->txev, TLE_SEV_DOWN);
+ fes->stat.txev[TLE_SEV_DOWN]++;
+ }
+
+ if (op != TXONLY || server_mode != 0) {
+ tle_event_active(fes->rxev, TLE_SEV_DOWN);
+ fes->stat.rxev[TLE_SEV_DOWN]++;
+ }
+ tle_event_active(fes->erev, TLE_SEV_DOWN);
+ fes->stat.erev[TLE_SEV_DOWN]++;
+
+ memset(&tprm, 0, sizeof(tprm));
+ tprm.addr.local = sprm->local_addr;
+ tprm.addr.remote = sprm->remote_addr;
+ tprm.cfg.err_ev = fes->erev;
+ tprm.cfg.recv_ev = fes->rxev;
+ if (op != FWD)
+ tprm.cfg.send_ev = fes->txev;
+
+ fes->s = tle_tcp_stream_open(becfg.cpu[bidx].ctx, &tprm);
+
+ if (fes->s == NULL) {
+ rc = rte_errno;
+ netfe_stream_close_tcp(fe, fes);
+ rte_errno = rc;
+
+ if (sprm->local_addr.ss_family == AF_INET) {
+ l4 = (struct sockaddr_in *) &sprm->local_addr;
+ errport = ntohs(l4->sin_port);
+ } else {
+ l6 = (struct sockaddr_in6 *) &sprm->local_addr;
+ errport = ntohs(l6->sin6_port);
+ }
+
+ RTE_LOG(ERR, USER1, "stream open failed for port %u with error "
+ "code=%u, bidx=%u, lc=%u\n",
+ errport, rc, bidx, becfg.cpu[bidx].id);
+ return NULL;
+ }
+
+ RTE_LOG(NOTICE, USER1,
+ "%s(%u)={s=%p, op=%hu, proto=%s, rxev=%p, txev=%p}, belc=%u\n",
+ __func__, lcore, fes->s, op, proto_name[becfg.proto],
+ fes->rxev, fes->txev, becfg.cpu[bidx].id);
+
+ fes->op = op;
+ fes->proto = becfg.proto;
+ fes->family = sprm->local_addr.ss_family;
+ fes->laddr = sprm->local_addr;
+ netfe_put_stream(fe, &fe->use, fes);
+
+ return fes;
+}
+
+static int
+netfe_lcore_init_tcp(const struct netfe_lcore_prm *prm)
+{
+ size_t sz;
+ int32_t rc;
+ uint32_t i, lcore, snum;
+ struct netfe_lcore *fe;
+ struct tle_evq_param eprm;
+ struct netfe_stream *fes;
+ struct netfe_sprm *sprm;
+
+ lcore = rte_lcore_id();
+
+ snum = prm->max_streams;
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u, nb_streams=%u, max_streams=%u)\n",
+ __func__, lcore, prm->nb_streams, snum);
+
+ memset(&eprm, 0, sizeof(eprm));
+ eprm.socket_id = rte_lcore_to_socket_id(lcore);
+ eprm.max_events = snum;
+
+ sz = sizeof(*fe) + snum * sizeof(struct netfe_stream);
+ fe = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ rte_lcore_to_socket_id(lcore));
+
+ if (fe == NULL) {
+ RTE_LOG(ERR, USER1, "%s:%d failed to allocate %zu bytes\n",
+ __func__, __LINE__, sz);
+ return -ENOMEM;
+ }
+
+ RTE_PER_LCORE(_fe) = fe;
+
+ fe->snum = snum;
+ /* initialize the stream pool */
+ LIST_INIT(&fe->free.head);
+ LIST_INIT(&fe->use.head);
+
+ /* allocate the event queues */
+ fe->syneq = tle_evq_create(&eprm);
+ fe->ereq = tle_evq_create(&eprm);
+ fe->rxeq = tle_evq_create(&eprm);
+ fe->txeq = tle_evq_create(&eprm);
+
+ RTE_LOG(INFO, USER1, "%s(%u) synevq=%p, erevq=%p, rxevq=%p, txevq=%p\n",
+ __func__, lcore, fe->syneq, fe->ereq, fe->rxeq, fe->txeq);
+ if (fe->syneq == NULL || fe->ereq == NULL || fe->rxeq == NULL ||
+ fe->txeq == NULL)
+ return -ENOMEM;
+
+ fes = (struct netfe_stream *)(fe + 1);
+ for (i = 0; i != snum; i++) {
+ fes[i].rxev = tle_event_alloc(fe->rxeq, fes + i);
+ fes[i].txev = tle_event_alloc(fe->txeq, fes + i);
+ fes[i].erev = tle_event_alloc(fe->ereq, fes + i);
+ netfe_put_stream(fe, &fe->free, fes + i);
+ }
+
+	/* open all requested streams. */
+	rc = 0;
+	for (i = 0; i != prm->nb_streams; i++) {
+ sprm = &prm->stream[i].sprm;
+ fes = netfe_stream_open_tcp(fe, sprm, lcore, prm->stream[i].op,
+ sprm->bidx, becfg.server);
+ if (fes == NULL) {
+ rc = -rte_errno;
+ break;
+ }
+
+ netfe_stream_dump(fes, &sprm->local_addr, &sprm->remote_addr);
+
+ if (prm->stream[i].op == FWD) {
+ fes->fwdprm = prm->stream[i].fprm;
+ } else if (prm->stream[i].op == TXONLY) {
+ fes->txlen = prm->stream[i].txlen;
+ fes->raddr = prm->stream[i].sprm.remote_addr;
+ }
+
+ if (becfg.server == 1) {
+ rc = tle_tcp_stream_listen(fes->s);
+ RTE_LOG(INFO, USER1,
+ "%s(%u) tle_tcp_stream_listen(stream=%p) "
+ "returns %d\n",
+ __func__, lcore, fes->s, rc);
+ if (rc != 0)
+ break;
+ } else {
+ rc = tle_tcp_stream_connect(fes->s,
+ (const struct sockaddr *)&sprm->remote_addr);
+ RTE_LOG(INFO, USER1,
+ "%s(%u) tle_tcp_stream_connect(stream=%p) "
+ "returns %d\n",
+ __func__, lcore, fes->s, rc);
+ if (rc != 0)
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static inline struct netfe_stream *
+netfe_create_fwd_stream(struct netfe_lcore *fe, struct netfe_stream *fes,
+ uint32_t lcore, uint32_t bidx)
+{
+	int32_t rc;
+ struct netfe_stream *fws;
+
+ fws = netfe_stream_open_tcp(fe, &fes->fwdprm, lcore, FWD, bidx, 0);
+ if (fws != NULL) {
+ rc = tle_tcp_stream_connect(fws->s,
+ (const struct sockaddr *)&fes->fwdprm.remote_addr);
+ NETFE_TRACE("%s(lc=%u, fes=%p): tle_tcp_stream_connect() "
+ "returns %d;\n",
+ __func__, rte_lcore_id(), fes, rc);
+
+ if (rc != 0) {
+ netfe_stream_term_tcp(fe, fws);
+ fws = NULL;
+ }
+ }
+
+ if (fws == NULL)
+ RTE_LOG(ERR, USER1, "%s(lc=%u fes=%p) failed to open "
+ "forwarding stream;\n",
+ __func__, rte_lcore_id(), fes);
+
+ return fws;
+}
+
+static inline void
+netfe_fwd_tcp(uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t i, k, n;
+ struct rte_mbuf **pkt;
+ struct netfe_stream *fed;
+
+ RTE_SET_USED(lcore);
+
+ n = fes->pbuf.num;
+ pkt = fes->pbuf.pkt;
+
+ if (n == 0)
+ return;
+
+ fed = fes->fwds;
+
+ if (fed != NULL) {
+
+ k = tle_tcp_stream_send(fed->s, pkt, n);
+
+ NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) "
+ "returns %u\n",
+ __func__, lcore, proto_name[fes->proto],
+ fed->s, n, k);
+
+ fed->stat.txp += k;
+ fed->stat.drops += n - k;
+ fes->stat.fwp += k;
+
+ } else {
+ NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n",
+ __func__, lcore, fes->s, n);
+ for (k = 0; k != n; k++) {
+ NETFE_TRACE("%s(%u, %p): free(%p);\n",
+ __func__, lcore, fes->s, pkt[k]);
+ rte_pktmbuf_free(pkt[k]);
+ }
+ fes->stat.drops += n;
+ }
+
+ /* copy unforwarded mbufs. */
+ for (i = 0; i != n - k; i++)
+ pkt[i] = pkt[i + k];
+
+ fes->pbuf.num = i;
+
+ if (i != 0) {
+ tle_event_raise(fes->txev);
+ fes->stat.txev[TLE_SEV_UP]++;
+ }
+
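+	/* pbuf was completely full, so the RX side may have paused reading;
+	 * re-arm the RX event (assuming the RX path idles rxev on a full pbuf). */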
+ if (n == RTE_DIM(fes->pbuf.pkt)) {
+ tle_event_active(fes->rxev, TLE_SEV_UP);
+ fes->stat.rxev[TLE_SEV_UP]++;
+ }
+}
+
+static inline void
+netfe_new_conn_tcp(struct netfe_lcore *fe, __rte_unused uint32_t lcore,
+ struct netfe_stream *fes)
+{
+ uint32_t i, k, n, rc;
+ struct tle_tcp_stream_cfg *prm;
+ struct tle_tcp_accept_param acpt_prm[MAX_PKT_BURST];
+ struct tle_stream *rs[MAX_PKT_BURST];
+ struct tle_syn_req syn_reqs[MAX_PKT_BURST];
+ struct netfe_stream *ts;
+ struct netfe_stream *fs[MAX_PKT_BURST];
+
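+	/* no-op callbacks: this app drives streams via events, so per-stream callbacks stay unset */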
+ static const struct tle_stream_cb zcb = {.func = NULL, .data = NULL};
+
+ /* check if any syn requests are waiting */
+ n = tle_tcp_stream_synreqs(fes->s, syn_reqs, RTE_DIM(syn_reqs));
+ if (n == 0)
+ return;
+
+ NETFE_TRACE("%s(%u): tle_tcp_stream_synreqs(%p, %u) returns %u\n",
+ __func__, lcore, fes->s, MAX_PKT_BURST, n);
+
+ /* get n free streams */
+ k = netfe_get_streams(&fe->free, fs, n);
+
+	/* fill accept params to accept k connection requests */
+ for (i = 0; i != k; i++) {
+ acpt_prm[i].syn = syn_reqs[i];
+ prm = &acpt_prm[i].cfg;
+ prm->nb_retries = 0;
+ prm->recv_ev = fs[i]->rxev;
+ prm->send_ev = fs[i]->txev;
+ prm->err_ev = fs[i]->erev;
+ tle_event_active(fs[i]->erev, TLE_SEV_DOWN);
+ prm->err_cb = zcb;
+ prm->recv_cb = zcb;
+ prm->send_cb = zcb;
+ }
+
+ /* accept k new connections */
+ rc = tle_tcp_stream_accept(fes->s, acpt_prm, rs, k);
+
+ NETFE_TRACE("%s(%u): tle_tcp_stream_accept(%p, %u) returns %u\n",
+ __func__, lcore, fes->s, k, rc);
+
+ if (rc != n) {
+ /* n - rc connections could not be accepted */
+ tle_tcp_reject(fes->s, syn_reqs + rc, n - rc);
+
+		/* put back the k - rc unused streams to the free list */
+ netfe_put_streams(fe, &fe->free, fs + rc, k - rc);
+ }
+
+ /* update the params for accepted streams */
+ for (i = 0; i != rc; i++) {
+
+ ts = fs[i];
+
+ ts->s = rs[i];
+ ts->op = fes->op;
+ ts->proto = fes->proto;
+ ts->family = fes->family;
+ ts->txlen = fes->txlen;
+
+ if (fes->op == TXONLY) {
+ tle_event_active(ts->txev, TLE_SEV_UP);
+ ts->stat.txev[TLE_SEV_UP]++;
+ } else {
+ tle_event_active(ts->rxev, TLE_SEV_DOWN);
+ ts->stat.rxev[TLE_SEV_DOWN]++;
+ }
+
+ netfe_put_stream(fe, &fe->use, ts);
+ NETFE_TRACE("%s(%u) accept (stream=%p, s=%p)\n",
+ __func__, lcore, ts, rs[i]);
+
+ /* create a new fwd stream if needed */
+ if (fes->op == FWD) {
+ tle_event_active(ts->txev, TLE_SEV_DOWN);
+ ts->stat.txev[TLE_SEV_DOWN]++;
+
+ ts->fwds = netfe_create_fwd_stream(fe, fes, lcore,
+ fes->fwdprm.bidx);
+ if (ts->fwds != NULL)
+ ts->fwds->fwds = ts;
+ }
+ }
+ fe->tcp_stat.acc += rc;
+ fe->tcp_stat.rej += n - rc;
+}
+
+static inline void
+netfe_lcore_tcp_req(void)
+{
+ struct netfe_lcore *fe;
+ uint32_t j, n, lcore;
+ struct netfe_stream *fs[MAX_PKT_BURST];
+
+ fe = RTE_PER_LCORE(_fe);
+ if (fe == NULL)
+ return;
+
+ /* look for syn events */
+ n = tle_evq_get(fe->syneq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+ if (n == 0)
+ return;
+
+ lcore = rte_lcore_id();
+
+ NETFE_TRACE("%s(%u): tle_evq_get(synevq=%p) returns %u\n",
+ __func__, lcore, fe->syneq, n);
+
+ for (j = 0; j != n; j++)
+ netfe_new_conn_tcp(fe, lcore, fs[j]);
+}
+
+static inline void
+netfe_lcore_tcp_rst(void)
+{
+ struct netfe_lcore *fe;
+ struct netfe_stream *fwds;
+ uint32_t j, n;
+ struct tle_stream *s[MAX_PKT_BURST];
+ struct netfe_stream *fs[MAX_PKT_BURST];
+ struct tle_event *rv[MAX_PKT_BURST];
+ struct tle_event *tv[MAX_PKT_BURST];
+ struct tle_event *ev[MAX_PKT_BURST];
+
+ fe = RTE_PER_LCORE(_fe);
+ if (fe == NULL)
+ return;
+
+ /* look for err events */
+ n = tle_evq_get(fe->ereq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+ if (n == 0)
+ return;
+
+ NETFE_TRACE("%s(%u): tle_evq_get(errevq=%p) returns %u\n",
+ __func__, rte_lcore_id(), fe->ereq, n);
+
+ for (j = 0; j != n; j++) {
+ if (verbose > VERBOSE_NONE) {
+ struct tle_tcp_stream_addr addr;
+ tle_tcp_stream_get_addr(fs[j]->s, &addr);
+ netfe_stream_dump(fs[j], &addr.local, &addr.remote);
+ }
+ s[j] = fs[j]->s;
+ rv[j] = fs[j]->rxev;
+ tv[j] = fs[j]->txev;
+ ev[j] = fs[j]->erev;
+ }
+
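+	/* stop event delivery for all affected streams before closing them in bulk */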
+ tle_evq_idle(fe->rxeq, rv, n);
+ tle_evq_idle(fe->txeq, tv, n);
+ tle_evq_idle(fe->ereq, ev, n);
+
+ tle_tcp_stream_close_bulk(s, n);
+
+ for (j = 0; j != n; j++) {
+
+ /*
+		 * in forwarding mode, flush any unsent packets and
+		 * signal the peer stream to terminate too.
+ */
+ fwds = fs[j]->fwds;
+ if (fwds != NULL && fwds->s != NULL) {
+
+ /* forward all unsent packets */
+ netfe_fwd_tcp(rte_lcore_id(), fs[j]);
+
+ fwds->fwds = NULL;
+ tle_event_raise(fwds->erev);
+ fs[j]->fwds = NULL;
+ }
+
+		/* now terminate the stream that received the rst event */
+ netfe_rem_stream(&fe->use, fs[j]);
+ netfe_stream_term_tcp(fe, fs[j]);
+ fe->tcp_stat.ter++;
+ }
+}
+
+static inline void
+netfe_rxtx_process_tcp(__rte_unused uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t i, k, n;
+ struct rte_mbuf **pkt;
+
+ n = fes->pbuf.num;
+ pkt = fes->pbuf.pkt;
+
+ /* there is nothing to send. */
+ if (n == 0) {
+ tle_event_idle(fes->txev);
+ fes->stat.txev[TLE_SEV_IDLE]++;
+ return;
+ }
+
+ k = tle_tcp_stream_send(fes->s, pkt, n);
+
+ NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n",
+ __func__, lcore, proto_name[fes->proto],
+ fes->s, n, k);
+ fes->stat.txp += k;
+ fes->stat.drops += n - k;
+
+ /* not able to send anything. */
+ if (k == 0)
+ return;
+
+ if (n == RTE_DIM(fes->pbuf.pkt)) {
+ /* mark stream as readable */
+ tle_event_active(fes->rxev, TLE_SEV_UP);
+ fes->stat.rxev[TLE_SEV_UP]++;
+ }
+
+ /* adjust pbuf array. */
+ fes->pbuf.num = n - k;
+ for (i = 0; i != n - k; i++)
+ pkt[i] = pkt[i + k];
+}
+
+static inline void
+netfe_tx_process_tcp(uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t i, k, n;
+
+ /* refill with new mbufs. */
+ pkt_buf_fill(lcore, &fes->pbuf, fes->txlen);
+
+ n = fes->pbuf.num;
+ if (n == 0)
+ return;
+
+ /**
+	 * TODO: cannot use a common function pointer here, as the TCP
+	 * and UDP send functions take a different number of parameters.
+ */
+ k = tle_tcp_stream_send(fes->s, fes->pbuf.pkt, n);
+
+ NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n",
+ __func__, lcore, proto_name[fes->proto], fes->s, n, k);
+ fes->stat.txp += k;
+ fes->stat.drops += n - k;
+
+ if (k == 0)
+ return;
+
+ /* adjust pbuf array. */
+ fes->pbuf.num = n - k;
+ for (i = k; i != n; i++)
+ fes->pbuf.pkt[i - k] = fes->pbuf.pkt[i];
+}
+
+static inline void
+netfe_lcore_tcp(void)
+{
+ struct netfe_lcore *fe;
+ uint32_t j, n, lcore;
+ struct netfe_stream *fs[MAX_PKT_BURST];
+
+ fe = RTE_PER_LCORE(_fe);
+ if (fe == NULL)
+ return;
+
+ lcore = rte_lcore_id();
+
+ /* look for rx events */
+ n = tle_evq_get(fe->rxeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+
+ if (n != 0) {
+ NETFE_TRACE("%s(%u): tle_evq_get(rxevq=%p) returns %u\n",
+ __func__, lcore, fe->rxeq, n);
+ for (j = 0; j != n; j++)
+ netfe_rx_process(lcore, fs[j]);
+ }
+
+ /* look for tx events */
+ n = tle_evq_get(fe->txeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+
+ if (n != 0) {
+ NETFE_TRACE("%s(%u): tle_evq_get(txevq=%p) returns %u\n",
+ __func__, lcore, fe->txeq, n);
+ for (j = 0; j != n; j++) {
+ if (fs[j]->op == RXTX)
+ netfe_rxtx_process_tcp(lcore, fs[j]);
+ else if (fs[j]->op == FWD)
+ netfe_fwd_tcp(lcore, fs[j]);
+ else if (fs[j]->op == TXONLY)
+ netfe_tx_process_tcp(lcore, fs[j]);
+ }
+ }
+}
+
+static void
+netfe_lcore_fini_tcp(void)
+{
+ struct netfe_lcore *fe;
+ uint32_t i, snum;
+ struct tle_tcp_stream_addr addr;
+ struct netfe_stream *fes;
+ uint32_t acc, rej, ter;
+
+ fe = RTE_PER_LCORE(_fe);
+ if (fe == NULL)
+ return;
+
+ snum = fe->use.num;
+ for (i = 0; i != snum; i++) {
+ fes = netfe_get_stream(&fe->use);
+ tle_tcp_stream_get_addr(fes->s, &addr);
+ netfe_stream_dump(fes, &addr.local, &addr.remote);
+ netfe_stream_close(fe, fes);
+ }
+
+ acc = fe->tcp_stat.acc;
+ rej = fe->tcp_stat.rej;
+ ter = fe->tcp_stat.ter;
+ RTE_LOG(NOTICE, USER1,
+ "tcp_stats={con_acc=%u,con_rej=%u,con_ter=%u};\n",
+ acc, rej, ter);
+
+ tle_evq_destroy(fe->txeq);
+ tle_evq_destroy(fe->rxeq);
+ tle_evq_destroy(fe->ereq);
+ tle_evq_destroy(fe->syneq);
+ RTE_PER_LCORE(_fe) = NULL;
+ rte_free(fe);
+}
+
+static inline void
+netbe_lcore_tcp(void)
+{
+ uint32_t i;
+ struct netbe_lcore *lc;
+
+ lc = RTE_PER_LCORE(_be);
+ if (lc == NULL)
+ return;
+
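+	/* per port/queue: feed RX packets into the TCP context, let
+	 * tle_tcp_process() handle up to TCP_MAX_PROCESS streams, then flush TX. */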
+ for (i = 0; i != lc->prtq_num; i++) {
+ netbe_rx(lc, i);
+ tle_tcp_process(lc->ctx, TCP_MAX_PROCESS);
+ netbe_tx(lc, i);
+ }
+}
+
+static int
+lcore_main_tcp(void *arg)
+{
+ int32_t rc;
+ uint32_t lcore;
+ struct lcore_prm *prm;
+
+ prm = arg;
+ lcore = rte_lcore_id();
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n",
+ __func__, lcore);
+
+ rc = 0;
+
+ /* lcore FE init. */
+ if (prm->fe.max_streams != 0)
+ rc = netfe_lcore_init_tcp(&prm->fe);
+
+	/* lcore BE init. */
+ if (rc == 0 && prm->be.lc != NULL)
+ rc = netbe_lcore_setup(prm->be.lc);
+
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+
+ while (force_quit == 0) {
+ netfe_lcore_tcp_req();
+ netfe_lcore_tcp_rst();
+ netfe_lcore_tcp();
+ netbe_lcore_tcp();
+ }
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n",
+ __func__, lcore);
+
+ netfe_lcore_fini_tcp();
+ netbe_lcore_clear();
+
+ return rc;
+}
+
+#endif /* TCP_H_ */
diff --git a/examples/l4fwd/udp.h b/examples/l4fwd/udp.h
new file mode 100644
index 0000000..cdec6a5
--- /dev/null
+++ b/examples/l4fwd/udp.h
@@ -0,0 +1,588 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef UDP_H_
+#define UDP_H_
+
+/*
+ * helper function: opens an IPv4 or IPv6 UDP stream for the selected port.
+ */
+static struct netfe_stream *
+netfe_stream_open_udp(struct netfe_lcore *fe, struct netfe_sprm *sprm,
+ uint32_t lcore, uint16_t op, uint32_t bidx)
+{
+ int32_t rc;
+ struct netfe_stream *fes;
+ struct sockaddr_in *l4;
+ struct sockaddr_in6 *l6;
+ uint16_t errport;
+ struct tle_udp_stream_param uprm;
+
+ fes = netfe_get_stream(&fe->free);
+ if (fes == NULL) {
+ rte_errno = ENOBUFS;
+ return NULL;
+ }
+
+ fes->rxev = tle_event_alloc(fe->rxeq, fes);
+ fes->txev = tle_event_alloc(fe->txeq, fes);
+
+ if (fes->rxev == NULL || fes->txev == NULL) {
+ netfe_stream_close(fe, fes);
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ if (op == TXONLY || op == FWD) {
+ tle_event_active(fes->txev, TLE_SEV_DOWN);
+ fes->stat.txev[TLE_SEV_DOWN]++;
+ }
+
+ if (op != TXONLY) {
+ tle_event_active(fes->rxev, TLE_SEV_DOWN);
+ fes->stat.rxev[TLE_SEV_DOWN]++;
+ }
+
+ memset(&uprm, 0, sizeof(uprm));
+ uprm.local_addr = sprm->local_addr;
+ uprm.remote_addr = sprm->remote_addr;
+ uprm.recv_ev = fes->rxev;
+ if (op != FWD)
+ uprm.send_ev = fes->txev;
+ fes->s = tle_udp_stream_open(becfg.cpu[bidx].ctx, &uprm);
+
+ if (fes->s == NULL) {
+ rc = rte_errno;
+ netfe_stream_close(fe, fes);
+ rte_errno = rc;
+
+ if (sprm->local_addr.ss_family == AF_INET) {
+ l4 = (struct sockaddr_in *) &sprm->local_addr;
+ errport = ntohs(l4->sin_port);
+ } else {
+ l6 = (struct sockaddr_in6 *) &sprm->local_addr;
+ errport = ntohs(l6->sin6_port);
+ }
+
+ RTE_LOG(ERR, USER1, "stream open failed for port %u with error "
+ "code=%u, bidx=%u, lc=%u\n",
+ errport, rc, bidx, becfg.cpu[bidx].id);
+ return NULL;
+ }
+
+ RTE_LOG(NOTICE, USER1,
+ "%s(%u)={s=%p, op=%hu, proto=%s, rxev=%p, txev=%p}, belc=%u\n",
+ __func__, lcore, fes->s, op, proto_name[becfg.proto],
+ fes->rxev, fes->txev, becfg.cpu[bidx].id);
+
+ fes->op = op;
+ fes->proto = becfg.proto;
+ fes->family = sprm->local_addr.ss_family;
+
+ return fes;
+}
+
+static int
+netfe_lcore_init_udp(const struct netfe_lcore_prm *prm)
+{
+ size_t sz;
+ int32_t rc;
+ uint32_t i, lcore, snum;
+ struct netfe_lcore *fe;
+ struct tle_evq_param eprm;
+ struct netfe_stream *fes;
+ struct netfe_sprm *sprm;
+
+ lcore = rte_lcore_id();
+
+ snum = prm->max_streams;
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u, nb_streams=%u, max_streams=%u)\n",
+ __func__, lcore, prm->nb_streams, snum);
+
+ memset(&eprm, 0, sizeof(eprm));
+ eprm.socket_id = rte_lcore_to_socket_id(lcore);
+ eprm.max_events = snum;
+
+ sz = sizeof(*fe) + snum * sizeof(struct netfe_stream);
+ fe = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ rte_lcore_to_socket_id(lcore));
+
+ if (fe == NULL) {
+ RTE_LOG(ERR, USER1, "%s:%d failed to allocate %zu bytes\n",
+ __func__, __LINE__, sz);
+ return -ENOMEM;
+ }
+
+ RTE_PER_LCORE(_fe) = fe;
+
+ fe->snum = snum;
+ /* initialize the stream pool */
+ LIST_INIT(&fe->free.head);
+ LIST_INIT(&fe->use.head);
+ fes = (struct netfe_stream *)(fe + 1);
+ for (i = 0; i != snum; i++, fes++)
+ netfe_put_stream(fe, &fe->free, fes);
+
+ /* allocate the event queues */
+ fe->rxeq = tle_evq_create(&eprm);
+ fe->txeq = tle_evq_create(&eprm);
+
+ RTE_LOG(INFO, USER1, "%s(%u) rx evq=%p, tx evq=%p\n",
+ __func__, lcore, fe->rxeq, fe->txeq);
+ if (fe->rxeq == NULL || fe->txeq == NULL)
+ return -ENOMEM;
+
+ rc = fwd_tbl_init(fe, AF_INET, lcore);
+ RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n",
+ __func__, lcore, AF_INET, rc);
+ if (rc != 0)
+ return rc;
+
+ rc = fwd_tbl_init(fe, AF_INET6, lcore);
+ RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n",
+ __func__, lcore, AF_INET6, rc);
+ if (rc != 0)
+ return rc;
+
+ /* open all requested streams. */
+ for (i = 0; i != prm->nb_streams; i++) {
+ sprm = &prm->stream[i].sprm;
+ fes = netfe_stream_open_udp(fe, sprm, lcore, prm->stream[i].op,
+ sprm->bidx);
+ if (fes == NULL) {
+ rc = -rte_errno;
+ break;
+ }
+
+ netfe_stream_dump(fes, &sprm->local_addr, &sprm->remote_addr);
+
+ if (prm->stream[i].op == FWD) {
+ fes->fwdprm = prm->stream[i].fprm;
+ rc = fwd_tbl_add(fe,
+ prm->stream[i].fprm.remote_addr.ss_family,
+ (const struct sockaddr *)
+ &prm->stream[i].fprm.remote_addr,
+ fes);
+ if (rc != 0) {
+ netfe_stream_close(fe, fes);
+ break;
+ }
+ } else if (prm->stream[i].op == TXONLY) {
+ fes->txlen = prm->stream[i].txlen;
+ fes->raddr = prm->stream[i].sprm.remote_addr;
+ }
+ }
+
+ return rc;
+}
+
+static struct netfe_stream *
+find_fwd_dst_udp(uint32_t lcore, struct netfe_stream *fes,
+ const struct sockaddr *sa)
+{
+ uint32_t rc;
+ struct netfe_stream *fed;
+ struct netfe_lcore *fe;
+ struct tle_udp_stream_param uprm;
+
+ fe = RTE_PER_LCORE(_fe);
+
+ fed = fwd_tbl_lkp(fe, fes->family, sa);
+ if (fed != NULL)
+ return fed;
+
+ /* create a new stream and put it into the fwd table. */
+ memset(&uprm, 0, sizeof(uprm));
+ uprm.local_addr = fes->fwdprm.local_addr;
+ uprm.remote_addr = fes->fwdprm.remote_addr;
+
+ /* open forward stream with wildcard remote addr. */
+ memset(&uprm.remote_addr.ss_family + 1, 0,
+ sizeof(uprm.remote_addr) - sizeof(uprm.remote_addr.ss_family));
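+	/* the memset above zeroes every byte after ss_family (port and
+	 * address), so the stream matches any remote peer. */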
+
+ fed = netfe_stream_open_udp(fe, &fes->fwdprm, lcore, FWD,
+ fes->fwdprm.bidx);
+ if (fed == NULL)
+ return NULL;
+
+ rc = fwd_tbl_add(fe, fes->family, sa, fed);
+	if (rc != 0) {
+		netfe_stream_close(fe, fed);
+		return NULL;
+	}
+
+	fed->fwdprm.remote_addr = *(const struct sockaddr_storage *)sa;
+	return fed;
+}
+
+static inline int
+netfe_addr_eq(struct sockaddr_storage *l, struct sockaddr_storage *r,
+ uint16_t family)
+{
+ struct sockaddr_in *l4, *r4;
+ struct sockaddr_in6 *l6, *r6;
+
+ if (family == AF_INET) {
+ l4 = (struct sockaddr_in *)l;
+ r4 = (struct sockaddr_in *)r;
+ return (l4->sin_port == r4->sin_port &&
+ l4->sin_addr.s_addr == r4->sin_addr.s_addr);
+ } else {
+ l6 = (struct sockaddr_in6 *)l;
+ r6 = (struct sockaddr_in6 *)r;
+		return (l6->sin6_port == r6->sin6_port &&
+			memcmp(&l6->sin6_addr, &r6->sin6_addr,
+			sizeof(l6->sin6_addr)) == 0);
+ }
+}
+
+static inline void
+netfe_pkt_addr(const struct rte_mbuf *m, struct sockaddr_storage *ps,
+ uint16_t family)
+{
+ const struct ipv4_hdr *ip4h;
+ const struct ipv6_hdr *ip6h;
+ const struct udp_hdr *udph;
+ struct sockaddr_in *in4;
+ struct sockaddr_in6 *in6;
+
+ NETFE_PKT_DUMP(m);
+
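+	/* the mbuf data offset points past the L4 header here, so headers are
+	 * reached via negative offsets using the l3_len/l4_len values the RX path filled in. */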
+ udph = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, -m->l4_len);
+
+ if (family == AF_INET) {
+ in4 = (struct sockaddr_in *)ps;
+ ip4h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+ -(m->l4_len + m->l3_len));
+ in4->sin_port = udph->src_port;
+ in4->sin_addr.s_addr = ip4h->src_addr;
+ } else {
+ in6 = (struct sockaddr_in6 *)ps;
+ ip6h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+ -(m->l4_len + m->l3_len));
+ in6->sin6_port = udph->src_port;
+ rte_memcpy(&in6->sin6_addr, ip6h->src_addr,
+ sizeof(in6->sin6_addr));
+ }
+}
+
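+/* count how many leading packets in pkt[] come from the same source address
+ * as *cur; on return *nxt holds the first non-matching address (if any). */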
+static inline uint32_t
+pkt_eq_addr(struct rte_mbuf *pkt[], uint32_t num, uint16_t family,
+ struct sockaddr_storage *cur, struct sockaddr_storage *nxt)
+{
+ uint32_t i;
+
+ for (i = 0; i != num; i++) {
+ netfe_pkt_addr(pkt[i], nxt, family);
+ if (netfe_addr_eq(cur, nxt, family) == 0)
+ break;
+ }
+
+ return i;
+}
+
+static inline void
+netfe_fwd_udp(uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t i, j, k, n, x;
+ uint16_t family;
+ void *pi0, *pi1, *pt;
+ struct rte_mbuf **pkt;
+ struct netfe_stream *fed;
+ struct sockaddr_storage in[2];
+
+ family = fes->family;
+ n = fes->pbuf.num;
+ pkt = fes->pbuf.pkt;
+
+ if (n == 0)
+ return;
+
+ in[0].ss_family = family;
+ in[1].ss_family = family;
+ pi0 = &in[0];
+ pi1 = &in[1];
+
+ netfe_pkt_addr(pkt[0], pi0, family);
+
+ x = 0;
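+	/* walk the burst in runs of packets sharing a source address;
+	 * each run goes to the fwd stream looked up for that address. */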
+ for (i = 0; i != n; i = j) {
+
+ j = i + pkt_eq_addr(&pkt[i + 1],
+ n - i - 1, family, pi0, pi1) + 1;
+
+ fed = find_fwd_dst_udp(lcore, fes,
+ (const struct sockaddr *)pi0);
+ if (fed != NULL) {
+
+ /**
+			 * TODO: cannot use a common function pointer here,
+			 * as the TCP and UDP send functions take a
+			 * different number of parameters.
+ */
+ k = tle_udp_stream_send(fed->s, pkt + i, j - i,
+ (const struct sockaddr *)
+ &fes->fwdprm.remote_addr);
+
+ NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) "
+ "returns %u\n",
+ __func__, lcore, proto_name[fes->proto],
+ fed->s, j - i, k);
+
+ fed->stat.txp += k;
+ fed->stat.drops += j - i - k;
+ fes->stat.fwp += k;
+
+ } else {
+ NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n",
+ __func__, lcore, fes->s, j - i);
+			for (k = 0; k != j - i; k++) {
+				NETFE_TRACE("%s(%u, %p): free(%p);\n",
+					__func__, lcore, fes->s, pkt[i + k]);
+				rte_pktmbuf_free(pkt[i + k]);
+			}
+ fes->stat.drops += j - i;
+ }
+
+ /* copy unforwarded mbufs. */
+ for (i += k; i != j; i++, x++)
+ pkt[x] = pkt[i];
+
+ /* swap the pointers */
+ pt = pi0;
+ pi0 = pi1;
+ pi1 = pt;
+ }
+
+ fes->pbuf.num = x;
+
+ if (x != 0) {
+ tle_event_raise(fes->txev);
+ fes->stat.txev[TLE_SEV_UP]++;
+ }
+
+ if (n == RTE_DIM(fes->pbuf.pkt)) {
+ tle_event_active(fes->rxev, TLE_SEV_UP);
+ fes->stat.rxev[TLE_SEV_UP]++;
+ }
+}
+
+static inline void
+netfe_rxtx_process_udp(__rte_unused uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t i, j, k, n;
+ uint16_t family;
+ void *pi0, *pi1, *pt;
+ struct rte_mbuf **pkt;
+ struct sockaddr_storage in[2];
+
+ family = fes->family;
+ n = fes->pbuf.num;
+ pkt = fes->pbuf.pkt;
+
+ /* there is nothing to send. */
+ if (n == 0) {
+ tle_event_idle(fes->txev);
+ fes->stat.txev[TLE_SEV_IDLE]++;
+ return;
+ }
+
+ in[0].ss_family = family;
+ in[1].ss_family = family;
+ pi0 = &in[0];
+ pi1 = &in[1];
+
+ netfe_pkt_addr(pkt[0], pi0, family);
+
+ for (i = 0; i != n; i = j) {
+
+ j = i + pkt_eq_addr(&pkt[i + 1],
+ n - i - 1, family, pi0, pi1) + 1;
+
+ /**
+		 * TODO: cannot use a common function pointer here, as the TCP
+		 * and UDP send functions take a different number of parameters.
+ */
+ k = tle_udp_stream_send(fes->s, pkt + i, j - i,
+ (const struct sockaddr *)pi0);
+
+ NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n",
+ __func__, lcore, proto_name[fes->proto],
+ fes->s, j - i, k);
+ fes->stat.txp += k;
+ fes->stat.drops += j - i - k;
+
+ i += k;
+
+ /* stream send buffer is full */
+ if (i != j)
+ break;
+
+ /* swap the pointers */
+ pt = pi0;
+ pi0 = pi1;
+ pi1 = pt;
+ }
+
+ /* not able to send anything. */
+ if (i == 0)
+ return;
+
+ if (n == RTE_DIM(fes->pbuf.pkt)) {
+ /* mark stream as readable */
+ tle_event_active(fes->rxev, TLE_SEV_UP);
+ fes->stat.rxev[TLE_SEV_UP]++;
+ }
+
+ /* adjust pbuf array. */
+ fes->pbuf.num = n - i;
+ for (j = i; j != n; j++)
+ pkt[j - i] = pkt[j];
+}
+
+static inline void
+netfe_tx_process_udp(uint32_t lcore, struct netfe_stream *fes)
+{
+ uint32_t i, k, n;
+
+ /* refill with new mbufs. */
+ pkt_buf_fill(lcore, &fes->pbuf, fes->txlen);
+
+ n = fes->pbuf.num;
+ if (n == 0)
+ return;
+
+ /**
+	 * TODO: cannot use a common function pointer here, as the TCP
+	 * and UDP send functions take a different number of parameters.
+ */
+ k = tle_udp_stream_send(fes->s, fes->pbuf.pkt, n, NULL);
+ NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n",
+ __func__, lcore, proto_name[fes->proto], fes->s, n, k);
+ fes->stat.txp += k;
+ fes->stat.drops += n - k;
+
+ if (k == 0)
+ return;
+
+ /* adjust pbuf array. */
+ fes->pbuf.num = n - k;
+ for (i = k; i != n; i++)
+ fes->pbuf.pkt[i - k] = fes->pbuf.pkt[i];
+}
+
+static inline void
+netfe_lcore_udp(void)
+{
+ struct netfe_lcore *fe;
+ uint32_t j, n, lcore;
+ struct netfe_stream *fs[MAX_PKT_BURST];
+
+ fe = RTE_PER_LCORE(_fe);
+ if (fe == NULL)
+ return;
+
+ lcore = rte_lcore_id();
+
+ /* look for rx events */
+ n = tle_evq_get(fe->rxeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+
+ if (n != 0) {
+ NETFE_TRACE("%s(%u): tle_evq_get(rxevq=%p) returns %u\n",
+ __func__, lcore, fe->rxeq, n);
+ for (j = 0; j != n; j++)
+ netfe_rx_process(lcore, fs[j]);
+ }
+
+ /* look for tx events */
+ n = tle_evq_get(fe->txeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+
+ if (n != 0) {
+ NETFE_TRACE("%s(%u): tle_evq_get(txevq=%p) returns %u\n",
+ __func__, lcore, fe->txeq, n);
+ for (j = 0; j != n; j++) {
+ if (fs[j]->op == RXTX)
+ netfe_rxtx_process_udp(lcore, fs[j]);
+ else if (fs[j]->op == FWD)
+ netfe_fwd_udp(lcore, fs[j]);
+ else if (fs[j]->op == TXONLY)
+ netfe_tx_process_udp(lcore, fs[j]);
+ }
+ }
+}
+
+static void
+netfe_lcore_fini_udp(void)
+{
+ struct netfe_lcore *fe;
+	uint32_t i, snum;
+ struct tle_udp_stream_param uprm;
+ struct netfe_stream *fes;
+
+ fe = RTE_PER_LCORE(_fe);
+ if (fe == NULL)
+ return;
+
+	/* snapshot the count: netfe_get_stream() removes entries from the use list */
+	snum = fe->use.num;
+	for (i = 0; i != snum; i++) {
+ fes = netfe_get_stream(&fe->use);
+ tle_udp_stream_get_param(fes->s, &uprm);
+ netfe_stream_dump(fes, &uprm.local_addr, &uprm.remote_addr);
+ netfe_stream_close(fe, fes);
+ }
+
+ tle_evq_destroy(fe->txeq);
+ tle_evq_destroy(fe->rxeq);
+ RTE_PER_LCORE(_fe) = NULL;
+ rte_free(fe);
+}
+
+static int
+lcore_main_udp(void *arg)
+{
+ int32_t rc;
+ uint32_t lcore;
+ struct lcore_prm *prm;
+
+ prm = arg;
+ lcore = rte_lcore_id();
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n",
+ __func__, lcore);
+
+ rc = 0;
+
+ /* lcore FE init. */
+ if (prm->fe.max_streams != 0)
+ rc = netfe_lcore_init_udp(&prm->fe);
+
+	/* lcore BE init. */
+ if (rc == 0 && prm->be.lc != NULL)
+ rc = netbe_lcore_setup(prm->be.lc);
+
+ if (rc != 0)
+ sig_handle(SIGQUIT);
+
+ while (force_quit == 0) {
+ netfe_lcore_udp();
+ netbe_lcore();
+ }
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n",
+ __func__, lcore);
+
+ netfe_lcore_fini_udp();
+ netbe_lcore_clear();
+
+ return rc;
+}
+
+#endif /* UDP_H_ */
diff --git a/examples/udpfwd/README b/examples/udpfwd/README
deleted file mode 100644
index 91b6e76..0000000
--- a/examples/udpfwd/README
+++ /dev/null
@@ -1,141 +0,0 @@
-Introduction
-============
-
-udpfwd is a sample application to demonstrate and test libtle_udp.
-Depending on configuration it can do simple send/recv or both over
-opened udp streams. It can also forward UDP datagrams between different
-streams, so the application can serve as a kind of 'UDP proxy'.
-The application can reassemble input fragmented IP packets,
-and fragment outgoing IP packets (if the destination MTU is less than the packet size).
-To build and run the application DPDK and TLDK libraries are required.
-
-Logically the application is divided into two parts:
-
-- Back End (BE)
-BE is responsible for:
- - RX over DPDK ports and feeding the packets into UDP TLDK context(s)
-   (via tle_udp_rx_bulk).
- - retrieving packets ready to be sent out from UDP TLDK context(s)
-   and transmitting them over the destination DPDK port.
-Multiple RX/TX queues per port are supported by RSS. Right now the number of
-TX queues is the same as the number of RX queues.
-Each BE lcore can serve multiple DPDK ports, TLDK UDP contexts.
-
-- Front End (FE)
-FE responsibility is to open configured UDP streams and perform
-send/recv over them. These streams can belong to different UDP contexts.
-
-Right now each lcore can act as BE and/or FE.
-
-Usage
-=====
-
-udpfwd <EAL parameters> -- \
- -P | --promisc /* promiscuous mode enabled. */ \
- -R | --rbufs <num> /* max recv buffers per stream. */ \
- -S | --sbufs <num> /* max send buffers per stream. */ \
- -s | --streams <num> /* streams to open per context. */ \
- -b | --becfg <filename> /* backend configuration file. */ \
- -f | --fecfg <filename> /* frontend configuration file. */ \
- <port0_params> <port1_params> ... <portN_params>
-
-port_params: port=<uint>,lcore=<uint>[-<uint>],\
-[rx_offload=<uint>,tx_offload=<uint>,mtu=<uint>,ipv4=<ipv4>,ipv6=<ipv6>]
-
-port_params are used to configure the particular DPDK device (rte_ethdev port),
-and specify BE lcore that will do RX/TX from/to the device and manage
-BE part of the corresponding UDP context. Multiple BE lcores can be specified.
-
-port - DPDK port id (multiple queues are supported when multiple lcores
-	are specified for a port).
-lcore - EAL lcore id to do IO over that port (rx_burst/tx_burst).
-	several ports can be managed by the same lcore, and the same port can
- belong to more than one lcore.
-rx_offload - RX HW offload capabilities to enable/use on this port.
- (bitmask of DEV_RX_OFFLOAD_* values).
-tx_offload - TX HW offload capabilities to enable/use on this port.
- (bitmask of DEV_TX_OFFLOAD_* values).
-mtu - MTU to be used on that port
-	( = UDP data size + L2/L3/L4 header sizes, default=1514).
-ipv4 - ipv4 address to assign to that port.
-ipv6 - ipv6 address to assign to that port.
-
-At least one of the ipv4/ipv6 values has to be specified for each port.
-
-As an example:
-udpfwd --lcores='3,6,8' -w 01:00.0 -- \
---promisc --rbufs 0x1000 --sbufs 0x1000 --streams 0x100 \
---fecfg ./fe.cfg --becfg ./be.cfg \
-port=0,lcore=6,lcore=8,rx_offload=0xf,tx_offload=0,\
-ipv4=192.168.1.233,ipv6=2001:4860:b002::28
-
-Will create TLDK UDP contexts on lcore=6 and lcore=8 (BE lcores) to manage
-DPDK port 0. Will assign IPv4 address 192.168.1.233 and IPv6 address
-2001:4860:b002::28 to that port.
-The following RX HW offloads supported by DPDK:
- DEV_RX_OFFLOAD_VLAN_STRIP,
- DEV_RX_OFFLOAD_IPV4_CKSUM,
- DEV_RX_OFFLOAD_UDP_CKSUM,
- DEV_RX_OFFLOAD_TCP_CKSUM
-will be enabled on that port.
-No HW TX offloads will be enabled.
-
-If multiple lcores are specified per DPDK port, the following RSS hash will
-be enabled on that port:
- ETH_RSS_UDP
-
-
-Front-End (FE) and Back-End (BE) configuration file format:
-------------------------------------------------------------
- - each record on a separate line.
- - lines starting with '#' are treated as comments.
- - empty lines (containing whitespace chars only) are ignored.
- - kvargs style format for each record.
- - each FE record corresponds to at least one stream to be opened
-   (could be multiple streams in case of op="fwd").
- - each BE record defines an IPv4/IPv6 destination.
-
-FE config record format:
-------------------------
-
-lcore=<uint>,op=<"rx|tx|echo|fwd">,\
-laddr=<ip>,lport=<uint16>,raddr=<ip>,rport=<uint16>,\
-[txlen=<uint>,fwladdr=<ip>,fwlport=<uint16>,fwraddr=<ip>,fwrport=<uint16>,\
-belcore=<uint>]
-
-lcore - EAL lcore to manage that stream(s) in the FE.
-op - operation to perform on that stream:
- "rx" - do receive only on that stream.
- "tx" - do send only on that stream.
- "echo" - mimic recvfrom(..., &addr);sendto(..., &addr);
- on that stream.
- "fwd" - forward packets between streams.
-laddr - local address for the stream to open.
-lport - local port for the stream to open.
-raddr - remote address for the stream to open.
-rport - remote port for the stream to open.
-txlen - data length to send with each packet ("tx" mode only).
-fwladdr - local address for the forwarding stream(s) to open
-	("fwd" mode only).
-fwlport - local port for the forwarding stream(s) to open
-	("fwd" mode only).
-fwraddr - remote address for the forwarding stream(s) to open
-	("fwd" mode only).
-fwrport - remote port for the forwarding stream(s) to open
-	("fwd" mode only).
-belcore - EAL lcore to manage that stream(s) in the BE.
-
-Refer to fe.cfg for an example.
-
-BE config record format:
-------------------------
-
-port=<uint>,addr=<ipv4/ipv6>,masklen=<uint>,mac=<ether>
-
-port - port number to be used to send packets to the destination.
-addr - destination network address.
-masklen - destination network prefix length.
-mac - destination ethernet address.
-
-Refer to be.cfg for an example.
diff --git a/examples/udpfwd/main.c b/examples/udpfwd/main.c
deleted file mode 100644
index 0463588..0000000
--- a/examples/udpfwd/main.c
+++ /dev/null
@@ -1,2134 +0,0 @@
-/*
- * Copyright (c) 2016 Intel Corporation.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "netbe.h"
-#include "parse.h"
-
-#define MAX_RULES 0x100
-#define MAX_TBL8 0x800
-
-#define RX_RING_SIZE 0x400
-#define TX_RING_SIZE 0x800
-
-#define MPOOL_CACHE_SIZE 0x100
-#define MPOOL_NB_BUF 0x20000
-
-#define FRAG_MBUF_BUF_SIZE (RTE_PKTMBUF_HEADROOM + TLE_UDP_MAX_HDR)
-#define FRAG_TTL MS_PER_S
-#define FRAG_TBL_BUCKET_ENTRIES 16
-
-#define FIRST_PORT 0x8000
-
-#define RX_CSUM_OFFLOAD (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM)
-#define TX_CSUM_OFFLOAD (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM)
-
-#define OPT_SHORT_SBULK 'B'
-#define OPT_LONG_SBULK "sburst"
-
-#define OPT_SHORT_PROMISC 'P'
-#define OPT_LONG_PROMISC "promisc"
-
-#define OPT_SHORT_RBUFS 'R'
-#define OPT_LONG_RBUFS "rbufs"
-
-#define OPT_SHORT_SBUFS 'S'
-#define OPT_LONG_SBUFS "sbufs"
-
-#define OPT_SHORT_STREAMS 's'
-#define OPT_LONG_STREAMS "streams"
-
-#define OPT_SHORT_FECFG 'f'
-#define OPT_LONG_FECFG "fecfg"
-
-#define OPT_SHORT_BECFG 'b'
-#define OPT_LONG_BECFG "becfg"
-
-RTE_DEFINE_PER_LCORE(struct netbe_lcore *, _be);
-RTE_DEFINE_PER_LCORE(struct netfe_lcore *, _fe);
-
-#include "fwdtbl.h"
-
-static const struct option long_opt[] = {
- {OPT_LONG_BECFG, 1, 0, OPT_SHORT_BECFG},
- {OPT_LONG_FECFG, 1, 0, OPT_SHORT_FECFG},
- {OPT_LONG_PROMISC, 0, 0, OPT_SHORT_PROMISC},
- {OPT_LONG_RBUFS, 1, 0, OPT_SHORT_RBUFS},
- {OPT_LONG_SBUFS, 1, 0, OPT_SHORT_SBUFS},
- {OPT_LONG_SBULK, 1, 0, OPT_SHORT_SBULK},
- {OPT_LONG_STREAMS, 1, 0, OPT_SHORT_STREAMS},
- {NULL, 0, 0, 0}
-};
-
-/**
- * IPv4 Input size in bytes for RSS hash key calculation.
- * source address, destination address, source port, and destination port.
- */
-#define IPV4_TUPLE_SIZE 12
-
-/**
- * IPv6 Input size in bytes for RSS hash key calculation.
- * source address, destination address, source port, and destination port.
- */
-#define IPV6_TUPLE_SIZE 36
-
-/**
- * Location to be modified to create the IPv4 hash key which helps
- * to distribute packets based on the destination UDP port.
- */
-#define RSS_HASH_KEY_DEST_PORT_LOC_IPV4 15
-
-/*
- * Location to be modified to create the IPv6 hash key which helps
- * to distribute packets based on the destination UDP port.
- */
-#define RSS_HASH_KEY_DEST_PORT_LOC_IPV6 39
-
-/**
- * Size of the rte_eth_rss_reta_entry64 array to update through
- * rte_eth_dev_rss_reta_update.
- */
-#define RSS_RETA_CONF_ARRAY_SIZE (ETH_RSS_RETA_SIZE_512/RTE_RETA_GROUP_SIZE)
-
-#define NETBE_REALLOC(loc, n) do { \
- (loc) = rte_realloc((loc), sizeof(*(loc)) * (n), RTE_CACHE_LINE_SIZE); \
- if ((loc) == NULL) { \
- RTE_LOG(ERR, USER1, \
- "%s: failed to reallocate memory\n", \
- __func__); \
- return -ENOMEM; \
- } \
-} while (0)
-
-static volatile int force_quit;
-
-static struct netbe_cfg becfg;
-static struct rte_mempool *mpool[RTE_MAX_NUMA_NODES + 1];
-static struct rte_mempool *frag_mpool[RTE_MAX_NUMA_NODES + 1];
-
-static const struct rte_eth_conf port_conf_default = {
- .rxmode = {
- .max_rx_pkt_len = ETHER_MAX_VLAN_FRAME_LEN,
- .hw_vlan_strip = 1,
- .jumbo_frame = 1,
- },
-};
-
-#include "parse.h"
-#include "main_dpdk_legacy.h"
-
-static void
-sig_handle(int signum)
-{
- RTE_LOG(ERR, USER1, "%s(%d)\n", __func__, signum);
- force_quit = 1;
-}
-
-static void
-prepare_hash_key(struct netbe_port *uprt, uint8_t key_size, uint16_t family)
-{
- uint32_t align_nb_q;
-
- align_nb_q = rte_align32pow2(uprt->nb_lcore);
- memset(uprt->hash_key, 0, RSS_HASH_KEY_LENGTH);
- uprt->hash_key_size = key_size;
- if (family == AF_INET)
- uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV4] = align_nb_q;
- else
- uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV6] = align_nb_q;
-}
-
-static uint32_t
-qidx_from_hash_index(uint32_t hash, uint32_t align_nb_q)
-{
- uint32_t i, nb_bit, q;
-
- nb_bit = (sizeof(uint32_t) * CHAR_BIT) - __builtin_clz(align_nb_q - 1);
- q = (hash & 1);
- for (i = 1; i < nb_bit; i++) {
- hash >>= 1;
- q <<= 1;
- q |= (hash & 1);
- }
-
- return q;
-}
-
-static int
-update_rss_conf(struct netbe_port *uprt,
- const struct rte_eth_dev_info *dev_info,
- struct rte_eth_conf *port_conf)
-{
- uint8_t hash_key_size;
-
- if (uprt->nb_lcore > 1) {
- if (dev_info->hash_key_size > 0)
- hash_key_size = dev_info->hash_key_size;
- else {
- RTE_LOG(ERR, USER1,
- "%s: dev_info did not provide a valid hash key size\n",
- __func__);
- return -EINVAL;
- }
-
- if (uprt->ipv4 != INADDR_ANY &&
- memcmp(&uprt->ipv6, &in6addr_any,
- sizeof(uprt->ipv6)) != 0) {
- RTE_LOG(ERR, USER1,
- "%s: RSS for both IPv4 and IPv6 not supported!\n",
- __func__);
- return -EINVAL;
- } else if (uprt->ipv4 != INADDR_ANY) {
- prepare_hash_key(uprt, hash_key_size, AF_INET);
- } else if (memcmp(&uprt->ipv6, &in6addr_any, sizeof(uprt->ipv6))
- != 0) {
- prepare_hash_key(uprt, hash_key_size, AF_INET6);
- } else {
- RTE_LOG(ERR, USER1,
- "%s: No IPv4 or IPv6 address is found!\n",
- __func__);
- return -EINVAL;
- }
- port_conf->rxmode.mq_mode = ETH_MQ_RX_RSS;
- port_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_UDP;
- port_conf->rx_adv_conf.rss_conf.rss_key_len = hash_key_size;
- port_conf->rx_adv_conf.rss_conf.rss_key = uprt->hash_key;
- }
-
- return 0;
-}
-
-static int
-update_rss_reta(struct netbe_port *uprt,
- const struct rte_eth_dev_info *dev_info)
-{
- struct rte_eth_rss_reta_entry64 reta_conf[RSS_RETA_CONF_ARRAY_SIZE];
- int32_t i, rc, align_nb_q;
- int32_t q_index, idx, shift;
-
- if (uprt->nb_lcore > 1) {
- if (dev_info->reta_size == 0) {
- RTE_LOG(ERR, USER1,
- "%s: Redirection table size 0 is invalid for RSS\n",
- __func__);
- return -EINVAL;
- }
- RTE_LOG(NOTICE, USER1,
- "%s: The reta size of port %d is %u\n",
- __func__, uprt->id, dev_info->reta_size);
-
- if (dev_info->reta_size > ETH_RSS_RETA_SIZE_512) {
- RTE_LOG(ERR, USER1,
- "%s: More than %u entries of Reta not supported\n",
- __func__, ETH_RSS_RETA_SIZE_512);
- return -EINVAL;
- }
-
- memset(reta_conf, 0, sizeof(reta_conf));
- align_nb_q = rte_align32pow2(uprt->nb_lcore);
- for (i = 0; i < align_nb_q; i++) {
- q_index = qidx_from_hash_index(i, align_nb_q) %
- uprt->nb_lcore;
-
- idx = i / RTE_RETA_GROUP_SIZE;
- shift = i % RTE_RETA_GROUP_SIZE;
- reta_conf[idx].mask |= (1ULL << shift);
- reta_conf[idx].reta[shift] = q_index;
- RTE_LOG(NOTICE, USER1,
- "%s: port=%u RSS reta conf: hash=%u, q=%u\n",
- __func__, uprt->id, i, q_index);
- }
-
- rc = rte_eth_dev_rss_reta_update(uprt->id,
- reta_conf, dev_info->reta_size);
- if (rc != 0) {
- RTE_LOG(ERR, USER1,
- "%s: Bad redirection table parameter, rc = %d\n",
- __func__, rc);
- return rc;
- }
- }
-
- return 0;
-}
-
-/*
- * Initialise DPDK port.
- * In the current version, multiple queues per port are used.
- */
-static int
-port_init(struct netbe_port *uprt)
-{
- int32_t rc;
- struct rte_eth_conf port_conf;
- struct rte_eth_dev_info dev_info;
-
- rte_eth_dev_info_get(uprt->id, &dev_info);
- if ((dev_info.rx_offload_capa & uprt->rx_offload) != uprt->rx_offload) {
- RTE_LOG(ERR, USER1,
- "port#%u supported/requested RX offloads don't match, "
- "supported: %#x, requested: %#x;\n",
- uprt->id, dev_info.rx_offload_capa, uprt->rx_offload);
- return -EINVAL;
- }
- if ((dev_info.tx_offload_capa & uprt->tx_offload) != uprt->tx_offload) {
- RTE_LOG(ERR, USER1,
- "port#%u supported/requested TX offloads don't match, "
- "supported: %#x, requested: %#x;\n",
- uprt->id, dev_info.tx_offload_capa, uprt->tx_offload);
- return -EINVAL;
- }
-
- port_conf = port_conf_default;
- if ((uprt->rx_offload & RX_CSUM_OFFLOAD) != 0) {
- RTE_LOG(ERR, USER1, "%s(%u): enabling RX csum offload;\n",
- __func__, uprt->id);
- port_conf.rxmode.hw_ip_checksum = 1;
- }
- port_conf.rxmode.max_rx_pkt_len = uprt->mtu + ETHER_CRC_LEN;
-
- rc = update_rss_conf(uprt, &dev_info, &port_conf);
- if (rc != 0)
- return rc;
-
- rc = rte_eth_dev_configure(uprt->id, uprt->nb_lcore, uprt->nb_lcore,
- &port_conf);
- RTE_LOG(NOTICE, USER1,
- "%s: rte_eth_dev_configure(prt_id=%u, nb_rxq=%u, nb_txq=%u) "
- "returns %d;\n", __func__, uprt->id, uprt->nb_lcore,
- uprt->nb_lcore, rc);
- if (rc != 0)
- return rc;
-
- return 0;
-}
-
-static int
-queue_init(struct netbe_port *uprt, struct rte_mempool *mp)
-{
- int32_t socket, rc;
- uint16_t q;
- struct rte_eth_dev_info dev_info;
-
- rte_eth_dev_info_get(uprt->id, &dev_info);
-
- socket = rte_eth_dev_socket_id(uprt->id);
-
- dev_info.default_rxconf.rx_drop_en = 1;
-
- dev_info.default_txconf.tx_free_thresh = TX_RING_SIZE / 2;
- if (uprt->tx_offload != 0) {
- RTE_LOG(ERR, USER1, "%s(%u): enabling full featured TX;\n",
- __func__, uprt->id);
- dev_info.default_txconf.txq_flags = 0;
- }
-
- for (q = 0; q < uprt->nb_lcore; q++) {
- rc = rte_eth_rx_queue_setup(uprt->id, q, RX_RING_SIZE,
- socket, &dev_info.default_rxconf, mp);
- if (rc < 0) {
- RTE_LOG(ERR, USER1,
- "%s: rx queue=%u setup failed with error code: %d\n",
- __func__, q, rc);
- return rc;
- }
- }
-
- for (q = 0; q < uprt->nb_lcore; q++) {
- rc = rte_eth_tx_queue_setup(uprt->id, q, TX_RING_SIZE,
- socket, &dev_info.default_txconf);
- if (rc < 0) {
- RTE_LOG(ERR, USER1,
- "%s: tx queue=%u setup failed with error code: %d\n",
- __func__, q, rc);
- return rc;
- }
- }
- return 0;
-}
-
-/*
- * Check that lcore is enabled, not master, and not in use already.
- */
-static int
-check_lcore(uint32_t lc)
-{
- if (rte_lcore_is_enabled(lc) == 0) {
- RTE_LOG(ERR, USER1, "lcore %u is not enabled\n", lc);
- return -EINVAL;
- }
- if (rte_eal_get_lcore_state(lc) == RUNNING) {
- RTE_LOG(ERR, USER1, "lcore %u already running %p\n",
- lc, lcore_config[lc].f);
- return -EINVAL;
- }
- return 0;
-}
-
-static void
-log_netbe_prt(const struct netbe_port *uprt)
-{
- uint32_t i;
- char corelist[2 * RTE_MAX_LCORE + 1];
- char hashkey[2 * RSS_HASH_KEY_LENGTH];
-
- memset(corelist, 0, sizeof(corelist));
- memset(hashkey, 0, sizeof(hashkey));
- for (i = 0; i < uprt->nb_lcore; i++)
- if (i < uprt->nb_lcore - 1)
- sprintf(corelist + (2 * i), "%u,", uprt->lcore[i]);
- else
- sprintf(corelist + (2 * i), "%u", uprt->lcore[i]);
-
- for (i = 0; i < uprt->hash_key_size; i++)
- sprintf(hashkey + (2 * i), "%02x", uprt->hash_key[i]);
-
- RTE_LOG(NOTICE, USER1,
- "uprt %p = <id = %u, lcore = <%s>, mtu = %u, "
- "rx_offload = %u, tx_offload = %u,\n"
- "ipv4 = %#x, "
- "ipv6 = %04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx, "
- "mac = %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx>;\n"
- "hashkey = %s;\n",
- uprt, uprt->id, corelist,
- uprt->mtu, uprt->rx_offload, uprt->tx_offload,
- uprt->ipv4,
- uprt->ipv6.s6_addr16[0], uprt->ipv6.s6_addr16[1],
- uprt->ipv6.s6_addr16[2], uprt->ipv6.s6_addr16[3],
- uprt->ipv6.s6_addr16[4], uprt->ipv6.s6_addr16[5],
- uprt->ipv6.s6_addr16[6], uprt->ipv6.s6_addr16[7],
- uprt->mac.addr_bytes[0], uprt->mac.addr_bytes[1],
- uprt->mac.addr_bytes[2], uprt->mac.addr_bytes[3],
- uprt->mac.addr_bytes[4], uprt->mac.addr_bytes[5],
- hashkey);
-}
-
-static void
-log_netbe_cfg(const struct netbe_cfg *ucfg)
-{
- uint32_t i;
-
- RTE_LOG(NOTICE, USER1,
- "ucfg @ %p, prt_num = %u\n", ucfg, ucfg->prt_num);
-
- for (i = 0; i != ucfg->prt_num; i++)
- log_netbe_prt(ucfg->prt + i);
-}
-
-static int
-pool_init(uint32_t sid)
-{
- int32_t rc;
- struct rte_mempool *mp;
- char name[RTE_MEMPOOL_NAMESIZE];
-
- snprintf(name, sizeof(name), "MP%u", sid);
- mp = rte_pktmbuf_pool_create(name, MPOOL_NB_BUF, MPOOL_CACHE_SIZE, 0,
- RTE_MBUF_DEFAULT_BUF_SIZE, sid - 1);
- if (mp == NULL) {
- rc = -rte_errno;
- RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n",
- __func__, sid - 1, rc);
- return rc;
- }
-
- mpool[sid] = mp;
- return 0;
-}
-
-static int
-frag_pool_init(uint32_t sid)
-{
- int32_t rc;
- struct rte_mempool *frag_mp;
- char frag_name[RTE_MEMPOOL_NAMESIZE];
-
- snprintf(frag_name, sizeof(frag_name), "frag_MP%u", sid);
- frag_mp = rte_pktmbuf_pool_create(frag_name, MPOOL_NB_BUF,
- MPOOL_CACHE_SIZE, 0, FRAG_MBUF_BUF_SIZE, sid - 1);
- if (frag_mp == NULL) {
- rc = -rte_errno;
- RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n",
- __func__, sid - 1, rc);
- return rc;
- }
-
- frag_mpool[sid] = frag_mp;
- return 0;
-}
-
-static struct netbe_lcore *
-find_initilized_lcore(struct netbe_cfg *cfg, uint32_t lc_num)
-{
- uint32_t i;
-
- for (i = 0; i < cfg->cpu_num; i++)
- if (cfg->cpu[i].id == lc_num)
- return &cfg->cpu[i];
-
- return NULL;
-}
-
-/*
- * Setup all enabled ports.
- */
-static int
-netbe_port_init(struct netbe_cfg *cfg, int argc, char *argv[])
-{
- int32_t rc;
- uint32_t i, n, sid, j;
- struct netbe_port *prt;
- rte_cpuset_t cpuset;
- uint32_t nc;
- struct netbe_lcore *lc;
-
- n = (uint32_t)argc;
- cfg->prt = rte_zmalloc(NULL, sizeof(struct netbe_port) * n,
- RTE_CACHE_LINE_SIZE);
- cfg->prt_num = n;
-
- rc = 0;
- for (i = 0; i != n; i++) {
- rc = parse_netbe_arg(cfg->prt + i, argv[i], &cpuset);
- if (rc != 0) {
- RTE_LOG(ERR, USER1,
- "%s: processing of \"%s\" failed with error code: %d\n",
- __func__, argv[i], rc);
- return rc;
- }
- }
-
- for (i = 0, nc = 0; i < RTE_MAX_LCORE; i++)
- nc += CPU_ISSET(i, &cpuset);
- cfg->cpu = rte_zmalloc(NULL, sizeof(struct netbe_lcore) * nc,
- RTE_CACHE_LINE_SIZE);
-
- for (i = 0; i != cfg->prt_num; i++) {
- prt = cfg->prt + i;
- rc = port_init(prt);
- if (rc != 0) {
- RTE_LOG(ERR, USER1,
- "%s: port=%u init failed with error code: %d\n",
- __func__, prt->id, rc);
- return rc;
- }
- rte_eth_macaddr_get(prt->id, &prt->mac);
- if (cfg->promisc)
- rte_eth_promiscuous_enable(prt->id);
-
- for (j = 0; j < prt->nb_lcore; j++) {
- rc = check_lcore(prt->lcore[j]);
- if (rc != 0)
- return rc;
-
- sid = rte_lcore_to_socket_id(prt->lcore[j]) + 1;
- assert(sid < RTE_DIM(mpool));
-
- if (mpool[sid] == NULL) {
- rc = pool_init(sid);
- if (rc != 0)
- return rc;
- }
-
- if (frag_mpool[sid] == NULL) {
- rc = frag_pool_init(sid);
- if (rc != 0)
- return rc;
- }
-
- rc = queue_init(prt, mpool[sid]);
- if (rc != 0) {
- RTE_LOG(ERR, USER1,
- "%s: lcore=%u queue init failed with err: %d\n",
- __func__, prt->lcore[j], rc);
- return rc;
- }
-
- /* calculate number of queues and assign queue id per lcore. */
- lc = find_initilized_lcore(cfg, prt->lcore[j]);
- if (lc == NULL) {
- lc = &cfg->cpu[cfg->cpu_num];
- lc->id = prt->lcore[j];
- cfg->cpu_num++;
- }
-
- NETBE_REALLOC(lc->prtq, lc->prtq_num + 1);
- lc->prtq[lc->prtq_num].rxqid = j;
- lc->prtq[lc->prtq_num].txqid = j;
- lc->prtq[lc->prtq_num].port = *prt;
- lc->prtq_num++;
- }
- }
- log_netbe_cfg(cfg);
-
- return 0;
-}
-
-/*
- * UDP IPv6 destination lookup callback.
- */
-static int
-lpm6_dst_lookup(void *data, const struct in6_addr *addr,
- struct tle_udp_dest *res)
-{
- int32_t rc;
- uint8_t idx;
- struct netbe_lcore *lc;
- struct tle_udp_dest *dst;
- uintptr_t p;
-
- lc = data;
- p = (uintptr_t)addr->s6_addr;
-
- rc = rte_lpm6_lookup(lc->lpm6, (uint8_t *)p, &idx);
- if (rc == 0) {
- dst = &lc->dst6[idx];
- rte_memcpy(res, dst, dst->l2_len + dst->l3_len +
- offsetof(struct tle_udp_dest, hdr));
- }
- return rc;
-}
-
-static int
-netbe_add_ipv4_route(struct netbe_lcore *lc, const struct netbe_dest *dst,
- uint8_t idx)
-{
- int32_t rc;
- uint32_t addr, depth;
- char str[INET_ADDRSTRLEN];
-
- depth = dst->prfx;
- addr = rte_be_to_cpu_32(dst->ipv4.s_addr);
-
- inet_ntop(AF_INET, &dst->ipv4, str, sizeof(str));
- rc = rte_lpm_add(lc->lpm4, addr, depth, idx);
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p,"
- "ipv4=%s/%u,mtu=%u,"
- "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) "
- "returns %d;\n",
- __func__, lc->id, dst->port, lc->dst4[idx].dev,
- str, depth, lc->dst4[idx].mtu,
- dst->mac.addr_bytes[0], dst->mac.addr_bytes[1],
- dst->mac.addr_bytes[2], dst->mac.addr_bytes[3],
- dst->mac.addr_bytes[4], dst->mac.addr_bytes[5],
- rc);
- return rc;
-}
-
-static int
-netbe_add_ipv6_route(struct netbe_lcore *lc, const struct netbe_dest *dst,
- uint8_t idx)
-{
- int32_t rc;
- uint32_t depth;
- char str[INET6_ADDRSTRLEN];
-
- depth = dst->prfx;
-
- rc = rte_lpm6_add(lc->lpm6, (uint8_t *)(uintptr_t)dst->ipv6.s6_addr,
- depth, idx);
-
- inet_ntop(AF_INET6, &dst->ipv6, str, sizeof(str));
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p,"
- "ipv6=%s/%u,mtu=%u,"
- "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) "
- "returns %d;\n",
- __func__, lc->id, dst->port, lc->dst6[idx].dev,
- str, depth, lc->dst4[idx].mtu,
- dst->mac.addr_bytes[0], dst->mac.addr_bytes[1],
- dst->mac.addr_bytes[2], dst->mac.addr_bytes[3],
- dst->mac.addr_bytes[4], dst->mac.addr_bytes[5],
- rc);
- return rc;
-}
-
-static void
-fill_dst(struct tle_udp_dest *dst, struct netbe_dev *bed,
- const struct netbe_dest *bdp, uint16_t l3_type, int32_t sid)
-{
- struct ether_hdr *eth;
- struct ipv4_hdr *ip4h;
- struct ipv6_hdr *ip6h;
-
- static const struct ipv4_hdr ipv4_tmpl = {
- .version_ihl = 4 << 4 | sizeof(*ip4h) / IPV4_IHL_MULTIPLIER,
- .time_to_live = 64,
- .next_proto_id = IPPROTO_UDP,
- };
-
- static const struct ipv6_hdr ipv6_tmpl = {
- .vtc_flow = 6 << 4,
- .proto = IPPROTO_UDP,
- .hop_limits = 64,
- };
-
- dst->dev = bed->dev;
- dst->head_mp = frag_mpool[sid + 1];
- dst->mtu = RTE_MIN(bdp->mtu, bed->port.mtu);
- dst->l2_len = sizeof(*eth);
-
- eth = (struct ether_hdr *)dst->hdr;
-
- ether_addr_copy(&bed->port.mac, &eth->s_addr);
- ether_addr_copy(&bdp->mac, &eth->d_addr);
- eth->ether_type = rte_cpu_to_be_16(l3_type);
-
- if (l3_type == ETHER_TYPE_IPv4) {
- dst->l3_len = sizeof(*ip4h);
- ip4h = (struct ipv4_hdr *)(eth + 1);
- ip4h[0] = ipv4_tmpl;
- } else if (l3_type == ETHER_TYPE_IPv6) {
- dst->l3_len = sizeof(*ip6h);
- ip6h = (struct ipv6_hdr *)(eth + 1);
- ip6h[0] = ipv6_tmpl;
- }
-}
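
fill_dst() leaves a ready-to-copy header template in dst->hdr; a sketch of the IPv4 case, not part of this patch:

	/* dst->hdr[0..13]  Ethernet: src MAC = BE port, dst MAC = next hop,
	 *                  ether_type = IPv4;
	 * dst->hdr[14..33] IPv4 template: version/ihl, ttl=64, proto=UDP;
	 * per-packet fields (lengths, addresses, checksum) are filled on TX. */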
-
-static int
-create_context(struct netbe_lcore *lc, const struct tle_udp_ctx_param *ctx_prm)
-{
- uint32_t rc = 0, sid;
- uint64_t frag_cycles;
- struct tle_udp_ctx_param cprm;
-
- if (lc->ctx == NULL) {
- sid = rte_lcore_to_socket_id(lc->id);
-
- rc = lcore_lpm_init(lc);
- if (rc != 0)
- return rc;
-
- cprm = *ctx_prm;
- cprm.socket_id = sid;
- cprm.lookup4 = lpm4_dst_lookup;
- cprm.lookup4_data = lc;
- cprm.lookup6 = lpm6_dst_lookup;
- cprm.lookup6_data = lc;
-
-		/* double the count to accommodate both IPv4 and IPv6 streams. */
- cprm.max_streams *= 2;
-
- frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) /
- MS_PER_S * FRAG_TTL;
-
- lc->ftbl = rte_ip_frag_table_create(cprm.max_streams,
- FRAG_TBL_BUCKET_ENTRIES, cprm.max_streams,
- frag_cycles, sid);
-
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u): frag_tbl=%p;\n",
- __func__, lc->id, lc->ftbl);
-
- lc->ctx = tle_udp_create(&cprm);
-
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u): udp_ctx=%p;\n",
- __func__, lc->id, lc->ctx);
-
- if (lc->ctx == NULL || lc->ftbl == NULL)
- rc = ENOMEM;
- }
-
- return rc;
-}
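
The frag_cycles value above converts the fragment-table TTL from milliseconds into TSC cycles, rounding cycles-per-millisecond up so the result is never truncated to zero. A worked example, not part of this patch (FRAG_TTL is defined elsewhere in this file):

	uint64_t hz = rte_get_tsc_hz();	/* e.g. 2300000000 on a 2.3 GHz CPU */
	uint64_t cyc_ms = (hz + MS_PER_S - 1) / MS_PER_S;	/* ceil: 2300000 */
	uint64_t frag_cycles = cyc_ms * FRAG_TTL;	/* TTL expressed in cycles */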
-
-/*
- * BE lcore setup routine.
- */
-static int
-lcore_init(struct netbe_lcore *lc, const struct tle_udp_ctx_param *ctx_prm,
- const uint32_t prtqid, const uint16_t *bl_ports, uint32_t nb_bl_ports)
-{
- int32_t rc = 0;
- struct tle_udp_dev_param dprm;
-
- rc = create_context(lc, ctx_prm);
-
- if (lc->ctx != NULL) {
- memset(&dprm, 0, sizeof(dprm));
- dprm.rx_offload = lc->prtq[prtqid].port.rx_offload;
- dprm.tx_offload = lc->prtq[prtqid].port.tx_offload;
- dprm.local_addr4.s_addr = lc->prtq[prtqid].port.ipv4;
- memcpy(&dprm.local_addr6, &lc->prtq[prtqid].port.ipv6,
- sizeof(lc->prtq[prtqid].port.ipv6));
- dprm.bl4.nb_port = nb_bl_ports;
- dprm.bl4.port = bl_ports;
- dprm.bl6.nb_port = nb_bl_ports;
- dprm.bl6.port = bl_ports;
-
- lc->prtq[prtqid].dev = tle_udp_add_dev(lc->ctx, &dprm);
-
- RTE_LOG(NOTICE, USER1,
- "%s(lcore=%u, port=%u, qid=%u), udp_dev: %p\n",
- __func__, lc->id, lc->prtq[prtqid].port.id,
- lc->prtq[prtqid].rxqid, lc->prtq[prtqid].dev);
-
- if (lc->prtq[prtqid].dev == NULL)
- rc = -rte_errno;
-
- if (rc != 0) {
- RTE_LOG(ERR, USER1,
- "%s(lcore=%u) failed with error code: %d\n",
- __func__, lc->id, rc);
- tle_udp_destroy(lc->ctx);
- rte_ip_frag_table_destroy(lc->ftbl);
- rte_lpm_free(lc->lpm4);
- rte_lpm6_free(lc->lpm6);
- rte_free(lc->prtq[prtqid].port.lcore);
- lc->prtq[prtqid].port.nb_lcore = 0;
- rte_free(lc->prtq);
- lc->prtq_num = 0;
- return rc;
- }
- }
-
- return rc;
-}
-
-static uint16_t
-create_blocklist(const struct netbe_port *beprt, uint16_t *bl_ports,
- uint32_t q)
-{
- uint32_t i, j, qid, align_nb_q;
-
- align_nb_q = rte_align32pow2(beprt->nb_lcore);
- for (i = 0, j = 0; i < (UINT16_MAX + 1); i++) {
- qid = (i % align_nb_q) % beprt->nb_lcore;
- if (qid != q)
- bl_ports[j++] = i;
- }
-
- return j;
-}
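
create_blocklist() makes each local UDP port number the property of exactly one RX queue: port i maps to queue (i % align_nb_q) % nb_lcore, and every other queue puts it on its blocklist (this pairs with the update_rss_reta() call in main() below). A worked example, not part of this patch, for a port served by 3 lcores:

	/* align_nb_q = rte_align32pow2(3) = 4
	 * port 5: (5 % 4) % 3 = 1 -> owned by queue 1;
	 * create_blocklist(prt, bl, 0) and create_blocklist(prt, bl, 2)
	 * both include port 5, create_blocklist(prt, bl, 1) leaves it open. */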
-
-static int
-netbe_lcore_init(struct netbe_cfg *cfg,
- const struct tle_udp_ctx_param *ctx_prm)
-{
- int32_t rc;
- uint32_t i, j, nb_bl_ports = 0, sz;
- struct netbe_lcore *lc;
- static uint16_t *bl_ports;
-
-	/* Create the UDP context and attached queues for each lcore. */
- rc = 0;
- sz = sizeof(uint16_t) * UINT16_MAX;
- bl_ports = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE);
- for (i = 0; i < cfg->cpu_num; i++) {
- lc = &cfg->cpu[i];
- for (j = 0; j < lc->prtq_num; j++) {
- memset((uint8_t *)bl_ports, 0, sz);
- /* create list of blocked ports based on q */
- nb_bl_ports = create_blocklist(&lc->prtq[j].port,
- bl_ports, lc->prtq[j].rxqid);
- RTE_LOG(NOTICE, USER1,
- "lc=%u, q=%u, nb_bl_ports=%u\n",
- lc->id, lc->prtq[j].rxqid, nb_bl_ports);
-
- rc = lcore_init(lc, ctx_prm, j, bl_ports, nb_bl_ports);
- if (rc != 0) {
- RTE_LOG(ERR, USER1,
- "%s: failed with error code: %d\n",
- __func__, rc);
- rte_free(bl_ports);
- return rc;
- }
- }
- }
- rte_free(bl_ports);
-
- return 0;
-}
-
-static void
-netbe_lcore_fini(struct netbe_cfg *cfg)
-{
- uint32_t i;
-
- for (i = 0; i != cfg->cpu_num; i++) {
- tle_udp_destroy(cfg->cpu[i].ctx);
- rte_ip_frag_table_destroy(cfg->cpu[i].ftbl);
- rte_lpm_free(cfg->cpu[i].lpm4);
- rte_lpm6_free(cfg->cpu[i].lpm6);
-
- rte_free(cfg->cpu[i].prtq);
- cfg->cpu[i].prtq_num = 0;
- }
-
- rte_free(cfg->cpu);
- cfg->cpu_num = 0;
- for (i = 0; i != cfg->prt_num; i++) {
- rte_free(cfg->prt[i].lcore);
- cfg->prt[i].nb_lcore = 0;
- }
- rte_free(cfg->prt);
- cfg->prt_num = 0;
-}
-
-static int
-netbe_add_dest(struct netbe_lcore *lc, uint32_t dev_idx, uint16_t family,
- const struct netbe_dest *dst, uint32_t dnum)
-{
- int32_t rc, sid;
- uint16_t l3_type;
- uint32_t i, n, m;
- struct tle_udp_dest *dp;
-
- if (family == AF_INET) {
- n = lc->dst4_num;
- dp = lc->dst4 + n;
- m = RTE_DIM(lc->dst4);
- l3_type = ETHER_TYPE_IPv4;
- } else {
- n = lc->dst6_num;
- dp = lc->dst6 + n;
- m = RTE_DIM(lc->dst6);
- l3_type = ETHER_TYPE_IPv6;
- }
-
- if (n + dnum >= m) {
- RTE_LOG(ERR, USER1, "%s(lcore=%u, family=%hu, dnum=%u) exceeds "
- "maximum allowed number of destinations(%u);\n",
- __func__, lc->id, family, dnum, m);
- return -ENOSPC;
- }
-
- sid = rte_lcore_to_socket_id(lc->id);
- rc = 0;
-
- for (i = 0; i != dnum && rc == 0; i++) {
- fill_dst(dp + i, lc->prtq + dev_idx, dst + i, l3_type, sid);
- if (family == AF_INET)
- rc = netbe_add_ipv4_route(lc, dst + i, n + i);
- else
- rc = netbe_add_ipv6_route(lc, dst + i, n + i);
- }
-
- if (family == AF_INET)
- lc->dst4_num = n + i;
- else
- lc->dst6_num = n + i;
-
- return rc;
-}
-
-static int
-netbe_dest_init(const char *fname, struct netbe_cfg *cfg)
-{
- int32_t rc;
- uint32_t f, i, p;
- uint32_t k, l, cnt;
- struct netbe_lcore *lc;
- struct netbe_dest_prm prm;
-
- rc = netbe_parse_dest(fname, &prm);
- if (rc != 0)
- return rc;
-
- rc = 0;
- for (i = 0; i != prm.nb_dest; i++) {
-
- p = prm.dest[i].port;
- f = prm.dest[i].family;
-
- cnt = 0;
- for (k = 0; k != cfg->cpu_num; k++) {
- lc = cfg->cpu + k;
- for (l = 0; l != lc->prtq_num; l++)
- if (lc->prtq[l].port.id == p) {
- rc = netbe_add_dest(lc, l, f,
- prm.dest + i, 1);
- if (rc != 0) {
- RTE_LOG(ERR, USER1,
- "%s(lcore=%u, family=%u) could not "
- "add destinations(%u);\n",
- __func__, lc->id, f, i);
- return -ENOSPC;
- }
- cnt++;
- }
- }
-
- if (cnt == 0) {
- RTE_LOG(ERR, USER1, "%s(%s) error at line %u: "
- "port %u not managed by any lcore;\n",
- __func__, fname, prm.dest[i].line, p);
- break;
- }
- }
-
- free(prm.dest);
- return rc;
-}
-
-static void
-netfe_stream_close(struct netfe_lcore *fe, uint32_t dec)
-{
- uint32_t sidx;
-
- fe->sidx -= dec;
- sidx = fe->sidx;
- tle_event_free(fe->fs[sidx].txev);
- tle_event_free(fe->fs[sidx].rxev);
- tle_udp_stream_close(fe->fs[sidx].s);
- memset(&fe->fs[sidx], 0, sizeof(fe->fs[sidx]));
-}
-
-static void
-netfe_stream_dump(const struct netfe_stream *fes)
-{
- struct sockaddr_in *l4, *r4;
- struct sockaddr_in6 *l6, *r6;
- uint16_t lport, rport;
- struct tle_udp_stream_param sprm;
- char laddr[INET6_ADDRSTRLEN];
- char raddr[INET6_ADDRSTRLEN];
-
- tle_udp_stream_get_param(fes->s, &sprm);
-
- if (sprm.local_addr.ss_family == AF_INET) {
-
- l4 = (struct sockaddr_in *)&sprm.local_addr;
- r4 = (struct sockaddr_in *)&sprm.remote_addr;
-
- lport = l4->sin_port;
- rport = r4->sin_port;
-
- } else if (sprm.local_addr.ss_family == AF_INET6) {
-
- l6 = (struct sockaddr_in6 *)&sprm.local_addr;
- r6 = (struct sockaddr_in6 *)&sprm.remote_addr;
-
- lport = l6->sin6_port;
- rport = r6->sin6_port;
-
- } else {
- RTE_LOG(ERR, USER1, "stream@%p - unknown family=%hu\n",
- fes->s, sprm.local_addr.ss_family);
- return;
- }
-
- format_addr(&sprm.local_addr, laddr, sizeof(laddr));
- format_addr(&sprm.remote_addr, raddr, sizeof(raddr));
-
- RTE_LOG(INFO, USER1,
- "stream@%p={"
- "family=%hu,laddr=%s,lport=%hu,raddr=%s,rport=%hu,"
- "stats={"
- "rxp=%" PRIu64 ",txp=%" PRIu64 ",drops=%" PRIu64 ","
- "rxev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "],"
- "txev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "],"
- "}};\n",
- fes->s,
- sprm.local_addr.ss_family,
- laddr, ntohs(lport), raddr, ntohs(rport),
- fes->stat.rxp, fes->stat.txp, fes->stat.drops,
- fes->stat.rxev[TLE_SEV_IDLE],
- fes->stat.rxev[TLE_SEV_DOWN],
- fes->stat.rxev[TLE_SEV_UP],
- fes->stat.txev[TLE_SEV_IDLE],
- fes->stat.txev[TLE_SEV_DOWN],
- fes->stat.txev[TLE_SEV_UP]);
-}
-
-/*
- * helper function: opens an IPv4 or IPv6 stream for the selected port.
- */
-static struct netfe_stream *
-netfe_stream_open(struct netfe_lcore *fe, struct tle_udp_stream_param *sprm,
- uint32_t lcore, uint16_t op, uint32_t bidx)
-{
- int32_t rc;
- uint32_t sidx;
- struct netfe_stream *fes;
- struct sockaddr_in *l4;
- struct sockaddr_in6 *l6;
- uint16_t errport;
-
- sidx = fe->sidx;
- fes = fe->fs + sidx;
- if (sidx >= fe->snum) {
- rte_errno = ENOBUFS;
- return NULL;
- }
-
- fes->rxev = tle_event_alloc(fe->rxeq, &fe->fs[sidx]);
- fes->txev = tle_event_alloc(fe->txeq, &fe->fs[sidx]);
- sprm->recv_ev = fes->rxev;
- if (op != FWD)
- sprm->send_ev = fes->txev;
-
- RTE_LOG(ERR, USER1,
- "%s(%u) [%u]={op=%hu, rxev=%p, txev=%p}, be_lc=%u\n",
- __func__, lcore, sidx, op, fes->rxev, fes->txev,
- becfg.cpu[bidx].id);
- if (fes->rxev == NULL || fes->txev == NULL) {
- netfe_stream_close(fe, 0);
- rte_errno = ENOMEM;
- return NULL;
- }
-
- if (op == TXONLY || op == FWD) {
- tle_event_active(fes->txev, TLE_SEV_DOWN);
- fes->stat.txev[TLE_SEV_DOWN]++;
- }
-
- if (op != TXONLY) {
- tle_event_active(fes->rxev, TLE_SEV_DOWN);
- fes->stat.rxev[TLE_SEV_DOWN]++;
- }
-
- fes->s = tle_udp_stream_open(becfg.cpu[bidx].ctx, sprm);
- if (fes->s == NULL) {
- rc = rte_errno;
- netfe_stream_close(fe, 0);
- rte_errno = rc;
-
- if (sprm->local_addr.ss_family == AF_INET) {
- l4 = (struct sockaddr_in *) &sprm->local_addr;
- errport = ntohs(l4->sin_port);
- } else {
- l6 = (struct sockaddr_in6 *) &sprm->local_addr;
- errport = ntohs(l6->sin6_port);
- }
- RTE_LOG(ERR, USER1, "stream open failed for port %u with error "
- "code=%u, bidx=%u, lc=%u\n",
- errport, rc, bidx, becfg.cpu[bidx].id);
- return NULL;
- }
-
- fes->op = op;
- fes->family = sprm->local_addr.ss_family;
-
- fe->sidx = sidx + 1;
- return fes;
-}
-
-static inline int
-netfe_addr_eq(struct sockaddr_storage *l, struct sockaddr_storage *r,
- uint16_t family)
-{
- struct sockaddr_in *l4, *r4;
- struct sockaddr_in6 *l6, *r6;
-
- if (family == AF_INET) {
- l4 = (struct sockaddr_in *)l;
- r4 = (struct sockaddr_in *)r;
- return (l4->sin_port == r4->sin_port &&
- l4->sin_addr.s_addr == r4->sin_addr.s_addr);
- } else {
- l6 = (struct sockaddr_in6 *)l;
- r6 = (struct sockaddr_in6 *)r;
-		return (l6->sin6_port == r6->sin6_port &&
-			memcmp(&l6->sin6_addr, &r6->sin6_addr,
-			sizeof(l6->sin6_addr)) == 0);
- }
-}
-
-static inline void
-netfe_pkt_addr(const struct rte_mbuf *m, struct sockaddr_storage *ps,
- uint16_t family)
-{
- const struct ipv4_hdr *ip4h;
- const struct ipv6_hdr *ip6h;
- const struct udp_hdr *udph;
- struct sockaddr_in *in4;
- struct sockaddr_in6 *in6;
-
- NETFE_PKT_DUMP(m);
-
- udph = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, -m->l4_len);
-
- if (family == AF_INET) {
- in4 = (struct sockaddr_in *)ps;
- ip4h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
- -(m->l4_len + m->l3_len));
- in4->sin_port = udph->src_port;
- in4->sin_addr.s_addr = ip4h->src_addr;
- } else {
- in6 = (struct sockaddr_in6 *)ps;
- ip6h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
- -(m->l4_len + m->l3_len));
- in6->sin6_port = udph->src_port;
- rte_memcpy(&in6->sin6_addr, ip6h->src_addr,
- sizeof(in6->sin6_addr));
- }
-}
-
-static inline uint32_t
-pkt_eq_addr(struct rte_mbuf *pkt[], uint32_t num, uint16_t family,
- struct sockaddr_storage *cur, struct sockaddr_storage *nxt)
-{
- uint32_t i;
-
- for (i = 0; i != num; i++) {
- netfe_pkt_addr(pkt[i], nxt, family);
- if (netfe_addr_eq(cur, nxt, family) == 0)
- break;
- }
-
- return i;
-}
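
pkt_eq_addr() measures how many consecutive packets come from the same peer, letting netfe_fwd() and netfe_rxtx_process() below send per-peer batches with a single call. A worked example, not part of this patch:

	/* peers of pkt[0..4] = {A, A, A, B, A}; cur = A (taken from pkt[0])
	 * pkt_eq_addr(&pkt[1], 4, family, cur, nxt) returns 2,
	 * so the first batch is pkt[0..2]; nxt now holds B and is
	 * swapped in as cur for the next iteration, which starts at pkt[3]. */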
-
-static inline void
-pkt_buf_empty(struct pkt_buf *pb)
-{
- uint32_t i;
-
- for (i = 0; i != pb->num; i++)
- rte_pktmbuf_free(pb->pkt[i]);
-
- pb->num = 0;
-}
-
-static inline void
-pkt_buf_fill(uint32_t lcore, struct pkt_buf *pb, uint32_t dlen)
-{
- uint32_t i;
- int32_t sid;
-
- sid = rte_lcore_to_socket_id(lcore) + 1;
-
- for (i = pb->num; i != RTE_DIM(pb->pkt); i++) {
- pb->pkt[i] = rte_pktmbuf_alloc(mpool[sid]);
- if (pb->pkt[i] == NULL)
- break;
- rte_pktmbuf_append(pb->pkt[i], dlen);
- }
-
- pb->num = i;
-}
-
-static struct netfe_stream *
-find_fwd_dst(uint32_t lcore, struct netfe_stream *fes,
- const struct sockaddr *sa)
-{
- uint32_t rc;
- struct netfe_stream *fed;
- struct netfe_lcore *fe;
- struct tle_udp_stream_param sprm;
-
- fe = RTE_PER_LCORE(_fe);
-
- fed = fwd_tbl_lkp(fe, fes->family, sa);
- if (fed != NULL)
- return fed;
-
- /* create a new stream and put it into the fwd table. */
-
- sprm = fes->fwdprm.prm;
-
- /* open forward stream with wildcard remote addr. */
- memset(&sprm.remote_addr.ss_family + 1, 0,
- sizeof(sprm.remote_addr) - sizeof(sprm.remote_addr.ss_family));
- fed = netfe_stream_open(fe, &sprm, lcore, FWD, fes->fwdprm.bidx);
- if (fed == NULL)
- return NULL;
-
- rc = fwd_tbl_add(fe, fes->family, sa, fed);
- if (rc != 0) {
- netfe_stream_close(fe, 1);
- fed = NULL;
- }
-
- fed->fwdprm.prm.remote_addr = *(const struct sockaddr_storage *)sa;
- return fed;
-}
-
-static inline void
-netfe_tx_process(uint32_t lcore, struct netfe_stream *fes)
-{
- uint32_t i, k, n;
-
- /* refill with new mbufs. */
- pkt_buf_fill(lcore, &fes->pbuf, fes->txlen);
-
- n = fes->pbuf.num;
- if (n == 0)
- return;
-
- k = tle_udp_stream_send(fes->s, fes->pbuf.pkt, n, NULL);
- NETFE_TRACE("%s(%u): tle_udp_stream_send(%p, %u) returns %u\n",
- __func__, lcore, fes->s, n, k);
- fes->stat.txp += k;
- fes->stat.drops += n - k;
-
- if (k == 0)
- return;
-
- /* adjust pbuf array. */
- fes->pbuf.num = n - k;
- for (i = k; i != n; i++)
- fes->pbuf.pkt[i - k] = fes->pbuf.pkt[i];
-}
-
-static inline void
-netfe_fwd(uint32_t lcore, struct netfe_stream *fes)
-{
- uint32_t i, j, k, n, x;
- uint16_t family;
- void *pi0, *pi1, *pt;
- struct rte_mbuf **pkt;
- struct netfe_stream *fed;
- struct sockaddr_storage in[2];
-
- family = fes->family;
- n = fes->pbuf.num;
- pkt = fes->pbuf.pkt;
-
- if (n == 0)
- return;
-
- in[0].ss_family = family;
- in[1].ss_family = family;
- pi0 = &in[0];
- pi1 = &in[1];
-
- netfe_pkt_addr(pkt[0], pi0, family);
-
- x = 0;
- for (i = 0; i != n; i = j) {
-
- j = i + pkt_eq_addr(&pkt[i + 1],
- n - i - 1, family, pi0, pi1) + 1;
-
- fed = find_fwd_dst(lcore, fes, (const struct sockaddr *)pi0);
- if (fed != NULL) {
-
- k = tle_udp_stream_send(fed->s, pkt + i, j - i,
- (const struct sockaddr *)
- &fes->fwdprm.prm.remote_addr);
-
- NETFE_TRACE("%s(%u): tle_udp_stream_send(%p, %u) "
- "returns %u\n",
- __func__, lcore, fed->s, j - i, k);
- fed->stat.txp += k;
- fed->stat.drops += j - i - k;
- fes->stat.fwp += k;
-
- } else {
- NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n",
- __func__, lcore, fes->s, j - i);
- for (k = i; k != j; k++) {
- NETFE_TRACE("%s(%u, %p): free(%p);\n",
- __func__, lcore, fes->s, pkt[k]);
-				rte_pktmbuf_free(pkt[k]);
- }
- fes->stat.drops += j - i;
- }
-
- /* copy unforwarded mbufs. */
- for (i += k; i != j; i++, x++)
- pkt[x] = pkt[i];
-
- /* swap the pointers */
- pt = pi0;
- pi0 = pi1;
- pi1 = pt;
- }
-
- fes->pbuf.num = x;
-
- if (x != 0) {
- tle_event_raise(fes->txev);
- fes->stat.txev[TLE_SEV_UP]++;
- }
-
- if (n == RTE_DIM(fes->pbuf.pkt)) {
- tle_event_active(fes->rxev, TLE_SEV_UP);
- fes->stat.rxev[TLE_SEV_UP]++;
- }
-}
-
-static inline void
-netfe_rx_process(__rte_unused uint32_t lcore, struct netfe_stream *fes)
-{
- uint32_t k, n;
-
- n = fes->pbuf.num;
- k = RTE_DIM(fes->pbuf.pkt) - n;
-
- /* packet buffer is full, can't receive any new packets. */
- if (k == 0) {
- tle_event_idle(fes->rxev);
- fes->stat.rxev[TLE_SEV_IDLE]++;
- return;
- }
-
- n = tle_udp_stream_recv(fes->s, fes->pbuf.pkt + n, k);
- if (n == 0)
- return;
-
- NETFE_TRACE("%s(%u): tle_udp_stream_recv(%p, %u) returns %u\n",
- __func__, lcore, fes->s, k, n);
-
- fes->pbuf.num += n;
- fes->stat.rxp += n;
-
- /* free all received mbufs. */
- if (fes->op == RXONLY)
- pkt_buf_empty(&fes->pbuf);
- /* mark stream as writable */
- else if (k == RTE_DIM(fes->pbuf.pkt)) {
- if (fes->op == RXTX) {
- tle_event_active(fes->txev, TLE_SEV_UP);
- fes->stat.txev[TLE_SEV_UP]++;
- } else if (fes->op == FWD) {
- tle_event_raise(fes->txev);
- fes->stat.txev[TLE_SEV_UP]++;
- }
- }
-}
-
-static inline void
-netfe_rxtx_process(__rte_unused uint32_t lcore, struct netfe_stream *fes)
-{
- uint32_t i, j, k, n;
- uint16_t family;
- void *pi0, *pi1, *pt;
- struct rte_mbuf **pkt;
- struct sockaddr_storage in[2];
-
- family = fes->family;
- n = fes->pbuf.num;
- pkt = fes->pbuf.pkt;
-
- /* there is nothing to send. */
- if (n == 0) {
- tle_event_idle(fes->txev);
- fes->stat.txev[TLE_SEV_IDLE]++;
- return;
- }
-
- in[0].ss_family = family;
- in[1].ss_family = family;
- pi0 = &in[0];
- pi1 = &in[1];
-
- netfe_pkt_addr(pkt[0], pi0, family);
-
- for (i = 0; i != n; i = j) {
-
- j = i + pkt_eq_addr(&pkt[i + 1],
- n - i - 1, family, pi0, pi1) + 1;
-
- k = tle_udp_stream_send(fes->s, pkt + i, j - i,
- (const struct sockaddr *)pi0);
-
- NETFE_TRACE("%s(%u): tle_udp_stream_send(%p, %u) returns %u\n",
- __func__, lcore, fes->s, j - i, k);
- fes->stat.txp += k;
- fes->stat.drops += j - i - k;
-
- i += k;
-
- /* stream send buffer is full */
- if (i != j)
- break;
-
- /* swap the pointers */
- pt = pi0;
- pi0 = pi1;
- pi1 = pt;
- }
-
- /* not able to send anything. */
- if (i == 0)
- return;
-
- if (n == RTE_DIM(fes->pbuf.pkt)) {
- /* mark stream as readable */
- tle_event_active(fes->rxev, TLE_SEV_UP);
- fes->stat.rxev[TLE_SEV_UP]++;
- }
-
- /* adjust pbuf array. */
- fes->pbuf.num = n - i;
- for (j = i; j != n; j++)
- pkt[j - i] = pkt[j];
-}
-
-static int
-netfe_lcore_init(const struct netfe_lcore_prm *prm)
-{
- size_t sz;
- int32_t rc;
- uint32_t i, lcore, snum;
- struct netfe_lcore *fe;
- struct tle_evq_param eprm;
- struct tle_udp_stream_param sprm;
- struct netfe_stream *fes;
-
- lcore = rte_lcore_id();
-
- snum = prm->max_streams;
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u, nb_streams=%u, max_streams=%u)\n",
- __func__, lcore, prm->nb_streams, snum);
-
- memset(&eprm, 0, sizeof(eprm));
- eprm.socket_id = rte_lcore_to_socket_id(lcore);
- eprm.max_events = snum;
-
- sz = sizeof(*fe) + snum * sizeof(fe->fs[0]);
- fe = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
- rte_lcore_to_socket_id(lcore));
-
- if (fe == NULL) {
- RTE_LOG(ERR, USER1, "%s:%d failed to allocate %zu bytes\n",
- __func__, __LINE__, sz);
- return -ENOMEM;
- }
-
- RTE_PER_LCORE(_fe) = fe;
-
- fe->snum = snum;
- fe->fs = (struct netfe_stream *)(fe + 1);
-
- fe->rxeq = tle_evq_create(&eprm);
- fe->txeq = tle_evq_create(&eprm);
-
- RTE_LOG(INFO, USER1, "%s(%u) rx evq=%p, tx evq=%p\n",
- __func__, lcore, fe->rxeq, fe->txeq);
- if (fe->rxeq == NULL || fe->txeq == NULL)
- return -ENOMEM;
-
- rc = fwd_tbl_init(fe, AF_INET, lcore);
- RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n",
- __func__, lcore, AF_INET, rc);
- if (rc != 0)
- return rc;
-
- rc = fwd_tbl_init(fe, AF_INET6, lcore);
- RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n",
- __func__, lcore, AF_INET6, rc);
- if (rc != 0)
- return rc;
-
- /* open all requested streams. */
- for (i = 0; i != prm->nb_streams; i++) {
- sprm = prm->stream[i].sprm.prm;
- fes = netfe_stream_open(fe, &sprm, lcore, prm->stream[i].op,
- prm->stream[i].sprm.bidx);
- if (fes == NULL) {
- rc = -rte_errno;
- break;
- }
-
- netfe_stream_dump(fes);
-
- if (prm->stream[i].op == FWD) {
- fes->fwdprm = prm->stream[i].fprm;
- rc = fwd_tbl_add(fe,
- prm->stream[i].fprm.prm.remote_addr.ss_family,
- (const struct sockaddr *)
- &prm->stream[i].fprm.prm.remote_addr,
- fes);
- if (rc != 0) {
- netfe_stream_close(fe, 1);
- break;
- }
- } else if (prm->stream[i].op == TXONLY) {
- fes->txlen = prm->stream[i].txlen;
- fes->raddr = sprm.remote_addr;
- }
- }
-
- return rc;
-}
-
-static inline void
-netfe_lcore(void)
-{
- struct netfe_lcore *fe;
- uint32_t j, n, lcore;
- struct netfe_stream *fs[MAX_PKT_BURST];
-
- fe = RTE_PER_LCORE(_fe);
- if (fe == NULL)
- return;
-
- lcore = rte_lcore_id();
-
- n = tle_evq_get(fe->rxeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
-
- if (n != 0) {
- NETFE_TRACE("%s(%u): tle_evq_get(rxevq=%p) returns %u\n",
- __func__, lcore, fe->rxeq, n);
- for (j = 0; j != n; j++)
- netfe_rx_process(lcore, fs[j]);
- }
-
- n = tle_evq_get(fe->txeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
-
- if (n != 0) {
- NETFE_TRACE("%s(%u): tle_evq_get(txevq=%p) returns %u\n",
- __func__, lcore, fe->txeq, n);
- for (j = 0; j != n; j++) {
- if (fs[j]->op == RXTX)
- netfe_rxtx_process(lcore, fs[j]);
- else if (fs[j]->op == FWD)
- netfe_fwd(lcore, fs[j]);
- else if (fs[j]->op == TXONLY)
- netfe_tx_process(lcore, fs[j]);
- }
- }
-}
-
-static void
-netfe_lcore_fini(void)
-{
- struct netfe_lcore *fe;
- uint32_t i;
-
- fe = RTE_PER_LCORE(_fe);
- if (fe == NULL)
- return;
-
- while (fe->sidx != 0) {
- i = fe->sidx - 1;
- netfe_stream_dump(fe->fs + i);
- netfe_stream_close(fe, 1);
- }
-
- tle_evq_destroy(fe->txeq);
- tle_evq_destroy(fe->rxeq);
- RTE_PER_LCORE(_fe) = NULL;
- rte_free(fe);
-}
-
-static inline void
-netbe_rx(struct netbe_lcore *lc, uint32_t pidx)
-{
- uint32_t j, k, n;
- struct rte_mbuf *pkt[MAX_PKT_BURST];
- struct rte_mbuf *rp[MAX_PKT_BURST];
- int32_t rc[MAX_PKT_BURST];
-
- n = rte_eth_rx_burst(lc->prtq[pidx].port.id,
- lc->prtq[pidx].rxqid, pkt, RTE_DIM(pkt));
- if (n == 0)
- return;
-
- lc->prtq[pidx].rx_stat.in += n;
- NETBE_TRACE("%s(%u): rte_eth_rx_burst(%u, %u) returns %u\n",
- __func__, lc->id, lc->prtq[pidx].port.id, lc->prtq[pidx].rxqid,
- n);
-
- k = tle_udp_rx_bulk(lc->prtq[pidx].dev, pkt, rp, rc, n);
-
- lc->prtq[pidx].rx_stat.up += k;
- lc->prtq[pidx].rx_stat.drop += n - k;
- NETBE_TRACE("%s(%u): tle_udp_rx_bulk(%p, %u) returns %u\n",
- __func__, lc->id, lc->prtq[pidx].dev, n, k);
-
- for (j = 0; j != n - k; j++) {
- NETBE_TRACE("%s:%d(port=%u) rp[%u]={%p, %d};\n",
- __func__, __LINE__, lc->prtq[pidx].port.id,
- j, rp[j], rc[j]);
- rte_pktmbuf_free(rp[j]);
- }
-}
-
-static inline void
-netbe_tx(struct netbe_lcore *lc, uint32_t pidx)
-{
- uint32_t j, k, n;
- struct rte_mbuf **mb;
-
- n = lc->prtq[pidx].tx_buf.num;
- k = RTE_DIM(lc->prtq[pidx].tx_buf.pkt) - n;
- mb = lc->prtq[pidx].tx_buf.pkt;
-
-	j = 0;
-	if (k >= RTE_DIM(lc->prtq[pidx].tx_buf.pkt) / 2) {
-		j = tle_udp_tx_bulk(lc->prtq[pidx].dev, mb + n, k);
-		n += j;
-		lc->prtq[pidx].tx_stat.down += j;
-	}
-
- if (n == 0)
- return;
-
- NETBE_TRACE("%s(%u): tle_udp_tx_bulk(%p) returns %u,\n"
- "total pkts to send: %u\n",
- __func__, lc->id, lc->prtq[pidx].dev, j, n);
-
- for (j = 0; j != n; j++)
- NETBE_PKT_DUMP(mb[j]);
-
- k = rte_eth_tx_burst(lc->prtq[pidx].port.id,
- lc->prtq[pidx].txqid, mb, n);
-
- lc->prtq[pidx].tx_stat.out += k;
- lc->prtq[pidx].tx_stat.drop += n - k;
- NETBE_TRACE("%s(%u): rte_eth_tx_burst(%u, %u, %u) returns %u\n",
- __func__, lc->id, lc->prtq[pidx].port.id, lc->prtq[pidx].txqid,
- n, k);
-
- lc->prtq[pidx].tx_buf.num = n - k;
- if (k != 0)
- for (j = k; j != n; j++)
- mb[j - k] = mb[j];
-}
-
-static int
-netbe_lcore_setup(struct netbe_lcore *lc)
-{
- uint32_t i;
- int32_t rc;
-
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u, udp_ctx: %p) start\n",
- __func__, lc->id, lc->ctx);
-
- /*
- * ???????
-	 * wait for FE lcores to start, so the BE doesn't drop any packets
-	 * because the corresponding streams are not yet opened by the FE.
-	 * useful when used with pcap PMDs.
-	 * think of a better way, or should this timeout be a cmdline parameter?
- * ???????
- */
- rte_delay_ms(10);
-
- rc = 0;
- for (i = 0; i != lc->prtq_num && rc == 0; i++) {
- RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, udp_dev: %p)\n",
- __func__, i, lc->prtq[i].port.id, lc->prtq[i].dev);
- rc = setup_rx_cb(&lc->prtq[i].port, lc, lc->prtq[i].rxqid);
- if (rc < 0)
- return rc;
- }
-
- if (rc == 0)
- RTE_PER_LCORE(_be) = lc;
- return rc;
-}
-
-static inline void
-netbe_lcore(void)
-{
- uint32_t i;
- struct netbe_lcore *lc;
-
- lc = RTE_PER_LCORE(_be);
- if (lc == NULL)
- return;
-
- for (i = 0; i != lc->prtq_num; i++) {
- netbe_rx(lc, i);
- netbe_tx(lc, i);
- }
-}
-
-static void
-netbe_lcore_clear(void)
-{
- uint32_t i, j;
- struct netbe_lcore *lc;
-
- lc = RTE_PER_LCORE(_be);
- if (lc == NULL)
- return;
-
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u, udp_ctx: %p) finish\n",
- __func__, lc->id, lc->ctx);
- for (i = 0; i != lc->prtq_num; i++) {
- RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, lcore=%u, q=%u, dev=%p) "
- "rx_stats={"
- "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "}, "
- "tx_stats={"
- "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "};\n",
- __func__, i, lc->prtq[i].port.id, lc->id,
- lc->prtq[i].rxqid,
- lc->prtq[i].dev,
- lc->prtq[i].rx_stat.in,
- lc->prtq[i].rx_stat.up,
- lc->prtq[i].rx_stat.drop,
- lc->prtq[i].tx_stat.down,
- lc->prtq[i].tx_stat.out,
- lc->prtq[i].tx_stat.drop);
- }
-
- for (i = 0; i != lc->prtq_num; i++)
- for (j = 0; j != lc->prtq[i].tx_buf.num; j++)
- rte_pktmbuf_free(lc->prtq[i].tx_buf.pkt[j]);
-
- RTE_PER_LCORE(_be) = NULL;
-}
-
-static int
-lcore_main(void *arg)
-{
- int32_t rc;
- uint32_t lcore;
- struct lcore_prm *prm;
-
- prm = arg;
- lcore = rte_lcore_id();
-
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n",
- __func__, lcore);
-
- rc = 0;
-
- /* lcore FE init. */
- if (prm->fe.max_streams != 0)
- rc = netfe_lcore_init(&prm->fe);
-
-	/* lcore BE init. */
- if (rc == 0 && prm->be.lc != NULL)
- rc = netbe_lcore_setup(prm->be.lc);
-
- if (rc != 0)
- sig_handle(SIGQUIT);
-
- while (force_quit == 0) {
- netfe_lcore();
- netbe_lcore();
- }
-
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n",
- __func__, lcore);
-
- netfe_lcore_fini();
- netbe_lcore_clear();
-
- return rc;
-}
-
-static int
-netfe_lcore_cmp(const void *s1, const void *s2)
-{
- const struct netfe_stream_prm *p1, *p2;
-
- p1 = s1;
- p2 = s2;
- return p1->lcore - p2->lcore;
-}
-
-static int
-netbe_find6(const struct in6_addr *laddr, uint16_t lport,
- const struct in6_addr *raddr, uint32_t be_lc)
-{
- uint32_t i, j;
- uint8_t idx;
- struct netbe_lcore *bc;
-
- /* we have exactly one BE, use it for all traffic */
- if (becfg.cpu_num == 1)
- return 0;
-
- /* search by provided be_lcore */
- if (be_lc != LCORE_ID_ANY) {
- for (i = 0; i != becfg.cpu_num; i++) {
- bc = becfg.cpu + i;
- if (be_lc == bc->id)
- return i;
- }
- RTE_LOG(NOTICE, USER1, "%s: no stream with be_lcore=%u\n",
- __func__, be_lc);
- return -ENOENT;
- }
-
- /* search by local address */
- if (memcmp(laddr, &in6addr_any, sizeof(*laddr)) != 0) {
- for (i = 0; i != becfg.cpu_num; i++) {
- bc = becfg.cpu + i;
- /* search by queue for the local port */
- for (j = 0; j != bc->prtq_num; j++) {
- if (memcmp(laddr, &bc->prtq[j].port.ipv6,
- sizeof(*laddr)) == 0) {
-
- if (lport == 0)
- return i;
-
- if (verify_queue_for_port(bc->prtq + j,
- lport) != 0)
- return i;
- }
- }
- }
- }
-
- /* search by remote address */
-	if (memcmp(raddr, &in6addr_any, sizeof(*raddr)) != 0) {
- for (i = 0; i != becfg.cpu_num; i++) {
- bc = becfg.cpu + i;
- if (rte_lpm6_lookup(bc->lpm6,
- (uint8_t *)(uintptr_t)raddr->s6_addr,
- &idx) == 0) {
-
- if (lport == 0)
- return i;
-
- /* search by queue for the local port */
- for (j = 0; j != bc->prtq_num; j++)
- if (verify_queue_for_port(bc->prtq + j,
- lport) != 0)
- return i;
- }
- }
- }
-
- return -ENOENT;
-}
-
-static int
-netbe_find(const struct tle_udp_stream_param *p, uint32_t be_lc)
-{
- const struct sockaddr_in *l4, *r4;
- const struct sockaddr_in6 *l6, *r6;
-
- if (p->local_addr.ss_family == AF_INET) {
- l4 = (const struct sockaddr_in *)&p->local_addr;
- r4 = (const struct sockaddr_in *)&p->remote_addr;
- return netbe_find4(&l4->sin_addr, ntohs(l4->sin_port),
- &r4->sin_addr, be_lc);
- } else if (p->local_addr.ss_family == AF_INET6) {
- l6 = (const struct sockaddr_in6 *)&p->local_addr;
- r6 = (const struct sockaddr_in6 *)&p->remote_addr;
- return netbe_find6(&l6->sin6_addr, ntohs(l6->sin6_port),
- &r6->sin6_addr, be_lc);
- }
- return -EINVAL;
-}
-
-static int
-netfe_sprm_flll_be(struct netfe_sprm *sp, uint32_t line, uint32_t be_lc)
-{
- int32_t bidx;
-
- bidx = netbe_find(&sp->prm, be_lc);
- if (bidx < 0) {
- RTE_LOG(ERR, USER1, "%s(line=%u): no BE for that stream\n",
- __func__, line);
- return -EINVAL;
- }
- sp->bidx = bidx;
- return 0;
-}
-
-/* distribute front-end streams and parameters among lcores. */
-static int
-netfe_lcore_fill(struct lcore_prm prm[RTE_MAX_LCORE],
- struct netfe_lcore_prm *lprm)
-{
- uint32_t be_lc;
- uint32_t i, j, lc, ln;
-
-	/* determine on which BE each stream should be opened. */
- for (i = 0; i != lprm->nb_streams; i++) {
- lc = lprm->stream[i].lcore;
- ln = lprm->stream[i].line;
- be_lc = lprm->stream[i].be_lcore;
- if (netfe_sprm_flll_be(&lprm->stream[i].sprm, ln,
- be_lc) != 0 ||
- (lprm->stream[i].op == FWD &&
- netfe_sprm_flll_be(&lprm->stream[i].fprm, ln,
- be_lc) != 0))
- return -EINVAL;
- }
-
- /* group all fe parameters by lcore. */
-
- qsort(lprm->stream, lprm->nb_streams, sizeof(lprm->stream[0]),
- netfe_lcore_cmp);
-
- for (i = 0; i != lprm->nb_streams; i = j) {
-
- lc = lprm->stream[i].lcore;
- ln = lprm->stream[i].line;
-
- if (rte_lcore_is_enabled(lc) == 0) {
- RTE_LOG(ERR, USER1,
- "%s(line=%u): lcore %u is not enabled\n",
- __func__, ln, lc);
- return -EINVAL;
- }
-
- if (rte_get_master_lcore() != lc &&
- rte_eal_get_lcore_state(lc) == RUNNING) {
- RTE_LOG(ERR, USER1,
- "%s(line=%u): lcore %u already in use\n",
- __func__, ln, lc);
- return -EINVAL;
- }
-
- for (j = i + 1; j != lprm->nb_streams &&
- lc == lprm->stream[j].lcore;
- j++)
- ;
-
- prm[lc].fe.max_streams = lprm->max_streams;
- prm[lc].fe.nb_streams = j - i;
- prm[lc].fe.stream = lprm->stream + i;
- }
-
- return 0;
-}
-
-int
-main(int argc, char *argv[])
-{
- int32_t opt, opt_idx, rc;
- uint32_t i;
- uint64_t v;
- struct tle_udp_ctx_param ctx_prm;
- struct netfe_lcore_prm feprm;
- struct rte_eth_stats stats;
- char fecfg_fname[PATH_MAX + 1];
- char becfg_fname[PATH_MAX + 1];
- struct lcore_prm prm[RTE_MAX_LCORE];
- struct rte_eth_dev_info dev_info;
-
- fecfg_fname[0] = 0;
- becfg_fname[0] = 0;
- memset(prm, 0, sizeof(prm));
-
- rc = rte_eal_init(argc, argv);
- if (rc < 0)
- rte_exit(EXIT_FAILURE,
- "%s: rte_eal_init failed with error code: %d\n",
- __func__, rc);
-
- memset(&ctx_prm, 0, sizeof(ctx_prm));
-
- argc -= rc;
- argv += rc;
-
- optind = 0;
- optarg = NULL;
- while ((opt = getopt_long(argc, argv, "B:PR:S:b:f:s:", long_opt,
- &opt_idx)) != EOF) {
- if (opt == OPT_SHORT_SBULK) {
- rc = parse_uint_val(NULL, optarg, &v);
- if (rc < 0)
- rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
- "for option: \'%c\'\n",
- __func__, optarg, opt);
- ctx_prm.send_bulk_size = v;
- } else if (opt == OPT_SHORT_PROMISC) {
- becfg.promisc = 1;
- } else if (opt == OPT_SHORT_RBUFS) {
- rc = parse_uint_val(NULL, optarg, &v);
- if (rc < 0)
- rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
- "for option: \'%c\'\n",
- __func__, optarg, opt);
- ctx_prm.max_stream_rbufs = v;
- } else if (opt == OPT_SHORT_SBUFS) {
- rc = parse_uint_val(NULL, optarg, &v);
- if (rc < 0)
- rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
- "for option: \'%c\'\n",
- __func__, optarg, opt);
- ctx_prm.max_stream_sbufs = v;
- } else if (opt == OPT_SHORT_STREAMS) {
- rc = parse_uint_val(NULL, optarg, &v);
- if (rc < 0)
- rte_exit(EXIT_FAILURE, "%s: invalid value: %s "
- "for option: \'%c\'\n",
- __func__, optarg, opt);
- ctx_prm.max_streams = v;
- } else if (opt == OPT_SHORT_BECFG) {
- snprintf(becfg_fname, sizeof(becfg_fname), "%s",
- optarg);
- } else if (opt == OPT_SHORT_FECFG) {
- snprintf(fecfg_fname, sizeof(fecfg_fname), "%s",
- optarg);
- } else {
- rte_exit(EXIT_FAILURE,
- "%s: unknown option: \'%c\'\n",
- __func__, opt);
- }
- }
-
- signal(SIGINT, sig_handle);
-
- rc = netbe_port_init(&becfg, argc - optind, argv + optind);
- if (rc != 0)
- rte_exit(EXIT_FAILURE,
- "%s: netbe_port_init failed with error code: %d\n",
- __func__, rc);
-
- rc = netbe_lcore_init(&becfg, &ctx_prm);
- if (rc != 0)
- sig_handle(SIGQUIT);
-
- if ((rc = netbe_dest_init(becfg_fname, &becfg)) != 0)
- sig_handle(SIGQUIT);
-
- for (i = 0; i != becfg.prt_num && rc == 0; i++) {
- RTE_LOG(NOTICE, USER1, "%s: starting port %u\n",
- __func__, becfg.prt[i].id);
- rc = rte_eth_dev_start(becfg.prt[i].id);
- if (rc != 0) {
- RTE_LOG(ERR, USER1,
- "%s: rte_eth_dev_start(%u) returned "
- "error code: %d\n",
- __func__, becfg.prt[i].id, rc);
- sig_handle(SIGQUIT);
- }
- rte_eth_dev_info_get(becfg.prt[i].id, &dev_info);
- rc = update_rss_reta(&becfg.prt[i], &dev_info);
- if (rc != 0)
- sig_handle(SIGQUIT);
- }
-
- feprm.max_streams = ctx_prm.max_streams * becfg.cpu_num;
- if (rc == 0 && (rc = netfe_parse_cfg(fecfg_fname, &feprm)) != 0)
- sig_handle(SIGQUIT);
-
- for (i = 0; rc == 0 && i != becfg.cpu_num; i++)
- prm[becfg.cpu[i].id].be.lc = becfg.cpu + i;
-
- if (rc == 0 && (rc = netfe_lcore_fill(prm, &feprm)) != 0)
- sig_handle(SIGQUIT);
-
- /* launch all slave lcores. */
- RTE_LCORE_FOREACH_SLAVE(i) {
- if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0)
- rte_eal_remote_launch(lcore_main, prm + i, i);
- }
-
- /* launch master lcore. */
- i = rte_get_master_lcore();
- if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0)
- lcore_main(prm + i);
-
- rte_eal_mp_wait_lcore();
-
- for (i = 0; i != becfg.prt_num; i++) {
- RTE_LOG(NOTICE, USER1, "%s: stoping port %u\n",
- __func__, becfg.prt[i].id);
- rte_eth_stats_get(becfg.prt[i].id, &stats);
- RTE_LOG(NOTICE, USER1, "port %u stats={\n"
- "ipackets=%" PRIu64 ";"
- "ibytes=%" PRIu64 ";"
- "ierrors=%" PRIu64 ";\n"
- "opackets=%" PRIu64 ";"
- "obytes=%" PRIu64 ";"
- "oerrors=%" PRIu64 ";\n"
- "}\n",
- becfg.prt[i].id,
- stats.ipackets,
- stats.ibytes,
- stats.ierrors,
- stats.opackets,
- stats.obytes,
- stats.oerrors);
- rte_eth_dev_stop(becfg.prt[i].id);
- }
-
- netbe_lcore_fini(&becfg);
-
- return 0;
-}
diff --git a/examples/udpfwd/pkt.c b/examples/udpfwd/pkt.c
deleted file mode 100644
index a68e428..0000000
--- a/examples/udpfwd/pkt.c
+++ /dev/null
@@ -1,509 +0,0 @@
-/*
- * Copyright (c) 2016 Intel Corporation.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "netbe.h"
-#include <netinet/ip6.h>
-
-static inline uint64_t
-_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
- uint64_t ol3, uint64_t ol2)
-{
- return il2 | il3 << 7 | il4 << 16 | tso << 24 | ol3 << 40 | ol2 << 49;
-}
-
-static inline void
-fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4)
-{
- m->tx_offload = _mbuf_tx_offload(l2, l3, l4, 0, 0, 0);
-}
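
The shift amounts in _mbuf_tx_offload() match the bit widths of the mbuf tx_offload bitfields in the DPDK versions this example targets (l2_len:7, l3_len:9, l4_len:8, tso_segsz:16, then the outer L3/L2 lengths). A worked example, not part of this patch:

	/* plain Ether/IPv4/UDP: l2=14, l3=20, l4=8, no TSO, no outer headers */
	uint64_t ofl = _mbuf_tx_offload(14, 20, 8, 0, 0, 0);
	/* = 14 | 20 << 7 | 8 << 16 = 0x80a0e */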
-
-static inline int
-is_ipv4_frag(const struct ipv4_hdr *iph)
-{
- const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG);
-
- return ((mask & iph->fragment_offset) != 0);
-}
-
-static inline void
-fill_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto,
- uint32_t frag)
-{
- const struct ipv4_hdr *iph;
- int32_t dlen, len;
-
- dlen = rte_pktmbuf_data_len(m);
- dlen -= l2 + sizeof(struct udp_hdr);
-
- iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2);
- len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER;
-
- if (frag != 0 && is_ipv4_frag(iph)) {
- m->packet_type &= ~RTE_PTYPE_L4_MASK;
- m->packet_type |= RTE_PTYPE_L4_FRAG;
- }
-
- if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto))
- m->packet_type = RTE_PTYPE_UNKNOWN;
- else
- fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr));
-}
-
-static inline int
-ipv6x_hdr(uint32_t proto)
-{
- return (proto == IPPROTO_HOPOPTS ||
- proto == IPPROTO_ROUTING ||
- proto == IPPROTO_FRAGMENT ||
- proto == IPPROTO_AH ||
- proto == IPPROTO_NONE ||
- proto == IPPROTO_DSTOPTS);
-}
-
-static inline void
-fill_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto,
- uint32_t fproto)
-{
- const struct ip6_ext *ipx;
- int32_t dlen, len, ofs;
-
- len = sizeof(struct ipv6_hdr);
-
- dlen = rte_pktmbuf_data_len(m);
- dlen -= l2 + sizeof(struct udp_hdr);
-
- ofs = l2 + len;
- ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs);
-
- while (ofs > 0 && len < dlen) {
-
- switch (nproto) {
- case IPPROTO_HOPOPTS:
- case IPPROTO_ROUTING:
- case IPPROTO_DSTOPTS:
- ofs = (ipx->ip6e_len + 1) << 3;
- break;
- case IPPROTO_AH:
- ofs = (ipx->ip6e_len + 2) << 2;
- break;
- case IPPROTO_FRAGMENT:
- /*
-			 * tso_segsz is not used by RX, so use it as a temporary
-			 * buffer to store the fragment offset.
- */
- m->tso_segsz = ofs;
- ofs = sizeof(struct ip6_frag);
- m->packet_type &= ~RTE_PTYPE_L4_MASK;
- m->packet_type |= RTE_PTYPE_L4_FRAG;
- break;
- default:
- ofs = 0;
- }
-
- if (ofs > 0) {
- nproto = ipx->ip6e_nxt;
- len += ofs;
- ipx += ofs / sizeof(*ipx);
- }
- }
-
-	/* unrecognised or invalid packet. */
- if ((ofs == 0 && nproto != fproto) || len > dlen)
- m->packet_type = RTE_PTYPE_UNKNOWN;
- else
- fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr));
-}
-
-static inline void
-fill_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto)
-{
- const struct ipv6_hdr *iph;
-
- iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *,
- sizeof(struct ether_hdr));
-
- if (iph->proto == fproto)
- fill_pkt_hdr_len(m, l2, sizeof(struct ipv6_hdr),
- sizeof(struct udp_hdr));
- else if (ipv6x_hdr(iph->proto) != 0)
- fill_ipv6x_hdr_len(m, l2, iph->proto, fproto);
-}
-
-static inline void
-fill_eth_hdr_len(struct rte_mbuf *m)
-{
- uint32_t dlen, l2;
- uint16_t etp;
- const struct ether_hdr *eth;
-
- dlen = rte_pktmbuf_data_len(m);
-
- /* check that first segment is at least 42B long. */
- if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
- sizeof(struct udp_hdr)) {
- m->packet_type = RTE_PTYPE_UNKNOWN;
- return;
- }
-
- l2 = sizeof(*eth);
-
- eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
- etp = eth->ether_type;
- if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
- l2 += sizeof(struct vlan_hdr);
-
- if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
- m->packet_type = RTE_PTYPE_L4_UDP |
- RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_L2_ETHER;
- fill_ipv4_hdr_len(m, l2, IPPROTO_UDP, 1);
- } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
- dlen >= l2 + sizeof(struct ipv6_hdr) +
- sizeof(struct udp_hdr)) {
- m->packet_type = RTE_PTYPE_L4_UDP |
- RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_L2_ETHER;
- fill_ipv6_hdr_len(m, l2, IPPROTO_UDP);
- } else
- m->packet_type = RTE_PTYPE_UNKNOWN;
-}
-
-static inline uint16_t
-ipv4x_cksum(const void *iph, size_t len)
-{
- uint16_t cksum;
-
- cksum = rte_raw_cksum(iph, len);
- return (cksum == 0xffff) ? cksum : ~cksum;
-}
-
-static inline void
-fix_reassembled(struct rte_mbuf *m, int32_t hwcsum)
-{
- struct ipv4_hdr *iph;
-
- /* update packet type. */
- m->packet_type &= ~RTE_PTYPE_L4_MASK;
- m->packet_type |= RTE_PTYPE_L4_UDP;
-
- /* fix reassemble setting TX flags. */
- m->ol_flags &= ~PKT_TX_IP_CKSUM;
-
- /* fix l3_len after reassemble. */
- if (RTE_ETH_IS_IPV6_HDR(m->packet_type))
- m->l3_len = m->l3_len - sizeof(struct ipv6_extension_fragment);
-
- /* recalculate ipv4 cksum after reassemble. */
- else if (hwcsum == 0 && RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
- iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
- iph->hdr_checksum = ipv4x_cksum(iph, m->l3_len);
- }
-}
-
-static struct rte_mbuf *
-reassemble(struct rte_mbuf *m, struct netbe_lcore *lc, uint64_t tms,
- uint8_t port)
-{
- uint32_t l3cs;
- struct rte_ip_frag_tbl *tbl;
- struct rte_ip_frag_death_row *dr;
-
- tbl = lc->ftbl;
- dr = &lc->death_row;
- l3cs = lc->prtq[port].port.rx_offload & DEV_RX_OFFLOAD_IPV4_CKSUM;
-
- if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
-
- struct ipv4_hdr *iph;
-
- iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
-
- /* process this fragment. */
- m = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, iph);
-
- } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
-
- struct ipv6_hdr *iph;
- struct ipv6_extension_fragment *fhdr;
-
- iph = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len);
-
- /*
-		 * the fragment header offset was temporarily stored in
-		 * tso_segsz, just to avoid another scan of the ipv6 header.
- */
- fhdr = rte_pktmbuf_mtod_offset(m,
- struct ipv6_extension_fragment *, m->tso_segsz);
- m->tso_segsz = 0;
-
- /* process this fragment. */
- m = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, iph, fhdr);
-
- } else {
- rte_pktmbuf_free(m);
- m = NULL;
- }
-
- /* got reassembled packet. */
- if (m != NULL)
- fix_reassembled(m, l3cs);
-
- return m;
-}
-
-/* exclude NULLs from the final list of packets. */
-static inline uint32_t
-compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero)
-{
- uint32_t i, j, k, l;
-
- for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) {
-
- /* found a hole. */
- if (pkt[j] == NULL) {
-
- /* find how big is it. */
- for (i = j; i-- != 0 && pkt[i] == NULL; )
- ;
- /* fill the hole. */
- for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++)
- pkt[l] = pkt[k];
-
- nb_pkt -= j - i;
- nb_zero -= j - i;
- j = i + 1;
- }
- }
-
- return nb_pkt;
-}
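
A worked example of the compaction, not part of this patch:

	/* in:  pkt = {m0, NULL, NULL, m3, m4}, nb_pkt = 5, nb_zero = 2;
	 * the hole at [1..2] is filled by shifting m3 and m4 left:
	 * out: pkt = {m0, m3, m4}, return value = 3. */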
-
-/*
- * HW can recognise L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k)
- */
-static uint16_t __rte_unused
-type0_rx_callback(uint8_t port, __rte_unused uint16_t queue,
- struct rte_mbuf *pkt[], uint16_t nb_pkts,
- __rte_unused uint16_t max_pkts, void *user_param)
-{
- uint32_t j, tp, x;
- uint64_t cts;
- struct netbe_lcore *lc;
-
- lc = user_param;
- cts = 0;
-
- x = 0;
- for (j = 0; j != nb_pkts; j++) {
-
- NETBE_PKT_DUMP(pkt[j]);
-
- tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
- RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
-
- switch (tp) {
- /* non fragmented udp packets. */
- case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4 |
- RTE_PTYPE_L2_ETHER):
- fill_pkt_hdr_len(pkt[j], sizeof(struct ether_hdr),
- sizeof(struct ipv4_hdr),
- sizeof(struct udp_hdr));
- break;
- case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6 |
- RTE_PTYPE_L2_ETHER):
- fill_pkt_hdr_len(pkt[j], sizeof(struct ether_hdr),
- sizeof(struct ipv6_hdr),
- sizeof(struct udp_hdr));
- break;
- case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT |
- RTE_PTYPE_L2_ETHER):
- fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr),
- UINT32_MAX, 0);
- break;
- case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT |
- RTE_PTYPE_L2_ETHER):
- fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr),
- IPPROTO_UDP);
- break;
- /* possibly fragmented udp packets. */
- case (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER):
- case (RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER):
- fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr),
- IPPROTO_UDP, 1);
- break;
- case (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER):
- case (RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER):
- fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr),
- IPPROTO_UDP);
- break;
- default:
- /* treat packet types as invalid. */
- pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
- break;
- }
-
- /*
- * if it is a fragment, try to reassemble it,
-		 * if for some reason it can't be done, then
- * set pkt[] entry to NULL.
- */
- if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) ==
- RTE_PTYPE_L4_FRAG) {
- cts = (cts == 0) ? rte_rdtsc() : cts;
- pkt[j] = reassemble(pkt[j], lc, cts, port);
- x += (pkt[j] == NULL);
- }
- }
-
-	/* reassembly was invoked, clean up its death-row. */
- if (cts != 0)
- rte_ip_frag_free_death_row(&lc->death_row, 0);
-
- if (x == 0)
- return nb_pkts;
-
- NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
- "%u non-reassembled fragments;\n",
- __func__, port, queue, nb_pkts, x);
-
- return compress_pkt_list(pkt, nb_pkts, x);
-}
-
-/*
- * HW can recognise L2/L3/L4 and fragments (i40e).
- */
-static uint16_t __rte_unused
-type1_rx_callback(uint8_t port, __rte_unused uint16_t queue,
- struct rte_mbuf *pkt[], uint16_t nb_pkts,
- __rte_unused uint16_t max_pkts, void *user_param)
-{
- uint32_t j, tp, x;
- uint64_t cts;
- struct netbe_lcore *lc;
-
- lc = user_param;
- cts = 0;
-
- x = 0;
- for (j = 0; j != nb_pkts; j++) {
-
- NETBE_PKT_DUMP(pkt[j]);
-
- tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
- RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
-
- switch (tp) {
- case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_L2_ETHER):
- fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr),
- UINT32_MAX, 0);
- break;
- case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_L2_ETHER):
- fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr),
- IPPROTO_UDP);
- break;
- case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_L2_ETHER):
- fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr),
- IPPROTO_UDP, 0);
- break;
- case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_L2_ETHER):
- fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr),
- IPPROTO_UDP);
- break;
- default:
- /* treat packet types as invalid. */
- pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
- break;
- }
-
- /*
- * if it is a fragment, try to reassemble it,
-		 * if for some reason it can't be done, then
- * set pkt[] entry to NULL.
- */
- if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) ==
- RTE_PTYPE_L4_FRAG) {
- cts = (cts == 0) ? rte_rdtsc() : cts;
- pkt[j] = reassemble(pkt[j], lc, cts, port);
- x += (pkt[j] == NULL);
- }
- }
-
-	/* reassembly was invoked, clean up its death-row. */
- if (cts != 0)
- rte_ip_frag_free_death_row(&lc->death_row, 0);
-
- if (x == 0)
- return nb_pkts;
-
- NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
- "%u non-reassembled fragments;\n",
- __func__, port, queue, nb_pkts, x);
-
- return compress_pkt_list(pkt, nb_pkts, x);
-}
-
-/*
- * generic, assumes HW doesn't recognise any packet type.
- */
-static uint16_t
-typen_rx_callback(uint8_t port, __rte_unused uint16_t queue,
- struct rte_mbuf *pkt[], uint16_t nb_pkts,
- __rte_unused uint16_t max_pkts, void *user_param)
-{
- uint32_t j, x;
- uint64_t cts;
- struct netbe_lcore *lc;
-
- lc = user_param;
- cts = 0;
-
- x = 0;
- for (j = 0; j != nb_pkts; j++) {
-
- NETBE_PKT_DUMP(pkt[j]);
- fill_eth_hdr_len(pkt[j]);
-
- /*
- * if it is a fragment, try to reassemble it,
-		 * if for some reason it can't be done, then
- * set pkt[] entry to NULL.
- */
- if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) ==
- RTE_PTYPE_L4_FRAG) {
- cts = (cts == 0) ? rte_rdtsc() : cts;
- pkt[j] = reassemble(pkt[j], lc, cts, port);
- x += (pkt[j] == NULL);
- }
- }
-
-	/* reassembly was invoked, clean up its death-row. */
- if (cts != 0)
- rte_ip_frag_free_death_row(&lc->death_row, 0);
-
- if (x == 0)
- return nb_pkts;
-
- NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
- "%u non-reassembled fragments;\n",
- __func__, port, queue, nb_pkts, x);
-
- return compress_pkt_list(pkt, nb_pkts, x);
-}
-
-#include "pkt_dpdk_legacy.h"
diff --git a/lib/Makefile b/lib/Makefile
index 8ce9bac..201f078 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,6 +22,7 @@ endif
include $(RTE_SDK)/mk/rte.vars.mk
DIRS-y += libtle_dring
-DIRS-y += libtle_udp
+DIRS-y += libtle_timer
+DIRS-y += libtle_l4p
include $(TLDK_ROOT)/mk/tle.subdir.mk
diff --git a/lib/libtle_dring/tle_dring.h b/lib/libtle_dring/tle_dring.h
index e89679d..f589ece 100644
--- a/lib/libtle_dring/tle_dring.h
+++ b/lib/libtle_dring/tle_dring.h
@@ -81,6 +81,12 @@ struct tle_dring {
struct tle_drb dummy; /**< dummy block */
};
+static inline uint32_t
+tle_dring_count(const struct tle_dring *dr)
+{
+ return dr->prod.tail - dr->cons.tail;
+}
+
/*
* helper routine, to copy objects to/from the ring.
*/
diff --git a/lib/libtle_udp/Makefile b/lib/libtle_l4p/Makefile
index 44cb6aa..c0d3e80 100644
--- a/lib/libtle_udp/Makefile
+++ b/lib/libtle_l4p/Makefile
@@ -21,25 +21,33 @@ RTE_TARGET ?= x86_64-native-linuxapp-gcc
include $(RTE_SDK)/mk/rte.vars.mk
# library name
-LIB = libtle_udp.a
+LIB = libtle_l4p.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
-EXPORT_MAP := tle_udp_version.map
+EXPORT_MAP := tle_l4p_version.map
LIBABIVER := 1
#source files
+SRCS-y += ctx.c
SRCS-y += event.c
-SRCS-y += udp_ctl.c
+SRCS-y += stream_table.c
+SRCS-y += tcp_ofo.c
+SRCS-y += tcp_stream.c
+SRCS-y += tcp_rxtx.c
+SRCS-y += udp_stream.c
SRCS-y += udp_rxtx.c
# install this header file
-SYMLINK-y-include += tle_udp_impl.h
+SYMLINK-y-include += tle_ctx.h
SYMLINK-y-include += tle_event.h
+SYMLINK-y-include += tle_tcp.h
+SYMLINK-y-include += tle_udp.h
# this lib dependencies
DEPDIRS-y += lib/libtle_dring
+DEPDIRS-y += lib/libtle_timer
include $(TLDK_ROOT)/mk/tle.lib.mk
diff --git a/lib/libtle_l4p/ctx.c b/lib/libtle_l4p/ctx.c
new file mode 100644
index 0000000..7ebef9d
--- /dev/null
+++ b/lib/libtle_l4p/ctx.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_ip.h>
+
+#include "stream.h"
+#include "misc.h"
+
+#define LPORT_START 0x8000
+#define LPORT_END MAX_PORT_NUM
+
+#define LPORT_START_BLK PORT_BLK(LPORT_START)
+#define LPORT_END_BLK PORT_BLK(LPORT_END)
+
+const struct in6_addr tle_ipv6_any = IN6ADDR_ANY_INIT;
+const struct in6_addr tle_ipv6_none = {
+ {
+ .__u6_addr32 = {
+ UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX
+ },
+ },
+};
+
+struct stream_ops tle_stream_ops[TLE_PROTO_NUM] = {};
+
+static int
+check_dev_prm(const struct tle_dev_param *dev_prm)
+{
+ /* no valid IPv4/IPv6 addresses provided. */
+ if (dev_prm->local_addr4.s_addr == INADDR_ANY &&
+ memcmp(&dev_prm->local_addr6, &tle_ipv6_any,
+ sizeof(tle_ipv6_any)) == 0)
+ return -EINVAL;
+
+ if (dev_prm->bl4.nb_port > UINT16_MAX ||
+ (dev_prm->bl4.nb_port != 0 &&
+ dev_prm->bl4.port == NULL))
+ return -EINVAL;
+
+ if (dev_prm->bl6.nb_port > UINT16_MAX ||
+ (dev_prm->bl6.nb_port != 0 &&
+ dev_prm->bl6.port == NULL))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+check_ctx_prm(const struct tle_ctx_param *prm)
+{
+ if (prm->proto >= TLE_PROTO_NUM)
+ return -EINVAL;
+ return 0;
+}
+
+struct tle_ctx *
+tle_ctx_create(const struct tle_ctx_param *ctx_prm)
+{
+ struct tle_ctx *ctx;
+ size_t sz;
+ uint32_t i;
+ int32_t rc;
+
+ if (ctx_prm == NULL || check_ctx_prm(ctx_prm) != 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ sz = sizeof(*ctx);
+ ctx = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ ctx_prm->socket_id);
+ if (ctx == NULL) {
+ UDP_LOG(ERR, "allocation of %zu bytes for new ctx "
+ "on socket %d failed\n",
+ sz, ctx_prm->socket_id);
+ return NULL;
+ }
+
+ ctx->prm = *ctx_prm;
+
+ rc = tle_stream_ops[ctx_prm->proto].init_streams(ctx);
+ if (rc != 0) {
+ UDP_LOG(ERR, "init_streams(ctx=%p, proto=%u) failed "
+ "with error code: %d;\n",
+ ctx, ctx_prm->proto, rc);
+ tle_ctx_destroy(ctx);
+ rte_errno = -rc;
+ return NULL;
+ }
+
+ for (i = 0; i != RTE_DIM(ctx->use); i++)
+ tle_pbm_init(ctx->use + i, LPORT_START_BLK);
+
+ ctx->streams.nb_free = ctx->prm.max_streams;
+ return ctx;
+}
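
A minimal usage sketch for the reworked context API, not part of this patch; the values are illustrative and the lookup4/lookup6 callbacks would be set as in the l4fwd example:

	struct tle_ctx_param cp = {
		.socket_id = SOCKET_ID_ANY,
		.proto = TLE_PROTO_UDP,
		.max_streams = 1024,
		.max_stream_rbufs = 256,
		.max_stream_sbufs = 256,
	};
	struct tle_ctx *ctx = tle_ctx_create(&cp);
	if (ctx == NULL)
		rte_exit(EXIT_FAILURE, "tle_ctx_create: %d\n", rte_errno);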
+
+void
+tle_ctx_destroy(struct tle_ctx *ctx)
+{
+ uint32_t i;
+
+ if (ctx == NULL) {
+ rte_errno = EINVAL;
+ return;
+ }
+
+ for (i = 0; i != RTE_DIM(ctx->dev); i++)
+ tle_del_dev(ctx->dev + i);
+
+ tle_stream_ops[ctx->prm.proto].fini_streams(ctx);
+ rte_free(ctx);
+}
+
+void
+tle_ctx_invalidate(struct tle_ctx *ctx)
+{
+ RTE_SET_USED(ctx);
+}
+
+static void
+fill_pbm(struct tle_pbm *pbm, const struct tle_bl_port *blp)
+{
+ uint32_t i;
+
+ for (i = 0; i != blp->nb_port; i++)
+ tle_pbm_set(pbm, blp->port[i]);
+}
+
+static int
+init_dev_proto(struct tle_dev *dev, uint32_t idx, int32_t socket_id,
+ const struct tle_bl_port *blp)
+{
+ size_t sz;
+
+ sz = sizeof(*dev->dp[idx]);
+ dev->dp[idx] = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ socket_id);
+
+ if (dev->dp[idx] == NULL) {
+ UDP_LOG(ERR, "allocation of %zu bytes on "
+ "socket %d for %u-th device failed\n",
+ sz, socket_id, idx);
+ return ENOMEM;
+ }
+
+ tle_pbm_init(&dev->dp[idx]->use, LPORT_START_BLK);
+ fill_pbm(&dev->dp[idx]->use, blp);
+ return 0;
+}
+
+static struct tle_dev *
+find_free_dev(struct tle_ctx *ctx)
+{
+ uint32_t i;
+
+ if (ctx->nb_dev < RTE_DIM(ctx->dev)) {
+ for (i = 0; i != RTE_DIM(ctx->dev); i++) {
+ if (ctx->dev[i].ctx != ctx)
+ return ctx->dev + i;
+ }
+ }
+
+ rte_errno = ENODEV;
+ return NULL;
+}
+
+struct tle_dev *
+tle_add_dev(struct tle_ctx *ctx, const struct tle_dev_param *dev_prm)
+{
+ int32_t rc;
+ struct tle_dev *dev;
+
+ if (ctx == NULL || dev_prm == NULL || check_dev_prm(dev_prm) != 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ dev = find_free_dev(ctx);
+ if (dev == NULL)
+ return NULL;
+ rc = 0;
+
+ /* device can handle IPv4 traffic */
+ if (dev_prm->local_addr4.s_addr != INADDR_ANY) {
+ rc = init_dev_proto(dev, TLE_V4, ctx->prm.socket_id,
+ &dev_prm->bl4);
+ if (rc == 0)
+ fill_pbm(&ctx->use[TLE_V4], &dev_prm->bl4);
+ }
+
+ /* device can handle IPv6 traffic */
+ if (rc == 0 && memcmp(&dev_prm->local_addr6, &tle_ipv6_any,
+ sizeof(tle_ipv6_any)) != 0) {
+ rc = init_dev_proto(dev, TLE_V6, ctx->prm.socket_id,
+ &dev_prm->bl6);
+ if (rc == 0)
+ fill_pbm(&ctx->use[TLE_V6], &dev_prm->bl6);
+ }
+
+ if (rc != 0) {
+ /* cleanup and return an error. */
+ rte_free(dev->dp[TLE_V4]);
+ rte_free(dev->dp[TLE_V6]);
+ rte_errno = rc;
+ return NULL;
+ }
+
+ /* setup RX data. */
+ if (dev_prm->local_addr4.s_addr != INADDR_ANY &&
+ (dev_prm->rx_offload & DEV_RX_OFFLOAD_IPV4_CKSUM) == 0)
+ dev->rx.ol_flags[TLE_V4] |= PKT_RX_IP_CKSUM_BAD;
+
+ if (((dev_prm->rx_offload & DEV_RX_OFFLOAD_UDP_CKSUM) == 0 &&
+ ctx->prm.proto == TLE_PROTO_UDP) ||
+ ((dev_prm->rx_offload &
+ DEV_RX_OFFLOAD_TCP_CKSUM) == 0 &&
+ ctx->prm.proto == TLE_PROTO_TCP)) {
+ dev->rx.ol_flags[TLE_V4] |= PKT_RX_L4_CKSUM_BAD;
+ dev->rx.ol_flags[TLE_V6] |= PKT_RX_L4_CKSUM_BAD;
+ }
+
+ /* setup TX data. */
+ tle_dring_reset(&dev->tx.dr);
+
+ if ((dev_prm->tx_offload & DEV_TX_OFFLOAD_UDP_CKSUM) != 0 &&
+ ctx->prm.proto == TLE_PROTO_UDP) {
+ dev->tx.ol_flags[TLE_V4] |= PKT_TX_IPV4 | PKT_TX_UDP_CKSUM;
+ dev->tx.ol_flags[TLE_V6] |= PKT_TX_IPV6 | PKT_TX_UDP_CKSUM;
+ } else if ((dev_prm->tx_offload & DEV_TX_OFFLOAD_TCP_CKSUM) != 0 &&
+ ctx->prm.proto == TLE_PROTO_TCP) {
+ dev->tx.ol_flags[TLE_V4] |= PKT_TX_IPV4 | PKT_TX_TCP_CKSUM;
+ dev->tx.ol_flags[TLE_V6] |= PKT_TX_IPV6 | PKT_TX_TCP_CKSUM;
+ }
+
+ if ((dev_prm->tx_offload & DEV_TX_OFFLOAD_IPV4_CKSUM) != 0)
+ dev->tx.ol_flags[TLE_V4] |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
+
+ dev->prm = *dev_prm;
+ dev->ctx = ctx;
+ ctx->nb_dev++;
+
+ return dev;
+}
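
And a matching device-registration sketch, not part of this patch; the offload flags are the ones examined above, the address literal is illustrative:

	struct tle_dev_param dp = {
		.rx_offload = DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM,
		.tx_offload = DEV_TX_OFFLOAD_UDP_CKSUM,
	};
	dp.local_addr4.s_addr = inet_addr("192.168.1.1");	/* any non-ANY addr */
	struct tle_dev *dev = tle_add_dev(ctx, &dp);
	if (dev == NULL)
		rte_exit(EXIT_FAILURE, "tle_add_dev: %d\n", rte_errno);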
+
+static void
+empty_dring(struct tle_dring *dr, uint32_t proto)
+{
+ uint32_t i, k, n;
+ struct tle_stream *s;
+ struct rte_mbuf *pkt[MAX_PKT_BURST];
+ struct tle_drb *drb[MAX_PKT_BURST];
+
+ do {
+ k = RTE_DIM(drb);
+ n = tle_dring_sc_dequeue(dr, (const void **)(uintptr_t)pkt,
+ RTE_DIM(pkt), drb, &k);
+
+ /* free mbufs */
+ for (i = 0; i != n; i++)
+ rte_pktmbuf_free(pkt[i]);
+ /* free drbs */
+ for (i = 0; i != k; i++) {
+ s = drb[i]->udata;
+ tle_stream_ops[proto].free_drbs(s, drb + i, 1);
+ }
+ } while (n != 0);
+}
+
+int
+tle_del_dev(struct tle_dev *dev)
+{
+ uint32_t p;
+ struct tle_ctx *ctx;
+
+ if (dev == NULL || dev->ctx == NULL)
+ return -EINVAL;
+
+ ctx = dev->ctx;
+ p = dev - ctx->dev;
+
+ if (p >= RTE_DIM(ctx->dev) ||
+ (dev->dp[TLE_V4] == NULL &&
+ dev->dp[TLE_V6] == NULL))
+ return -EINVAL;
+
+	/* empty TX queues. */
+ empty_dring(&dev->tx.dr, ctx->prm.proto);
+
+ rte_free(dev->dp[TLE_V4]);
+ rte_free(dev->dp[TLE_V6]);
+ memset(dev, 0, sizeof(*dev));
+ ctx->nb_dev--;
+ return 0;
+}
+
+static struct tle_dev *
+find_ipv4_dev(struct tle_ctx *ctx, const struct in_addr *addr)
+{
+ uint32_t i;
+
+ for (i = 0; i != RTE_DIM(ctx->dev); i++) {
+ if (ctx->dev[i].prm.local_addr4.s_addr == addr->s_addr &&
+ ctx->dev[i].dp[TLE_V4] != NULL)
+ return ctx->dev + i;
+ }
+
+ return NULL;
+}
+
+static struct tle_dev *
+find_ipv6_dev(struct tle_ctx *ctx, const struct in6_addr *addr)
+{
+ uint32_t i;
+
+ for (i = 0; i != RTE_DIM(ctx->dev); i++) {
+ if (memcmp(&ctx->dev[i].prm.local_addr6, addr,
+ sizeof(*addr)) == 0 &&
+ ctx->dev[i].dp[TLE_V6] != NULL)
+ return ctx->dev + i;
+ }
+
+ return NULL;
+}
+
+static int
+stream_fill_dev(struct tle_ctx *ctx, struct tle_stream *s,
+ const struct sockaddr *addr)
+{
+ struct tle_dev *dev;
+ struct tle_pbm *pbm;
+ const struct sockaddr_in *lin4;
+ const struct sockaddr_in6 *lin6;
+ uint32_t i, p, sp, t;
+
+ if (addr->sa_family == AF_INET) {
+ lin4 = (const struct sockaddr_in *)addr;
+ t = TLE_V4;
+ p = lin4->sin_port;
+ } else if (addr->sa_family == AF_INET6) {
+ lin6 = (const struct sockaddr_in6 *)addr;
+ t = TLE_V6;
+ p = lin6->sin6_port;
+ } else
+ return EINVAL;
+
+ p = ntohs(p);
+
+ /* if local address is not wildcard, find device it belongs to. */
+ if (t == TLE_V4 && lin4->sin_addr.s_addr != INADDR_ANY) {
+ dev = find_ipv4_dev(ctx, &lin4->sin_addr);
+ if (dev == NULL)
+ return ENODEV;
+ } else if (t == TLE_V6 && memcmp(&tle_ipv6_any, &lin6->sin6_addr,
+ sizeof(tle_ipv6_any)) != 0) {
+ dev = find_ipv6_dev(ctx, &lin6->sin6_addr);
+ if (dev == NULL)
+ return ENODEV;
+ } else
+ dev = NULL;
+
+ if (dev != NULL)
+ pbm = &dev->dp[t]->use;
+ else
+ pbm = &ctx->use[t];
+
+ /* try to acquire local port number. */
+ if (p == 0) {
+ p = tle_pbm_find_range(pbm, pbm->blk, LPORT_END_BLK);
+ if (p == 0 && pbm->blk > LPORT_START_BLK)
+ p = tle_pbm_find_range(pbm, LPORT_START_BLK, pbm->blk);
+ } else if (tle_pbm_check(pbm, p) != 0)
+ return EEXIST;
+
+ if (p == 0)
+ return ENFILE;
+
+ /* fill stream's dst port and type */
+
+ sp = htons(p);
+ s->type = t;
+ s->port.dst = sp;
+
+ /* mark port as in-use */
+
+ tle_pbm_set(&ctx->use[t], p);
+ if (dev != NULL) {
+ tle_pbm_set(pbm, p);
+ dev->dp[t]->streams[sp] = s;
+ } else {
+ for (i = 0; i != RTE_DIM(ctx->dev); i++) {
+ if (ctx->dev[i].dp[t] != NULL) {
+ tle_pbm_set(&ctx->dev[i].dp[t]->use, p);
+ ctx->dev[i].dp[t]->streams[sp] = s;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int
+stream_clear_dev(struct tle_ctx *ctx, const struct tle_stream *s)
+{
+ struct tle_dev *dev;
+ uint32_t i, p, sp, t;
+
+ t = s->type;
+ sp = s->port.dst;
+ p = ntohs(sp);
+
+ /* if local address is not wildcard, find device it belongs to. */
+ if (t == TLE_V4 && s->ipv4.addr.dst != INADDR_ANY) {
+ dev = find_ipv4_dev(ctx,
+ (const struct in_addr *)&s->ipv4.addr.dst);
+ if (dev == NULL)
+ return ENODEV;
+ } else if (t == TLE_V6 && memcmp(&tle_ipv6_any, &s->ipv6.addr.dst,
+ sizeof(tle_ipv6_any)) != 0) {
+ dev = find_ipv6_dev(ctx,
+ (const struct in6_addr *)&s->ipv6.addr.dst);
+ if (dev == NULL)
+ return ENODEV;
+ } else
+ dev = NULL;
+
+ tle_pbm_clear(&ctx->use[t], p);
+ if (dev != NULL) {
+ if (dev->dp[t]->streams[sp] == s) {
+ tle_pbm_clear(&dev->dp[t]->use, p);
+ dev->dp[t]->streams[sp] = NULL;
+ }
+ } else {
+ for (i = 0; i != RTE_DIM(ctx->dev); i++) {
+ if (ctx->dev[i].dp[t] != NULL &&
+ ctx->dev[i].dp[t]->streams[sp] == s) {
+ tle_pbm_clear(&ctx->dev[i].dp[t]->use, p);
+ ctx->dev[i].dp[t]->streams[sp] = NULL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static void
+fill_ipv4_am(const struct sockaddr_in *in, uint32_t *addr, uint32_t *mask)
+{
+ *addr = in->sin_addr.s_addr;
+ *mask = (*addr == INADDR_ANY) ? INADDR_ANY : INADDR_NONE;
+}
+
+static void
+fill_ipv6_am(const struct sockaddr_in6 *in, rte_xmm_t *addr, rte_xmm_t *mask)
+{
+ const struct in6_addr *pm;
+
+ memcpy(addr, &in->sin6_addr, sizeof(*addr));
+ if (memcmp(&tle_ipv6_any, addr, sizeof(*addr)) == 0)
+ pm = &tle_ipv6_any;
+ else
+ pm = &tle_ipv6_none;
+
+ memcpy(mask, pm, sizeof(*mask));
+}
+
+int
+stream_fill_ctx(struct tle_ctx *ctx, struct tle_stream *s,
+ const struct sockaddr *laddr, const struct sockaddr *raddr)
+{
+ const struct sockaddr_in *rin;
+ int32_t rc;
+
+ /* setup ports and port mask fields (except dst port). */
+ rin = (const struct sockaddr_in *)raddr;
+ s->port.src = rin->sin_port;
+ s->pmsk.src = (s->port.src == 0) ? 0 : UINT16_MAX;
+ s->pmsk.dst = UINT16_MAX;
+
+ /* setup src and dst addresses. */
+ if (laddr->sa_family == AF_INET) {
+ fill_ipv4_am((const struct sockaddr_in *)laddr,
+ &s->ipv4.addr.dst, &s->ipv4.mask.dst);
+ fill_ipv4_am((const struct sockaddr_in *)raddr,
+ &s->ipv4.addr.src, &s->ipv4.mask.src);
+ } else if (laddr->sa_family == AF_INET6) {
+ fill_ipv6_am((const struct sockaddr_in6 *)laddr,
+ &s->ipv6.addr.dst, &s->ipv6.mask.dst);
+ fill_ipv6_am((const struct sockaddr_in6 *)raddr,
+ &s->ipv6.addr.src, &s->ipv6.mask.src);
+ }
+
+ rte_spinlock_lock(&ctx->dev_lock);
+ rc = stream_fill_dev(ctx, s, laddr);
+ rte_spinlock_unlock(&ctx->dev_lock);
+
+ return rc;
+}
+
+/* free stream's destination port */
+int
+stream_clear_ctx(struct tle_ctx *ctx, struct tle_stream *s)
+{
+ int32_t rc;
+
+ rte_spinlock_lock(&ctx->dev_lock);
+ rc = stream_clear_dev(ctx, s);
+ rte_spinlock_unlock(&ctx->dev_lock);
+
+ return rc;
+}
diff --git a/lib/libtle_l4p/ctx.h b/lib/libtle_l4p/ctx.h
new file mode 100644
index 0000000..cc32081
--- /dev/null
+++ b/lib/libtle_l4p/ctx.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _CTX_H_
+#define _CTX_H_
+
+#include <rte_spinlock.h>
+#include <rte_vect.h>
+#include <tle_dring.h>
+#include <tle_ctx.h>
+
+#include "port_bitmap.h"
+#include "osdep.h"
+#include "net_misc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct tle_dport {
+ struct tle_pbm use; /* ports in use. */
+ struct tle_stream *streams[MAX_PORT_NUM]; /* port to stream. */
+};
+
+struct tle_dev {
+ struct tle_ctx *ctx;
+ struct {
+ uint64_t ol_flags[TLE_VNUM];
+ } rx;
+ struct {
+ /* used by FE. */
+ uint64_t ol_flags[TLE_VNUM];
+ rte_atomic32_t packet_id[TLE_VNUM];
+
+ /* used by FE & BE. */
+ struct tle_dring dr;
+ } tx;
+ struct tle_dev_param prm; /* copy of device parameters. */
+ struct tle_dport *dp[TLE_VNUM]; /* device L4 ports */
+};
+
+struct tle_ctx {
+ struct tle_ctx_param prm;
+ struct {
+ rte_spinlock_t lock;
+ uint32_t nb_free; /* number of free streams. */
+ STAILQ_HEAD(, tle_stream) free;
+ void *buf; /* space allocated for streams */
+ } streams;
+
+ rte_spinlock_t dev_lock;
+ uint32_t nb_dev;
+ struct tle_pbm use[TLE_VNUM]; /* all ports in use. */
+ struct tle_dev dev[RTE_MAX_ETHPORTS];
+};
+
+struct stream_ops {
+ int (*init_streams)(struct tle_ctx *);
+ void (*fini_streams)(struct tle_ctx *);
+ void (*free_drbs)(struct tle_stream *, struct tle_drb *[], uint32_t);
+};
+
+extern struct stream_ops tle_stream_ops[TLE_PROTO_NUM];
+
+int stream_fill_ctx(struct tle_ctx *ctx, struct tle_stream *s,
+ const struct sockaddr *laddr, const struct sockaddr *raddr);
+
+int stream_clear_ctx(struct tle_ctx *ctx, struct tle_stream *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CTX_H_ */
diff --git a/lib/libtle_l4p/debug.h b/lib/libtle_l4p/debug.h
new file mode 100644
index 0000000..b2a8b52
--- /dev/null
+++ b/lib/libtle_l4p/debug.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _DEBUG_H_
+#define _DEBUG_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FUNC_SEQ_VERIFY(v) do { \
+ static uint64_t nb_call; \
+ static typeof(v) x; \
+ if (nb_call++ != 0) \
+ RTE_VERIFY(tcp_seq_leq(x, v)); \
+ x = (v); \
+} while (0)
+
+#define FUNC_VERIFY(e, c) do { \
+ static uint64_t nb_call; \
+ if ((e) == 0) \
+ nb_call++; \
+ else \
+ nb_call = 0; \
+ RTE_VERIFY(nb_call != (c)); \
+} while (0)
+
+#define FUNC_STAT(v, c) do { \
+ static uint64_t nb_call, nb_data; \
+ nb_call++; \
+ nb_data += (v); \
+ if ((nb_call & ((c) - 1)) == 0) { \
+ printf("%s#%d@%u: nb_call=%lu, avg(" #v ")=%#Lf\n", \
+ __func__, __LINE__, rte_lcore_id(), nb_call, \
+ (long double)nb_data / nb_call); \
+ nb_call = 0; \
+ nb_data = 0; \
+ } \
+} while (0)
+
+#define FUNC_TM_STAT(v, c) do { \
+ static uint64_t nb_call, nb_data; \
+ static uint64_t cts, pts, sts; \
+ cts = rte_rdtsc(); \
+ if (pts != 0) \
+ sts += cts - pts; \
+ pts = cts; \
+ nb_call++; \
+ nb_data += (v); \
+ if ((nb_call & ((c) - 1)) == 0) { \
+ printf("%s#%d@%u: nb_call=%lu, " \
+ "avg(" #v ")=%#Lf, " \
+ "avg(cycles)=%#Lf, " \
+ "avg(cycles/" #v ")=%#Lf\n", \
+ __func__, __LINE__, rte_lcore_id(), nb_call, \
+ (long double)nb_data / nb_call, \
+ (long double)sts / nb_call, \
+ (long double)sts / nb_data); \
+ nb_call = 0; \
+ nb_data = 0; \
+ sts = 0; \
+ } \
+} while (0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DEBUG_H_ */
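
Usage note, illustrative rather than part of the patch: each macro expansion above keeps its own static counters, so one expansion profiles one call site, and the period c must be a power of two because the averages are flushed on the (nb_call & ((c) - 1)) == 0 test. A hedged sketch of wiring FUNC_TM_STAT into a polling routine; rx_burst here is a made-up stand-in:

#include <stdio.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include "debug.h"

/* stub standing in for a real RX routine (illustration only) */
static uint32_t
rx_burst(struct rte_mbuf *pkt[], uint32_t num)
{
	(void)pkt;
	return num;
}

static uint32_t
rx_poll(struct rte_mbuf *pkt[], uint32_t num)
{
	uint32_t n;

	n = rx_burst(pkt, num);
	/* every 2^16 calls: print avg packets/call and avg cycles/packet */
	FUNC_TM_STAT(n, 1U << 16);
	return n;
}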
diff --git a/lib/libtle_udp/event.c b/lib/libtle_l4p/event.c
index 66c5a3b..66c5a3b 100644
--- a/lib/libtle_udp/event.c
+++ b/lib/libtle_l4p/event.c
diff --git a/lib/libtle_udp/misc.h b/lib/libtle_l4p/misc.h
index ffe665f..55dca10 100644
--- a/lib/libtle_udp/misc.h
+++ b/lib/libtle_l4p/misc.h
@@ -21,6 +21,30 @@ extern "C" {
#endif
static inline int
+xmm_cmp(const rte_xmm_t *da, const rte_xmm_t *sa)
+{
+ uint64_t ret;
+
+ ret = (sa->u64[0] ^ da->u64[0]) |
+ (sa->u64[1] ^ da->u64[1]);
+
+ return (ret != 0);
+}
+
+static inline int
+ymm_cmp(const _ymm_t *da, const _ymm_t *sa)
+{
+ uint64_t ret;
+
+ ret = (sa->u64[0] ^ da->u64[0]) |
+ (sa->u64[1] ^ da->u64[1]) |
+ (sa->u64[2] ^ da->u64[2]) |
+ (sa->u64[3] ^ da->u64[3]);
+
+ return (ret != 0);
+}
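
xmm_cmp() and ymm_cmp() are branch-free equality tests: XOR is zero only for identical words, and OR-ing the per-word results leaves a single zero/non-zero answer, which is cheaper than memcmp() for fixed 16/32-byte keys such as IPv6 addresses. A standalone check of the idiom:

#include <stdint.h>
#include <string.h>
#include <assert.h>

struct k2 { uint64_t u64[2]; };	/* stands in for rte_xmm_t */

static int
k2_neq(const struct k2 *a, const struct k2 *b)
{
	return ((a->u64[0] ^ b->u64[0]) | (a->u64[1] ^ b->u64[1])) != 0;
}

int
main(void)
{
	struct k2 a = {{1, 2}}, b = {{1, 2}}, c = {{1, 3}};

	assert(k2_neq(&a, &b) == (memcmp(&a, &b, sizeof(a)) != 0));
	assert(k2_neq(&a, &c) == (memcmp(&a, &c, sizeof(a)) != 0));
	return 0;
}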
+
+static inline int
ymm_mask_cmp(const _ymm_t *da, const _ymm_t *sa, const _ymm_t *sm)
{
uint64_t ret;
@@ -75,11 +99,11 @@ _tx_offload_l4_offset(uint64_t ofl)
/**
* Process the non-complemented checksum of a buffer.
- * Similar to rte_raw_cksum(), but provide better perfomance
+ * Similar to rte_raw_cksum(), but provide better performance
* (at least on IA platforms).
* @param buf
* Pointer to the buffer.
- * @param len
+ * @param size
* Length of the buffer.
* @return
* The non-complemented checksum.
@@ -89,7 +113,7 @@ __raw_cksum(const uint8_t *buf, uint32_t size)
{
uint64_t s, sum;
uint32_t i, n;
- uint32_t dw1, dw2;
+ uint32_t dw1, dw2;
uint16_t w1, w2;
const uint64_t *b;
@@ -124,7 +148,6 @@ __raw_cksum(const uint8_t *buf, uint32_t size)
return w1;
}
-
/**
* Process UDP or TCP checksum over possibly multi-segmented packet.
* @param mb
@@ -223,7 +246,7 @@ _ipv4_udptcp_mbuf_cksum(const struct rte_mbuf *mb, uint16_t l4_ofs,
uint32_t cksum;
cksum = _ipv4x_phdr_cksum(ipv4_hdr, mb->l3_len, 0);
- cksum = __udptcp_mbuf_cksum(mb, l4_ofs, cksum);
+ cksum = __udptcp_mbuf_cksum(mb, l4_ofs, cksum);
return cksum;
}
@@ -247,7 +270,7 @@ _ipv6_udptcp_mbuf_cksum(const struct rte_mbuf *mb, uint16_t l4_ofs,
uint32_t cksum;
cksum = rte_ipv6_phdr_cksum(ipv6_hdr, 0);
- cksum = __udptcp_mbuf_cksum(mb, l4_ofs, cksum);
+ cksum = __udptcp_mbuf_cksum(mb, l4_ofs, cksum);
return cksum;
}
@@ -261,6 +284,46 @@ _ipv4x_cksum(const void *iph, size_t len)
return (cksum == 0xffff) ? cksum : ~cksum;
}
+static inline int
+check_pkt_csum(const struct rte_mbuf *m, uint64_t ol_flags, uint32_t type,
+ uint32_t proto)
+{
+ const struct ipv4_hdr *l3h4;
+ const struct ipv6_hdr *l3h6;
+ const struct udp_hdr *l4h;
+ int32_t ret;
+ uint16_t csum;
+
+ ret = 0;
+ l3h4 = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, m->l2_len);
+ l3h6 = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, m->l2_len);
+
+ if ((ol_flags & PKT_RX_IP_CKSUM_BAD) != 0) {
+ csum = _ipv4x_cksum(l3h4, m->l3_len);
+ ret = (csum != UINT16_MAX);
+ }
+
+ if (ret == 0 && (ol_flags & PKT_RX_L4_CKSUM_BAD) != 0) {
+
+ /*
+ * for IPv4 it is allowed to have zero UDP cksum,
+ * for IPv6 valid UDP cksum is mandatory.
+ */
+ if (type == TLE_V4) {
+ l4h = (const struct udp_hdr *)((uintptr_t)l3h4 +
+ m->l3_len);
+ csum = (proto == IPPROTO_UDP && l4h->dgram_cksum == 0) ?
+ UINT16_MAX : _ipv4_udptcp_mbuf_cksum(m,
+ m->l2_len + m->l3_len, l3h4);
+ } else
+ csum = _ipv6_udptcp_mbuf_cksum(m,
+ m->l2_len + m->l3_len, l3h6);
+
+ ret = (csum != UINT16_MAX);
+ }
+
+ return ret;
+}
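
check_pkt_csum() leans on the classic one's-complement property: when the checksum already stored in the packet participates in the sum, a valid packet folds to 0xffff, hence the (csum != UINT16_MAX) tests above. A standalone sketch of that property, with byte handling simplified relative to rte_raw_cksum():

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

static uint16_t
fold16(const uint8_t *p, size_t len)
{
	uint32_t sum = 0;

	for (; len > 1; len -= 2, p += 2)
		sum += (uint32_t)p[0] << 8 | p[1];
	if (len != 0)
		sum += (uint32_t)p[0] << 8;
	/* end-around carry fold to 16 bits */
	while (sum > UINT16_MAX)
		sum = (sum & UINT16_MAX) + (sum >> 16);
	return sum;
}

int
main(void)
{
	uint8_t pkt[8] = {1, 2, 3, 4, 0, 0, 7, 8};
	uint16_t c = ~fold16(pkt, sizeof(pkt));	/* checksum the data */

	pkt[4] = c >> 8;	/* store it in the (zeroed) cksum field */
	pkt[5] = c & 0xff;
	/* re-summing the whole packet now folds to 0xffff */
	printf("%#x\n", fold16(pkt, sizeof(pkt)));
	return 0;
}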
/*
* Analog of read-write locks, very much in favour of read side.
@@ -304,6 +367,47 @@ rwl_up(rte_atomic32_t *p)
rte_atomic32_sub(p, INT32_MIN);
}
+/* exclude NULLs from the final list of packets. */
+static inline uint32_t
+compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero)
+{
+ uint32_t i, j, k, l;
+
+ for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) {
+
+ /* found a hole. */
+ if (pkt[j] == NULL) {
+
+ /* find how big is it. */
+ for (i = j; i-- != 0 && pkt[i] == NULL; )
+ ;
+ /* fill the hole. */
+ for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++)
+ pkt[l] = pkt[k];
+
+ nb_pkt -= j - i;
+ nb_zero -= j - i;
+ j = i + 1;
+ }
+ }
+
+ return nb_pkt;
+}
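
compress_pkt_list() walks backward, slides the tail over each run of NULLs, and stops once all nb_zero holes are consumed. A standalone demo of the same contract, written as the simpler forward pass (identical result: survivors keep their relative order):

#include <stddef.h>
#include <stdio.h>

static size_t
compress(const char *arr[], size_t n)
{
	size_t i, k;

	for (i = 0, k = 0; i != n; i++)
		if (arr[i] != NULL)
			arr[k++] = arr[i];	/* close the holes */
	return k;
}

int
main(void)
{
	const char *p[] = {"a", NULL, NULL, "b", "c", NULL, "d"};
	size_t i, n = compress(p, 7);

	for (i = 0; i != n; i++)
		printf("%s ", p[i]);	/* prints: a b c d */
	putchar('\n');
	return 0;
}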
+
+/* empty ring and free queued mbufs */
+static inline void
+empty_mbuf_ring(struct rte_ring *r)
+{
+ uint32_t i, n;
+ struct rte_mbuf *mb[MAX_PKT_BURST];
+
+ do {
+ n = rte_ring_dequeue_burst(r, (void **)mb, RTE_DIM(mb));
+ for (i = 0; i != n; i++)
+ rte_pktmbuf_free(mb[i]);
+ } while (n != 0);
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/libtle_l4p/net_misc.h b/lib/libtle_l4p/net_misc.h
new file mode 100644
index 0000000..2d8dac2
--- /dev/null
+++ b/lib/libtle_l4p/net_misc.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NET_MISC_H_
+#define _NET_MISC_H_
+
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include "osdep.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PKT_L234_HLEN(m) (_tx_offload_l4_offset((m)->tx_offload))
+#define PKT_L4_PLEN(m) ((m)->pkt_len - PKT_L234_HLEN(m))
+
+/*
+ * Some network protocols related structures definitions.
+ * Main purpose to simplify (and optimise) processing and representation
+ * of protocol related data.
+ */
+
+enum {
+ TLE_V4,
+ TLE_V6,
+ TLE_VNUM
+};
+
+extern const struct in6_addr tle_ipv6_any;
+extern const struct in6_addr tle_ipv6_none;
+
+union l4_ports {
+ uint32_t raw;
+ struct {
+ uint16_t src;
+ uint16_t dst;
+ };
+};
+
+union ipv4_addrs {
+ uint64_t raw;
+ struct {
+ uint32_t src;
+ uint32_t dst;
+ };
+};
+
+union ipv6_addrs {
+ _ymm_t raw;
+ struct {
+ rte_xmm_t src;
+ rte_xmm_t dst;
+ };
+};
+
+union ip_addrs {
+ union ipv4_addrs v4;
+ union ipv6_addrs v6;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _NET_MISC_H_ */
diff --git a/lib/libtle_udp/osdep.h b/lib/libtle_l4p/osdep.h
index 8e91964..ed7e883 100644
--- a/lib/libtle_udp/osdep.h
+++ b/lib/libtle_l4p/osdep.h
@@ -17,6 +17,8 @@
#define _OSDEP_H_
#include <rte_vect.h>
+#include <rte_memcpy.h>
+#include <rte_spinlock.h>
#include <rte_log.h>
#ifdef __cplusplus
@@ -36,6 +38,8 @@ extern "C" {
#define UDP_LOG(lvl, fmt, args...) RTE_LOG(lvl, USER1, fmt, ##args)
+#define TCP_LOG(lvl, fmt, args...) RTE_LOG(lvl, USER1, fmt, ##args)
+
/*
* if no AVX support, define _ymm_t here.
*/
@@ -60,6 +64,8 @@ typedef union _ymm {
#endif /* __AVX__ */
+#include "debug.h"
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/libtle_udp/port_bitmap.h b/lib/libtle_l4p/port_bitmap.h
index 6aff4e6..c0420d5 100644
--- a/lib/libtle_udp/port_bitmap.h
+++ b/lib/libtle_l4p/port_bitmap.h
@@ -21,7 +21,7 @@ extern "C" {
#endif
/*
- * Simple implementation of bitmap for all possible UDP ports [0-UINT16_MAX].
+ * Simple implementation of bitmap for all possible L4 ports [0-UINT16_MAX].
*/
#define MAX_PORT_NUM (UINT16_MAX + 1)
@@ -31,14 +31,14 @@ extern "C" {
#define MAX_PORT_BLK PORT_BLK(MAX_PORT_NUM)
-struct udp_pbm {
+struct tle_pbm {
uint32_t nb_set; /* number of bits set. */
uint32_t blk; /* last block with free entry. */
uint32_t bm[MAX_PORT_BLK];
};
static inline void
-udp_pbm_init(struct udp_pbm *pbm, uint32_t blk)
+tle_pbm_init(struct tle_pbm *pbm, uint32_t blk)
{
pbm->bm[0] = 1;
pbm->nb_set = 1;
@@ -46,7 +46,7 @@ udp_pbm_init(struct udp_pbm *pbm, uint32_t blk)
}
static inline void
-udp_pbm_set(struct udp_pbm *pbm, uint16_t port)
+tle_pbm_set(struct tle_pbm *pbm, uint16_t port)
{
uint32_t i, b, v;
@@ -58,7 +58,7 @@ udp_pbm_set(struct udp_pbm *pbm, uint16_t port)
}
static inline void
-udp_pbm_clear(struct udp_pbm *pbm, uint16_t port)
+tle_pbm_clear(struct tle_pbm *pbm, uint16_t port)
{
uint32_t i, b, v;
@@ -71,7 +71,7 @@ udp_pbm_clear(struct udp_pbm *pbm, uint16_t port)
static inline uint32_t
-udp_pbm_check(const struct udp_pbm *pbm, uint16_t port)
+tle_pbm_check(const struct tle_pbm *pbm, uint16_t port)
{
uint32_t i, v;
@@ -81,7 +81,7 @@ udp_pbm_check(const struct udp_pbm *pbm, uint16_t port)
}
static inline uint16_t
-udp_pbm_find_range(struct udp_pbm *pbm, uint32_t start_blk, uint32_t end_blk)
+tle_pbm_find_range(struct tle_pbm *pbm, uint32_t start_blk, uint32_t end_blk)
{
uint32_t i, v;
uint16_t p;
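
A usage sketch of the renamed bitmap API, mirroring the wrap-around port search that stream_fill_dev() in ctx.c performs; the body of tle_pbm_find_range() is not shown in this hunk, so it is assumed to only locate a free port, leaving the marking to the caller:

#include "port_bitmap.h"

/* illustrative helper, not part of this patch */
static uint16_t
alloc_port(struct tle_pbm *pbm)
{
	uint16_t p;

	/* search from the last block known to have free entries... */
	p = tle_pbm_find_range(pbm, pbm->blk, MAX_PORT_BLK);
	/* ...then wrap around to the start of the range */
	if (p == 0 && pbm->blk != 0)
		p = tle_pbm_find_range(pbm, 0, pbm->blk);
	if (p != 0)
		tle_pbm_set(pbm, p);
	/* 0 means exhausted: tle_pbm_init() pre-sets bit 0,
	 * so port 0 is never handed out */
	return p;
}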
diff --git a/lib/libtle_l4p/stream.h b/lib/libtle_l4p/stream.h
new file mode 100644
index 0000000..f3b5828
--- /dev/null
+++ b/lib/libtle_l4p/stream.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _STREAM_H_
+#define _STREAM_H_
+
+#include "ctx.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Common structure that must be present as the first field in all
+ * particular L4 (UDP/TCP, etc.) stream implementations.
+ */
+struct tle_stream {
+
+ STAILQ_ENTRY(tle_stream) link;
+ struct tle_ctx *ctx;
+
+ uint8_t type; /* TLE_V4 or TLE_V6 */
+
+ /* Stream address information. */
+ union l4_ports port;
+ union l4_ports pmsk;
+
+ union {
+ struct {
+ union ipv4_addrs addr;
+ union ipv4_addrs mask;
+ } ipv4;
+ struct {
+ union ipv6_addrs addr;
+ union ipv6_addrs mask;
+ } ipv6;
+ };
+};
+
+static inline uint32_t
+get_streams(struct tle_ctx *ctx, struct tle_stream *s[], uint32_t num)
+{
+ struct tle_stream *p;
+ uint32_t i, n;
+
+ rte_spinlock_lock(&ctx->streams.lock);
+
+ n = RTE_MIN(ctx->streams.nb_free, num);
+ for (i = 0, p = STAILQ_FIRST(&ctx->streams.free);
+ i != n;
+ i++, p = STAILQ_NEXT(p, link))
+ s[i] = p;
+
+ if (p == NULL)
+ /* we retrieved all free entries */
+ STAILQ_INIT(&ctx->streams.free);
+ else
+ STAILQ_FIRST(&ctx->streams.free) = p;
+
+ ctx->streams.nb_free -= n;
+ rte_spinlock_unlock(&ctx->streams.lock);
+ return n;
+}
+
+static inline struct tle_stream *
+get_stream(struct tle_ctx *ctx)
+{
+ struct tle_stream *s;
+
+ s = NULL;
+ if (ctx->streams.nb_free == 0)
+ return s;
+
+ get_streams(ctx, &s, 1);
+ return s;
+}
+
+static inline void
+put_stream(struct tle_ctx *ctx, struct tle_stream *s, int32_t head)
+{
+ s->type = TLE_VNUM;
+ rte_spinlock_lock(&ctx->streams.lock);
+ if (head != 0)
+ STAILQ_INSERT_HEAD(&ctx->streams.free, s, link);
+ else
+ STAILQ_INSERT_TAIL(&ctx->streams.free, s, link);
+ ctx->streams.nb_free++;
+ rte_spinlock_unlock(&ctx->streams.lock);
+}
+
+/* calculate number of drbs per stream. */
+static inline uint32_t
+calc_stream_drb_num(const struct tle_ctx *ctx, uint32_t obj_num)
+{
+ uint32_t num;
+
+ num = (ctx->prm.max_stream_sbufs + obj_num - 1) / obj_num;
+ num = num + num / 2;
+ num = RTE_MAX(num, RTE_DIM(ctx->dev) + 1);
+ return num;
+}
+
+static inline uint32_t
+drb_nb_elem(const struct tle_ctx *ctx)
+{
+ return (ctx->prm.send_bulk_size != 0) ?
+ ctx->prm.send_bulk_size : MAX_PKT_BURST;
+}
+
+static inline int32_t
+stream_get_dest(struct tle_stream *s, const void *dst_addr,
+ struct tle_dest *dst)
+{
+ int32_t rc;
+ const struct in_addr *d4;
+ const struct in6_addr *d6;
+ struct tle_ctx *ctx;
+ struct tle_dev *dev;
+
+ ctx = s->ctx;
+
+ /* it is here just to keep gcc happy. */
+ d4 = NULL;
+
+ if (s->type == TLE_V4) {
+ d4 = dst_addr;
+ rc = ctx->prm.lookup4(ctx->prm.lookup4_data, d4, dst);
+ } else if (s->type == TLE_V6) {
+ d6 = dst_addr;
+ rc = ctx->prm.lookup6(ctx->prm.lookup6_data, d6, dst);
+ } else
+ rc = -ENOENT;
+
+ if (rc < 0 || dst->dev == NULL || dst->dev->ctx != ctx)
+ return -ENOENT;
+
+ dev = dst->dev;
+ if (s->type == TLE_V4) {
+ struct ipv4_hdr *l3h;
+ l3h = (struct ipv4_hdr *)(dst->hdr + dst->l2_len);
+ l3h->src_addr = dev->prm.local_addr4.s_addr;
+ l3h->dst_addr = d4->s_addr;
+ } else {
+ struct ipv6_hdr *l3h;
+ l3h = (struct ipv6_hdr *)(dst->hdr + dst->l2_len);
+ rte_memcpy(l3h->src_addr, &dev->prm.local_addr6,
+ sizeof(l3h->src_addr));
+ rte_memcpy(l3h->dst_addr, d6, sizeof(l3h->dst_addr));
+ }
+
+ return dev - ctx->dev;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _STREAM_H_ */
diff --git a/lib/libtle_l4p/stream_table.c b/lib/libtle_l4p/stream_table.c
new file mode 100644
index 0000000..5a89553
--- /dev/null
+++ b/lib/libtle_l4p/stream_table.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <string.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+
+#include "stream_table.h"
+
+void
+stbl_fini(struct stbl *st)
+{
+ uint32_t i;
+
+ for (i = 0; i != RTE_DIM(st->ht); i++) {
+ rte_hash_free(st->ht[i].t);
+ rte_free(st->ht[i].ent);
+ }
+
+ memset(st, 0, sizeof(*st));
+}
+
+int
+stbl_init(struct stbl *st, uint32_t num, int32_t socket)
+{
+ int32_t rc;
+ size_t i, sz;
+ struct rte_hash_parameters hprm;
+ char buf[RTE_HASH_NAMESIZE];
+
+ num = RTE_MAX(5 * num / 4, 0x10U);
+
+ memset(&hprm, 0, sizeof(hprm));
+ hprm.name = buf;
+ hprm.entries = num;
+ hprm.socket_id = socket;
+
+ rc = 0;
+
+ snprintf(buf, sizeof(buf), "stbl4@%p", st);
+ hprm.key_len = sizeof(struct stbl4_key);
+ st->ht[TLE_V4].t = rte_hash_create(&hprm);
+ if (st->ht[TLE_V4].t == NULL)
+ rc = (rte_errno != 0) ? -rte_errno : -ENOMEM;
+
+ if (rc == 0) {
+ snprintf(buf, sizeof(buf), "stbl6@%p", st);
+ hprm.key_len = sizeof(struct stbl6_key);
+ st->ht[TLE_V6].t = rte_hash_create(&hprm);
+ if (st->ht[TLE_V6].t == NULL)
+ rc = (rte_errno != 0) ? -rte_errno : -ENOMEM;
+ }
+
+ for (i = 0; i != RTE_DIM(st->ht) && rc == 0; i++) {
+
+ sz = sizeof(*st->ht[i].ent) * num;
+ st->ht[i].ent = rte_zmalloc_socket(NULL, sz,
+ RTE_CACHE_LINE_SIZE, socket);
+ if (st->ht[i].ent == NULL)
+ rc = -ENOMEM;
+ else
+ st->ht[i].nb_ent = num;
+ }
+
+ if (rc != 0)
+ stbl_fini(st);
+
+ return rc;
+}
diff --git a/lib/libtle_l4p/stream_table.h b/lib/libtle_l4p/stream_table.h
new file mode 100644
index 0000000..8ad1103
--- /dev/null
+++ b/lib/libtle_l4p/stream_table.h
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _STREAM_TABLE_H_
+#define _STREAM_TABLE_H_
+
+#include <rte_hash.h>
+#include "tcp_misc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* current stbl entry contains packet. */
+#define STE_PKT 1
+
+struct stbl_entry {
+ void *data;
+};
+
+struct shtbl {
+ uint32_t nb_ent; /* max number of entries in the table. */
+ rte_spinlock_t l; /* lock to protect the hash table */
+ struct rte_hash *t;
+ struct stbl_entry *ent;
+} __rte_cache_aligned;
+
+struct stbl {
+ struct shtbl ht[TLE_VNUM];
+};
+
+struct stbl4_key {
+ union l4_ports port;
+ union ipv4_addrs addr;
+} __attribute__((__packed__));
+
+struct stbl6_key {
+ union l4_ports port;
+ union ipv6_addrs addr;
+} __attribute__((__packed__));
+
+struct stbl_key {
+ union l4_ports port;
+ union {
+ union ipv4_addrs addr4;
+ union ipv6_addrs addr6;
+ };
+} __attribute__((__packed__));
+
+extern void stbl_fini(struct stbl *st);
+
+extern int stbl_init(struct stbl *st, uint32_t num, int32_t socket);
+
+static inline void
+stbl_pkt_fill_key(struct stbl_key *k, const union pkt_info *pi, uint32_t type)
+{
+ static const struct stbl_key zero = {
+ .port.raw = 0,
+ };
+
+ k->port = pi->port;
+ if (type == TLE_V4)
+ k->addr4 = pi->addr4;
+ else if (type == TLE_V6)
+ k->addr6 = *pi->addr6;
+ else
+ *k = zero;
+}
+
+static inline void
+stbl_lock(struct stbl *st, uint32_t type)
+{
+ rte_spinlock_lock(&st->ht[type].l);
+}
+
+static inline void
+stbl_unlock(struct stbl *st, uint32_t type)
+{
+ rte_spinlock_unlock(&st->ht[type].l);
+}
+
+static inline struct stbl_entry *
+stbl_add_entry(struct stbl *st, const union pkt_info *pi)
+{
+ int32_t rc;
+ uint32_t type;
+ struct shtbl *ht;
+ struct stbl_key k;
+
+ type = pi->tf.type;
+ stbl_pkt_fill_key(&k, pi, type);
+ ht = st->ht + type;
+
+ rc = rte_hash_add_key(ht->t, &k);
+ if ((uint32_t)rc >= ht->nb_ent)
+ return NULL;
+ return ht->ent + rc;
+}
+
+static inline struct stbl_entry *
+stbl_add_pkt(struct stbl *st, const union pkt_info *pi, const void *pkt)
+{
+ struct stbl_entry *se;
+
+ se = stbl_add_entry(st, pi);
+ if (se != NULL)
+ se->data = (void *)((uintptr_t)pkt | STE_PKT);
+ return se;
+}
+
+static inline struct stbl_entry *
+stbl_find_entry(struct stbl *st, const union pkt_info *pi)
+{
+ int32_t rc;
+ uint32_t type;
+ struct shtbl *ht;
+ struct stbl_key k;
+
+ type = pi->tf.type;
+ stbl_pkt_fill_key(&k, pi, type);
+ ht = st->ht + type;
+
+ rc = rte_hash_lookup(ht->t, &k);
+ if ((uint32_t)rc >= ht->nb_ent)
+ return NULL;
+ return ht->ent + rc;
+}
+
+static inline int
+stbl_data_pkt(const void *p)
+{
+ return ((uintptr_t)p & STE_PKT);
+}
+
+static inline void *
+stbl_get_pkt(const struct stbl_entry *se)
+{
+ return (void *)((uintptr_t)se->data ^ STE_PKT);
+}
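
STE_PKT is a low-bit pointer tag: mbuf and stream pointers are at least word aligned, so bit 0 is free to record what the entry holds; stbl_data_pkt() tests the bit and stbl_get_pkt() strips it with XOR, which is valid only because the bit is known to be set there. A standalone sketch:

#include <stdint.h>
#include <assert.h>

#define TAG_PKT 1u

static void *
tag_pkt(void *p)
{
	return (void *)((uintptr_t)p | TAG_PKT);
}

static int
is_pkt(const void *p)
{
	return (uintptr_t)p & TAG_PKT;
}

static void *
untag_pkt(void *p)
{
	/* XOR strips the tag, assuming it is set */
	return (void *)((uintptr_t)p ^ TAG_PKT);
}

int
main(void)
{
	uint64_t obj;	/* aligned object standing in for an mbuf */
	void *e = tag_pkt(&obj);

	assert(is_pkt(e));
	assert(untag_pkt(e) == (void *)&obj);
	return 0;
}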
+
+static inline void *
+stbl_find_data(struct stbl *st, const union pkt_info *pi)
+{
+ struct stbl_entry *ent;
+
+ ent = stbl_find_entry(st, pi);
+ return (ent == NULL) ? NULL : ent->data;
+}
+
+static inline void
+stbl_del_pkt(struct stbl *st, struct stbl_entry *se, const union pkt_info *pi)
+{
+ uint32_t type;
+ struct stbl_key k;
+
+ se->data = NULL;
+
+ type = pi->tf.type;
+ stbl_pkt_fill_key(&k, pi, type);
+ rte_hash_del_key(st->ht[type].t, &k);
+}
+
+static inline void
+stbl_del_pkt_lock(struct stbl *st, struct stbl_entry *se,
+ const union pkt_info *pi)
+{
+ uint32_t type;
+ struct stbl_key k;
+
+ se->data = NULL;
+
+ type = pi->tf.type;
+ stbl_pkt_fill_key(&k, pi, type);
+ stbl_lock(st, type);
+ rte_hash_del_key(st->ht[type].t, &k);
+ stbl_unlock(st, type);
+}
+
+#include "tcp_stream.h"
+
+static inline void
+stbl_stream_fill_key(struct stbl_key *k, const struct tle_stream *s,
+ uint32_t type)
+{
+ static const struct stbl_key zero = {
+ .port.raw = 0,
+ };
+
+ k->port = s->port;
+ if (type == TLE_V4)
+ k->addr4 = s->ipv4.addr;
+ else if (type == TLE_V6)
+ k->addr6 = s->ipv6.addr;
+ else
+ *k = zero;
+}
+
+static inline struct stbl_entry *
+stbl_add_stream_lock(struct stbl *st, const struct tle_tcp_stream *s)
+{
+ uint32_t type;
+ struct stbl_key k;
+ struct stbl_entry *se;
+ struct shtbl *ht;
+ int32_t rc;
+
+ type = s->s.type;
+ stbl_stream_fill_key(&k, &s->s, type);
+ ht = st->ht + type;
+
+ stbl_lock(st, type);
+ rc = rte_hash_add_key(ht->t, &k);
+ stbl_unlock(st, type);
+
+ if ((uint32_t)rc >= ht->nb_ent)
+ return NULL;
+
+ se = ht->ent + rc;
+ se->data = (void *)(uintptr_t)s;
+
+ return se;
+}
+
+static inline void
+stbl_del_stream_lock(struct stbl *st, struct stbl_entry *se,
+ const struct tle_tcp_stream *s)
+{
+ uint32_t type;
+ struct stbl_key k;
+
+ if (se == NULL)
+ return;
+
+ se->data = NULL;
+
+ type = s->s.type;
+ stbl_stream_fill_key(&k, &s->s, type);
+ stbl_lock(st, type);
+ rte_hash_del_key(st->ht[type].t, &k);
+ stbl_unlock(st, type);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _STREAM_TABLE_H_ */
diff --git a/lib/libtle_l4p/syncookie.h b/lib/libtle_l4p/syncookie.h
new file mode 100644
index 0000000..276d45a
--- /dev/null
+++ b/lib/libtle_l4p/syncookie.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _SYNCOOKIE_H_
+#define _SYNCOOKIE_H_
+
+#include "tcp_misc.h"
+#include <rte_jhash.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SYNC_SEED0 0x736f6d65
+#define SYNC_SEED1 0x646f7261
+
+struct sync_in4 {
+ uint32_t seq;
+ union l4_ports port;
+ union ipv4_addrs addr;
+};
+
+static const rte_xmm_t mss4len = {
+ .u32 = {
+ TCP4_MIN_MSS, /* 536 */
+ 1300,
+ TCP4_OP_MSS, /* 1440 */
+ TCP4_NOP_MSS, /* 1460 */
+ },
+};
+
+static const rte_xmm_t mss6len = {
+ .u32 = {
+ TCP6_MIN_MSS, /* 1220 */
+ TCP6_OP_MSS, /* 1420 */
+ TCP6_NOP_MSS, /* 1440 */
+ 8940,
+ },
+};
+
+#define SYNC_MSS_BITS 2
+#define SYNC_MSS_MASK ((1 << SYNC_MSS_BITS) - 1)
+
+#define SYNC_TMS_WSCALE_BITS 4
+#define SYNC_TMS_WSCALE_MASK ((1 << SYNC_TMS_WSCALE_BITS) - 1)
+
+#define SYNC_TMS_RESERVE_BITS 2
+
+#define SYNC_TMS_OPT_BITS (SYNC_TMS_WSCALE_BITS + SYNC_TMS_RESERVE_BITS)
+#define SYNC_TMS_OPT_MASK ((1 << SYNC_TMS_OPT_BITS) - 1)
+
+/* allow around 2 minutes for 3-way handshake. */
+#define SYNC_MAX_TMO 0x20000
+
+
+/* ??? use SipHash as FreeBSD does. ??? */
+static inline uint32_t
+sync_hash4(const union pkt_info *pi, uint32_t seq)
+{
+ uint32_t v0, v1;
+ struct sync_in4 in4;
+
+ in4.seq = seq;
+ in4.port = pi->port;
+ in4.addr = pi->addr4;
+
+ v0 = SYNC_SEED0;
+ v1 = SYNC_SEED1;
+ rte_jhash_32b_2hashes(&in4.seq, sizeof(in4) / sizeof(uint32_t),
+ &v0, &v1);
+ return v0 + v1;
+}
+
+static inline uint32_t
+sync_hash6(const union pkt_info *pi, uint32_t seq)
+{
+ uint32_t v0, v1;
+
+ v0 = SYNC_SEED0;
+ v1 = SYNC_SEED1;
+ rte_jhash_32b_2hashes(pi->addr6->raw.u32,
+ sizeof(*pi->addr6) / sizeof(uint32_t), &v0, &v1);
+ return rte_jhash_3words(v0, seq, pi->port.raw, v1);
+}
+
+static inline uint32_t
+sync_mss2idx(uint16_t mss, const rte_xmm_t *msl)
+{
+ if (mss >= msl->u32[2])
+ return (mss >= msl->u32[3]) ? 3 : 2;
+ else
+ return (mss >= msl->u32[1]) ? 1 : 0;
+}
+
+static inline uint32_t
+sync_gen_seq(const union pkt_info *pi, uint32_t seq, uint32_t ts, uint16_t mss)
+{
+ uint32_t h, mi;
+
+ if (pi->tf.type == TLE_V4) {
+ h = sync_hash4(pi, seq);
+ mi = sync_mss2idx(mss, &mss4len);
+ } else {
+ h = sync_hash6(pi, seq);
+ mi = sync_mss2idx(mss, &mss6len);
+ }
+
+ h += (ts & ~SYNC_MSS_MASK) | mi;
+ return h;
+}
+
+static inline uint32_t
+sync_gen_ts(uint32_t ts, uint32_t wscale)
+{
+ ts = (ts - (SYNC_TMS_OPT_MASK + 1)) & ~SYNC_TMS_OPT_MASK;
+ ts |= wscale;
+ return ts;
+}
+
+static inline int
+sync_check_ack(const union pkt_info *pi, uint32_t seq, uint32_t ack,
+ uint32_t ts)
+{
+ uint32_t h, mi, pts;
+
+ h = (pi->tf.type == TLE_V4) ? sync_hash4(pi, seq) : sync_hash6(pi, seq);
+
+ h = ack - h;
+ pts = h & ~SYNC_MSS_MASK;
+ mi = h & SYNC_MSS_MASK;
+
+ if (ts - pts > SYNC_MAX_TMO)
+ return -ERANGE;
+
+ return (pi->tf.type == TLE_V4) ? mss4len.u32[mi] : mss6len.u32[mi];
+}
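
The round trip above in isolation: sync_gen_seq() hides hash + ((ts & ~SYNC_MSS_MASK) | mss_index) in the ISN, and sync_check_ack() subtracts the recomputed hash to recover the timestamp (for the staleness check) and the MSS-table index. A standalone sketch of the arithmetic, with a stub in place of the jhash:

#include <stdint.h>
#include <assert.h>

#define MSS_BITS 2
#define MSS_MASK ((1u << MSS_BITS) - 1)

static const uint16_t mss_tab[4] = {536, 1300, 1440, 1460};

static uint32_t
stub_hash(uint32_t seq)	/* stands in for sync_hash4/sync_hash6 */
{
	return seq * 2654435761u;
}

static uint32_t
gen_cookie(uint32_t seq, uint32_t ts, uint32_t mi)
{
	return stub_hash(seq) + ((ts & ~MSS_MASK) | mi);
}

int
main(void)
{
	uint32_t seq = 0x1234, ts = 0x1000, mi = 2;
	uint32_t isn = gen_cookie(seq, ts, mi);

	/* on the returning ACK: peel the hash off, split the rest */
	uint32_t h = isn - stub_hash(seq);

	assert((h & ~MSS_MASK) == (ts & ~MSS_MASK));
	assert(mss_tab[h & MSS_MASK] == 1440);
	return 0;
}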
+
+static inline void
+sync_get_opts(struct syn_opts *so, uintptr_t p, uint32_t len)
+{
+ so->ts = get_tms_opts(p, len);
+ so->wscale = so->ts.ecr & SYNC_TMS_WSCALE_MASK;
+}
+
+static inline void
+sync_fill_tcb(struct tcb *tcb, const union seg_info *si,
+ const struct rte_mbuf *mb)
+{
+ const struct tcp_hdr *th;
+
+ th = rte_pktmbuf_mtod_offset(mb, const struct tcp_hdr *,
+ mb->l2_len + mb->l3_len);
+
+ tcb->rcv.nxt = si->seq;
+ tcb->rcv.irs = si->seq - 1;
+
+ tcb->snd.nxt = si->ack;
+ tcb->snd.una = si->ack;
+ tcb->snd.iss = si->ack - 1;
+ tcb->snd.rcvr = tcb->snd.iss;
+
+ tcb->snd.wu.wl1 = si->seq;
+ tcb->snd.wu.wl2 = si->ack;
+
+ get_syn_opts(&tcb->so, (uintptr_t)(th + 1), mb->l4_len - sizeof(*th));
+
+ tcb->snd.wscale = tcb->so.wscale;
+ tcb->snd.mss = tcb->so.mss;
+ tcb->snd.wnd = si->wnd << tcb->snd.wscale;
+
+ tcb->snd.ts = tcb->so.ts.ecr;
+ tcb->rcv.ts = tcb->so.ts.val;
+
+ tcb->rcv.wscale = (tcb->so.wscale == TCP_WSCALE_NONE) ?
+ TCP_WSCALE_NONE : TCP_WSCALE_DEFAULT;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYNCOOKIE_H_ */
diff --git a/lib/libtle_l4p/tcp_ctl.h b/lib/libtle_l4p/tcp_ctl.h
new file mode 100644
index 0000000..dcb9c3e
--- /dev/null
+++ b/lib/libtle_l4p/tcp_ctl.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Some helper stream control functions definitions.
+ */
+
+#ifndef _TCP_CTL_H_
+#define _TCP_CTL_H_
+
+#include "tcp_stream.h"
+#include "tcp_ofo.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void
+tcp_stream_down(struct tle_tcp_stream *s)
+{
+ rwl_down(&s->rx.use);
+ rwl_down(&s->tx.use);
+}
+
+static inline void
+tcp_stream_up(struct tle_tcp_stream *s)
+{
+ rwl_up(&s->rx.use);
+ rwl_up(&s->tx.use);
+}
+
+/* empty stream's receive queue */
+static void
+empty_rq(struct tle_tcp_stream *s)
+{
+ empty_mbuf_ring(s->rx.q);
+ tcp_ofo_reset(s->rx.ofo);
+}
+
+/* empty stream's listen queue */
+static void
+empty_lq(struct tle_tcp_stream *s, struct stbl *st)
+{
+ uint32_t i, n;
+ struct rte_mbuf *mb;
+ union pkt_info pi;
+ union seg_info si;
+ struct stbl_entry *se[MAX_PKT_BURST];
+
+ do {
+ n = rte_ring_dequeue_burst(s->rx.q, (void **)se, RTE_DIM(se));
+ for (i = 0; i != n; i++) {
+ mb = stbl_get_pkt(se[i]);
+ get_pkt_info(mb, &pi, &si);
+ stbl_del_pkt_lock(st, se[i], &pi);
+ rte_pktmbuf_free(mb);
+ }
+ } while (n != 0);
+}
+
+static inline void
+tcp_stream_reset(struct tle_ctx *ctx, struct tle_tcp_stream *s)
+{
+ struct stbl *st;
+ uint16_t uop;
+
+ st = CTX_TCP_STLB(ctx);
+
+ /* reset TX armed */
+ rte_atomic32_set(&s->tx.arm, 0);
+
+ /* reset TCB */
+ uop = s->tcb.uop & (TCP_OP_LISTEN | TCP_OP_CONNECT);
+ memset(&s->tcb, 0, sizeof(s->tcb));
+
+ /* reset cached destination */
+ memset(&s->tx.dst, 0, sizeof(s->tx.dst));
+
+ if (uop != 0) {
+ /* free stream's destination port */
+ stream_clear_ctx(ctx, &s->s);
+ if (uop == TCP_OP_LISTEN)
+ empty_lq(s, st);
+ }
+
+ if (s->ste != NULL) {
+ /* remove entry from RX streams table */
+ stbl_del_stream_lock(st, s->ste, s);
+ s->ste = NULL;
+ empty_rq(s);
+ }
+
+ /* empty TX queue */
+ empty_mbuf_ring(s->tx.q);
+
+ /*
+ * mark the stream as free again.
+ * if there still are pkts queued for TX,
+ * then put this stream to the tail of free list.
+ */
+ put_stream(ctx, &s->s, TCP_STREAM_TX_FINISHED(s));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_CTL_H_ */
diff --git a/lib/libtle_l4p/tcp_misc.h b/lib/libtle_l4p/tcp_misc.h
new file mode 100644
index 0000000..beb6699
--- /dev/null
+++ b/lib/libtle_l4p/tcp_misc.h
@@ -0,0 +1,462 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_MISC_H_
+#define _TCP_MISC_H_
+
+#include "net_misc.h"
+#include <rte_tcp.h>
+#include <rte_cycles.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * TCP protocols related structures/functions definitions.
+ * Main purpose to simplify (and optimise) processing and representation
+ * of protocol related data.
+ */
+
+#define TCP_WSCALE_DEFAULT 7
+#define TCP_WSCALE_NONE 0
+
+#define TCP_TX_HDR_MAX (sizeof(struct tcp_hdr) + TCP_TX_OPT_LEN_MAX)
+
+/* max header size for normal data+ack packet */
+#define TCP_TX_HDR_DACK (sizeof(struct tcp_hdr) + TCP_TX_OPT_LEN_TMS)
+
+#define TCP4_MIN_MSS 536
+
+#define TCP6_MIN_MSS 1220
+
+/* default MTU, no TCP options. */
+#define TCP4_NOP_MSS \
+ (ETHER_MTU - sizeof(struct ipv4_hdr) - sizeof(struct tcp_hdr))
+
+#define TCP6_NOP_MSS \
+ (ETHER_MTU - sizeof(struct ipv6_hdr) - sizeof(struct tcp_hdr))
+
+/* default MTU, TCP options present */
+#define TCP4_OP_MSS (TCP4_NOP_MSS - TCP_TX_OPT_LEN_MAX)
+
+#define TCP6_OP_MSS (TCP6_NOP_MSS - TCP_TX_OPT_LEN_MAX)
+
+/*
+ * TCP flags
+ */
+#define TCP_FLAG_FIN 0x01
+#define TCP_FLAG_SYN 0x02
+#define TCP_FLAG_RST 0x04
+#define TCP_FLAG_PSH 0x08
+#define TCP_FLAG_ACK 0x10
+#define TCP_FLAG_URG 0x20
+
+/* TCP flags mask. */
+#define TCP_FLAG_MASK UINT8_MAX
+
+union typflg {
+ uint16_t raw;
+ struct {
+ uint8_t type; /* TLE_V4/TLE_V6 */
+ uint8_t flags; /* TCP header flags */
+ };
+};
+
+union pkt_info {
+ rte_xmm_t raw;
+ struct {
+ union typflg tf;
+ uint16_t csf; /* checksum flags */
+ union l4_ports port;
+ union {
+ union ipv4_addrs addr4;
+ const union ipv6_addrs *addr6;
+ };
+ };
+};
+
+union seg_info {
+ rte_xmm_t raw;
+ struct {
+ uint32_t seq;
+ uint32_t ack;
+ uint16_t hole1;
+ uint16_t wnd;
+ };
+};
+
+union seqlen {
+ uint64_t raw;
+ struct {
+ uint32_t seq;
+ uint32_t len;
+ };
+};
+
+#define TCP_DATA_ALIGN 4
+
+#define TCP_DATA_OFFSET 4
+
+/*
+ * recognizable options.
+ */
+#define TCP_OPT_KIND_EOL 0x00
+#define TCP_OPT_KIND_NOP 0x01
+#define TCP_OPT_KIND_MSS 0x02
+#define TCP_OPT_KIND_WSC 0x03
+#define TCP_OPT_KIND_TMS 0x08
+
+#define TCP_OPT_LEN_EOL 0x01
+#define TCP_OPT_LEN_NOP 0x01
+#define TCP_OPT_LEN_MSS 0x04
+#define TCP_OPT_LEN_WSC 0x03
+#define TCP_OPT_LEN_TMS 0x0a
+
+#define TCP_TX_OPT_LEN_MAX \
+ RTE_ALIGN_CEIL(TCP_OPT_LEN_MSS + TCP_OPT_LEN_WSC + TCP_OPT_LEN_TMS + \
+ TCP_OPT_LEN_EOL, TCP_DATA_ALIGN)
+
+/*
+ * recommended format for TSOPT from RFC 1323, appendix A:
+ * +--------+--------+--------+--------+
+ * | NOP | NOP | TSopt | 10 |
+ * +--------+--------+--------+--------+
+ * | TSval timestamp |
+ * +--------+--------+--------+--------+
+ * | TSecr timestamp |
+ * +--------+--------+--------+--------+
+ */
+#define TCP_TX_OPT_LEN_TMS (TCP_OPT_LEN_TMS + 2 * TCP_OPT_LEN_NOP)
+
+#define TCP_OPT_TMS_HDR (rte_be_to_cpu_32( \
+ TCP_OPT_KIND_NOP << 3 * CHAR_BIT | \
+ TCP_OPT_KIND_NOP << 2 * CHAR_BIT | \
+ TCP_OPT_KIND_TMS << CHAR_BIT | \
+ TCP_OPT_LEN_TMS))
+
+#define TCP_OPT_KL(k, l) (rte_be_to_cpu_16((k) << CHAR_BIT | (l)))
+
+#define TCP_OPT_KL_MSS TCP_OPT_KL(TCP_OPT_KIND_MSS, TCP_OPT_LEN_MSS)
+#define TCP_OPT_KL_WSC TCP_OPT_KL(TCP_OPT_KIND_WSC, TCP_OPT_LEN_WSC)
+#define TCP_OPT_KL_TMS TCP_OPT_KL(TCP_OPT_KIND_TMS, TCP_OPT_LEN_TMS)
+
+/*
+ * Timestamp option.
+ */
+union tsopt {
+ uint64_t raw;
+ struct {
+ uint32_t val;
+ uint32_t ecr;
+ };
+};
+
+struct tcpopt {
+ union {
+ uint16_t raw;
+ struct {
+ uint8_t kind;
+ uint8_t len;
+ };
+ } kl;
+ union {
+ uint16_t mss;
+ uint8_t wscale;
+ union tsopt ts;
+ };
+} __attribute__((__packed__));
+
+struct syn_opts {
+ uint16_t mss;
+ uint8_t wscale;
+ union tsopt ts;
+};
+
+struct resp_info {
+ uint32_t flags;
+};
+
+
+/* window update information (RFC 793 WL1, WL2) */
+union wui {
+ uint64_t raw;
+ struct {
+ uint32_t wl1;
+ uint32_t wl2;
+ };
+};
+
+/*
+ * helper structure: holds aggregated information about group
+ * of processed data+ack packets.
+ */
+struct dack_info {
+ struct { /* # of received segments with: */
+ uint32_t data; /* incoming data */
+ uint32_t ack; /* newly acked data */
+ uint32_t dup; /* duplicate acks */
+ uint32_t badseq; /* bad seq/ack */
+ uint32_t ofo; /* OFO incoming data */
+ } segs;
+ uint32_t ack; /* highest received ACK */
+ union tsopt ts; /* TS of highest ACK */
+ union wui wu; /* window update information */
+ uint32_t wnd;
+ struct { /* filled after 3 duplicate ACKs were observed */
+ uint32_t seg; /* # of meaningful ACK segments */
+ uint32_t ack; /* ACK sequence */
+ } dup3;
+};
+
+/* get current timestamp in ms */
+static inline uint32_t
+tcp_get_tms(void)
+{
+ uint64_t ts, ms;
+ ms = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S;
+ ts = rte_get_tsc_cycles() / ms;
+ return ts;
+}
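
The ceiling divide above guarantees a non-zero cycles-per-millisecond divisor and makes the clock err toward under-reporting elapsed time. A tiny standalone check of the arithmetic, assuming a 2.5 GHz TSC:

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
	uint64_t hz = 2500000000u;		/* assumed TSC frequency */
	uint64_t ms = (hz + 1000 - 1) / 1000;	/* 2500000 cycles per ms */
	uint64_t tsc = 7500000000u;		/* three seconds of cycles */

	printf("%" PRIu64 " ms\n", tsc / ms);	/* prints: 3000 ms */
	return 0;
}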
+
+static inline int
+tcp_seq_lt(uint32_t l, uint32_t r)
+{
+ return (int32_t)(l - r) < 0;
+}
+
+static inline int
+tcp_seq_leq(uint32_t l, uint32_t r)
+{
+ return (int32_t)(l - r) <= 0;
+}
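
tcp_seq_lt()/tcp_seq_leq() are serial-number comparisons (in the spirit of RFC 1982): subtract, then reinterpret as signed, so the ordering stays correct across the 2^32 wrap of sequence space. A standalone check:

#include <stdint.h>
#include <assert.h>

static int
seq_lt(uint32_t l, uint32_t r)
{
	return (int32_t)(l - r) < 0;
}

int
main(void)
{
	uint32_t a = 0xfffffff0u, b = 0x10u;	/* b is 0x20 bytes ahead */

	assert(seq_lt(a, b));	/* serial order: a precedes b */
	assert(a > b);		/* plain compare says the opposite */
	return 0;
}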
+
+
+static inline void
+get_seg_info(const struct tcp_hdr *th, union seg_info *si)
+{
+ __m128i v;
+ const __m128i bswap_mask = _mm_set_epi8(15, 14, 13, 12, 10, 11, 9, 8,
+ 4, 5, 6, 7, 0, 1, 2, 3);
+
+ v = _mm_loadu_si128((const __m128i *)&th->sent_seq);
+ si->raw.x = _mm_shuffle_epi8(v, bswap_mask);
+}
+
+static inline void
+get_syn_opts(struct syn_opts *so, uintptr_t p, uint32_t len)
+{
+ uint32_t i, kind;
+ const struct tcpopt *opt;
+
+ memset(so, 0, sizeof(*so));
+
+ i = 0;
+ while (i < len) {
+ opt = (const struct tcpopt *)(p + i);
+ kind = opt->kl.kind;
+ if (kind == TCP_OPT_KIND_EOL)
+ return;
+ else if (kind == TCP_OPT_KIND_NOP)
+ i += sizeof(opt->kl.kind);
+ else {
+ i += opt->kl.len;
+ if (i <= len) {
+ if (opt->kl.raw == TCP_OPT_KL_MSS)
+ so->mss = rte_be_to_cpu_16(opt->mss);
+ else if (opt->kl.raw == TCP_OPT_KL_WSC)
+ so->wscale = opt->wscale;
+ else if (opt->kl.raw == TCP_OPT_KL_TMS) {
+ so->ts.val =
+ rte_be_to_cpu_32(opt->ts.val);
+ so->ts.ecr =
+ rte_be_to_cpu_32(opt->ts.ecr);
+ }
+ }
+ }
+ }
+}
+
+/*
+ * generates SYN options, assumes that there are
+ * at least TCP_TX_OPT_LEN_MAX bytes available.
+ */
+static inline void
+fill_syn_opts(void *p, const struct syn_opts *so)
+{
+ uint8_t *to;
+ struct tcpopt *opt;
+
+ to = (uint8_t *)p;
+
+ /* setup MSS */
+ opt = (struct tcpopt *)to;
+ opt->kl.raw = TCP_OPT_KL_MSS;
+ opt->mss = rte_cpu_to_be_16(so->mss);
+
+ to += TCP_OPT_LEN_MSS;
+ opt = (struct tcpopt *)to;
+
+ /* setup TMS */
+ if (so->ts.val != 0) {
+
+ opt->kl.raw = TCP_OPT_KL_TMS;
+ opt->ts.val = rte_cpu_to_be_32(so->ts.val);
+ opt->ts.ecr = rte_cpu_to_be_32(so->ts.ecr);
+
+ to += TCP_OPT_LEN_TMS;
+ opt = (struct tcpopt *)to;
+ }
+
+ /* setup WSC */
+ if (so->wscale != 0) {
+
+ opt->kl.raw = TCP_OPT_KL_WSC;
+ opt->wscale = so->wscale;
+
+ to += TCP_OPT_LEN_WSC;
+ opt = (struct tcpopt *)to;
+ }
+
+ to[0] = TCP_OPT_KIND_EOL;
+}
+
+/*
+ * generates a TMS option for a non-SYN packet; make sure there are
+ * at least TCP_TX_OPT_LEN_TMS bytes available.
+ */
+static inline void
+fill_tms_opts(void *p, uint32_t val, uint32_t ecr)
+{
+ uint32_t *opt;
+
+ opt = (uint32_t *)p;
+ opt[0] = TCP_OPT_TMS_HDR;
+ opt[1] = rte_cpu_to_be_32(val);
+ opt[2] = rte_cpu_to_be_32(ecr);
+}
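
fill_tms_opts() stores one precomputed word so the 12-byte block always begins 01 01 08 0a on the wire (NOP, NOP, kind 8, length 10), the RFC 1323 appendix-A layout quoted earlier, followed by TSval and TSecr in network order. A standalone sketch using htonl() in place of the rte byte-order helpers:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

int
main(void)
{
	uint32_t opt[3];
	uint8_t b[12];
	int i;

	opt[0] = htonl(0x0101080au);	/* NOP, NOP, TSopt, len 10 */
	opt[1] = htonl(0x11223344u);	/* TSval */
	opt[2] = htonl(0x55667788u);	/* TSecr */
	memcpy(b, opt, sizeof(b));

	for (i = 0; i != 12; i++)
		printf("%02x ", b[i]);	/* 01 01 08 0a 11 22 33 44 ... */
	putchar('\n');
	return 0;
}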
+
+static inline union tsopt
+get_tms_opts(uintptr_t p, uint32_t len)
+{
+ union tsopt ts;
+ uint32_t i, kind;
+ const uint32_t *opt;
+ const struct tcpopt *to;
+
+ opt = (const uint32_t *)p;
+
+ /* TS option is present in the recommended layout */
+ if (len >= TCP_TX_OPT_LEN_TMS && opt[0] == TCP_OPT_TMS_HDR) {
+ ts.val = rte_be_to_cpu_32(opt[1]);
+ ts.ecr = rte_be_to_cpu_32(opt[2]);
+ return ts;
+ }
+
+ /* parse through whole list of options. */
+ ts.raw = 0;
+ i = 0;
+ while (i < len) {
+ to = (const struct tcpopt *)(p + i);
+ kind = to->kl.kind;
+ if (kind == TCP_OPT_KIND_EOL)
+ break;
+ else if (kind == TCP_OPT_KIND_NOP)
+ i += sizeof(to->kl.kind);
+ else {
+ i += to->kl.len;
+ if (i <= len && to->kl.raw == TCP_OPT_KL_TMS) {
+ ts.val = rte_be_to_cpu_32(to->ts.val);
+ ts.ecr = rte_be_to_cpu_32(to->ts.ecr);
+ break;
+ }
+ }
+ }
+
+ return ts;
+}
+
+static inline uint8_t
+get_pkt_type(const struct rte_mbuf *m)
+{
+ uint32_t v;
+
+ v = m->packet_type &
+ (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_MASK);
+ if (v == (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP))
+ return TLE_V4;
+ else if (v == (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP))
+ return TLE_V6;
+ else
+ return TLE_VNUM;
+}
+
+static inline void
+get_pkt_info(const struct rte_mbuf *m, union pkt_info *pi, union seg_info *si)
+{
+ uint32_t len, type;
+ const struct tcp_hdr *tcph;
+ const union l4_ports *prt;
+ const union ipv4_addrs *pa4;
+
+ type = get_pkt_type(m);
+ len = m->l2_len;
+
+ /*
+ * this line is here just to avoid gcc warning:
+ * error: .<U6098>.<U6000>.addr4.raw may be used uninitialized.
+ */
+ pi->addr4.raw = 0;
+
+ if (type == TLE_V4) {
+ pa4 = rte_pktmbuf_mtod_offset(m, const union ipv4_addrs *,
+ len + offsetof(struct ipv4_hdr, src_addr));
+ pi->addr4.raw = pa4->raw;
+ } else if (type == TLE_V6) {
+ pi->addr6 = rte_pktmbuf_mtod_offset(m, const union ipv6_addrs *,
+ len + offsetof(struct ipv6_hdr, src_addr));
+ }
+
+ len += m->l3_len;
+ tcph = rte_pktmbuf_mtod_offset(m, const struct tcp_hdr *, len);
+ prt = (const union l4_ports *)
+ ((uintptr_t)tcph + offsetof(struct tcp_hdr, src_port));
+ pi->tf.flags = tcph->tcp_flags;
+ pi->tf.type = type;
+ pi->csf = m->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD);
+ pi->port.raw = prt->raw;
+
+ get_seg_info(tcph, si);
+}
+
+static inline uint32_t
+tcp_mbuf_seq_free(struct rte_mbuf *mb[], uint32_t num)
+{
+ uint32_t i, len;
+
+ len = 0;
+ for (i = 0; i != num; i++) {
+ len += mb[i]->pkt_len;
+ rte_pktmbuf_free(mb[i]);
+ }
+
+ return len;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_MISC_H_ */
diff --git a/lib/libtle_l4p/tcp_ofo.c b/lib/libtle_l4p/tcp_ofo.c
new file mode 100644
index 0000000..1565445
--- /dev/null
+++ b/lib/libtle_l4p/tcp_ofo.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <rte_malloc.h>
+#include <rte_errno.h>
+
+#include "tcp_stream.h"
+#include "tcp_rxq.h"
+
+#define OFO_FRACTION 4
+
+#define OFO_DB_MAX 0x20U
+
+#define OFODB_OBJ_MIN 8U
+#define OFODB_OBJ_MAX 0x20U
+
+#define OFO_OBJ_MAX (OFODB_OBJ_MAX * OFO_DB_MAX)
+
+void
+tcp_ofo_free(struct ofo *ofo)
+{
+ rte_free(ofo);
+}
+
+static void
+calc_ofo_elems(uint32_t nbufs, uint32_t *nobj, uint32_t *ndb)
+{
+ uint32_t n, nd, no;
+
+ n = nbufs / OFO_FRACTION;
+ n = RTE_MAX(n, OFODB_OBJ_MIN);
+ n = RTE_MIN(n, OFO_OBJ_MAX);
+
+ no = OFODB_OBJ_MIN / 2;
+ do {
+ no *= 2;
+ nd = n / no;
+ } while (nd > OFO_DB_MAX);
+
+ *nobj = no;
+ *ndb = nd;
+}
+
+struct ofo *
+tcp_ofo_alloc(uint32_t nbufs, int32_t socket)
+{
+ uint32_t i, ndb, nobj;
+ size_t dsz, osz, sz;
+ struct ofo *ofo;
+ struct rte_mbuf **obj;
+
+ calc_ofo_elems(nbufs, &nobj, &ndb);
+ osz = sizeof(*ofo) + sizeof(ofo->db[0]) * ndb;
+ dsz = sizeof(ofo->db[0].obj[0]) * nobj * ndb;
+ sz = osz + dsz;
+
+ ofo = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, socket);
+ if (ofo == NULL) {
+ TCP_LOG(ERR, "%s: allocation of %zu bytes on socket %d "
+ "failed with error code: %d\n",
+ __func__, sz, socket, rte_errno);
+ return NULL;
+ }
+
+ obj = (struct rte_mbuf **)&ofo->db[ndb];
+ for (i = 0; i != ndb; i++) {
+ ofo->db[i].nb_max = nobj;
+ ofo->db[i].obj = obj + i * nobj;
+ }
+
+ ofo->nb_max = ndb;
+ return ofo;
+}
+
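A note on the allocation scheme: tcp_ofo_alloc() carves a single zeroed allocation into three regions (the ofo header, ndb descriptors in the flexible array, and a trailing pointer area sliced evenly among them), so teardown is one rte_free(). A standalone sketch of the same layout trick with plain calloc():

#include <stdlib.h>
#include <stdio.h>

struct blk { size_t nb_max; void **obj; };
struct pool { size_t nb_max; struct blk db[]; };

static struct pool *
pool_alloc(size_t ndb, size_t nobj)
{
	size_t i;
	size_t osz = sizeof(struct pool) + sizeof(struct blk) * ndb;
	size_t dsz = sizeof(void *) * nobj * ndb;
	struct pool *p = calloc(1, osz + dsz);
	void **obj;

	if (p == NULL)
		return NULL;
	/* object area starts right after the last descriptor */
	obj = (void **)&p->db[ndb];
	for (i = 0; i != ndb; i++) {
		p->db[i].nb_max = nobj;
		p->db[i].obj = obj + i * nobj;
	}
	p->nb_max = ndb;
	return p;
}

int
main(void)
{
	struct pool *p = pool_alloc(4, 8);

	if (p == NULL)
		return 1;
	/* slices are 8 pointers apart inside the one allocation */
	printf("%p %p\n", (void *)p->db[0].obj, (void *)p->db[1].obj);
	free(p);
	return 0;
}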
diff --git a/lib/libtle_l4p/tcp_ofo.h b/lib/libtle_l4p/tcp_ofo.h
new file mode 100644
index 0000000..4f3bdab
--- /dev/null
+++ b/lib/libtle_l4p/tcp_ofo.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_OFO_H_
+#define _TCP_OFO_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ofodb {
+ uint32_t nb_elem;
+ uint32_t nb_max;
+ union seqlen sl;
+ struct rte_mbuf **obj;
+};
+
+struct ofo {
+ uint32_t nb_elem;
+ uint32_t nb_max;
+ struct ofodb db[];
+};
+
+static inline void
+_ofodb_free(struct ofodb *db)
+{
+ uint32_t i;
+
+ for (i = 0; i != db->nb_elem; i++)
+ rte_pktmbuf_free(db->obj[i]);
+}
+
+static inline void
+_ofo_remove(struct ofo *ofo, uint32_t pos, uint32_t num)
+{
+ uint32_t i, k;
+ struct ofodb db;
+
+ /* rotate each removed descriptor behind the live ones, so its
+ * obj array stays owned by exactly one slot and can be reused. */
+ for (k = 0; k != num; k++) {
+ db = ofo->db[pos];
+ for (i = pos; i != ofo->nb_elem - 1; i++)
+ ofo->db[i] = ofo->db[i + 1];
+ ofo->nb_elem--;
+ ofo->db[ofo->nb_elem] = db;
+ }
+}
+
+static inline void
+tcp_ofo_reset(struct ofo *ofo)
+{
+ uint32_t i;
+
+ for (i = 0; i != ofo->nb_elem; i++)
+ _ofodb_free(&ofo->db[i]);
+
+ _ofo_remove(ofo, 0, ofo->nb_elem);
+}
+
+static inline uint32_t
+_ofo_insert_new(struct ofo *ofo, uint32_t pos, union seqlen *sl,
+ struct rte_mbuf *mb[], uint32_t num)
+{
+ uint32_t i, n, plen;
+ struct ofodb *db, tdb;
+
+ n = ofo->nb_elem;
+
+ /* out of space */
+ if (n == ofo->nb_max)
+ return 0;
+
+ /* fill the spare block past the last used slot, so that its own
+ * (still unused) obj array receives the new packets. */
+ db = ofo->db + n;
+
+ n = RTE_MIN(db->nb_max, num);
+ for (i = 0; i != n; i++)
+ db->obj[i] = mb[i];
+
+ /* can't queue some packets. */
+ plen = 0;
+ for (i = n; i != num; i++)
+ plen += mb[i]->pkt_len;
+
+ db->nb_elem = n;
+ db->sl.seq = sl->seq;
+ db->sl.len = sl->len - plen;
+
+ sl->seq += db->sl.len;
+ sl->len -= db->sl.len;
+
+ /* rotate the filled block into its proper position, keeping
+ * every slot paired with a unique obj array. */
+ tdb = *db;
+ for (i = ofo->nb_elem; i != pos; i--)
+ ofo->db[i] = ofo->db[i - 1];
+ ofo->db[pos] = tdb;
+
+ ofo->nb_elem += 1;
+ return n;
+}
+
+static inline uint32_t
+_ofo_insert_right(struct ofo *ofo, uint32_t pos, union seqlen *sl,
+ struct rte_mbuf *mb[], uint32_t num)
+{
+ uint32_t i, j, k, n;
+ uint32_t end, plen, skip;
+ struct ofodb *db;
+
+ db = ofo->db + pos;
+ end = db->sl.seq + db->sl.len;
+
+ skip = end - sl->seq;
+
+ /* skip overlapping packets */
+ for (i = 0, n = skip; i != num && n != 0; i++, n -= plen) {
+
+ plen = mb[i]->pkt_len;
+ if (n < plen) {
+ /* adjust partially overlapped packet. */
+ rte_pktmbuf_adj(mb[i], plen - n);
+ break;
+ }
+ }
+
+ /* free totally overlapped packets. */
+ for (j = 0; j != i; j++)
+ rte_pktmbuf_free(mb[j]);
+
+ /* copy non-overlapping mbufs */
+ k = db->nb_elem;
+ n = RTE_MIN(db->nb_max - k, num - i);
+
+ plen = 0;
+ for (j = 0; j != n; j++) {
+ db->obj[k + j] = mb[i + j];
+ plen += mb[i + j]->pkt_len;
+ }
+
+ db->nb_elem += n;
+ db->sl.len += plen;
+
+ plen += skip;
+ sl->len -= plen;
+ sl->seq += plen;
+ return n + i;
+}
+
+static inline uint32_t
+_ofo_step(struct ofo *ofo, union seqlen *sl, struct rte_mbuf *mb[],
+ uint32_t num)
+{
+ uint32_t i, n, end, lo, ro;
+ struct ofodb *db;
+
+ db = NULL;
+ end = sl->seq + sl->len;
+ n = ofo->nb_elem;
+
+ /*
+ * start from the right side, assume that after some gap,
+ * we keep receiving packets in order.
+ */
+ for (i = n; i-- != 0; ) {
+ db = ofo->db + i;
+ if (tcp_seq_leq(db->sl.seq, sl->seq))
+ break;
+ }
+
+ /* new db required */
+ if ((int32_t)i < 0 || tcp_seq_lt(db->sl.seq + db->sl.len, sl->seq))
+ return _ofo_insert_new(ofo, i + 1, sl, mb, num);
+
+ /* new one is right adjacent, or overlap */
+
+ ro = sl->seq - db->sl.seq;
+ lo = end - db->sl.seq;
+
+ /* new one is completely overlapped by old one */
+ if (lo <= db->sl.len)
+ return 0;
+
+ /* either overlap OR (adjacent AND some free space remains) */
+ if (ro < db->sl.len || db->nb_elem != db->nb_max)
+ return _ofo_insert_right(ofo, i, sl, mb, num);
+
+ /* adjacent, no free space in current block */
+ return _ofo_insert_new(ofo, i + 1, sl, mb, num);
+}
+
+static inline void
+_ofo_compact(struct ofo *ofo)
+{
+ uint32_t i, j, n, ro;
+ struct ofodb *db;
+
+ for (i = 0; i < ofo->nb_elem; i = j) {
+
+ for (j = i + 1; j != ofo->nb_elem; j++) {
+
+ /* no intersection */
+ ro = ofo->db[j].sl.seq - ofo->db[i].sl.seq;
+ if (ro > ofo->db[i].sl.len)
+ break;
+
+ db = ofo->db + j;
+ n = _ofo_insert_right(ofo, i, &db->sl, db->obj,
+ db->nb_elem);
+ if (n < db->nb_elem) {
+ db->nb_elem -= n;
+ break;
+ }
+ }
+
+ n = j - i - 1;
+ if (n != 0)
+ _ofo_remove(ofo, i + 1, n);
+ }
+}
+
+static inline uint32_t
+_ofodb_enqueue(struct rte_ring *r, const struct ofodb *db, union seqlen *sl)
+{
+ uint32_t n, num;
+
+ num = db->nb_elem;
+ sl->raw = db->sl.raw;
+ n = rte_ring_enqueue_burst(r, (void * const *)db->obj, num);
+
+ sl->len -= tcp_mbuf_seq_free(db->obj + n, num - n);
+ return num - n;
+}
+
+struct ofo *
+tcp_ofo_alloc(uint32_t nbufs, int32_t socket);
+
+void
+tcp_ofo_free(struct ofo *ofo);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_OFO_H_ */
diff --git a/lib/libtle_l4p/tcp_rxq.h b/lib/libtle_l4p/tcp_rxq.h
new file mode 100644
index 0000000..90e657f
--- /dev/null
+++ b/lib/libtle_l4p/tcp_rxq.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_RXQ_H_
+#define _TCP_RXQ_H_
+
+#include "tcp_ofo.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline uint32_t
+rx_ofo_enqueue(struct tle_tcp_stream *s, union seqlen *sl,
+ struct rte_mbuf *mb[], uint32_t num)
+{
+ uint32_t i, n;
+
+ n = 0;
+ do {
+ i = _ofo_step(s->rx.ofo, sl, mb + n, num - n);
+ n += i;
+ } while (i != 0 && n != num);
+
+ _ofo_compact(s->rx.ofo);
+ return n;
+}
+
+static inline uint32_t
+rx_ofo_reduce(struct tle_tcp_stream *s)
+{
+ uint32_t i, n, end, seq;
+ struct ofo *ofo;
+ struct ofodb *db;
+ union seqlen sl;
+
+ seq = s->tcb.rcv.nxt;
+ ofo = s->rx.ofo;
+
+ n = 0;
+ for (i = 0; i != ofo->nb_elem; i++) {
+
+ db = ofo->db + i;
+
+ /* gap still present */
+ if (tcp_seq_lt(seq, db->sl.seq))
+ break;
+
+ end = db->sl.seq + db->sl.len;
+
+ /* this db is fully overlapped, rcv.nxt already covers it */
+ if (tcp_seq_leq(end, seq))
+ _ofodb_free(db);
+ else {
+ n += _ofodb_enqueue(s->rx.q, db, &sl);
+ seq = sl.seq + sl.len;
+ }
+ }
+
+ s->tcb.rcv.nxt = seq;
+ _ofo_remove(ofo, 0, i);
+ return n;
+}
+
+static inline uint32_t
+rx_ino_enqueue(struct tle_tcp_stream *s, union seqlen *sl,
+ struct rte_mbuf *mb[], uint32_t num)
+{
+ uint32_t i, n;
+
+ n = rte_ring_enqueue_burst(s->rx.q, (void * const *)mb, num);
+
+ /* error: can't queue some packets into the receive buffer. */
+ for (i = n; i != num; i++)
+ sl->len -= mb[i]->pkt_len;
+
+ s->tcb.rcv.nxt = sl->seq + sl->len;
+ return n;
+}
+
+static inline uint32_t
+rx_data_enqueue(struct tle_tcp_stream *s, uint32_t seq, uint32_t len,
+ struct rte_mbuf *mb[], uint32_t num)
+{
+ uint32_t n, r, t;
+ union seqlen sl;
+
+ sl.seq = seq;
+ sl.len = len;
+
+ r = rte_ring_count(s->rx.q);
+
+ /* in order packets, ready to be delivered */
+ if (seq == s->tcb.rcv.nxt) {
+
+ t = rx_ino_enqueue(s, &sl, mb, num);
+
+ /* failed to queue all input in-order packets */
+ if (t != num)
+ TCP_LOG(DEBUG,
+ "%s(s=%p, seq=%u, len=%u, num=%u) failed to queue "
+ "%u packets;\n",
+ __func__, s, seq, len, num, num - t);
+
+ /* try to consume some out-of-order packets */
+ else {
+ n = rx_ofo_reduce(s);
+ if (n != 0)
+ TCP_LOG(DEBUG,
+ "%s(s=%p, rcv.nxt=%u) failed to queue %u "
+ "OFO packets;\n",
+ __func__, s, s->tcb.rcv.nxt, n);
+ }
+
+ /* queue out of order packets */
+ } else {
+ t = rx_ofo_enqueue(s, &sl, mb, num);
+ }
+
+ n = rte_ring_count(s->rx.q);
+ if (r != n) {
+ /* raise RX event */
+ if (s->rx.ev != NULL)
+ tle_event_raise(s->rx.ev);
+ /* if RX queue was empty invoke RX notification callback. */
+ else if (s->rx.cb.func != NULL && r == 0)
+ s->rx.cb.func(s->rx.cb.data, &s->s);
+ }
+
+ return t;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_RXQ_H_ */
diff --git a/lib/libtle_l4p/tcp_rxtx.c b/lib/libtle_l4p/tcp_rxtx.c
new file mode 100644
index 0000000..4e43730
--- /dev/null
+++ b/lib/libtle_l4p/tcp_rxtx.c
@@ -0,0 +1,2431 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_ip.h>
+#include <rte_ip_frag.h>
+#include <rte_tcp.h>
+
+#include "tcp_stream.h"
+#include "tcp_timer.h"
+#include "stream_table.h"
+#include "syncookie.h"
+#include "misc.h"
+#include "tcp_ctl.h"
+#include "tcp_rxq.h"
+#include "tcp_txq.h"
+
+#define TCP_MAX_PKT_SEG 0x20
+
+/*
+ * checks if the input TCP ports and IP addresses match the given stream.
+ * returns zero on success.
+ */
+static inline int
+rx_check_stream(const struct tle_tcp_stream *s, const union pkt_info *pi)
+{
+ int32_t rc;
+
+ if (pi->tf.type == TLE_V4)
+ rc = (pi->port.raw & s->s.pmsk.raw) != s->s.port.raw ||
+ (pi->addr4.raw & s->s.ipv4.mask.raw) !=
+ s->s.ipv4.addr.raw;
+ else
+ rc = (pi->port.raw & s->s.pmsk.raw) != s->s.port.raw ||
+ ymm_mask_cmp(&pi->addr6->raw, &s->s.ipv6.addr.raw,
+ &s->s.ipv6.mask.raw) != 0;
+
+ return rc;
+}
+
+static inline struct tle_tcp_stream *
+rx_obtain_listen_stream(const struct tle_dev *dev, const union pkt_info *pi,
+ uint32_t type)
+{
+ struct tle_tcp_stream *s;
+
+ s = (struct tle_tcp_stream *)dev->dp[type]->streams[pi->port.dst];
+ if (s == NULL || rwl_acquire(&s->rx.use) < 0)
+ return NULL;
+
+ /* check that we have a proper stream. */
+ if (s->tcb.state != TCP_ST_LISTEN) {
+ rwl_release(&s->rx.use);
+ s = NULL;
+ }
+
+ return s;
+}
+
+static inline struct tle_tcp_stream *
+rx_obtain_stream(const struct tle_dev *dev, struct stbl *st,
+ const union pkt_info *pi, uint32_t type)
+{
+ struct tle_tcp_stream *s;
+
+ s = stbl_find_data(st, pi);
+ if (s == NULL) {
+ if (pi->tf.flags == TCP_FLAG_ACK)
+ return rx_obtain_listen_stream(dev, pi, type);
+ return NULL;
+ }
+
+ if (stbl_data_pkt(s) || rwl_acquire(&s->rx.use) < 0)
+ return NULL;
+ /* check that we have a proper stream. */
+ else if (s->tcb.state == TCP_ST_CLOSED) {
+ rwl_release(&s->rx.use);
+ s = NULL;
+ }
+
+ return s;
+}
+
+/*
+ * Consider 2 pkt_info *equal* if their:
+ * - types (IPv4/IPv6)
+ * - TCP flags
+ * - checksum flags
+ * - TCP src and dst ports
+ * - IP src and dst addresses
+ * are equal.
+ */
+static inline int
+pkt_info_bulk_eq(const union pkt_info pi[], uint32_t num)
+{
+ uint32_t i;
+
+ i = 1;
+
+ if (pi[0].tf.type == TLE_V4) {
+ while (i != num && xmm_cmp(&pi[0].raw, &pi[i].raw) == 0)
+ i++;
+
+ } else if (pi[0].tf.type == TLE_V6) {
+ while (i != num &&
+ pi[0].raw.u64[0] == pi[i].raw.u64[0] &&
+ ymm_cmp(&pi[0].addr6->raw,
+ &pi[i].addr6->raw) == 0)
+ i++;
+ }
+
+ return i;
+}
+
+static inline int
+pkt_info_bulk_syneq(const union pkt_info pi[], uint32_t num)
+{
+ uint32_t i;
+
+ i = 1;
+
+ if (pi[0].tf.type == TLE_V4) {
+ while (i != num && pi[0].tf.raw == pi[i].tf.raw &&
+ pi[0].port.dst == pi[i].port.dst &&
+ pi[0].addr4.dst == pi[i].addr4.dst)
+ i++;
+
+ } else if (pi[0].tf.type == TLE_V6) {
+ while (i != num && pi[0].tf.raw == pi[i].tf.raw &&
+ pi[0].port.dst == pi[i].port.dst &&
+ xmm_cmp(&pi[0].addr6->dst,
+ &pi[i].addr6->dst) == 0)
+ i++;
+ }
+
+ return i;
+}
+
+static inline void
+stream_drb_free(struct tle_tcp_stream *s, struct tle_drb *drbs[],
+ uint32_t nb_drb)
+{
+ rte_ring_enqueue_burst(s->tx.drb.r, (void **)drbs, nb_drb);
+}
+
+static inline uint32_t
+stream_drb_alloc(struct tle_tcp_stream *s, struct tle_drb *drbs[],
+ uint32_t nb_drb)
+{
+ return rte_ring_dequeue_burst(s->tx.drb.r, (void **)drbs, nb_drb);
+}
+
+static inline void
+fill_tcph(struct tcp_hdr *l4h, const struct tcb *tcb, union l4_ports port,
+ uint32_t seq, uint8_t hlen, uint8_t flags)
+{
+ uint16_t wnd;
+
+ l4h->src_port = port.dst;
+ l4h->dst_port = port.src;
+
+ wnd = (flags & TCP_FLAG_SYN) ?
+ RTE_MAX(TCP4_MIN_MSS, tcb->so.mss) :
+ tcb->rcv.wnd >> tcb->rcv.wscale;
+
+ /* ??? use sse shuffle to hton all remaining 16 bytes at once. ??? */
+ l4h->sent_seq = rte_cpu_to_be_32(seq);
+ l4h->recv_ack = rte_cpu_to_be_32(tcb->rcv.nxt);
+ l4h->data_off = hlen / TCP_DATA_ALIGN << TCP_DATA_OFFSET;
+ l4h->tcp_flags = flags;
+ l4h->rx_win = rte_cpu_to_be_16(wnd);
+ l4h->cksum = 0;
+ l4h->tcp_urp = 0;
+
+ if (flags & TCP_FLAG_SYN)
+ fill_syn_opts(l4h + 1, &tcb->so);
+ else if ((flags & TCP_FLAG_RST) == 0 && tcb->so.ts.raw != 0)
+ fill_tms_opts(l4h + 1, tcb->snd.ts, tcb->rcv.ts);
+}
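+
+/*
+ * Worked example for the data_off computation above: with timestamps the
+ * TCP header is 20 + 12 = 32 bytes long, so hlen / TCP_DATA_ALIGN <<
+ * TCP_DATA_OFFSET gives (32 / 4) << 4 = 0x80, i.e. a data offset of
+ * 8 32-bit words (assuming TCP_DATA_ALIGN == 4 and TCP_DATA_OFFSET == 4,
+ * as the names suggest).
+ */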
+
+static inline int
+tcp_fill_mbuf(struct rte_mbuf *m, const struct tle_tcp_stream *s,
+ const struct tle_dest *dst, uint64_t ol_flags,
+ union l4_ports port, uint32_t seq, uint32_t flags,
+ uint32_t pid, uint32_t swcsm)
+{
+ uint32_t l4, len, plen;
+ struct tcp_hdr *l4h;
+ char *l2h;
+
+ len = dst->l2_len + dst->l3_len;
+ plen = m->pkt_len;
+
+ if (flags & TCP_FLAG_SYN)
+ l4 = sizeof(*l4h) + TCP_TX_OPT_LEN_MAX;
+ else if ((flags & TCP_FLAG_RST) == 0 && s->tcb.rcv.ts != 0)
+ l4 = sizeof(*l4h) + TCP_TX_OPT_LEN_TMS;
+ else
+ l4 = sizeof(*l4h);
+
+ /* adjust mbuf to put L2/L3/L4 headers into it. */
+ l2h = rte_pktmbuf_prepend(m, len + l4);
+ if (l2h == NULL)
+ return -EINVAL;
+
+ /* copy L2/L3 header */
+ rte_memcpy(l2h, dst->hdr, len);
+
+ /* setup TCP header & options */
+ l4h = (struct tcp_hdr *)(l2h + len);
+ fill_tcph(l4h, &s->tcb, port, seq, l4, flags);
+
+ /* setup mbuf TX offload related fields. */
+ m->tx_offload = _mbuf_tx_offload(dst->l2_len, dst->l3_len, l4, 0, 0, 0);
+ m->ol_flags |= ol_flags;
+
+ /* update proto specific fields. */
+
+ if (s->s.type == TLE_V4) {
+ struct ipv4_hdr *l3h;
+ l3h = (struct ipv4_hdr *)(l2h + dst->l2_len);
+ l3h->packet_id = rte_cpu_to_be_16(pid);
+ l3h->total_length = rte_cpu_to_be_16(plen + dst->l3_len + l4);
+
+ if ((ol_flags & PKT_TX_TCP_CKSUM) != 0)
+ l4h->cksum = _ipv4x_phdr_cksum(l3h, m->l3_len,
+ ol_flags);
+ else if (swcsm != 0)
+ l4h->cksum = _ipv4_udptcp_mbuf_cksum(m, len, l3h);
+
+ if ((ol_flags & PKT_TX_IP_CKSUM) == 0 && swcsm != 0)
+ l3h->hdr_checksum = _ipv4x_cksum(l3h, m->l3_len);
+ } else {
+ struct ipv6_hdr *l3h;
+ l3h = (struct ipv6_hdr *)(l2h + dst->l2_len);
+ l3h->payload_len = rte_cpu_to_be_16(plen + l4);
+ if ((ol_flags & PKT_TX_TCP_CKSUM) != 0)
+ l4h->cksum = rte_ipv6_phdr_cksum(l3h, ol_flags);
+ else if (swcsm != 0)
+ l4h->cksum = _ipv6_udptcp_mbuf_cksum(m, len, l3h);
+ }
+
+ return 0;
+}
+
+/*
+ * This function is supposed to be used only for data packets.
+ * It assumes that L2/L3/L4 headers and mbuf fields are already set up properly.
+ * - updates tcp SEG.SEQ, SEG.ACK, TS.VAL, TS.ECR.
+ * - if no HW cksum offloads are enabled, calculates TCP checksum.
+ */
+static inline void
+tcp_update_mbuf(struct rte_mbuf *m, uint32_t type, const struct tcb *tcb,
+ uint32_t seq, uint32_t pid)
+{
+ struct tcp_hdr *l4h;
+ uint32_t len;
+
+ len = m->l2_len + m->l3_len;
+ l4h = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, len);
+
+ l4h->sent_seq = rte_cpu_to_be_32(seq);
+ l4h->recv_ack = rte_cpu_to_be_32(tcb->rcv.nxt);
+
+ if (tcb->so.ts.raw != 0)
+ fill_tms_opts(l4h + 1, tcb->snd.ts, tcb->rcv.ts);
+
+ if (type == TLE_V4) {
+ struct ipv4_hdr *l3h;
+ l3h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
+ l3h->hdr_checksum = 0;
+ l3h->packet_id = rte_cpu_to_be_16(pid);
+ if ((m->ol_flags & PKT_TX_IP_CKSUM) == 0)
+ l3h->hdr_checksum = _ipv4x_cksum(l3h, m->l3_len);
+ }
+
+ /* have to calculate TCP checksum in SW */
+ if ((m->ol_flags & PKT_TX_TCP_CKSUM) == 0) {
+
+ l4h->cksum = 0;
+
+ if (type == TLE_V4) {
+ struct ipv4_hdr *l3h;
+ l3h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+ m->l2_len);
+ l4h->cksum = _ipv4_udptcp_mbuf_cksum(m, len, l3h);
+
+ } else {
+ struct ipv6_hdr *l3h;
+ l3h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+ m->l2_len);
+ l4h->cksum = _ipv6_udptcp_mbuf_cksum(m, len, l3h);
+ }
+ }
+}
+
+/* Send data packets that need to be ACK-ed by peer */
+static inline uint32_t
+tx_data_pkts(struct tle_tcp_stream *s, struct rte_mbuf *const m[], uint32_t num)
+{
+ uint32_t bsz, i, nb, nbm;
+ struct tle_dev *dev;
+ struct tle_drb *drb[num];
+
+ /* calculate how many drbs are needed. */
+ bsz = s->tx.drb.nb_elem;
+ nbm = (num + bsz - 1) / bsz;
+
+ /* allocate drbs, adjust number of packets. */
+ nb = stream_drb_alloc(s, drb, nbm);
+
+ /* drb ring is empty. */
+ if (nb == 0)
+ return 0;
+
+ else if (nb != nbm)
+ num = nb * bsz;
+
+ dev = s->tx.dst.dev;
+
+ /* enqueue pkts for TX. */
+ nbm = nb;
+ i = tle_dring_mp_enqueue(&dev->tx.dr, (const void * const*)m,
+ num, drb, &nb);
+
+ /* free unused drbs. */
+ if (nb != 0)
+ stream_drb_free(s, drb + nbm - nb, nb);
+
+ return i;
+}
+
+static inline uint32_t
+tx_data_bulk(struct tle_tcp_stream *s, union seqlen *sl, struct rte_mbuf *mi[],
+ uint32_t num)
+{
+ uint32_t fail, i, k, n, mss, pid, plen, sz, tn, type;
+ struct tle_dev *dev;
+ struct rte_mbuf *mb;
+ struct rte_mbuf *mo[MAX_PKT_BURST + TCP_MAX_PKT_SEG];
+
+ mss = s->tcb.snd.mss;
+ type = s->s.type;
+
+ dev = s->tx.dst.dev;
+ pid = rte_atomic32_add_return(&dev->tx.packet_id[type], num) - num;
+
+ k = 0;
+ tn = 0;
+ fail = 0;
+ for (i = 0; i != num && sl->len != 0 && fail == 0; i++) {
+
+ mb = mi[i];
+ sz = RTE_MIN(sl->len, mss);
+ plen = PKT_L4_PLEN(mb);
+
+ /* fast path, no need to use indirect mbufs. */
+ if (plen <= sz) {
+
+ /* update pkt TCP header */
+ tcp_update_mbuf(mb, type, &s->tcb, sl->seq, pid + i);
+
+ /* keep mbuf till ACK is received. */
+ rte_pktmbuf_refcnt_update(mb, 1);
+ sl->len -= plen;
+ sl->seq += plen;
+ mo[k++] = mb;
+ /* remaining snd.wnd is less than MSS, send nothing */
+ } else if (sz < mss)
+ break;
+ /* packet indirection needed */
+ else
+ RTE_VERIFY(0);
+
+ if (k >= MAX_PKT_BURST) {
+ n = tx_data_pkts(s, mo, k);
+ fail = k - n;
+ tn += n;
+ k = 0;
+ }
+ }
+
+ if (k != 0) {
+ n = tx_data_pkts(s, mo, k);
+ fail = k - n;
+ tn += n;
+ }
+
+ if (fail != 0) {
+ sz = tcp_mbuf_seq_free(mo + n, fail);
+ sl->seq -= sz;
+ sl->len += sz;
+ }
+
+ return tn;
+}
+
+/*
+ * gets data from the stream send buffer, updates it and
+ * queues it into the TX device queue.
+ * Note that this function is not MT safe.
+ */
+static inline uint32_t
+tx_nxt_data(struct tle_tcp_stream *s, uint32_t tms)
+{
+ uint32_t n, num, tn, wnd;
+ struct rte_mbuf **mi;
+ union seqlen sl;
+
+ tn = 0;
+ wnd = s->tcb.snd.wnd - (uint32_t)(s->tcb.snd.nxt - s->tcb.snd.una);
+ sl.seq = s->tcb.snd.nxt;
+ sl.len = RTE_MIN(wnd, s->tcb.snd.cwnd);
+
+ if (sl.len == 0)
+ return tn;
+
+ /* update send timestamp */
+ s->tcb.snd.ts = tms;
+
+ do {
+ /* get group of packets */
+ mi = tcp_txq_get_nxt_objs(s, &num);
+
+ /* stream send buffer is empty */
+ if (num == 0)
+ break;
+
+ /* queue data packets for TX */
+ n = tx_data_bulk(s, &sl, mi, num);
+ tn += n;
+
+ /* update consumer head */
+ tcp_txq_set_nxt_head(s, n);
+ } while (n == num);
+
+ s->tcb.snd.nxt += sl.seq - (uint32_t)s->tcb.snd.nxt;
+ return tn;
+}
+
+static inline void
+free_una_data(struct tle_tcp_stream *s, uint32_t len)
+{
+ uint32_t i, n, num, plen;
+ struct rte_mbuf **mi;
+
+ n = 0;
+ plen = 0;
+
+ do {
+ /* get group of packets */
+ mi = tcp_txq_get_una_objs(s, &num);
+
+ if (num == 0)
+ break;
+
+ /* free acked data */
+ for (i = 0; i != num && n != len; i++, n = plen) {
+ plen += PKT_L4_PLEN(mi[i]);
+ if (plen > len) {
+ /* keep SND.UNA at the start of the packet */
+ len -= RTE_MIN(len, plen - len);
+ break;
+ }
+ rte_pktmbuf_free(mi[i]);
+ }
+
+ /* update consumer tail */
+ tcp_txq_set_una_tail(s, i);
+ } while (plen < len);
+
+ s->tcb.snd.una += len;
+
+ /*
+ * this could happen in case of retransmit:
+ * adjust SND.NXT to match SND.UNA.
+ */
+ if (s->tcb.snd.una > s->tcb.snd.nxt) {
+ tcp_txq_rst_nxt_head(s);
+ s->tcb.snd.nxt = s->tcb.snd.una;
+ }
+}
+
+static inline uint16_t
+calc_smss(uint16_t mss, const struct tle_dest *dst)
+{
+ uint16_t n;
+
+ n = dst->mtu - dst->l2_len - dst->l3_len - TCP_TX_HDR_DACK;
+ mss = RTE_MIN(n, mss);
+ return mss;
+}
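+
+/*
+ * Worked example: for a typical Ethernet/IPv4 path with mtu = 1500,
+ * l2_len = 14 and l3_len = 20, and assuming TCP_TX_HDR_DACK covers the
+ * TCP header plus the timestamp option (20 + 12 = 32 bytes),
+ * calc_smss() yields min(1500 - 14 - 20 - 32, mss) = min(1434, mss).
+ */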
+
+/*
+ * RFC 5681 3.1
+ * If SMSS > 2190 bytes:
+ * IW = 2 * SMSS bytes and MUST NOT be more than 2 segments
+ * If (SMSS > 1095 bytes) and (SMSS <= 2190 bytes):
+ * IW = 3 * SMSS bytes and MUST NOT be more than 3 segments
+ * if SMSS <= 1095 bytes:
+ * IW = 4 * SMSS bytes and MUST NOT be more than 4 segments
+ */
+static inline uint32_t
+initial_cwnd(uint16_t smss)
+{
+ if (smss > 2190)
+ return 2 * smss;
+ else if (smss > 1095)
+ return 3 * smss;
+ return 4 * smss;
+}
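+
+/*
+ * Worked example: with SMSS = 1460 (typical for Ethernet) the middle
+ * branch applies and IW = 3 * 1460 = 4380 bytes (3 segments);
+ * with SMSS = 536, IW = 4 * 536 = 2144 bytes (4 segments).
+ */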
+
+/*
+ * Queue a standalone packet to the particular output device.
+ * It assumes that:
+ * - L2/L3/L4 headers are already set.
+ * - the packet fits into one segment.
+ */
+static inline int
+send_pkt(struct tle_tcp_stream *s, struct tle_dev *dev, struct rte_mbuf *m)
+{
+ uint32_t n, nb;
+ struct tle_drb *drb;
+
+ if (stream_drb_alloc(s, &drb, 1) == 0)
+ return -ENOBUFS;
+
+ /* enqueue pkt for TX. */
+ nb = 1;
+ n = tle_dring_mp_enqueue(&dev->tx.dr, (const void * const*)&m, 1,
+ &drb, &nb);
+
+ /* free unused drbs. */
+ if (nb != 0)
+ stream_drb_free(s, &drb, 1);
+
+ return (n == 1) ? 0 : -ENOBUFS;
+}
+
+static inline int
+send_ctrl_pkt(struct tle_tcp_stream *s, struct rte_mbuf *m, uint32_t seq,
+ uint32_t flags)
+{
+ const struct tle_dest *dst;
+ uint32_t pid, type;
+ int32_t rc;
+
+ dst = &s->tx.dst;
+ type = s->s.type;
+ pid = rte_atomic32_add_return(&dst->dev->tx.packet_id[type], 1) - 1;
+
+ rc = tcp_fill_mbuf(m, s, dst, 0, s->s.port, seq, flags, pid, 1);
+ if (rc == 0)
+ rc = send_pkt(s, dst->dev, m);
+
+ return rc;
+}
+
+static inline int
+send_rst(struct tle_tcp_stream *s, uint32_t seq)
+{
+ struct rte_mbuf *m;
+ int32_t rc;
+
+ m = rte_pktmbuf_alloc(s->tx.dst.head_mp);
+ if (m == NULL)
+ return -ENOMEM;
+
+ rc = send_ctrl_pkt(s, m, seq, TCP_FLAG_RST);
+ if (rc != 0)
+ rte_pktmbuf_free(m);
+
+ return rc;
+}
+
+static inline int
+send_ack(struct tle_tcp_stream *s, uint32_t tms, uint32_t flags)
+{
+ struct rte_mbuf *m;
+ uint32_t seq;
+ int32_t rc;
+
+ m = rte_pktmbuf_alloc(s->tx.dst.head_mp);
+ if (m == NULL)
+ return -ENOMEM;
+
+ seq = s->tcb.snd.nxt - ((flags & (TCP_FLAG_FIN | TCP_FLAG_SYN)) != 0);
+ s->tcb.snd.ts = tms;
+
+ rc = send_ctrl_pkt(s, m, seq, flags);
+ if (rc != 0) {
+ rte_pktmbuf_free(m);
+ return rc;
+ }
+
+ s->tcb.snd.ack = s->tcb.rcv.nxt;
+ return 0;
+}
+
+
+static int
+sync_ack(struct tle_tcp_stream *s, const union pkt_info *pi,
+ const union seg_info *si, uint32_t ts, struct rte_mbuf *m)
+{
+ uint16_t len;
+ int32_t rc;
+ uint32_t pid, seq, type;
+ struct tle_dev *dev;
+ const void *da;
+ struct tle_dest dst;
+ const struct tcp_hdr *th;
+
+ type = s->s.type;
+
+ /* get destination information. */
+ if (type == TLE_V4)
+ da = &pi->addr4.src;
+ else
+ da = &pi->addr6->src;
+
+ rc = stream_get_dest(&s->s, da, &dst);
+ if (rc < 0)
+ return rc;
+
+ th = rte_pktmbuf_mtod_offset(m, const struct tcp_hdr *,
+ m->l2_len + m->l3_len);
+ get_syn_opts(&s->tcb.so, (uintptr_t)(th + 1), m->l4_len - sizeof(*th));
+
+ s->tcb.rcv.nxt = si->seq + 1;
+ seq = sync_gen_seq(pi, s->tcb.rcv.nxt, ts, s->tcb.so.mss);
+ s->tcb.so.ts.ecr = s->tcb.so.ts.val;
+ s->tcb.so.ts.val = sync_gen_ts(ts, s->tcb.so.wscale);
+ s->tcb.so.wscale = (s->tcb.so.wscale == TCP_WSCALE_NONE) ?
+ TCP_WSCALE_NONE : TCP_WSCALE_DEFAULT;
+ s->tcb.so.mss = calc_smss(dst.mtu, &dst);
+
+ /* reset mbuf's data contents. */
+ len = m->l2_len + m->l3_len + m->l4_len;
+ m->tx_offload = 0;
+ if (rte_pktmbuf_adj(m, len) == NULL)
+ return -EINVAL;
+
+ dev = dst.dev;
+ pid = rte_atomic32_add_return(&dev->tx.packet_id[type], 1) - 1;
+
+ rc = tcp_fill_mbuf(m, s, &dst, 0, pi->port, seq,
+ TCP_FLAG_SYN | TCP_FLAG_ACK, pid, 1);
+ if (rc == 0)
+ rc = send_pkt(s, dev, m);
+
+ return rc;
+}
+
+/*
+ * RFC 793:
+ * There are four cases for the acceptability test for an incoming segment:
+ * Segment Receive Test
+ * Length Window
+ * ------- ------- -------------------------------------------
+ * 0 0 SEG.SEQ = RCV.NXT
+ * 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
+ * >0 0 not acceptable
+ * >0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
+ * or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND
+ */
+static inline int
+check_seqn(const struct tcb *tcb, uint32_t seqn, uint32_t len)
+{
+ uint32_t n;
+
+ n = seqn + len;
+ if (seqn - tcb->rcv.nxt >= tcb->rcv.wnd &&
+ n - tcb->rcv.nxt > tcb->rcv.wnd)
+ return -ERANGE;
+
+ return 0;
+}
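+
+/*
+ * Note that the unsigned subtractions above handle sequence number
+ * wraparound. Example: rcv.nxt = 0xffffff00, rcv.wnd = 0x1000 and
+ * seqn = 0x00000100 give seqn - rcv.nxt = 0x200 < rcv.wnd, so the
+ * segment is acceptable even though seqn is numerically smaller than
+ * rcv.nxt.
+ */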
+
+static inline union tsopt
+rx_tms_opt(const struct tcb *tcb, const struct rte_mbuf *mb)
+{
+ union tsopt ts;
+ uintptr_t opt;
+ const struct tcp_hdr *th;
+
+ if (tcb->so.ts.val != 0) {
+ opt = rte_pktmbuf_mtod_offset(mb, uintptr_t,
+ mb->l2_len + mb->l3_len + sizeof(*th));
+ ts = get_tms_opts(opt, mb->l4_len - sizeof(*th));
+ } else
+ ts.raw = 0;
+
+ return ts;
+}
+
+/*
+ * PAWS and sequence check.
+ * RFC 1323 4.2.1
+ */
+static inline int
+rx_check_seq(struct tcb *tcb, uint32_t seq, uint32_t len, const union tsopt ts)
+{
+ int32_t rc;
+
+ /* RFC 1323 4.2.1 R2 */
+ rc = check_seqn(tcb, seq, len);
+ if (rc < 0)
+ return rc;
+
+ if (ts.raw != 0) {
+
+ /* RFC 1323 4.2.1 R1 */
+ if (tcp_seq_lt(ts.val, tcb->rcv.ts))
+ return -ERANGE;
+
+ /* RFC 1323 4.2.1 R3 */
+ if (tcp_seq_leq(seq, tcb->snd.ack) &&
+ tcp_seq_lt(tcb->snd.ack, seq + len))
+ tcb->rcv.ts = ts.val;
+ }
+
+ return rc;
+}
+
+static inline int
+rx_check_ack(const struct tcb *tcb, uint32_t ack)
+{
+ uint32_t max;
+
+ max = (uint32_t)RTE_MAX(tcb->snd.nxt, tcb->snd.rcvr);
+
+ if (tcp_seq_leq(tcb->snd.una, ack) && tcp_seq_leq(ack, max))
+ return 0;
+
+ return -ERANGE;
+}
+
+static inline int
+rx_check_seqack(struct tcb *tcb, uint32_t seq, uint32_t ack, uint32_t len,
+ const union tsopt ts)
+{
+ int32_t rc;
+
+ rc = rx_check_seq(tcb, seq, len, ts);
+ rc |= rx_check_ack(tcb, ack);
+ return rc;
+}
+
+static inline int
+restore_syn_pkt(const union pkt_info *pi, const union seg_info *si,
+ uint32_t ts, struct rte_mbuf *mb)
+{
+ int32_t rc;
+ uint32_t len;
+ struct tcp_hdr *th;
+ struct syn_opts so;
+
+ /* check that ACK and other fields are what we expected. */
+ rc = sync_check_ack(pi, si->seq, si->ack - 1, ts);
+ if (rc < 0)
+ return rc;
+
+ so.mss = rc;
+
+ th = rte_pktmbuf_mtod_offset(mb, struct tcp_hdr *,
+ mb->l2_len + mb->l3_len);
+ len = mb->l4_len - sizeof(*th);
+ sync_get_opts(&so, (uintptr_t)(th + 1), len);
+
+ /* reconstruct SYN options, extend header size if necessary */
+ if (len < TCP_TX_OPT_LEN_MAX) {
+ len = TCP_TX_OPT_LEN_MAX - len;
+ th->data_off = TCP_TX_OPT_LEN_MAX / TCP_DATA_ALIGN <<
+ TCP_DATA_OFFSET;
+ mb->pkt_len += len;
+ mb->data_len += len;
+ mb->l4_len += len;
+ }
+
+ fill_syn_opts(th + 1, &so);
+ return 0;
+}
+
+static inline int
+rx_ack_listen(struct tle_tcp_stream *s, struct stbl *st,
+ const union pkt_info *pi, const union seg_info *si,
+ uint32_t ts, struct rte_mbuf *mb)
+{
+ int32_t rc;
+ struct stbl_entry *se;
+
+ if (pi->tf.flags != TCP_FLAG_ACK || rx_check_stream(s, pi) != 0)
+ return -EINVAL;
+
+ /* ACK for new connection request. */
+
+ rc = restore_syn_pkt(pi, si, ts, mb);
+ if (rc < 0)
+ return rc;
+
+ se = stbl_add_pkt(st, pi, mb);
+ if (se == NULL)
+ return -ENOBUFS;
+
+ /* put new connection requests into stream listen queue */
+ if (rte_ring_enqueue_burst(s->rx.q,
+ (void * const *)&se, 1) != 1) {
+ stbl_del_pkt(st, se, pi);
+ return -ENOBUFS;
+ }
+
+ return 0;
+}
+
+static inline void
+stream_term(struct tle_tcp_stream *s)
+{
+ struct sdr *dr;
+
+ s->tcb.state = TCP_ST_CLOSED;
+ rte_smp_wmb();
+
+ timer_stop(s);
+
+ /* close() was already invoked, schedule final cleanup */
+ if ((s->tcb.uop & TCP_OP_CLOSE) != 0) {
+
+ dr = CTX_TCP_SDR(s->s.ctx);
+ STAILQ_INSERT_TAIL(&dr->be, &s->s, link);
+
+ /* notify user that the stream needs to be closed */
+ } else if (s->err.ev != NULL)
+ tle_event_raise(s->err.ev);
+ else if (s->err.cb.func != NULL)
+ s->err.cb.func(s->err.cb.data, &s->s);
+}
+
+static inline int
+data_pkt_adjust(const struct tcb *tcb, struct rte_mbuf *mb, uint32_t hlen,
+ uint32_t *seqn, uint32_t *plen)
+{
+ uint32_t len, n, seq;
+
+ seq = *seqn;
+ len = *plen;
+
+ rte_pktmbuf_adj(mb, hlen);
+ if (len == 0)
+ return -ENODATA;
+ /* cut off the start of the packet */
+ else if (tcp_seq_lt(seq, tcb->rcv.nxt)) {
+ n = tcb->rcv.nxt - seq;
+ if (n >= len)
+ return -ENODATA;
+
+ rte_pktmbuf_adj(mb, n);
+ *seqn = seq + n;
+ *plen = len - n;
+ }
+
+ return 0;
+}
+
+static inline uint32_t
+rx_ackdata(struct tle_tcp_stream *s, uint32_t ack)
+{
+ uint32_t k, n;
+
+ n = ack - (uint32_t)s->tcb.snd.una;
+
+ /* some more data was acked. */
+ if (n != 0) {
+
+ /* advance SND.UNA and free related packets. */
+ k = rte_ring_free_count(s->tx.q);
+ free_una_data(s, n);
+
+ /* mark the stream as available for writing */
+ if (rte_ring_free_count(s->tx.q) != 0) {
+ if (s->tx.ev != NULL)
+ tle_event_raise(s->tx.ev);
+ else if (k == 0 && s->tx.cb.func != NULL)
+ s->tx.cb.func(s->tx.cb.data, &s->s);
+ }
+ }
+
+ return n;
+}
+
+static void
+rx_fin_state(struct tle_tcp_stream *s, struct resp_info *rsp)
+{
+ uint32_t state;
+ int32_t ackfin;
+
+ s->tcb.rcv.nxt += 1;
+
+ ackfin = (s->tcb.snd.una == s->tcb.snd.fss);
+ state = s->tcb.state;
+
+ if (state == TCP_ST_ESTABLISHED) {
+ s->tcb.state = TCP_ST_CLOSE_WAIT;
+ /* raise err.ev & err.cb */
+ if (s->err.ev != NULL)
+ tle_event_raise(s->err.ev);
+ else if (s->err.cb.func != NULL)
+ s->err.cb.func(s->err.cb.data, &s->s);
+ } else if (state == TCP_ST_FIN_WAIT_1 || state == TCP_ST_CLOSING) {
+ rsp->flags |= TCP_FLAG_ACK;
+ if (ackfin != 0) {
+ s->tcb.state = TCP_ST_TIME_WAIT;
+ s->tcb.snd.rto = TCP_RTO_2MSL;
+ timer_reset(s);
+ } else
+ s->tcb.state = TCP_ST_CLOSING;
+ } else if (state == TCP_ST_FIN_WAIT_2) {
+ rsp->flags |= TCP_FLAG_ACK;
+ s->tcb.state = TCP_ST_TIME_WAIT;
+ s->tcb.snd.rto = TCP_RTO_2MSL;
+ timer_reset(s);
+ } else if (state == TCP_ST_LAST_ACK && ackfin != 0) {
+ stream_term(s);
+ }
+}
+
+/*
+ * FIN processing for ESTABLISHED and later states.
+ * returns:
+ * < 0 - error occurred
+ * 0 - FIN was processed OK, and mbuf can be freed/reused.
+ * > 0 - FIN was processed OK and mbuf can't be freed/reused.
+ */
+static inline int
+rx_fin(struct tle_tcp_stream *s, uint32_t state,
+ const union seg_info *si, struct rte_mbuf *mb,
+ struct resp_info *rsp)
+{
+ uint32_t hlen, plen, seq;
+ int32_t ret;
+ union tsopt ts;
+
+ hlen = PKT_L234_HLEN(mb);
+ plen = mb->pkt_len - hlen;
+ seq = si->seq;
+
+ ts = rx_tms_opt(&s->tcb, mb);
+ ret = rx_check_seqack(&s->tcb, seq, si->ack, plen, ts);
+ if (ret != 0)
+ return ret;
+
+ if (state < TCP_ST_ESTABLISHED)
+ return -EINVAL;
+
+ if (plen != 0) {
+
+ ret = data_pkt_adjust(&s->tcb, mb, hlen, &seq, &plen);
+ if (ret != 0)
+ return ret;
+ if (rx_data_enqueue(s, seq, plen, &mb, 1) != 1)
+ return -ENOBUFS;
+ }
+
+ /* process ack here */
+ rx_ackdata(s, si->ack);
+
+ /* some fragments still missing */
+ if (seq + plen != s->tcb.rcv.nxt) {
+ s->tcb.rcv.frs.seq = seq + plen;
+ s->tcb.rcv.frs.on = 1;
+ } else
+ rx_fin_state(s, rsp);
+
+ return plen;
+}
+
+static inline int
+rx_rst(struct tle_tcp_stream *s, uint32_t state, uint32_t flags,
+ const union seg_info *si)
+{
+ int32_t rc;
+
+ /*
+ * RFC 793: In all states except SYN-SENT, all reset (RST) segments
+ * are validated by checking their SEQ-fields.
+ * A reset is valid if its sequence number is in the window.
+ * In the SYN-SENT state (a RST received in response to an initial SYN),
+ * the RST is acceptable if the ACK field acknowledges the SYN.
+ */
+ if (state == TCP_ST_SYN_SENT) {
+ rc = ((flags & TCP_FLAG_ACK) == 0 ||
+ si->ack != s->tcb.snd.nxt) ?
+ -ERANGE : 0;
+ }
+
+ else
+ rc = check_seqn(&s->tcb, si->seq, 0);
+
+ if (rc == 0)
+ stream_term(s);
+
+ return rc;
+}
+
+/*
+ * check whether we have a FIN that was received out-of-order.
+ * If yes, try to process it now.
+ */
+static inline void
+rx_ofo_fin(struct tle_tcp_stream *s, struct resp_info *rsp)
+{
+ if (s->tcb.rcv.frs.on != 0 && s->tcb.rcv.nxt == s->tcb.rcv.frs.seq)
+ rx_fin_state(s, rsp);
+}
+
+static inline void
+dack_info_init(struct dack_info *tack, const struct tcb *tcb)
+{
+ memset(tack, 0, sizeof(*tack));
+ tack->ack = tcb->snd.una;
+ tack->segs.dup = tcb->rcv.dupack;
+ tack->wu.raw = tcb->snd.wu.raw;
+ tack->wnd = tcb->snd.wnd >> tcb->snd.wscale;
+}
+
+static inline void
+ack_window_update(struct tcb *tcb, const struct dack_info *tack)
+{
+ tcb->snd.wu.raw = tack->wu.raw;
+ tcb->snd.wnd = tack->wnd << tcb->snd.wscale;
+}
+
+static inline void
+ack_cwnd_update(struct tcb *tcb, uint32_t acked, const struct dack_info *tack)
+{
+ uint32_t n;
+
+ n = tack->segs.ack * tcb->snd.mss;
+
+ /* slow start phase, RFC 5681 3.1 (2) */
+ if (tcb->snd.cwnd < tcb->snd.ssthresh)
+ tcb->snd.cwnd += RTE_MIN(acked, n);
+ /* congestion avoidance phase, RFC 5681 3.1 (3) */
+ else
+ tcb->snd.cwnd += RTE_MAX(1U, n * tcb->snd.mss / tcb->snd.cwnd);
+}
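+
+/*
+ * Worked example (mss = 1460): in slow start, 2 newly ACK-ed segments
+ * with acked = 2920 grow cwnd by min(2920, 2 * 1460) = 2920 bytes;
+ * in congestion avoidance with cwnd = 14600 the same ACKs grow cwnd by
+ * max(1, 2 * 1460 * 1460 / 14600) = 292 bytes only.
+ */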
+
+static inline void
+rto_ssthresh_update(struct tcb *tcb)
+{
+ uint32_t k, n;
+
+ /* RFC 5681 3.1 (4) */
+ n = (tcb->snd.nxt - tcb->snd.una) / 2;
+ k = 2 * tcb->snd.mss;
+ tcb->snd.ssthresh = RTE_MAX(n, k);
+}
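+
+/*
+ * Example: with 10 * mss bytes in flight (snd.nxt - snd.una),
+ * ssthresh becomes max(5 * mss, 2 * mss) = 5 * mss, i.e. half the
+ * flight size, as RFC 5681 equation (4) requires.
+ */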
+
+static inline void
+rto_cwnd_update(struct tcb *tcb)
+{
+
+ if (tcb->snd.nb_retx == 0)
+ rto_ssthresh_update(tcb);
+
+ /*
+ * RFC 5681 3.1: upon a timeout cwnd MUST be set to
+ * no more than 1 full-sized segment.
+ */
+ tcb->snd.cwnd = tcb->snd.mss;
+}
+
+static inline void
+ack_info_update(struct dack_info *tack, const union seg_info *si,
+ int32_t badseq, uint32_t dlen, const union tsopt ts)
+{
+ if (badseq != 0) {
+ tack->segs.badseq++;
+ return;
+ }
+
+ /* segment with incoming data */
+ tack->segs.data += (dlen != 0);
+
+ /* segment with newly acked data */
+ if (tcp_seq_lt(tack->ack, si->ack)) {
+ tack->segs.dup = 0;
+ tack->segs.ack++;
+ tack->ack = si->ack;
+ tack->ts = ts;
+
+ /*
+ * RFC 5681: An acknowledgment is considered a "duplicate" when:
+ * (a) the receiver of the ACK has outstanding data
+ * (b) the incoming acknowledgment carries no data
+ * (c) the SYN and FIN bits are both off
+ * (d) the acknowledgment number is equal to the TCP.UNA
+ * (e) the advertised window in the incoming acknowledgment equals the
+ * advertised window in the last incoming acknowledgment.
+ *
+ * Here we only have to check for (b), (d) and (e).
+ * (a) will be checked later for the whole bulk of packets,
+ * (c) should never happen here.
+ */
+ } else if (dlen == 0 && si->wnd == tack->wnd && ++tack->segs.dup == 3) {
+ tack->dup3.seg = tack->segs.ack + 1;
+ tack->dup3.ack = tack->ack;
+ }
+
+ /*
+ * RFC 793:
+ * If SND.UNA < SEG.ACK =< SND.NXT, the send window should be
+ * updated. If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and
+ * SND.WL2 =< SEG.ACK)), set SND.WND <- SEG.WND, set
+ * SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK.
+ */
+ if (tcp_seq_lt(tack->wu.wl1, si->seq) ||
+ (si->seq == tack->wu.wl1 &&
+ tcp_seq_leq(tack->wu.wl2, si->ack))) {
+
+ tack->wu.wl1 = si->seq;
+ tack->wu.wl2 = si->ack;
+ tack->wnd = si->wnd;
+ }
+}
+
+static inline uint32_t
+rx_data_ack(struct tle_tcp_stream *s, struct dack_info *tack,
+ const union seg_info si[], struct rte_mbuf *mb[], struct rte_mbuf *rp[],
+ int32_t rc[], uint32_t num)
+{
+ uint32_t i, j, k, n, t;
+ uint32_t hlen, plen, seq, tlen;
+ int32_t ret;
+ union tsopt ts;
+
+ k = 0;
+ for (i = 0; i != num; i = j) {
+
+ hlen = PKT_L234_HLEN(mb[i]);
+ plen = mb[i]->pkt_len - hlen;
+ seq = si[i].seq;
+
+ ts = rx_tms_opt(&s->tcb, mb[i]);
+ ret = rx_check_seqack(&s->tcb, seq, si[i].ack, plen, ts);
+
+ /* account segment received */
+ ack_info_update(tack, &si[i], ret != 0, plen, ts);
+
+ if (ret == 0) {
+ /* skip duplicate data, if any */
+ ret = data_pkt_adjust(&s->tcb, mb[i], hlen,
+ &seq, &plen);
+ }
+
+ j = i + 1;
+ if (ret != 0) {
+ rp[k] = mb[i];
+ rc[k] = -ret;
+ k++;
+ continue;
+ }
+
+ /* group sequential packets together. */
+ for (tlen = plen; j != num; tlen += plen, j++) {
+
+ hlen = PKT_L234_HLEN(mb[j]);
+ plen = mb[j]->pkt_len - hlen;
+
+ /* not consecutive packet */
+ if (plen == 0 || seq + tlen != si[j].seq)
+ break;
+
+ /* check SEQ/ACK */
+ ts = rx_tms_opt(&s->tcb, mb[j]);
+ ret = rx_check_seqack(&s->tcb, si[j].seq, si[j].ack,
+ plen, ts);
+
+ /* account for segment received */
+ ack_info_update(tack, &si[j], ret != 0, plen, ts);
+
+ if (ret != 0) {
+ rp[k] = mb[j];
+ rc[k] = -ret;
+ k++;
+ break;
+ }
+ rte_pktmbuf_adj(mb[j], hlen);
+ }
+
+ n = j - i;
+ j += (ret != 0);
+
+ /* account for OFO data */
+ if (seq != s->tcb.rcv.nxt)
+ tack->segs.ofo += n;
+
+ /* enqueue packets */
+ t = rx_data_enqueue(s, seq, tlen, mb + i, n);
+
+ /* if we ran out of space in the stream recv buffer. */
+ for (; t != n; t++) {
+ rp[k] = mb[i + t];
+ rc[k] = -ENOBUFS;
+ k++;
+ }
+ }
+
+ return num - k;
+}
+
+static inline void
+start_fast_retransmit(struct tle_tcp_stream *s)
+{
+ struct tcb *tcb;
+
+ tcb = &s->tcb;
+
+ /* RFC 6582 3.2.2 */
+ tcb->snd.rcvr = tcb->snd.nxt;
+ tcb->snd.fastack = 1;
+
+ /* RFC 5681 3.2.2 */
+ rto_ssthresh_update(tcb);
+
+ /* RFC 5681 3.2.3 */
+ tcp_txq_rst_nxt_head(s);
+ tcb->snd.nxt = tcb->snd.una;
+ tcb->snd.cwnd = tcb->snd.ssthresh + 3 * tcb->snd.mss;
+}
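+
+/*
+ * Example: if 10 * mss bytes were in flight when the third duplicate
+ * ACK arrived, rto_ssthresh_update() sets ssthresh to 5 * mss and cwnd
+ * is inflated to 5 * mss + 3 * mss = 8 * mss, accounting for the three
+ * segments that have left the network (RFC 5681 3.2 steps 2-3).
+ */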
+
+static inline void
+stop_fast_retransmit(struct tle_tcp_stream *s)
+{
+ struct tcb *tcb;
+ uint32_t n;
+
+ tcb = &s->tcb;
+ n = tcb->snd.nxt - tcb->snd.una;
+ tcb->snd.cwnd = RTE_MIN(tcb->snd.ssthresh,
+ RTE_MAX(n, tcb->snd.mss) + tcb->snd.mss);
+ tcb->snd.fastack = 0;
+}
+
+static inline int
+in_fast_retransmit(struct tle_tcp_stream *s, uint32_t ack_len, uint32_t ack_num,
+ uint32_t dup_num)
+{
+ uint32_t n;
+ struct tcb *tcb;
+
+ tcb = &s->tcb;
+
+ /* RFC 6582 3.2.3 partial ACK */
+ if (ack_len != 0) {
+
+ n = ack_num * tcb->snd.mss;
+ if (ack_len >= n)
+ tcb->snd.cwnd -= ack_len - n;
+ else
+ tcb->snd.cwnd -= ack_len % tcb->snd.mss;
+
+ /*
+ * For the first partial ACK that arrives
+ * during fast recovery, also reset the
+ * retransmit timer.
+ */
+ if (tcb->snd.fastack == 1)
+ timer_reset(s);
+
+ tcb->snd.fastack += ack_num;
+ return 1;
+
+ /* RFC 5681 3.2.4 */
+ } else if (dup_num > 3) {
+ s->tcb.snd.cwnd += (dup_num - 3) * tcb->snd.mss;
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline int
+process_ack(struct tle_tcp_stream *s, uint32_t acked,
+ const struct dack_info *tack)
+{
+ int32_t send;
+
+ send = 0;
+
+ /* normal mode */
+ if (s->tcb.snd.fastack == 0) {
+
+ send = 1;
+
+ /* RFC 6582 3.2.2 switch to fast retransmit mode */
+ if (tack->dup3.seg != 0 && s->tcb.snd.una != s->tcb.snd.nxt &&
+ s->tcb.snd.una >= s->tcb.snd.rcvr) {
+
+ start_fast_retransmit(s);
+ in_fast_retransmit(s,
+ tack->ack - tack->dup3.ack,
+ tack->segs.ack - tack->dup3.seg - 1,
+ tack->segs.dup);
+
+ /* remain in normal mode */
+ } else if (acked != 0) {
+ ack_cwnd_update(&s->tcb, acked, tack);
+ timer_stop(s);
+ }
+
+ /* fast retransmit mode */
+ } else {
+
+ /* remain in fast retransmit mode */
+ if (s->tcb.snd.una < s->tcb.snd.rcvr) {
+
+ send = in_fast_retransmit(s, acked, tack->segs.ack,
+ tack->segs.dup);
+ } else {
+ /* RFC 6582 3.2.3 full ACK */
+ stop_fast_retransmit(s);
+ timer_stop(s);
+
+ /* if we have another series of dup ACKs */
+ if (tack->dup3.seg != 0 &&
+ s->tcb.snd.una != s->tcb.snd.nxt &&
+ tcp_seq_leq((uint32_t)s->tcb.snd.rcvr,
+ tack->dup3.ack)) {
+
+ /* restart fast retransmit again. */
+ start_fast_retransmit(s);
+ send = in_fast_retransmit(s,
+ tack->ack - tack->dup3.ack,
+ tack->segs.ack - tack->dup3.seg - 1,
+ tack->segs.dup);
+ }
+ }
+ }
+
+ return send;
+}
+
+/*
+ * our FIN was acked, stop rto timer, change stream state,
+ * and possibly close the stream.
+ */
+static inline void
+rx_ackfin(struct tle_tcp_stream *s)
+{
+ uint32_t state;
+
+ s->tcb.snd.una = s->tcb.snd.fss;
+ empty_mbuf_ring(s->tx.q);
+
+ state = s->tcb.state;
+ if (state == TCP_ST_LAST_ACK)
+ stream_term(s);
+ else if (state == TCP_ST_FIN_WAIT_1) {
+ timer_stop(s);
+ s->tcb.state = TCP_ST_FIN_WAIT_2;
+ } else if (state == TCP_ST_CLOSING) {
+ s->tcb.state = TCP_ST_TIME_WAIT;
+ s->tcb.snd.rto = TCP_RTO_2MSL;
+ timer_reset(s);
+ }
+}
+
+static inline void
+rx_process_ack(struct tle_tcp_stream *s, uint32_t ts,
+ const struct dack_info *tack)
+{
+ int32_t send;
+ uint32_t n;
+
+ s->tcb.rcv.dupack = tack->segs.dup;
+
+ n = rx_ackdata(s, tack->ack);
+ send = process_ack(s, n, tack);
+
+ /* try to send more data. */
+ if ((n != 0 || send != 0) && tcp_txq_nxt_cnt(s) != 0)
+ txs_enqueue(s->s.ctx, s);
+
+ /* restart RTO timer. */
+ if (s->tcb.snd.nxt != s->tcb.snd.una)
+ timer_start(s);
+
+ /* update rto; if a fresh packet is here, then calculate rtt */
+ if (tack->ts.ecr != 0)
+ rto_estimate(&s->tcb, ts - tack->ts.ecr);
+}
+
+/*
+ * process <SYN,ACK>
+ * returns negative value on failure, or zero on success.
+ */
+static inline int
+rx_synack(struct tle_tcp_stream *s, uint32_t ts, uint32_t state,
+ const union seg_info *si, struct rte_mbuf *mb,
+ struct resp_info *rsp)
+{
+ struct syn_opts so;
+ struct tcp_hdr *th;
+
+ if (state != TCP_ST_SYN_SENT)
+ return -EINVAL;
+
+ /* invalid SEG.ACK */
+ if (si->ack != (uint32_t)s->tcb.snd.nxt) {
+ rsp->flags = TCP_FLAG_RST;
+ return 0;
+ }
+
+ th = rte_pktmbuf_mtod_offset(mb, struct tcp_hdr *,
+ mb->l2_len + mb->l3_len);
+ get_syn_opts(&so, (uintptr_t)(th + 1), mb->l4_len - sizeof(*th));
+
+ s->tcb.so = so;
+
+ s->tcb.snd.una = s->tcb.snd.nxt;
+ s->tcb.snd.mss = so.mss;
+ s->tcb.snd.wnd = si->wnd << so.wscale;
+ s->tcb.snd.wu.wl1 = si->seq;
+ s->tcb.snd.wu.wl2 = si->ack;
+ s->tcb.snd.wscale = so.wscale;
+
+ /* setup congestion variables */
+ s->tcb.snd.cwnd = initial_cwnd(s->tcb.snd.mss);
+ s->tcb.snd.ssthresh = s->tcb.snd.wnd;
+
+ s->tcb.rcv.ts = so.ts.val;
+ s->tcb.rcv.irs = si->seq;
+ s->tcb.rcv.nxt = si->seq + 1;
+
+ /* calculate initial rto */
+ rto_estimate(&s->tcb, ts - s->tcb.snd.ts);
+
+ rsp->flags |= TCP_FLAG_ACK;
+
+ timer_stop(s);
+ s->tcb.state = TCP_ST_ESTABLISHED;
+ rte_smp_wmb();
+
+ if (s->tx.ev != NULL)
+ tle_event_raise(s->tx.ev);
+ else if (s->tx.cb.func != NULL)
+ s->tx.cb.func(s->tx.cb.data, &s->s);
+
+ return 0;
+}
+
+static inline uint32_t
+rx_stream(struct tle_tcp_stream *s, uint32_t ts,
+ const union pkt_info *pi, const union seg_info si[],
+ struct rte_mbuf *mb[], struct rte_mbuf *rp[], int32_t rc[],
+ uint32_t num)
+{
+ uint32_t i, k, n, state;
+ int32_t ret;
+ struct resp_info rsp;
+ struct dack_info tack;
+
+ k = 0;
+ rsp.flags = 0;
+
+ state = s->tcb.state;
+
+ /*
+ * first check for the states/flags where we don't
+ * expect groups of packets.
+ */
+
+ /* process RST */
+ if ((pi->tf.flags & TCP_FLAG_RST) != 0) {
+ for (i = 0;
+ i != num &&
+ rx_rst(s, state, pi->tf.flags, &si[i]);
+ i++)
+ ;
+ i = 0;
+
+ /* RFC 793: if the ACK bit is off drop the segment and return */
+ } else if ((pi->tf.flags & TCP_FLAG_ACK) == 0) {
+ i = 0;
+
+ /* process <SYN,ACK> */
+ } else if ((pi->tf.flags & TCP_FLAG_SYN) != 0) {
+ ret = 0;
+ for (i = 0; i != num; i++) {
+ ret = rx_synack(s, ts, state, &si[i], mb[i], &rsp);
+ if (ret == 0)
+ break;
+
+ rc[k] = -ret;
+ rp[k] = mb[i];
+ k++;
+ }
+
+ /* process FIN */
+ } else if ((pi->tf.flags & TCP_FLAG_FIN) != 0) {
+ ret = 0;
+ for (i = 0; i != num; i++) {
+ ret = rx_fin(s, state, &si[i], mb[i], &rsp);
+ if (ret >= 0)
+ break;
+
+ rc[k] = -ret;
+ rp[k] = mb[i];
+ k++;
+ }
+ i += (ret > 0);
+
+ /* normal data/ack packets */
+ } else if (state >= TCP_ST_ESTABLISHED && state <= TCP_ST_LAST_ACK) {
+
+ /* process incoming data packets. */
+ dack_info_init(&tack, &s->tcb);
+ n = rx_data_ack(s, &tack, si, mb, rp, rc, num);
+
+ /* follow up actions based on aggregated information */
+
+ /* update SND.WND */
+ ack_window_update(&s->tcb, &tack);
+
+ /*
+ * fast path: all data & FIN were already sent out
+ * and are now acknowledged.
+ */
+ if (s->tcb.snd.fss == s->tcb.snd.nxt &&
+ tack.ack == (uint32_t) s->tcb.snd.nxt)
+ rx_ackfin(s);
+ else
+ rx_process_ack(s, ts, &tack);
+
+ /*
+ * send an immediate ACK if either:
+ * - received segment with invalid seq/ack number
+ * - received segment with OFO data
+ * - received segment with INO data and no TX is scheduled
+ * for that stream.
+ */
+ if (tack.segs.badseq != 0 || tack.segs.ofo != 0 ||
+ (tack.segs.data != 0 &&
+ rte_atomic32_read(&s->tx.arm) == 0))
+ rsp.flags |= TCP_FLAG_ACK;
+
+ rx_ofo_fin(s, &rsp);
+
+ k += num - n;
+ i = num;
+
+ /* unhandled state, drop all packets. */
+ } else
+ i = 0;
+
+ /* we have a response packet to send. */
+ if (rsp.flags == TCP_FLAG_RST) {
+ send_rst(s, si[i].ack);
+ stream_term(s);
+ } else if (rsp.flags != 0) {
+ send_ack(s, ts, rsp.flags);
+
+ /* start the timer for FIN packet */
+ if ((rsp.flags & TCP_FLAG_FIN) != 0)
+ timer_reset(s);
+ }
+
+ /* unprocessed packets */
+ for (; i != num; i++, k++) {
+ rc[k] = EINVAL;
+ rp[k] = mb[i];
+ }
+
+ return num - k;
+}
+
+static inline uint32_t
+rx_postsyn(struct tle_dev *dev, struct stbl *st, uint32_t type, uint32_t ts,
+ const union pkt_info pi[], const union seg_info si[],
+ struct rte_mbuf *mb[], struct rte_mbuf *rp[], int32_t rc[],
+ uint32_t num)
+{
+ struct tle_tcp_stream *s;
+ uint32_t i, k, state;
+ int32_t ret;
+
+ s = rx_obtain_stream(dev, st, &pi[0], type);
+ if (s == NULL) {
+ for (i = 0; i != num; i++) {
+ rc[i] = ENOENT;
+ rp[i] = mb[i];
+ }
+ return 0;
+ }
+
+ k = 0;
+ state = s->tcb.state;
+
+ if (state == TCP_ST_LISTEN) {
+
+ /* one connection per flow */
+ ret = EINVAL;
+ for (i = 0; i != num && ret != 0; i++) {
+ ret = rx_ack_listen(s, st, pi, &si[i], ts, mb[i]);
+ if (ret != 0) {
+ rc[k] = -ret;
+ rp[k] = mb[i];
+ k++;
+ }
+ }
+ /* duplicate connection requests */
+ for (; i != num; i++, k++) {
+ rc[k] = EINVAL;
+ rp[k] = mb[i];
+ }
+
+ if (k != num && s->rx.ev != NULL)
+ tle_event_raise(s->rx.ev);
+ else if (s->rx.cb.func != NULL && rte_ring_count(s->rx.q) == 1)
+ s->rx.cb.func(s->rx.cb.data, &s->s);
+
+ } else {
+ i = rx_stream(s, ts, pi, si, mb, rp, rc, num);
+ k = num - i;
+ }
+
+ rwl_release(&s->rx.use);
+ return num - k;
+}
+
+
+static inline uint32_t
+rx_syn(struct tle_dev *dev, uint32_t type, uint32_t ts,
+ const union pkt_info pi[], const union seg_info si[],
+ struct rte_mbuf *mb[], struct rte_mbuf *rp[], int32_t rc[],
+ uint32_t num)
+{
+ struct tle_tcp_stream *s;
+ uint32_t i, k;
+ int32_t ret;
+
+ s = rx_obtain_listen_stream(dev, &pi[0], type);
+ if (s == NULL) {
+ for (i = 0; i != num; i++) {
+ rc[i] = ENOENT;
+ rp[i] = mb[i];
+ }
+ return 0;
+ }
+
+ k = 0;
+ for (i = 0; i != num; i++) {
+
+ /* check that this remote is allowed to connect */
+ if (rx_check_stream(s, &pi[i]) != 0)
+ ret = -ENOENT;
+ else
+ /* syncookie: reply with <SYN,ACK> */
+ ret = sync_ack(s, &pi[i], &si[i], ts, mb[i]);
+
+ if (ret != 0) {
+ rc[k] = -ret;
+ rp[k] = mb[i];
+ k++;
+ }
+ }
+
+ rwl_release(&s->rx.use);
+ return num - k;
+}
+
+uint16_t
+tle_tcp_rx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[],
+ struct rte_mbuf *rp[], int32_t rc[], uint16_t num)
+{
+ struct stbl *st;
+ uint32_t i, j, k, n, t, ts;
+ uint64_t csf;
+ union pkt_info pi[num];
+ union seg_info si[num];
+ union {
+ uint8_t t[TLE_VNUM];
+ uint32_t raw;
+ } stu;
+
+ ts = tcp_get_tms();
+ st = CTX_TCP_STLB(dev->ctx);
+
+ stu.raw = 0;
+
+ /* extract packet info and check the L3/L4 csums */
+ for (i = 0; i != num; i++) {
+
+ get_pkt_info(pkt[i], &pi[i], &si[i]);
+
+ t = pi[i].tf.type;
+ csf = dev->rx.ol_flags[t] &
+ (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD);
+
+ /* check csums in SW */
+ if (pi[i].csf == 0 && csf != 0 && check_pkt_csum(pkt[i], csf,
+ pi[i].tf.type, IPPROTO_TCP) != 0)
+ pi[i].csf = csf;
+
+ stu.t[t] = 1;
+ }
+
+ if (stu.t[TLE_V4] != 0)
+ stbl_lock(st, TLE_V4);
+ if (stu.t[TLE_V6] != 0)
+ stbl_lock(st, TLE_V6);
+
+ k = 0;
+ for (i = 0; i != num; i += j) {
+
+ t = pi[i].tf.type;
+
+ /* basic checks for incoming packet */
+ if (t >= TLE_VNUM || pi[i].csf != 0 || dev->dp[t] == NULL) {
+ rc[k] = EINVAL;
+ rp[k] = pkt[i];
+ j = 1;
+ k++;
+ /* process input SYN packets */
+ } else if (pi[i].tf.flags == TCP_FLAG_SYN) {
+ j = pkt_info_bulk_syneq(pi + i, num - i);
+ n = rx_syn(dev, t, ts, pi + i, si + i, pkt + i,
+ rp + k, rc + k, j);
+ k += j - n;
+ } else {
+ j = pkt_info_bulk_eq(pi + i, num - i);
+ n = rx_postsyn(dev, st, t, ts, pi + i, si + i, pkt + i,
+ rp + k, rc + k, j);
+ k += j - n;
+ }
+ }
+
+ if (stu.t[TLE_V4] != 0)
+ stbl_unlock(st, TLE_V4);
+ if (stu.t[TLE_V6] != 0)
+ stbl_unlock(st, TLE_V6);
+
+ return num - k;
+}
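+
+/*
+ * Usage sketch for the RX path (illustrative only; port, queue and
+ * MAX_PKT_BURST are placeholder names):
+ *
+ *	struct rte_mbuf *pkt[MAX_PKT_BURST], *rp[MAX_PKT_BURST];
+ *	int32_t rc[MAX_PKT_BURST];
+ *	uint16_t i, k, n;
+ *
+ *	n = rte_eth_rx_burst(port, queue, pkt, MAX_PKT_BURST);
+ *	k = tle_tcp_rx_bulk(dev, pkt, rp, rc, n);
+ *	for (i = 0; i != n - k; i++)
+ *		rte_pktmbuf_free(rp[i]);
+ *
+ * rp[] holds the n - k rejected mbufs, rc[] the matching error codes.
+ */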
+
+uint16_t
+tle_tcp_stream_synreqs(struct tle_stream *ts, struct tle_syn_req rq[],
+ uint32_t num)
+{
+ uint32_t i, n;
+ struct tle_tcp_stream *s;
+ struct stbl_entry *se[num];
+
+ s = TCP_STREAM(ts);
+ n = rte_ring_mc_dequeue_burst(s->rx.q, (void **)se, num);
+ if (n == 0)
+ return 0;
+
+ for (i = 0; i != n; i++) {
+ rq[i].pkt = stbl_get_pkt(se[i]);
+ rq[i].opaque = se[i];
+ }
+
+ /*
+ * if we still have packets to read,
+ * then rearm stream RX event.
+ */
+ if (n == num && rte_ring_count(s->rx.q) != 0) {
+ if (rwl_try_acquire(&s->rx.use) > 0 && s->rx.ev != NULL)
+ tle_event_raise(s->rx.ev);
+ rwl_release(&s->rx.use);
+ }
+
+ return n;
+}
+
+static inline int
+stream_fill_dest(struct tle_tcp_stream *s)
+{
+ int32_t rc;
+ const void *da;
+
+ if (s->s.type == TLE_V4)
+ da = &s->s.ipv4.addr.src;
+ else
+ da = &s->s.ipv6.addr.src;
+
+ rc = stream_get_dest(&s->s, da, &s->tx.dst);
+ return (rc < 0) ? rc : 0;
+}
+
+/*
+ * helper function, prepares an accepted stream.
+ */
+static int
+accept_fill_stream(struct tle_tcp_stream *ps, struct tle_tcp_stream *cs,
+ const struct tle_tcp_accept_param *prm, uint32_t tms,
+ const union pkt_info *pi, const union seg_info *si)
+{
+ int32_t rc;
+ uint32_t rtt;
+
+ /* some TX still pending for that stream. */
+ if (TCP_STREAM_TX_PENDING(cs))
+ return -EAGAIN;
+
+ /* setup L4 ports and L3 address fields. */
+ cs->s.port.raw = pi->port.raw;
+ cs->s.pmsk.raw = UINT32_MAX;
+
+ if (pi->tf.type == TLE_V4) {
+ cs->s.ipv4.addr = pi->addr4;
+ cs->s.ipv4.mask.src = INADDR_NONE;
+ cs->s.ipv4.mask.dst = INADDR_NONE;
+ } else if (pi->tf.type == TLE_V6) {
+ cs->s.ipv6.addr = *pi->addr6;
+ rte_memcpy(&cs->s.ipv6.mask.src, &tle_ipv6_none,
+ sizeof(cs->s.ipv6.mask.src));
+ rte_memcpy(&cs->s.ipv6.mask.dst, &tle_ipv6_none,
+ sizeof(cs->s.ipv6.mask.dst));
+ }
+
+ /* setup TCB */
+ sync_fill_tcb(&cs->tcb, si, prm->syn.pkt);
+ cs->tcb.rcv.wnd = cs->rx.q->prod.mask << cs->tcb.rcv.wscale;
+
+ /* setup stream notification mechanism */
+ cs->rx.ev = prm->cfg.recv_ev;
+ cs->rx.cb = prm->cfg.recv_cb;
+ cs->tx.ev = prm->cfg.send_ev;
+ cs->tx.cb = prm->cfg.send_cb;
+ cs->err.ev = prm->cfg.err_ev;
+ cs->err.cb = prm->cfg.err_cb;
+
+ /* store other params */
+ cs->tcb.snd.nb_retm = (prm->cfg.nb_retries != 0) ? prm->cfg.nb_retries :
+ TLE_TCP_DEFAULT_RETRIES;
+
+ /*
+ * estimate the RTO.
+ * For now RTT is calculated based on the TCP TMS option;
+ * a real-time estimate should be added later.
+ */
+ if (cs->tcb.so.ts.ecr) {
+ rtt = tms - cs->tcb.so.ts.ecr;
+ rto_estimate(&cs->tcb, rtt);
+ } else
+ cs->tcb.snd.rto = TCP_RTO_DEFAULT;
+
+ tcp_stream_up(cs);
+
+ /* copy stream type. */
+ cs->s.type = ps->s.type;
+
+ /* retrieve and cache destination information. */
+ rc = stream_fill_dest(cs);
+ if (rc != 0)
+ return rc;
+
+ /* update snd.mss with SMSS value */
+ cs->tcb.snd.mss = calc_smss(cs->tcb.snd.mss, &cs->tx.dst);
+
+ /* setup congestion variables */
+ cs->tcb.snd.cwnd = initial_cwnd(cs->tcb.snd.mss);
+ cs->tcb.snd.ssthresh = cs->tcb.snd.wnd;
+
+ cs->tcb.state = TCP_ST_ESTABLISHED;
+ cs->tcb.uop |= TCP_OP_ACCEPT;
+
+ /* add stream to the table */
+ cs->ste = prm->syn.opaque;
+ rte_smp_wmb();
+ cs->ste->data = cs;
+ return 0;
+}
+
+/*
+ * !!!
+ * Right now new stream rcv.wnd is set to zero.
+ * That simplifies handling of new connection establishment
+ * (as no data segments could be received),
+ * but has to be addressed.
+ * possible ways:
+ * - send an ack after accept creates the new stream with the new
+ * rcv.wnd value. The problem with that approach is that a single ack
+ * is not delivered reliably (could be lost), plus it might slow down
+ * connection establishment (an extra packet per connection that the
+ * client has to wait for).
+ * - allocate the new stream at the ACK receive stage.
+ * As a drawback - whole new stream allocation/connection establishment
+ * will be done in BE.
+ * !!!
+ */
+int
+tle_tcp_stream_accept(struct tle_stream *ts,
+ const struct tle_tcp_accept_param prm[], struct tle_stream *rs[],
+ uint32_t num)
+{
+ struct tle_tcp_stream *cs, *s;
+ struct tle_ctx *ctx;
+ uint32_t i, j, n, tms;
+ int32_t rc;
+ union pkt_info pi[num];
+ union seg_info si[num];
+
+ tms = tcp_get_tms();
+ s = TCP_STREAM(ts);
+
+ for (i = 0; i != num; i++)
+ get_pkt_info(prm[i].syn.pkt, &pi[i], &si[i]);
+
+ /* mark stream as not closable */
+ if (rwl_acquire(&s->rx.use) < 0)
+ return -EINVAL;
+
+ ctx = s->s.ctx;
+ n = get_streams(ctx, rs, num);
+
+ rc = 0;
+ for (i = 0; i != n; i++) {
+
+ /* prepare new stream */
+ cs = TCP_STREAM(rs[i]);
+ rc = accept_fill_stream(s, cs, prm + i, tms, pi + i, si + i);
+ if (rc != 0)
+ break;
+ }
+
+ rwl_release(&s->rx.use);
+
+ /* free 'SYN' mbufs. */
+ for (j = 0; j != i; j++)
+ rte_pktmbuf_free(prm[j].syn.pkt);
+
+ /* close failed stream, put unused streams back to the free list. */
+ if (rc != 0) {
+ tle_tcp_stream_close(rs[i]);
+ for (j = i + 1; j != n; j++) {
+ cs = TCP_STREAM(rs[j]);
+ put_stream(ctx, rs[j], TCP_STREAM_TX_PENDING(cs));
+ }
+ rte_errno = -rc;
+
+ /* not enough streams are available */
+ } else if (n != num)
+ rte_errno = ENFILE;
+
+ return i;
+}
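+
+/*
+ * Usage sketch (illustrative; ls is the listen stream, MAX_ACCEPT and
+ * stream_cfg are placeholder names, stream_cfg being the stream
+ * configuration with the events/callbacks to attach to each accepted
+ * stream):
+ *
+ *	struct tle_syn_req rq[MAX_ACCEPT];
+ *	struct tle_tcp_accept_param prm[MAX_ACCEPT];
+ *	struct tle_stream *rs[MAX_ACCEPT];
+ *	uint32_t i, n;
+ *
+ *	n = tle_tcp_stream_synreqs(ls, rq, MAX_ACCEPT);
+ *	for (i = 0; i != n; i++) {
+ *		memset(&prm[i], 0, sizeof(prm[i]));
+ *		prm[i].syn = rq[i];
+ *		prm[i].cfg = stream_cfg;
+ *	}
+ *	n = tle_tcp_stream_accept(ls, prm, rs, n);
+ */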
+
+/*
+ * !!! implement a proper one, or delete !!!
+ * need to make sure there are no race conditions with stream table add/lookup.
+ */
+void
+tle_tcp_reject(struct tle_stream *s, const struct tle_syn_req rq[],
+ uint32_t num)
+{
+ uint32_t i;
+ struct rte_mbuf *mb;
+ struct stbl *st;
+ union pkt_info pi;
+ union seg_info si;
+
+ st = CTX_TCP_STLB(s->ctx);
+
+ for (i = 0; i != num; i++) {
+ mb = rq[i].pkt;
+ get_pkt_info(mb, &pi, &si);
+ if (pi.tf.type < TLE_VNUM)
+ stbl_del_pkt_lock(st, rq[i].opaque, &pi);
+
+ /* !!! send RST pkt to the peer !!! */
+ rte_pktmbuf_free(mb);
+ }
+}
+
+uint16_t
+tle_tcp_tx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[], uint16_t num)
+{
+ uint32_t i, j, k, n;
+ struct tle_drb *drb[num];
+ struct tle_tcp_stream *s;
+
+ /* extract packets from device TX queue. */
+
+ k = num;
+ n = tle_dring_sc_dequeue(&dev->tx.dr, (const void **)(uintptr_t)pkt,
+ num, drb, &k);
+
+ if (n == 0)
+ return 0;
+
+ /* free empty drbs and notify related streams. */
+
+ for (i = 0; i != k; i = j) {
+ s = drb[i]->udata;
+ for (j = i + 1; j != k && s == drb[j]->udata; j++)
+ ;
+ stream_drb_free(s, drb + i, j - i);
+ }
+
+ return n;
+}
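+
+/*
+ * Usage sketch for the TX path (illustrative only; port, queue and
+ * MAX_PKT_BURST are placeholder names):
+ *
+ *	struct rte_mbuf *pkt[MAX_PKT_BURST];
+ *	uint16_t k, n;
+ *
+ *	n = tle_tcp_tx_bulk(dev, pkt, MAX_PKT_BURST);
+ *	k = rte_eth_tx_burst(port, queue, pkt, n);
+ *	for (; k != n; k++)
+ *		rte_pktmbuf_free(pkt[k]);
+ */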
+
+static inline void
+stream_fill_pkt_info(const struct tle_tcp_stream *s, union pkt_info *pi)
+{
+ if (s->s.type == TLE_V4)
+ pi->addr4 = s->s.ipv4.addr;
+ else
+ pi->addr6 = &s->s.ipv6.addr;
+
+ pi->port = s->s.port;
+ pi->tf.type = s->s.type;
+}
+
+static int
+stream_fill_addr(struct tle_tcp_stream *s, const struct sockaddr *addr)
+{
+ const struct sockaddr_in *in4;
+ const struct sockaddr_in6 *in6;
+ const struct tle_dev_param *prm;
+ int32_t rc;
+
+ rc = 0;
+ s->s.pmsk.raw = UINT32_MAX;
+
+ /* setup L4 src ports and src address fields. */
+ if (s->s.type == TLE_V4) {
+ in4 = (const struct sockaddr_in *)addr;
+ if (in4->sin_addr.s_addr == INADDR_ANY || in4->sin_port == 0)
+ return -EINVAL;
+
+ s->s.port.src = in4->sin_port;
+ s->s.ipv4.addr.src = in4->sin_addr.s_addr;
+ s->s.ipv4.mask.src = INADDR_NONE;
+ s->s.ipv4.mask.dst = INADDR_NONE;
+
+ } else if (s->s.type == TLE_V6) {
+ in6 = (const struct sockaddr_in6 *)addr;
+ if (memcmp(&in6->sin6_addr, &tle_ipv6_any,
+ sizeof(tle_ipv6_any)) == 0 ||
+ in6->sin6_port == 0)
+ return -EINVAL;
+
+ s->s.port.src = in6->sin6_port;
+ rte_memcpy(&s->s.ipv6.addr.src, &in6->sin6_addr,
+ sizeof(s->s.ipv6.addr.src));
+ rte_memcpy(&s->s.ipv6.mask.src, &tle_ipv6_none,
+ sizeof(s->s.ipv6.mask.src));
+ rte_memcpy(&s->s.ipv6.mask.dst, &tle_ipv6_none,
+ sizeof(s->s.ipv6.mask.dst));
+ }
+
+ /* setup the destination device. */
+ rc = stream_fill_dest(s);
+ if (rc != 0)
+ return rc;
+
+ /* setup L4 dst address from device param */
+ prm = &s->tx.dst.dev->prm;
+ if (s->s.type == TLE_V4) {
+ if (s->s.ipv4.addr.dst == INADDR_ANY)
+ s->s.ipv4.addr.dst = prm->local_addr4.s_addr;
+ } else if (memcmp(&s->s.ipv6.addr.dst, &tle_ipv6_any,
+ sizeof(tle_ipv6_any)) == 0)
+ memcpy(&s->s.ipv6.addr.dst, &prm->local_addr6,
+ sizeof(s->s.ipv6.addr.dst));
+
+ return rc;
+}
+
+static inline int
+tx_syn(struct tle_tcp_stream *s, const struct sockaddr *addr)
+{
+ int32_t rc;
+ uint32_t tms, seq;
+ union pkt_info pi;
+ struct stbl *st;
+ struct stbl_entry *se;
+
+ /* fill stream address */
+ rc = stream_fill_addr(s, addr);
+ if (rc != 0)
+ return rc;
+
+ /* fill pkt info to generate seq. */
+ stream_fill_pkt_info(s, &pi);
+
+ tms = tcp_get_tms();
+ s->tcb.so.ts.val = tms;
+ s->tcb.so.ts.ecr = 0;
+ s->tcb.so.wscale = TCP_WSCALE_DEFAULT;
+ s->tcb.so.mss = calc_smss(s->tx.dst.mtu, &s->tx.dst);
+
+ /* note that rcv.nxt is 0 here for sync_gen_seq. */
+ seq = sync_gen_seq(&pi, s->tcb.rcv.nxt, tms, s->tcb.so.mss);
+ s->tcb.snd.iss = seq;
+ s->tcb.snd.rcvr = seq;
+ s->tcb.snd.una = seq;
+ s->tcb.snd.nxt = seq + 1;
+ s->tcb.snd.rto = TCP_RTO_DEFAULT;
+ s->tcb.snd.ts = tms;
+
+ s->tcb.rcv.mss = s->tcb.so.mss;
+ s->tcb.rcv.wscale = TCP_WSCALE_DEFAULT;
+ s->tcb.rcv.wnd = s->rx.q->prod.mask << s->tcb.rcv.wscale;
+ s->tcb.rcv.ts = 0;
+
+ /* add the stream to the stream table */
+ st = CTX_TCP_STLB(s->s.ctx);
+ se = stbl_add_stream_lock(st, s);
+ if (se == NULL)
+ return -ENOBUFS;
+ s->ste = se;
+
+ /* put stream into the to-send queue */
+ txs_enqueue(s->s.ctx, s);
+
+ return 0;
+}
+
+int
+tle_tcp_stream_connect(struct tle_stream *ts, const struct sockaddr *addr)
+{
+ struct tle_tcp_stream *s;
+ uint32_t type;
+ int32_t rc;
+
+ if (ts == NULL || addr == NULL)
+ return -EINVAL;
+
+ s = TCP_STREAM(ts);
+ type = s->s.type;
+ if (type >= TLE_VNUM)
+ return -EINVAL;
+
+ if (rwl_try_acquire(&s->tx.use) > 0) {
+ rc = rte_atomic16_cmpset(&s->tcb.state, TCP_ST_CLOSED,
+ TCP_ST_SYN_SENT);
+ rc = (rc == 0) ? -EDEADLK : 0;
+ } else
+ rc = -EINVAL;
+
+ if (rc != 0) {
+ rwl_release(&s->tx.use);
+ return rc;
+ }
+
+ /* fill stream, prepare and transmit syn pkt */
+ s->tcb.uop |= TCP_OP_CONNECT;
+ rc = tx_syn(s, addr);
+ rwl_release(&s->tx.use);
+
+ /* error happened, do a cleanup */
+ if (rc != 0)
+ tle_tcp_stream_close(ts);
+
+ return rc;
+}
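+
+/*
+ * Usage sketch (illustrative, IPv4): the stream TX event/callback fires
+ * once the connection is established.
+ *
+ *	struct sockaddr_in rem;
+ *
+ *	memset(&rem, 0, sizeof(rem));
+ *	rem.sin_family = AF_INET;
+ *	rem.sin_port = rte_cpu_to_be_16(80);
+ *	rem.sin_addr.s_addr = rte_cpu_to_be_32(IPv4(192, 168, 1, 1));
+ *	rc = tle_tcp_stream_connect(ts, (const struct sockaddr *)&rem);
+ */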
+
+uint16_t
+tle_tcp_stream_recv(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num)
+{
+ uint32_t n;
+ struct tle_tcp_stream *s;
+
+ s = TCP_STREAM(ts);
+ n = rte_ring_mc_dequeue_burst(s->rx.q, (void **)pkt, num);
+ if (n == 0)
+ return 0;
+
+ /*
+ * if we still have packets to read,
+ * then rearm stream RX event.
+ */
+ if (n == num && rte_ring_count(s->rx.q) != 0) {
+ if (rwl_try_acquire(&s->rx.use) > 0 && s->rx.ev != NULL)
+ tle_event_raise(s->rx.ev);
+ rwl_release(&s->rx.use);
+ }
+
+ return n;
+}
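+
+/*
+ * Usage sketch (illustrative; consume_data is a hypothetical consumer,
+ * and single-segment payload mbufs are assumed for brevity):
+ *
+ *	struct rte_mbuf *mb[MAX_PKT_BURST];
+ *	uint16_t i, n;
+ *
+ *	n = tle_tcp_stream_recv(ts, mb, MAX_PKT_BURST);
+ *	for (i = 0; i != n; i++) {
+ *		consume_data(rte_pktmbuf_mtod(mb[i], const void *),
+ *			mb[i]->data_len);
+ *		rte_pktmbuf_free(mb[i]);
+ *	}
+ */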
+
+uint16_t
+tle_tcp_stream_send(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num)
+{
+ uint32_t i, j, mss, n, state, type;
+ uint64_t ol_flags;
+ struct tle_tcp_stream *s;
+ struct tle_dev *dev;
+
+ s = TCP_STREAM(ts);
+
+ /* mark stream as not closable. */
+ if (rwl_acquire(&s->tx.use) < 0) {
+ rte_errno = EAGAIN;
+ return 0;
+ }
+
+ state = s->tcb.state;
+ if (state != TCP_ST_ESTABLISHED && state != TCP_ST_CLOSE_WAIT) {
+ rte_errno = ENOTCONN;
+ n = 0;
+ } else {
+ mss = s->tcb.snd.mss;
+ dev = s->tx.dst.dev;
+ type = s->s.type;
+ ol_flags = dev->tx.ol_flags[type];
+
+ /* prepare and check for TX */
+ for (i = 0; i != num; i++) {
+
+ /* !!! need to be modified !!! */
+ if (pkt[i]->pkt_len > mss ||
+ pkt[i]->nb_segs > TCP_MAX_PKT_SEG) {
+ rte_errno = EBADMSG;
+ break;
+ } else if (tcp_fill_mbuf(pkt[i], s, &s->tx.dst,
+ ol_flags, s->s.port, 0, TCP_FLAG_ACK,
+ 0, 0) != 0)
+ break;
+ }
+
+ /* queue packets for further transmission. */
+ n = rte_ring_mp_enqueue_burst(s->tx.q, (void **)pkt, i);
+
+ /* notify BE about more data to send */
+ if (n != 0)
+ txs_enqueue(s->s.ctx, s);
+
+ /*
+ * for unsent, but already modified packets:
+ * remove pkt l2/l3 headers, restore ol_flags
+ */
+ if (n != i) {
+ ol_flags = ~dev->tx.ol_flags[type];
+ for (j = n; j != i; j++) {
+ rte_pktmbuf_adj(pkt[j], pkt[j]->l2_len +
+ pkt[j]->l3_len + pkt[j]->l4_len);
+ pkt[j]->ol_flags &= ol_flags;
+ }
+ /* if possible, rearm stream write event. */
+ } else if (rte_ring_free_count(s->tx.q) != 0 &&
+ s->tx.ev != NULL)
+ tle_event_raise(s->tx.ev);
+ }
+
+ rwl_release(&s->tx.use);
+ return n;
+}
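+
+/*
+ * Usage sketch (illustrative): pkt[] contains payload-only mbufs with
+ * enough headroom for the L2/L3/L4 headers; mbufs that were not taken
+ * remain owned by the caller and can be retried or freed.
+ *
+ *	k = tle_tcp_stream_send(ts, pkt, n);
+ *	for (i = k; i != n; i++)
+ *		rte_pktmbuf_free(pkt[i]);
+ */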
+
+/* send data and FIN (if needed) */
+static inline void
+tx_data_fin(struct tle_tcp_stream *s, uint32_t tms, uint32_t state)
+{
+ /* try to send some data */
+ tx_nxt_data(s, tms);
+
+ /* we also have to send a FIN */
+ if (state != TCP_ST_ESTABLISHED &&
+ state != TCP_ST_CLOSE_WAIT &&
+ tcp_txq_nxt_cnt(s) == 0 &&
+ s->tcb.snd.fss != s->tcb.snd.nxt) {
+ s->tcb.snd.fss = ++s->tcb.snd.nxt;
+ send_ack(s, tms, TCP_FLAG_FIN | TCP_FLAG_ACK);
+ }
+}
+
+static inline void
+tx_stream(struct tle_tcp_stream *s, uint32_t tms)
+{
+ uint32_t state;
+
+ state = s->tcb.state;
+
+ if (state == TCP_ST_SYN_SENT) {
+ /* send the SYN, start the rto timer */
+ send_ack(s, tms, TCP_FLAG_SYN);
+ timer_start(s);
+
+ } else if (state >= TCP_ST_ESTABLISHED && state <= TCP_ST_LAST_ACK) {
+
+ tx_data_fin(s, tms, state);
+
+ /* start RTO timer. */
+ if (s->tcb.snd.nxt != s->tcb.snd.una)
+ timer_start(s);
+ }
+}
+
+static inline void
+rto_stream(struct tle_tcp_stream *s, uint32_t tms)
+{
+ uint32_t state;
+
+ state = s->tcb.state;
+
+ TCP_LOG(DEBUG, "%s(%p, tms=%u): state=%u, "
+ "retx=%u, retm=%u, "
+ "rto=%u, snd.ts=%u, tmo=%u, "
+ "snd.nxt=%lu, snd.una=%lu, flight_size=%lu, "
+ "snd.rcvr=%lu, snd.fastack=%u, "
+ "wnd=%u, cwnd=%u, ssthresh=%u, "
+ "bytes sent=%lu, pkt remain=%u;\n",
+ __func__, s, tms, s->tcb.state,
+ s->tcb.snd.nb_retx, s->tcb.snd.nb_retm,
+ s->tcb.snd.rto, s->tcb.snd.ts, tms - s->tcb.snd.ts,
+ s->tcb.snd.nxt, s->tcb.snd.una, s->tcb.snd.nxt - s->tcb.snd.una,
+ s->tcb.snd.rcvr, s->tcb.snd.fastack,
+ s->tcb.snd.wnd, s->tcb.snd.cwnd, s->tcb.snd.ssthresh,
+ s->tcb.snd.nxt - s->tcb.snd.iss, tcp_txq_nxt_cnt(s));
+
+ if (s->tcb.snd.nb_retx < s->tcb.snd.nb_retm) {
+
+ if (state >= TCP_ST_ESTABLISHED && state <= TCP_ST_LAST_ACK) {
+
+ /* update SND.CWND and SND.SSTHRESH */
+ rto_cwnd_update(&s->tcb);
+
+ /* RFC 6582 3.2.4 */
+ s->tcb.snd.rcvr = s->tcb.snd.nxt;
+ s->tcb.snd.fastack = 0;
+
+ /* restart from last acked data */
+ tcp_txq_rst_nxt_head(s);
+ s->tcb.snd.nxt = s->tcb.snd.una;
+
+ tx_data_fin(s, tms, state);
+
+ } else if (state == TCP_ST_SYN_SENT) {
+ /* resending SYN */
+ s->tcb.so.ts.val = tms;
+ send_ack(s, tms, TCP_FLAG_SYN);
+
+ } else if (state == TCP_ST_TIME_WAIT) {
+ stream_term(s);
+ }
+
+ /* RFC 6298 5.5: back off the timer */
+ s->tcb.snd.rto = rto_roundup(2 * s->tcb.snd.rto);
+ s->tcb.snd.nb_retx++;
+ timer_restart(s);
+
+ } else {
+ send_rst(s, s->tcb.snd.una);
+ stream_term(s);
+ }
+}
+
+int
+tle_tcp_process(struct tle_ctx *ctx, uint32_t num)
+{
+ uint32_t i, k, tms;
+ struct sdr *dr;
+ struct tle_timer_wheel *tw;
+ struct tle_stream *p;
+ struct tle_tcp_stream *s, *rs[num];
+
+ /* process streams with expired RTO */
+
+ tw = CTX_TCP_TMWHL(ctx);
+ tms = tcp_get_tms();
+ tle_timer_expire(tw, tms);
+
+ k = tle_timer_get_expired_bulk(tw, (void **)rs, RTE_DIM(rs));
+
+ for (i = 0; i != k; i++) {
+
+ s = rs[i];
+ s->timer.handle = NULL;
+ if (rwl_try_acquire(&s->tx.use) > 0)
+ rto_stream(s, tms);
+ rwl_release(&s->tx.use);
+ }
+
+ /* process streams from to-send queue */
+
+ k = txs_dequeue_bulk(ctx, rs, RTE_DIM(rs));
+
+ for (i = 0; i != k; i++) {
+
+ s = rs[i];
+ if (rwl_try_acquire(&s->tx.use) > 0 &&
+ rte_atomic32_read(&s->tx.arm) > 0) {
+ rte_atomic32_set(&s->tx.arm, 0);
+ tx_stream(s, tms);
+ }
+ rwl_release(&s->tx.use);
+ }
+
+ /* collect streams to close from the death row */
+
+ dr = CTX_TCP_SDR(ctx);
+ for (k = 0, p = STAILQ_FIRST(&dr->be);
+ k != num && p != NULL;
+ k++, p = STAILQ_NEXT(p, link))
+ rs[k] = TCP_STREAM(p);
+
+ if (p == NULL)
+ STAILQ_INIT(&dr->be);
+ else
+ STAILQ_FIRST(&dr->be) = p;
+
+ /* cleanup closed streams */
+ for (i = 0; i != k; i++) {
+ s = rs[i];
+ tcp_stream_down(s);
+ tcp_stream_reset(ctx, s);
+ }
+
+ return 0;
+}
diff --git a/lib/libtle_l4p/tcp_stream.c b/lib/libtle_l4p/tcp_stream.c
new file mode 100644
index 0000000..67ed66b
--- /dev/null
+++ b/lib/libtle_l4p/tcp_stream.c
@@ -0,0 +1,522 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
+#include "tcp_stream.h"
+#include "tcp_timer.h"
+#include "stream_table.h"
+#include "misc.h"
+#include "tcp_ctl.h"
+#include "tcp_ofo.h"
+#include "tcp_txq.h"
+
+
+static void
+unuse_stream(struct tle_tcp_stream *s)
+{
+ s->s.type = TLE_VNUM;
+ rte_atomic32_set(&s->rx.use, INT32_MIN);
+ rte_atomic32_set(&s->tx.use, INT32_MIN);
+}
+
+static void
+fini_stream(struct tle_tcp_stream *s)
+{
+ if (s != NULL) {
+ rte_free(s->rx.q);
+ tcp_ofo_free(s->rx.ofo);
+ rte_free(s->tx.q);
+ rte_free(s->tx.drb.r);
+ }
+}
+
+static void
+tcp_fini_streams(struct tle_ctx *ctx)
+{
+ uint32_t i;
+ struct tcp_streams *ts;
+
+ ts = CTX_TCP_STREAMS(ctx);
+ if (ts != NULL) {
+ stbl_fini(&ts->st);
+ for (i = 0; i != ctx->prm.max_streams; i++)
+ fini_stream(&ts->s[i]);
+
+ /* free the timer wheel */
+ tle_timer_free(ts->tmr);
+ rte_free(ts->tsq);
+
+ STAILQ_INIT(&ts->dr.fe);
+ STAILQ_INIT(&ts->dr.be);
+ }
+
+ rte_free(ts);
+ ctx->streams.buf = NULL;
+ STAILQ_INIT(&ctx->streams.free);
+}
+
+static struct rte_ring *
+alloc_ring(uint32_t n, uint32_t flags, int32_t socket)
+{
+ struct rte_ring *r;
+ size_t sz;
+ char name[RTE_RING_NAMESIZE];
+
+ n = rte_align32pow2(n);
+ sz = sizeof(*r) + n * sizeof(r->ring[0]);
+
+ r = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, socket);
+ if (r == NULL) {
+ TCP_LOG(ERR, "%s: allocation of %zu bytes on socket %d "
+ "failed with error code: %d\n",
+ __func__, sz, socket, rte_errno);
+ return NULL;
+ }
+
+ snprintf(name, sizeof(name), "%p@%zu", r, sz);
+ rte_ring_init(r, name, n, flags);
+ return r;
+}
+
+static int
+init_stream(struct tle_ctx *ctx, struct tle_tcp_stream *s)
+{
+ size_t bsz, rsz, sz;
+ uint32_t i, k, n, nb;
+ struct tle_drb *drb;
+ char name[RTE_RING_NAMESIZE];
+
+ /* init RX part. */
+
+ n = RTE_MAX(ctx->prm.max_stream_rbufs, 1U);
+ s->rx.q = alloc_ring(n, RING_F_SP_ENQ, ctx->prm.socket_id);
+ if (s->rx.q == NULL)
+ return -ENOMEM;
+
+ s->rx.ofo = tcp_ofo_alloc(n, ctx->prm.socket_id);
+ if (s->rx.ofo == NULL)
+ return -ENOMEM;
+
+ /* init TX part. */
+
+ n = RTE_MAX(ctx->prm.max_stream_sbufs, 1U);
+ s->tx.q = alloc_ring(n, RING_F_SC_DEQ, ctx->prm.socket_id);
+ if (s->tx.q == NULL)
+ return -ENOMEM;
+
+ nb = drb_nb_elem(ctx);
+ k = calc_stream_drb_num(ctx, nb);
+ n = rte_align32pow2(k);
+
+ /* size of the drbs ring */
+ rsz = sizeof(*s->tx.drb.r) + n * sizeof(s->tx.drb.r->ring[0]);
+ rsz = RTE_ALIGN_CEIL(rsz, RTE_CACHE_LINE_SIZE);
+
+ /* size of the drb. */
+ bsz = tle_drb_calc_size(nb);
+
+ /* total stream drbs size. */
+ sz = rsz + bsz * k;
+
+ s->tx.drb.r = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ ctx->prm.socket_id);
+ if (s->tx.drb.r == NULL) {
+ TCP_LOG(ERR, "%s(%p): allocation of %zu bytes on socket %d "
+ "failed with error code: %d\n",
+ __func__, s, sz, ctx->prm.socket_id, rte_errno);
+ return -ENOMEM;
+ }
+
+ snprintf(name, sizeof(name), "%p@%zu", s, sz);
+ rte_ring_init(s->tx.drb.r, name, n, 0);
+
+ for (i = 0; i != k; i++) {
+ drb = (struct tle_drb *)((uintptr_t)s->tx.drb.r +
+ rsz + bsz * i);
+ drb->udata = s;
+ drb->size = nb;
+ rte_ring_enqueue(s->tx.drb.r, drb);
+ }
+
+ s->tx.drb.nb_elem = nb;
+ s->tx.drb.nb_max = k;
+
+	/* mark stream as available for use. */
+
+ s->s.ctx = ctx;
+ unuse_stream(s);
+ STAILQ_INSERT_TAIL(&ctx->streams.free, &s->s, link);
+
+ return 0;
+}
+
+static void
+tcp_free_drbs(struct tle_stream *s, struct tle_drb *drb[], uint32_t nb_drb)
+{
+ struct tle_tcp_stream *us;
+
+ us = (struct tle_tcp_stream *)s;
+ rte_ring_enqueue_burst(us->tx.drb.r, (void **)drb, nb_drb);
+}
+
+static struct tle_timer_wheel *
+alloc_timers(uint32_t num, int32_t socket)
+{
+ struct tle_timer_wheel_args twprm;
+
+ twprm.tick_size = TCP_RTO_GRANULARITY;
+ twprm.max_timer = num;
+ twprm.socket_id = socket;
+ return tle_timer_create(&twprm, tcp_get_tms());
+}
+
+static int
+tcp_init_streams(struct tle_ctx *ctx)
+{
+ size_t sz;
+ uint32_t i;
+ int32_t rc;
+ struct tcp_streams *ts;
+
+ sz = sizeof(*ts) + sizeof(ts->s[0]) * ctx->prm.max_streams;
+ ts = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ ctx->prm.socket_id);
+ if (ts == NULL) {
+ TCP_LOG(ERR, "allocation of %zu bytes on socket %d "
+ "for %u tcp_streams failed\n",
+ sz, ctx->prm.socket_id, ctx->prm.max_streams);
+ return -ENOMEM;
+ }
+
+ STAILQ_INIT(&ts->dr.fe);
+ STAILQ_INIT(&ts->dr.be);
+
+ ctx->streams.buf = ts;
+ STAILQ_INIT(&ctx->streams.free);
+
+ ts->tmr = alloc_timers(ctx->prm.max_streams, ctx->prm.socket_id);
+ if (ts->tmr == NULL) {
+ TCP_LOG(ERR, "alloc_timers(ctx=%p) failed with error=%d\n",
+ ctx, rte_errno);
+ rc = -ENOMEM;
+ } else {
+ ts->tsq = alloc_ring(ctx->prm.max_streams,
+ RING_F_SC_DEQ, ctx->prm.socket_id);
+ if (ts->tsq == NULL)
+ rc = -ENOMEM;
+ else
+ rc = stbl_init(&ts->st, ctx->prm.max_streams,
+ ctx->prm.socket_id);
+ }
+
+ for (i = 0; rc == 0 && i != ctx->prm.max_streams; i++)
+ rc = init_stream(ctx, &ts->s[i]);
+
+ if (rc != 0) {
+ TCP_LOG(ERR, "initalisation of %u-th stream failed", i);
+ tcp_fini_streams(ctx);
+ }
+
+ return rc;
+}
+
+static void __attribute__((constructor))
+tcp_stream_setup(void)
+{
+ static const struct stream_ops tcp_ops = {
+ .init_streams = tcp_init_streams,
+ .fini_streams = tcp_fini_streams,
+ .free_drbs = tcp_free_drbs,
+ };
+
+ tle_stream_ops[TLE_PROTO_TCP] = tcp_ops;
+}
+
+/*
+ * Helper routine, check that input event and callback are mutually exclusive.
+ */
+static int
+check_cbev(const struct tle_event *ev, const struct tle_stream_cb *cb)
+{
+ if (ev != NULL && cb->func != NULL)
+ return -EINVAL;
+ return 0;
+}
+
+static int
+check_stream_prm(const struct tle_ctx *ctx,
+ const struct tle_tcp_stream_param *prm)
+{
+ if ((prm->addr.local.ss_family != AF_INET &&
+ prm->addr.local.ss_family != AF_INET6) ||
+ prm->addr.local.ss_family != prm->addr.remote.ss_family)
+ return -EINVAL;
+
+ /* callback and event notifications mechanisms are mutually exclusive */
+ if (check_cbev(prm->cfg.recv_ev, &prm->cfg.recv_cb) != 0 ||
+			check_cbev(prm->cfg.send_ev, &prm->cfg.send_cb) != 0 ||
+ check_cbev(prm->cfg.err_ev, &prm->cfg.err_cb) != 0)
+ return -EINVAL;
+
+	/* check that the context supports the desired address family. */
+ if ((prm->addr.local.ss_family == AF_INET &&
+ ctx->prm.lookup4 == NULL) ||
+ (prm->addr.local.ss_family == AF_INET6 &&
+ ctx->prm.lookup6 == NULL))
+ return -EINVAL;
+
+ return 0;
+}
+
+struct tle_stream *
+tle_tcp_stream_open(struct tle_ctx *ctx,
+ const struct tle_tcp_stream_param *prm)
+{
+ struct tle_tcp_stream *s;
+ int32_t rc;
+
+ if (ctx == NULL || prm == NULL || check_stream_prm(ctx, prm) != 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ s = (struct tle_tcp_stream *)get_stream(ctx);
+ if (s == NULL) {
+ rte_errno = ENFILE;
+ return NULL;
+
+ /* some TX still pending for that stream. */
+ } else if (TCP_STREAM_TX_PENDING(s)) {
+ put_stream(ctx, &s->s, 0);
+ rte_errno = EAGAIN;
+ return NULL;
+ }
+
+ /* setup L4 ports and L3 addresses fields. */
+ rc = stream_fill_ctx(ctx, &s->s,
+ (const struct sockaddr *)&prm->addr.local,
+ (const struct sockaddr *)&prm->addr.remote);
+
+ if (rc != 0) {
+ put_stream(ctx, &s->s, 1);
+ rte_errno = rc;
+ return NULL;
+ }
+
+	/* setup stream notification mechanisms */
+ s->rx.ev = prm->cfg.recv_ev;
+ s->rx.cb = prm->cfg.recv_cb;
+ s->tx.ev = prm->cfg.send_ev;
+ s->tx.cb = prm->cfg.send_cb;
+ s->err.ev = prm->cfg.err_ev;
+ s->err.cb = prm->cfg.err_cb;
+
+ /* store other params */
+ s->tcb.snd.nb_retm = (prm->cfg.nb_retries != 0) ? prm->cfg.nb_retries :
+ TLE_TCP_DEFAULT_RETRIES;
+
+ tcp_stream_up(s);
+ return &s->s;
+}
+
+/*
+ * Helper functions, used by close API.
+ */
+static inline int
+stream_close(struct tle_ctx *ctx, struct tle_tcp_stream *s)
+{
+ uint16_t uop;
+ uint32_t state;
+ static const struct tle_stream_cb zcb;
+
+	/* check whether close() was already invoked */
+ uop = s->tcb.uop;
+ if ((uop & TCP_OP_CLOSE) != 0)
+ return -EDEADLK;
+
+ /* record that close() was already invoked */
+ if (rte_atomic16_cmpset(&s->tcb.uop, uop, uop | TCP_OP_CLOSE) == 0)
+ return -EDEADLK;
+
+	/* mark stream as unavailable for RX/TX. */
+ tcp_stream_down(s);
+
+ /* reset events/callbacks */
+ s->rx.ev = NULL;
+ s->tx.ev = NULL;
+ s->err.ev = NULL;
+
+ s->rx.cb = zcb;
+ s->tx.cb = zcb;
+ s->err.cb = zcb;
+
+ state = s->tcb.state;
+
+	/* CLOSED, LISTEN, SYN_SENT - we can close the stream straightaway */
+ if (state <= TCP_ST_SYN_SENT) {
+ tcp_stream_reset(ctx, s);
+ return 0;
+ }
+
+ /* generate FIN and proceed with normal connection termination */
+ if (state == TCP_ST_ESTABLISHED || state == TCP_ST_CLOSE_WAIT) {
+
+ /* change state */
+ s->tcb.state = (state == TCP_ST_ESTABLISHED) ?
+ TCP_ST_FIN_WAIT_1 : TCP_ST_LAST_ACK;
+
+ /* mark stream as writable/readable again */
+ tcp_stream_up(s);
+
+ /* queue stream into to-send queue */
+ txs_enqueue(ctx, s);
+ return 0;
+ }
+
+	/*
+	 * according to the state above, close() was already invoked;
+	 * we should never reach this point.
+	 */
+ RTE_ASSERT(0);
+ return -EINVAL;
+}
+
+uint32_t
+tle_tcp_stream_close_bulk(struct tle_stream *ts[], uint32_t num)
+{
+ int32_t rc;
+ uint32_t i;
+ struct tle_ctx *ctx;
+ struct tle_tcp_stream *s;
+
+ rc = 0;
+
+ for (i = 0; i != num; i++) {
+
+ s = TCP_STREAM(ts[i]);
+ if (ts[i] == NULL || s->s.type >= TLE_VNUM) {
+ rc = EINVAL;
+ break;
+ }
+
+ ctx = s->s.ctx;
+ rc = stream_close(ctx, s);
+ if (rc != 0)
+ break;
+ }
+
+ if (rc != 0)
+ rte_errno = -rc;
+ return i;
+}
+
+int
+tle_tcp_stream_close(struct tle_stream *ts)
+{
+ struct tle_ctx *ctx;
+ struct tle_tcp_stream *s;
+
+ s = TCP_STREAM(ts);
+ if (ts == NULL || s->s.type >= TLE_VNUM)
+ return -EINVAL;
+
+ ctx = s->s.ctx;
+
+ /* reset stream events if any. */
+ if (s->rx.ev != NULL)
+ tle_event_idle(s->rx.ev);
+ if (s->tx.ev != NULL)
+ tle_event_idle(s->tx.ev);
+ if (s->err.ev != NULL)
+ tle_event_idle(s->err.ev);
+
+ return stream_close(ctx, s);
+}
+
+int
+tle_tcp_stream_get_addr(const struct tle_stream *ts,
+ struct tle_tcp_stream_addr *addr)
+{
+ struct sockaddr_in *lin4, *rin4;
+ struct sockaddr_in6 *lin6, *rin6;
+ struct tle_tcp_stream *s;
+
+ s = TCP_STREAM(ts);
+ if (addr == NULL || ts == NULL || s->s.type >= TLE_VNUM)
+ return -EINVAL;
+
+ if (s->s.type == TLE_V4) {
+
+ lin4 = (struct sockaddr_in *)&addr->local;
+ rin4 = (struct sockaddr_in *)&addr->remote;
+
+ addr->local.ss_family = AF_INET;
+ addr->remote.ss_family = AF_INET;
+
+ lin4->sin_port = s->s.port.dst;
+ rin4->sin_port = s->s.port.src;
+ lin4->sin_addr.s_addr = s->s.ipv4.addr.dst;
+ rin4->sin_addr.s_addr = s->s.ipv4.addr.src;
+
+ } else if (s->s.type == TLE_V6) {
+
+ lin6 = (struct sockaddr_in6 *)&addr->local;
+ rin6 = (struct sockaddr_in6 *)&addr->remote;
+
+ addr->local.ss_family = AF_INET6;
+ addr->remote.ss_family = AF_INET6;
+
+ lin6->sin6_port = s->s.port.dst;
+ rin6->sin6_port = s->s.port.src;
+ memcpy(&lin6->sin6_addr, &s->s.ipv6.addr.dst,
+ sizeof(lin6->sin6_addr));
+ memcpy(&rin6->sin6_addr, &s->s.ipv6.addr.src,
+ sizeof(rin6->sin6_addr));
+ }
+
+ return 0;
+}
+
+int
+tle_tcp_stream_listen(struct tle_stream *ts)
+{
+ struct tle_tcp_stream *s;
+ int32_t rc;
+
+ s = TCP_STREAM(ts);
+ if (ts == NULL || s->s.type >= TLE_VNUM)
+ return -EINVAL;
+
+ /* mark stream as not closable. */
+ if (rwl_try_acquire(&s->rx.use) > 0) {
+ rc = rte_atomic16_cmpset(&s->tcb.state, TCP_ST_CLOSED,
+ TCP_ST_LISTEN);
+ if (rc != 0) {
+ s->tcb.uop |= TCP_OP_LISTEN;
+ rc = 0;
+ } else
+ rc = -EDEADLK;
+ } else
+ rc = -EINVAL;
+
+ rwl_release(&s->rx.use);
+ return rc;
+}
diff --git a/lib/libtle_l4p/tcp_stream.h b/lib/libtle_l4p/tcp_stream.h
new file mode 100644
index 0000000..04c2f88
--- /dev/null
+++ b/lib/libtle_l4p/tcp_stream.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_STREAM_H_
+#define _TCP_STREAM_H_
+
+#include <rte_vect.h>
+#include <tle_dring.h>
+#include <tle_tcp.h>
+#include <tle_event.h>
+
+#include "stream.h"
+#include "misc.h"
+#include "tcp_misc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+ TCP_ST_CLOSED,
+ TCP_ST_LISTEN,
+ TCP_ST_SYN_SENT,
+ TCP_ST_SYN_RCVD,
+ TCP_ST_ESTABLISHED,
+ TCP_ST_FIN_WAIT_1,
+ TCP_ST_FIN_WAIT_2,
+ TCP_ST_CLOSE_WAIT,
+ TCP_ST_CLOSING,
+ TCP_ST_LAST_ACK,
+ TCP_ST_TIME_WAIT,
+ TCP_ST_NUM
+};
+
+enum {
+ TCP_OP_LISTEN = 0x1,
+ TCP_OP_ACCEPT = 0x2,
+ TCP_OP_CONNECT = 0x4,
+ TCP_OP_CLOSE = 0x8,
+};
+
+struct tcb {
+ volatile uint16_t state;
+	volatile uint16_t uop; /* operations performed by the user */
+ struct {
+ uint32_t nxt;
+ uint32_t irs; /* initial received sequence */
+ uint32_t wnd;
+ uint32_t ts;
+ struct {
+ uint32_t seq;
+ uint32_t on;
+ } frs;
+ uint32_t srtt; /* smoothed round trip time (scaled by >> 3) */
+ uint32_t rttvar; /* rtt variance */
+ uint16_t mss;
+ uint8_t wscale;
+ uint8_t dupack;
+ } rcv;
+ struct {
+ uint64_t nxt;
+ uint64_t una;
+ uint64_t rcvr; /* recover RFC 6582 */
+ uint64_t fss; /* FIN sequence # */
+ uint32_t fastack; /* # of partial acks in fast retransmit */
+ uint32_t wnd;
+ union wui wu; /* window update */
+ uint32_t ack; /* last sent ack */
+ uint32_t ts;
+ uint32_t cwnd; /* congestion window */
+ uint32_t ssthresh; /* slow start threshold */
+ uint32_t rto; /* retransmission timeout */
+ uint32_t iss; /* initial send sequence */
+ uint16_t mss;
+ uint8_t wscale;
+		uint8_t nb_retx; /* number of retransmissions */
+		uint8_t nb_retm; /* max number of retx attempts. */
+ } snd;
+ struct syn_opts so; /* initial syn options. */
+};
+
+
+struct tle_tcp_stream {
+
+ struct tle_stream s;
+
+ struct stbl_entry *ste; /* entry in streams table. */
+ struct tcb tcb;
+
+ struct {
+ void *handle;
+ } timer;
+
+ struct {
+ struct tle_event *ev;
+ struct tle_stream_cb cb;
+ } err;
+
+ struct {
+ rte_atomic32_t use;
+ struct rte_ring *q; /* listen (syn) queue */
+ struct ofo *ofo;
+ struct tle_event *ev; /* user provided recv event. */
+ struct tle_stream_cb cb; /* user provided recv callback. */
+ } rx __rte_cache_aligned;
+
+ struct {
+ rte_atomic32_t use;
+ rte_atomic32_t arm; /* when > 0 stream is in to-send queue */
+ struct {
+ uint32_t nb_elem; /* number of objects per drb. */
+ uint32_t nb_max; /* number of drbs per stream. */
+ struct rte_ring *r;
+ } drb;
+ struct rte_ring *q; /* (re)tx queue */
+ struct tle_event *ev;
+ struct tle_stream_cb cb;
+ struct tle_dest dst;
+ } tx __rte_cache_aligned;
+
+} __rte_cache_aligned;
+
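+/*
+ * TCP_STREAM() recovers the enclosing tle_tcp_stream from a pointer to the
+ * embedded generic tle_stream (the usual container_of idiom); it is valid
+ * only for tle_stream pointers that really are embedded in a tle_tcp_stream.
+ */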
+#define TCP_STREAM(p) \
+((struct tle_tcp_stream *)((uintptr_t)(p) - offsetof(struct tle_tcp_stream, s)))
+
+#define TCP_STREAM_TX_PENDING(s) \
+ ((s)->tx.drb.nb_max != rte_ring_count((s)->tx.drb.r))
+
+#define TCP_STREAM_TX_FINISHED(s) \
+ ((s)->tx.drb.nb_max == rte_ring_count((s)->tx.drb.r))
+
+#include "stream_table.h"
+
+struct sdr {
+ rte_spinlock_t lock;
+ STAILQ_HEAD(, tle_stream) fe;
+ STAILQ_HEAD(, tle_stream) be;
+};
+
+struct tcp_streams {
+ struct stbl st;
+ struct tle_timer_wheel *tmr; /* timer wheel */
+ struct rte_ring *tsq; /* to-send streams queue */
+ struct sdr dr; /* death row for zombie streams */
+ struct tle_tcp_stream s[]; /* array of allocated streams. */
+};
+
+#define CTX_TCP_STREAMS(ctx) ((struct tcp_streams *)(ctx)->streams.buf)
+#define CTX_TCP_STLB(ctx) (&CTX_TCP_STREAMS(ctx)->st)
+#define CTX_TCP_TMWHL(ctx) (CTX_TCP_STREAMS(ctx)->tmr)
+#define CTX_TCP_TSQ(ctx) (CTX_TCP_STREAMS(ctx)->tsq)
+#define CTX_TCP_SDR(ctx) (&CTX_TCP_STREAMS(ctx)->dr)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_STREAM_H_ */
diff --git a/lib/libtle_l4p/tcp_timer.h b/lib/libtle_l4p/tcp_timer.h
new file mode 100644
index 0000000..8faefb3
--- /dev/null
+++ b/lib/libtle_l4p/tcp_timer.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_TIMER_H_
+#define _TCP_TIMER_H_
+
+#include <tle_timer.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * internal defines.
+ * all RTO values are in ms.
+ */
+#define TCP_RTO_MAX 60000U /* RFC 6298 (2.5) */
+#define TCP_RTO_MIN 1000U /* RFC 6298 (2.4) */
+#define TCP_RTO_2MSL (2 * TCP_RTO_MAX)
+#define TCP_RTO_DEFAULT TCP_RTO_MIN /* RFC 6298 (2.1)*/
+#define TCP_RTO_GRANULARITY 100U
+
+
+static inline void
+timer_stop(struct tle_tcp_stream *s)
+{
+ struct tle_timer_wheel *tw;
+
+ if (s->timer.handle != NULL) {
+ tw = CTX_TCP_TMWHL(s->s.ctx);
+ tle_timer_stop(tw, s->timer.handle);
+ s->timer.handle = NULL;
+ }
+}
+
+static inline void
+timer_start(struct tle_tcp_stream *s)
+{
+ struct tle_timer_wheel *tw;
+
+ if (s->timer.handle == NULL) {
+ tw = CTX_TCP_TMWHL(s->s.ctx);
+ s->timer.handle = tle_timer_start(tw, s, s->tcb.snd.rto);
+ s->tcb.snd.nb_retx = 0;
+ }
+}
+
+static inline void
+timer_restart(struct tle_tcp_stream *s)
+{
+ struct tle_timer_wheel *tw;
+
+ tw = CTX_TCP_TMWHL(s->s.ctx);
+ s->timer.handle = tle_timer_start(tw, s, s->tcb.snd.rto);
+}
+
+
+/*
+ * reset number of retransmissions and restart RTO timer.
+ */
+static inline void
+timer_reset(struct tle_tcp_stream *s)
+{
+ timer_stop(s);
+ timer_start(s);
+}
+
+static inline uint32_t
+rto_roundup(uint32_t rto)
+{
+ rto = RTE_MAX(rto, TCP_RTO_MIN);
+ rto = RTE_MIN(rto, TCP_RTO_MAX);
+ return rto;
+}
+
+/*
+ * RFC6298: Computing TCP's Retransmission Timer
+ * RTTVAR <- (1 - beta) * RTTVAR + beta * |SRTT - R'|
+ * SRTT <- (1 - alpha) * SRTT + alpha * R'
+ * RTO <- SRTT + max (G, K*RTTVAR)
+ * the following computation is based on Jacobson'88 paper referenced
+ * in the RFC6298
+ */
+static inline void
+rto_estimate(struct tcb *tcb, int32_t rtt)
+{
+ uint32_t rto;
+
+ if (!rtt)
+ rtt = 1;
+ if (tcb->rcv.srtt) {
+ rtt -= (tcb->rcv.srtt >> 3); /* alpha = 1/8 */
+ tcb->rcv.srtt += rtt;
+
+ if (rtt < 0)
+ rtt = -rtt;
+ rtt -= (tcb->rcv.rttvar >> 2); /* beta = 1/4 */
+ tcb->rcv.rttvar += rtt;
+
+ } else {
+ tcb->rcv.srtt = rtt << 3;
+ tcb->rcv.rttvar = rtt << 1;
+ }
+
+ rto = (tcb->rcv.srtt >> 3) +
+ RTE_MAX(TCP_RTO_GRANULARITY, tcb->rcv.rttvar);
+ tcb->snd.rto = rto_roundup(rto);
+}
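+
+/*
+ * Worked example for rto_estimate() above (illustrative numbers, in ms):
+ * starting from srtt = 800 (100 << 3), rttvar = 100 (25 << 2) and a new
+ * sample R' = 120:
+ *	srtt   <- 800 + (120 - 800/8)         = 820
+ *	rttvar <- 100 + (|120 - 100| - 100/4) = 95
+ *	rto    <- 820/8 + max(100, 95)        = 202
+ * rto_roundup() then clamps the result into [TCP_RTO_MIN, TCP_RTO_MAX],
+ * giving a final RTO of 1000.
+ */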
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_TIMER_H_ */
diff --git a/lib/libtle_l4p/tcp_txq.h b/lib/libtle_l4p/tcp_txq.h
new file mode 100644
index 0000000..0b199ba
--- /dev/null
+++ b/lib/libtle_l4p/tcp_txq.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_TXQ_H_
+#define _TCP_TXQ_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
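+/*
+ * The helpers below treat the stream TX ring (s->tx.q) as a retransmission
+ * queue and manipulate its indices directly:
+ *	cons.tail - start of sent but not yet acknowledged data,
+ *	cons.head - next data to transmit,
+ *	prod.tail - end of data queued by the user.
+ * tcp_txq_set_una_tail() advances cons.tail as data gets acknowledged,
+ * while tcp_txq_rst_nxt_head() rewinds cons.head back to cons.tail to
+ * retransmit from the last acknowledged position.
+ */
+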
+static inline struct rte_mbuf **
+tcp_txq_get_nxt_objs(const struct tle_tcp_stream *s, uint32_t *num)
+{
+ uint32_t cnt, head, mask, sz, tail;
+ struct rte_ring *r;
+
+ r = s->tx.q;
+ sz = r->prod.size;
+ mask = r->prod.mask;
+ head = r->cons.head & mask;
+ tail = r->prod.tail & mask;
+
+ cnt = (tail >= head) ? tail - head : sz - head;
+
+ *num = cnt;
+ return (struct rte_mbuf **)(r->ring + head);
+}
+
+static inline struct rte_mbuf **
+tcp_txq_get_una_objs(const struct tle_tcp_stream *s, uint32_t *num)
+{
+ uint32_t cnt, head, mask, sz, tail;
+ struct rte_ring *r;
+
+ r = s->tx.q;
+ sz = r->prod.size;
+ mask = r->prod.mask;
+ head = r->prod.tail & mask;
+ tail = r->cons.tail & mask;
+
+ cnt = (head >= tail) ? head - tail : sz - tail;
+
+ *num = cnt;
+ return (struct rte_mbuf **)(r->ring + tail);
+}
+
+static inline void
+tcp_txq_set_nxt_head(struct tle_tcp_stream *s, uint32_t num)
+{
+ struct rte_ring *r;
+
+ r = s->tx.q;
+ r->cons.head += num;
+}
+
+static inline void
+tcp_txq_rst_nxt_head(struct tle_tcp_stream *s)
+{
+ struct rte_ring *r;
+
+ r = s->tx.q;
+ r->cons.head = r->cons.tail;
+}
+
+static inline void
+tcp_txq_set_una_tail(struct tle_tcp_stream *s, uint32_t num)
+{
+ struct rte_ring *r;
+
+ r = s->tx.q;
+ rte_smp_rmb();
+ r->cons.tail += num;
+}
+
+static inline uint32_t
+tcp_txq_nxt_cnt(struct tle_tcp_stream *s)
+{
+ struct rte_ring *r;
+
+ r = s->tx.q;
+ return (r->prod.tail - r->cons.head) & r->prod.mask;
+}
+
+static inline void
+txs_enqueue(struct tle_ctx *ctx, struct tle_tcp_stream *s)
+{
+ struct rte_ring *r;
+ uint32_t n;
+
+ if (rte_atomic32_add_return(&s->tx.arm, 1) == 1) {
+ r = CTX_TCP_TSQ(ctx);
+ n = rte_ring_enqueue_burst(r, (void * const *)&s, 1);
+ RTE_VERIFY(n == 1);
+ }
+}
+
+static inline uint32_t
+txs_dequeue_bulk(struct tle_ctx *ctx, struct tle_tcp_stream *s[], uint32_t num)
+{
+ struct rte_ring *r;
+
+ r = CTX_TCP_TSQ(ctx);
+ return rte_ring_dequeue_burst(r, (void **)s, num);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_TXQ_H_ */
diff --git a/lib/libtle_l4p/tle_ctx.h b/lib/libtle_l4p/tle_ctx.h
new file mode 100644
index 0000000..a3516bf
--- /dev/null
+++ b/lib/libtle_l4p/tle_ctx.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TLE_CTX_H_
+#define _TLE_CTX_H_
+
+#include <stdint.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <rte_common.h>
+#include <rte_mbuf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * <tle_ctx> - each such ctx represents an 'independent copy of the stack'.
+ * It owns set of <stream>s and <dev>s entities and provides
+ * (de)multiplexing input/output packets from/into devices into/from streams.
+ * <dev> is an abstraction for the underlying device, that is able
+ * to RX/TX packets and may provide some HW offload capabilities.
+ * It is the user's responsibility to add to the <ctx> all <dev>s,
+ * that context has to manage, before starting to do stream operations
+ * (open/send/recv,close) over that context.
+ * Right now adding/deleting <dev>s to the context with open
+ * streams is not supported.
+ * <stream> represents an L4(UDP/TCP, etc.) endpoint <addr, port> and
+ * is an analogy to socket entity.
+ * As with a socket, there is the ability to do recv/send over it.
+ * <stream> belongs to particular <ctx> but is visible globally across
+ * the process, i.e. any thread within the process can do recv/send over it
+ * without any further synchronisation.
+ * While 'upper' layer API is thread safe, lower layer API (rx_bulk/tx_bulk)
+ * is not thread safe and is not supposed to be run on multiple threads
+ * in parallel.
+ * So single thread can drive multiple <ctx>s and do IO for them,
+ * but multiple threads can't drive same <ctx> without some
+ * explicit synchronization.
+ */
+
+struct tle_ctx;
+struct tle_dev;
+
+/**
+ * Blocked L4 ports info.
+ */
+struct tle_bl_port {
+ uint32_t nb_port; /**< number of blocked ports. */
+ const uint16_t *port; /**< list of blocked ports. */
+};
+
+
+/**
+ * device parameters.
+ */
+struct tle_dev_param {
+ uint32_t rx_offload; /**< DEV_RX_OFFLOAD_* supported. */
+ uint32_t tx_offload; /**< DEV_TX_OFFLOAD_* supported. */
+ struct in_addr local_addr4; /**< local IPv4 address assigned. */
+ struct in6_addr local_addr6; /**< local IPv6 address assigned. */
+ struct tle_bl_port bl4; /**< blocked ports for IPv4 address. */
+	struct tle_bl_port bl6;     /**< blocked ports for IPv6 address. */
+};
+
+#define TLE_DST_MAX_HDR 0x60
+
+struct tle_dest {
+ struct rte_mempool *head_mp;
+ /**< MP for fragment headers and control packets. */
+ struct tle_dev *dev; /**< device to send packets through. */
+ uint16_t mtu; /**< MTU for given destination. */
+ uint8_t l2_len; /**< L2 header length. */
+ uint8_t l3_len; /**< L3 header length. */
+ uint8_t hdr[TLE_DST_MAX_HDR]; /**< L2/L3 headers. */
+};
+
+/**
+ * context creation parameters.
+ */
+
+enum {
+ TLE_PROTO_UDP,
+ TLE_PROTO_TCP,
+ TLE_PROTO_NUM
+};
+
+struct tle_ctx_param {
+ int32_t socket_id; /**< socket ID to allocate memory for. */
+ uint32_t proto; /**< L4 proto to handle. */
+ uint32_t max_streams; /**< max number of streams in context. */
+ uint32_t max_stream_rbufs; /**< max recv mbufs per stream. */
+ uint32_t max_stream_sbufs; /**< max send mbufs per stream. */
+ uint32_t send_bulk_size; /**< expected # of packets per send call. */
+
+ int (*lookup4)(void *opaque, const struct in_addr *addr,
+ struct tle_dest *res);
+ /**< will be called by send() to get IPv4 packet destination info. */
+ void *lookup4_data;
+ /**< opaque data pointer for lookup4() callback. */
+
+ int (*lookup6)(void *opaque, const struct in6_addr *addr,
+ struct tle_dest *res);
+ /**< will be called by send() to get IPv6 packet destination info. */
+ void *lookup6_data;
+ /**< opaque data pointer for lookup6() callback. */
+};
+
+/**
+ * create L4 processing context.
+ * @param ctx_prm
+ * Parameters used to create and initialise the L4 context.
+ * @return
+ * Pointer to context structure that can be used in future operations,
+ * or NULL on error, with error code set in rte_errno.
+ *
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ * - ENOMEM - out of memory
+ */
+struct tle_ctx *
+tle_ctx_create(const struct tle_ctx_param *ctx_prm);
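+
+/*
+ * A minimal usage sketch (illustrative only; my_lookup4() and
+ * my_route_data are application-supplied placeholders):
+ *
+ *	struct tle_ctx_param prm = {
+ *		.socket_id = SOCKET_ID_ANY,
+ *		.proto = TLE_PROTO_TCP,
+ *		.max_streams = 0x1000,
+ *		.max_stream_rbufs = 0x100,
+ *		.max_stream_sbufs = 0x100,
+ *		.lookup4 = my_lookup4,
+ *		.lookup4_data = my_route_data,
+ *	};
+ *	struct tle_ctx *ctx = tle_ctx_create(&prm);
+ *	if (ctx == NULL)
+ *		<handle rte_errno: EINVAL/ENOMEM>
+ */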
+
+/**
+ * Destroy given context.
+ *
+ * @param ctx
+ * context to destroy
+ */
+void tle_ctx_destroy(struct tle_ctx *ctx);
+
+/**
+ * Add new device into the given context.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ * context to add new device into.
+ * @param dev_prm
+ * Parameters used to create and initialise new device inside the context.
+ * @return
+ * Pointer to device structure that can be used in future operations,
+ * or NULL on error, with error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ * - ENODEV - max possible value of open devices is reached
+ * - ENOMEM - out of memory
+ */
+struct tle_dev *
+tle_add_dev(struct tle_ctx *ctx, const struct tle_dev_param *dev_prm);
+
+/**
+ * Remove and destroy previously added device from the given context.
+ * This function is not multi-thread safe.
+ *
+ * @param dev
+ * device to remove and destroy.
+ * @return
+ * zero on successful completion.
+ * - -EINVAL - invalid parameter passed to function
+ */
+int tle_del_dev(struct tle_dev *dev);
+
+/**
+ * Flags to the context that destinations info might be changed,
+ * so if it has any destinations data cached, then
+ * it has to be invalidated.
+ * @param ctx
+ * context to invalidate.
+ */
+void tle_ctx_invalidate(struct tle_ctx *ctx);
+
+/**
+ * Stream asynchronous notification mechanisms:
+ * a) recv/send callback.
+ * Stream recv/send notification callbacks behaviour is edge-triggered (ET).
+ * recv callback will be invoked if stream receive buffer was empty and
+ * new packet(s) have arrived.
+ * send callback will be invoked when the stream send buffer was full
+ * and some packets belonging to that stream have been sent
+ * (part of the send buffer became free again).
+ * Note that both recv and send callbacks are called with sort of read lock
+ * held on that stream. So it is not permitted to call stream_close()
+ * within the callback function. Doing that would cause a deadlock.
+ * While it is allowed to call stream send/recv functions within the
+ * callback, it is not recommended: callback function will be invoked
+ * within tle_udp_rx_bulk/tle_udp_tx_bulk context and some heavy processing
+ * within the callback functions might cause performance degradation
+ * or even loss of packets for further streams.
+ * b) recv/send event.
+ * Stream recv/send events behaviour is level-triggered (LT).
+ * receive event will be raised by either
+ * tle_udp_rx_bulk() or tle_udp_stream_recv() as long as there are any
+ * remaining packets inside the stream receive buffer.
+ * send event will be raised by either
+ * tle_udp_tx_bulk() or tle_udp_stream_send() as long as there is any
+ * free space inside the stream send buffer.
+ * Note that callback and event are mutually exclusive on <stream, op> basis.
+ * It is not possible to open a stream with both recv event and callback
+ * specified.
+ * Though it is possible to open a stream with recv callback and send event,
+ * or vice versa.
+ * If the user doesn't need any notification mechanism for that stream,
+ * both event and callback could be set to zero.
+ */
+
+struct tle_event;
+struct tle_stream;
+
+/**
+ * Stream recv/send callback function and data.
+ */
+struct tle_stream_cb {
+ void (*func)(void *, struct tle_stream *);
+ void *data;
+};
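+
+/*
+ * A sketch of providing a recv callback at stream open time (illustrative
+ * only; my_recv_notify() and my_disp are application-defined placeholders).
+ * Keep the callback lightweight and never call stream close from it:
+ *
+ *	static void
+ *	my_recv_notify(void *data, struct tle_stream *s)
+ *	{
+ *		dispatcher_schedule(data, s);
+ *	}
+ *	...
+ *	prm.recv_cb.func = my_recv_notify;
+ *	prm.recv_cb.data = my_disp;
+ *	prm.recv_ev = NULL;
+ * (recv_ev stays NULL, as event and callback are mutually exclusive).
+ */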
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TLE_CTX_H_ */
diff --git a/lib/libtle_udp/tle_event.h b/lib/libtle_l4p/tle_event.h
index 9357def..b19954a 100644
--- a/lib/libtle_udp/tle_event.h
+++ b/lib/libtle_l4p/tle_event.h
@@ -17,6 +17,7 @@
#define _SEV_IMPL_H_
#include <rte_common.h>
+#include <rte_memory.h>
#include <rte_spinlock.h>
#include <rte_atomic.h>
#include <sys/queue.h>
@@ -210,6 +211,26 @@ tle_event_idle(struct tle_event *ev)
rte_spinlock_unlock(&q->lock);
}
+static inline void
+tle_evq_idle(struct tle_evq *evq, struct tle_event *ev[], uint32_t num)
+{
+ uint32_t i, n;
+
+ rte_spinlock_lock(&evq->lock);
+
+ n = 0;
+ for (i = 0; i != num; i++) {
+ if (ev[i]->state == TLE_SEV_UP) {
+ TAILQ_REMOVE(&evq->armed, ev[i], ql);
+ n++;
+ }
+ ev[i]->state = TLE_SEV_IDLE;
+ }
+
+ evq->nb_armed -= n;
+ rte_spinlock_unlock(&evq->lock);
+}
+
/*
* return up to *num* user data pointers associated with
diff --git a/lib/libtle_l4p/tle_tcp.h b/lib/libtle_l4p/tle_tcp.h
new file mode 100644
index 0000000..e6eb336
--- /dev/null
+++ b/lib/libtle_l4p/tle_tcp.h
@@ -0,0 +1,395 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TLE_TCP_H_
+#define _TLE_TCP_H_
+
+#include <tle_ctx.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * TCP stream creation parameters.
+ */
+struct tle_tcp_stream_addr {
+ struct sockaddr_storage local; /**< stream local address. */
+ struct sockaddr_storage remote; /**< stream remote address. */
+};
+
+#define TLE_TCP_DEFAULT_RETRIES 3
+
+struct tle_tcp_stream_cfg {
+ uint8_t nb_retries; /**< max number of retransmission attempts. */
+
+ /* _cb and _ev are mutually exclusive */
+ struct tle_event *err_ev; /**< error event to use. */
+ struct tle_stream_cb err_cb; /**< error callback to use. */
+
+ struct tle_event *recv_ev; /**< recv event to use. */
+ struct tle_stream_cb recv_cb; /**< recv callback to use. */
+
+ struct tle_event *send_ev; /**< send event to use. */
+ struct tle_stream_cb send_cb; /**< send callback to use. */
+};
+
+struct tle_tcp_stream_param {
+ struct tle_tcp_stream_addr addr;
+ struct tle_tcp_stream_cfg cfg;
+};
+
+/**
+ * create a new stream within given TCP context.
+ * @param ctx
+ * TCP context to create new stream within.
+ * @param prm
+ * Parameters used to create and initialise the new stream.
+ * @return
+ * Pointer to TCP stream structure that can be used in future TCP API calls,
+ * or NULL on error, with error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ *   - ENFILE - max limit of open streams reached for that context
+ */
+struct tle_stream *
+tle_tcp_stream_open(struct tle_ctx *ctx,
+ const struct tle_tcp_stream_param *prm);
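+
+/*
+ * A sketch of opening a client (actively connecting) stream; ctx, the
+ * events and the address values are application-supplied placeholders:
+ *
+ *	struct tle_tcp_stream_param sp = {
+ *		.cfg = { .recv_ev = rx_ev, .send_ev = tx_ev, .err_ev = er_ev },
+ *	};
+ *	struct sockaddr_in *l = (struct sockaddr_in *)&sp.addr.local;
+ *	l->sin_family = AF_INET;
+ *	l->sin_addr.s_addr = INADDR_ANY;
+ *	sp.addr.remote.ss_family = AF_INET;
+ *	struct tle_stream *s = tle_tcp_stream_open(ctx, &sp);
+ *	if (s == NULL)
+ *		<handle rte_errno: EINVAL/ENFILE/EAGAIN>
+ *	tle_tcp_stream_connect(s, (const struct sockaddr *)&remote);
+ */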
+
+/**
+ * close an open stream.
+ * if the stream is in connected state, then:
+ * - connection termination will be performed.
+ * - if the stream contains unsent data, then the actual close will be
+ *   postponed until either the remaining data is transmitted or the
+ *   timeout expires.
+ * All packets that belong to that stream and remain in the device
+ * TX queue will be kept for further transmission.
+ * @param s
+ * Pointer to the stream to close.
+ * @return
+ * zero on successful completion.
+ * - -EINVAL - invalid parameter passed to function
+ * - -EDEADLK - close was already invoked on that stream
+ */
+int tle_tcp_stream_close(struct tle_stream *s);
+
+/**
+ * close a group of open streams.
+ * if the stream is in connected state, then:
+ * - connection termination will be performed.
+ * - if the stream contains unsent data, then the actual close will be
+ *   postponed until either the remaining data is transmitted or the
+ *   timeout expires.
+ * All packets that belong to that stream and remain in the device
+ * TX queue will be kept for further transmission.
+ * @param ts
+ * An array of pointers to streams that have to be closed.
+ * @param num
+ * Number of elements in the *ts* array.
+ * @return
+ * number of successfully closed streams.
+ * In case of error, error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ * - EDEADLK - close was already invoked on that stream
+ */
+uint32_t
+tle_tcp_stream_close_bulk(struct tle_stream *ts[], uint32_t num);
+
+/**
+ * get open stream local and remote addresses.
+ * @param s
+ * Pointer to the stream.
+ * @return
+ * zero on successful completion.
+ * - EINVAL - invalid parameter passed to function
+ */
+int
+tle_tcp_stream_get_addr(const struct tle_stream *s,
+ struct tle_tcp_stream_addr *addr);
+
+/**
+ * Client mode connect API.
+ */
+
+/**
+ * Attempt to establish a connection with the destination TCP endpoint.
+ * The stream write event (or callback) will fire if the connection is
+ * established successfully.
+ * Note that a stream in listen state, or a stream with an already
+ * established connection, can't be the subject of a connect() call.
+ * In case of unsuccessful attempt, error event (or callback) will be
+ * activated.
+ * @param s
+ * Pointer to the stream.
+ * @param addr
+ * Address of the destination endpoint.
+ * @return
+ * zero on successful completion.
+ * - -EINVAL - invalid parameter passed to function
+ */
+int tle_tcp_stream_connect(struct tle_stream *s, const struct sockaddr *addr);
+
+/*
+ * Server mode connect API.
+ * Basic scheme for server mode API usage:
+ *
+ * <stream open happens here>
+ * tle_tcp_stream_listen(stream_to_listen);
+ * <wait for read event/callback on that stream>
+ * n = tle_tcp_stream_synreqs(stream_to_listen, syn_reqs, RTE_DIM(syn_reqs));
+ * for (i = 0, k = 0; i != n; i++) {
+ * rc = <decide should connection from that endpoint be allowed>;
+ * if (rc == 0) {
+ * //proceed with connection establishment
+ *         accept_param[k].syn = syn_reqs[i];
+ *         <fill rest of accept_param fields for k-th connection>
+ *         k++;
+ * } else {
+ * //reject connection requests from that endpoint
+ * rej_reqs[i - k] = syn_reqs[i];
+ * }
+ * }
+ *
+ * //reject n - k connection requests
+ * tle_tcp_reject(stream_to_listen, rej_reqs, n - k);
+ *
+ * //accept k new connections
+ * rc = tle_tcp_stream_accept(stream_to_listen, accept_param,
+ *         new_con_streams, k);
+ * <handle errors>
+ */
+
+struct tle_syn_req {
+ struct rte_mbuf *pkt;
+	/**< mbuf with incoming connection request. */
+	void *opaque; /**< TLDK-related opaque pointer. */
+};
+
+struct tle_tcp_accept_param {
+	struct tle_syn_req syn; /**< mbuf with incoming SYN request. */
+	struct tle_tcp_stream_cfg cfg; /**< stream configure options. */
+};
+
+
+/**
+ * Set stream into the listen state (passive opener), i.e. make stream ready
+ * to accept new connections.
+ * Stream read event (or callback) will be activated as new SYN requests
+ * will arrive.
+ * Note that a stream with an already established (or establishing)
+ * connection can't be the subject of a listen() call.
+ * @param s
+ * Pointer to the stream.
+ * @return
+ * zero on successful completion.
+ * - -EINVAL - invalid parameter passed to function
+ */
+int tle_tcp_stream_listen(struct tle_stream *s);
+
+/**
+ * return up to *num* mbufs with SYN requests that were received
+ * for given TCP endpoint.
+ * Note that the stream has to be in listen state.
+ * For each returned mbuf:
+ * data_off set to the start of the packet
+ * l2_len, l3_len, l4_len are setup properly
+ * (so user can still extract L2/L3/L4 header info if needed)
+ * packet_type RTE_PTYPE_L2/L3/L4 bits are setup properly.
+ * L3/L4 checksum is verified.
+ * @param s
+ * TCP stream to receive packets from.
+ * @param rq
+ * An array of tle_syn_req structures that contains
+ * at least *num* elements in it.
+ * @param num
+ *   Number of elements in the *rq* array.
+ * @return
+ *   number of entries filled inside the *rq* array.
+ */
+uint16_t tle_tcp_stream_synreqs(struct tle_stream *s, struct tle_syn_req rq[],
+ uint32_t num);
+
+/**
+ * Accept connection requests for the given stream.
+ * Note that the stream has to be in listen state.
+ * For each new connection a new stream will be opened.
+ * @param s
+ * TCP listen stream.
+ * @param prm
+ * An array of *tle_tcp_accept_param* structures that
+ * contains at least *num* elements in it.
+ * @param rs
+ * An array of pointers to *tle_stream* structures that
+ * must be large enough to store up to *num* pointers in it.
+ * @param num
+ * Number of elements in the *prm* and *rs* arrays.
+ * @return
+ *   number of entries filled inside the *rs* array.
+ * In case of error, error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ *   - ENFILE - no more streams are available to open.
+ */
+int tle_tcp_stream_accept(struct tle_stream *s,
+ const struct tle_tcp_accept_param prm[], struct tle_stream *rs[],
+ uint32_t num);
+
+/**
+ * Reject connection requests for the given stream.
+ * Note that the stream has to be in listen state.
+ * @param s
+ * TCP listen stream.
+ * @param rq
+ * An array of tle_syn_req structures that contains
+ * at least *num* elements in it.
+ * @param num
+ *   Number of elements in the *rq* array.
+ */
+void tle_tcp_reject(struct tle_stream *s, const struct tle_syn_req rq[],
+ uint32_t num);
+
+/**
+ * return up to *num* mbufs that were received for the given TCP stream.
+ * Note that the stream has to be in connected state.
+ * Data ordering is preserved.
+ * For each returned mbuf:
+ * data_off set to the start of the packet's TCP data
+ * l2_len, l3_len, l4_len are setup properly
+ * (so user can still extract L2/L3 address info if needed)
+ * packet_type RTE_PTYPE_L2/L3/L4 bits are setup properly.
+ * L3/L4 checksum is verified.
+ * @param s
+ * TCP stream to receive packets from.
+ * @param pkt
+ * An array of pointers to *rte_mbuf* structures that
+ * must be large enough to store up to *num* pointers in it.
+ * @param num
+ * Number of elements in the *pkt* array.
+ * @return
+ *   number of entries filled inside the *pkt* array.
+ */
+uint16_t tle_tcp_stream_recv(struct tle_stream *s, struct rte_mbuf *pkt[],
+ uint16_t num);
+
+/**
+ * Consume and queue up to *num* packets, that will be sent eventually
+ * by tle_tcp_tx_bulk().
+ * Note that the stream has to be in connected state.
+ * It is the responsibility of this function to determine over which TCP dev
+ * the given packets have to be sent out and to do the necessary
+ * preparations for that.
+ * Based on the stream destination address it does route lookup,
+ * fills L2/L3/L4 headers,
+ * and, if necessary, fragments packets.
+ * Depending on the underlying device information, it either does
+ * IP/TCP checksum calculations in SW or sets mbuf TX checksum
+ * offload fields properly.
+ * For each input mbuf the following conditions have to be met:
+ * - data_off point to the start of packet's TCP data.
+ * - there is enough header space to prepend L2/L3/L4 headers.
+ * @param s
+ * TCP stream to send packets over.
+ * @param pkt
+ * The burst of output packets that need to be send.
+ * @param num
+ * Number of elements in the *pkt* array.
+ * @return
+ * number of packets successfully queued in the stream send buffer.
+ * In case of error, error code can be set in rte_errno.
+ * Possible rte_errno errors include:
+ *   - EAGAIN - operation can not be performed right now
+ *     (most likely close() was already performed on that stream).
+ * - ENOTCONN - the stream is not connected.
+ */
+uint16_t tle_tcp_stream_send(struct tle_stream *s, struct rte_mbuf *pkt[],
+ uint16_t num);
+
+/**
+ * Back End (BE) API.
+ * BE API functions are not multi-thread safe.
+ * Supposed to be called by the L2/L3 processing layer.
+ */
+
+/**
+ * Take input mbufs and distribute them to open TCP streams.
+ * expects that for each input packet:
+ * - l2_len, l3_len, l4_len are setup correctly
+ * - (packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6)) != 0,
+ * - (packet_type & RTE_PTYPE_L4_TCP) != 0,
+ * During delivery L3/L4 checksums will be verified
+ * (either relies on HW offload or in SW).
+ * May cause some extra packets to be queued for TX.
+ * This function is not multi-thread safe.
+ * @param dev
+ * TCP device the packets were received from.
+ * @param pkt
+ * The burst of input packets that need to be processed.
+ * @param rp
+ * The array that will contain pointers of unprocessed packets at return.
+ * Should contain at least *num* elements.
+ * @param rc
+ * The array that will contain error code for corresponding rp[] entry:
+ * - ENOENT - no open stream matching this packet.
+ * - ENOBUFS - receive buffer of the destination stream is full.
+ * Should contain at least *num* elements.
+ * @param num
+ * Number of elements in the *pkt* input array.
+ * @return
+ * number of packets delivered to the TCP streams.
+ */
+uint16_t tle_tcp_rx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[],
+ struct rte_mbuf *rp[], int32_t rc[], uint16_t num);
+
+/**
+ * Fill *pkt* with pointers to the packets that have to be transmitted
+ * over given TCP device.
+ * Output packets have to be ready to be passed straight to rte_eth_tx_burst()
+ * without any extra processing.
+ * TCP/IPv4 checksum either already calculated or appropriate mbuf fields set
+ * properly for HW offload.
+ * This function is not multi-thread safe.
+ * @param dev
+ * TCP device the output packets will be transmitted over.
+ * @param pkt
+ * An array of pointers to *rte_mbuf* structures that
+ * must be large enough to store up to *num* pointers in it.
+ * @param num
+ * Number of elements in the *pkt* array.
+ * @return
+ *   number of entries filled inside the *pkt* array.
+ */
+uint16_t tle_tcp_tx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[],
+ uint16_t num);
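+
+/*
+ * Typical BE loop per <dev, port/queue> pair (a sketch; MAX_BURST and the
+ * port/queue ids are application-chosen, per-packet error accounting and
+ * retry of the unsent TX tail are elided):
+ *
+ *	struct rte_mbuf *pkt[MAX_BURST], *rp[MAX_BURST];
+ *	int32_t rc[MAX_BURST];
+ *	uint16_t i, n, k;
+ *
+ *	n = rte_eth_rx_burst(port, queue, pkt, RTE_DIM(pkt));
+ *	k = tle_tcp_rx_bulk(dev, pkt, rp, rc, n);
+ *	for (i = 0; i != n - k; i++)
+ *		rte_pktmbuf_free(rp[i]);
+ *
+ *	k = tle_tcp_tx_bulk(dev, pkt, RTE_DIM(pkt));
+ *	n = rte_eth_tx_burst(port, queue, pkt, k);
+ *	for (i = n; i != k; i++)
+ *		rte_pktmbuf_free(pkt[i]);
+ */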
+
+/**
+ * perform internal processing for given TCP context.
+ * Checks which timers are expired and performs the required actions
+ * (retransmission/connection abort, etc.)
+ * May cause some extra packets to be queued for TX.
+ * This function is not multi-thread safe.
+ * @param ctx
+ * TCP context to process.
+ * @param num
+ * maximum number of streams to process.
+ * @return
+ * zero on successful completion.
+ * - EINVAL - invalid parameter passed to function
+ */
+int tle_tcp_process(struct tle_ctx *ctx, uint32_t num);
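+
+/*
+ * tle_tcp_process() has to be called periodically by the application so
+ * that retransmissions and stream teardown make progress, e.g. once per
+ * main-loop iteration (a sketch; MAX_PROCESS is an application-chosen
+ * bound on the number of streams handled per call):
+ *
+ *	for (;;) {
+ *		<rx/tx bulk work for all devices within ctx>
+ *		tle_tcp_process(ctx, MAX_PROCESS);
+ *	}
+ */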
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TLE_TCP_H_ */
diff --git a/lib/libtle_l4p/tle_udp.h b/lib/libtle_l4p/tle_udp.h
new file mode 100644
index 0000000..d3a8fe9
--- /dev/null
+++ b/lib/libtle_l4p/tle_udp.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TLE_UDP_H_
+#define _TLE_UDP_H_
+
+#include <tle_ctx.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * UDP stream creation parameters.
+ */
+struct tle_udp_stream_param {
+ struct sockaddr_storage local_addr; /**< stream local address. */
+ struct sockaddr_storage remote_addr; /**< stream remote address. */
+
+ /* _cb and _ev are mutually exclusive */
+ struct tle_event *recv_ev; /**< recv event to use. */
+ struct tle_stream_cb recv_cb; /**< recv callback to use. */
+
+ struct tle_event *send_ev; /**< send event to use. */
+ struct tle_stream_cb send_cb; /**< send callback to use. */
+};
+
+/**
+ * create a new stream within given UDP context.
+ * @param ctx
+ * UDP context to create new stream within.
+ * @param prm
+ * Parameters used to create and initialise the new stream.
+ * @return
+ * Pointer to UDP stream structure that can be used in future UDP API calls,
+ * or NULL on error, with error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ *   - ENFILE - max limit of open streams reached for that context
+ */
+struct tle_stream *
+tle_udp_stream_open(struct tle_ctx *ctx,
+ const struct tle_udp_stream_param *prm);
+
+/**
+ * close an open stream.
+ * All packets still remaining in stream receive buffer will be freed.
+ * All packets still remaining in stream transmit buffer will be kept
+ * for further transmission.
+ * @param s
+ * Pointer to the stream to close.
+ * @return
+ * zero on successful completion.
+ * - -EINVAL - invalid parameter passed to function
+ */
+int tle_udp_stream_close(struct tle_stream *s);
+
+/**
+ * get open stream parameters.
+ * @param s
+ * Pointer to the stream.
+ * @return
+ * zero on successful completion.
+ * - EINVAL - invalid parameter passed to function
+ */
+int
+tle_udp_stream_get_param(const struct tle_stream *s,
+ struct tle_udp_stream_param *prm);
+
+/**
+ * Take input mbufs and distribute them to open UDP streams.
+ * expects that for each input packet:
+ * - l2_len, l3_len, l4_len are setup correctly
+ * - (packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6)) != 0,
+ * - (packet_type & RTE_PTYPE_L4_UDP) != 0,
+ * During delivery L3/L4 checksums will be verified
+ * (either relies on HW offload or in SW).
+ * This function is not multi-thread safe.
+ * @param dev
+ * UDP device the packets were received from.
+ * @param pkt
+ * The burst of input packets that need to be processed.
+ * @param rp
+ * The array that will contain pointers of unprocessed packets at return.
+ * Should contain at least *num* elements.
+ * @param rc
+ * The array that will contain error code for corresponding rp[] entry:
+ * - ENOENT - no open stream matching this packet.
+ * - ENOBUFS - receive buffer of the destination stream is full.
+ * Should contain at least *num* elements.
+ * @param num
+ * Number of elements in the *pkt* input array.
+ * @return
+ * number of packets delivered to the UDP streams.
+ */
+uint16_t tle_udp_rx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[],
+ struct rte_mbuf *rp[], int32_t rc[], uint16_t num);
+
+/**
+ * Fill *pkt* with pointers to the packets that have to be transmitted
+ * over given UDP device.
+ * Output packets have to be ready to be passed straight to rte_eth_tx_burst()
+ * without any extra processing.
+ * UDP/IPv4 checksum either already calculated or appropriate mbuf fields set
+ * properly for HW offload.
+ * This function is not multi-thread safe.
+ * @param dev
+ * UDP device the output packets will be transmitted over.
+ * @param pkt
+ * An array of pointers to *rte_mbuf* structures that
+ * must be large enough to store up to *num* pointers in it.
+ * @param num
+ * Number of elements in the *pkt* array.
+ * @return
+ *   number of entries filled inside the *pkt* array.
+ */
+uint16_t tle_udp_tx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[],
+ uint16_t num);
+
+/**
+ * return up to *num* mbufs that were received for the given UDP stream.
+ * For each returned mbuf:
+ * data_off set to the start of the packet's UDP data
+ * l2_len, l3_len, l4_len are setup properly
+ * (so user can still extract L2/L3 address info if needed)
+ * packet_type RTE_PTYPE_L2/L3/L4 bits are setup properly.
+ * L3/L4 checksum is verified.
+ * Packets with invalid L3/L4 checksum will be silently dropped.
+ * @param s
+ * UDP stream to receive packets from.
+ * @param pkt
+ * An array of pointers to *rte_mbuf* structures that
+ * must be large enough to store up to *num* pointers in it.
+ * @param num
+ * Number of elements in the *pkt* array.
+ * @return
+ *   number of entries filled inside the *pkt* array.
+ */
+uint16_t tle_udp_stream_recv(struct tle_stream *s, struct rte_mbuf *pkt[],
+ uint16_t num);
+
+/**
+ * Consume and queue up to *num* packets, that will be sent eventually
+ * by tle_udp_tx_bulk().
+ * If *dst_addr* is NULL, then default remote address associated with that
+ * stream (if any) will be used.
+ * The main purpose of that function is to determine over which UDP dev
+ * the given packets have to be sent out and to do the necessary
+ * preparations for that.
+ * Based on the *dst_addr* it does route lookup, fills L2/L3/L4 headers,
+ * and, if necessary, fragments packets.
+ * Depending on the underlying device information, it either does
+ * IP/UDP checksum calculations in SW or sets mbuf TX checksum
+ * offload fields properly.
+ * For each input mbuf the following conditions have to be met:
+ * - data_off point to the start of packet's UDP data.
+ * - there is enough header space to prepend L2/L3/L4 headers.
+ * @param s
+ * UDP stream to send packets over.
+ * @param pkt
+ * The burst of output packets that need to be send.
+ * @param num
+ * Number of elements in the *pkt* array.
+ * @param dst_addr
+ * Destination address to send packets to.
+ * @return
+ * number of packets successfully queued in the stream send buffer.
+ */
+uint16_t tle_udp_stream_send(struct tle_stream *s, struct rte_mbuf *pkt[],
+ uint16_t num, const struct sockaddr *dst_addr);
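+
+/*
+ * A minimal echo sketch over a connected UDP stream, combining the two
+ * calls above (illustrative only; packets that could not be queued for
+ * TX are simply dropped here):
+ *
+ *	n = tle_udp_stream_recv(s, pkt, RTE_DIM(pkt));
+ *	k = tle_udp_stream_send(s, pkt, n, NULL);
+ *	for (i = k; i != n; i++)
+ *		rte_pktmbuf_free(pkt[i]);
+ */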
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TLE_UDP_H_ */
diff --git a/lib/libtle_udp/udp_rxtx.c b/lib/libtle_l4p/udp_rxtx.c
index a5b48c8..01d3520 100644
--- a/lib/libtle_udp/udp_rxtx.c
+++ b/lib/libtle_l4p/udp_rxtx.c
@@ -20,18 +20,18 @@
#include <rte_ip_frag.h>
#include <rte_udp.h>
-#include "udp_impl.h"
+#include "udp_stream.h"
#include "misc.h"
static inline struct tle_udp_stream *
-rx_stream_obtain(struct tle_udp_dev *dev, uint32_t type, uint32_t port)
+rx_stream_obtain(struct tle_dev *dev, uint32_t type, uint32_t port)
{
struct tle_udp_stream *s;
- if (type >= TLE_UDP_VNUM || dev->dp[type] == NULL)
+ if (type >= TLE_VNUM || dev->dp[type] == NULL)
return NULL;
- s = dev->dp[type]->streams[port];
+ s = (struct tle_udp_stream *)dev->dp[type]->streams[port];
if (s == NULL)
return NULL;
@@ -49,38 +49,38 @@ get_pkt_type(const struct rte_mbuf *m)
v = m->packet_type &
(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_MASK);
if (v == (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP))
- return TLE_UDP_V4;
+ return TLE_V4;
else if (v == (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP))
- return TLE_UDP_V6;
+ return TLE_V6;
else
- return TLE_UDP_VNUM;
+ return TLE_VNUM;
}
-static inline union udp_ports
-pkt_info(const struct tle_udp_dev *dev, struct rte_mbuf *m,
- union udp_ports *ports, union ipv4_addrs *addr4,
+static inline union l4_ports
+pkt_info(const struct tle_dev *dev, struct rte_mbuf *m,
+ union l4_ports *ports, union ipv4_addrs *addr4,
union ipv6_addrs **addr6)
{
uint32_t len;
- union udp_ports ret, *up;
+ union l4_ports ret, *up;
union ipv4_addrs *pa4;
ret.src = get_pkt_type(m);
len = m->l2_len;
- if (ret.src == TLE_UDP_V4) {
+ if (ret.src == TLE_V4) {
pa4 = rte_pktmbuf_mtod_offset(m, union ipv4_addrs *,
len + offsetof(struct ipv4_hdr, src_addr));
addr4->raw = pa4->raw;
- m->ol_flags |= dev->rx.ol_flags[TLE_UDP_V4];
- } else if (ret.src == TLE_UDP_V6) {
+ m->ol_flags |= dev->rx.ol_flags[TLE_V4];
+ } else if (ret.src == TLE_V6) {
*addr6 = rte_pktmbuf_mtod_offset(m, union ipv6_addrs *,
len + offsetof(struct ipv6_hdr, src_addr));
- m->ol_flags |= dev->rx.ol_flags[TLE_UDP_V6];
+ m->ol_flags |= dev->rx.ol_flags[TLE_V6];
}
len += m->l3_len;
- up = rte_pktmbuf_mtod_offset(m, union udp_ports *,
+ up = rte_pktmbuf_mtod_offset(m, union l4_ports *,
len + offsetof(struct udp_hdr, src_port));
ports->raw = up->raw;
ret.dst = ports->dst;
@@ -101,7 +101,7 @@ rx_stream(struct tle_udp_stream *s, void *mb[], struct rte_mbuf *rp[],
/* if RX queue was empty invoke user RX notification callback. */
if (s->rx.cb.func != NULL && r != 0 && rte_ring_count(s->rx.q) == r)
- s->rx.cb.func(s->rx.cb.data, s);
+ s->rx.cb.func(s->rx.cb.data, &s->s);
for (i = r, k = 0; i != num; i++, k++) {
rc[k] = ENOBUFS;
@@ -113,7 +113,7 @@ rx_stream(struct tle_udp_stream *s, void *mb[], struct rte_mbuf *rp[],
static inline uint16_t
rx_stream6(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
- union ipv6_addrs *addr[], union udp_ports port[],
+ union ipv6_addrs *addr[], union l4_ports port[],
struct rte_mbuf *rp[], int32_t rc[], uint16_t num)
{
uint32_t i, k, n;
@@ -124,9 +124,9 @@ rx_stream6(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
for (i = 0; i != num; i++) {
- if ((port[i].raw & s->pmsk.raw) != s->port.raw ||
- ymm_mask_cmp(&addr[i]->raw, &s->ipv6.addr.raw,
- &s->ipv6.mask.raw) != 0) {
+ if ((port[i].raw & s->s.pmsk.raw) != s->s.port.raw ||
+ ymm_mask_cmp(&addr[i]->raw, &s->s.ipv6.addr.raw,
+ &s->s.ipv6.mask.raw) != 0) {
rc[k] = ENOENT;
rp[k] = pkt[i];
k++;
@@ -141,7 +141,7 @@ rx_stream6(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
static inline uint16_t
rx_stream4(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
- union ipv4_addrs addr[], union udp_ports port[],
+ union ipv4_addrs addr[], union l4_ports port[],
struct rte_mbuf *rp[], int32_t rc[], uint16_t num)
{
uint32_t i, k, n;
@@ -152,9 +152,9 @@ rx_stream4(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
for (i = 0; i != num; i++) {
- if ((addr[i].raw & s->ipv4.mask.raw) != s->ipv4.addr.raw ||
- (port[i].raw & s->pmsk.raw) !=
- s->port.raw) {
+ if ((addr[i].raw & s->s.ipv4.mask.raw) != s->s.ipv4.addr.raw ||
+ (port[i].raw & s->s.pmsk.raw) !=
+ s->s.port.raw) {
rc[k] = ENOENT;
rp[k] = pkt[i];
k++;
@@ -168,12 +168,12 @@ rx_stream4(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
}
uint16_t
-tle_udp_rx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[],
+tle_udp_rx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[],
struct rte_mbuf *rp[], int32_t rc[], uint16_t num)
{
struct tle_udp_stream *s;
uint32_t i, j, k, n, p, t;
- union udp_ports tp[num], port[num];
+ union l4_ports tp[num], port[num];
union ipv4_addrs a4[num];
union ipv6_addrs *pa6[num];
@@ -191,7 +191,7 @@ tle_udp_rx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[],
s = rx_stream_obtain(dev, t, p);
if (s != NULL) {
- if (t == TLE_UDP_V4)
+ if (t == TLE_V4)
n = rx_stream4(s, pkt + i, a4 + i,
port + i, rp + k, rc + k, j - i);
else
@@ -217,7 +217,7 @@ tle_udp_rx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[],
}
static inline void
-stream_drb_release(struct tle_udp_stream *s, struct tle_drb * drb[],
+stream_drb_release(struct tle_udp_stream *s, struct tle_drb *drb[],
uint32_t nb_drb)
{
uint32_t n;
@@ -233,7 +233,7 @@ stream_drb_release(struct tle_udp_stream *s, struct tle_drb * drb[],
/* if stream send buffer was full invoke TX callback */
else if (s->tx.cb.func != NULL && n == 0)
- s->tx.cb.func(s->tx.cb.data, s);
+ s->tx.cb.func(s->tx.cb.data, &s->s);
}
@@ -241,7 +241,7 @@ stream_drb_release(struct tle_udp_stream *s, struct tle_drb * drb[],
}
uint16_t
-tle_udp_tx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[], uint16_t num)
+tle_udp_tx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[], uint16_t num)
{
uint32_t i, j, k, n;
struct tle_drb *drb[num];
@@ -260,7 +260,7 @@ tle_udp_tx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[], uint16_t num)
for (i = 0; i != k; i = j) {
s = drb[i]->udata;
- for (j = i + 1; j != k && s == drb[i]->udata; j++)
+ for (j = i + 1; j != k && s == drb[j]->udata; j++)
;
stream_drb_release(s, drb + i, j - i);
}
@@ -268,73 +268,6 @@ tle_udp_tx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[], uint16_t num)
return n;
}
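/*
 * Worked example for the drb grouping above (editorial sketch, not part
 * of the patch): tle_dring_sc_dequeue() can return descriptors owned by
 * different streams in one batch, e.g. drb[] = { s1, s1, s2, s1 }.
 * The inner loop advances j while drb[j]->udata still equals the run
 * owner s, so the batch is released in per-stream runs:
 *
 *   stream_drb_release(s1, drb + 0, 2);
 *   stream_drb_release(s2, drb + 2, 1);
 *   stream_drb_release(s1, drb + 3, 1);
 *
 * The change from drb[i]->udata to drb[j]->udata matters: comparing the
 * run owner against itself is always true, which would credit the whole
 * batch to the first stream.
 */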
-static int
-check_pkt_csum(const struct rte_mbuf *m, uint32_t type)
-{
- const struct ipv4_hdr *l3h4;
- const struct ipv6_hdr *l3h6;
- const struct udp_hdr *l4h;
- int32_t ret;
- uint16_t csum;
-
- ret = 0;
- l3h4 = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, m->l2_len);
- l3h6 = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, m->l2_len);
-
- if ((m->ol_flags & PKT_RX_IP_CKSUM_BAD) != 0) {
- csum = _ipv4x_cksum(l3h4, m->l3_len);
- ret = (csum != UINT16_MAX);
- }
-
- if (ret == 0 && (m->ol_flags & PKT_RX_L4_CKSUM_BAD) != 0) {
-
- /*
- * for IPv4 it is allowed to have zero UDP cksum,
- * for IPv6 valid UDP cksum is mandatory.
- */
- if (type == TLE_UDP_V4) {
- l4h = (const struct udp_hdr *)((uintptr_t)l3h4 +
- m->l3_len);
- csum = (l4h->dgram_cksum == 0) ? UINT16_MAX :
- _ipv4_udptcp_mbuf_cksum(m,
- m->l2_len + m->l3_len, l3h4);
- } else
- csum = _ipv6_udptcp_mbuf_cksum(m,
- m->l2_len + m->l3_len, l3h6);
-
- ret = (csum != UINT16_MAX);
- }
-
- return ret;
-}
-
-/* exclude NULLs from the final list of packets. */
-static inline uint32_t
-compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero)
-{
- uint32_t i, j, k, l;
-
- for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) {
-
- /* found a hole. */
- if (pkt[j] == NULL) {
-
- /* find how big is it. */
- for (i = j; i-- != 0 && pkt[i] == NULL; )
- ;
- /* fill the hole. */
- for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++)
- pkt[l] = pkt[k];
-
- nb_pkt -= j - i;
- nb_zero -= j - i;
- j = i + 1;
- }
- }
-
- return nb_pkt;
-}
-
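/*
 * For reference, compress_pkt_list() is removed here but still called
 * below (presumably relocated to a shared l4p header). It compacts the
 * packet array in place, shifting entries left over NULL holes; a small
 * worked example:
 *
 *   pkt[] = { A, NULL, NULL, B, C }, nb_pkt = 5, nb_zero = 2
 *   -> pkt[] = { A, B, C }, return value 3
 *
 * Scanning from the tail lets each hole be filled with a single move.
 */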
/*
* helper function, do the necessary pre-processing for the received packets
 * before handing them to the stream_recv caller.
@@ -356,7 +289,8 @@ recv_pkt_process(struct rte_mbuf *m[], uint32_t num, uint32_t type)
f = flg[i] & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD);
/* drop packets with invalid cksum(s). */
- if (f != 0 && check_pkt_csum(m[i], type) != 0) {
+ if (f != 0 && check_pkt_csum(m[i], m[i]->ol_flags, type,
+ IPPROTO_UDP) != 0) {
rte_pktmbuf_free(m[i]);
m[i] = NULL;
k++;
@@ -370,11 +304,12 @@ recv_pkt_process(struct rte_mbuf *m[], uint32_t num, uint32_t type)
}
uint16_t
-tle_udp_stream_recv(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
- uint16_t num)
+tle_udp_stream_recv(struct tle_stream *us, struct rte_mbuf *pkt[], uint16_t num)
{
uint32_t k, n;
+ struct tle_udp_stream *s;
+ s = UDP_STREAM(us);
n = rte_ring_mc_dequeue_burst(s->rx.q, (void **)pkt, num);
if (n == 0)
return 0;
@@ -389,58 +324,14 @@ tle_udp_stream_recv(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
rwl_release(&s->rx.use);
}
- k = recv_pkt_process(pkt, n, s->type);
+ k = recv_pkt_process(pkt, n, s->s.type);
return compress_pkt_list(pkt, n, k);
}
-static int32_t
-udp_get_dest(struct tle_udp_stream *s, const void *dst_addr,
- struct tle_udp_dest *dst)
-{
- int32_t rc;
- const struct in_addr *d4;
- const struct in6_addr *d6;
- struct tle_udp_ctx *ctx;
- struct tle_udp_dev *dev;
-
- ctx = s->ctx;
-
- /* it is here just to keep gcc happy. */
- d4 = NULL;
-
- if (s->type == TLE_UDP_V4) {
- d4 = dst_addr;
- rc = ctx->prm.lookup4(ctx->prm.lookup4_data, d4, dst);
- } else if (s->type == TLE_UDP_V6) {
- d6 = dst_addr;
- rc = ctx->prm.lookup6(ctx->prm.lookup6_data, d6, dst);
- } else
- rc = -ENOENT;
-
- if (rc < 0 || dst->dev == NULL || dst->dev->ctx != ctx)
- return -ENOENT;
-
- dev = dst->dev;
- if (s->type == TLE_UDP_V4) {
- struct ipv4_hdr *l3h;
- l3h = (struct ipv4_hdr *)(dst->hdr + dst->l2_len);
- l3h->src_addr = dev->prm.local_addr4.s_addr;
- l3h->dst_addr = d4->s_addr;
- } else {
- struct ipv6_hdr *l3h;
- l3h = (struct ipv6_hdr *)(dst->hdr + dst->l2_len);
- rte_memcpy(l3h->src_addr, &dev->prm.local_addr6,
- sizeof(l3h->src_addr));
- rte_memcpy(l3h->dst_addr, d6, sizeof(l3h->dst_addr));
- }
-
- return dev - ctx->dev;
-}
-
static inline int
udp_fill_mbuf(struct rte_mbuf *m,
uint32_t type, uint64_t ol_flags, uint32_t pid,
- union udph udph, const struct tle_udp_dest *dst)
+ union udph udph, const struct tle_dest *dst)
{
uint32_t len, plen;
char *l2h;
@@ -471,7 +362,7 @@ udp_fill_mbuf(struct rte_mbuf *m,
/* update proto specific fields. */
- if (type == TLE_UDP_V4) {
+ if (type == TLE_V4) {
struct ipv4_hdr *l3h;
l3h = (struct ipv4_hdr *)(l2h + dst->l2_len);
l3h->packet_id = rte_cpu_to_be_16(pid);
@@ -511,7 +402,7 @@ frag_fixup(const struct rte_mbuf *ms, struct rte_mbuf *mf, uint32_t type)
mf->ol_flags = ms->ol_flags;
mf->tx_offload = ms->tx_offload;
- if (type == TLE_UDP_V4 && (ms->ol_flags & PKT_TX_IP_CKSUM) == 0) {
+ if (type == TLE_V4 && (ms->ol_flags & PKT_TX_IP_CKSUM) == 0) {
l3h = rte_pktmbuf_mtod(mf, struct ipv4_hdr *);
l3h->hdr_checksum = _ipv4x_cksum(l3h, mf->l3_len);
}
@@ -522,7 +413,7 @@ frag_fixup(const struct rte_mbuf *ms, struct rte_mbuf *mf, uint32_t type)
*/
static inline int
fragment(struct rte_mbuf *pkt, struct rte_mbuf *frag[], uint32_t num,
- uint32_t type, const struct tle_udp_dest *dst)
+ uint32_t type, const struct tle_dest *dst)
{
int32_t frag_num, i;
uint16_t mtu;
@@ -533,7 +424,7 @@ fragment(struct rte_mbuf *pkt, struct rte_mbuf *frag[], uint32_t num,
mtu = dst->mtu - dst->l2_len;
/* fragment packet */
- if (type == TLE_UDP_V4)
+ if (type == TLE_V4)
frag_num = rte_ipv4_fragment_packet(pkt, frag, num, mtu,
dst->head_mp, dst->head_mp);
else
@@ -572,7 +463,7 @@ stream_drb_alloc(struct tle_udp_stream *s, struct tle_drb *drbs[],
/* enqueue up to num packets to the destination device queue. */
static inline uint16_t
-queue_pkt_out(struct tle_udp_stream *s, struct tle_udp_dev *dev,
+queue_pkt_out(struct tle_udp_stream *s, struct tle_dev *dev,
const void *pkt[], uint16_t nb_pkt,
struct tle_drb *drbs[], uint32_t *nb_drb)
{
@@ -613,7 +504,7 @@ queue_pkt_out(struct tle_udp_stream *s, struct tle_udp_dev *dev,
}
uint16_t
-tle_udp_stream_send(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
+tle_udp_stream_send(struct tle_stream *us, struct rte_mbuf *pkt[],
uint16_t num, const struct sockaddr *dst_addr)
{
int32_t di, frg, rc;
@@ -622,16 +513,18 @@ tle_udp_stream_send(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
uint32_t mtu, pid, type;
const struct sockaddr_in *d4;
const struct sockaddr_in6 *d6;
+ struct tle_udp_stream *s;
const void *da;
union udph udph;
- struct tle_udp_dest dst;
+ struct tle_dest dst;
struct tle_drb *drb[num];
- type = s->type;
+ s = UDP_STREAM(us);
+ type = s->s.type;
/* start filling UDP header. */
udph.raw = 0;
- udph.ports.src = s->port.dst;
+ udph.ports.src = s->s.port.dst;
/* figure out what destination addr/port to use. */
if (dst_addr != NULL) {
@@ -639,7 +532,7 @@ tle_udp_stream_send(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
rte_errno = EINVAL;
return 0;
}
- if (type == TLE_UDP_V4) {
+ if (type == TLE_V4) {
d4 = (const struct sockaddr_in *)dst_addr;
da = &d4->sin_addr;
udph.ports.dst = d4->sin_port;
@@ -649,14 +542,14 @@ tle_udp_stream_send(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
udph.ports.dst = d6->sin6_port;
}
} else {
- udph.ports.dst = s->port.src;
- if (type == TLE_UDP_V4)
- da = &s->ipv4.addr.src;
+ udph.ports.dst = s->s.port.src;
+ if (type == TLE_V4)
+ da = &s->s.ipv4.addr.src;
else
- da = &s->ipv6.addr.src;
+ da = &s->s.ipv6.addr.src;
}
- di = udp_get_dest(s, da, &dst);
+ di = stream_get_dest(&s->s, da, &dst);
if (di < 0) {
rte_errno = -di;
return 0;
diff --git a/lib/libtle_l4p/udp_stream.c b/lib/libtle_l4p/udp_stream.c
new file mode 100644
index 0000000..9f379d9
--- /dev/null
+++ b/lib/libtle_l4p/udp_stream.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
+
+#include "udp_stream.h"
+#include "misc.h"
+
+static void
+unuse_stream(struct tle_udp_stream *s)
+{
+ s->s.type = TLE_VNUM;
+ rte_atomic32_set(&s->rx.use, INT32_MIN);
+ rte_atomic32_set(&s->tx.use, INT32_MIN);
+}
+
+static void
+fini_stream(struct tle_udp_stream *s)
+{
+ if (s != NULL) {
+ rte_free(s->rx.q);
+ rte_free(s->tx.drb.r);
+ }
+}
+
+static void
+udp_fini_streams(struct tle_ctx *ctx)
+{
+ uint32_t i;
+ struct tle_udp_stream *s;
+
+ s = ctx->streams.buf;
+ if (s != NULL) {
+ for (i = 0; i != ctx->prm.max_streams; i++)
+ fini_stream(s + i);
+ }
+
+ rte_free(s);
+ ctx->streams.buf = NULL;
+ STAILQ_INIT(&ctx->streams.free);
+}
+
+static int
+init_stream(struct tle_ctx *ctx, struct tle_udp_stream *s)
+{
+ size_t bsz, rsz, sz;
+ uint32_t i, k, n, nb;
+ struct tle_drb *drb;
+ char name[RTE_RING_NAMESIZE];
+
+ /* init RX part. */
+
+ n = RTE_MAX(ctx->prm.max_stream_rbufs, 1U);
+ n = rte_align32pow2(n);
+ sz = sizeof(*s->rx.q) + n * sizeof(s->rx.q->ring[0]);
+
+ s->rx.q = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ ctx->prm.socket_id);
+ if (s->rx.q == NULL) {
+ UDP_LOG(ERR, "%s(%p): allocation of %zu bytes on socket %d "
+ "failed with error code: %d\n",
+ __func__, s, sz, ctx->prm.socket_id, rte_errno);
+ return -ENOMEM;
+ }
+
+ snprintf(name, sizeof(name), "%p@%zu", s, sz);
+ rte_ring_init(s->rx.q, name, n, RING_F_SP_ENQ);
+
+ /* init TX part. */
+
+ nb = drb_nb_elem(ctx);
+ k = calc_stream_drb_num(ctx, nb);
+ n = rte_align32pow2(k);
+
+ /* size of the drbs ring */
+ rsz = sizeof(*s->tx.drb.r) + n * sizeof(s->tx.drb.r->ring[0]);
+ rsz = RTE_ALIGN_CEIL(rsz, RTE_CACHE_LINE_SIZE);
+
+ /* size of the drb. */
+ bsz = tle_drb_calc_size(nb);
+
+ /* total stream drbs size. */
+ sz = rsz + bsz * k;
+
+ s->tx.drb.r = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ ctx->prm.socket_id);
+ if (s->tx.drb.r == NULL) {
+ UDP_LOG(ERR, "%s(%p): allocation of %zu bytes on socket %d "
+ "failed with error code: %d\n",
+ __func__, s, sz, ctx->prm.socket_id, rte_errno);
+ return -ENOMEM;
+ }
+
+ snprintf(name, sizeof(name), "%p@%zu", s, sz);
+ rte_ring_init(s->tx.drb.r, name, n, 0);
+
+ for (i = 0; i != k; i++) {
+ drb = (struct tle_drb *)((uintptr_t)s->tx.drb.r +
+ rsz + bsz * i);
+ drb->udata = s;
+ drb->size = nb;
+ rte_ring_enqueue(s->tx.drb.r, drb);
+ }
+
+ s->tx.drb.nb_elem = nb;
+ s->tx.drb.nb_max = k;
+
+ /* mark stream as available to use. */
+
+ s->s.ctx = ctx;
+ unuse_stream(s);
+ STAILQ_INSERT_TAIL(&ctx->streams.free, &s->s, link);
+
+ return 0;
+}
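/*
 * Worked sizing example for init_stream() (illustrative values; assumes
 * calc_stream_drb_num() keeps the formula from the removed udp_ctl.c):
 * with max_stream_rbufs = 1000 the RX ring gets
 * rte_align32pow2(1000) = 1024 slots; with send_bulk_size = 32 and
 * max_stream_sbufs = 2048, nb = 32 and k = 2048/32 + (2048/32)/2 = 96
 * drbs (subject to the RTE_MAX() floor), so the drb ring gets
 * rte_align32pow2(96) = 128 slots.
 */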
+
+static void
+udp_free_drbs(struct tle_stream *s, struct tle_drb *drb[], uint32_t nb_drb)
+{
+ struct tle_udp_stream *us;
+
+ us = (struct tle_udp_stream *)s;
+ rte_ring_enqueue_burst(us->tx.drb.r, (void **)drb, nb_drb);
+}
+
+static int
+udp_init_streams(struct tle_ctx *ctx)
+{
+ size_t sz;
+ uint32_t i;
+ int32_t rc;
+ struct tle_udp_stream *s;
+
+ sz = sizeof(*s) * ctx->prm.max_streams;
+ s = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ ctx->prm.socket_id);
+ if (s == NULL) {
+ UDP_LOG(ERR, "allocation of %zu bytes on socket %d "
+ "for %u udp_streams failed\n",
+ sz, ctx->prm.socket_id, ctx->prm.max_streams);
+ return -ENOMEM;
+ }
+
+ ctx->streams.buf = s;
+ STAILQ_INIT(&ctx->streams.free);
+
+ for (i = 0; i != ctx->prm.max_streams; i++) {
+ rc = init_stream(ctx, s + i);
+ if (rc != 0) {
+ UDP_LOG(ERR, "initalisation of %u-th stream failed", i);
+ udp_fini_streams(ctx);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+static void __attribute__((constructor))
+udp_stream_setup(void)
+{
+ static const struct stream_ops udp_ops = {
+ .init_streams = udp_init_streams,
+ .fini_streams = udp_fini_streams,
+ .free_drbs = udp_free_drbs,
+ };
+
+ tle_stream_ops[TLE_PROTO_UDP] = udp_ops;
+}
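/*
 * The constructor above registers the UDP handlers in the global
 * tle_stream_ops[] table at load time, keyed by TLE_PROTO_UDP. A sketch
 * of how the protocol-independent context code can then dispatch (the
 * ctx.c side is not shown in this hunk, and the "proto" field name is
 * an assumption):
 *
 *   rc = tle_stream_ops[ctx->prm.proto].init_streams(ctx);
 */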
+
+static inline void
+stream_down(struct tle_udp_stream *s)
+{
+ rwl_down(&s->rx.use);
+ rwl_down(&s->tx.use);
+}
+
+static inline void
+stream_up(struct tle_udp_stream *s)
+{
+ rwl_up(&s->rx.use);
+ rwl_up(&s->tx.use);
+}
+
+static int
+check_stream_prm(const struct tle_ctx *ctx,
+ const struct tle_udp_stream_param *prm)
+{
+ if ((prm->local_addr.ss_family != AF_INET &&
+ prm->local_addr.ss_family != AF_INET6) ||
+ prm->local_addr.ss_family != prm->remote_addr.ss_family)
+ return -EINVAL;
+
+ /* callback and event notification mechanisms are mutually exclusive */
+ if ((prm->recv_ev != NULL && prm->recv_cb.func != NULL) ||
+ (prm->send_ev != NULL && prm->send_cb.func != NULL))
+ return -EINVAL;
+
+ /* check that the context supports the desired address family. */
+ if ((prm->local_addr.ss_family == AF_INET &&
+ ctx->prm.lookup4 == NULL) ||
+ (prm->local_addr.ss_family == AF_INET6 &&
+ ctx->prm.lookup6 == NULL))
+ return -EINVAL;
+
+ return 0;
+}
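/*
 * A minimal parameter setup that passes check_stream_prm() above, given
 * a context created with an IPv4 lookup4 callback (editorial sketch;
 * "rx_ev" is a hypothetical tle_event created by the caller). recv uses
 * an event here, so recv_cb.func must stay NULL; a send callback could
 * still be set independently.
 */
static void
example_fill_prm(struct tle_udp_stream_param *prm, struct tle_event *rx_ev)
{
    struct sockaddr_in *l = (struct sockaddr_in *)&prm->local_addr;
    struct sockaddr_in *r = (struct sockaddr_in *)&prm->remote_addr;

    memset(prm, 0, sizeof(*prm));
    l->sin_family = AF_INET; /* must match remote_addr family */
    l->sin_addr.s_addr = INADDR_ANY; /* wildcard local address */
    l->sin_port = 0; /* let the context pick a free port */
    r->sin_family = AF_INET;
    prm->recv_ev = rx_ev; /* event and callback are exclusive */
}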
+
+struct tle_stream *
+tle_udp_stream_open(struct tle_ctx *ctx,
+ const struct tle_udp_stream_param *prm)
+{
+ struct tle_udp_stream *s;
+ int32_t rc;
+
+ if (ctx == NULL || prm == NULL || check_stream_prm(ctx, prm) != 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ s = (struct tle_udp_stream *)get_stream(ctx);
+ if (s == NULL) {
+ rte_errno = ENFILE;
+ return NULL;
+
+ /* some TX still pending for that stream. */
+ } else if (UDP_STREAM_TX_PENDING(s)) {
+ put_stream(ctx, &s->s, 0);
+ rte_errno = EAGAIN;
+ return NULL;
+ }
+
+ /* copy input parameters. */
+ s->prm = *prm;
+
+ /* setup L4 ports and L3 addresses fields. */
+ rc = stream_fill_ctx(ctx, &s->s,
+ (const struct sockaddr *)&prm->local_addr,
+ (const struct sockaddr *)&prm->remote_addr);
+
+ if (rc != 0) {
+ put_stream(ctx, &s->s, 1);
+ s = NULL;
+ rte_errno = rc;
+ } else {
+ /* setup stream notification mechanism */
+ s->rx.ev = prm->recv_ev;
+ s->rx.cb = prm->recv_cb;
+ s->tx.ev = prm->send_ev;
+ s->tx.cb = prm->send_cb;
+
+ /* mark stream as available for RX/TX */
+ if (s->tx.ev != NULL)
+ tle_event_raise(s->tx.ev);
+ stream_up(s);
+ }
+
+ return &s->s;
+}
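/*
 * Open/close usage sketch for the functions above (editorial; builds on
 * example_fill_prm() from the previous sketch, "ctx" created elsewhere):
 */
static struct tle_stream *
example_open(struct tle_ctx *ctx, struct tle_event *rx_ev)
{
    struct tle_udp_stream_param prm;
    struct tle_stream *s;

    example_fill_prm(&prm, rx_ev);
    s = tle_udp_stream_open(ctx, &prm);
    if (s == NULL) /* rte_errno: EINVAL, ENFILE or EAGAIN */
        UDP_LOG(ERR, "stream open failed: %d\n", rte_errno);
    return s;
}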
+
+int
+tle_udp_stream_close(struct tle_stream *us)
+{
+ int32_t rc;
+ struct tle_ctx *ctx;
+ struct tle_udp_stream *s;
+
+ static const struct tle_stream_cb zcb;
+
+ s = UDP_STREAM(us);
+ if (us == NULL || s->s.type >= TLE_VNUM)
+ return -EINVAL;
+
+ ctx = s->s.ctx;
+
+ /* mark stream as unavailable for RX/TX. */
+ stream_down(s);
+
+ /* reset stream events if any. */
+ if (s->rx.ev != NULL) {
+ tle_event_idle(s->rx.ev);
+ s->rx.ev = NULL;
+ }
+ if (s->tx.ev != NULL) {
+ tle_event_idle(s->tx.ev);
+ s->tx.ev = NULL;
+ }
+
+ s->rx.cb = zcb;
+ s->tx.cb = zcb;
+
+ /* free stream's destination port */
+ rc = stream_clear_ctx(ctx, &s->s);
+
+ /* empty stream's RX queue */
+ empty_mbuf_ring(s->rx.q);
+
+ /*
+ * mark the stream as free again.
+ * if there still are pkts queued for TX,
+ * then put this stream to the tail of free list.
+ */
+ put_stream(ctx, &s->s, UDP_STREAM_TX_FINISHED(s));
+ return rc;
+}
+
+int
+tle_udp_stream_get_param(const struct tle_stream *us,
+ struct tle_udp_stream_param *prm)
+{
+ struct sockaddr_in *lin4;
+ struct sockaddr_in6 *lin6;
+ const struct tle_udp_stream *s;
+
+ s = UDP_STREAM(us);
+ if (prm == NULL || us == NULL || s->s.type >= TLE_VNUM)
+ return -EINVAL;
+
+ prm[0] = s->prm;
+ if (prm->local_addr.ss_family == AF_INET) {
+ lin4 = (struct sockaddr_in *)&prm->local_addr;
+ lin4->sin_port = s->s.port.dst;
+ } else if (s->prm.local_addr.ss_family == AF_INET6) {
+ lin6 = (struct sockaddr_in6 *)&prm->local_addr;
+ lin6->sin6_port = s->s.port.dst;
+ }
+
+ return 0;
+}
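/*
 * When a stream is opened with local port 0 the context assigns a free
 * one (as in the port-allocation logic of the removed udp_ctl.c);
 * tle_udp_stream_get_param() above is how a caller reads it back.
 * Editorial sketch for an AF_INET stream:
 */
static uint16_t
example_local_port(const struct tle_stream *s)
{
    struct tle_udp_stream_param prm;
    const struct sockaddr_in *in;

    if (tle_udp_stream_get_param(s, &prm) != 0)
        return 0;
    in = (const struct sockaddr_in *)&prm.local_addr;
    return ntohs(in->sin_port); /* convert to host byte order */
}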
diff --git a/lib/libtle_l4p/udp_stream.h b/lib/libtle_l4p/udp_stream.h
new file mode 100644
index 0000000..a950e56
--- /dev/null
+++ b/lib/libtle_l4p/udp_stream.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _UDP_STREAM_H_
+#define _UDP_STREAM_H_
+
+#include <rte_vect.h>
+#include <tle_dring.h>
+#include <tle_udp.h>
+#include <tle_event.h>
+
+#include "osdep.h"
+#include "ctx.h"
+#include "stream.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+union udph {
+ uint64_t raw;
+ struct {
+ union l4_ports ports;
+ uint16_t len;
+ uint16_t cksum;
+ };
+};
+
+struct tle_udp_stream {
+
+ struct tle_stream s;
+
+ struct {
+ struct rte_ring *q;
+ struct tle_event *ev;
+ struct tle_stream_cb cb;
+ rte_atomic32_t use;
+ } rx __rte_cache_aligned;
+
+ struct {
+ rte_atomic32_t use;
+ struct {
+ uint32_t nb_elem; /* number of objects per drb. */
+ uint32_t nb_max; /* number of drbs per stream. */
+ struct rte_ring *r;
+ } drb;
+ struct tle_event *ev;
+ struct tle_stream_cb cb;
+ } tx __rte_cache_aligned;
+
+ struct tle_udp_stream_param prm;
+} __rte_cache_aligned;
+
+#define UDP_STREAM(p) \
+((struct tle_udp_stream *)((uintptr_t)(p) - offsetof(struct tle_udp_stream, s)))
+
+#define UDP_STREAM_TX_PENDING(s) \
+ ((s)->tx.drb.nb_max != rte_ring_count((s)->tx.drb.r))
+
+#define UDP_STREAM_TX_FINISHED(s) \
+ ((s)->tx.drb.nb_max == rte_ring_count((s)->tx.drb.r))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UDP_STREAM_H_ */
diff --git a/lib/libtle_timer/Makefile b/lib/libtle_timer/Makefile
new file mode 100644
index 0000000..c17d219
--- /dev/null
+++ b/lib/libtle_timer/Makefile
@@ -0,0 +1,38 @@
+# Copyright (c) 2016 Intel Corporation.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overwritten by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = libtle_timer.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := tle_timer_version.map
+
+LIBABIVER := 1
+
+#source files
+SRCS-y += timer.c
+
+SYMLINK-y-include += tle_timer.h
+
+include $(TLDK_ROOT)/mk/tle.lib.mk
diff --git a/lib/libtle_timer/timer.c b/lib/libtle_timer/timer.c
new file mode 100644
index 0000000..8b89fd6
--- /dev/null
+++ b/lib/libtle_timer/timer.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <sys/queue.h>
+#include <rte_cycles.h>
+#include <rte_errno.h>
+#include <tle_timer.h>
+
+#define TW_SLOTS_PER_RING 512
+#define TW_RING_SHIFT 9
+#define TW_RING_MASK (TW_SLOTS_PER_RING - 1)
+#define MAX_TIMER_BURST 0x20
+
+enum {
+ TW_RING_FAST,
+ TW_RING_SLOW,
+ TW_N_RINGS,
+};
+
+struct tle_timer_list;
+
+struct tle_timer_elmt {
+ void *obj; /**< object for which timer is created */
+
+ struct tle_timer_list *list; /**< list the object currently belongs to */
+
+ /** Slow ring only, saved when timer added to ring */
+ uint16_t fast_index;
+
+ LIST_ENTRY(tle_timer_elmt) link;
+};
+
+struct tle_timer_list {
+ uint32_t num;
+ LIST_HEAD(, tle_timer_elmt) head;
+};
+
+struct tle_timer_wheel {
+ uint64_t next_run_time; /**< Next time the wheel should run */
+
+ uint64_t last_run_time; /**< Last time the wheel ran */
+
+ uint32_t current_tick; /**< current tick */
+
+ uint32_t current_index[TW_N_RINGS]; /**< current wheel indices */
+
+ struct tle_timer_list free; /**< free timers to be used */
+
+ struct tle_timer_list expired; /**< expired timers to be pulled */
+
+ struct tle_timer_wheel_args prm; /**< timer wheel configuration params */
+
+ /** wheel arrays */
+ struct tle_timer_list w[TW_N_RINGS][TW_SLOTS_PER_RING];
+};
+
+/** helper functions to manipulate the linked lists */
+static inline uint32_t
+get_timers(struct tle_timer_list *list, struct tle_timer_elmt *re[],
+ uint32_t num)
+{
+ struct tle_timer_elmt *e;
+ uint32_t i, n;
+
+ n = RTE_MIN(list->num, num);
+ for (i = 0; i != n; i++) {
+ e = LIST_FIRST(&list->head);
+ LIST_REMOVE(e, link);
+ e->list = NULL;
+ re[i] = e;
+ }
+
+ list->num -= n;
+ return n;
+}
+
+static inline struct tle_timer_elmt *
+get_timer(struct tle_timer_list *list)
+{
+ struct tle_timer_elmt *e;
+
+ e = LIST_FIRST(&list->head);
+ LIST_REMOVE(e, link);
+ e->list = NULL;
+ list->num--;
+ return e;
+}
+
+static inline void
+put_timers(struct tle_timer_list *list, struct tle_timer_elmt *te[],
+ uint32_t num)
+{
+ uint32_t i;
+
+ for (i = 0; i != num; i++) {
+ te[i]->list = list;
+ LIST_INSERT_HEAD(&list->head, te[i], link);
+ }
+ list->num += num;
+}
+
+static inline void
+put_timer(struct tle_timer_list *list, struct tle_timer_elmt *e)
+{
+ e->list = list;
+ LIST_INSERT_HEAD(&list->head, e, link);
+ list->num++;
+}
+
+static inline void
+rem_timer(struct tle_timer_list *list, struct tle_timer_elmt *e)
+{
+ LIST_REMOVE(e, link);
+ e->list = NULL;
+ list->num--;
+}
+
+/** create the tle timer wheel */
+struct tle_timer_wheel *
+tle_timer_create(struct tle_timer_wheel_args *prm, uint64_t now)
+{
+ uint32_t i, j;
+ size_t sz;
+ struct tle_timer_wheel *tw;
+ struct tle_timer_elmt *e;
+ struct tle_timer_elmt *timers;
+
+ if (prm == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* at least one timer has to be created */
+ if (prm->max_timer == 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* do not allow tick size smaller than 1ms */
+ if (prm->tick_size == 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ sz = sizeof(*tw) + prm->max_timer * sizeof(struct tle_timer_elmt);
+
+ /* allocate memory */
+ tw = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ prm->socket_id);
+
+ if (tw == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ tw->last_run_time = now;
+ tw->prm = *prm;
+ timers = (struct tle_timer_elmt *)(tw + 1);
+
+ /* initialize the lists */
+ LIST_INIT(&tw->free.head);
+ LIST_INIT(&tw->expired.head);
+
+ for (i = 0; i < prm->max_timer; i++) {
+ e = timers + i;
+ put_timer(&tw->free, e);
+ }
+
+ for (i = 0; i < TW_N_RINGS; i++)
+ for (j = 0; j < TW_SLOTS_PER_RING; j++)
+ LIST_INIT(&tw->w[i][j].head);
+
+ return tw;
+}
+
+/** free the tle timer wheel */
+void
+tle_timer_free(struct tle_timer_wheel *tw)
+{
+ rte_free(tw);
+}
+
+/** start a timer */
+void *
+tle_timer_start(struct tle_timer_wheel *tw, void *obj, uint64_t interval)
+{
+ uint16_t slow_ring_index, fast_ring_index;
+ struct tle_timer_list *ts;
+ struct tle_timer_elmt *e;
+ uint32_t carry;
+ uint32_t nb_tick;
+
+ rte_errno = 0;
+ if (!interval) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ if (tw->free.num == 0) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ nb_tick = interval / tw->prm.tick_size;
+
+ fast_ring_index = nb_tick & TW_RING_MASK;
+ fast_ring_index += tw->current_index[TW_RING_FAST];
+ carry = fast_ring_index >= TW_SLOTS_PER_RING ? 1 : 0;
+ fast_ring_index %= TW_SLOTS_PER_RING;
+ slow_ring_index = (nb_tick >> TW_RING_SHIFT) + carry;
+
+ /* Timer duration exceeds ~7 hrs? Oops */
+ if (slow_ring_index >= TW_SLOTS_PER_RING) {
+ rte_errno = ERANGE;
+ return NULL;
+ }
+
+ /* Timer expires more than 51.2 seconds from now? */
+ if (slow_ring_index) {
+ slow_ring_index += tw->current_index[TW_RING_SLOW];
+ slow_ring_index %= TW_SLOTS_PER_RING;
+ ts = &tw->w[TW_RING_SLOW][slow_ring_index];
+
+ e = get_timer(&tw->free);
+ e->obj = obj;
+ e->fast_index = fast_ring_index;
+ put_timer(ts, e);
+
+ /* Return the user timer-cancellation handle */
+ return (void *)e;
+ }
+
+ /* Timer expires less than 51.2 seconds from now */
+ ts = &tw->w[TW_RING_FAST][fast_ring_index];
+
+ e = get_timer(&tw->free);
+ e->obj = obj;
+ put_timer(ts, e);
+
+ /* Give the user a handle to cancel the timer */
+ return (void *)e;
+}
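/*
 * Worked index arithmetic for tle_timer_start() (tick_size = 100ms,
 * both current indices at 0):
 *
 *   interval = 5s   -> nb_tick = 50:   slow index 0, so the timer goes
 *                      straight into fast-ring slot 50.
 *   interval = 120s -> nb_tick = 1200: fast index = 1200 & 511 = 176,
 *                      slow index = 1200 >> 9 = 2; the element parks in
 *                      slow slot 2 and is dealt into fast slot 176 once
 *                      that slow slot comes due.
 */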
+
+/** stop a timer */
+void
+tle_timer_stop(struct tle_timer_wheel *tw, void *timer)
+{
+ struct tle_timer_elmt *e;
+ struct tle_timer_list *ts;
+
+ /* Cancel the timer */
+ e = (struct tle_timer_elmt *)timer;
+ ts = e->list;
+ rem_timer(ts, e);
+ put_timer(&tw->free, e);
+}
+
+/** run the timer wheel. Call it once every tick_size interval
+ * (e.g. every 100ms).
+ */
+void
+tle_timer_expire(struct tle_timer_wheel *tw, uint64_t now)
+{
+ uint32_t nb_tick, i, n;
+ uint32_t fast_wheel_index, slow_wheel_index, demoted_index;
+ struct tle_timer_list *ts, *ts2;
+ struct tle_timer_elmt *re[MAX_TIMER_BURST], *e;
+
+ /* Shouldn't happen */
+ if (unlikely(now < tw->next_run_time))
+ return;
+
+ /* Number of tick_size cycles which have occurred */
+ nb_tick = (now - tw->last_run_time) / tw->prm.tick_size;
+ if (nb_tick == 0)
+ return;
+
+ /* Remember when we ran, compute next runtime */
+ tw->next_run_time = (now + tw->prm.tick_size);
+ tw->last_run_time = now;
+
+ for (i = 0; i < nb_tick; i++) {
+ fast_wheel_index = tw->current_index[TW_RING_FAST];
+
+ /* If we've been around the fast ring once,
+ * process one slot in the slow ring before we handle
+ * the fast ring.
+ */
+ if (unlikely(fast_wheel_index == TW_SLOTS_PER_RING)) {
+ fast_wheel_index = tw->current_index[TW_RING_FAST] = 0;
+
+ tw->current_index[TW_RING_SLOW]++;
+ tw->current_index[TW_RING_SLOW] %= TW_SLOTS_PER_RING;
+ slow_wheel_index = tw->current_index[TW_RING_SLOW];
+
+ ts = &tw->w[TW_RING_SLOW][slow_wheel_index];
+
+ /* Deal slow-ring elements into the fast ring. */
+ while (ts->num != 0) {
+ e = get_timer(ts);
+ demoted_index = e->fast_index;
+ ts2 = &tw->w[TW_RING_FAST][demoted_index];
+ put_timer(ts2, e);
+ }
+ LIST_INIT(&ts->head);
+ }
+
+ /* Handle the fast ring */
+ ts = &tw->w[TW_RING_FAST][fast_wheel_index];
+
+ /* Clear the fast-ring slot and move its timers to the expired list. */
+ n = get_timers(ts, re, RTE_DIM(re));
+ while (n != 0) {
+ put_timers(&tw->expired, re, n);
+ n = get_timers(ts, re, RTE_DIM(re));
+ }
+ LIST_INIT(&ts->head);
+
+ tw->current_index[TW_RING_FAST]++;
+ tw->current_tick++;
+ }
+}
+
+/** bulk retrieve of expired timers */
+int
+tle_timer_get_expired_bulk(struct tle_timer_wheel *tw, void *rt[], uint32_t num)
+{
+ uint32_t i, n;
+ struct tle_timer_elmt *e[MAX_TIMER_BURST];
+
+ n = get_timers(&tw->expired, e, num);
+
+ for (i = 0; i != n; i++)
+ rt[i] = e[i]->obj;
+
+ put_timers(&tw->free, e, n);
+
+ return n;
+}
diff --git a/lib/libtle_timer/tle_timer.h b/lib/libtle_timer/tle_timer.h
new file mode 100644
index 0000000..c40516e
--- /dev/null
+++ b/lib/libtle_timer/tle_timer.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __tle_timer_h__
+#define __tle_timer_h__
+
+#include <stdint.h>
+#include <rte_config.h>
+#include <rte_debug.h>
+#include <rte_malloc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @file
+ * @brief timer definitions
+ *
+ * Design parameters:
+ * granularity: configurable in terms of units (i.e. cycles or ms).
+ * e.g. with 100ms tick
+ * required max period: 2.5 hours => 150 minutes => 90,000 ticks
+ * Rounding up to 256k ticks yields a two-level 512 slot-per-level
+ * wheel, resulting in a 7-hour max period.
+ */
+
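/*
 * Worked capacity math for the parameters above: two rings of 512 slots
 * give 512 * 512 = 262,144 (256k) distinct ticks; at a 100ms tick that
 * is 26,214s, roughly 7.28 hours of maximum period. The fast ring alone
 * covers 512 * 100ms = 51.2s, which is why only longer intervals touch
 * the slow ring.
 */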
+struct tle_timer_wheel_args {
+ uint32_t tick_size; /**< tick size in units */
+
+ int32_t socket_id; /**< socket ID to allocate memory for. */
+
+ uint32_t max_timer; /**< maximum number of timers */
+};
+
+struct tle_timer_wheel;
+
+/** initialize a timer wheel */
+struct tle_timer_wheel *
+tle_timer_create(struct tle_timer_wheel_args *prm, uint64_t now);
+
+/** free a timer wheel */
+void
+tle_timer_free(struct tle_timer_wheel *tw);
+
+/** start a timer */
+void *
+tle_timer_start(struct tle_timer_wheel *tw, void *obj, uint64_t interval);
+
+/** stop a timer */
+void
+tle_timer_stop(struct tle_timer_wheel *tw, void *timer);
+
+/** run the timer wheel. Call it once every tick_size interval
+ * (e.g. every 100ms).
+ */
+void
+tle_timer_expire(struct tle_timer_wheel *tw, uint64_t now);
+
+/** bulk retrieve of expired timers */
+int
+tle_timer_get_expired_bulk(struct tle_timer_wheel *tw, void *timers[],
+ uint32_t num);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __tle_timer_h__ */
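/*
 * Polling-loop usage sketch for the API above (editorial, not part of
 * the patch; needs <rte_cycles.h> and assumes tick_size is expressed in
 * TSC cycles, one of the unit choices the header allows):
 */
static void
example_timer_loop(int32_t socket_id)
{
    struct tle_timer_wheel_args prm;
    struct tle_timer_wheel *tw;
    void *exp[32];
    int obj;
    int i, n;

    prm.tick_size = rte_get_tsc_hz() / 10; /* 100ms worth of cycles */
    prm.socket_id = socket_id;
    prm.max_timer = 1024;

    tw = tle_timer_create(&prm, rte_get_tsc_cycles());
    if (tw == NULL)
        return;

    /* arm one 5 second timer for "obj" */
    tle_timer_start(tw, &obj, 5 * rte_get_tsc_hz());

    /* busy-wait until the timer pops (a real app would poll per tick) */
    do {
        tle_timer_expire(tw, rte_get_tsc_cycles());
        n = tle_timer_get_expired_bulk(tw, exp, RTE_DIM(exp));
        for (i = 0; i != n; i++)
            ; /* exp[i] is the obj handle passed to tle_timer_start() */
    } while (n == 0);

    tle_timer_free(tw);
}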
diff --git a/lib/libtle_udp/tle_udp_impl.h b/lib/libtle_udp/tle_udp_impl.h
deleted file mode 100644
index c55d605..0000000
--- a/lib/libtle_udp/tle_udp_impl.h
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * Copyright (c) 2016 Intel Corporation.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _TLE_UDP_IMPL_H_
-#define _TLE_UDP_IMPL_H_
-
-#include <stdint.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <rte_common.h>
-#include <rte_mbuf.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * <udp_ctx> - each such ctx represents an 'independent copy of the stack'.
- * It owns set of <udp_stream>s and <udp_dev>s entities and provides
- * (de)multiplexing input/output packets from/into UDP devices into/from
- * UDP streams.
- * <udp_dev> is an abstraction for the underlying device, that is able
- * to RX/TX packets and may provide some HW offload capabilities.
- * It is a user responsibility to add to the <udp_ctx> all <udp_dev>s,
- * that context has to manage, before starting to do stream operations
- * (open/send/recv,close) over that context.
- * Right now adding/deleting <udp_dev>s to the context with open
- * streams is not supported.
- * <udp_stream> represents an UDP endpoint <addr, port> and is an analogy to
- * socket entity.
- * As with a socket, there is the ability to do recv/send over it.
- * <udp_stream> belongs to particular <udp_ctx> but is visible globally across
- * the process, i.e. any thread within the process can do recv/send over it
- * without any further synchronisation.
- * While 'upper' layer API is thread safe, lower layer API (rx_bulk/tx_bulk)
- * is not thread safe and is not supposed to be run on multiple threads
- * in parallel.
- * So single thread can drive multiple <udp_ctx>s and do IO for them,
- * but multiple threads can't drive same <udp_ctx> without some
- * explicit synchronization.
- */
-
-struct tle_udp_ctx;
-struct tle_udp_dev;
-
-/**
- * Blocked UDP ports info.
- */
-struct tle_bl_port {
- uint32_t nb_port; /**< number of blocked ports. */
- const uint16_t *port; /**< list of blocked ports. */
-};
-
-/**
- * UDP device parameters.
- */
-struct tle_udp_dev_param {
- uint32_t rx_offload; /**< DEV_RX_OFFLOAD_* supported. */
- uint32_t tx_offload; /**< DEV_TX_OFFLOAD_* supported. */
- struct in_addr local_addr4; /**< local IPv4 address assigned. */
- struct in6_addr local_addr6; /**< local IPv6 address assigned. */
- struct tle_bl_port bl4; /**< blocked ports for IPv4 address. */
- struct tle_bl_port bl6; /**< blocked ports for IPv6 address. */
-};
-
-#define TLE_UDP_MAX_HDR 0x60
-
-struct tle_udp_dest {
- struct rte_mempool *head_mp; /**< MP for fragment headers. */
- struct tle_udp_dev *dev; /**< device to send packets through. */
- uint16_t mtu; /**< MTU for given destination. */
- uint8_t l2_len; /**< L2 header lenght. */
- uint8_t l3_len; /**< L3 header lenght. */
- uint8_t hdr[TLE_UDP_MAX_HDR]; /**< L2/L3 headers. */
-};
-
-/**
- * UDP context creation parameters.
- */
-struct tle_udp_ctx_param {
- int32_t socket_id; /**< socket ID to allocate memory for. */
- uint32_t max_streams; /**< max number of streams in context. */
- uint32_t max_stream_rbufs; /**< max recv mbufs per stream. */
- uint32_t max_stream_sbufs; /**< max send mbufs per stream. */
- uint32_t send_bulk_size; /**< expected # of packets per send call. */
-
- int (*lookup4)(void *opaque, const struct in_addr *addr,
- struct tle_udp_dest *res);
- /**< will be called by send() to get IPv4 packet destination info. */
- void *lookup4_data;
- /**< opaque data pointer for lookup4() callback. */
-
- int (*lookup6)(void *opaque, const struct in6_addr *addr,
- struct tle_udp_dest *res);
- /**< will be called by send() to get IPv6 packet destination info. */
- void *lookup6_data;
- /**< opaque data pointer for lookup6() callback. */
-};
-
-/**
- * create UDP context.
- * @param ctx_prm
- * Parameters used to create and initialise the UDP context.
- * @return
- * Pointer to UDP context structure that can be used in future UDP
- * operations, or NULL on error, with error code set in rte_errno.
- * Possible rte_errno errors include:
- * - EINVAL - invalid parameter passed to function
- * - ENOMEM - out of memory
- */
-struct tle_udp_ctx *
-tle_udp_create(const struct tle_udp_ctx_param *ctx_prm);
-
-/**
- * Destroy given UDP context.
- *
- * @param ctx
- * UDP context to destroy
- */
-void tle_udp_destroy(struct tle_udp_ctx *ctx);
-
-/**
- * Add new device into the given UDP context.
- * This function is not multi-thread safe.
- *
- * @param ctx
- * UDP context to add new device into.
- * @param dev_prm
- * Parameters used to create and initialise new device inside the
- * UDP context.
- * @return
- * Pointer to UDP device structure that can be used in future UDP
- * operations, or NULL on error, with error code set in rte_errno.
- * Possible rte_errno errors include:
- * - EINVAL - invalid parameter passed to function
- * - ENODEV - max possible value of open devices is reached
- * - ENOMEM - out of memory
- */
-struct tle_udp_dev *
-tle_udp_add_dev(struct tle_udp_ctx *ctx,
- const struct tle_udp_dev_param *dev_prm);
-
-/**
- * Remove and destroy previously added device from the given UDP context.
- * This function is not multi-thread safe.
- *
- * @param dev
- * UDP device to remove and destroy.
- * @return
- * zero on successful completion.
- * - -EINVAL - invalid parameter passed to function
- */
-int tle_udp_del_dev(struct tle_udp_dev *dev);
-
-/**
- * Flags to the UDP context that destinations info might be changed,
- * so if it has any destinations data cached, then
- * it has to be invalidated.
- * @param ctx
- * UDP context to invalidate.
- */
-void tle_udp_ctx_invalidate(struct tle_udp_ctx *ctx);
-
-struct tle_udp_stream;
-
-/**
- * Stream asynchronous notification mechanisms:
- * a) recv/send callback.
- * Stream recv/send notification callbacks behaviour is edge-triggered (ET).
- * recv callback will be invoked if stream receive buffer was empty and
- * new packet(s) have arrived.
- * send callback will be invoked when stream send buffer was full,
- * and some packets belonging to that stream were sent
- * (part of send buffer became free again).
- * Note that both recv and send callbacks are called with sort of read lock
- * held on that stream. So it is not permitted to call stream_close()
- * within the callback function. Doing that would cause a deadlock.
- * While it is allowed to call stream send/recv functions within the
- * callback, it is not recommended: callback function will be invoked
- * within tle_udp_rx_bulk/tle_udp_tx_bulk context and some heavy processing
- * within the callback functions might cause performance degradation
- * or even loss of packets for further streams.
- * b) recv/send event.
- * Stream recv/send events behaviour is level-triggered (LT).
- * receive event will be raised by either
- * tle_udp_rx_burst() or tle_udp_stream_recv() as long as there are any
- * remaining packets inside stream receive buffer.
- * send event will be raised by either
- * tle_udp_tx_burst() or tle_udp_stream_send() as long as there are any
- * free space inside stream send buffer.
- * Note that callback and event are mutually exclusive on <stream, op> basis.
- * It is not possible to open a stream with both recv event and callback
- * specified.
- * Though it is possible to open a stream with recv callback and send event,
- * or vice versa.
- * If the user doesn't need any notification mechanism for that stream,
- * both event and callback could be set to zero.
- */
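/*
 * A concrete ET-vs-LT illustration for the semantics above (editorial):
 * queue 3 packets to a stream whose receive buffer was empty. An ET
 * recv callback fires exactly once, on the empty -> non-empty edge; an
 * LT recv event stays raised across tle_udp_stream_recv() calls, so a
 * reader draining one packet per call keeps seeing the event until all
 * 3 packets are consumed.
 */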
-
-/**
- * Stream recv/send callback function and data.
- */
-struct tle_udp_stream_cb {
- void (*func)(void *, struct tle_udp_stream *);
- void *data;
-};
-
-struct tle_event;
-
-/**
- * UDP stream creation parameters.
- */
-struct tle_udp_stream_param {
- struct sockaddr_storage local_addr; /**< stream local address. */
- struct sockaddr_storage remote_addr; /**< stream remote address. */
-
- /* _cb and _ev are mutually exclusive */
- struct tle_event *recv_ev; /**< recv event to use. */
- struct tle_udp_stream_cb recv_cb; /**< recv callback to use. */
-
- struct tle_event *send_ev; /**< send event to use. */
- struct tle_udp_stream_cb send_cb; /**< send callback to use. */
-};
-
-/**
- * create a new stream within given UDP context.
- * @param ctx
- * UDP context to create new stream within.
- * @param prm
- * Parameters used to create and initialise the new stream.
- * @return
- * Pointer to UDP stream structure that can be used in future UDP API calls,
- * or NULL on error, with error code set in rte_errno.
- * Possible rte_errno errors include:
- * - EINVAL - invalid parameter passed to function
- * - ENFILE - max limit of open streams reached for that context
- */
-struct tle_udp_stream *
-tle_udp_stream_open(struct tle_udp_ctx *ctx,
- const struct tle_udp_stream_param *prm);
-
-/**
- * close an open stream.
- * All packets still remaining in stream receive buffer will be freed.
- * All packets still remaining in stream transmit buffer will be kept
- * for further transmission.
- * @param s
- * Pointer to the stream to close.
- * @return
- * zero on successful completion.
- * - -EINVAL - invalid parameter passed to function
- */
-int tle_udp_stream_close(struct tle_udp_stream *s);
-
-/**
- * get open stream parameters.
- * @param s
- * Pointer to the stream.
- * @return
- * zero on successful completion.
- * - -EINVAL - invalid parameter passed to function
- */
-int
-tle_udp_stream_get_param(const struct tle_udp_stream *s,
- struct tle_udp_stream_param *prm);
-
-/**
- * Take input mbufs and distribute them to open UDP streams.
- * expects that for each input packet:
- * - l2_len, l3_len, l4_len are setup correctly
- * - (packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6)) != 0,
- * - (packet_type & RTE_PTYPE_L4_UDP) != 0,
- * During delivery L3/L4 checksums will be verified
- * (either relies on HW offload or in SW).
- * This function is not multi-thread safe.
- * @param dev
- * UDP device the packets were received from.
- * @param pkt
- * The burst of input packets that need to be processed.
- * @param rp
- * The array that will contain pointers of unprocessed packets at return.
- * Should contain at least *num* elements.
- * @param rc
- * The array that will contain error code for corresponding rp[] entry:
- * - ENOENT - no open stream matching this packet.
- * - ENOBUFS - receive buffer of the destination stream is full.
- * Should contain at least *num* elements.
- * @param num
- * Number of elements in the *pkt* input array.
- * @return
- * number of packets delivered to the UDP streams.
- */
-uint16_t tle_udp_rx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[],
- struct rte_mbuf *rp[], int32_t rc[], uint16_t num);
-
-/**
- * Fill *pkt* with pointers to the packets that have to be transmitted
- * over given UDP device.
- * Output packets have to be ready to be passed straight to rte_eth_tx_burst()
- * without any extra processing.
- * UDP/IPv4 checksum either already calculated or appropriate mbuf fields set
- * properly for HW offload.
- * This function is not multi-thread safe.
- * @param dev
- * UDP device the output packets will be transmitted over.
- * @param pkt
- * An array of pointers to *rte_mbuf* structures that
- * must be large enough to store up to *num* pointers in it.
- * @param num
- * Number of elements in the *pkt* array.
- * @return
- * number of entries filled inside *pkt* array.
- */
-uint16_t tle_udp_tx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[],
- uint16_t num);
-
-/*
- * return up to *num* mbufs that were received for the given UDP stream.
- * For each returned mbuf:
- * data_off set to the start of the packet's UDP data
- * l2_len, l3_len, l4_len are setup properly
- * (so user can still extract L2/L3 address info if needed)
- * packet_type RTE_PTYPE_L2/L3/L4 bits are setup properly.
- * L3/L4 checksum is verified.
- * Packets with invalid L3/L4 checksum will be silently dropped.
- * @param s
- * UDP stream to receive packets from.
- * @param pkt
- * An array of pointers to *rte_mbuf* structures that
- * must be large enough to store up to *num* pointers in it.
- * @param num
- * Number of elements in the *pkt* array.
- * @return
- * number of entries filled inside *pkt* array.
- */
-uint16_t tle_udp_stream_recv(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
- uint16_t num);
-
-/**
- * Consume and queue up to *num* packets, that will be sent eventually
- * by tle_udp_tx_bulk().
- * If *dst_addr* is NULL, then default remote address associated with that
- * stream (if any) will be used.
- * The main purpose of that function is to determine over which UDP dev
- * given packets have to be sent out and do necessary preparations for that.
- * Based on the *dst_addr* it does route lookup, fills L2/L3/L4 headers,
- * and, if necessary, fragments packets.
- * Depending on the underlying device information, it either does
- * IP/UDP checksum calculations in SW or sets mbuf TX checksum
- * offload fields properly.
- * For each input mbuf the following conditions have to be met:
- * - data_off point to the start of packet's UDP data.
- * - there is enough header space to prepend L2/L3/L4 headers.
- * @param s
- * UDP stream to send packets over.
- * @param pkt
- * The burst of output packets that need to be sent.
- * @param num
- * Number of elements in the *pkt* array.
- * @param dst_addr
- * Destination address to send packets to.
- * @return
- * number of packets successfully queued in the stream send buffer.
- */
-uint16_t tle_udp_stream_send(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
- uint16_t num, const struct sockaddr *dst_addr);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _TLE_UDP_IMPL_H_ */
diff --git a/lib/libtle_udp/udp_ctl.c b/lib/libtle_udp/udp_ctl.c
deleted file mode 100644
index faedcad..0000000
--- a/lib/libtle_udp/udp_ctl.c
+++ /dev/null
@@ -1,794 +0,0 @@
-/*
- * Copyright (c) 2016 Intel Corporation.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <string.h>
-#include <rte_malloc.h>
-#include <rte_errno.h>
-#include <rte_ethdev.h>
-#include <rte_ip.h>
-#include <rte_udp.h>
-
-#include "udp_impl.h"
-#include "misc.h"
-
-#define LPORT_START 0x8000
-#define LPORT_END MAX_PORT_NUM
-
-#define LPORT_START_BLK PORT_BLK(LPORT_START)
-#define LPORT_END_BLK PORT_BLK(LPORT_END)
-
-static const struct in6_addr tle_udp6_any = IN6ADDR_ANY_INIT;
-static const struct in6_addr tle_udp6_none = {
- {
- .__u6_addr32 = {
- UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX
- },
- },
-};
-
-static int
-check_dev_prm(const struct tle_udp_dev_param *dev_prm)
-{
- /* no valid IPv4/IPv6 addresses provided. */
- if (dev_prm->local_addr4.s_addr == INADDR_ANY &&
- memcmp(&dev_prm->local_addr6, &tle_udp6_any,
- sizeof(tle_udp6_any)) == 0)
- return -EINVAL;
-
- /* all the ports are blocked. */
- if (dev_prm->bl4.nb_port > UINT16_MAX ||
- (dev_prm->bl4.nb_port != 0 && dev_prm->bl4.port == NULL))
- return -EINVAL;
-
- if (dev_prm->bl6.nb_port > UINT16_MAX ||
- (dev_prm->bl6.nb_port != 0 && dev_prm->bl6.port == NULL))
- return -EINVAL;
-
- return 0;
-}
-
-static void
-unuse_stream(struct tle_udp_stream *s)
-{
- s->type = TLE_UDP_VNUM;
- rte_atomic32_set(&s->rx.use, INT32_MIN);
- rte_atomic32_set(&s->tx.use, INT32_MIN);
-}
-
-/* calculate number of drbs per stream. */
-static uint32_t
-calc_stream_drb_num(const struct tle_udp_ctx *ctx, uint32_t obj_num)
-{
- uint32_t num;
-
- num = (ctx->prm.max_stream_sbufs + obj_num - 1) / obj_num;
- num = num + num / 2;
- num = RTE_MAX(num, RTE_DIM(ctx->dev) + 1);
- return num;
-}
-
-static uint32_t
-drb_nb_elem(const struct tle_udp_ctx *ctx)
-{
- return (ctx->prm.send_bulk_size != 0) ?
- ctx->prm.send_bulk_size : MAX_PKT_BURST;
-}
-
-static int
-init_stream(struct tle_udp_ctx *ctx, struct tle_udp_stream *s)
-{
- size_t bsz, rsz, sz;
- uint32_t i, k, n, nb;
- struct tle_drb *drb;
- char name[RTE_RING_NAMESIZE];
-
- /* init RX part. */
-
- n = RTE_MAX(ctx->prm.max_stream_rbufs, 1U);
- n = rte_align32pow2(n);
- sz = sizeof(*s->rx.q) + n * sizeof(s->rx.q->ring[0]);
-
- s->rx.q = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
- ctx->prm.socket_id);
- if (s->rx.q == NULL) {
- UDP_LOG(ERR, "%s(%p): allocation of %zu bytes on socket %d "
- "failed with error code: %d\n",
- __func__, s, sz, ctx->prm.socket_id, rte_errno);
- return ENOMEM;
- }
-
- snprintf(name, sizeof(name), "%p@%zu", s, sz);
- rte_ring_init(s->rx.q, name, n, RING_F_SP_ENQ);
-
- /* init TX part. */
-
- nb = drb_nb_elem(ctx);
- k = calc_stream_drb_num(ctx, nb);
- n = rte_align32pow2(k);
-
- /* size of the drbs ring */
- rsz = sizeof(*s->tx.drb.r) + n * sizeof(s->tx.drb.r->ring[0]);
- rsz = RTE_ALIGN_CEIL(rsz, RTE_CACHE_LINE_SIZE);
-
- /* size of the drb. */
- bsz = tle_drb_calc_size(nb);
-
- /* total stream drbs size. */
- sz = rsz + bsz * k;
-
- s->tx.drb.r = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
- ctx->prm.socket_id);
- if (s->tx.drb.r == NULL) {
- UDP_LOG(ERR, "%s(%p): allocation of %zu bytes on socket %d "
- "failed with error code: %d\n",
- __func__, s, sz, ctx->prm.socket_id, rte_errno);
- return ENOMEM;
- }
-
- snprintf(name, sizeof(name), "%p@%zu", s, sz);
- rte_ring_init(s->tx.drb.r, name, n, 0);
-
- for (i = 0; i != k; i++) {
- drb = (struct tle_drb *)((uintptr_t)s->tx.drb.r +
- rsz + bsz * i);
- drb->udata = s;
- drb->size = nb;
- rte_ring_enqueue(s->tx.drb.r, drb);
- }
-
- s->tx.drb.nb_elem = nb;
- s->tx.drb.nb_max = k;
-
- /* mark stream as available to use. */
-
- s->ctx = ctx;
- unuse_stream(s);
- STAILQ_INSERT_TAIL(&ctx->streams.free, s, link);
-
- return 0;
-}
-
-static void
-fini_stream(struct tle_udp_stream *s)
-{
- rte_free(s->rx.q);
- rte_free(s->tx.drb.r);
-}
-
-struct tle_udp_ctx *
-tle_udp_create(const struct tle_udp_ctx_param *ctx_prm)
-{
- struct tle_udp_ctx *ctx;
- size_t sz;
- uint32_t i;
-
- if (ctx_prm == NULL) {
- rte_errno = EINVAL;
- return NULL;
- }
-
- sz = sizeof(*ctx);
- ctx = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
- ctx_prm->socket_id);
- if (ctx == NULL) {
- UDP_LOG(ERR, "allocation of %zu bytes for new udp_ctx "
- "on socket %d failed\n",
- sz, ctx_prm->socket_id);
- return NULL;
- }
-
- ctx->prm = *ctx_prm;
-
- sz = sizeof(*ctx->streams.buf) * ctx_prm->max_streams;
- ctx->streams.buf = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
- ctx_prm->socket_id);
- if (ctx->streams.buf == NULL) {
- UDP_LOG(ERR, "allocation of %zu bytes on socket %d "
- "for %u udp_streams failed\n",
- sz, ctx_prm->socket_id, ctx_prm->max_streams);
- tle_udp_destroy(ctx);
- return NULL;
- }
-
- STAILQ_INIT(&ctx->streams.free);
- for (i = 0; i != ctx_prm->max_streams &&
- init_stream(ctx, &ctx->streams.buf[i]) == 0;
- i++)
- ;
-
- if (i != ctx_prm->max_streams) {
- UDP_LOG(ERR, "initalisation of %u-th stream failed", i);
- tle_udp_destroy(ctx);
- return NULL;
- }
-
- for (i = 0; i != RTE_DIM(ctx->use); i++)
- udp_pbm_init(ctx->use + i, LPORT_START_BLK);
-
- ctx->streams.nb_free = ctx->prm.max_streams;
- return ctx;
-}
-
-void
-tle_udp_destroy(struct tle_udp_ctx *ctx)
-{
- uint32_t i;
-
- if (ctx == NULL) {
- rte_errno = EINVAL;
- return;
- }
-
- for (i = 0; i != RTE_DIM(ctx->dev); i++)
- tle_udp_del_dev(ctx->dev + i);
-
- if (ctx->streams.buf != 0) {
- for (i = 0; i != ctx->prm.max_streams; i++)
- fini_stream(&ctx->streams.buf[i]);
- rte_free(ctx->streams.buf);
- }
-
- rte_free(ctx);
-}
-
-void
-tle_udp_ctx_invalidate(struct tle_udp_ctx *ctx)
-{
- RTE_SET_USED(ctx);
-}
-
-static void
-fill_pbm(struct udp_pbm *pbm, const struct tle_bl_port *blp)
-{
- uint32_t i;
-
- for (i = 0; i != blp->nb_port; i++)
- udp_pbm_set(pbm, blp->port[i]);
-}
-
-static int
-init_dev_proto(struct tle_udp_dev *dev, uint32_t idx, int32_t socket_id,
- const struct tle_bl_port *blp)
-{
- size_t sz;
-
- sz = sizeof(*dev->dp[idx]);
- dev->dp[idx] = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
- socket_id);
-
- if (dev->dp[idx] == NULL) {
- UDP_LOG(ERR, "allocation of %zu bytes on "
- "socket %d for %u-th device failed\n",
- sz, socket_id, idx);
- return ENOMEM;
- }
-
- udp_pbm_init(&dev->dp[idx]->use, LPORT_START_BLK);
- fill_pbm(&dev->dp[idx]->use, blp);
-
- return 0;
-}
-
-static struct tle_udp_dev *
-find_free_dev(struct tle_udp_ctx *ctx)
-{
- uint32_t i;
-
- if (ctx->nb_dev < RTE_DIM(ctx->dev)) {
- for (i = 0; i != RTE_DIM(ctx->dev); i++) {
- if (ctx->dev[i].ctx != ctx)
- return ctx->dev + i;
- }
- }
-
- rte_errno = ENODEV;
- return NULL;
-}
-
-struct tle_udp_dev *
-tle_udp_add_dev(struct tle_udp_ctx *ctx,
- const struct tle_udp_dev_param *dev_prm)
-{
- int32_t rc;
- struct tle_udp_dev *dev;
-
- if (ctx == NULL || dev_prm == NULL || check_dev_prm(dev_prm) != 0) {
- rte_errno = EINVAL;
- return NULL;
- }
-
- dev = find_free_dev(ctx);
- if (dev == NULL)
- return NULL;
- rc = 0;
-
- /* device can handle IPv4 traffic */
- if (dev_prm->local_addr4.s_addr != INADDR_ANY) {
- rc = init_dev_proto(dev, TLE_UDP_V4, ctx->prm.socket_id,
- &dev_prm->bl4);
- if (rc == 0)
- fill_pbm(&ctx->use[TLE_UDP_V4], &dev_prm->bl4);
- }
-
- /* device can handle IPv6 traffic */
- if (rc == 0 && memcmp(&dev_prm->local_addr6, &tle_udp6_any,
- sizeof(tle_udp6_any)) != 0) {
- rc = init_dev_proto(dev, TLE_UDP_V6, ctx->prm.socket_id,
- &dev_prm->bl6);
- if (rc == 0)
- fill_pbm(&ctx->use[TLE_UDP_V6], &dev_prm->bl6);
- }
-
- if (rc != 0) {
- /* cleanup and return an error. */
- rte_free(dev->dp[TLE_UDP_V4]);
- rte_free(dev->dp[TLE_UDP_V6]);
- rte_errno = rc;
- return NULL;
- }
-
- /* setup RX data. */
- if (dev_prm->local_addr4.s_addr != INADDR_ANY &&
- (dev_prm->rx_offload & DEV_RX_OFFLOAD_IPV4_CKSUM) == 0)
- dev->rx.ol_flags[TLE_UDP_V4] |= PKT_RX_IP_CKSUM_BAD;
- if ((dev_prm->rx_offload & DEV_RX_OFFLOAD_UDP_CKSUM) == 0) {
- dev->rx.ol_flags[TLE_UDP_V4] |= PKT_RX_L4_CKSUM_BAD;
- dev->rx.ol_flags[TLE_UDP_V6] |= PKT_RX_L4_CKSUM_BAD;
- }
-
- /* setup TX data. */
- tle_dring_reset(&dev->tx.dr);
-
- if ((dev_prm->tx_offload & DEV_TX_OFFLOAD_UDP_CKSUM) != 0) {
- dev->tx.ol_flags[TLE_UDP_V4] |= PKT_TX_IPV4 | PKT_TX_UDP_CKSUM;
- dev->tx.ol_flags[TLE_UDP_V6] |= PKT_TX_IPV6 | PKT_TX_UDP_CKSUM;
- }
- if ((dev_prm->tx_offload & DEV_TX_OFFLOAD_IPV4_CKSUM) != 0)
- dev->tx.ol_flags[TLE_UDP_V4] |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
-
- dev->prm = *dev_prm;
- dev->ctx = ctx;
- ctx->nb_dev++;
-
- return dev;
-}
-
-static void
-empty_dring(struct tle_dring *dr)
-{
- uint32_t i, k, n;
- struct tle_udp_stream *s;
- struct rte_mbuf *pkt[MAX_PKT_BURST];
- struct tle_drb *drb[MAX_PKT_BURST];
-
- do {
- k = RTE_DIM(drb);
- n = tle_dring_sc_dequeue(dr, (const void **)(uintptr_t)pkt,
- RTE_DIM(pkt), drb, &k);
-
- /* free mbufs */
- for (i = 0; i != n; i++)
- rte_pktmbuf_free(pkt[i]);
- /* free drbs */
- for (i = 0; i != k; i++) {
- s = drb[i]->udata;
- rte_ring_enqueue(s->tx.drb.r, drb[i]);
- }
- } while (n != 0);
-}
-
-int
-tle_udp_del_dev(struct tle_udp_dev *dev)
-{
- uint32_t p;
- struct tle_udp_ctx *ctx;
-
- if (dev == NULL || dev->ctx == NULL)
- return -EINVAL;
-
- ctx = dev->ctx;
- p = dev - ctx->dev;
-
- if (p >= RTE_DIM(ctx->dev) ||
- (dev->dp[TLE_UDP_V4] == NULL &&
- dev->dp[TLE_UDP_V6] == NULL))
- return -EINVAL;
-
-	/* empty TX queues. */
- empty_dring(&dev->tx.dr);
-
- rte_free(dev->dp[TLE_UDP_V4]);
- rte_free(dev->dp[TLE_UDP_V6]);
- memset(dev, 0, sizeof(*dev));
- ctx->nb_dev--;
- return 0;
-}
-
-static inline void
-stream_down(struct tle_udp_stream *s)
-{
- rwl_down(&s->rx.use);
- rwl_down(&s->tx.use);
-}
-
-static inline void
-stream_up(struct tle_udp_stream *s)
-{
- rwl_up(&s->rx.use);
- rwl_up(&s->tx.use);
-}
-
-static struct tle_udp_dev *
-find_ipv4_dev(struct tle_udp_ctx *ctx, const struct in_addr *addr)
-{
- uint32_t i;
-
- for (i = 0; i != RTE_DIM(ctx->dev); i++) {
- if (ctx->dev[i].prm.local_addr4.s_addr == addr->s_addr &&
- ctx->dev[i].dp[TLE_UDP_V4] != NULL)
- return ctx->dev + i;
- }
-
- return NULL;
-}
-
-static struct tle_udp_dev *
-find_ipv6_dev(struct tle_udp_ctx *ctx, const struct in6_addr *addr)
-{
- uint32_t i;
-
- for (i = 0; i != RTE_DIM(ctx->dev); i++) {
- if (memcmp(&ctx->dev[i].prm.local_addr6, addr,
- sizeof(*addr)) == 0 &&
- ctx->dev[i].dp[TLE_UDP_V6] != NULL)
- return ctx->dev + i;
- }
-
- return NULL;
-}
-
-static int
-stream_fill_dev(struct tle_udp_ctx *ctx, struct tle_udp_stream *s)
-{
- struct tle_udp_dev *dev;
- struct udp_pbm *pbm;
- struct sockaddr_in *lin4;
- struct sockaddr_in6 *lin6;
- uint32_t i, p, sp, t;
-
- if (s->prm.local_addr.ss_family == AF_INET) {
- lin4 = (struct sockaddr_in *)&s->prm.local_addr;
- t = TLE_UDP_V4;
- p = lin4->sin_port;
- } else if (s->prm.local_addr.ss_family == AF_INET6) {
- lin6 = (struct sockaddr_in6 *)&s->prm.local_addr;
- t = TLE_UDP_V6;
- p = lin6->sin6_port;
- } else
- return EINVAL;
-
- p = ntohs(p);
-
- /* if local address is not wildcard, find device it belongs to. */
- if (t == TLE_UDP_V4 && lin4->sin_addr.s_addr != INADDR_ANY) {
- dev = find_ipv4_dev(ctx, &lin4->sin_addr);
- if (dev == NULL)
- return ENODEV;
- } else if (t == TLE_UDP_V6 && memcmp(&tle_udp6_any, &lin6->sin6_addr,
- sizeof(tle_udp6_any)) != 0) {
- dev = find_ipv6_dev(ctx, &lin6->sin6_addr);
- if (dev == NULL)
- return ENODEV;
- } else
- dev = NULL;
-
- if (dev != NULL)
- pbm = &dev->dp[t]->use;
- else
- pbm = &ctx->use[t];
-
- /* try to acquire local port number. */
- if (p == 0) {
- p = udp_pbm_find_range(pbm, pbm->blk, LPORT_END_BLK);
- if (p == 0 && pbm->blk > LPORT_START_BLK)
- p = udp_pbm_find_range(pbm, LPORT_START_BLK, pbm->blk);
- } else if (udp_pbm_check(pbm, p) != 0)
- return EEXIST;
-
- if (p == 0)
- return ENFILE;
-
- /* fill socket's dst port and type */
- sp = htons(p);
- s->type = t;
- s->port.dst = sp;
-
- /* mark port as in-use */
- udp_pbm_set(&ctx->use[t], p);
- if (dev != NULL) {
- udp_pbm_set(pbm, p);
- dev->dp[t]->streams[sp] = s;
- } else {
- for (i = 0; i != RTE_DIM(ctx->dev); i++) {
- if (ctx->dev[i].dp[t] != NULL) {
- udp_pbm_set(&ctx->dev[i].dp[t]->use, p);
- ctx->dev[i].dp[t]->streams[sp] = s;
- }
- }
- }
-
- return 0;
-}
-
-static int
-stream_clear_dev(struct tle_udp_ctx *ctx, struct tle_udp_stream *s)
-{
- struct tle_udp_dev *dev;
- uint32_t i, p, sp, t;
-
- t = s->type;
- sp = s->port.dst;
- p = ntohs(sp);
-
- /* if local address is not wildcard, find device it belongs to. */
- if (t == TLE_UDP_V4 && s->ipv4.addr.dst != INADDR_ANY) {
- dev = find_ipv4_dev(ctx, (struct in_addr *)&s->ipv4.addr.dst);
- if (dev == NULL)
- return ENODEV;
- } else if (t == TLE_UDP_V6 && memcmp(&tle_udp6_any, &s->ipv6.addr.dst,
- sizeof(tle_udp6_any)) != 0) {
- dev = find_ipv6_dev(ctx, (struct in6_addr *)&s->ipv6.addr.dst);
- if (dev == NULL)
- return ENODEV;
- } else
- dev = NULL;
-
- udp_pbm_clear(&ctx->use[t], p);
- if (dev != NULL) {
- udp_pbm_clear(&dev->dp[t]->use, p);
- dev->dp[t]->streams[sp] = NULL;
- } else {
- for (i = 0; i != RTE_DIM(ctx->dev); i++) {
- if (ctx->dev[i].dp[t] != NULL) {
- udp_pbm_clear(&ctx->dev[i].dp[t]->use, p);
- ctx->dev[i].dp[t]->streams[sp] = NULL;
- }
- }
- }
-
- return 0;
-}
-
-static struct tle_udp_stream *
-get_stream(struct tle_udp_ctx *ctx)
-{
- struct tle_udp_stream *s;
-
- s = NULL;
- if (ctx->streams.nb_free == 0)
- return s;
-
- rte_spinlock_lock(&ctx->streams.lock);
- if (ctx->streams.nb_free != 0) {
- s = STAILQ_FIRST(&ctx->streams.free);
- STAILQ_REMOVE_HEAD(&ctx->streams.free, link);
- ctx->streams.nb_free--;
- }
- rte_spinlock_unlock(&ctx->streams.lock);
- return s;
-}
-
-static void
-put_stream(struct tle_udp_ctx *ctx, struct tle_udp_stream *s, int32_t head)
-{
- s->type = TLE_UDP_VNUM;
- rte_spinlock_lock(&ctx->streams.lock);
- if (head != 0)
- STAILQ_INSERT_HEAD(&ctx->streams.free, s, link);
- else
- STAILQ_INSERT_TAIL(&ctx->streams.free, s, link);
- ctx->streams.nb_free++;
- rte_spinlock_unlock(&ctx->streams.lock);
-}
-
-static void
-fill_ipv4_am(const struct sockaddr_in *in, uint32_t *addr, uint32_t *mask)
-{
- *addr = in->sin_addr.s_addr;
- *mask = (*addr == INADDR_ANY) ? INADDR_ANY : INADDR_NONE;
-}
-
-static void
-fill_ipv6_am(const struct sockaddr_in6 *in, rte_xmm_t *addr, rte_xmm_t *mask)
-{
- const struct in6_addr *pm;
-
- memcpy(addr, &in->sin6_addr, sizeof(*addr));
- if (memcmp(&tle_udp6_any, addr, sizeof(*addr)) == 0)
- pm = &tle_udp6_any;
- else
- pm = &tle_udp6_none;
-
- memcpy(mask, pm, sizeof(*mask));
-}
-
-static int
-check_stream_prm(const struct tle_udp_ctx *ctx,
- const struct tle_udp_stream_param *prm)
-{
- if ((prm->local_addr.ss_family != AF_INET &&
- prm->local_addr.ss_family != AF_INET6) ||
- prm->local_addr.ss_family != prm->remote_addr.ss_family)
- return -EINVAL;
-
-	/* callback and event notification mechanisms are mutually exclusive */
- if ((prm->recv_ev != NULL && prm->recv_cb.func != NULL) ||
- (prm->send_ev != NULL && prm->send_cb.func != NULL))
- return -EINVAL;
-
-	/* check whether the context supports the desired address family. */
- if ((prm->local_addr.ss_family == AF_INET &&
- ctx->prm.lookup4 == NULL) ||
- (prm->local_addr.ss_family == AF_INET6 &&
- ctx->prm.lookup6 == NULL))
- return -EINVAL;
-
- return 0;
-}
-
-struct tle_udp_stream *
-tle_udp_stream_open(struct tle_udp_ctx *ctx,
- const struct tle_udp_stream_param *prm)
-{
- struct tle_udp_stream *s;
- const struct sockaddr_in *rin;
- int32_t rc;
-
- if (ctx == NULL || prm == NULL || check_stream_prm(ctx, prm) != 0) {
- rte_errno = EINVAL;
- return NULL;
- }
-
- s = get_stream(ctx);
- if (s == NULL) {
- rte_errno = ENFILE;
- return NULL;
-
- /* some TX still pending for that stream. */
- } else if (UDP_STREAM_TX_PENDING(s)) {
- put_stream(ctx, s, 0);
- rte_errno = EAGAIN;
- return NULL;
- }
-
- /* copy input parameters. */
- s->prm = *prm;
-
- /* setup ports and port mask fields (except dst port). */
- rin = (const struct sockaddr_in *)&prm->remote_addr;
- s->port.src = rin->sin_port;
- s->pmsk.src = (s->port.src == 0) ? 0 : UINT16_MAX;
- s->pmsk.dst = UINT16_MAX;
-
- /* setup src and dst addresses. */
- if (prm->local_addr.ss_family == AF_INET) {
- fill_ipv4_am((const struct sockaddr_in *)&prm->local_addr,
- &s->ipv4.addr.dst, &s->ipv4.mask.dst);
- fill_ipv4_am((const struct sockaddr_in *)&prm->remote_addr,
- &s->ipv4.addr.src, &s->ipv4.mask.src);
- } else if (prm->local_addr.ss_family == AF_INET6) {
- fill_ipv6_am((const struct sockaddr_in6 *)&prm->local_addr,
- &s->ipv6.addr.dst, &s->ipv6.mask.dst);
- fill_ipv6_am((const struct sockaddr_in6 *)&prm->remote_addr,
- &s->ipv6.addr.src, &s->ipv6.mask.src);
- }
-
- rte_spinlock_lock(&ctx->dev_lock);
- rc = stream_fill_dev(ctx, s);
- rte_spinlock_unlock(&ctx->dev_lock);
-
- if (rc != 0) {
- put_stream(ctx, s, 1);
- s = NULL;
- rte_errno = rc;
- } else {
-		/* setup stream notification mechanism */
- s->rx.ev = prm->recv_ev;
- s->rx.cb = prm->recv_cb;
- s->tx.ev = prm->send_ev;
- s->tx.cb = prm->send_cb;
-
-		/* mark stream as available for RX/TX */
- if (s->tx.ev != NULL)
- tle_event_raise(s->tx.ev);
- stream_up(s);
- }
-
- return s;
-}
-
-int
-tle_udp_stream_close(struct tle_udp_stream *s)
-{
- uint32_t i, n;
- int32_t rc;
- struct tle_udp_ctx *ctx;
- struct rte_mbuf *m[MAX_PKT_BURST];
-
- static const struct tle_udp_stream_cb zcb;
-
- if (s == NULL || s->type >= TLE_UDP_VNUM)
- return -EINVAL;
-
- ctx = s->ctx;
-
-	/* mark stream as unavailable for RX/TX. */
- stream_down(s);
-
- /* reset stream events if any. */
- if (s->rx.ev != NULL) {
- tle_event_idle(s->rx.ev);
- s->rx.ev = NULL;
- }
- if (s->tx.ev != NULL) {
- tle_event_idle(s->tx.ev);
- s->tx.ev = NULL;
- }
-
- s->rx.cb = zcb;
- s->tx.cb = zcb;
-
- /* free stream's destination port */
- rte_spinlock_lock(&ctx->dev_lock);
- rc = stream_clear_dev(ctx, s);
- rte_spinlock_unlock(&ctx->dev_lock);
-
- /* empty stream's RX queue */
- do {
- n = rte_ring_dequeue_burst(s->rx.q, (void **)m, RTE_DIM(m));
- for (i = 0; i != n; i++)
- rte_pktmbuf_free(m[i]);
- } while (n != 0);
-
- /*
- * mark the stream as free again.
-	 * if there are still pkts queued for TX,
-	 * then put this stream at the tail of the free list.
- */
- put_stream(ctx, s, UDP_STREAM_TX_FINISHED(s));
- return rc;
-}
-
-int
-tle_udp_stream_get_param(const struct tle_udp_stream *s,
- struct tle_udp_stream_param *prm)
-{
- struct sockaddr_in *lin4;
- struct sockaddr_in6 *lin6;
-
- if (prm == NULL || s == NULL || s->type >= TLE_UDP_VNUM)
- return -EINVAL;
-
- prm[0] = s->prm;
- if (prm->local_addr.ss_family == AF_INET) {
- lin4 = (struct sockaddr_in *)&prm->local_addr;
- lin4->sin_port = s->port.dst;
- } else if (s->prm.local_addr.ss_family == AF_INET6) {
- lin6 = (struct sockaddr_in6 *)&prm->local_addr;
- lin6->sin6_port = s->port.dst;
- }
-
- return 0;
-}
diff --git a/lib/libtle_udp/udp_impl.h b/lib/libtle_udp/udp_impl.h
deleted file mode 100644
index af35197..0000000
--- a/lib/libtle_udp/udp_impl.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2016 Intel Corporation.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _UDP_IMPL_H_
-#define _UDP_IMPL_H_
-
-#include <rte_spinlock.h>
-#include <rte_vect.h>
-#include <tle_dring.h>
-#include <tle_udp_impl.h>
-#include <tle_event.h>
-
-#include "port_bitmap.h"
-#include "osdep.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-enum {
- TLE_UDP_V4,
- TLE_UDP_V6,
- TLE_UDP_VNUM
-};
-
-union udp_ports {
- uint32_t raw;
- struct {
- uint16_t src;
- uint16_t dst;
- };
-};
-
-union udph {
- uint64_t raw;
- struct {
- union udp_ports ports;
- uint16_t len;
- uint16_t cksum;
- };
-};
-
-union ipv4_addrs {
- uint64_t raw;
- struct {
- uint32_t src;
- uint32_t dst;
- };
-};
-
-union ipv6_addrs {
- _ymm_t raw;
- struct {
- rte_xmm_t src;
- rte_xmm_t dst;
- };
-};
-
-union ip_addrs {
- union ipv4_addrs v4;
- union ipv6_addrs v6;
-};
-
-
-struct tle_udp_stream {
-
- STAILQ_ENTRY(tle_udp_stream) link;
- struct tle_udp_ctx *ctx;
-
- uint8_t type; /* TLE_UDP_V4 or TLE_UDP_V6 */
-
- struct {
- struct rte_ring *q;
- struct tle_event *ev;
- struct tle_udp_stream_cb cb;
- rte_atomic32_t use;
- } rx;
-
- union udp_ports port;
- union udp_ports pmsk;
-
- union {
- struct {
- union ipv4_addrs addr;
- union ipv4_addrs mask;
- } ipv4;
- struct {
- union ipv6_addrs addr;
- union ipv6_addrs mask;
- } ipv6;
- };
-
- struct {
- rte_atomic32_t use;
- struct {
-			uint32_t nb_elem;  /* number of objects per drb. */
- uint32_t nb_max; /* number of drbs per stream. */
- struct rte_ring *r;
- } drb;
- struct tle_event *ev;
- struct tle_udp_stream_cb cb;
- } tx __rte_cache_aligned;
-
- struct tle_udp_stream_param prm;
-} __rte_cache_aligned;
-
-#define UDP_STREAM_TX_PENDING(s) \
- ((s)->tx.drb.nb_max != rte_ring_count((s)->tx.drb.r))
-
-#define UDP_STREAM_TX_FINISHED(s) \
- ((s)->tx.drb.nb_max == rte_ring_count((s)->tx.drb.r))
-
-struct tle_udp_dport {
- struct udp_pbm use; /* ports in use. */
- struct tle_udp_stream *streams[MAX_PORT_NUM]; /* port to stream. */
-};
-
-struct tle_udp_dev {
- struct tle_udp_ctx *ctx;
- struct {
- uint64_t ol_flags[TLE_UDP_VNUM];
- } rx;
- struct {
- /* used by FE. */
- uint64_t ol_flags[TLE_UDP_VNUM];
- rte_atomic32_t packet_id[TLE_UDP_VNUM];
-
- /* used by FE & BE. */
- struct tle_dring dr;
- } tx;
-	struct tle_udp_dev_param prm; /* copy of device parameters. */
- struct tle_udp_dport *dp[TLE_UDP_VNUM]; /* device udp ports */
-};
-
-struct tle_udp_ctx {
- struct tle_udp_ctx_param prm;
- struct {
- rte_spinlock_t lock;
- uint32_t nb_free; /* number of free streams. */
- STAILQ_HEAD(, tle_udp_stream) free;
- struct tle_udp_stream *buf; /* array of streams */
- } streams;
-
- rte_spinlock_t dev_lock;
- uint32_t nb_dev;
- struct udp_pbm use[TLE_UDP_VNUM]; /* all ports in use. */
- struct tle_udp_dev dev[RTE_MAX_ETHPORTS];
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _UDP_IMPL_H_ */
diff --git a/test/Makefile b/test/Makefile
index 665396f..c5cf270 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -23,5 +23,6 @@ include $(RTE_SDK)/mk/rte.vars.mk
DIRS-y += dring
DIRS-y += gtest
+DIRS-y += timer
include $(TLDK_ROOT)/mk/tle.subdir.mk
diff --git a/test/gtest/Makefile b/test/gtest/Makefile
index ef86b9e..2598889 100644
--- a/test/gtest/Makefile
+++ b/test/gtest/Makefile
@@ -68,12 +68,16 @@ OBJ = gtest-rfc.o
# all source are stored in SRCS-y
SRCS-y += main.cpp
+SRCS-y += test_common.cpp
SRCS-y += test_tle_dring.cpp
-SRCS-y += test_tle_udp_ctx.cpp
+SRCS-y += test_tle_ctx.cpp
#SRCS-y += test_tle_udp_dev.cpp
SRCS-y += test_tle_udp_destroy.cpp
SRCS-y += test_tle_udp_event.cpp
-SRCS-y += test_tle_udp_stream.cpp
+#SRCS-y += test_tle_udp_stream.cpp
+#SRCS-y += test_tle_udp_stream_gen.cpp
+#SRCS-y += test_tle_tcp_stream.cpp
+#SRCS-y += test_tle_tcp_stream_gen.cpp
CXXFLAGS += -std=c++11
@@ -87,7 +91,7 @@ CXXFLAGS += -I$(RTE_OUTPUT)/include
LDFLAGS += -lstdc++
LDFLAGS += -L$(GMOCK_DIR) -lgmock
-LDLIBS += -ltle_udp -ltle_dring
+LDLIBS += -whole-archive -ltle_l4p -ltle_dring
include $(TLDK_ROOT)/mk/tle.cpp-obj.mk
endif
@@ -116,7 +120,7 @@ LDLIBS += gtest-rfc.o
LDLIBS += -lstdc++
LDLIBS += -L$(GMOCK_DIR) -lgmock
LDLIBS += -L$(RTE_OUTPUT)/lib
-LDLIBS += -ltle_udp -ltle_dring
+LDLIBS += -whole-archive -ltle_l4p -ltle_dring -ltle_timer
include $(TLDK_ROOT)/mk/tle.app.mk
endif
diff --git a/test/gtest/README b/test/gtest/README
index 8d39b6b..7fba5ff 100644
--- a/test/gtest/README
+++ b/test/gtest/README
@@ -1,41 +1,41 @@
-OVERVIEW
-========
-
-This application is a set of API unit tests for libtle_dring and libtle_udp
-libraries, plus a small set of functional tests for RX/TX functions in
-libtle_udp library.
-UT application needs GoogleTest C++ testing framework to compile, please
-follow installation steps below to enable them.
-
-INSTALLATION GUIDE
-==================
-Assumes that user had finished initial installation from TLDK root
-directory README.
-
-1. TLDK unit tests require pcap PMD to be available, enable it in DPDK and
- rebuild it.
- (http://dpdk.org/doc/quick-start has information how to do it)
-2. Obtain GoogleTest and build it. Some additional dependencies might be
- needed.
- (refer to https://github.com/google/googletest for information how to
- download and build it)
-3. Make sure that GTEST_DIR and GMOCK_DIR environment variables are set.
-4. Rebuild TLDK.
-
-Example:
-cd dpdk
-make config T=x86_64-native-linuxapp-gcc
-sed -ri 's,(PMD_PCAP=).*,\1y,' build/.config
-make install
-
-git clone https://github.com/google/googletest.git
-cd ../googletest
-export GTEST_DIR=`pwd`/googletest
-export GMOCK_DIR=`pwd`/googlemock
-cmake CMakeLists.txt
-make
-
-cd ../tldk
-make clean
-make all
-./x86_64-native-linuxapp-gcc/app/gtest-rfc --lcores=0
+1. OVERVIEW
+
+   This application is a set of API unit tests for the libtle_dring,
+   libtle_l4p, and libtle_timer libraries, plus a small set of functional
+   tests for the RX/TX functions in the libtle_l4p library.
+
+   The UT application needs the GoogleTest C++ testing framework to
+   compile; please follow the installation steps below to enable it.
+
+2. INSTALLATION GUIDE
+
+   Assumes that the user has finished the initial installation described
+   in the TLDK root directory README.
+
+   1) TLDK unit tests require the pcap PMD to be available; enable it in
+      DPDK and rebuild it. (http://dpdk.org/doc/quick-start has information
+      on how to do it)
+   2) Obtain GoogleTest and build it. Some additional dependencies might be
+      needed. (refer to https://github.com/google/googletest for information
+      on how to download and build it)
+   3) Make sure the GTEST_DIR and GMOCK_DIR environment variables are set.
+ 4) Rebuild TLDK.
+
+2.1 Example
+
+ cd dpdk
+ make config T=x86_64-native-linuxapp-gcc
+ sed -ri 's,(PMD_PCAP=).*,\1y,' build/.config
+ make install
+
+ git clone https://github.com/google/googletest.git
+ cd ../googletest
+ export GTEST_DIR=`pwd`/googletest
+ export GMOCK_DIR=`pwd`/googlemock
+ cmake CMakeLists.txt
+ make
+
+ cd ../tldk
+ make clean
+ make all
+ ./x86_64-native-linuxapp-gcc/app/gtest-rfc --lcores=0
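+
+2.2 Running a subset of tests
+
+   GoogleTest flags can be passed alongside the EAL ones (gtest consumes
+   its own flags first); the filter pattern below is only an example:
+
+   ./x86_64-native-linuxapp-gcc/app/gtest-rfc --lcores=0 \
+       --gtest_filter='ctx_create.*'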
diff --git a/test/gtest/main.cpp b/test/gtest/main.cpp
index 8c4e2dc..17cdccd 100644
--- a/test/gtest/main.cpp
+++ b/test/gtest/main.cpp
@@ -13,17 +13,30 @@
* limitations under the License.
*/
+#include <iostream>
#include <gtest/gtest.h>
#include <gmock/gmock.h>
-#include <iostream>
+
#include <rte_common.h>
#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+#include <rte_errno.h>
+
+#include "test_common.h"
-int main(int argc, char *argv[])
+struct rte_mempool *mbuf_pool;
+
+int
+main(int argc, char *argv[])
{
+ uint8_t nb_ports = 1;
+ int rc = 0;
+
/* Initialize GoogleTest&Mock and parse any args */
testing::InitGoogleMock(&argc, argv);
-
/* Initialize EAL */
int ret = rte_eal_init(argc, argv);
if (ret < 0)
@@ -31,5 +44,19 @@ int main(int argc, char *argv[])
argc -= ret;
argv += ret;
+	/*
+	 * Create a new mempool in memory to hold the mbufs.
+	 * The size is multiplied by 2 because the mempool is also used
+	 * for packet fragmentation purposes.
+	 */
+ mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
+ 2 * NUM_MBUFS * nb_ports, MBUF_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (mbuf_pool == NULL) {
+ rc = -rte_errno;
+ printf("Mempool was not created, rc=%d\n", rc);
+ return rc;
+ }
+
return RUN_ALL_TESTS();
}
diff --git a/test/gtest/test_common.cpp b/test/gtest/test_common.cpp
new file mode 100644
index 0000000..0bdcebc
--- /dev/null
+++ b/test/gtest/test_common.cpp
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "test_common.h"
+
+int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_conf port_conf;
+ const uint16_t rx_rings = 1, tx_rings = 1;
+ uint16_t q;
+ int retval;
+ int socket_id;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ socket_id = rte_eth_dev_socket_id(port);
+
+ memset(&port_conf, 0, sizeof(struct rte_eth_conf));
+ port_conf.rxmode.max_rx_pkt_len = ETHER_MAX_LEN;
+
+ /* Configure the Ethernet device. */
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ /* Allocate and set up 1 RX queue per Ethernet port. */
+ for (q = 0; q < rx_rings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+ socket_id, NULL, mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+
+ /* Allocate and set up 1 TX queue per Ethernet port. */
+ for (q = 0; q < tx_rings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+ socket_id, NULL);
+ if (retval < 0)
+ return retval;
+ }
+
+ /* Start the Ethernet port. */
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ /* Enable RX in promiscuous mode for the Ethernet device. */
+ rte_eth_promiscuous_enable(port);
+
+ return 0;
+}
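+
+/*
+ * A minimal usage sketch for the helper above; port id 0 is an assumption
+ * (e.g. a pcap vdev created via EAL arguments):
+ *
+ *	if (port_init(0, mbuf_pool) != 0)
+ *		rte_exit(EXIT_FAILURE, "cannot init port 0\n");
+ */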
+
+/* TODO: shameless rip of examples/udpfwd/pkt.c below. Would like to
+ * move these functions to a separate lib so all future apps could
+ * reuse that code.
+ */
+void
+fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4)
+{
+ m->l2_len = l2;
+ m->l3_len = l3;
+ m->l4_len = l4;
+ m->tso_segsz = 0;
+ m->outer_l2_len = 0;
+ m->outer_l3_len = 0;
+}
+
+int
+is_ipv4_frag(const struct ipv4_hdr *iph)
+{
+ const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG);
+
+ return ((mask & iph->fragment_offset) != 0);
+}
+
+void
+fill_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto,
+ uint32_t frag)
+{
+ const struct ipv4_hdr *iph;
+ int32_t dlen, len;
+
+ dlen = rte_pktmbuf_data_len(m);
+ dlen -= l2 + sizeof(struct udp_hdr);
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2);
+ len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER;
+
+ if (frag != 0 && is_ipv4_frag(iph)) {
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+ m->packet_type |= RTE_PTYPE_L4_FRAG;
+ }
+
+ if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto))
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ else
+ fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr));
+}
+
+int
+ipv6x_hdr(uint32_t proto)
+{
+ return (proto == IPPROTO_HOPOPTS ||
+ proto == IPPROTO_ROUTING ||
+ proto == IPPROTO_FRAGMENT ||
+ proto == IPPROTO_AH ||
+ proto == IPPROTO_NONE ||
+ proto == IPPROTO_DSTOPTS);
+}
+
+uint16_t
+ipv4x_cksum(const void *iph, size_t len)
+{
+ uint16_t cksum;
+
+ cksum = rte_raw_cksum(iph, len);
+ return (cksum == 0xffff) ? cksum : ~cksum;
+}
+
+void
+fill_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto,
+ uint32_t fproto)
+{
+ const struct ip6_ext *ipx;
+ int32_t dlen, len, ofs;
+
+ len = sizeof(struct ipv6_hdr);
+
+ dlen = rte_pktmbuf_data_len(m);
+ dlen -= l2 + sizeof(struct udp_hdr);
+
+ ofs = l2 + len;
+ ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs);
+
+ while (ofs > 0 && len < dlen) {
+
+ switch (nproto) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ ofs = (ipx->ip6e_len + 1) << 3;
+ break;
+ case IPPROTO_AH:
+ ofs = (ipx->ip6e_len + 2) << 2;
+ break;
+ case IPPROTO_FRAGMENT:
+ /*
+			 * tso_segsz is not used by RX, so use it as a temporary
+ * buffer to store the fragment offset.
+ */
+ m->tso_segsz = ofs;
+ ofs = sizeof(struct ip6_frag);
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+ m->packet_type |= RTE_PTYPE_L4_FRAG;
+ break;
+ default:
+ ofs = 0;
+ }
+
+ if (ofs > 0) {
+ nproto = ipx->ip6e_nxt;
+ len += ofs;
+ ipx += ofs / sizeof(*ipx);
+ }
+ }
+
+	/* unrecognised or invalid packet. */
+ if ((ofs == 0 && nproto != fproto) || len > dlen)
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ else
+ fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr));
+}
+
+void
+fill_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto)
+{
+ const struct ipv6_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+
+ if (iph->proto == fproto)
+ fill_pkt_hdr_len(m, l2, sizeof(struct ipv6_hdr),
+ sizeof(struct udp_hdr));
+ else if (ipv6x_hdr(iph->proto) != 0)
+ fill_ipv6x_hdr_len(m, l2, iph->proto, fproto);
+}
+
+void
+fill_eth_hdr_len(struct rte_mbuf *m)
+{
+ uint32_t dlen, l2;
+ uint16_t etp;
+ const struct ether_hdr *eth;
+
+ dlen = rte_pktmbuf_data_len(m);
+
+ /* check that first segment is at least 42B long. */
+ if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
+ sizeof(struct udp_hdr)) {
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return;
+ }
+
+ l2 = sizeof(*eth);
+
+ eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
+ etp = eth->ether_type;
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
+ l2 += sizeof(struct vlan_hdr);
+
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
+ m->packet_type = RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ fill_ipv4_hdr_len(m, l2, IPPROTO_UDP, 1);
+ } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
+ dlen >= l2 + sizeof(struct ipv6_hdr) +
+ sizeof(struct udp_hdr)) {
+ m->packet_type = RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ fill_ipv6_hdr_len(m, l2, IPPROTO_UDP);
+ } else
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+}
+
+/*
+ * generic, assumes HW doesn't recognise any packet type.
+ */
+uint16_t
+typen_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+	struct rte_mbuf *pkt[], uint16_t nb_pkts,
+	__rte_unused uint16_t max_pkts, __rte_unused void *user_param)
+{
+ uint32_t j;
+
+	for (j = 0; j != nb_pkts; j++)
+		fill_eth_hdr_len(pkt[j]);
+
+ return nb_pkts;
+}
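+
+/*
+ * Sketch of the intended registration (port 0, queue 0 are assumptions);
+ * once attached, the callback runs on every received burst and fills in
+ * the packet types in software:
+ *
+ *	rte_eth_add_rx_callback(0, 0, typen_rx_callback, NULL);
+ */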
+
+int
+dummy_lookup4(void *opaque, const struct in_addr *addr, struct tle_dest *res)
+{
+ RTE_SET_USED(opaque);
+ RTE_SET_USED(addr);
+ RTE_SET_USED(res);
+ return -ENOENT;
+}
+
+int
+dummy_lookup6(void *opaque, const struct in6_addr *addr, struct tle_dest *res)
+{
+ RTE_SET_USED(opaque);
+ RTE_SET_USED(addr);
+ RTE_SET_USED(res);
+ return -ENOENT;
+}
diff --git a/test/gtest/test_common.h b/test/gtest/test_common.h
new file mode 100644
index 0000000..2eb93e4
--- /dev/null
+++ b/test/gtest/test_common.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TEST_COMMON_H_
+#define TEST_COMMON_H_
+
+#include <netinet/in.h>
+#include <netinet/ip6.h>
+
+#include <rte_config.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_ethdev.h>
+#include <rte_kvargs.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_lpm.h>
+#include <rte_lpm6.h>
+#include <rte_hash.h>
+#include <rte_ip.h>
+#include <rte_ip_frag.h>
+#include <rte_udp.h>
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 128
+#define NUM_MBUFS 4095
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+
+extern struct rte_mempool *mbuf_pool;
+extern struct rte_mempool *frag_mp;
+
+int port_init(uint8_t port, struct rte_mempool *mbuf_pool);
+
+uint64_t
+_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
+ uint64_t ol3, uint64_t ol2);
+
+void
+fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4);
+
+int
+is_ipv4_frag(const struct ipv4_hdr *iph);
+
+void
+fill_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto,
+ uint32_t frag);
+
+int
+ipv6x_hdr(uint32_t proto);
+
+uint16_t
+ipv4x_cksum(const void *iph, size_t len);
+
+void
+fill_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto,
+ uint32_t fproto);
+
+void
+fill_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto);
+
+void
+fix_reassembled(struct rte_mbuf *m, int32_t hwcsum);
+
+uint32_t
+compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero);
+
+void
+fill_eth_hdr_len(struct rte_mbuf *m);
+
+uint16_t
+typen_rx_callback(uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param);
+
+int
+dummy_lookup4(void *opaque, const struct in_addr *addr, struct tle_dest *res);
+
+int
+dummy_lookup6(void *opaque, const struct in6_addr *addr, struct tle_dest *res);
+
+#endif /* TEST_COMMON_H_ */
diff --git a/test/gtest/test_scapy_gen.py b/test/gtest/test_scapy_gen.py
new file mode 100644
index 0000000..aecbe5f
--- /dev/null
+++ b/test/gtest/test_scapy_gen.py
@@ -0,0 +1,96 @@
+# Copyright (c) 2016 Intel Corporation.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import socket
+from socket import inet_pton
+import logging
+logging.getLogger("scapy.runtime").setLevel(logging.ERROR)
+from scapy.all import *
+from scapy.layers.inet import IP
+from scapy.layers.inet6 import IPv6
+from scapy.layers.inet import UDP
+from random import shuffle
+
+src_mac = "00:00:00:00:de:ad"
+dst_mac = "00:00:de:ad:be:ef"
+eth_hdr_len = len(Ether())
+ip_hdr_len = len(IP())
+ipv6_hdr_len = len(IPv6())
+udp_hdr_len = len(UDP())
+udpv4_hdr_len = eth_hdr_len + ip_hdr_len + udp_hdr_len
+udpv6_hdr_len = eth_hdr_len + ipv6_hdr_len + udp_hdr_len
+
+
+def write_pkts(pkts, pcap_path):
+ try:
+ pktdump = PcapWriter(pcap_path, append=False, sync=True)
+ if len(pkts) > 0:
+ pktdump.write(pkts)
+ except IOError:
+ pass
+
+
+def read_pkts(pcap_path):
+ try:
+ pkts_ref = PcapReader(pcap_path)
+ pkts = pkts_ref.read_all()
+ return list(pkts)
+ except IOError:
+ pkts = []
+ return pkts
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Generate packets for "
+                                     "TLDK rx/tx tests")
+ parser.add_argument("l_ip")
+ parser.add_argument("r_ip")
+ parser.add_argument("l_port", type=int)
+ parser.add_argument("r_port", type=int)
+ parser.add_argument("nb_pkts", type=int)
+ parser.add_argument("file")
+ parser.add_argument("-bc3", "--bad_chksum_l3", default=None, type=int)
+ parser.add_argument("-bc4", "--bad_chksum_l4", default=None, type=int)
+ parser.add_argument("-f", "--fragment")
+ parser.add_argument("-r", "--rand-pkt-size")
+
+ args = parser.parse_args()
+
+ ip_ver = ""
+ try:
+ inet_pton(socket.AF_INET, args.l_ip)
+ ip_ver = "ipv4"
+ except socket.error:
+ ip_ver = "ipv6"
+
+ pkts = read_pkts(args.file)
+
+ if "ipv4" in ip_ver:
+ for i in range(0, args.nb_pkts):
+ pkt = Ether(dst=dst_mac, src=src_mac) /\
+ IP(src=args.l_ip, dst=args.r_ip, frag=0, chksum=args.bad_chksum_l3) /\
+ UDP(sport=args.l_port, dport=args.r_port, chksum=args.bad_chksum_l4) /\
+ Raw(RandString(size=(100 - udpv4_hdr_len)))
+ pkts.append(pkt)
+ else:
+ for i in range(0, args.nb_pkts):
+ pkt = Ether(dst=dst_mac, src=src_mac) /\
+ IPv6(src=args.l_ip, dst=args.r_ip) /\
+ UDP(sport=args.l_port, dport=args.r_port, chksum=args.bad_chksum_l4) / \
+ Raw(RandString(size=(100 - udpv6_hdr_len)))
+ pkts.append(pkt)
+
+ shuffle(pkts)
+ write_pkts(pkts, args.file)
+
+if __name__ == "__main__":
+    main()
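+
+# Example invocation (all argument values are only an illustration):
+#   python test_scapy_gen.py 192.0.2.1 192.0.2.2 10000 32768 16 pkts.pcap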
diff --git a/test/gtest/test_tle_ctx.cpp b/test/gtest/test_tle_ctx.cpp
new file mode 100644
index 0000000..b9808ee
--- /dev/null
+++ b/test/gtest/test_tle_ctx.cpp
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "test_tle_ctx.h"
+
+TEST(ctx_create, ctx_create_null)
+{
+ struct tle_ctx *ctx;
+
+ ctx = tle_ctx_create(NULL);
+ ASSERT_EQ(ctx, (struct tle_ctx *) NULL);
+ ASSERT_EQ(rte_errno, EINVAL);
+}
+
+TEST(ctx_create, create_invalid_socket)
+{
+ struct tle_ctx *ctx;
+ struct tle_ctx_param prm;
+
+ memset(&prm, 0, sizeof(prm));
+ prm.socket_id = SOCKET_ID_ANY;
+ prm.max_streams = 0x10;
+ prm.max_stream_rbufs = 0x100;
+ prm.max_stream_sbufs = 0x100;
+
+ ctx = tle_ctx_create(NULL);
+ ASSERT_EQ(ctx, (struct tle_ctx *) NULL);
+ ASSERT_EQ(rte_errno, EINVAL);
+}
+
+TEST(ctx_create, ctx_create_proto_invalid)
+{
+ struct tle_ctx *ctx;
+ struct tle_ctx_param prm;
+
+ memset(&prm, 0, sizeof(prm));
+ prm.socket_id = SOCKET_ID_ANY;
+ prm.proto = TLE_PROTO_NUM;
+ prm.max_streams = 0x10;
+ prm.max_stream_rbufs = 0x100;
+ prm.max_stream_sbufs = 0x100;
+
+	ctx = tle_ctx_create(&prm);
+ ASSERT_EQ(ctx, (struct tle_ctx *) NULL);
+ ASSERT_EQ(rte_errno, EINVAL);
+}
+
+TEST(ctx_create, ctx_create_proto_not_spec)
+{
+ struct tle_ctx *ctx;
+ struct tle_ctx_param prm;
+
+ memset(&prm, 0, sizeof(prm));
+ prm.socket_id = SOCKET_ID_ANY;
+ prm.max_streams = 0x10;
+ prm.max_stream_rbufs = 0x100;
+ prm.max_stream_sbufs = 0x100;
+
+ ctx = tle_ctx_create(&prm);
+ ASSERT_NE(ctx, (void *)NULL);
+
+ tle_ctx_destroy(ctx);
+}
+
+TEST(ctx_create, ctx_create_proto_udp)
+{
+ struct tle_ctx *ctx;
+ struct tle_ctx_param prm;
+
+ memset(&prm, 0, sizeof(prm));
+ prm.socket_id = SOCKET_ID_ANY;
+ prm.proto = TLE_PROTO_UDP;
+ prm.max_streams = 0x10;
+ prm.max_stream_rbufs = 0x100;
+ prm.max_stream_sbufs = 0x100;
+
+ ctx = tle_ctx_create(&prm);
+ ASSERT_NE(ctx, (void *)NULL);
+
+ tle_ctx_destroy(ctx);
+}
+
+TEST(ctx_create, ctx_create_proto_tcp)
+{
+ struct tle_ctx *ctx;
+ struct tle_ctx_param prm;
+
+ memset(&prm, 0, sizeof(prm));
+ prm.socket_id = SOCKET_ID_ANY;
+ prm.proto = TLE_PROTO_TCP;
+ prm.max_streams = 0x10;
+ prm.max_stream_rbufs = 0x100;
+ prm.max_stream_sbufs = 0x100;
+
+ ctx = tle_ctx_create(&prm);
+ ASSERT_NE(ctx, (void *)NULL);
+
+ tle_ctx_destroy(ctx);
+}
+
+TEST(ctx_create, ctx_create_invalidate)
+{
+ struct tle_ctx *ctx;
+ struct tle_ctx_param prm;
+
+ memset(&prm, 0, sizeof(prm));
+ prm.socket_id = SOCKET_ID_ANY;
+ prm.max_streams = 0x10;
+ prm.max_stream_rbufs = 0x100;
+ prm.max_stream_sbufs = 0x100;
+
+ ctx = tle_ctx_create(&prm);
+ ASSERT_NE(ctx, (void *)NULL);
+
+ tle_ctx_invalidate(ctx);
+
+ tle_ctx_destroy(ctx);
+}
diff --git a/test/gtest/test_tle_udp_ctx.h b/test/gtest/test_tle_ctx.h
index af02440..5c6f2c3 100644
--- a/test/gtest/test_tle_udp_ctx.h
+++ b/test/gtest/test_tle_ctx.h
@@ -13,11 +13,11 @@
* limitations under the License.
*/
-#ifndef TEST_TLE_UDP_CTX_H_
-#define TEST_TLE_UDP_CTX_H_
+#ifndef TEST_TLE_CTX_H_
+#define TEST_TLE_CTX_H_
#include <gtest/gtest.h>
#include <rte_errno.h>
-#include <tle_udp_impl.h>
+#include <tle_ctx.h>
-#endif /* TEST_TLE_UDP_CTX_H_ */
+#endif /* TEST_TLE_CTX_H_ */
diff --git a/test/gtest/test_tle_tcp_stream.cpp b/test/gtest/test_tle_tcp_stream.cpp
new file mode 100644
index 0000000..b861049
--- /dev/null
+++ b/test/gtest/test_tle_tcp_stream.cpp
@@ -0,0 +1,195 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "test_tle_tcp_stream.h"
+
+/* --------- Basic tests for opening / closing streams, no traffic --------- */
+
+TEST_F(test_tle_tcp_stream, tcp_stream_test_open_nullctx)
+{
+ stream = tle_tcp_stream_open(nullptr,
+ (const struct tle_tcp_stream_param *)&stream_prm);
+ EXPECT_EQ(stream, nullptr);
+ EXPECT_EQ(rte_errno, EINVAL);
+
+ ret = tle_tcp_stream_close(stream);
+ EXPECT_EQ(ret, -EINVAL);
+}
+
+TEST_F(test_tle_tcp_stream, tcp_stream_test_open_null_stream_prm)
+{
+ stream = tle_tcp_stream_open(ctx, nullptr);
+ EXPECT_EQ(stream, nullptr);
+ EXPECT_EQ(rte_errno, EINVAL);
+
+ ret = tle_tcp_stream_close(stream);
+ EXPECT_EQ(ret, -EINVAL);
+}
+
+TEST_F(test_tle_tcp_stream, tcp_stream_test_open_close_ipv4)
+{
+ stream = tle_tcp_stream_open(ctx,
+ (const struct tle_tcp_stream_param *)&stream_prm);
+ ASSERT_NE(stream, nullptr);
+
+ ret = tle_tcp_stream_close(stream);
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(test_tle_tcp_stream, tcp_stream_test_open_close_ipv6)
+{
+ stream6 = tle_tcp_stream_open(ctx,
+ (const struct tle_tcp_stream_param *)&stream_prm6);
+	ASSERT_NE(stream6, nullptr);
+
+ ret = tle_tcp_stream_close(stream6);
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(test_tle_tcp_stream, tcp_stream_test_open_close_open_close)
+{
+ stream = tle_tcp_stream_open(ctx,
+ (const struct tle_tcp_stream_param *)&stream_prm);
+ ASSERT_NE(stream, nullptr);
+
+ ret = tle_tcp_stream_close(stream);
+ ASSERT_EQ(ret, 0);
+
+ stream = tle_tcp_stream_open(ctx,
+ (const struct tle_tcp_stream_param*)&stream_prm);
+ ASSERT_NE(stream, nullptr);
+
+ ret = tle_tcp_stream_close(stream);
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(test_tle_tcp_stream, tcp_stream_test_open_duplicate_ipv4)
+{
+ struct tle_stream *stream_dup;
+
+ stream = tle_tcp_stream_open(ctx,
+ (const struct tle_tcp_stream_param *)&stream_prm);
+ ASSERT_NE(stream, nullptr);
+
+ stream_dup = tle_tcp_stream_open(ctx,
+ (const struct tle_tcp_stream_param*)&stream_prm);
+ ASSERT_EQ(stream_dup, nullptr);
+ ASSERT_EQ(rte_errno, EEXIST);
+
+ ret = tle_tcp_stream_close(stream);
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(test_tle_tcp_stream, tcp_stream_test_open_duplicate_ipv6)
+{
+ struct tle_stream *stream_dup;
+
+ stream6 = tle_tcp_stream_open(ctx,
+ (const struct tle_tcp_stream_param *)&stream_prm6);
+	ASSERT_NE(stream6, nullptr);
+
+ stream_dup = tle_tcp_stream_open(ctx,
+ (const struct tle_tcp_stream_param*)&stream_prm6);
+ ASSERT_EQ(stream_dup, nullptr);
+ ASSERT_EQ(rte_errno, EEXIST);
+
+ ret = tle_tcp_stream_close(stream6);
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(test_tle_tcp_stream, tcp_stream_test_close_null)
+{
+ ret = tle_tcp_stream_close(nullptr);
+ EXPECT_EQ(ret, -EINVAL);
+}
+
+TEST_F(test_tle_tcp_stream, tcp_stream_test_closed_already)
+{
+ stream = tle_tcp_stream_open(ctx,
+ (const struct tle_tcp_stream_param *)&stream_prm);
+ ASSERT_NE(stream, nullptr);
+
+ ret = tle_tcp_stream_close(stream);
+ EXPECT_EQ(ret, 0);
+
+ ret = tle_tcp_stream_close(stream);
+ EXPECT_NE(ret, 0);
+}
+
+/* --------- Tests for get_addr call --------- */
+
+TEST_F(test_tle_tcp_stream_ops, tcp_stream_get_addr_null_stream)
+{
+ struct tle_tcp_stream_addr addr;
+
+ ret = tle_tcp_stream_get_addr(nullptr, &addr);
+ EXPECT_EQ(ret, -EINVAL);
+}
+
+TEST_F(test_tle_tcp_stream_ops, tcp_stream_get_addr_null_addr)
+{
+ ret = tle_tcp_stream_get_addr(stream, NULL);
+ EXPECT_EQ(ret, -EINVAL);
+}
+
+TEST_F(test_tle_tcp_stream_ops, tcp_stream_get_addr_ipv4)
+{
+ struct tle_tcp_stream_addr addr;
+
+ memset(&addr, 0, sizeof(addr));
+ ret = tle_tcp_stream_get_addr(stream, &addr);
+ ASSERT_EQ(ret, 0);
+
+ ret = memcmp(&addr, &stream_prm.addr, sizeof(tle_tcp_stream_addr));
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(test_tle_tcp_stream_ops, tcp_stream_get_addr_ipv6)
+{
+ struct tle_tcp_stream_addr addr;
+
+ memset(&addr, 0, sizeof(addr));
+ ret = tle_tcp_stream_get_addr(stream6, &addr);
+ ASSERT_EQ(ret, 0);
+
+ ret = memcmp(&addr, &stream_prm6.addr, sizeof(tle_tcp_stream_addr));
+ ASSERT_EQ(ret, 0);
+}
+
+/* --------- Basic tests for listen call, no incoming connections --------- */
+
+TEST_F(test_tle_tcp_stream_ops, tcp_stream_listen_null_stream)
+{
+ ret = tle_tcp_stream_listen(nullptr);
+ EXPECT_EQ(ret, -EINVAL);
+}
+
+TEST_F(test_tle_tcp_stream_ops, tcp_stream_listen_ipv4)
+{
+ ret = tle_tcp_stream_listen(stream);
+ ASSERT_EQ(ret, 0);
+
+ ret = tle_tcp_stream_close(stream);
+ ASSERT_EQ(ret, 0);
+}
+
+TEST_F(test_tle_tcp_stream_ops, tcp_stream_listen_ipv6)
+{
+ ret = tle_tcp_stream_listen(stream6);
+ ASSERT_EQ(ret, 0);
+
+ ret = tle_tcp_stream_close(stream6);
+ ASSERT_EQ(ret, 0);
+}
diff --git a/test/gtest/test_tle_tcp_stream.h b/test/gtest/test_tle_tcp_stream.h
new file mode 100644
index 0000000..2caf2b5
--- /dev/null
+++ b/test/gtest/test_tle_tcp_stream.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TEST_TLE_TCP_STREAM_H_
+#define TEST_TLE_TCP_STREAM_H_
+
+#include <iostream>
+#include <arpa/inet.h>
+#include <netinet/ip6.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <gtest/gtest.h>
+#include <gmock/gmock.h>
+#include <rte_errno.h>
+
+#include <tle_event.h>
+#include <tle_ctx.h>
+#include <tle_tcp.h>
+
+#include "test_common.h"
+
+#define MAX_STREAMS 0x100
+#define MAX_STREAM_RBUFS 0x100
+#define MAX_STREAM_SBUFS 0x100
+#define RX_NO_OFFLOAD 0x0
+#define TX_NO_OFFLOAD 0x0
+
+static struct tle_ctx_param ctx_prm_tmpl = {
+ .socket_id = SOCKET_ID_ANY,
+ .proto = TLE_PROTO_TCP,
+ .max_streams = MAX_STREAMS,
+ .max_stream_rbufs = MAX_STREAM_RBUFS,
+ .max_stream_sbufs = MAX_STREAM_SBUFS,
+};
+
+static struct tle_dev_param dev_prm_tmpl = {
+ .rx_offload = RX_NO_OFFLOAD,
+ .tx_offload = TX_NO_OFFLOAD
+};
+
+class tcp_stream_base: public ::testing::Test {
+
+public:
+ struct tle_ctx *setup_ctx(struct tle_ctx_param *prm);
+ struct tle_dev *setup_dev(struct tle_ctx *ctx,
+ struct tle_dev_param *dev_prm);
+ void setup_dev_prm(struct tle_dev_param *dev_prm,
+ char const *ipv4, char const *ipv6);
+ int setup_stream_prm(struct tle_tcp_stream_param *stream_prm,
+ char const *l_ip, char const *r_ip,
+ int l_port, int r_port);
+ struct tle_evq *setup_event();
+};
+
+struct tle_evq
+*tcp_stream_base::setup_event()
+{
+ int32_t socket_id;
+ uint32_t max_events;
+ struct tle_evq_param evq_params;
+ struct tle_evq *evq;
+
+ socket_id = SOCKET_ID_ANY;
+ max_events = 10;
+ rte_errno = 0;
+ memset(&evq_params, 0, sizeof(struct tle_evq_param));
+ evq_params.socket_id = socket_id;
+ evq_params.max_events = max_events;
+ evq = tle_evq_create(&evq_params);
+ return evq;
+}
+
+struct tle_ctx
+*tcp_stream_base::setup_ctx(struct tle_ctx_param *prm)
+{
+ struct tle_ctx *ctx;
+
+ prm->lookup4 = dummy_lookup4;
+ prm->lookup6 = dummy_lookup6;
+
+ ctx = tle_ctx_create(prm);
+
+ return ctx;
+}
+
+struct tle_dev
+*tcp_stream_base::setup_dev(struct tle_ctx *ctx, struct tle_dev_param *dev_prm)
+{
+ struct tle_dev *dev;
+
+ dev = tle_add_dev(ctx, dev_prm);
+
+ return dev;
+}
+
+void
+tcp_stream_base::setup_dev_prm(struct tle_dev_param *dev_prm, char const *ipv4,
+ char const *ipv6)
+{
+ inet_pton(AF_INET, ipv4, &dev_prm->local_addr4);
+ inet_pton(AF_INET6, ipv6, &dev_prm->local_addr6);
+}
+
+int
+tcp_stream_base::setup_stream_prm(struct tle_tcp_stream_param *stream_prm,
+ char const *l_ip, char const *r_ip, int l_port, int r_port)
+{
+ int32_t ret;
+ struct sockaddr_in *ip4_addr;
+ struct sockaddr_in6 *ip6_addr;
+ struct addrinfo hint, *res = NULL;
+ struct tle_tcp_stream_cfg stream_cfg;
+
+ memset(&hint, '\0', sizeof(hint));
+ memset(&stream_cfg, 0, sizeof(stream_cfg));
+
+ ret = getaddrinfo(l_ip, NULL, &hint, &res);
+ if (ret != 0)
+ return -EINVAL;
+
+ if (res->ai_family == AF_INET) {
+ ip4_addr = (struct sockaddr_in *) &stream_prm->addr.local;
+ ip4_addr->sin_family = AF_INET;
+ ip4_addr->sin_port = htons(l_port);
+ ip4_addr->sin_addr.s_addr = inet_addr(l_ip);
+ } else if (res->ai_family == AF_INET6) {
+ ip6_addr = (struct sockaddr_in6 *) &stream_prm->addr.local;
+ ip6_addr->sin6_family = AF_INET6;
+ inet_pton(AF_INET6, l_ip, &ip6_addr->sin6_addr);
+ ip6_addr->sin6_port = htons(l_port);
+ } else {
+ freeaddrinfo(res);
+ return -EINVAL;
+ }
+ freeaddrinfo(res);
+
+ memset(&hint, '\0', sizeof(hint));
+ ret = getaddrinfo(r_ip, NULL, &hint, &res);
+ if (ret != 0)
+ return -EINVAL;
+
+ if (res->ai_family == AF_INET) {
+ ip4_addr = (struct sockaddr_in *) &stream_prm->addr.remote;
+ ip4_addr->sin_family = AF_INET;
+ ip4_addr->sin_port = htons(r_port);
+ ip4_addr->sin_addr.s_addr = inet_addr(r_ip);
+ } else if (res->ai_family == AF_INET6) {
+ ip6_addr = (struct sockaddr_in6 *) &stream_prm->addr.remote;
+ ip6_addr->sin6_family = AF_INET6;
+ inet_pton(AF_INET6, r_ip, &ip6_addr->sin6_addr);
+ ip6_addr->sin6_port = htons(r_port);
+ } else {
+ freeaddrinfo(res);
+ return -EINVAL;
+ }
+ freeaddrinfo(res);
+
+ stream_prm->cfg = stream_cfg;
+
+ return 0;
+}
+
+class test_tle_tcp_stream: public ::tcp_stream_base {
+protected:
+ virtual void SetUp(void)
+ {
+ ipv4_laddr = "192.0.0.1";
+ ipv4_raddr = "192.0.0.2";
+ ipv6_laddr = "2001::1000";
+ ipv6_raddr = "2001::2000";
+ l_port = 10000;
+ r_port = 10000;
+
+ memset(&ctx_prm, 0, sizeof(ctx_prm));
+ memset(&dev_prm, 0, sizeof(dev_prm));
+ memset(&stream_prm, 0, sizeof(stream_prm));
+ memset(&stream_prm6, 0, sizeof(stream_prm6));
+
+ ctx_prm = ctx_prm_tmpl;
+ dev_prm = dev_prm_tmpl;
+ setup_dev_prm(&dev_prm, ipv4_laddr, ipv6_laddr);
+ ret = setup_stream_prm(&stream_prm, ipv4_laddr, ipv4_raddr,
+ l_port, r_port);
+ ASSERT_EQ(ret, 0);
+		ret = setup_stream_prm(&stream_prm6, ipv6_laddr, ipv6_raddr,
+			l_port, r_port);
+ ASSERT_EQ(ret, 0);
+
+ ctx = setup_ctx(&ctx_prm);
+ ASSERT_NE(ctx, (void *) NULL);
+ dev = setup_dev(ctx, &dev_prm);
+ ASSERT_NE(dev, (void *) NULL);
+ }
+
+ virtual void TearDown(void)
+ {
+ ret = 0;
+ tle_del_dev(dev);
+ tle_ctx_destroy(ctx);
+ }
+
+ int ret;
+ struct tle_ctx *ctx;
+ struct tle_dev *dev;
+ struct tle_stream *stream;
+ struct tle_stream *stream6;
+
+ struct tle_ctx_param ctx_prm;
+ struct tle_dev_param dev_prm;
+ struct tle_tcp_stream_param stream_prm;
+ struct tle_tcp_stream_param stream_prm6;
+
+ int l_port, r_port;
+ char const *ipv4_laddr;
+ char const *ipv4_raddr;
+ char const *ipv6_laddr;
+ char const *ipv6_raddr;
+};
+
+class test_tle_tcp_stream_ops: public ::test_tle_tcp_stream {
+public:
+ virtual void SetUp(void)
+ {
+ test_tle_tcp_stream::SetUp();
+ stream = tle_tcp_stream_open(ctx,
+ (const struct tle_tcp_stream_param *)&stream_prm);
+ stream6 = tle_tcp_stream_open(ctx,
+ (const struct tle_tcp_stream_param *)&stream_prm6);
+ }
+
+ virtual void TearDown(void)
+ {
+ tle_tcp_stream_close(stream6);
+ tle_tcp_stream_close(stream);
+ test_tle_tcp_stream::TearDown();
+ }
+};
+
+#endif /* TEST_TLE_TCP_STREAM_H_ */
diff --git a/test/gtest/test_tle_udp_ctx.cpp b/test/gtest/test_tle_udp_ctx.cpp
deleted file mode 100644
index 0bad39f..0000000
--- a/test/gtest/test_tle_udp_ctx.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2016 Intel Corporation.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "test_tle_udp_ctx.h"
-
-TEST(udp_ctx, udp_create_null)
-{
- struct tle_udp_ctx *ctx;
-
- ctx = tle_udp_create(NULL);
- ASSERT_EQ(ctx, (struct tle_udp_ctx *) NULL);
- ASSERT_EQ(rte_errno, EINVAL);
-}
-
-TEST(udp_ctx, udp_create)
-{
- struct tle_udp_ctx *ctx;
- struct tle_udp_ctx_param prm;
-
- memset(&prm, 0, sizeof(prm));
- prm.socket_id = SOCKET_ID_ANY;
- prm.max_streams = 0x10;
- prm.max_stream_rbufs = 0x100;
- prm.max_stream_sbufs = 0x100;
-
- ctx = tle_udp_create(&prm);
- ASSERT_NE(ctx, (void *)NULL);
-
- tle_udp_destroy(ctx);
-}
diff --git a/test/gtest/test_tle_udp_destroy.cpp b/test/gtest/test_tle_udp_destroy.cpp
index 2244b18..2f26dd8 100644
--- a/test/gtest/test_tle_udp_destroy.cpp
+++ b/test/gtest/test_tle_udp_destroy.cpp
@@ -17,13 +17,13 @@
TEST(udp_destroy_null, udp_destroy_null)
{
- tle_udp_destroy(NULL);
+ tle_ctx_destroy(NULL);
EXPECT_EQ(rte_errno, EINVAL);
}
TEST_F(udp_destroy, udp_destroy_positive)
{
int rc;
- tle_udp_destroy(ctx);
+ tle_ctx_destroy(ctx);
ASSERT_EQ(rte_errno, 0);
}
diff --git a/test/gtest/test_tle_udp_destroy.h b/test/gtest/test_tle_udp_destroy.h
index d52376e..37bcceb 100644
--- a/test/gtest/test_tle_udp_destroy.h
+++ b/test/gtest/test_tle_udp_destroy.h
@@ -18,13 +18,13 @@
#include <gtest/gtest.h>
#include <rte_errno.h>
-#include <tle_udp_impl.h>
+#include <tle_ctx.h>
class udp_destroy : public ::testing::Test {
protected:
- struct tle_udp_ctx *ctx;
- struct tle_udp_ctx_param prm;
+ struct tle_ctx *ctx;
+ struct tle_ctx_param prm;
virtual void SetUp(void)
{
@@ -35,7 +35,7 @@ protected:
prm.max_stream_rbufs = 0x100;
prm.max_stream_sbufs = 0x100;
- ctx = tle_udp_create(&prm);
+ ctx = tle_ctx_create(&prm);
ASSERT_NE(ctx, (void *) NULL);
}
diff --git a/test/gtest/test_tle_udp_dev.cpp b/test/gtest/test_tle_udp_dev.cpp
index a58186e..93d6c5e 100644
--- a/test/gtest/test_tle_udp_dev.cpp
+++ b/test/gtest/test_tle_udp_dev.cpp
@@ -17,14 +17,14 @@
TEST_F(udp_dev, udp_dev_add_null_ctx)
{
- dev = tle_udp_add_dev(NULL, &dev_prm);
+ dev = tle_add_dev(NULL, &dev_prm);
EXPECT_EQ(dev, (void *) NULL);
EXPECT_EQ(rte_errno, EINVAL);
}
TEST_F(udp_dev, udp_dev_add_null_dev_prm)
{
- dev = tle_udp_add_dev(ctx, NULL);
+ dev = tle_add_dev(ctx, NULL);
EXPECT_EQ(dev, (void *) NULL);
EXPECT_EQ(rte_errno, EINVAL);
}
@@ -33,7 +33,7 @@ TEST_F(udp_dev, udp_dev_add_no_addr)
{
memset(&(dev_prm).local_addr4, 0, sizeof(struct in_addr));
memset(&(dev_prm).local_addr6, 0, sizeof(struct in6_addr));
- dev = tle_udp_add_dev(ctx, &dev_prm);
+ dev = tle_add_dev(ctx, &dev_prm);
EXPECT_EQ(dev, (void *) NULL);
EXPECT_EQ(rte_errno, EINVAL);
}
@@ -42,7 +42,7 @@ TEST_F(udp_dev, udp_dev_add_anyaddr)
{
inet_pton(AF_INET, "0.0.0.0", &(dev_prm).local_addr4);
inet_pton(AF_INET6, "::0", &(dev_prm).local_addr6);
- dev = tle_udp_add_dev(ctx, &dev_prm);
+ dev = tle_add_dev(ctx, &dev_prm);
EXPECT_EQ(dev, (void *) NULL);
EXPECT_EQ(rte_errno, EINVAL);
}
@@ -50,7 +50,7 @@ TEST_F(udp_dev, udp_dev_add_anyaddr)
TEST_F(udp_dev, udp_dev_add_only_ipv4)
{
memset(&(dev_prm).local_addr6, 0, sizeof(struct in6_addr));
- dev = tle_udp_add_dev(ctx, &dev_prm);
+ dev = tle_add_dev(ctx, &dev_prm);
ASSERT_NE(dev, (void *) NULL);
EXPECT_EQ(rte_errno, 0);
devs.push_back(dev);
@@ -59,16 +59,17 @@ TEST_F(udp_dev, udp_dev_add_only_ipv4)
TEST_F(udp_dev, udp_dev_add_only_ipv6)
{
memset(&(dev_prm).local_addr4, 0, sizeof(struct in_addr));
- dev = tle_udp_add_dev(ctx, &dev_prm);
+ dev = tle_add_dev(ctx, &dev_prm);
ASSERT_NE(dev, (void *) NULL);
EXPECT_EQ(rte_errno, 0);
+ devs.push_back(dev);
}
TEST_F(udp_dev, udp_dev_add_nonexist_ipv4)
{
memset(&(dev_prm).local_addr4, 0, sizeof(struct in_addr));
inet_pton(AF_INET, "10.0.0.1", &(dev_prm).local_addr4);
- dev = tle_udp_add_dev(ctx, &dev_prm);
+ dev = tle_add_dev(ctx, &dev_prm);
ASSERT_NE(dev, (void *) NULL);
EXPECT_EQ(rte_errno, 0);
devs.push_back(dev);
@@ -76,7 +77,7 @@ TEST_F(udp_dev, udp_dev_add_nonexist_ipv4)
TEST_F(udp_dev, udp_dev_add_positive)
{
- dev = tle_udp_add_dev(ctx, &dev_prm);
+ dev = tle_add_dev(ctx, &dev_prm);
ASSERT_NE(dev, (void *) NULL);
EXPECT_EQ(rte_errno, 0);
devs.push_back(dev);
@@ -86,27 +87,27 @@ TEST_F(udp_dev, udp_dev_add_max)
{
int i;
for(i = 0; i < RTE_MAX_ETHPORTS; i++) {
- dev = tle_udp_add_dev(ctx, &dev_prm);
+ dev = tle_add_dev(ctx, &dev_prm);
ASSERT_NE(dev, (void *) NULL);
EXPECT_EQ(rte_errno, 0);
devs.push_back(dev);
}
- dev = tle_udp_add_dev(ctx, &dev_prm);
+ dev = tle_add_dev(ctx, &dev_prm);
ASSERT_EQ(dev, (void *) NULL);
EXPECT_EQ(rte_errno, ENODEV);
}
TEST_F(udp_dev, udp_dev_del_positive)
{
- dev = tle_udp_add_dev(ctx, &dev_prm);
+ dev = tle_add_dev(ctx, &dev_prm);
ASSERT_NE(dev, (void *) NULL);
EXPECT_EQ(rte_errno, 0);
devs.push_back(dev);
- ASSERT_EQ(tle_udp_del_dev(dev), 0);
+ ASSERT_EQ(tle_del_dev(dev), 0);
EXPECT_EQ(rte_errno, 0);
}
TEST_F(udp_dev, udp_dev_del_null_dev)
{
- ASSERT_EQ(tle_udp_del_dev(dev), -EINVAL);
+ ASSERT_EQ(tle_del_dev(dev), -EINVAL);
}
diff --git a/test/gtest/test_tle_udp_dev.h b/test/gtest/test_tle_udp_dev.h
index 02f544d..3eeef3a 100644
--- a/test/gtest/test_tle_udp_dev.h
+++ b/test/gtest/test_tle_udp_dev.h
@@ -21,8 +21,7 @@
#include <gtest/gtest.h>
#include <rte_errno.h>
-
-#include <tle_udp_impl.h>
+#include <tle_ctx.h>
#define RX_NO_OFFLOAD 0
#define TX_NO_OFFLOAD 0
@@ -32,11 +31,11 @@ using namespace std;
class udp_dev : public ::testing::Test {
public:
- struct tle_udp_ctx *ctx;
- struct tle_udp_dev *dev;
- struct tle_udp_ctx_param prm;
- struct tle_udp_dev_param dev_prm;
- vector<tle_udp_dev*> devs;
+ struct tle_ctx *ctx;
+ struct tle_dev *dev;
+ struct tle_ctx_param prm;
+ struct tle_dev_param dev_prm;
+ vector<tle_dev *> devs;
virtual void SetUp(void)
{
@@ -56,16 +55,16 @@ public:
inet_pton(AF_INET6, "fe80::21e:67ff:fec2:2568",
&(dev_prm).local_addr6);
- ctx = tle_udp_create(&prm);
+ ctx = tle_ctx_create(&prm);
ASSERT_NE(ctx, (void *) NULL);
}
virtual void TearDown(void)
{
- for(auto d : devs) {
- tle_udp_del_dev(d);
- }
- tle_udp_destroy(ctx);
+ for (auto d : devs)
+ tle_del_dev(d);
+
+ tle_ctx_destroy(ctx);
}
};
diff --git a/test/gtest/test_tle_udp_event.h b/test/gtest/test_tle_udp_event.h
index 4d66fb6..6aafebb 100644
--- a/test/gtest/test_tle_udp_event.h
+++ b/test/gtest/test_tle_udp_event.h
@@ -18,7 +18,7 @@
#include <gtest/gtest.h>
#include <rte_errno.h>
-#include <tle_udp_impl.h>
+#include <tle_ctx.h>
#include <tle_event.h>
struct event_state_active {
diff --git a/test/gtest/test_tle_udp_stream.cpp b/test/gtest/test_tle_udp_stream.cpp
index 22a1b44..44a8a65 100644
--- a/test/gtest/test_tle_udp_stream.cpp
+++ b/test/gtest/test_tle_udp_stream.cpp
@@ -14,15 +14,14 @@
*/
#include "test_tle_udp_stream.h"
+#include <arpa/inet.h>
TEST_F(test_tle_udp_stream, stream_test_open)
{
stream = tle_udp_stream_open(ctx,
(const struct tle_udp_stream_param *)&stream_prm);
EXPECT_NE(stream, nullptr);
- ret = tle_udp_stream_close(stream);
-
- EXPECT_EQ(ret, 0);
+ streams.push_back(stream);
}
TEST_F(test_tle_udp_stream, stream_test_open_nullctx)
@@ -60,8 +59,20 @@ TEST_F(test_tle_udp_stream, stream_test_open_close_open_close)
(const struct tle_udp_stream_param*)&stream_prm);
EXPECT_NE(stream, nullptr);
- ret = tle_udp_stream_close(stream);
- EXPECT_EQ(ret, 0);
+ streams.push_back(stream);
+}
+
+TEST_F(test_tle_udp_stream, stream_test_open_duplicate)
+{
+ stream = tle_udp_stream_open(ctx,
+ (const struct tle_udp_stream_param *)&stream_prm);
+ EXPECT_NE(stream, nullptr);
+ streams.push_back(stream);
+
+ stream = tle_udp_stream_open(ctx,
+ (const struct tle_udp_stream_param *)&stream_prm);
+ EXPECT_EQ(stream, nullptr);
+ EXPECT_EQ(rte_errno, EEXIST);
}
TEST_F(test_tle_udp_stream, stream_test_close)
@@ -80,7 +91,6 @@ TEST_F(test_tle_udp_stream, stream_test_close_null)
EXPECT_EQ(ret, -EINVAL);
}
-
TEST_F(test_tle_udp_stream, stream_test_close_already)
{
stream = tle_udp_stream_open(ctx,
@@ -92,6 +102,7 @@ TEST_F(test_tle_udp_stream, stream_test_close_already)
ret = tle_udp_stream_close(stream);
EXPECT_NE(ret, 0);
+ EXPECT_EQ(ret, -EINVAL);
}
TEST_F(test_tle_udp_stream, stream_get_param)
@@ -101,6 +112,7 @@ TEST_F(test_tle_udp_stream, stream_get_param)
stream = tle_udp_stream_open(ctx,
(const struct tle_udp_stream_param *)&stream_prm);
EXPECT_NE(stream, nullptr);
+ streams.push_back(stream);
ret = tle_udp_stream_get_param(stream,&prm);
EXPECT_EQ(ret, 0);
@@ -113,6 +125,7 @@ TEST_F(test_tle_udp_stream, stream_get_param_streamnull)
stream = tle_udp_stream_open(ctx,
(const struct tle_udp_stream_param *)&stream_prm);
EXPECT_NE(stream, nullptr);
+ streams.push_back(stream);
ret = tle_udp_stream_get_param(nullptr, &prm);
EXPECT_EQ(ret, -EINVAL);
@@ -125,10 +138,59 @@ TEST_F(test_tle_udp_stream, stream_get_param_prmnull)
stream = tle_udp_stream_open(ctx,
(const struct tle_udp_stream_param *)&stream_prm);
EXPECT_NE(stream, nullptr);
+ streams.push_back(stream);
ret = tle_udp_stream_get_param(stream, nullptr);
EXPECT_EQ(ret, -EINVAL);
}
-
-
+TEST_F(test_tle_udp_stream_max, stream_test_open_max)
+{
+ int i, j, cnt;
+ struct in_addr src_s;
+ struct in_addr dst_s;
+ int dst_port = 32678;
+ struct sockaddr_in *l_ipv4;
+ struct sockaddr_in *r_ipv4;
+
+ /* Set fields that will not change in sockaddr structures */
+ inet_pton(AF_INET, base_l_ipv4, &src_s);
+ l_ipv4 = (struct sockaddr_in *) &stream_prm.local_addr;
+ l_ipv4->sin_family = AF_INET;
+ l_ipv4->sin_port = htons(0);
+
+ inet_pton(AF_INET, base_r_ipv4, &dst_s);
+ r_ipv4 = (struct sockaddr_in *) &stream_prm.remote_addr;
+ r_ipv4->sin_family = AF_INET;
+
+ for (i = 0, cnt = 0; i < (int)devs.size(); i++) {
+ /* Derive the stream source address from the base IPv4
+ * address; adding i directly to s_addr increments only
+ * the highest octet (on a little-endian host)
+ */
+
+ l_ipv4->sin_addr.s_addr = src_s.s_addr + i;
+
+ for (j = 0; j < nb_streams; j++, cnt++) {
+ /* Derive the stream destination address and port
+ * from the base values (endian-safe increment)
+ */
+ r_ipv4->sin_port = htons(dst_port + j);
+ r_ipv4->sin_addr.s_addr =
+ htonl(ntohl(dst_s.s_addr) + j);
+
+ stream = tle_udp_stream_open(ctx,
+ (const struct tle_udp_stream_param *)
+ &stream_prm);
+
+ if (cnt < MAX_STREAMS) {
+ EXPECT_EQ(rte_errno, 0);
+ ASSERT_NE(stream, nullptr);
+ streams.push_back(stream);
+ } else {
+ EXPECT_EQ(stream, nullptr);
+ EXPECT_EQ(rte_errno, ENFILE);
+ }
+ }
+ }
+}
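A note on the two increment styles used above: the destination address is
advanced with htonl(ntohl(...) + j), which is endian-safe and bumps the
lowest octet, while the source address adds i to s_addr directly, which on a
little-endian host bumps only the highest octet. A minimal standalone sketch
of the endian-safe form (illustrative only):

    #include <stdio.h>
    #include <arpa/inet.h>

    int
    main(void)
    {
        struct in_addr a;
        char buf[INET_ADDRSTRLEN];

        inet_pton(AF_INET, "10.0.0.1", &a);
        /* to host order, +1 in the lowest octet, back to network order */
        a.s_addr = htonl(ntohl(a.s_addr) + 1);
        printf("%s\n", inet_ntop(AF_INET, &a, buf, sizeof(buf)));
        return 0;
    }

This prints "10.0.0.2" regardless of the host byte order.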
diff --git a/test/gtest/test_tle_udp_stream.h b/test/gtest/test_tle_udp_stream.h
index d0256c3..582eaea 100644
--- a/test/gtest/test_tle_udp_stream.h
+++ b/test/gtest/test_tle_udp_stream.h
@@ -16,81 +16,88 @@
#ifndef TEST_TLE_UDP_STREAM_H_
#define TEST_TLE_UDP_STREAM_H_
#include <iostream>
+#include <algorithm>
+#include <string>
#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <netinet/ip6.h>
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <rte_errno.h>
-#include <tle_udp_impl.h>
+#include <tle_udp.h>
#include <tle_event.h>
-int
-dummy_lookup4(void *opaque, const struct in_addr *addr,
- struct tle_udp_dest *res)
-{
- RTE_SET_USED(opaque);
- RTE_SET_USED(addr);
- RTE_SET_USED(res);
- return -ENOENT;
-}
+#include "test_common.h"
-int
-dummy_lookup6(void *opaque, const struct in6_addr *addr,
- struct tle_udp_dest *res)
-{
- RTE_SET_USED(opaque);
- RTE_SET_USED(addr);
- RTE_SET_USED(res);
- return -ENOENT;
-}
+#define MAX_STREAMS 0xFFFF
+#define MAX_STREAM_RBUFS 0x100
+#define MAX_STREAM_SBUFS 0x100
+#define RX_OFFLOAD 0x100
+#define TX_OFFLOAD 0x100
+
+using namespace std;
-struct tle_udp_ctx_param ctx_prm_tmpl = {
+struct tle_ctx_param ctx_prm_tmpl = {
.socket_id = SOCKET_ID_ANY,
- .max_streams = 0x10,
- .max_stream_rbufs = 0x100,
- .max_stream_sbufs = 0x100
+ .proto = TLE_PROTO_UDP,
+ .max_streams = MAX_STREAMS,
+ .max_stream_rbufs = MAX_STREAM_RBUFS,
+ .max_stream_sbufs = MAX_STREAM_SBUFS
};
-struct tle_udp_dev_param dev_prm_tmpl = {
- .rx_offload = 0x100,
- .tx_offload = 0x100
+struct tle_dev_param dev_prm_tmpl = {
+ .rx_offload = RX_OFFLOAD,
+ .tx_offload = TX_OFFLOAD
};
class test_tle_udp_stream: public ::testing::Test {
public:
- void setup_dev_prm(struct tle_udp_dev_param *,
+ void setup_dev_prm(struct tle_dev_param *,
char const *, char const *);
- struct tle_udp_ctx *setup_ctx(struct tle_udp_ctx_param *prm);
- struct tle_udp_dev *setup_dev(struct tle_udp_ctx *ctx,
- struct tle_udp_dev_param *dev_prm);
+ struct tle_ctx *setup_ctx(struct tle_ctx_param *prm);
+ struct tle_dev *setup_dev(struct tle_ctx *ctx,
+ struct tle_dev_param *dev_prm);
struct tle_evq *setup_event();
virtual void SetUp(void)
{
- char const *ipv4_laddr = "192.168.0.1";
- char const *ipv4_raddr = "192.168.0.2";
+ char const *ipv4_laddr = "192.0.0.1";
+ char const *ipv4_raddr = "10.0.0.1";
char const *ipv6 = "fe80::21e:67ff:fec2:2568";
- struct tle_udp_ctx_param cprm;
+ struct tle_ctx_param cprm;
+ port = 10000;
ctx = nullptr;
dev = nullptr;
stream = nullptr;
/* Setup Context */
cprm = ctx_prm_tmpl;
+ cprm.max_streams = 0xA;
cprm.lookup4 = dummy_lookup4;
cprm.lookup6 = dummy_lookup6;
ctx = setup_ctx(&cprm);
+ ASSERT_NE(ctx, nullptr);
+
/* Setup Dev */
memset(&dev_prm, 0, sizeof(dev_prm));
setup_dev_prm(&dev_prm, ipv4_laddr, ipv6);
dev = setup_dev(ctx, &dev_prm);
+ ASSERT_NE(dev, nullptr);
/* Stream Param & Event param */
memset(&stream_prm, 0, sizeof(struct tle_udp_stream_param));
- inet_pton(AF_INET, ipv4_laddr, &stream_prm.local_addr);
- inet_pton(AF_INET, ipv4_raddr, &stream_prm.remote_addr);
- stream_prm.local_addr.ss_family = AF_INET;
- stream_prm.remote_addr.ss_family = AF_INET;
+
+ ip4_addr = (struct sockaddr_in *) &stream_prm.local_addr;
+ ip4_addr->sin_family = AF_INET;
+ ip4_addr->sin_port = htons(port);
+ ip4_addr->sin_addr.s_addr = inet_addr(ipv4_laddr);
+
+ ip4_addr = (struct sockaddr_in *) &stream_prm.remote_addr;
+ ip4_addr->sin_family = AF_INET;
+ ip4_addr->sin_port = htons(port);
+ ip4_addr->sin_addr.s_addr = inet_addr(ipv4_raddr);
+
stream_prm.recv_ev = tle_event_alloc(setup_event(), nullptr);
stream_prm.send_ev = tle_event_alloc(setup_event(), nullptr);
}
@@ -98,22 +105,29 @@ public:
virtual void TearDown(void)
{
ret = 0;
- tle_udp_stream_close(stream);
- tle_udp_del_dev(dev);
- tle_udp_destroy(ctx);
+ for (auto s : streams)
+ tle_udp_stream_close(s);
+
+ tle_del_dev(dev);
+ tle_ctx_destroy(ctx);
}
int ret;
- struct tle_udp_ctx *ctx;
- struct tle_udp_dev *dev;
- struct tle_udp_stream *stream;
-
- struct tle_udp_ctx_param ctx_prm;
- struct tle_udp_dev_param dev_prm;
+ int port;
+ struct tle_ctx *ctx;
+ struct tle_dev *dev;
+ struct tle_stream *stream;
+ struct tle_ctx_param ctx_prm;
+ struct tle_dev_param dev_prm;
struct tle_udp_stream_param stream_prm;
+ struct sockaddr_in *ip4_addr;
+
+ vector<tle_stream *> streams;
};
-struct tle_evq *test_tle_udp_stream::setup_event() {
+struct tle_evq *
+test_tle_udp_stream::setup_event()
+{
int32_t socket_id;
uint32_t max_events;
struct tle_evq_param evq_params;
@@ -129,32 +143,98 @@ struct tle_evq *test_tle_udp_stream::setup_event() {
return evq;
}
-struct tle_udp_ctx
-*test_tle_udp_stream::setup_ctx(struct tle_udp_ctx_param *prm) {
- struct tle_udp_ctx *ctx;
+struct tle_ctx
+*test_tle_udp_stream::setup_ctx(struct tle_ctx_param *prm)
+{
+ struct tle_ctx *ctx;
- ctx = tle_udp_create(prm);
+ ctx = tle_ctx_create(prm);
return ctx;
}
-struct tle_udp_dev
-*test_tle_udp_stream::setup_dev(struct tle_udp_ctx *ctx,
- struct tle_udp_dev_param *dev_prm) {
-
- struct tle_udp_dev *dev;
+struct tle_dev
+*test_tle_udp_stream::setup_dev(struct tle_ctx *ctx,
+ struct tle_dev_param *dev_prm)
+{
+ struct tle_dev *dev;
- dev = tle_udp_add_dev(ctx, dev_prm);
+ dev = tle_add_dev(ctx, dev_prm);
return dev;
}
-void test_tle_udp_stream::setup_dev_prm(struct tle_udp_dev_param *dev_prm,
- char const *ipv4, char const *ipv6) {
-
+void
+test_tle_udp_stream::setup_dev_prm(struct tle_dev_param *dev_prm,
+ char const *ipv4, char const *ipv6)
+{
inet_pton(AF_INET, ipv4, &dev_prm->local_addr4);
inet_pton(AF_INET6, ipv6, &dev_prm->local_addr6);
}
+/* Fixture for max number of streams on single ctx + multiple devices */
+class test_tle_udp_stream_max: public ::test_tle_udp_stream {
+public:
+
+ virtual void SetUp(void)
+ {
+ /* Create enough devices and streams to exceed
+ * MAX_STREAMS on ctx
+ */
+ nb_devs = 10;
+ nb_streams = 6554;
+
+ in_addr_t src;
+
+ ctx_prm = ctx_prm_tmpl;
+ ctx_prm.lookup4 = dummy_lookup4;
+ ctx_prm.lookup6 = dummy_lookup6;
+ ctx = setup_ctx(&ctx_prm);
+ ASSERT_NE(ctx, (void *)NULL);
+
+ memset(&dev_prm, 0, sizeof(dev_prm));
+ setup_dev_prm(&dev_prm, base_l_ipv4, base_l_ipv6);
+
+ memset(&stream_prm, 0, sizeof(struct tle_udp_stream_param));
+ stream_prm.recv_ev = tle_event_alloc(setup_event(), nullptr);
+ stream_prm.send_ev = tle_event_alloc(setup_event(), nullptr);
+
+ for (i = 0; i < nb_devs; i++) {
+ dev = setup_dev(ctx, &dev_prm);
+ ASSERT_NE(dev, (void *)NULL);
+ devs.push_back(dev);
+
+ /* Modify base IP addresses for next loops */
+ src = dev_prm.local_addr4.s_addr;
+ src += 1;
+ dev_prm.local_addr4.s_addr = src;
+ }
+ }
+
+ virtual void TearDown(void)
+ {
+ for (auto s : streams)
+ tle_udp_stream_close(s);
+
+ for (auto d : devs)
+ tle_del_dev(d);
+
+ tle_ctx_destroy(ctx);
+ }
+
+ int i;
+ int nb_devs;
+ int nb_streams;
+ char const *base_l_ipv4 = "10.0.0.1";
+ char const *base_r_ipv4 = "190.0.0.1";
+ char const *base_l_ipv6 = "2000::1";
+ vector<tle_dev *> devs;
+ vector<tle_stream *> streams;
+};
+
#endif /* TEST_TLE_UDP_STREAM_H_ */
diff --git a/test/gtest/test_tle_udp_stream_gen.cpp b/test/gtest/test_tle_udp_stream_gen.cpp
new file mode 100644
index 0000000..0f60b09
--- /dev/null
+++ b/test/gtest/test_tle_udp_stream_gen.cpp
@@ -0,0 +1,444 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "test_tle_udp_stream_gen.h"
+
+TEST_P(tle_rx_enobufs, enobufs_test)
+{
+ int j, pkt_cnt = 0, enobufs_cnt = 0;
+ uint16_t nb_rx, nb_rx_bulk;
+ struct rte_mbuf *m[BURST_SIZE];
+ struct rte_mbuf *rp[BURST_SIZE];
+ int rc[BURST_SIZE];
+
+ /* Receive packets until we reach the end of the pcap file */
+ do {
+ memset(rc, 0, sizeof(int) * BURST_SIZE);
+ nb_rx = rte_eth_rx_burst(portid, 0, m, BURST_SIZE);
+ pkt_cnt += nb_rx;
+ for (auto &d : tp.devs) {
+ nb_rx_bulk = tle_udp_rx_bulk(d.ptr, m, rp, rc, nb_rx);
+ for (j = 0; j < BURST_SIZE; j++) {
+ if (rc[j] == ENOBUFS) {
+ enobufs_cnt++;
+ }
+ }
+ d.act_pkts_bulk_rx += nb_rx_bulk;
+ }
+ } while (nb_rx > 0);
+
+ /*
+ * Verify results: the number of rx packets per dev and stream,
+ * plus the packets dropped with ENOBUFS. E.g. with
+ * CTX_MAX_RBUFS = 0x100 and 1000 generated packets, 255 packets
+ * fit into the receive buffers and the remaining 745 are
+ * rejected with ENOBUFS.
+ */
+
+ for (auto &d : tp.devs) {
+ EXPECT_EQ(d.act_pkts_bulk_rx, d.exp_pkts_bulk_rx);
+ EXPECT_EQ(enobufs_cnt, pkt_cnt - d.act_pkts_bulk_rx);
+ }
+}
+/*
+ * TODO: defining test scenarios inline like this is unwieldy; in
+ * future they should move to external JSON files parsed at run time.
+ * For now, the first commented-out entry below documents what value
+ * goes into each field.
+ */
+INSTANTIATE_TEST_CASE_P(enobufs_test, tle_rx_enobufs, testing::Values(
+/* test_str example */
+/* {
+ "Description",
+ Devices configs below
+ {
+ {"Dev local IPv4", "Dev local IPv6",
+ RX_OFFLOAD, TX_OFFLOAD,
+ Exp. nb. of rx pkts on device,
+ Exp. nb. of tx pkts on device,
+ Exp. nb. of total ENOENT pkts on device,
+ },
+ },
+ Streams config on device below
+ {
+ {local port, remote port, "local ip", "remote ip",
+ exp. nb. of rx. pkts, exp. nb. of tx. pkts},
+ },
+ Pkts to generate with scapy to pcap file
+ {
+ {"Src IP", "Dst IP",
+ Src port, Dst port,
+ nb of pkts,
+ l3 chksum, l4 chksum, fragment?},
+ }
+}, */
+test_str
+{
+ "IPv4 - 1 dev 1 stream, only correct pkts",
+ {
+ {"10.0.0.1", "2001::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD,
+ CTX_MAX_RBUFS - 1, 0, 0}
+ },
+ {
+ {AF_INET, 10001, 10002, "10.0.0.1", "10.0.0.2", 0, 0},
+ },
+ {
+ {AF_INET, "10.0.0.2", "10.0.0.1", 10002, 10001, 1000, 0, 0, 0},
+ }
+},
+test_str
+{
+ "IPv4 - 1 dev 1 stream, only correct pkts",
+ {
+ {"10.0.0.1", "2001::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD,
+ CTX_MAX_RBUFS - 1, 0, 0}
+ },
+ {
+ {AF_INET6, 10001, 10002, "2001::1000", "2001::2000", 0, 0},
+ },
+ {
+ {AF_INET6, "2001::2000", "2001::1000", 10002, 10001, 1000, 0, 0, 0},
+ }
+}
+));
+
+TEST_P(tle_rx_test, test)
+{
+ int j;
+ uint16_t nb_rx, nb_rx_bulk, nb_str_rx;
+ struct rte_mbuf *m[BURST_SIZE];
+ struct rte_mbuf *n[BURST_SIZE];
+ struct rte_mbuf *rp[BURST_SIZE];
+ int rc[BURST_SIZE];
+
+ /* Receive packets until we reach the end of the pcap file */
+ do {
+ nb_rx = rte_eth_rx_burst(portid, 0, m, BURST_SIZE);
+ for (auto &d : tp.devs) {
+ memset(rc, 0, sizeof(int) * BURST_SIZE);
+ nb_rx_bulk = tle_udp_rx_bulk(d.ptr, m, rp, rc, nb_rx);
+ d.act_pkts_bulk_rx += nb_rx_bulk;
+ for (j = 0; j < BURST_SIZE; j++) {
+ if (rc[j] == ENOENT)
+ d.act_pkts_enoent += 1;
+ }
+ }
+
+ for (auto &s : tp.streams) {
+ nb_str_rx = tle_udp_stream_recv(s.ptr, n, BURST_SIZE);
+ s.act_pkts_rx += nb_str_rx;
+ }
+ } while (nb_rx > 0);
+
+ /*
+ * Verify results - number of rx packets per dev and stream.
+ */
+ for (auto &d : tp.devs) {
+ EXPECT_EQ(d.act_pkts_bulk_rx, d.exp_pkts_bulk_rx);
+ EXPECT_EQ(d.act_pkts_enoent, d.exp_pkts_enoent);
+ }
+
+ for (auto &s : tp.streams) {
+ EXPECT_EQ(s.act_pkts_rx, s.exp_pkts_rx);
+ }
+}
+INSTANTIATE_TEST_CASE_P(rx_recv, tle_rx_test, testing::Values(
+test_str
+{
+ "IPv4 - 1 dev 1 stream, only correct pkts",
+ {
+ {"10.0.0.1", "2001::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD, 10, 0, 0},
+ },
+ {
+ {AF_INET, 10001, 10002, "10.0.0.1", "10.0.0.2", 10, 0},
+ },
+ {
+ {AF_INET, "10.0.0.2", "10.0.0.1", 10002, 10001, 10, 0, 0, 0},
+ }
+},
+
+test_str
+{
+ "IPv4 - 1 dev 1 stream, only incorrect pkts",
+ {
+ {"10.0.0.1", "2001::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD, 0, 0, 40},
+ },
+ {
+ {AF_INET, 10001, 10002, "10.0.0.1", "10.0.0.2", 0, 0},
+ },
+ {
+ {AF_INET, "20.0.0.2", "10.0.0.1", 10002, 10001, 10, 0, 0, 0},
+ {AF_INET, "10.0.0.2", "20.0.0.1", 10002, 10001, 10, 0, 0, 0},
+ {AF_INET, "10.0.0.2", "10.0.0.1", 20002, 10001, 10, 0, 0, 0},
+ {AF_INET, "10.0.0.2", "10.0.0.1", 10002, 20001, 10, 0, 0, 0},
+ }
+},
+
+test_str
+{
+ "IPv4 - 1 dev with 1 stream, only correct pkts but incorrect chksum",
+ {
+ {"10.0.0.1", "2001::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD, 30, 0, 0}
+ },
+ {
+ {AF_INET, 10001, 10002, "10.0.0.1", "10.0.0.2", 0, 0},
+ },
+ {
+ {AF_INET, "10.0.0.2", "10.0.0.1", 10002, 10001, 10, 1, 0, 0},
+ {AF_INET, "10.0.0.2", "10.0.0.1", 10002, 10001, 10, 0, 1, 0},
+ {AF_INET, "10.0.0.2", "10.0.0.1", 10002, 10001, 10, 1, 1, 0},
+ }
+},
+
+test_str
+{
+ "IPv6 - 1 dev with 1 stream, only correct pkts",
+ {
+ {"10.0.0.1", "2001::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD, 10, 0, 0}
+ },
+ {
+ {AF_INET6, 10001, 10002, "2001::1000", "2001::2000", 10, 0},
+ },
+ {
+ {AF_INET6, "2001::2000", "2001::1000", 10002, 10001, 10, 0, 0, 0},
+ }
+},
+
+test_str
+{
+ "IPv6 - 1 dev with 1 stream, only incorrect pkts",
+ {
+ {"10.0.0.1", "2001::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD, 0, 0, 40},
+ },
+ {
+ {AF_INET6, 10001, 10002, "2001::1000", "2001::2000", 0, 0},
+ },
+ {
+ {AF_INET6, "3001::2000", "2001::1000", 10002, 10001, 10, 0, 0, 0},
+ {AF_INET6, "2001::3000", "2001::1000", 10002, 10001, 10, 0, 0, 0},
+ {AF_INET6, "2001::2000", "2001::1000", 30002, 10001, 10, 0, 0, 0},
+ {AF_INET6, "2001::2000", "2001::1000", 10002, 30001, 10, 0, 0, 0},
+ }
+},
+
+test_str
+{
+ "IPv6 - 1 dev with 1 stream, only correct pkts but incorrect chksum",
+ /*
+ * Note: the packets with only a bad L3 checksum are still
+ * received, since the IPv6 header has no checksum field.
+ */
+ {
+ {"10.0.0.1", "2001::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD, 30, 0, 0}
+ },
+ {
+ {AF_INET6, 10001, 10002, "2001::1000", "2001::2000", 10, 0},
+ },
+ {
+ {AF_INET6, "2001::2000", "2001::1000", 10002, 10001, 10, 1, 0, 0},
+ {AF_INET6, "2001::2000", "2001::1000", 10002, 10001, 10, 0, 1, 0},
+ {AF_INET6, "2001::2000", "2001::1000", 10002, 10001, 10, 1, 1, 0},
+ }
+},
+
+test_str
+{
+ /* Multiple streams, multiple correct pkt streams, mixed IPv4 & IPv6;
+ * 3 dev, 3 stream per dev, only correct pkts */
+ "Mixed IPv4+IPv6; Multiple devs with multiple correct streams",
+ {
+ {"10.0.0.1", "2001::1000",RX_NO_OFFLOAD, TX_NO_OFFLOAD, 300, 0, 600},
+ {"20.0.0.1", "2002::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD, 300, 0, 600},
+ {"30.0.0.1", "2003::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD, 300, 0, 600},
+ },
+ {
+ {AF_INET, 10001, 10011, "10.0.0.1", "10.0.0.2", 100, 0},
+ {AF_INET, 10002, 10012, "10.0.0.1", "10.0.0.3", 100, 0},
+ {AF_INET6, 10003, 10013, "2001::1000", "2001::4000", 100, 0},
+ {AF_INET, 20001, 20011, "20.0.0.1", "20.0.0.2", 100, 0},
+ {AF_INET6, 20002, 20012, "2002::1000", "2002::3000", 100, 0},
+ {AF_INET6, 20003, 20013, "2002::1000", "2002::4000", 100, 0},
+ {AF_INET, 20001, 20011, "30.0.0.1", "30.0.0.2", 100, 0},
+ {AF_INET6, 20002, 20012, "2003::1000", "2003::3000", 100, 0},
+ {AF_INET6, 20003, 20013, "2003::1000", "2003::4000", 100, 0}
+ },
+ {
+ {AF_INET, "10.0.0.2", "10.0.0.1", 10011, 10001, 100, 0, 0, 0},
+ {AF_INET, "10.0.0.3", "10.0.0.1", 10012, 10002, 100, 0, 0, 0},
+ {AF_INET, "20.0.0.2", "20.0.0.1", 20011, 20001, 100, 0, 0, 0},
+ {AF_INET, "30.0.0.2", "30.0.0.1", 20011, 20001, 100, 0, 0, 0},
+ {AF_INET6, "2001::4000", "2001::1000", 10013, 10003, 100, 0, 0, 0},
+ {AF_INET6, "2002::3000", "2002::1000", 20012, 20002, 100, 0, 0, 0},
+ {AF_INET6, "2002::4000", "2002::1000", 20013, 20003, 100, 0, 0, 0},
+ {AF_INET6, "2003::3000", "2003::1000", 20012, 20002, 100, 0, 0, 0},
+ {AF_INET6, "2003::4000", "2003::1000", 20013, 20003, 100, 0, 0, 0},
+ }
+}
+));
+
+TEST_P(tle_tx_test, tx_send)
+{
+ int i, j, s, pkts_to_send;
+ uint16_t nb_tx, nb_tx_bulk, nb_str_tx;
+ struct rte_mbuf *m[BURST_SIZE];
+ struct rte_mbuf *n[BURST_SIZE];
+ struct sockaddr_storage dest;
+ uint8_t *plaintext;
+ char text[] = "DEADBEEF";
+
+ for (auto &sg : tp.gen_streams) {
+
+ /* Find the stream we will be sending from; save its pointer
+ * and index for later TX counter validation */
+ for (s = 0; s < (int)tp.streams.size(); s++) {
+ auto tmp = tp.streams[s];
+ if (sg.dst_ip.compare(tmp.l_ip) == 0 && sg.dst_port == tmp.l_port) {
+ stream = tmp.ptr;
+ break;
+ }
+ }
+
+ /* Prepare sockaddr for sending */
+ memset(&dest, 0, sizeof(dest));
+ if (sg.family == AF_INET) {
+ ((sockaddr_in *) &dest)->sin_family = AF_INET;
+ ((sockaddr_in *) &dest)->sin_port = htons(sg.src_port);
+ inet_pton(AF_INET, sg.src_ip.c_str(),
+ &((sockaddr_in *) &dest)->sin_addr);
+ } else if (sg.family == AF_INET6) {
+ ((sockaddr_in6 *) &dest)->sin6_family = AF_INET6;
+ ((sockaddr_in6 *) &dest)->sin6_port = htons(sg.src_port);
+ inet_pton(AF_INET6, sg.src_ip.c_str(),
+ &((sockaddr_in6 *) &dest)->sin6_addr);
+ }
+
+ nb_str_tx = 0;
+ /* Send all packets to the stream */
+ for (i = 0; i < sg.nb_pkts; i += nb_str_tx) {
+ pkts_to_send = (sg.nb_pkts - i < BURST_SIZE) ?
+ (sg.nb_pkts - i) : BURST_SIZE;
+
+ /* Allocate Mbufs */
+ for (j = 0; j < pkts_to_send; j++) {
+ m[j] = rte_pktmbuf_alloc(mbuf_pool);
+ ASSERT_NE(m[j], nullptr);
+
+ memset(rte_pktmbuf_mtod(m[j], uint8_t *), 0,
+ rte_pktmbuf_tailroom(m[j]));
+ plaintext = (uint8_t *)rte_pktmbuf_append(m[j],
+ sizeof(text));
+ ASSERT_NE(plaintext, nullptr);
+ memcpy(plaintext, text, sizeof(text));
+ }
+
+ nb_str_tx = tle_udp_stream_send(stream, m, pkts_to_send,
+ reinterpret_cast<struct sockaddr*>(&dest));
+ if (nb_str_tx == 0) {
+ for (j = 0; j < pkts_to_send; j++) {
+ rte_pktmbuf_free(m[j]);
+ }
+ nb_str_tx = pkts_to_send;
+ continue;
+ }
+ tp.streams[s].act_pkts_tx += nb_str_tx;
+ }
+ }
+
+ /* Send out packets from devices */
+ for (auto &d : tp.devs) {
+ nb_tx_bulk = 0;
+ do {
+ nb_tx_bulk = tle_udp_tx_bulk(d.ptr, n, BURST_SIZE);
+ d.act_pkts_bulk_tx += nb_tx_bulk;
+ nb_tx = rte_eth_tx_burst(portid, 0, n, nb_tx_bulk);
+ EXPECT_EQ(nb_tx, nb_tx_bulk);
+ } while (nb_tx_bulk > 0);
+ }
+
+ /*
+ * Verify results - number of tx packets per dev and stream.
+ */
+ for (auto &d : tp.devs) {
+ EXPECT_EQ(d.act_pkts_bulk_tx, d.exp_pkts_bulk_tx);
+ EXPECT_EQ(d.act_pkts_enoent, d.exp_pkts_enoent);
+ }
+
+ for (auto &s : tp.streams) {
+ EXPECT_EQ(s.act_pkts_tx, s.exp_pkts_tx);
+ }
+}
+
+INSTANTIATE_TEST_CASE_P(test, tle_tx_test, testing::Values(
+test_str
+{
+ "IPv4 - 1 dev 1 stream, only correct pkts",
+ {
+ {"10.0.0.1", "2001::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD, 0, 100, 0},
+ },
+ {
+ {AF_INET, 10001, 10002, "10.0.0.1", "10.0.0.2", 0, 100},
+ },
+ {
+ {AF_INET, "10.0.0.2", "10.0.0.1", 10002, 10001, 100, 0, 0, 0},
+ }
+},
+test_str
+{
+ "IPv6 - 1 dev 1 stream, only correct pkts",
+ {
+ {"10.0.0.1", "2001::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD, 0, 100, 0},
+ },
+ {
+ {AF_INET6, 10001, 10002, "2001::1000", "2001::2000", 0, 100},
+ },
+ {
+ {AF_INET6, "2001::2000", "2001::1000", 10002, 10001, 100, 0, 0, 0},
+ }
+},
+test_str
+{
+ /* Multiple streams, mixed IPv4 & IPv6; */
+ "Mixed IPv4+IPv6; Multiple devs with multiple correct streams",
+ {
+ {"10.0.0.1", "2001::1000",RX_NO_OFFLOAD, TX_NO_OFFLOAD, 0, 300, 0},
+ {"20.0.0.1", "2002::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD, 0, 300, 0},
+ {"30.0.0.1", "2003::1000", RX_NO_OFFLOAD, TX_NO_OFFLOAD, 0, 300, 0},
+ },
+ {
+ {AF_INET, 10001, 10011, "10.0.0.1", "10.0.0.2", 0, 100},
+ {AF_INET, 10002, 10012, "10.0.0.1", "10.0.0.3", 0, 100},
+ {AF_INET6, 10003, 10013, "2001::1000", "2001::4000", 0, 100},
+ {AF_INET, 20001, 20011, "20.0.0.1", "20.0.0.2", 0, 100},
+ {AF_INET6, 20002, 20012, "2002::1000", "2002::3000", 0, 100},
+ {AF_INET6, 20003, 20013, "2002::1000", "2002::4000", 0, 100},
+ {AF_INET, 20001, 20011, "30.0.0.1", "30.0.0.2", 0, 100},
+ {AF_INET6, 20002, 20012, "2003::1000", "2003::3000", 0, 100},
+ {AF_INET6, 20003, 20013, "2003::1000", "2003::4000", 0, 100}
+ },
+ {
+ {AF_INET, "10.0.0.2", "10.0.0.1", 10011, 10001, 100, 0, 0, 0},
+ {AF_INET, "10.0.0.3", "10.0.0.1", 10012, 10002, 100, 0, 0, 0},
+ {AF_INET, "20.0.0.2", "20.0.0.1", 20011, 20001, 100, 0, 0, 0},
+ {AF_INET, "30.0.0.2", "30.0.0.1", 20011, 20001, 100, 0, 0, 0},
+ {AF_INET6, "2001::4000", "2001::1000", 10013, 10003, 100, 0, 0, 0},
+ {AF_INET6, "2002::3000", "2002::1000", 20012, 20002, 100, 0, 0, 0},
+ {AF_INET6, "2002::4000", "2002::1000", 20013, 20003, 100, 0, 0, 0},
+ {AF_INET6, "2003::3000", "2003::1000", 20012, 20002, 100, 0, 0, 0},
+ {AF_INET6, "2003::4000", "2003::1000", 20013, 20003, 100, 0, 0, 0},
+ }
+}
+));
diff --git a/test/gtest/test_tle_udp_stream_gen.h b/test/gtest/test_tle_udp_stream_gen.h
new file mode 100644
index 0000000..294e37c
--- /dev/null
+++ b/test/gtest/test_tle_udp_stream_gen.h
@@ -0,0 +1,541 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TEST_TLE_UDP_STREAM_GEN_H_
+#define TEST_TLE_UDP_STREAM_GEN_H_
+
+#include <sys/types.h>
+
+#include <stdio.h>
+#include <map>
+#include <string>
+#include <algorithm>
+#include <arpa/inet.h>
+#include <netinet/ip6.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <gtest/gtest.h>
+
+#include <rte_version.h>
+
+#include <tle_udp.h>
+#include <tle_event.h>
+
+#include "test_common.h"
+
+#define RX_NO_OFFLOAD 0
+#define TX_NO_OFFLOAD 0
+#define CTX_MAX_RBUFS 0x100
+#define CTX_MAX_SBUFS 0x100
+
+#define RX_PCAP "rx_pcap.cap"
+#define TX_PCAP "tx_pcap.cap"
+
+/*
+ * Check the DPDK version:
+ * the pcap vdev name changed from "eth_pcap" to "net_pcap" after
+ * DPDK 16.07, so pick the name that matches the DPDK version being
+ * built against (e.g. DPDK 17.02 selects "net_pcap0").
+ */
+#if (RTE_VERSION_NUM(16, 7, 0, 0) < \
+ RTE_VERSION_NUM(RTE_VER_YEAR, RTE_VER_MONTH, 0, 0))
+ #define VDEV_NAME "net_pcap0"
+#else
+ #define VDEV_NAME "eth_pcap0"
+#endif
+
+using namespace std;
+
+extern struct rte_mempool *mbuf_pool;
+
+/*
+ * Lookup functions used by the UDP context: resolve a destination
+ * address against the route map passed in via 'opaque' and fill the
+ * tle_dest header template used on TX.
+ */
+
+static int
+lookup4_function(void *opaque, const struct in_addr *addr, struct tle_dest *res)
+{
+ struct in_addr route;
+ struct ether_hdr *eth;
+ struct ipv4_hdr *ip4h;
+ auto routes = static_cast<map<string, tle_dev *> *>(opaque);
+
+ /* Check all routes added in map for a match with dest *addr */
+ for (auto it = routes->begin(); it != routes->end(); ++it) {
+ inet_pton(AF_INET, it->first.c_str(), &route);
+
+ /* If it matches then fill *res and return with 0 code */
+ if (memcmp(&route, addr, sizeof(struct in_addr)) == 0) {
+ memset(res, 0, sizeof(*res));
+ res->dev = it->second;
+ res->mtu = 1500;
+ res->l2_len = sizeof(*eth);
+ res->l3_len = sizeof(*ip4h);
+ res->head_mp = mbuf_pool;
+ eth = (struct ether_hdr *)res->hdr;
+ eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+ ip4h = (struct ipv4_hdr *)(eth + 1);
+ ip4h->version_ihl = (4 << 4) |
+ (sizeof(*ip4h) / IPV4_IHL_MULTIPLIER);
+ ip4h->time_to_live = 64;
+ ip4h->next_proto_id = IPPROTO_UDP;
+ ip4h->fragment_offset = 0;
+
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static int
+lookup6_function(void *opaque, const struct in6_addr *addr,
+ struct tle_dest *res)
+{
+ struct ether_hdr *eth;
+ struct ipv6_hdr *ip6h;
+ struct in6_addr route;
+ auto routes = static_cast<map<string, tle_dev *> *>(opaque);
+
+ /* Check all routes added in map for a match with dest *addr */
+ for (auto it = routes->begin(); it != routes->end(); ++it) {
+ inet_pton(AF_INET6, it->first.c_str(), &route);
+
+ /* If it matches then fill *res and return with 0 code */
+ if (memcmp(&route, addr, sizeof(struct in6_addr)) == 0) {
+ memset(res, 0, sizeof(*res));
+ res->dev = it->second;
+ res->mtu = 1500;
+ res->l2_len = sizeof(*eth);
+ res->l3_len = sizeof(*ip6h);
+ res->head_mp = mbuf_pool;
+ eth = (struct ether_hdr *)res->hdr;
+ eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+ ip6h = (struct ipv6_hdr *)(eth + 1);
+ ip6h->vtc_flow = 6 << 4;
+ ip6h->proto = IPPROTO_UDP;
+ ip6h->hop_limits = 64;
+
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
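The 'opaque' argument of both lookup functions is the route map built in
SetUp() below; a condensed sketch of that wiring (dev_ptr stands in for a
device previously returned by tle_add_dev):

    map<string, tle_dev *> routes4;
    routes4.insert(pair<string, tle_dev *>("10.0.0.2", dev_ptr));

    struct tle_ctx_param ctx_prm;
    memset(&ctx_prm, 0, sizeof(ctx_prm));
    ctx_prm.lookup4 = lookup4_function;
    ctx_prm.lookup4_data = &routes4; /* handed back as 'opaque' */

With this in place, a TX towards 10.0.0.2 resolves to dev_ptr and gets the
header template filled in above.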
+
+/*
+ * Structures used to describe test instances:
+ * test_str - main structure describing a test case instance; contains the
+ * instance description and vectors with information about the
+ * devices, streams & streams to be generated for the RX/TX path.
+ * dev_s - describes a single device; contains the device addresses,
+ * checksum offload information and the expected number of
+ * received / transmitted packets for that device in the scenario.
+ * stream_s - describes a single stream to be created; contains the local &
+ * remote IPs and port numbers and the expected number of received
+ * and transmitted packets.
+ * stream_g - describes a stream to be generated via the scapy script;
+ * contains IP addresses and port numbers and whether the L3/L4
+ * checksums should be deliberately miscalculated.
+ * In future: whether the packets should be fragmented.
+ */
+
+struct stream_g {
+ int family;
+ string src_ip;
+ string dst_ip;
+ int src_port;
+ int dst_port;
+ int nb_pkts;
+ bool bad_chksum_l3;
+ bool bad_chksum_l4;
+ bool fragment;
+};
+
+struct stream_s {
+ int family;
+ int l_port;
+ int r_port;
+ string l_ip;
+ string r_ip;
+ int exp_pkts_rx;
+ int exp_pkts_tx;
+ int act_pkts_rx;
+ int act_pkts_tx;
+ tle_stream *ptr;
+};
+
+struct dev_s {
+ string l_ipv4;
+ string l_ipv6;
+ int rx_offload;
+ int tx_offload;
+ int exp_pkts_bulk_rx;
+ int exp_pkts_bulk_tx;
+ int exp_pkts_enoent;
+ int act_pkts_bulk_rx;
+ int act_pkts_bulk_tx;
+ int act_pkts_enoent;
+ tle_dev *ptr;
+};
+
+struct test_str {
+ string test_desc;
+ vector<dev_s> devs;
+ vector<stream_s> streams;
+ vector<stream_g> gen_streams;
+};
+
+const char *vdevargs[] = {VDEV_NAME",rx_pcap=" RX_PCAP",tx_pcap=" TX_PCAP};
+
+class test_tle_udp_gen_base : public testing::TestWithParam<test_str> {
+public:
+
+ tle_ctx *setup_ctx(void);
+ tle_dev *setup_dev(tle_ctx *ctx, uint32_t rx_offload,
+ uint32_t tx_offload, const char *local_ipv4,
+ const char *local_ipv6);
+ tle_evq *setup_evq(void);
+ tle_event *setup_event(void);
+ tle_stream *setup_stream(struct tle_ctx *ctx, int family,
+ const char *l_ip, const char *r_ip, int l_port, int r_port);
+ int setup_devices(uint8_t *portid);
+ int cleanup_devices(uint8_t portid);
+ int prepare_pcaps(string l_ip, string r_ip, int l_port, int r_port,
+ int nb_pkts, int l3_chksum, int l4_chksum, string rx_pcap_dest);
+
+ int cleanup_pcaps(const char *file);
+ int close_streams(vector<struct stream_s> streams);
+ int del_devs(vector<struct dev_s> devs);
+
+ virtual void SetUp(void)
+ {
+ nb_ports = 1;
+ tp = GetParam();
+
+ /* Usual TLDK setup below: ctx, dev, events, etc. */
+ ctx = setup_ctx();
+ ASSERT_NE(ctx, nullptr);
+
+ evq = setup_evq();
+ ASSERT_NE(evq, nullptr);
+
+ for (auto &d : tp.devs) {
+ dev = setup_dev(ctx, d.rx_offload, d.tx_offload,
+ d.l_ipv4.c_str(), d.l_ipv6.c_str());
+ ASSERT_NE(dev, nullptr);
+
+ /* Initialize counters for verifying results */
+ d.act_pkts_bulk_rx = 0;
+ d.act_pkts_bulk_tx = 0;
+ d.act_pkts_enoent = 0;
+
+ /* Save pointer to device */
+ d.ptr = dev;
+ }
+
+ for (auto &s : tp.streams) {
+ stream = setup_stream(ctx, s.family,
+ s.l_ip.c_str(), s.r_ip.c_str(),
+ s.l_port, s.r_port);
+ ASSERT_NE(stream, nullptr);
+
+ /* Initialize counters for verifying results */
+ s.act_pkts_rx = 0;
+ s.act_pkts_tx = 0;
+
+ /* Save pointer to stream */
+ s.ptr = stream;
+
+ /* Find which dev has the same address as the stream's
+ * local address and save the route for later use in the
+ * lookup functions
+ */
+ if (s.family == AF_INET) {
+ for (auto &d : tp.devs) {
+ if (s.l_ip.compare(d.l_ipv4) == 0)
+ routes4.insert(pair<string,
+ tle_dev *>(s.r_ip,
+ d.ptr));
+ }
+ } else if (s.family == AF_INET6) {
+ for (auto &d : tp.devs) {
+ if (s.l_ip.compare(d.l_ipv6) == 0)
+ routes6.insert(pair<string,
+ tle_dev *>(s.r_ip,
+ d.ptr));
+ }
+ }
+ }
+
+ /* setup pcap/eth devices */
+ ASSERT_EQ(setup_devices(&portid), 0);
+ }
+
+ virtual void TearDown(void)
+ {
+ /*
+ * Remember to shut down & detach the rte devices
+ * and delete the .pcap files so as not to
+ * interfere with the next test
+ */
+ close_streams(tp.streams);
+ del_devs(tp.devs);
+ tle_ctx_destroy(ctx);
+ cleanup_devices(portid);
+ cleanup_pcaps(RX_PCAP);
+ cleanup_pcaps(TX_PCAP);
+ }
+
+ uint8_t nb_ports;
+ uint8_t portid;
+ uint32_t socket_id;
+ uint32_t max_events;
+ struct tle_ctx *ctx;
+ struct tle_dev *dev;
+ struct tle_evq *evq;
+ struct tle_stream *stream;
+ map<string, tle_dev *> routes4;
+ map<string, tle_dev *> routes6;
+ test_str tp;
+ void *cb;
+};
+
+int
+test_tle_udp_gen_base::setup_devices(uint8_t *portid)
+{
+ /* attach + configure + start pmd device */
+ if (rte_eth_dev_attach(vdevargs[0], portid) != 0)
+ return -1;
+ cb = rte_eth_add_rx_callback(*portid, 0,
+ typen_rx_callback, nullptr);
+ if (port_init(*portid, mbuf_pool) != 0)
+ return -1;
+
+ return 0;
+}
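With the VDEV_NAME, RX_PCAP and TX_PCAP macros above, the vdev argument
string handed to rte_eth_dev_attach() expands to:

    net_pcap0,rx_pcap=rx_pcap.cap,tx_pcap=tx_pcap.cap

(eth_pcap0 on DPDK 16.07 and older), i.e. a pcap-backed port that replays
its RX traffic from rx_pcap.cap and records everything sent into
tx_pcap.cap.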
+
+int
+test_tle_udp_gen_base::cleanup_devices(uint8_t portid)
+{
+ /* release mbufs + detach device */
+ char name[RTE_ETH_NAME_MAX_LEN];
+
+ rte_eth_dev_stop(portid);
+ rte_eth_dev_close(portid);
+ rte_eth_dev_detach(portid, name);
+
+ return 0;
+}
+
+int
+test_tle_udp_gen_base::prepare_pcaps(string l_ip, string r_ip, int l_port,
+ int r_port, int nb_pkts, int l3_chksum, int l4_chksum,
+ string rx_pcap_dest)
+{
+ string py_cmd;
+
+ /* generate pcap rx & tx files * for tests using scapy */
+ py_cmd = "python ./test/gtest/test_scapy_gen.py ";
+ py_cmd = py_cmd + " " + l_ip + " " + r_ip + " " +
+ to_string(l_port) + " " + to_string(r_port) + " " +
+ to_string(nb_pkts);
+
+ if (l3_chksum > 0)
+ py_cmd = py_cmd + " -bc3 " + to_string(l3_chksum);
+ if (l4_chksum > 0)
+ py_cmd = py_cmd + " -bc4 " + to_string(l4_chksum);
+ py_cmd = py_cmd + " " + rx_pcap_dest;
+ system(py_cmd.c_str());
+
+ return 0;
+}
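For example, generating 10 packets from 10.0.0.2:10002 to 10.0.0.1:10001
with a corrupted L4 checksum, as several RX scenarios above request,
composes the command:

    python ./test/gtest/test_scapy_gen.py 10.0.0.2 10.0.0.1 10002 10001 10 -bc4 1 rx_pcap.cap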
+
+int
+test_tle_udp_gen_base::cleanup_pcaps(const char *file)
+{
+ if (remove(file) != 0)
+ perror("Error deleting pcap file");
+
+ return 0;
+}
+
+tle_ctx *
+test_tle_udp_gen_base::setup_ctx(void)
+{
+ struct tle_ctx *ctx;
+ struct tle_ctx_param ctx_prm;
+
+ memset(&ctx_prm, 0, sizeof(ctx_prm));
+ ctx_prm.socket_id = SOCKET_ID_ANY;
+ ctx_prm.max_streams = 0x10;
+ ctx_prm.max_stream_rbufs = CTX_MAX_RBUFS;
+ ctx_prm.max_stream_sbufs = CTX_MAX_SBUFS;
+ ctx_prm.lookup4 = lookup4_function;
+ ctx_prm.lookup6 = lookup6_function;
+ ctx_prm.lookup4_data = &routes4;
+ ctx_prm.lookup6_data = &routes6;
+
+ ctx = tle_ctx_create(&ctx_prm);
+
+ return ctx;
+}
+
+struct tle_dev *
+test_tle_udp_gen_base::setup_dev(struct tle_ctx *ctx, uint32_t rx_offload,
+ uint32_t tx_offload, const char *l_ipv4, const char *l_ipv6)
+{
+ struct tle_dev *dev;
+ struct tle_dev_param dev_prm;
+
+ memset(&dev_prm, 0, sizeof(dev_prm));
+ dev_prm.rx_offload = rx_offload;
+ dev_prm.tx_offload = tx_offload;
+ if (l_ipv4 != NULL)
+ inet_pton(AF_INET, l_ipv4, &dev_prm.local_addr4);
+ if (l_ipv6 != NULL)
+ inet_pton(AF_INET6, l_ipv6, &dev_prm.local_addr6);
+
+ dev = tle_add_dev(ctx, &dev_prm);
+
+ return dev;
+}
+
+struct tle_evq *
+test_tle_udp_gen_base::setup_evq()
+{
+ uint32_t socket_id;
+ uint32_t max_events;
+ struct tle_evq_param evq_params;
+ struct tle_evq *evq;
+
+ socket_id = SOCKET_ID_ANY;
+ max_events = 10;
+ memset(&evq_params, 0, sizeof(struct tle_evq_param));
+
+ evq_params.socket_id = socket_id;
+ evq_params.max_events = max_events;
+ evq = tle_evq_create(&evq_params);
+ return evq;
+}
+
+struct tle_stream *
+test_tle_udp_gen_base::setup_stream(struct tle_ctx *ctx, int family,
+ const char *l_ip, const char *r_ip, int l_port, int r_port)
+{
+ struct tle_stream *stream;
+ struct tle_udp_stream_param stream_prm;
+ struct sockaddr_in *ip4_addr;
+ struct sockaddr_in6 *ip6_addr;
+ int32_t ret;
+
+ memset(&stream_prm, 0, sizeof(stream_prm));
+
+ if (family == AF_INET) {
+ ip4_addr = (struct sockaddr_in *) &stream_prm.local_addr;
+ ip4_addr->sin_family = AF_INET;
+ ip4_addr->sin_port = htons(l_port);
+ ip4_addr->sin_addr.s_addr = inet_addr(l_ip);
+
+ ip4_addr = (struct sockaddr_in *) &stream_prm.remote_addr;
+ ip4_addr->sin_family = AF_INET;
+ ip4_addr->sin_port = htons(r_port);
+ ip4_addr->sin_addr.s_addr = inet_addr(r_ip);
+ } else if (family == AF_INET6) {
+ ip6_addr = (struct sockaddr_in6 *) &stream_prm.local_addr;
+ ip6_addr->sin6_family = AF_INET6;
+ inet_pton(AF_INET6, l_ip, &ip6_addr->sin6_addr);
+ ip6_addr->sin6_port = htons(l_port);
+
+ ip6_addr = (struct sockaddr_in6 *) &stream_prm.remote_addr;
+ ip6_addr->sin6_family = AF_INET6;
+ inet_pton(AF_INET6, r_ip, &ip6_addr->sin6_addr);
+ ip6_addr->sin6_port = htons(r_port);
+ } else {
+ printf("Invalid address family, stream not created\n");
+ return NULL;
+ }
+
+ /* Callbacks and events are not exercised by these tests yet */
+ /* TODO: add tests which use callbacks and events. */
+ stream_prm.recv_ev = tle_event_alloc(evq, nullptr);
+ stream_prm.send_ev = tle_event_alloc(evq, nullptr);
+
+ stream = tle_udp_stream_open(ctx,
+ (const struct tle_udp_stream_param *) &stream_prm);
+
+ return stream;
+}
+
+int
+test_tle_udp_gen_base::close_streams(vector<struct stream_s> streams)
+{
+ int rc;
+
+ for (auto &s : streams) {
+ rc = tle_udp_stream_close(s.ptr);
+ if (rc != 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+test_tle_udp_gen_base::del_devs(vector<struct dev_s> devs)
+{
+ int rc;
+
+ for (auto &d : devs) {
+ rc = tle_del_dev(d.ptr);
+ if (rc != 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+class tle_rx_test : public test_tle_udp_gen_base {
+public:
+ virtual void SetUp(void)
+ {
+ /* Generate the RX pcap file for RX tests, then
+ * follow the setup steps of the base class */
+ tp = GetParam();
+
+ for (auto &s : tp.gen_streams) {
+ prepare_pcaps(s.src_ip.c_str(), s.dst_ip.c_str(),
+ s.src_port, s.dst_port, s.nb_pkts,
+ s.bad_chksum_l3, s.bad_chksum_l4, RX_PCAP);
+ }
+ test_tle_udp_gen_base::SetUp();
+ }
+};
+
+class tle_rx_enobufs: public tle_rx_test { };
+
+class tle_tx_test: public test_tle_udp_gen_base {
+public:
+ virtual void SetUp(void)
+ {
+ /* Generate 1-packet PCAP RX file so that virtual device can be
+ * initialized (needs a pcap file present during init), then
+ * follow setup steps as in base class
+ */
+ prepare_pcaps("10.0.0.1", "10.0.0.1", 100, 100, 1, 0, 0,
+ RX_PCAP);
+ test_tle_udp_gen_base::SetUp();
+ }
+};
+
+#endif /* TEST_TLE_UDP_STREAM_GEN_H_ */
diff --git a/test/timer/Makefile b/test/timer/Makefile
new file mode 100644
index 0000000..159faeb
--- /dev/null
+++ b/test/timer/Makefile
@@ -0,0 +1,42 @@
+# Copyright (c) 2016 Intel Corporation.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+ifeq ($(RTE_TARGET),)
+$(error "Please define RTE_TARGET environment variable")
+endif
+
+ifeq ($(TLDK_ROOT),)
+$(error "Please define TLDK_ROOT environment variable")
+endif
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = test_timer
+
+# all source are stored in SRCS-y
+SRCS-y += test_timer.c
+
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -I$(RTE_OUTPUT)/include
+
+LDLIBS += -L$(RTE_OUTPUT)/lib
+LDLIBS += -ltle_timer
+
+EXTRA_CFLAGS += -O3
+
+include $(TLDK_ROOT)/mk/tle.app.mk
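To build this test standalone, the three variables checked above must point
at a built DPDK tree and this repository; an illustrative invocation (paths
are placeholders):

    export RTE_SDK=/path/to/dpdk
    export RTE_TARGET=x86_64-native-linuxapp-gcc
    export TLDK_ROOT=/path/to/tldk
    make -C $TLDK_ROOT/test/timer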
diff --git a/test/timer/test_timer.c b/test/timer/test_timer.c
new file mode 100644
index 0000000..cde4b62
--- /dev/null
+++ b/test/timer/test_timer.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_random.h>
+#include <rte_log.h>
+
+#include <tle_timer.h>
+
+#define MAX_TIMER_BURST 0x20
+
+#define RDTSC_TO_SEC(t, h) ((double)(t)/(h))
+
+struct test_elements {
+ uint32_t expected_tick;
+ uint32_t active;
+ void *stop_handle;
+ uint32_t id;
+};
+
+struct timer_test_main {
+ struct tle_timer_wheel *tmr;
+ uint64_t last_run_time;
+ uint32_t current_tick;
+ uint32_t seed;
+ uint32_t ntimers;
+ uint32_t niter;
+ uint32_t ticks_per_iter;
+ struct tle_timer_wheel_args prm;
+ struct test_elements *test_elts;
+};
+
+struct timer_test_main *global_test_main;
+
+/** \brief 32-bit pseudo-random number generator (Numerical Recipes LCG) */
+static inline uint32_t
+random_uint32_t(uint32_t *seed)
+{
+ *seed = (1664525 * *seed) + 1013904223;
+ return *seed;
+}
+
+static void
+run_wheel(struct timer_test_main *tm, uint64_t interval, uint32_t *expired)
+{
+ uint32_t i, j, k;
+ uint64_t now = tm->last_run_time + tm->prm.tick_size;
+ uint32_t nb_tick;
+ struct test_elements *te[MAX_TIMER_BURST];
+
+ nb_tick = interval / tm->prm.tick_size;
+
+ for (i = 0; i < nb_tick; i++)
+ {
+ tle_timer_expire(tm->tmr, now);
+ tm->last_run_time = now;
+
+ k = tle_timer_get_expired_bulk(tm->tmr, (void **)te,
+ RTE_DIM(te));
+ while (k != 0) {
+ for (j = 0; j != k; j++)
+ {
+ if (tm->current_tick != te[j]->expected_tick)
+ RTE_LOG(ERR, USER1,
+ "%s: [%u] expired at tick=%u, "
+ "(not tick=%u)\n",
+ __func__, te[j]->id,
+ tm->current_tick,
+ te[j]->expected_tick);
+
+ te[j]->active = 0;
+ te[j]->stop_handle = NULL;
+ *expired += 1;
+ }
+
+ k = tle_timer_get_expired_bulk(tm->tmr, (void **)te,
+ RTE_DIM(te));
+ }
+ now += (tm->prm.tick_size);
+ tm->current_tick++;
+ }
+}
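run_wheel() above is the canonical driver loop for the timer wheel. A
minimal sketch using the same API calls found in this file (error handling
elided): create a wheel, arm one timer two ticks ahead, advance tick by
tick and collect it on expiry:

    struct tle_timer_wheel_args prm;
    struct tle_timer_wheel *tw;
    void *exp[MAX_TIMER_BURST];
    uint64_t now = 0;
    uint32_t i, k;
    int obj; /* any object pointer can be attached to a timer */

    memset(&prm, 0, sizeof(prm));
    prm.tick_size = 100; /* arbitrary time units per tick */
    prm.max_timer = 1;
    prm.socket_id = SOCKET_ID_ANY;

    tw = tle_timer_create(&prm, now);
    tle_timer_start(tw, &obj, 2 * prm.tick_size);

    for (i = 0; i != 4; i++) {
        now += prm.tick_size;
        tle_timer_expire(tw, now);
        k = tle_timer_get_expired_bulk(tw, exp, RTE_DIM(exp));
        /* on the expiry tick, k == 1 and exp[0] == &obj */
    }
    tle_timer_free(tw);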
+
+static int
+test_timer_rdtsc(void)
+{
+ struct timer_test_main tm;
+ struct test_elements *te;
+ uint64_t expiration_time;
+ uint32_t i, j, k;
+ uint64_t initial_wheel_offset;
+ struct tle_timer_wheel_args prm;
+ uint64_t start_tsc, cur_tsc, diff_tsc;
+ uint64_t max_expiration_time = 0;
+ uint32_t adds = 0, deletes = 0, expires = 0;
+ double ops_per_sec;
+ uint64_t hz;
+
+ memset(&tm, 0, sizeof(tm));
+ /* Default values */
+ tm.ntimers = 1000000;
+ tm.seed = 0xDEADDABE;
+ tm.niter = 1000;
+ tm.ticks_per_iter = 57;
+ tm.current_tick = 0;
+ tm.test_elts = rte_zmalloc_socket(NULL,
+ tm.ntimers * sizeof(tm.test_elts[0]), RTE_CACHE_LINE_SIZE,
+ SOCKET_ID_ANY);
+ if (tm.test_elts == NULL) {
+ printf("%s: failed to allocate %u test elements\n",
+ __func__, tm.ntimers);
+ return -ENOMEM;
+ }
+ global_test_main = &tm;
+
+ hz = rte_get_tsc_hz(); /* timer in cpu cycles */
+ prm.tick_size = hz / 10;
+ prm.max_timer = tm.ntimers;
+ prm.socket_id = SOCKET_ID_ANY;
+
+ start_tsc = rte_rdtsc();
+
+ tm.prm = prm;
+ tm.tmr = tle_timer_create(&prm, start_tsc);
+ tm.last_run_time = start_tsc;
+
+ if (tm.tmr == NULL) {
+ printf("%s: tle_timer_create failed\n", __func__);
+ rte_free(tm.test_elts);
+ return -ENOMEM;
+ }
+
+ printf("hz=%lu, tick_size=%u, ntimers=%u, niter=%u, "
+ "ticks_per_iter=%u\n", hz, prm.tick_size, tm.ntimers,
+ tm.niter, tm.ticks_per_iter);
+
+ /* Prime offset */
+ initial_wheel_offset = tm.ticks_per_iter;
+
+ run_wheel(&tm, initial_wheel_offset * prm.tick_size, &expires);
+
+ /* Prime the pump */
+ for (i = 0; i < tm.ntimers; i++)
+ {
+ te = &tm.test_elts[i];
+ te->id = i;
+
+ do {
+ expiration_time =
+ (random_uint32_t(&tm.seed) & ((1<<17) - 1));
+ } while (expiration_time == 0);
+
+ if (expiration_time > max_expiration_time)
+ max_expiration_time = expiration_time;
+
+ te->expected_tick = expiration_time + initial_wheel_offset;
+ te->stop_handle = tle_timer_start(tm.tmr, te,
+ expiration_time * prm.tick_size);
+ if (te->stop_handle == NULL) {
+ RTE_LOG(ERR, USER1, "%s: timer start error=%d\n",
+ __func__, rte_errno);
+ break;
+ }
+ te->active = 1;
+ }
+
+ adds += i;
+
+ for (i = 0; i < tm.niter; i++)
+ {
+ run_wheel(&tm, initial_wheel_offset * prm.tick_size, &expires);
+
+ for (k = 0, j = 0; j < tm.ntimers; j++) {
+ te = &tm.test_elts[j];
+
+ if (te->active) {
+ tle_timer_stop(tm.tmr, te->stop_handle);
+ te->active = 0;
+ te->stop_handle = NULL;
+ k++;
+
+ if (k > tm.ntimers/4)
+ break;
+ }
+ }
+
+ deletes += k;
+
+ for (k = 0, j = 0; j < tm.ntimers; j++)
+ {
+ te = &tm.test_elts[j];
+
+ if (!te->active) {
+ do {
+ expiration_time =
+ (random_uint32_t(&tm.seed) &
+ ((1<<17) - 1));
+ } while (expiration_time == 0);
+
+ if (expiration_time > max_expiration_time)
+ max_expiration_time = expiration_time;
+
+ te->expected_tick = expiration_time +
+ tm.current_tick;
+ te->stop_handle = tle_timer_start(tm.tmr, te,
+ expiration_time * prm.tick_size);
+ if (te->stop_handle == NULL) {
+ RTE_LOG(ERR, USER1,
+ "%s: timer start error =%d\n",
+ __func__, rte_errno);
+ break;
+ }
+ te->active = 1;
+ k++;
+
+ if (k > tm.ntimers/4)
+ break;
+ }
+ }
+
+ adds += k;
+ }
+
+ run_wheel(&tm, (max_expiration_time + 1) * prm.tick_size, &expires);
+
+ cur_tsc = rte_rdtsc();
+ diff_tsc = cur_tsc - start_tsc;
+
+ ops_per_sec = ((double)adds + deletes +
+ tm.current_tick) / RDTSC_TO_SEC(diff_tsc, hz);
+
+ printf("%u adds, %u deletes, %u expires, %u ticks\n"
+ "test ran %.2f seconds, %.2f ops/second, %.2f cycles/op\n",
+ adds, deletes, expires, tm.current_tick,
+ RDTSC_TO_SEC(diff_tsc, hz), ops_per_sec,
+ (double)hz/ops_per_sec);
+
+ rte_free(tm.test_elts);
+ tle_timer_free(tm.tmr);
+
+ return 0;
+}
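As a worked example of the final report: with hz = 2.0e9 and
diff_tsc = 4.0e9 cycles, RDTSC_TO_SEC yields 2.00 seconds; if adds, deletes
and ticks sum to 1,000,000 operations, that is 500,000 ops/second and
hz / ops_per_sec = 4,000 cycles/op.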
+
+int
+main(int argc, char *argv[])
+{
+ int32_t rc;
+
+ rc = rte_eal_init(argc, argv);
+ if (rc < 0)
+ rte_exit(EXIT_FAILURE,
+ "%s: rte_eal_init failed with error code: %d\n",
+ __func__, rc);
+
+ rc = test_timer_rdtsc();
+ if (rc != 0)
+ printf("test_timer_rdtsc TEST FAILED\n");
+ else
+ printf("test_timer_rdtsc TEST OK\n");
+
+ return rc;
+}