author     2017-02-21 18:12:20 +0000
committer  2017-02-24 16:37:08 +0000
commit     aa97dd1ce910b839fed46ad55d1e70e403f5a930 (patch)
tree       f6f0fd494eaf499859bff9f20f5ddfac9ab99233 /examples
parent     f5f10013ffef8e4ac1071087b8492fe6380d98fe (diff)
Introduce first version of TCP code.
Supported functionality:
- open/close
- listen/accept/connect
- send/recv
To achieve that, the libtle_udp library was
reworked into the libtle_l4p library, which supports
both the TCP and UDP protocols.
A new libtle_timer library was introduced
(thanks to the Cisco guys and Dave Barach <dbarach@cisco.com>
for sharing their timer code with us).
The sample application was also reworked significantly
to support both TCP and UDP traffic handling.
New unit tests (UT) were introduced.
Change-Id: I806b05011f521e89b58db403cfdd484a37beb775
Signed-off-by: Mohammad Abdul Awal <mohammad.abdul.awal@intel.com>
Signed-off-by: Karol Latecki <karolx.latecki@intel.com>
Signed-off-by: Daniel Mrzyglod <danielx.t.mrzyglod@intel.com>
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Diffstat (limited to 'examples')
-rw-r--r--   examples/Makefile                                                              |    2
-rw-r--r--   examples/l4fwd/Makefile (renamed from examples/udpfwd/Makefile)                |    5
-rw-r--r--   examples/l4fwd/README                                                          |  346
-rw-r--r--   examples/l4fwd/be.cfg (renamed from examples/udpfwd/be.cfg)                    |    2
-rw-r--r--   examples/l4fwd/common.h                                                        |  662
-rw-r--r--   examples/l4fwd/dpdk_legacy.h (renamed from examples/udpfwd/main_dpdk_legacy.h) |   30
-rw-r--r--   examples/l4fwd/dpdk_version.h (renamed from examples/udpfwd/dpdk_version.h)    |    2
-rw-r--r--   examples/l4fwd/fe.cfg (renamed from examples/udpfwd/fe.cfg)                    |    0
-rw-r--r--   examples/l4fwd/fwdtbl.h (renamed from examples/udpfwd/fwdtbl.h)                |    0
-rwxr-xr-x   examples/l4fwd/gen_fe_cfg.py (renamed from examples/udpfwd/gen_fe_cfg.py)      |    5
-rw-r--r--   examples/l4fwd/lcore.h                                                         |  370
-rw-r--r--   examples/l4fwd/main.c                                                          |  313
-rw-r--r--   examples/l4fwd/netbe.h (renamed from examples/udpfwd/netbe.h)                  |   90
-rw-r--r--   examples/l4fwd/parse.c (renamed from examples/udpfwd/parse.c)                  |  247
-rw-r--r--   examples/l4fwd/parse.h (renamed from examples/udpfwd/parse.h)                  |   24
-rw-r--r--   examples/l4fwd/pkt.c                                                           |  872
-rw-r--r--   examples/l4fwd/pkt_dpdk_legacy.h (renamed from examples/udpfwd/pkt_dpdk_legacy.h) | 176
-rw-r--r--   examples/l4fwd/port.h                                                          |  453
-rw-r--r--   examples/l4fwd/tcp.h                                                           |  701
-rw-r--r--   examples/l4fwd/udp.h                                                           |  588
-rw-r--r--   examples/udpfwd/README                                                         |  141
-rw-r--r--   examples/udpfwd/main.c                                                         | 2134
-rw-r--r--   examples/udpfwd/pkt.c                                                          |  509
23 files changed, 4742 insertions(+), 2930 deletions(-)
diff --git a/examples/Makefile b/examples/Makefile
index bed34ac..cf13574 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -21,6 +21,6 @@ endif
 
 include $(RTE_SDK)/mk/rte.vars.mk
 
-DIRS-y += udpfwd
+DIRS-y += l4fwd
 
 include $(TLDK_ROOT)/mk/tle.subdir.mk
diff --git a/examples/udpfwd/Makefile b/examples/l4fwd/Makefile
index fae1c34..f18b622 100644
--- a/examples/udpfwd/Makefile
+++ b/examples/l4fwd/Makefile
@@ -26,7 +26,7 @@ endif
 include $(RTE_SDK)/mk/rte.vars.mk
 
 # binary name
-APP = udpfwd
+APP = l4fwd
 
 # all source are stored in SRCS-y
 SRCS-y += parse.c
@@ -37,7 +37,8 @@ CFLAGS += $(WERROR_FLAGS)
 CFLAGS += -I$(RTE_OUTPUT)/include
 
 LDLIBS += -L$(RTE_OUTPUT)/lib
-LDLIBS += -ltle_udp
+LDLIBS += -ltle_l4p
+LDLIBS += -ltle_timer
 
 EXTRA_CFLAGS += -O3
 CFLAGS_parse.o += -D_GNU_SOURCE
diff --git a/examples/l4fwd/README b/examples/l4fwd/README
new file mode 100644
index 0000000..658fe3a
--- /dev/null
+++ b/examples/l4fwd/README
@@ -0,0 +1,346 @@
+1. INTRODUCTION
+
+ l4fwd is a sample application to demonstrate and test TLDK TCP/UDP
+ functionality. Depending on configuration it can do simple send, receive,
+ or both over opened TCP/UDP streams. It also implements TCP/UDP packet
+ forwarding between different streams, so it is possible to use the l4fwd
+ application as a TCP/UDP proxy.
+
+ The l4fwd application is logically divided into two parts, Back End (BE)
+ and Front End (FE).
+
+1.1 Back End (BE)
+
+ BE is responsible for:
+ - RX over DPDK ports, feeding the received packets into TCP/UDP TLDK
+   context(s) (via tle_*_rx_bulk).
+ - retrieving packets ready to be sent out from TCP/UDP TLDK context(s)
+   and TX-ing them over the destined DPDK port.
+ - Multiple RX/TX queues per port are supported through RSS. Right now
+   the number of TX queues is the same as the number of RX queues.
+ - Each BE lcore can serve multiple DPDK ports and TLDK TCP/UDP contexts.
+
+ BE configuration record format:
+
+ port=<uint>,addr=<ipv4/ipv6>,masklen=<uint>,mac=<ether>[,mtu=<uint>]
+
+ port    - DPDK port id to be used to send packets to the destination.
+           It is a mandatory option.
+ addr    - destination network address. It is a mandatory option.
+ masklen - destination network prefix length. It is a mandatory option.
+ mac     - destination Ethernet address. It is a mandatory option.
+ mtu     - MTU to be used on that port (= application data size +
+           L2/L3/L4 header sizes, default=1514; e.g. the default
+           corresponds to 1460B of data plus 14B L2, 20B L3 and 20B L4
+           headers for TCP over IPv4). It is an optional option.
+
+ Below are some examples of BE entries:
+
+ port=0,masklen=16,addr=192.168.0.0,mac=01:de:ad:be:ef:01
+ port=0,addr=2001:4860:b002::,masklen=64,mac=01:de:ad:be:ef:01
+
+ These examples are also available in the be.cfg file. A sketch of the
+ longest-prefix-match lookup that such records drive follows below.
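+ Each BE entry effectively installs a longest-prefix-match route for the
+ given destination. The snippet below is illustrative only, written
+ against the plain DPDK rte_lpm API and assuming an EAL-initialized
+ process; inside l4fwd the equivalent work is done by
+ netbe_add_ipv4_route() and lpm4_dst_lookup():
+
+    #include <rte_ip.h>   /* IPv4() address macro (pre-19.x DPDK) */
+    #include <rte_lpm.h>
+
+    static int
+    lpm_route_demo(void)
+    {
+        uint32_t ip, next_hop;
+        struct rte_lpm *lpm;
+        struct rte_lpm_config cfg = {
+            .max_rules = 0x100,
+            .number_tbl8s = 0x800,
+        };
+
+        lpm = rte_lpm_create("demo", SOCKET_ID_ANY, &cfg);
+        if (lpm == NULL)
+            return -1;
+
+        /* port=0,masklen=16,addr=192.168.0.0 -> destination index 0 */
+        ip = IPv4(192, 168, 0, 0);
+        if (rte_lpm_add(lpm, ip, 16, 0) != 0)
+            return -1;
+
+        /* 192.168.1.2 matches the /16 above, so next_hop gets 0
+         * (uint32_t next_hop assumes the DPDK >= 16.04 signature) */
+        return rte_lpm_lookup(lpm, IPv4(192, 168, 1, 2), &next_hop);
+    }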
+ "tx" - do send only on that stream. + "echo" - mimic recvfrom(..., &addr);sendto(..., &addr); + on that stream. + "fwd" - forward packets between streams. + It is an mandatory option. + laddr - local address for the stream to open. It is an mandatory option. + lport - local port for the stream to open. It is an mandatory option. + raddr - remote address for the stream to open. It is an mandatory option. + rport - remote port for the stream to open. It is an mandatory option. + txlen - data length sending in each packet (mandatory for "tx" mode only). + fwladdr - local address for the forwarding stream(s) to open + (mandatory for "fwd" mode only). + fwlport - local port for the forwarding stream(s) to open + (mandatory for "fwd" mode only). + fwraddr - remote address for the forwarding stream(s) to open + (mandatory for "fwd" mode only). + fwrport - remote port for the forwarding stream(s) to open + (mandatory for "fwd" mode only). + + Below are some example of FE entries + + lcore=3,op=echo,laddr=192.168.1.233,lport=0x8000,raddr=0.0.0.0,rport=0 + + lcore=3,op=tx,laddr=192.168.1.233,lport=0x8001,raddr=192.168.1.56,\ + rport=0x200,txlen=72 + + lcore=3,op=rx,laddr=::,lport=0x200,raddr=::,rport=0,txlen=72 + + lcore=3,op=fwd,laddr=0.0.0.0,lport=11211,raddr=0.0.0.0,rport=0,\ + fwladdr=::,fwlport=0,fwraddr=2001:4860:b002::56,fwrport=11211 + + These examples are also available in fe.cfg file with some more explanation. + +1.3 Configuration files format + + - each record on a separate line. + - lines started with '#' are treated as comments. + - empty lines (containing whitespace chars only) are ignored. + - kvargs style format for each record. + - each FE record correspond to at least one stream to be opened + (could be multiple streams in case of op="fwd"). + - each BE record define a ipv4/ipv6 destination. + +2. REQUIREMENTS + + DPDK libraries (16.11 or higher) + TLDK libraries (1.0) + Back-End (BE) configuration file + Front-End(FE) configuration file + +3. USAGE + + l4fwd <DPDK EAL parameters> -- \ + -P | --promisc /* promiscuous mode enabled. */ \ + -R | --rbufs <num> /* max recv buffers per stream. */ \ + -S | --sbufs <num> /* max send buffers per stream. */ \ + -s | --streams <num> /* streams to open per context. */ \ + -b | --becfg <filename> /* backend configuration file. */ \ + -f | --fecfg <filename> /* frontend configuration file. */ \ + -U | --udp /* run the app to handle UDP streams only. */ \ + -T | --tcp /* run the app to handle TCP streams only. */ \ + -L | --listen /* open TCP streams in server mode (listen). */ \ + -a | --enable-arp /* enable arp responses (request not supported) */ \ + -v | --verbose /* different level of verbose mode */ \ + <port0_params> <port1_params> ... <portN_params> + + Note that: options -U and -T cannot be used together. + Option -L can be used only with option -T. + + portX_params: port=<uint>,lcore=<uint>[-<uint>],[lcore=<uint>[-<uint>],]\ + [rx_offload=<uint>,tx_offload=<uint>,mtu=<uint>,ipv4=<ipv4>,ipv6=<ipv6>] + + portX_params are used to configure the particular DPDK device + (rte_ethdev port), and specify BE lcore that will handle RX/TX from/to the + device and manage BE part of corresponding TCP/UDP context. + Multiple BE lcore can be specified. + + port - DPDK port id (RSS are supported when multiple lcores are + specified for a port). It is an mandatory option. + lcore - EAL lcore id to handle IO over that port (rx_burst/tx_burst). + several ports can be managed by the same lcore, and same port + can be managed by more than one lcore. 
+1.3 Configuration file format
+
+ - each record is on a separate line.
+ - lines starting with '#' are treated as comments.
+ - empty lines (containing whitespace chars only) are ignored.
+ - kvargs style format for each record.
+ - each FE record corresponds to at least one stream to be opened
+   (could be multiple streams in case of op="fwd").
+ - each BE record defines an ipv4/ipv6 destination.
+
+2. REQUIREMENTS
+
+ DPDK libraries (16.11 or higher)
+ TLDK libraries (1.0)
+ Back-End (BE) configuration file
+ Front-End (FE) configuration file
+
+3. USAGE
+
+ l4fwd <DPDK EAL parameters> -- \
+ -P | --promisc /* promiscuous mode enabled. */ \
+ -R | --rbufs <num> /* max recv buffers per stream. */ \
+ -S | --sbufs <num> /* max send buffers per stream. */ \
+ -s | --streams <num> /* streams to open per context. */ \
+ -b | --becfg <filename> /* backend configuration file. */ \
+ -f | --fecfg <filename> /* frontend configuration file. */ \
+ -U | --udp /* run the app to handle UDP streams only. */ \
+ -T | --tcp /* run the app to handle TCP streams only. */ \
+ -L | --listen /* open TCP streams in server mode (listen). */ \
+ -a | --enable-arp /* enable arp responses (requests not supported) */ \
+ -v | --verbose /* verbosity level */ \
+ <port0_params> <port1_params> ... <portN_params>
+
+ Note that options -U and -T cannot be used together.
+ Option -L can be used only together with option -T.
+
+ portX_params: port=<uint>,lcore=<uint>[-<uint>],[lcore=<uint>[-<uint>],]\
+ [rx_offload=<uint>,tx_offload=<uint>,mtu=<uint>,ipv4=<ipv4>,ipv6=<ipv6>]
+
+ portX_params are used to configure the particular DPDK device
+ (rte_ethdev port) and to specify the BE lcore(s) that will handle RX/TX
+ from/to the device and manage the BE part of the corresponding TCP/UDP
+ context. Multiple BE lcores can be specified.
+
+ port       - DPDK port id (RSS is supported when multiple lcores are
+              specified for a port). It is a mandatory option.
+ lcore      - EAL lcore id to handle IO over that port (rx_burst/tx_burst).
+              Several ports can be managed by the same lcore, and the same
+              port can be managed by more than one lcore.
+              It is a mandatory option; at least one lcore option has to
+              be specified. An lcore range can be specified in one lcore
+              option, e.g. lcore=2-3,lcore=6 will enable lcores 2, 3 and 6
+              to handle BE.
+ rx_offload - RX HW offload capabilities to enable/use on this port
+              (bitmask of DEV_RX_OFFLOAD_* values). It is an optional
+              option.
+ tx_offload - TX HW offload capabilities to enable/use on this port
+              (bitmask of DEV_TX_OFFLOAD_* values).
+ mtu        - MTU to be used on that port (= application data size +
+              L2/L3/L4 header sizes, default=1514).
+ ipv4       - ipv4 address to assign to that port.
+ ipv6       - ipv6 address to assign to that port.
+
+ At least one of the ipv4/ipv6 values has to be specified for each port.
+
+3.1 RSS
+
+ If multiple lcores are specified per DPDK port, the following RSS hash
+ will be enabled on that port: ETH_RSS_UDP or ETH_RSS_TCP.
+
+ The RSS queue qid will handle a stream according to the stream's TCP/UDP
+ source port. The qid can be calculated as below:
+
+ qid = (src_port % power_of_2(n)) % n
+
+ where n is the number of lcores used to manage the DPDK port (see the C
+ sketch below).
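+ A small C sketch of that mapping, using the same power-of-two rounding
+ (rte_align32pow2) that the application itself relies on:
+
+    #include <stdint.h>
+    #include <rte_common.h> /* rte_align32pow2() */
+
+    /* queue id that will see a stream, given the stream's TCP/UDP
+     * source port and the number of lcores (n) managing the port */
+    static inline uint32_t
+    rss_qid(uint16_t src_port, uint32_t n)
+    {
+        uint32_t align_n = rte_align32pow2(n);
+
+        return (src_port % align_n) % n;
+    }
+
+    /* e.g. n = 3 lcores: align_n = 4, so a stream with source port
+     * 6001 lands on qid = (6001 % 4) % 3 = 1 */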
+4. EXAMPLES
+
+4.1 Sample testbed
+
++----------------------------+    +-------------------------------+
+| TLDK Box                   |    | Linux Box                     |
+|                            |    |                               |
+| port 0                     +----+ port 0                        |
+| 192.168.1.1                |    | 192.168.1.2                   |
+| 2001:4860:b002::1          |    | 2001:4860:b002::2             |
+| AA:BB:CC:DD:EE:F1          |    | AA:BB:CC:DD:EE:F2             |
++----------------------------+    +-------------------------------+
+
+4.2 UDP, "rx" mode, IPv4-only, Single core
+
+ This example shows receiving data from an IPv4 stream. The TLDK UDP
+ server runs on a single core, where both BE and FE run on cpu core 3.
+
+ be.cfg file contains:
+
+ port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (udp server listening on port 6000):
+
+ lcore=3,op=rx,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -U port=0,lcore=3,ipv4=192.168.1.1
+
+ This will create a TLDK UDP context on lcore=3 (BE lcore) to manage
+ DPDK port 0. Port 0 will have IPv4 address 192.168.1.1.
+ All the streams will be in server mode and also managed by lcore 3.
+
+4.3 UDP, "echo" mode, IPv6-only, Multicore
+
+ This example shows receiving data from an IPv6 stream and sending the
+ data back through the same IPv6 stream. The TLDK UDP server runs on
+ multiple cores, where BE runs on cpu core 2 and FE runs on cpu core 3.
+
+ be.cfg file contains:
+
+ port=0,masklen=64,addr=2001:4860:b002::,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (udp server listening on port 6000):
+
+ lcore=3,op=echo,laddr=2001:4860:b002::1,lport=6000,raddr=::,rport=0
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='2,3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -U port=0,lcore=2,ipv6=2001:4860:b002::1
+
+ This will create a TLDK UDP context on lcore=2 (BE lcore) to manage
+ DPDK port 0. Port 0 will have IPv6 address 2001:4860:b002::1.
+ All the streams will be in server mode and managed by lcore 3 (FE
+ lcore). In this case, the UDP server will send the incoming data back
+ to the sender.
+
+4.4 TCP, "echo" mode, IPv4-only, Multicore, RX-Offload
+
+ This example shows receiving data from an IPv4 stream and sending the
+ data back through the same IPv4 stream. The TLDK TCP server runs on
+ multiple cores, where BE runs on cpu core 2 and FE runs on cpu core 3.
+ The BE also uses the receive offload features of the NIC.
+
+ be.cfg file contains:
+
+ port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (tcp server listening on port 6000):
+
+ lcore=3,op=echo,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='2,3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -T -L port=0,lcore=2,rx_offload=0xf,tx_offload=0,\
+ ipv4=192.168.1.1
+
+ This will create a TLDK TCP context on lcore=2 (BE lcore) to manage
+ DPDK port 0. Port 0 will have IPv4 address 192.168.1.1. The following
+ DPDK RX HW offloads will be enabled on that port:
+ DEV_RX_OFFLOAD_VLAN_STRIP,
+ DEV_RX_OFFLOAD_IPV4_CKSUM,
+ DEV_RX_OFFLOAD_UDP_CKSUM,
+ DEV_RX_OFFLOAD_TCP_CKSUM
+ No HW TX offloads will be enabled.
+ All the streams will be in server mode and managed by lcore 3 (FE
+ lcore). In this case, the TCP server will send the incoming data back
+ to the sender.
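+ To exercise the echo server above from the Linux box, any ordinary TCP
+ client will do. A minimal one-shot client is sketched below (a
+ hypothetical helper, not shipped with the example; the address and port
+ are taken from the fe.cfg above):
+
+    #include <arpa/inet.h>
+    #include <netinet/in.h>
+    #include <string.h>
+    #include <sys/socket.h>
+    #include <unistd.h>
+
+    /* connect to the l4fwd echo server, send one message and
+     * read the echoed copy back */
+    static int
+    echo_once(const char *msg)
+    {
+        char buf[128];
+        struct sockaddr_in sa;
+        int fd;
+
+        fd = socket(AF_INET, SOCK_STREAM, 0);
+        if (fd < 0)
+            return -1;
+
+        memset(&sa, 0, sizeof(sa));
+        sa.sin_family = AF_INET;
+        sa.sin_port = htons(6000);   /* lport from fe.cfg */
+        inet_pton(AF_INET, "192.168.1.1", &sa.sin_addr);
+
+        if (connect(fd, (struct sockaddr *)&sa, sizeof(sa)) == 0) {
+            write(fd, msg, strlen(msg));
+            read(fd, buf, sizeof(buf));
+        }
+
+        close(fd);
+        return 0;
+    }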
+4.5 TCP, "fwd" (proxy) mode, IPv4-to-IPv6, Multicore, RX-Offload
+
+ This example shows receiving data from an IPv4 stream and forwarding
+ the data to an IPv6 stream. The TLDK TCP proxy runs on multiple cores,
+ where BE runs on cpu core 2 and FE runs on cpu core 3. The BE also uses
+ the receive offload features of the NIC.
+
+ be.cfg file contains:
+
+ port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (tcp server listening on port 6000):
+
+ lcore=3,op=fwd,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0,\
+ fwladdr=::,fwlport=0,fwraddr=2001:4860:b002::2,fwrport=7000
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='2,3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -T -L port=0,lcore=2,rx_offload=0xf,tx_offload=0,\
+ ipv4=192.168.1.1,ipv6=2001:4860:b002::1
+
+ This will create a TLDK TCP context on lcore=2 (BE lcore) to manage
+ DPDK port 0. Port 0 will have IPv4 address 192.168.1.1. The following
+ DPDK RX HW offloads will be enabled on that port:
+ DEV_RX_OFFLOAD_VLAN_STRIP,
+ DEV_RX_OFFLOAD_IPV4_CKSUM,
+ DEV_RX_OFFLOAD_UDP_CKSUM,
+ DEV_RX_OFFLOAD_TCP_CKSUM
+ No HW TX offloads will be enabled.
+ All the streams will be in server mode and managed by lcore 3 (FE
+ lcore). In this case, the IPv4 TCP server will forward the incoming
+ data to the IPv6 TCP server 2001:4860:b002::2 listening on port 7000.
+
+4.6 TCP, "echo" mode, RSS, IPv4-only, Multicore, RX-Offload
+
+ This example shows receiving data from an IPv4 stream and sending the
+ data back through the same IPv4 stream. The TLDK TCP server runs on
+ multiple cores, where BE runs on cpu cores 1-2 and FE runs on cpu
+ core 3. As BE runs on more than one core, the Receive Side Scaling
+ (RSS) feature will be enabled automatically. The BE also uses the
+ receive offload features of the NIC.
+
+ be.cfg file contains:
+
+ port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2
+
+ fe.cfg file contains (tcp servers listening on ports 6000 and 6001):
+
+ lcore=3,op=echo,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0
+ lcore=3,op=echo,laddr=192.168.1.1,lport=6001,raddr=0.0.0.0,rport=0
+
+ run the l4fwd application as below (DPDK port 0 (pci 01:00.0)):
+
+ l4fwd --lcores='1,2,3' -w 01:00.0 -- \
+ --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \
+ --becfg be.cfg -T -L port=0,lcore="1-2",rx_offload=0xf,tx_offload=0,\
+ ipv4=192.168.1.1
+
+ This will create a TLDK TCP context on lcores 1-2 (BE lcores) to manage
+ DPDK port 0. Port 0 will have IPv4 address 192.168.1.1. The following
+ DPDK RX HW offloads will be enabled on that port:
+ DEV_RX_OFFLOAD_VLAN_STRIP,
+ DEV_RX_OFFLOAD_IPV4_CKSUM,
+ DEV_RX_OFFLOAD_UDP_CKSUM,
+ DEV_RX_OFFLOAD_TCP_CKSUM
+ No HW TX offloads will be enabled.
+ All the streams will be in server mode and managed by lcore 3 (FE
+ lcore). In this case, the TCP server will send the incoming data back
+ to the sender.
+
+ As RSS is enabled, all packets with destination port 6000 will be
+ handled by HW queue 0 and all packets with destination port 6001 by HW
+ queue 1. Please note that RSS is not supported on an interface when
+ both IPv4 and IPv6 are enabled; only one of IPv4 or IPv6 can be enabled
+ on the port.
diff --git a/examples/udpfwd/be.cfg b/examples/l4fwd/be.cfg
index 5c1d173..8e6d983 100644
--- a/examples/udpfwd/be.cfg
+++ b/examples/l4fwd/be.cfg
@@ -1,5 +1,5 @@
 #
-# udpfwd BE cconfig file exaple
+# l4fwd BE config file example
 #
 port=0,masklen=16,addr=192.168.0.0,mac=01:de:ad:be:ef:01
 port=0,addr=2001:4860:b002::,masklen=64,mac=01:de:ad:be:ef:01
diff --git a/examples/l4fwd/common.h b/examples/l4fwd/common.h
new file mode 100644
index 0000000..ff8ee7a
--- /dev/null
+++ b/examples/l4fwd/common.h
@@ -0,0 +1,662 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef COMMON_H_ +#define COMMON_H_ + +#include <rte_arp.h> + +static void +sig_handle(int signum) +{ + RTE_LOG(ERR, USER1, "%s(%d)\n", __func__, signum); + force_quit = 1; +} + +static void +netfe_stream_dump(const struct netfe_stream *fes, struct sockaddr_storage *la, + struct sockaddr_storage *ra) +{ + struct sockaddr_in *l4, *r4; + struct sockaddr_in6 *l6, *r6; + uint16_t lport, rport; + char laddr[INET6_ADDRSTRLEN]; + char raddr[INET6_ADDRSTRLEN]; + + if (la->ss_family == AF_INET) { + + l4 = (struct sockaddr_in *)la; + r4 = (struct sockaddr_in *)ra; + + lport = l4->sin_port; + rport = r4->sin_port; + + } else if (la->ss_family == AF_INET6) { + + l6 = (struct sockaddr_in6 *)la; + r6 = (struct sockaddr_in6 *)ra; + + lport = l6->sin6_port; + rport = r6->sin6_port; + + } else { + RTE_LOG(ERR, USER1, "stream@%p - unknown family=%hu\n", + fes->s, la->ss_family); + return; + } + + format_addr(la, laddr, sizeof(laddr)); + format_addr(ra, raddr, sizeof(raddr)); + + RTE_LOG(INFO, USER1, "stream@%p={s=%p," + "family=%hu,proto=%s,laddr=%s,lport=%hu,raddr=%s,rport=%hu;" + "stats={" + "rxp=%" PRIu64 ",rxb=%" PRIu64 + ",txp=%" PRIu64 ",txb=%" PRIu64 + ",drops=%" PRIu64 "," + "rxev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "]," + "txev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "]" + "};};\n", + fes, fes->s, la->ss_family, proto_name[fes->proto], + laddr, ntohs(lport), raddr, ntohs(rport), + fes->stat.rxp, fes->stat.rxb, + fes->stat.txp, fes->stat.txb, + fes->stat.drops, + fes->stat.rxev[TLE_SEV_IDLE], + fes->stat.rxev[TLE_SEV_DOWN], + fes->stat.rxev[TLE_SEV_UP], + fes->stat.txev[TLE_SEV_IDLE], + fes->stat.txev[TLE_SEV_DOWN], + fes->stat.txev[TLE_SEV_UP]); +} + +static inline uint32_t +netfe_get_streams(struct netfe_stream_list *list, struct netfe_stream *rs[], + uint32_t num) +{ + struct netfe_stream *s; + uint32_t i, n; + + n = RTE_MIN(list->num, num); + for (i = 0, s = LIST_FIRST(&list->head); + i != n; + i++, s = LIST_NEXT(s, link)) { + rs[i] = s; + } + + if (s == NULL) + /* we retrieved all free entries */ + LIST_INIT(&list->head); + else + LIST_FIRST(&list->head) = s; + + list->num -= n; + + return n; +} + +static inline struct netfe_stream * +netfe_get_stream(struct netfe_stream_list *list) +{ + struct netfe_stream *s; + + s = NULL; + if (list->num == 0) + return s; + + netfe_get_streams(list, &s, 1); + + return s; +} + +static inline void +netfe_put_streams(struct netfe_lcore *fe, struct netfe_stream_list *list, + struct netfe_stream *fs[], uint32_t num) +{ + uint32_t i, n; + + n = RTE_MIN(fe->snum - list->num, num); + if (n != num) + RTE_LOG(ERR, USER1, "%s: list overflow by %u\n", __func__, + num - n); + + for (i = 0; i != n; i++) + LIST_INSERT_HEAD(&list->head, fs[i], link); + list->num += n; +} + +static inline void +netfe_put_stream(struct netfe_lcore *fe, struct netfe_stream_list *list, + struct netfe_stream *s) +{ + if (list->num == fe->snum) { + RTE_LOG(ERR, USER1, "%s: list is full\n", __func__); + return; + } + + netfe_put_streams(fe, list, &s, 1); +} + +static inline void +netfe_rem_stream(struct netfe_stream_list *list, struct netfe_stream *s) +{ + LIST_REMOVE(s, link); + list->num--; +} + +static void +netfe_stream_close(struct netfe_lcore *fe, struct netfe_stream *fes) +{ + tle_stream_close(fes->s); + tle_event_free(fes->txev); + tle_event_free(fes->rxev); + tle_event_free(fes->erev); + memset(fes, 0, sizeof(*fes)); + netfe_put_stream(fe, &fe->free, fes); +} + +/* + * Helper functions, verify the queue for corresponding UDP port. 
+ */ +static uint8_t +verify_queue_for_port(const struct netbe_dev *prtq, const uint16_t lport) +{ + uint32_t align_nb_q, qid; + + align_nb_q = rte_align32pow2(prtq->port.nb_lcore); + qid = (lport % align_nb_q) % prtq->port.nb_lcore; + if (prtq->rxqid == qid) + return 1; + + return 0; +} + +static inline size_t +pkt_buf_empty(struct pkt_buf *pb) +{ + uint32_t i; + size_t x; + + x = 0; + for (i = 0; i != pb->num; i++) { + x += pb->pkt[i]->pkt_len; + NETFE_PKT_DUMP(pb->pkt[i]); + rte_pktmbuf_free(pb->pkt[i]); + } + + pb->num = 0; + return x; +} + +static inline void +pkt_buf_fill(uint32_t lcore, struct pkt_buf *pb, uint32_t dlen) +{ + uint32_t i; + int32_t sid; + + sid = rte_lcore_to_socket_id(lcore) + 1; + + for (i = pb->num; i != RTE_DIM(pb->pkt); i++) { + pb->pkt[i] = rte_pktmbuf_alloc(mpool[sid]); + if (pb->pkt[i] == NULL) + break; + rte_pktmbuf_append(pb->pkt[i], dlen); + } + + pb->num = i; +} + +static int +netbe_lcore_setup(struct netbe_lcore *lc) +{ + uint32_t i; + int32_t rc; + + RTE_LOG(NOTICE, USER1, "%s:(lcore=%u, proto=%s, ctx=%p) start\n", + __func__, lc->id, proto_name[lc->proto], lc->ctx); + + /* + * ??????? + * wait for FE lcores to start, so BE dont' drop any packets + * because corresponding streams not opened yet by FE. + * useful when used with pcap PMDS. + * think better way, or should this timeout be a cmdlien parameter. + * ??????? + */ + rte_delay_ms(10); + + rc = 0; + for (i = 0; i != lc->prtq_num && rc == 0; i++) { + RTE_LOG(NOTICE, USER1, + "%s:%u(port=%u, q=%u, proto=%s, dev=%p)\n", + __func__, i, lc->prtq[i].port.id, lc->prtq[i].rxqid, + proto_name[lc->proto], lc->prtq[i].dev); + + rc = setup_rx_cb(&lc->prtq[i].port, lc, lc->prtq[i].rxqid, + becfg.arp); + if (rc < 0) + return rc; + } + + if (rc == 0) + RTE_PER_LCORE(_be) = lc; + return rc; +} + +static void +netbe_lcore_clear(void) +{ + uint32_t i, j; + struct netbe_lcore *lc; + + lc = RTE_PER_LCORE(_be); + if (lc == NULL) + return; + + RTE_LOG(NOTICE, USER1, "%s(lcore=%u, proto=%s, ctx: %p) finish\n", + __func__, lc->id, proto_name[lc->proto], lc->ctx); + for (i = 0; i != lc->prtq_num; i++) { + RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, q=%u, lcore=%u, dev=%p) " + "rx_stats={" + "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "}, " + "tx_stats={" + "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "};\n", + __func__, i, lc->prtq[i].port.id, lc->prtq[i].rxqid, + lc->id, + lc->prtq[i].dev, + lc->prtq[i].rx_stat.in, + lc->prtq[i].rx_stat.up, + lc->prtq[i].rx_stat.drop, + lc->prtq[i].tx_stat.down, + lc->prtq[i].tx_stat.out, + lc->prtq[i].tx_stat.drop); + } + + RTE_LOG(NOTICE, USER1, "tcp_stat={\n"); + for (i = 0; i != RTE_DIM(lc->tcp_stat.flags); i++) { + if (lc->tcp_stat.flags[i] != 0) + RTE_LOG(NOTICE, USER1, "[flag=%#x]==%" PRIu64 ";\n", + i, lc->tcp_stat.flags[i]); + } + RTE_LOG(NOTICE, USER1, "};\n"); + + for (i = 0; i != lc->prtq_num; i++) + for (j = 0; j != lc->prtq[i].tx_buf.num; j++) + rte_pktmbuf_free(lc->prtq[i].tx_buf.pkt[j]); + + RTE_PER_LCORE(_be) = NULL; +} + +static int +netbe_add_ipv4_route(struct netbe_lcore *lc, const struct netbe_dest *dst, + uint8_t idx) +{ + int32_t rc; + uint32_t addr, depth; + char str[INET_ADDRSTRLEN]; + + depth = dst->prfx; + addr = rte_be_to_cpu_32(dst->ipv4.s_addr); + + inet_ntop(AF_INET, &dst->ipv4, str, sizeof(str)); + rc = rte_lpm_add(lc->lpm4, addr, depth, idx); + RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p," + "ipv4=%s/%u,mtu=%u," + "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) " + "returns %d;\n", + __func__, lc->id, dst->port, lc->dst4[idx].dev, + str, depth, 
lc->dst4[idx].mtu, + dst->mac.addr_bytes[0], dst->mac.addr_bytes[1], + dst->mac.addr_bytes[2], dst->mac.addr_bytes[3], + dst->mac.addr_bytes[4], dst->mac.addr_bytes[5], + rc); + return rc; +} + +static int +netbe_add_ipv6_route(struct netbe_lcore *lc, const struct netbe_dest *dst, + uint8_t idx) +{ + int32_t rc; + uint32_t depth; + char str[INET6_ADDRSTRLEN]; + + depth = dst->prfx; + + rc = rte_lpm6_add(lc->lpm6, (uint8_t *)(uintptr_t)dst->ipv6.s6_addr, + depth, idx); + + inet_ntop(AF_INET6, &dst->ipv6, str, sizeof(str)); + RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p," + "ipv6=%s/%u,mtu=%u," + "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) " + "returns %d;\n", + __func__, lc->id, dst->port, lc->dst6[idx].dev, + str, depth, lc->dst4[idx].mtu, + dst->mac.addr_bytes[0], dst->mac.addr_bytes[1], + dst->mac.addr_bytes[2], dst->mac.addr_bytes[3], + dst->mac.addr_bytes[4], dst->mac.addr_bytes[5], + rc); + return rc; +} + +static void +fill_dst(struct tle_dest *dst, struct netbe_dev *bed, + const struct netbe_dest *bdp, uint16_t l3_type, int32_t sid, + uint8_t proto_id) +{ + struct ether_hdr *eth; + struct ipv4_hdr *ip4h; + struct ipv6_hdr *ip6h; + + dst->dev = bed->dev; + dst->head_mp = frag_mpool[sid + 1]; + dst->mtu = RTE_MIN(bdp->mtu, bed->port.mtu); + dst->l2_len = sizeof(*eth); + + eth = (struct ether_hdr *)dst->hdr; + + ether_addr_copy(&bed->port.mac, ð->s_addr); + ether_addr_copy(&bdp->mac, ð->d_addr); + eth->ether_type = rte_cpu_to_be_16(l3_type); + + if (l3_type == ETHER_TYPE_IPv4) { + dst->l3_len = sizeof(*ip4h); + ip4h = (struct ipv4_hdr *)(eth + 1); + ip4h->version_ihl = 4 << 4 | + sizeof(*ip4h) / IPV4_IHL_MULTIPLIER; + ip4h->time_to_live = 64; + ip4h->next_proto_id = proto_id; + } else if (l3_type == ETHER_TYPE_IPv6) { + dst->l3_len = sizeof(*ip6h); + ip6h = (struct ipv6_hdr *)(eth + 1); + ip6h->vtc_flow = 6 << 4; + ip6h->proto = proto_id; + ip6h->hop_limits = 64; + } +} + +static int +netbe_add_dest(struct netbe_lcore *lc, uint32_t dev_idx, uint16_t family, + const struct netbe_dest *dst, uint32_t dnum) +{ + int32_t rc, sid; + uint8_t proto; + uint16_t l3_type; + uint32_t i, n, m; + struct tle_dest *dp; + + if (family == AF_INET) { + n = lc->dst4_num; + dp = lc->dst4 + n; + m = RTE_DIM(lc->dst4); + l3_type = ETHER_TYPE_IPv4; + } else { + n = lc->dst6_num; + dp = lc->dst6 + n; + m = RTE_DIM(lc->dst6); + l3_type = ETHER_TYPE_IPv6; + } + + if (n + dnum >= m) { + RTE_LOG(ERR, USER1, "%s(lcore=%u, family=%hu, dnum=%u) exceeds " + "maximum allowed number of destinations(%u);\n", + __func__, lc->id, family, dnum, m); + return -ENOSPC; + } + + sid = rte_lcore_to_socket_id(lc->id); + proto = (becfg.proto == TLE_PROTO_UDP) ? 
IPPROTO_UDP : IPPROTO_TCP; + rc = 0; + + for (i = 0; i != dnum && rc == 0; i++) { + fill_dst(dp + i, lc->prtq + dev_idx, dst + i, l3_type, sid, + proto); + if (family == AF_INET) + rc = netbe_add_ipv4_route(lc, dst + i, n + i); + else + rc = netbe_add_ipv6_route(lc, dst + i, n + i); + } + + if (family == AF_INET) + lc->dst4_num = n + i; + else + lc->dst6_num = n + i; + + return rc; +} + +static inline void +fill_arp_reply(struct netbe_dev *dev, struct rte_mbuf *m) +{ + struct ether_hdr *eth; + struct arp_hdr *ahdr; + struct arp_ipv4 *adata; + uint32_t tip; + + /* set up the ethernet data */ + eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + eth->d_addr = eth->s_addr; + eth->s_addr = dev->port.mac; + + /* set up the arp data */ + ahdr = rte_pktmbuf_mtod_offset(m, struct arp_hdr *, m->l2_len); + adata = &ahdr->arp_data; + + ahdr->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY); + + tip = adata->arp_tip; + adata->arp_tip = adata->arp_sip; + adata->arp_sip = tip; + + adata->arp_tha = adata->arp_sha; + adata->arp_sha = dev->port.mac; +} + +/* this is a semi ARP response implementation of RFC 826 + * in RFC, it algo is as below + * + * ?Do I have the hardware type in ar$hrd? + * Yes: (almost definitely) + * [optionally check the hardware length ar$hln] + * ?Do I speak the protocol in ar$pro? + * Yes: + * [optionally check the protocol length ar$pln] + * Merge_flag := false + * If the pair <protocol type, sender protocol address> is + * already in my translation table, update the sender + * hardware address field of the entry with the new + * information in the packet and set Merge_flag to true. + * ?Am I the target protocol address? + * Yes: + * If Merge_flag is false, add the triplet <protocol type, + * sender protocol address, sender hardware address> to + * the translation table. + * ?Is the opcode ares_op$REQUEST? (NOW look at the opcode!!) + * Yes: + * Swap hardware and protocol fields, putting the local + * hardware and protocol addresses in the sender fields. + * Set the ar$op field to ares_op$REPLY + * Send the packet to the (new) target hardware address on + * the same hardware on which the request was received. + * + * So, in our implementation we skip updating the local cache, + * we assume that local cache is ok, so we just reply the packet. 
+ */ + +static inline void +send_arp_reply(struct netbe_dev *dev, struct pkt_buf *pb) +{ + uint32_t i, n, num; + struct rte_mbuf **m; + + m = pb->pkt; + num = pb->num; + for (i = 0; i != num; i++) { + fill_arp_reply(dev, m[i]); + NETBE_PKT_DUMP(m[i]); + } + + n = rte_eth_tx_burst(dev->port.id, dev->txqid, m, num); + NETBE_TRACE("%s: sent n=%u arp replies\n", __func__, n); + + /* free mbufs with unsent arp response */ + for (i = n; i != num; i++) + rte_pktmbuf_free(m[i]); + + pb->num = 0; +} + +static inline void +netbe_rx(struct netbe_lcore *lc, uint32_t pidx) +{ + uint32_t j, k, n; + struct rte_mbuf *pkt[MAX_PKT_BURST]; + struct rte_mbuf *rp[MAX_PKT_BURST]; + int32_t rc[MAX_PKT_BURST]; + struct pkt_buf *abuf; + + n = rte_eth_rx_burst(lc->prtq[pidx].port.id, + lc->prtq[pidx].rxqid, pkt, RTE_DIM(pkt)); + + if (n != 0) { + lc->prtq[pidx].rx_stat.in += n; + NETBE_TRACE("%s(%u): rte_eth_rx_burst(%u, %u) returns %u\n", + __func__, lc->id, lc->prtq[pidx].port.id, + lc->prtq[pidx].rxqid, n); + + k = tle_rx_bulk(lc->prtq[pidx].dev, pkt, rp, rc, n); + + lc->prtq[pidx].rx_stat.up += k; + lc->prtq[pidx].rx_stat.drop += n - k; + NETBE_TRACE("%s(%u): tle_%s_rx_bulk(%p, %u) returns %u\n", + __func__, lc->id, proto_name[lc->proto], + lc->prtq[pidx].dev, n, k); + + for (j = 0; j != n - k; j++) { + NETBE_TRACE("%s:%d(port=%u) rp[%u]={%p, %d};\n", + __func__, __LINE__, lc->prtq[pidx].port.id, + j, rp[j], rc[j]); + rte_pktmbuf_free(rp[j]); + } + } + + /* respond to incoming arp requests */ + abuf = &lc->prtq[pidx].arp_buf; + if (abuf->num == 0) + return; + + send_arp_reply(&lc->prtq[pidx], abuf); +} + +static inline void +netbe_tx(struct netbe_lcore *lc, uint32_t pidx) +{ + uint32_t j, k, n; + struct rte_mbuf **mb; + + n = lc->prtq[pidx].tx_buf.num; + k = RTE_DIM(lc->prtq[pidx].tx_buf.pkt) - n; + mb = lc->prtq[pidx].tx_buf.pkt; + + if (k >= RTE_DIM(lc->prtq[pidx].tx_buf.pkt) / 2) { + j = tle_tx_bulk(lc->prtq[pidx].dev, mb + n, k); + n += j; + lc->prtq[pidx].tx_stat.down += j; + } + + if (n == 0) + return; + + NETBE_TRACE("%s(%u): tle_%s_tx_bulk(%p) returns %u,\n" + "total pkts to send: %u\n", + __func__, lc->id, proto_name[lc->proto], + lc->prtq[pidx].dev, j, n); + + for (j = 0; j != n; j++) + NETBE_PKT_DUMP(mb[j]); + + k = rte_eth_tx_burst(lc->prtq[pidx].port.id, + lc->prtq[pidx].txqid, mb, n); + + lc->prtq[pidx].tx_stat.out += k; + lc->prtq[pidx].tx_stat.drop += n - k; + NETBE_TRACE("%s(%u): rte_eth_tx_burst(%u, %u, %u) returns %u\n", + __func__, lc->id, lc->prtq[pidx].port.id, lc->prtq[pidx].txqid, + n, k); + + lc->prtq[pidx].tx_buf.num = n - k; + if (k != 0) + for (j = k; j != n; j++) + mb[j - k] = mb[j]; +} + +static inline void +netbe_lcore(void) +{ + uint32_t i; + struct netbe_lcore *lc; + + lc = RTE_PER_LCORE(_be); + if (lc == NULL) + return; + + for (i = 0; i != lc->prtq_num; i++) { + netbe_rx(lc, i); + netbe_tx(lc, i); + } +} + +static inline void +netfe_rx_process(__rte_unused uint32_t lcore, struct netfe_stream *fes) +{ + uint32_t k, n; + + n = fes->pbuf.num; + k = RTE_DIM(fes->pbuf.pkt) - n; + + /* packet buffer is full, can't receive any new packets. */ + if (k == 0) { + tle_event_idle(fes->rxev); + fes->stat.rxev[TLE_SEV_IDLE]++; + return; + } + + n = tle_stream_recv(fes->s, fes->pbuf.pkt + n, k); + if (n == 0) + return; + + NETFE_TRACE("%s(%u): tle_%s_stream_recv(%p, %u) returns %u\n", + __func__, lcore, proto_name[fes->proto], fes->s, k, n); + + fes->pbuf.num += n; + fes->stat.rxp += n; + + /* free all received mbufs. 
*/ + if (fes->op == RXONLY) + fes->stat.rxb += pkt_buf_empty(&fes->pbuf); + /* mark stream as writable */ + else if (k == RTE_DIM(fes->pbuf.pkt)) { + if (fes->op == RXTX) { + tle_event_active(fes->txev, TLE_SEV_UP); + fes->stat.txev[TLE_SEV_UP]++; + } else if (fes->op == FWD) { + tle_event_raise(fes->txev); + fes->stat.txev[TLE_SEV_UP]++; + } + } +} + +#endif /* COMMON_H_ */ diff --git a/examples/udpfwd/main_dpdk_legacy.h b/examples/l4fwd/dpdk_legacy.h index e4bff24..84fab17 100644 --- a/examples/udpfwd/main_dpdk_legacy.h +++ b/examples/l4fwd/dpdk_legacy.h @@ -19,27 +19,11 @@ #include "dpdk_version.h" /* - * Helper functions, verify the queue for corresponding UDP port. - */ -static uint8_t -verify_queue_for_port(const struct netbe_dev *prtq, const uint16_t lport) -{ - uint32_t align_nb_q, qid; - - align_nb_q = rte_align32pow2(prtq->port.nb_lcore); - qid = (lport % align_nb_q) % prtq->port.nb_lcore; - if (prtq->rxqid == qid) - return 1; - - return 0; -} - -/* * UDP IPv4 destination lookup callback. */ static int lpm4_dst_lookup(void *data, const struct in_addr *addr, - struct tle_udp_dest *res) + struct tle_dest *res) { int32_t rc; #ifdef DPDK_VERSION_GE_1604 @@ -48,7 +32,7 @@ lpm4_dst_lookup(void *data, const struct in_addr *addr, uint8_t idx; #endif struct netbe_lcore *lc; - struct tle_udp_dest *dst; + struct tle_dest *dst; lc = data; @@ -56,7 +40,7 @@ lpm4_dst_lookup(void *data, const struct in_addr *addr, if (rc == 0) { dst = &lc->dst4[idx]; rte_memcpy(res, dst, dst->l2_len + dst->l3_len + - offsetof(struct tle_udp_dest, hdr)); + offsetof(struct tle_dest, hdr)); } return rc; } @@ -105,7 +89,7 @@ lcore_lpm_init(struct netbe_lcore *lc) */ static int netbe_find4(const struct in_addr *laddr, const uint16_t lport, - const struct in_addr *raddr, const uint32_t be_lc) + const struct in_addr *raddr, const uint32_t belc) { uint32_t i, j; #ifdef DPDK_VERSION_GE_1604 @@ -120,14 +104,14 @@ netbe_find4(const struct in_addr *laddr, const uint16_t lport, return 0; /* search by provided be_lcore */ - if (be_lc != LCORE_ID_ANY) { + if (belc != LCORE_ID_ANY) { for (i = 0; i != becfg.cpu_num; i++) { bc = becfg.cpu + i; - if (be_lc == bc->id) + if (belc == bc->id) return i; } RTE_LOG(NOTICE, USER1, "%s: no stream with be_lcore=%u\n", - __func__, be_lc); + __func__, belc); return -ENOENT; } diff --git a/examples/udpfwd/dpdk_version.h b/examples/l4fwd/dpdk_version.h index 4f6bdfb..43235c8 100644 --- a/examples/udpfwd/dpdk_version.h +++ b/examples/l4fwd/dpdk_version.h @@ -23,7 +23,7 @@ #define DPDK_VERSION_GE_1604 #endif #elif defined(RTE_VER_YEAR) -#if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0) +#if RTE_VERSION_NUM(16, 4, 0, 0) <= RTE_VERSION #define DPDK_VERSION_GE_1604 #endif #else diff --git a/examples/udpfwd/fe.cfg b/examples/l4fwd/fe.cfg index 2706323..2706323 100644 --- a/examples/udpfwd/fe.cfg +++ b/examples/l4fwd/fe.cfg diff --git a/examples/udpfwd/fwdtbl.h b/examples/l4fwd/fwdtbl.h index 1c4265e..1c4265e 100644 --- a/examples/udpfwd/fwdtbl.h +++ b/examples/l4fwd/fwdtbl.h diff --git a/examples/udpfwd/gen_fe_cfg.py b/examples/l4fwd/gen_fe_cfg.py index dbb500b..67a8d5c 100755 --- a/examples/udpfwd/gen_fe_cfg.py +++ b/examples/l4fwd/gen_fe_cfg.py @@ -31,7 +31,7 @@ def print_usage (): "lcores are comma-separated, within double quote" print " -b, --be_lcore_list: list of lcores used for BE. 
Multiple " \ "lcores are comma-separated, within double quote" - print " -p, --start_port: starting UDP port number" + print " -p, --start_port: starting TCP/UDP port number" print " -n, --number_of_streams: number of streams to be generated" print " -m, --mode: mode of the application. [echo, rx, tx, fwd]" print " -q, --local_address: local address of the stream" @@ -55,7 +55,8 @@ def print_stream(mode, la, ra, fwd_la, fwd_ra, lcore, belcore, lport, lport_str = str(lport) fwrport_str = str(fwrport) - stream = "lcore=" + str(lcore) + ",belcore=" + str(belcore) + ",op=" + mode + stream = "lcore=" + str(lcore) + ",belcore=" + str(belcore) + stream += ",op=" + mode stream += ",laddr=" + la + ",lport=" + lport_str stream += ",raddr=" + ra + ",rport=0" diff --git a/examples/l4fwd/lcore.h b/examples/l4fwd/lcore.h new file mode 100644 index 0000000..d88e434 --- /dev/null +++ b/examples/l4fwd/lcore.h @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LCORE_H_ +#define LCORE_H_ + +#include "dpdk_legacy.h" + +/* + * IPv6 destination lookup callback. + */ +static int +lpm6_dst_lookup(void *data, const struct in6_addr *addr, + struct tle_dest *res) +{ + int32_t rc; + uint8_t idx; + struct netbe_lcore *lc; + struct tle_dest *dst; + uintptr_t p; + + lc = data; + p = (uintptr_t)addr->s6_addr; + + rc = rte_lpm6_lookup(lc->lpm6, (uint8_t *)p, &idx); + if (rc == 0) { + dst = &lc->dst6[idx]; + rte_memcpy(res, dst, dst->l2_len + dst->l3_len + + offsetof(struct tle_dest, hdr)); + } + return rc; +} + +static int +create_context(struct netbe_lcore *lc, const struct tle_ctx_param *ctx_prm) +{ + uint32_t rc = 0, sid; + uint64_t frag_cycles; + struct tle_ctx_param cprm; + + if (lc->ctx == NULL) { + sid = rte_lcore_to_socket_id(lc->id); + + rc = lcore_lpm_init(lc); + if (rc != 0) + return rc; + + cprm = *ctx_prm; + cprm.socket_id = sid; + cprm.proto = lc->proto; + cprm.lookup4 = lpm4_dst_lookup; + cprm.lookup4_data = lc; + cprm.lookup6 = lpm6_dst_lookup; + cprm.lookup6_data = lc; + + frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / + MS_PER_S * FRAG_TTL; + + lc->ftbl = rte_ip_frag_table_create(cprm.max_streams, + FRAG_TBL_BUCKET_ENTRIES, cprm.max_streams, + frag_cycles, sid); + + RTE_LOG(NOTICE, USER1, "%s(lcore=%u): frag_tbl=%p;\n", + __func__, lc->id, lc->ftbl); + + lc->ctx = tle_ctx_create(&cprm); + + RTE_LOG(NOTICE, USER1, "%s(lcore=%u): proto=%s, ctx=%p;\n", + __func__, lc->id, proto_name[lc->proto], lc->ctx); + + if (lc->ctx == NULL || lc->ftbl == NULL) + rc = ENOMEM; + } + + return rc; +} + +/* + * BE lcore setup routine. 
+ */ +static int +lcore_init(struct netbe_lcore *lc, const struct tle_ctx_param *ctx_prm, + const uint32_t prtqid, const uint16_t *bl_ports, uint32_t nb_bl_ports) +{ + int32_t rc = 0; + struct tle_dev_param dprm; + + rc = create_context(lc, ctx_prm); + + if (rc == 0 && lc->ctx != NULL) { + memset(&dprm, 0, sizeof(dprm)); + dprm.rx_offload = lc->prtq[prtqid].port.rx_offload; + dprm.tx_offload = lc->prtq[prtqid].port.tx_offload; + dprm.local_addr4.s_addr = lc->prtq[prtqid].port.ipv4; + memcpy(&dprm.local_addr6, &lc->prtq[prtqid].port.ipv6, + sizeof(lc->prtq[prtqid].port.ipv6)); + dprm.bl4.nb_port = nb_bl_ports; + dprm.bl4.port = bl_ports; + dprm.bl6.nb_port = nb_bl_ports; + dprm.bl6.port = bl_ports; + + lc->prtq[prtqid].dev = tle_add_dev(lc->ctx, &dprm); + + RTE_LOG(NOTICE, USER1, + "%s(lcore=%u, port=%u, qid=%u), dev: %p\n", + __func__, lc->id, lc->prtq[prtqid].port.id, + lc->prtq[prtqid].rxqid, lc->prtq[prtqid].dev); + + if (lc->prtq[prtqid].dev == NULL) + rc = -rte_errno; + + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s(lcore=%u) failed with error code: %d\n", + __func__, lc->id, rc); + tle_ctx_destroy(lc->ctx); + rte_ip_frag_table_destroy(lc->ftbl); + rte_lpm_free(lc->lpm4); + rte_lpm6_free(lc->lpm6); + rte_free(lc->prtq[prtqid].port.lcore_id); + lc->prtq[prtqid].port.nb_lcore = 0; + rte_free(lc->prtq); + lc->prtq_num = 0; + return rc; + } + } + + return rc; +} + +static uint16_t +create_blocklist(const struct netbe_port *beprt, uint16_t *bl_ports, + uint32_t q) +{ + uint32_t i, j, qid, align_nb_q; + + align_nb_q = rte_align32pow2(beprt->nb_lcore); + for (i = 0, j = 0; i < (UINT16_MAX + 1); i++) { + qid = (i % align_nb_q) % beprt->nb_lcore; + if (qid != q) + bl_ports[j++] = i; + } + + return j; +} + +static int +netbe_lcore_init(struct netbe_cfg *cfg, const struct tle_ctx_param *ctx_prm) +{ + int32_t rc; + uint32_t i, j, nb_bl_ports = 0, sz; + struct netbe_lcore *lc; + static uint16_t *bl_ports; + + /* Create the context and attached queue for each lcore. 
*/ + rc = 0; + sz = sizeof(uint16_t) * UINT16_MAX; + bl_ports = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE); + for (i = 0; i < cfg->cpu_num; i++) { + lc = &cfg->cpu[i]; + for (j = 0; j < lc->prtq_num; j++) { + memset((uint8_t *)bl_ports, 0, sz); + /* create list of blocked ports based on q */ + nb_bl_ports = create_blocklist(&lc->prtq[j].port, + bl_ports, lc->prtq[j].rxqid); + RTE_LOG(NOTICE, USER1, + "lc=%u, q=%u, nb_bl_ports=%u\n", + lc->id, lc->prtq[j].rxqid, nb_bl_ports); + + rc = lcore_init(lc, ctx_prm, j, bl_ports, nb_bl_ports); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: failed with error code: %d\n", + __func__, rc); + rte_free(bl_ports); + return rc; + } + } + } + rte_free(bl_ports); + + return 0; +} + +static int +netfe_lcore_cmp(const void *s1, const void *s2) +{ + const struct netfe_stream_prm *p1, *p2; + + p1 = s1; + p2 = s2; + return p1->lcore - p2->lcore; +} + +static int +netbe_find6(const struct in6_addr *laddr, uint16_t lport, + const struct in6_addr *raddr, uint32_t belc) +{ + uint32_t i, j; + uint8_t idx; + struct netbe_lcore *bc; + + /* we have exactly one BE, use it for all traffic */ + if (becfg.cpu_num == 1) + return 0; + + /* search by provided be_lcore */ + if (belc != LCORE_ID_ANY) { + for (i = 0; i != becfg.cpu_num; i++) { + bc = becfg.cpu + i; + if (belc == bc->id) + return i; + } + RTE_LOG(NOTICE, USER1, "%s: no stream with belcore=%u\n", + __func__, belc); + return -ENOENT; + } + + /* search by local address */ + if (memcmp(laddr, &in6addr_any, sizeof(*laddr)) != 0) { + for (i = 0; i != becfg.cpu_num; i++) { + bc = becfg.cpu + i; + /* search by queue for the local port */ + for (j = 0; j != bc->prtq_num; j++) { + if (memcmp(laddr, &bc->prtq[j].port.ipv6, + sizeof(*laddr)) == 0) { + + if (lport == 0) + return i; + + if (verify_queue_for_port(bc->prtq + j, + lport) != 0) + return i; + } + } + } + } + + /* search by remote address */ + if (memcmp(raddr, &in6addr_any, sizeof(*raddr)) == 0) { + for (i = 0; i != becfg.cpu_num; i++) { + bc = becfg.cpu + i; + if (rte_lpm6_lookup(bc->lpm6, + (uint8_t *)(uintptr_t)raddr->s6_addr, + &idx) == 0) { + + if (lport == 0) + return i; + + /* search by queue for the local port */ + for (j = 0; j != bc->prtq_num; j++) + if (verify_queue_for_port(bc->prtq + j, + lport) != 0) + return i; + } + } + } + + return -ENOENT; +} + +static int +netbe_find(const struct sockaddr_storage *la, + const struct sockaddr_storage *ra, + uint32_t belc) +{ + const struct sockaddr_in *l4, *r4; + const struct sockaddr_in6 *l6, *r6; + + if (la->ss_family == AF_INET) { + l4 = (const struct sockaddr_in *)la; + r4 = (const struct sockaddr_in *)ra; + return netbe_find4(&l4->sin_addr, ntohs(l4->sin_port), + &r4->sin_addr, belc); + } else if (la->ss_family == AF_INET6) { + l6 = (const struct sockaddr_in6 *)la; + r6 = (const struct sockaddr_in6 *)ra; + return netbe_find6(&l6->sin6_addr, ntohs(l6->sin6_port), + &r6->sin6_addr, belc); + } + return -EINVAL; +} + +static int +netfe_sprm_flll_be(struct netfe_sprm *sp, uint32_t line, uint32_t belc) +{ + int32_t bidx; + + bidx = netbe_find(&sp->local_addr, &sp->remote_addr, belc); + + if (bidx < 0) { + RTE_LOG(ERR, USER1, "%s(line=%u): no BE for that stream\n", + __func__, line); + return -EINVAL; + } + sp->bidx = bidx; + return 0; +} + +/* start front-end processing. */ +static int +netfe_lcore_fill(struct lcore_prm prm[RTE_MAX_LCORE], + struct netfe_lcore_prm *lprm) +{ + uint32_t belc; + uint32_t i, j, lc, ln; + struct netfe_stream_prm *s; + + /* determine on what BE each stream should be open. 
*/ + for (i = 0; i != lprm->nb_streams; i++) { + s = lprm->stream + i; + ln = s->line; + belc = s->belcore; + if (netfe_sprm_flll_be(&s->sprm, ln, belc) != 0 || + (s->op == FWD && + netfe_sprm_flll_be(&s->fprm, ln, belc) != 0)) + return -EINVAL; + } + + /* group all fe parameters by lcore. */ + + qsort(lprm->stream, lprm->nb_streams, sizeof(lprm->stream[0]), + netfe_lcore_cmp); + + for (i = 0; i != lprm->nb_streams; i = j) { + + lc = lprm->stream[i].lcore; + ln = lprm->stream[i].line; + + if (rte_lcore_is_enabled(lc) == 0) { + RTE_LOG(ERR, USER1, + "%s(line=%u): lcore %u is not enabled\n", + __func__, ln, lc); + return -EINVAL; + } + + if (rte_get_master_lcore() != lc && + rte_eal_get_lcore_state(lc) == RUNNING) { + RTE_LOG(ERR, USER1, + "%s(line=%u): lcore %u already in use\n", + __func__, ln, lc); + return -EINVAL; + } + + for (j = i + 1; j != lprm->nb_streams && + lc == lprm->stream[j].lcore; + j++) + ; + + prm[lc].fe.max_streams = lprm->max_streams; + prm[lc].fe.nb_streams = j - i; + prm[lc].fe.stream = lprm->stream + i; + } + + return 0; +} + +#endif /* LCORE_H_ */ diff --git a/examples/l4fwd/main.c b/examples/l4fwd/main.c new file mode 100644 index 0000000..37bd03e --- /dev/null +++ b/examples/l4fwd/main.c @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <time.h> + +#include "netbe.h" +#include "parse.h" + +#define MAX_RULES 0x100 +#define MAX_TBL8 0x800 + +#define RX_RING_SIZE 0x400 +#define TX_RING_SIZE 0x800 + +#define MPOOL_CACHE_SIZE 0x100 +#define MPOOL_NB_BUF 0x20000 + +#define FRAG_MBUF_BUF_SIZE (RTE_PKTMBUF_HEADROOM + TLE_DST_MAX_HDR) +#define FRAG_TTL MS_PER_S +#define FRAG_TBL_BUCKET_ENTRIES 16 + +#define FIRST_PORT 0x8000 + +#define RX_CSUM_OFFLOAD (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM) +#define TX_CSUM_OFFLOAD (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM) + +RTE_DEFINE_PER_LCORE(struct netbe_lcore *, _be); +RTE_DEFINE_PER_LCORE(struct netfe_lcore *, _fe); + +#include "fwdtbl.h" + +/** + * Location to be modified to create the IPv4 hash key which helps + * to distribute packets based on the destination TCP/UDP port. + */ +#define RSS_HASH_KEY_DEST_PORT_LOC_IPV4 15 + +/** + * Location to be modified to create the IPv6 hash key which helps + * to distribute packets based on the destination TCP/UDP port. + */ +#define RSS_HASH_KEY_DEST_PORT_LOC_IPV6 39 + +/** + * Size of the rte_eth_rss_reta_entry64 array to update through + * rte_eth_dev_rss_reta_update. 
+ */ +#define RSS_RETA_CONF_ARRAY_SIZE (ETH_RSS_RETA_SIZE_512/RTE_RETA_GROUP_SIZE) + +static volatile int force_quit; + +static struct netbe_cfg becfg; +static struct rte_mempool *mpool[RTE_MAX_NUMA_NODES + 1]; +static struct rte_mempool *frag_mpool[RTE_MAX_NUMA_NODES + 1]; +static char proto_name[3][10] = {"udp", "tcp", ""}; + +static const struct rte_eth_conf port_conf_default = { + .rxmode = { + .max_rx_pkt_len = ETHER_MAX_VLAN_FRAME_LEN, + .hw_vlan_strip = 1, + .jumbo_frame = 1, + }, +}; + +/* function pointers */ +static TLE_RX_BULK_FUNCTYPE tle_rx_bulk; +static TLE_TX_BULK_FUNCTYPE tle_tx_bulk; +static TLE_STREAM_RECV_FUNCTYPE tle_stream_recv; +static TLE_STREAM_CLOSE_FUNCTYPE tle_stream_close; + +static LCORE_MAIN_FUNCTYPE lcore_main; + +#include "common.h" +#include "parse.h" +#include "lcore.h" +#include "port.h" +#include "tcp.h" +#include "udp.h" + +int verbose = VERBOSE_NONE; + +static void +netbe_lcore_fini(struct netbe_cfg *cfg) +{ + uint32_t i; + + for (i = 0; i != cfg->cpu_num; i++) { + tle_ctx_destroy(cfg->cpu[i].ctx); + rte_ip_frag_table_destroy(cfg->cpu[i].ftbl); + rte_lpm_free(cfg->cpu[i].lpm4); + rte_lpm6_free(cfg->cpu[i].lpm6); + + rte_free(cfg->cpu[i].prtq); + cfg->cpu[i].prtq_num = 0; + } + + rte_free(cfg->cpu); + cfg->cpu_num = 0; + for (i = 0; i != cfg->prt_num; i++) { + rte_free(cfg->prt[i].lcore_id); + cfg->prt[i].nb_lcore = 0; + } + rte_free(cfg->prt); + cfg->prt_num = 0; +} + +static int +netbe_dest_init(const char *fname, struct netbe_cfg *cfg) +{ + int32_t rc; + uint32_t f, i, p; + uint32_t k, l, cnt; + struct netbe_lcore *lc; + struct netbe_dest_prm prm; + + rc = netbe_parse_dest(fname, &prm); + if (rc != 0) + return rc; + + rc = 0; + for (i = 0; i != prm.nb_dest; i++) { + + p = prm.dest[i].port; + f = prm.dest[i].family; + + cnt = 0; + for (k = 0; k != cfg->cpu_num; k++) { + lc = cfg->cpu + k; + for (l = 0; l != lc->prtq_num; l++) + if (lc->prtq[l].port.id == p) { + rc = netbe_add_dest(lc, l, f, + prm.dest + i, 1); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s(lc=%u, family=%u) " + "could not add " + "destinations(%u)\n", + __func__, lc->id, f, i); + return -ENOSPC; + } + cnt++; + } + } + + if (cnt == 0) { + RTE_LOG(ERR, USER1, "%s(%s) error at line %u: " + "port %u not managed by any lcore;\n", + __func__, fname, prm.dest[i].line, p); + break; + } + } + + free(prm.dest); + return rc; +} + +static void +func_ptrs_init(uint32_t proto) { + if (proto == TLE_PROTO_TCP) { + tle_rx_bulk = tle_tcp_rx_bulk; + tle_tx_bulk = tle_tcp_tx_bulk; + tle_stream_recv = tle_tcp_stream_recv; + tle_stream_close = tle_tcp_stream_close; + + lcore_main = lcore_main_tcp; + + } else { + tle_rx_bulk = tle_udp_rx_bulk; + tle_tx_bulk = tle_udp_tx_bulk; + tle_stream_recv = tle_udp_stream_recv; + tle_stream_close = tle_udp_stream_close; + + lcore_main = lcore_main_udp; + } +} + +int +main(int argc, char *argv[]) +{ + int32_t rc; + uint32_t i; + struct tle_ctx_param ctx_prm; + struct netfe_lcore_prm feprm; + struct rte_eth_stats stats; + char fecfg_fname[PATH_MAX + 1]; + char becfg_fname[PATH_MAX + 1]; + struct lcore_prm prm[RTE_MAX_LCORE]; + struct rte_eth_dev_info dev_info; + + fecfg_fname[0] = 0; + becfg_fname[0] = 0; + memset(prm, 0, sizeof(prm)); + + rc = rte_eal_init(argc, argv); + if (rc < 0) + rte_exit(EXIT_FAILURE, + "%s: rte_eal_init failed with error code: %d\n", + __func__, rc); + + memset(&ctx_prm, 0, sizeof(ctx_prm)); + + signal(SIGINT, sig_handle); + + argc -= rc; + argv += rc; + + rc = parse_app_options(argc, argv, &becfg, &ctx_prm, + fecfg_fname, becfg_fname); + if (rc != 
0) + rte_exit(EXIT_FAILURE, + "%s: parse_app_options failed with error code: %d\n", + __func__, rc); + + /* init all the function pointer */ + func_ptrs_init(becfg.proto); + + rc = netbe_port_init(&becfg); + if (rc != 0) + rte_exit(EXIT_FAILURE, + "%s: netbe_port_init failed with error code: %d\n", + __func__, rc); + + rc = netbe_lcore_init(&becfg, &ctx_prm); + if (rc != 0) + sig_handle(SIGQUIT); + + rc = netbe_dest_init(becfg_fname, &becfg); + if (rc != 0) + sig_handle(SIGQUIT); + + for (i = 0; i != becfg.prt_num && rc == 0; i++) { + RTE_LOG(NOTICE, USER1, "%s: starting port %u\n", + __func__, becfg.prt[i].id); + rc = rte_eth_dev_start(becfg.prt[i].id); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: rte_eth_dev_start(%u) returned " + "error code: %d\n", + __func__, becfg.prt[i].id, rc); + sig_handle(SIGQUIT); + } + rte_eth_dev_info_get(becfg.prt[i].id, &dev_info); + rc = update_rss_reta(&becfg.prt[i], &dev_info); + if (rc != 0) + sig_handle(SIGQUIT); + } + + feprm.max_streams = ctx_prm.max_streams * becfg.cpu_num; + + rc = (rc != 0) ? rc : netfe_parse_cfg(fecfg_fname, &feprm); + if (rc != 0) + sig_handle(SIGQUIT); + + for (i = 0; rc == 0 && i != becfg.cpu_num; i++) + prm[becfg.cpu[i].id].be.lc = becfg.cpu + i; + + rc = (rc != 0) ? rc : netfe_lcore_fill(prm, &feprm); + if (rc != 0) + sig_handle(SIGQUIT); + + /* launch all slave lcores. */ + RTE_LCORE_FOREACH_SLAVE(i) { + if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0) + rte_eal_remote_launch(lcore_main, prm + i, i); + } + + /* launch master lcore. */ + i = rte_get_master_lcore(); + if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0) + lcore_main(prm + i); + + rte_eal_mp_wait_lcore(); + + for (i = 0; i != becfg.prt_num; i++) { + RTE_LOG(NOTICE, USER1, "%s: stoping port %u\n", + __func__, becfg.prt[i].id); + rte_eth_stats_get(becfg.prt[i].id, &stats); + RTE_LOG(NOTICE, USER1, "port %u stats={\n" + "ipackets=%" PRIu64 ";" + "ibytes=%" PRIu64 ";" + "ierrors=%" PRIu64 ";" + "imissed=%" PRIu64 ";\n" + "opackets=%" PRIu64 ";" + "obytes=%" PRIu64 ";" + "oerrors=%" PRIu64 ";\n" + "}\n", + becfg.prt[i].id, + stats.ipackets, + stats.ibytes, + stats.ierrors, + stats.imissed, + stats.opackets, + stats.obytes, + stats.oerrors); + rte_eth_dev_stop(becfg.prt[i].id); + } + + netbe_lcore_fini(&becfg); + + return 0; +} diff --git a/examples/udpfwd/netbe.h b/examples/l4fwd/netbe.h index 1e5d9a7..6d25603 100644 --- a/examples/udpfwd/netbe.h +++ b/examples/l4fwd/netbe.h @@ -38,8 +38,10 @@ #include <rte_hash.h> #include <rte_ip.h> #include <rte_ip_frag.h> +#include <rte_tcp.h> #include <rte_udp.h> -#include <tle_udp_impl.h> +#include <tle_tcp.h> +#include <tle_udp.h> #include <tle_event.h> #define MAX_PKT_BURST 0x20 @@ -48,13 +50,24 @@ #define RSS_HASH_KEY_LENGTH 64 /* + * global variables + */ + +enum { + VERBOSE_NONE = 0, + VERBOSE_NUM = 9 +}; + +extern int verbose; + +/* * BE related structures. */ struct netbe_port { uint32_t id; uint32_t nb_lcore; - uint32_t *lcore; + uint32_t *lcore_id; uint32_t mtu; uint32_t rx_offload; uint32_t tx_offload; @@ -92,7 +105,7 @@ struct netbe_dev { uint16_t rxqid; uint16_t txqid; struct netbe_port port; - struct tle_udp_dev *dev; + struct tle_dev *dev; struct { uint64_t in; uint64_t up; @@ -104,6 +117,7 @@ struct netbe_dev { uint64_t drop; } tx_stat; struct pkt_buf tx_buf; + struct pkt_buf arp_buf; }; /* 8 bit LPM user data. */ @@ -111,21 +125,28 @@ struct netbe_dev { struct netbe_lcore { uint32_t id; + uint32_t proto; /**< L4 proto to handle. 
*/ struct rte_lpm *lpm4; struct rte_lpm6 *lpm6; struct rte_ip_frag_tbl *ftbl; - struct tle_udp_ctx *ctx; + struct tle_ctx *ctx; uint32_t prtq_num; uint32_t dst4_num; uint32_t dst6_num; struct netbe_dev *prtq; - struct tle_udp_dest dst4[LCORE_MAX_DST]; - struct tle_udp_dest dst6[LCORE_MAX_DST]; + struct tle_dest dst4[LCORE_MAX_DST]; + struct tle_dest dst6[LCORE_MAX_DST]; struct rte_ip_frag_death_row death_row; + struct { + uint64_t flags[UINT8_MAX + 1]; + } tcp_stat; }; struct netbe_cfg { uint32_t promisc; + uint32_t proto; + uint32_t server; + uint32_t arp; uint32_t prt_num; uint32_t cpu_num; struct netbe_port *prt; @@ -145,13 +166,14 @@ enum { struct netfe_sprm { uint32_t bidx; /* BE index to use. */ - struct tle_udp_stream_param prm; + struct sockaddr_storage local_addr; /**< stream local address. */ + struct sockaddr_storage remote_addr; /**< stream remote address. */ }; struct netfe_stream_prm { uint32_t lcore; - uint32_t be_lcore; - uint32_t line; + uint32_t belcore; + uint16_t line; uint16_t op; uint16_t txlen; /* valid/used only for TXONLY op. */ struct netfe_sprm sprm; @@ -165,33 +187,53 @@ struct netfe_lcore_prm { }; struct netfe_stream { - struct tle_udp_stream *s; + struct tle_stream *s; + struct tle_event *erev; struct tle_event *rxev; struct tle_event *txev; uint16_t op; + uint16_t proto; uint16_t family; uint16_t txlen; struct { uint64_t rxp; + uint64_t rxb; uint64_t txp; + uint64_t txb; uint64_t fwp; uint64_t drops; uint64_t rxev[TLE_SEV_NUM]; uint64_t txev[TLE_SEV_NUM]; + uint64_t erev[TLE_SEV_NUM]; } stat; struct pkt_buf pbuf; + struct sockaddr_storage laddr; struct sockaddr_storage raddr; struct netfe_sprm fwdprm; + struct netfe_stream *fwds; + LIST_ENTRY(netfe_stream) link; +}; + +struct netfe_stream_list { + uint32_t num; + LIST_HEAD(, netfe_stream) head; }; struct netfe_lcore { uint32_t snum; /* max number of streams */ - uint32_t sidx; /* last open stream index */ + struct tle_evq *syneq; + struct tle_evq *ereq; struct tle_evq *rxeq; struct tle_evq *txeq; struct rte_hash *fw4h; struct rte_hash *fw6h; - struct netfe_stream *fs; + struct { + uint64_t acc; + uint64_t rej; + uint64_t ter; + } tcp_stat; + struct netfe_stream_list free; + struct netfe_stream_list use; }; struct lcore_prm { @@ -261,6 +303,28 @@ struct lcore_prm { } while (0) int setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc, - uint16_t qid); + uint16_t qid, uint32_t arp); + +/* + * application function pointers + */ + +typedef int (*LCORE_MAIN_FUNCTYPE)(void *arg); + +/* + * tle_l4p lib function pointers + */ + +typedef uint16_t (*TLE_RX_BULK_FUNCTYPE) + (struct tle_dev *dev, struct rte_mbuf *pkt[], + struct rte_mbuf *rp[], int32_t rc[], uint16_t num); + +typedef uint16_t (*TLE_TX_BULK_FUNCTYPE) + (struct tle_dev *dev, struct rte_mbuf *pkt[], uint16_t num); + +typedef uint16_t (*TLE_STREAM_RECV_FUNCTYPE) + (struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num); + +typedef int (*TLE_STREAM_CLOSE_FUNCTYPE)(struct tle_stream *s); #endif /* __NETBE_H__ */ diff --git a/examples/udpfwd/parse.c b/examples/l4fwd/parse.c index f46c7df..6593221 100644 --- a/examples/udpfwd/parse.c +++ b/examples/l4fwd/parse.c @@ -28,6 +28,75 @@ static const struct { { .name = "fwd", .op = FWD,}, }; +#define OPT_SHORT_ARP 'a' +#define OPT_LONG_ARP "enable-arp" + +#define OPT_SHORT_SBULK 'B' +#define OPT_LONG_SBULK "sburst" + +#define OPT_SHORT_PROMISC 'P' +#define OPT_LONG_PROMISC "promisc" + +#define OPT_SHORT_RBUFS 'R' +#define OPT_LONG_RBUFS "rbufs" + +#define OPT_SHORT_SBUFS 'S' +#define OPT_LONG_SBUFS 
"sbufs" + +#define OPT_SHORT_BECFG 'b' +#define OPT_LONG_BECFG "becfg" + +#define OPT_SHORT_FECFG 'f' +#define OPT_LONG_FECFG "fecfg" + +#define OPT_SHORT_STREAMS 's' +#define OPT_LONG_STREAMS "streams" + +#define OPT_SHORT_UDP 'U' +#define OPT_LONG_UDP "udp" + +#define OPT_SHORT_TCP 'T' +#define OPT_LONG_TCP "tcp" + +#define OPT_SHORT_LISTEN 'L' +#define OPT_LONG_LISTEN "listen" + +#define OPT_SHORT_VERBOSE 'v' +#define OPT_LONG_VERBOSE "verbose" + +static const struct option long_opt[] = { + {OPT_LONG_ARP, 1, 0, OPT_SHORT_ARP}, + {OPT_LONG_SBULK, 1, 0, OPT_SHORT_SBULK}, + {OPT_LONG_PROMISC, 0, 0, OPT_SHORT_PROMISC}, + {OPT_LONG_RBUFS, 1, 0, OPT_SHORT_RBUFS}, + {OPT_LONG_SBUFS, 1, 0, OPT_SHORT_SBUFS}, + {OPT_LONG_BECFG, 1, 0, OPT_SHORT_BECFG}, + {OPT_LONG_FECFG, 1, 0, OPT_SHORT_FECFG}, + {OPT_LONG_STREAMS, 1, 0, OPT_SHORT_STREAMS}, + {OPT_LONG_UDP, 0, 0, OPT_SHORT_UDP}, + {OPT_LONG_TCP, 0, 0, OPT_SHORT_TCP}, + {OPT_LONG_LISTEN, 0, 0, OPT_SHORT_LISTEN}, + {OPT_LONG_VERBOSE, 1, 0, OPT_SHORT_VERBOSE}, + {NULL, 0, 0, 0} +}; + +static int +parse_uint_val(__rte_unused const char *key, const char *val, void *prm) +{ + union parse_val *rv; + unsigned long v; + char *end; + + rv = prm; + errno = 0; + v = strtoul(val, &end, 0); + if (errno != 0 || end[0] != 0 || v > UINT32_MAX) + return -EINVAL; + + rv->u64 = v; + return 0; +} + static int parse_ipv4_val(__rte_unused const char *key, const char *val, void *prm) { @@ -117,6 +186,7 @@ parse_lcore_list_val(__rte_unused const char *key, const char *val, void *prm) char *end; rv = prm; + errno = 0; a = strtoul(val, &end, 0); if (errno != 0 || (end[0] != 0 && end[0] != '-') || a > UINT32_MAX) @@ -197,7 +267,7 @@ parse_kvargs(const char *arg, const char *keys_man[], uint32_t nb_man, } int -parse_netbe_arg(struct netbe_port *prt, const char *arg, rte_cpuset_t *cpuset) +parse_netbe_arg(struct netbe_port *prt, const char *arg, rte_cpuset_t *pcpu) { int32_t rc; uint32_t i, j, nc; @@ -239,14 +309,14 @@ parse_netbe_arg(struct netbe_port *prt, const char *arg, rte_cpuset_t *cpuset) for (i = 0, nc = 0; i < RTE_MAX_LCORE; i++) nc += CPU_ISSET(i, &val[1].cpuset); - prt->lcore = rte_zmalloc(NULL, nc * sizeof(prt->lcore[0]), + prt->lcore_id = rte_zmalloc(NULL, nc * sizeof(prt->lcore_id[0]), RTE_CACHE_LINE_SIZE); prt->nb_lcore = nc; for (i = 0, j = 0; i < RTE_MAX_LCORE; i++) if (CPU_ISSET(i, &val[1].cpuset)) - prt->lcore[j++] = i; - CPU_OR(cpuset, cpuset, &val[1].cpuset); + prt->lcore_id[j++] = i; + CPU_OR(pcpu, pcpu, &val[1].cpuset); prt->mtu = val[2].u64; prt->rx_offload = val[3].u64; @@ -348,7 +418,7 @@ netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm) n = 0; num = 0; dp = NULL; - + rc = 0; for (ln = 0; fgets(line, sizeof(line), f) != NULL; ln++) { /* skip spaces at the start. */ @@ -379,8 +449,9 @@ netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm) } dp[n].line = ln + 1; - if ((rc = parse_netbe_dest(dp + n, s)) != 0 || - (rc = check_netbe_dest(dp + n)) != 0) { + rc = parse_netbe_dest(dp + n, s); + rc = (rc != 0) ? 
rc : check_netbe_dest(dp + n); + if (rc != 0) { RTE_LOG(ERR, USER1, "%s(%s) failed to parse line %u\n", __func__, fname, dp[n].line); break; @@ -465,12 +536,12 @@ parse_netfe_arg(struct netfe_stream_prm *sp, const char *arg) return rc; sp->lcore = val[0].u64; sp->op = val[1].u64; - pv2saddr(&sp->sprm.prm.local_addr, val + 2, val + 3); - pv2saddr(&sp->sprm.prm.remote_addr, val + 4, val + 5); + pv2saddr(&sp->sprm.local_addr, val + 2, val + 3); + pv2saddr(&sp->sprm.remote_addr, val + 4, val + 5); sp->txlen = val[6].u64; - pv2saddr(&sp->fprm.prm.local_addr, val + 7, val + 8); - pv2saddr(&sp->fprm.prm.remote_addr, val + 9, val + 10); - sp->be_lcore = val[11].u64; + pv2saddr(&sp->fprm.local_addr, val + 7, val + 8); + pv2saddr(&sp->fprm.remote_addr, val + 9, val + 10); + sp->belcore = val[11].u64; return 0; } @@ -510,8 +581,8 @@ check_netfe_arg(const struct netfe_stream_prm *sp) { char buf[INET6_ADDRSTRLEN]; - if (sp->sprm.prm.local_addr.ss_family != - sp->sprm.prm.remote_addr.ss_family) { + if (sp->sprm.local_addr.ss_family != + sp->sprm.remote_addr.ss_family) { RTE_LOG(ERR, USER1, "invalid arg at line %u: " "laddr and raddr for different protocols\n", sp->line); @@ -524,27 +595,27 @@ check_netfe_arg(const struct netfe_stream_prm *sp) "exceeds allowed values: (0, %u]\n", sp->line, sp->txlen, RTE_MBUF_DEFAULT_DATAROOM); return -EINVAL; - } else if (is_addr_wc(&sp->sprm.prm.remote_addr)) { + } else if (is_addr_wc(&sp->sprm.remote_addr)) { RTE_LOG(ERR, USER1, "invalid arg at line %u: " "raddr=%s are not allowed for op=%s;\n", sp->line, - format_addr(&sp->sprm.prm.remote_addr, + format_addr(&sp->sprm.remote_addr, buf, sizeof(buf)), format_feop(sp->op)); return -EINVAL; } } else if (sp->op == FWD) { - if (sp->fprm.prm.local_addr.ss_family != - sp->fprm.prm.remote_addr.ss_family) { + if (sp->fprm.local_addr.ss_family != + sp->fprm.remote_addr.ss_family) { RTE_LOG(ERR, USER1, "invalid arg at line %u: " "fwladdr and fwraddr for different protocols\n", sp->line); return -EINVAL; - } else if (is_addr_wc(&sp->fprm.prm.remote_addr)) { + } else if (is_addr_wc(&sp->fprm.remote_addr)) { RTE_LOG(ERR, USER1, "invalid arg at line %u: " "fwaddr=%s are not allowed for op=%s;\n", sp->line, - format_addr(&sp->fprm.prm.remote_addr, + format_addr(&sp->fprm.remote_addr, buf, sizeof(buf)), format_feop(sp->op)); return -EINVAL; @@ -575,7 +646,7 @@ netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp) n = 0; num = 0; sp = NULL; - + rc = 0; for (ln = 0; fgets(line, sizeof(line), f) != NULL; ln++) { /* skip spaces at the start. */ @@ -615,8 +686,9 @@ netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp) } sp[n].line = ln + 1; - if ((rc = parse_netfe_arg(sp + n, s)) != 0 || - (rc = check_netfe_arg(sp + n)) != 0) { + rc = parse_netfe_arg(sp + n, s); + rc = (rc != 0) ? 
rc : check_netfe_arg(sp + n); + if (rc != 0) { RTE_LOG(ERR, USER1, "%s(%s) failed to parse line %u\n", __func__, fname, sp[n].line); break; @@ -636,3 +708,132 @@ netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp) lp->nb_streams = n; return rc; } + +int +parse_app_options(int argc, char **argv, struct netbe_cfg *cfg, + struct tle_ctx_param *ctx_prm, + char *fecfg_fname, char *becfg_fname) +{ + int32_t opt, opt_idx, rc; + uint64_t v; + uint32_t i, j, n, nc; + rte_cpuset_t cpuset; + uint32_t udp = 0, tcp = 0, listen = 0; + + optind = 0; + optarg = NULL; + while ((opt = getopt_long(argc, argv, "aB:LPR:S:TUb:f:s:v:", long_opt, + &opt_idx)) != EOF) { + if (opt == OPT_SHORT_ARP) { + cfg->arp = 1; + } else if (opt == OPT_SHORT_SBULK) { + rc = parse_uint_val(NULL, optarg, &v); + if (rc < 0) + rte_exit(EXIT_FAILURE, "%s: invalid value: %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + ctx_prm->send_bulk_size = v; + } else if (opt == OPT_SHORT_PROMISC) { + cfg->promisc = 1; + } else if (opt == OPT_SHORT_RBUFS) { + rc = parse_uint_val(NULL, optarg, &v); + if (rc < 0) + rte_exit(EXIT_FAILURE, "%s: invalid value: %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + ctx_prm->max_stream_rbufs = v; + } else if (opt == OPT_SHORT_SBUFS) { + rc = parse_uint_val(NULL, optarg, &v); + if (rc < 0) + rte_exit(EXIT_FAILURE, "%s: invalid value: %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + ctx_prm->max_stream_sbufs = v; + } else if (opt == OPT_SHORT_STREAMS) { + rc = parse_uint_val(NULL, optarg, &v); + if (rc < 0) + rte_exit(EXIT_FAILURE, "%s: invalid value: %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + ctx_prm->max_streams = v; + } else if (opt == OPT_SHORT_VERBOSE) { + rc = parse_uint_val(NULL, optarg, &v); + if (rc < 0) + rte_exit(EXIT_FAILURE, "%s: invalid value: %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + verbose = (v > VERBOSE_NUM) ? 
VERBOSE_NUM : v; + } else if (opt == OPT_SHORT_BECFG) { + snprintf(becfg_fname, PATH_MAX, "%s", + optarg); + } else if (opt == OPT_SHORT_FECFG) { + snprintf(fecfg_fname, PATH_MAX, "%s", + optarg); + } else if (opt == OPT_SHORT_UDP) { + udp = 1; + cfg->proto = TLE_PROTO_UDP; + } else if (opt == OPT_SHORT_TCP) { + tcp = 1; + cfg->proto = TLE_PROTO_TCP; + } else if (opt == OPT_SHORT_LISTEN) { + listen = 1; + cfg->server = 1; + } else { + rte_exit(EXIT_FAILURE, + "%s: unknown option: \'%c\'\n", + __func__, opt); + } + } + + if (!udp && !tcp) + rte_exit(EXIT_FAILURE, "%s: either UDP or TCP option has to be " + "provided\n", __func__); + + if (udp && tcp) + rte_exit(EXIT_FAILURE, "%s: both UDP and TCP options are not " + "allowed\n", __func__); + + if (udp && listen) + rte_exit(EXIT_FAILURE, + "%s: listen mode cannot be opened with UDP\n", + __func__); + + if (udp && cfg->arp) + rte_exit(EXIT_FAILURE, + "%s: arp cannot be enabled with UDP\n", + __func__); + + /* parse port params */ + argc -= optind; + argv += optind; + + /* allocate memory for number of ports defined */ + n = (uint32_t)argc; + cfg->prt = rte_zmalloc(NULL, sizeof(struct netbe_port) * n, + RTE_CACHE_LINE_SIZE); + cfg->prt_num = n; + + rc = 0; + for (i = 0; i != n; i++) { + rc = parse_netbe_arg(cfg->prt + i, argv[i], &cpuset); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: processing of \"%s\" failed with error " + "code: %d\n", __func__, argv[i], rc); + for (j = 0; j != i; j++) + rte_free(cfg->prt[j].lcore_id); + rte_free(cfg->prt); + return rc; + } + } + + /* count the number of CPU defined in ports */ + for (i = 0, nc = 0; i < RTE_MAX_LCORE; i++) + nc += CPU_ISSET(i, &cpuset); + + /* allocate memory for number of CPU defined */ + cfg->cpu = rte_zmalloc(NULL, sizeof(struct netbe_lcore) * nc, + RTE_CACHE_LINE_SIZE); + + return 0; +} diff --git a/examples/udpfwd/parse.h b/examples/l4fwd/parse.h index 7df7671..4303623 100644 --- a/examples/udpfwd/parse.h +++ b/examples/l4fwd/parse.h @@ -33,23 +33,6 @@ union parse_val { rte_cpuset_t cpuset; }; -static int -parse_uint_val(__rte_unused const char *key, const char *val, void *prm) -{ - union parse_val *rv; - unsigned long v; - char *end; - - rv = prm; - errno = 0; - v = strtoul(val, &end, 0); - if (errno != 0 || end[0] != 0 || v > UINT32_MAX) - return -EINVAL; - - rv->u64 = v; - return 0; -} - static const char * format_addr(const struct sockaddr_storage *sp, char buf[], size_t len) { @@ -71,11 +54,16 @@ format_addr(const struct sockaddr_storage *sp, char buf[], size_t len) } int parse_netbe_arg(struct netbe_port *prt, const char *arg, - rte_cpuset_t *cpuset); + rte_cpuset_t *pcpu); int netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm); int netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp); +int +parse_app_options(int argc, char **argv, struct netbe_cfg *cfg, + struct tle_ctx_param *ctx_prm, + char *fecfg_fname, char *becfg_fname); + #endif /* __PARSE_H__ */ diff --git a/examples/l4fwd/pkt.c b/examples/l4fwd/pkt.c new file mode 100644 index 0000000..660e618 --- /dev/null +++ b/examples/l4fwd/pkt.c @@ -0,0 +1,872 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <netinet/ip6.h> +#include <rte_arp.h> + +#include "netbe.h" + +static inline uint64_t +_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso, + uint64_t ol3, uint64_t ol2) +{ + return il2 | il3 << 7 | il4 << 16 | tso << 24 | ol3 << 40 | ol2 << 49; +} + +static inline void +fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4) +{ + m->tx_offload = _mbuf_tx_offload(l2, l3, l4, 0, 0, 0); +} + +static inline int +is_ipv4_frag(const struct ipv4_hdr *iph) +{ + const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG); + + return ((mask & iph->fragment_offset) != 0); +} + +static inline uint32_t +get_tcp_header_size(struct rte_mbuf *m, uint32_t l2_len, uint32_t l3_len) +{ + const struct tcp_hdr *tcp; + + tcp = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len); + return (tcp->data_off >> 4) * 4; +} + +static inline void +adjust_ipv4_pktlen(struct rte_mbuf *m, uint32_t l2_len) +{ + uint32_t plen, trim; + const struct ipv4_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2_len); + plen = rte_be_to_cpu_16(iph->total_length) + l2_len; + if (plen < m->pkt_len) { + trim = m->pkt_len - plen; + rte_pktmbuf_trim(m, trim); + } +} + +static inline void +adjust_ipv6_pktlen(struct rte_mbuf *m, uint32_t l2_len) +{ + uint32_t plen, trim; + const struct ipv6_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, l2_len); + plen = rte_be_to_cpu_16(iph->payload_len) + sizeof(*iph) + l2_len; + if (plen < m->pkt_len) { + trim = m->pkt_len - plen; + rte_pktmbuf_trim(m, trim); + } +} + +static inline void +tcp_stat_update(struct netbe_lcore *lc, const struct rte_mbuf *m, + uint32_t l2_len, uint32_t l3_len) +{ + const struct tcp_hdr *th; + + th = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len); + lc->tcp_stat.flags[th->tcp_flags]++; +} + +static inline uint32_t +get_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, uint32_t frag) +{ + const struct ipv4_hdr *iph; + int32_t dlen, len; + + dlen = rte_pktmbuf_data_len(m); + dlen -= l2; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2); + len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER; + + if (frag != 0 && is_ipv4_frag(iph)) { + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_FRAG; + } + + if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto)) + m->packet_type = RTE_PTYPE_UNKNOWN; + + return len; +} + +static inline void +fill_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, + uint32_t frag, uint32_t l4_len) +{ + uint32_t len; + + len = get_ipv4_hdr_len(m, l2, proto, frag); + fill_pkt_hdr_len(m, l2, len, l4_len); + adjust_ipv4_pktlen(m, l2); +} + +static inline int +ipv6x_hdr(uint32_t proto) +{ + return (proto == IPPROTO_HOPOPTS || + proto == IPPROTO_ROUTING || + proto == IPPROTO_FRAGMENT || + proto == IPPROTO_AH || + proto == IPPROTO_NONE || + proto == IPPROTO_DSTOPTS); +} + +static inline uint32_t +get_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto, + uint32_t fproto) +{ + const struct ip6_ext *ipx; + int32_t dlen, 
len, ofs; + + len = sizeof(struct ipv6_hdr); + + dlen = rte_pktmbuf_data_len(m); + dlen -= l2; + + ofs = l2 + len; + ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs); + + while (ofs > 0 && len < dlen) { + + switch (nproto) { + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + ofs = (ipx->ip6e_len + 1) << 3; + break; + case IPPROTO_AH: + ofs = (ipx->ip6e_len + 2) << 2; + break; + case IPPROTO_FRAGMENT: + /* + * tso_segsz is not used by RX, so use it as temporary + * buffer to store the fragment offset. + */ + m->tso_segsz = ofs; + ofs = sizeof(struct ip6_frag); + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_FRAG; + break; + default: + ofs = 0; + } + + if (ofs > 0) { + nproto = ipx->ip6e_nxt; + len += ofs; + ipx += ofs / sizeof(*ipx); + } + } + + /* unrecognized or invalid packet. */ + if ((ofs == 0 && nproto != fproto) || len > dlen) + m->packet_type = RTE_PTYPE_UNKNOWN; + + return len; +} + +static inline uint32_t +get_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto) +{ + const struct ipv6_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + if (iph->proto == fproto) + return sizeof(struct ipv6_hdr); + else if (ipv6x_hdr(iph->proto) != 0) + return get_ipv6x_hdr_len(m, l2, iph->proto, fproto); + + m->packet_type = RTE_PTYPE_UNKNOWN; + return 0; +} + +static inline void +fill_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto, + uint32_t l4_len) +{ + uint32_t len; + + len = get_ipv6_hdr_len(m, l2, fproto); + fill_pkt_hdr_len(m, l2, len, l4_len); + adjust_ipv6_pktlen(m, l2); +} + +static inline struct rte_mbuf * +handle_arp(struct rte_mbuf *m, struct netbe_lcore *lc, uint8_t port, + uint32_t l2len) +{ + const struct arp_hdr *ahdr; + struct pkt_buf *abuf; + + ahdr = rte_pktmbuf_mtod_offset(m, const struct arp_hdr *, l2len); + + if (ahdr->arp_hrd != rte_be_to_cpu_16(ARP_HRD_ETHER) || + ahdr->arp_pro != rte_be_to_cpu_16(ETHER_TYPE_IPv4) || + ahdr->arp_op != rte_be_to_cpu_16(ARP_OP_REQUEST)) { + + m->packet_type = RTE_PTYPE_UNKNOWN; + return m; + } + + m->l2_len = l2len; + abuf = &lc->prtq[port].arp_buf; + if (abuf->num >= RTE_DIM(abuf->pkt)) + return m; + + abuf->pkt[abuf->num++] = m; + + return NULL; +} + +static inline struct rte_mbuf * +fill_eth_tcp_arp_hdr_len(struct rte_mbuf *m, struct netbe_lcore *lc, + uint8_t port) +{ + uint32_t dlen, l2_len, l3_len, l4_len; + uint16_t etp; + const struct ether_hdr *eth; + + dlen = rte_pktmbuf_data_len(m); + + /* check that first segment is at least 54B long. 
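I.e. ether (14B) + ipv4 (20B) + tcp (20B) headers. 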
*/ + if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + + sizeof(struct tcp_hdr)) { + m->packet_type = RTE_PTYPE_UNKNOWN; + return m; + } + + l2_len = sizeof(*eth); + + eth = rte_pktmbuf_mtod(m, const struct ether_hdr *); + etp = eth->ether_type; + if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN)) + l2_len += sizeof(struct vlan_hdr); + + if (etp == rte_be_to_cpu_16(ETHER_TYPE_ARP)) + return handle_arp(m, lc, port, l2_len); + + if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) { + m->packet_type = RTE_PTYPE_L4_TCP | + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_TCP, 1); + l4_len = get_tcp_header_size(m, l2_len, l3_len); + fill_pkt_hdr_len(m, l2_len, l3_len, l4_len); + adjust_ipv4_pktlen(m, l2_len); + } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) && + dlen >= l2_len + sizeof(struct ipv6_hdr) + + sizeof(struct tcp_hdr)) { + m->packet_type = RTE_PTYPE_L4_TCP | + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP); + l4_len = get_tcp_header_size(m, l2_len, l3_len); + fill_pkt_hdr_len(m, l2_len, l3_len, l4_len); + adjust_ipv6_pktlen(m, l2_len); + } else + m->packet_type = RTE_PTYPE_UNKNOWN; + + return m; +} + +static inline void +fill_eth_tcp_hdr_len(struct rte_mbuf *m) +{ + uint32_t dlen, l2_len, l3_len, l4_len; + uint16_t etp; + const struct ether_hdr *eth; + + dlen = rte_pktmbuf_data_len(m); + + /* check that first segment is at least 54B long. */ + if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + + sizeof(struct tcp_hdr)) { + m->packet_type = RTE_PTYPE_UNKNOWN; + return; + } + + l2_len = sizeof(*eth); + + eth = rte_pktmbuf_mtod(m, const struct ether_hdr *); + etp = eth->ether_type; + if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN)) + l2_len += sizeof(struct vlan_hdr); + + if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) { + m->packet_type = RTE_PTYPE_L4_TCP | + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_TCP, 1); + l4_len = get_tcp_header_size(m, l2_len, l3_len); + fill_pkt_hdr_len(m, l2_len, l3_len, l4_len); + adjust_ipv4_pktlen(m, l2_len); + } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) && + dlen >= l2_len + sizeof(struct ipv6_hdr) + + sizeof(struct tcp_hdr)) { + m->packet_type = RTE_PTYPE_L4_TCP | + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP); + l4_len = get_tcp_header_size(m, l2_len, l3_len); + fill_pkt_hdr_len(m, l2_len, l3_len, l4_len); + adjust_ipv6_pktlen(m, l2_len); + } else + m->packet_type = RTE_PTYPE_UNKNOWN; +} + +static inline void +fill_eth_udp_hdr_len(struct rte_mbuf *m) +{ + uint32_t dlen, l2_len; + uint16_t etp; + const struct ether_hdr *eth; + + dlen = rte_pktmbuf_data_len(m); + + /* check that first segment is at least 42B long. 
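I.e. ether (14B) + ipv4 (20B) + udp (8B) headers. 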
*/ + if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + + sizeof(struct udp_hdr)) { + m->packet_type = RTE_PTYPE_UNKNOWN; + return; + } + + l2_len = sizeof(*eth); + + eth = rte_pktmbuf_mtod(m, const struct ether_hdr *); + etp = eth->ether_type; + if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN)) + l2_len += sizeof(struct vlan_hdr); + + if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) { + m->packet_type = RTE_PTYPE_L4_UDP | + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + fill_ipv4_hdr_len(m, l2_len, IPPROTO_UDP, 1, + sizeof(struct udp_hdr)); + } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) && + dlen >= l2_len + sizeof(struct ipv6_hdr) + + sizeof(struct udp_hdr)) { + m->packet_type = RTE_PTYPE_L4_UDP | + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + fill_ipv6_hdr_len(m, l2_len, IPPROTO_UDP, + sizeof(struct udp_hdr)); + } else + m->packet_type = RTE_PTYPE_UNKNOWN; +} + +static inline uint16_t +ipv4x_cksum(const void *iph, size_t len) +{ + uint16_t cksum; + + cksum = rte_raw_cksum(iph, len); + return (cksum == 0xffff) ? cksum : ~cksum; +} + +static inline void +fix_reassembled(struct rte_mbuf *m, int32_t hwcsum, uint32_t proto) +{ + struct ipv4_hdr *iph; + + /* update packet type. */ + m->packet_type &= ~RTE_PTYPE_L4_MASK; + + if (proto == IPPROTO_TCP) + m->packet_type |= RTE_PTYPE_L4_TCP; + else + m->packet_type |= RTE_PTYPE_L4_UDP; + + /* clear the TX IP checksum flag set during reassembly. */ + m->ol_flags &= ~PKT_TX_IP_CKSUM; + + /* fix l3_len after reassemble. */ + if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) + m->l3_len = m->l3_len - sizeof(struct ipv6_extension_fragment); + + /* recalculate ipv4 cksum after reassemble. */ + else if (hwcsum == 0 && RTE_ETH_IS_IPV4_HDR(m->packet_type)) { + iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); + iph->hdr_checksum = ipv4x_cksum(iph, m->l3_len); + } +} + +static struct rte_mbuf * +reassemble(struct rte_mbuf *m, struct netbe_lcore *lc, uint64_t tms, + uint8_t port, uint32_t proto) +{ + uint32_t l3cs; + struct rte_ip_frag_tbl *tbl; + struct rte_ip_frag_death_row *dr; + + tbl = lc->ftbl; + dr = &lc->death_row; + l3cs = lc->prtq[port].port.rx_offload & DEV_RX_OFFLOAD_IPV4_CKSUM; + + if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { + + struct ipv4_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); + + /* process this fragment. */ + m = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, iph); + + } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { + + struct ipv6_hdr *iph; + struct ipv6_extension_fragment *fhdr; + + iph = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len); + + /* + * the fragment header offset was temporarily stored in + * tso_segsz earlier, just to avoid another scan of the + * ipv6 header. + */ + fhdr = rte_pktmbuf_mtod_offset(m, + struct ipv6_extension_fragment *, m->tso_segsz); + m->tso_segsz = 0; + + /* process this fragment. */ + m = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, iph, fhdr); + + } else { + rte_pktmbuf_free(m); + m = NULL; + } + + /* got reassembled packet. */ + if (m != NULL) + fix_reassembled(m, l3cs, proto); + + return m; +} + +/* exclude NULLs from the final list of packets. */ +static inline uint32_t +compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero) +{ + uint32_t i, j, k, l; + + for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) { + + /* found a hole. */ + if (pkt[j] == NULL) { + + /* find how big it is. */ + for (i = j; i-- != 0 && pkt[i] == NULL; ) + ; + /* fill the hole. 
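shift the packets that follow it left, over the hole. 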
*/ + for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++) + pkt[l] = pkt[k]; + + nb_pkt -= j - i; + nb_zero -= j - i; + j = i + 1; + } + } + + return nb_pkt; +} + +/* + * if it is a fragment, try to reassemble it, + * if for some reason it can't be done, then + * set pkt[] entry to NULL. + */ +#define DO_REASSEMBLE(proto) \ +do { \ + if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == \ + RTE_PTYPE_L4_FRAG) { \ + cts = (cts == 0) ? rte_rdtsc() : cts; \ + pkt[j] = reassemble(pkt[j], lc, cts, port, (proto)); \ + x += (pkt[j] == NULL); \ + } \ +} while (0) + +/* + * HW can recognize L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k) + */ +static uint16_t +type0_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, tp; + struct netbe_lcore *lc; + uint32_t l4_len, l3_len, l2_len; + const struct ether_hdr *eth; + + lc = user_param; + l2_len = sizeof(*eth); + + RTE_SET_USED(lc); + + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); + + switch (tp) { + /* non fragmented tcp packets. */ + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4 | + RTE_PTYPE_L2_ETHER): + l4_len = get_tcp_header_size(pkt[j], l2_len, + sizeof(struct ipv4_hdr)); + fill_pkt_hdr_len(pkt[j], l2_len, + sizeof(struct ipv4_hdr), l4_len); + adjust_ipv4_pktlen(pkt[j], l2_len); + break; + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6 | + RTE_PTYPE_L2_ETHER): + l4_len = get_tcp_header_size(pkt[j], l2_len, + sizeof(struct ipv6_hdr)); + fill_pkt_hdr_len(pkt[j], l2_len, + sizeof(struct ipv6_hdr), l4_len); + adjust_ipv6_pktlen(pkt[j], l2_len); + break; + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT | + RTE_PTYPE_L2_ETHER): + l3_len = get_ipv4_hdr_len(pkt[j], l2_len, + IPPROTO_TCP, 0); + l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len); + fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len); + adjust_ipv4_pktlen(pkt[j], l2_len); + break; + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT | + RTE_PTYPE_L2_ETHER): + l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP); + l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len); + fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len); + adjust_ipv6_pktlen(pkt[j], l2_len); + break; + default: + /* treat packet types as invalid. */ + pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; + break; + } + } + + return nb_pkts; +} + +/* + * HW can recognize L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k) + */ +static uint16_t +type0_udp_rx_callback(uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, tp, x; + uint64_t cts; + struct netbe_lcore *lc; + uint32_t l2_len; + const struct ether_hdr *eth; + + lc = user_param; + cts = 0; + l2_len = sizeof(*eth); + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); + + switch (tp) { + /* non fragmented udp packets. 
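HW has already identified the L4 header, so these cannot be fragments. 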
*/ + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4 | + RTE_PTYPE_L2_ETHER): + fill_pkt_hdr_len(pkt[j], l2_len, + sizeof(struct ipv4_hdr), + sizeof(struct udp_hdr)); + adjust_ipv4_pktlen(pkt[j], l2_len); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6 | + RTE_PTYPE_L2_ETHER): + fill_pkt_hdr_len(pkt[j], l2_len, + sizeof(struct ipv6_hdr), + sizeof(struct udp_hdr)); + adjust_ipv6_pktlen(pkt[j], l2_len); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT | + RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], l2_len, + UINT32_MAX, 0, sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT | + RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], l2_len, + IPPROTO_UDP, sizeof(struct udp_hdr)); + break; + /* possibly fragmented udp packets. */ + case (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], l2_len, + IPPROTO_UDP, 1, sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], l2_len, + IPPROTO_UDP, sizeof(struct udp_hdr)); + break; + default: + /* treat packet types as invalid. */ + pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; + break; + } + + DO_REASSEMBLE(IPPROTO_UDP); + } + + /* reassemble was invoked, cleanup its death-row. */ + if (cts != 0) + rte_ip_frag_free_death_row(&lc->death_row, 0); + + if (x == 0) + return nb_pkts; + + NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " + "%u non-reassembled fragments;\n", + __func__, port, queue, nb_pkts, x); + + return compress_pkt_list(pkt, nb_pkts, x); +} + +/* + * HW can recognize L2/L3/L4 and fragments (i40e). + */ +static uint16_t +type1_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, tp; + struct netbe_lcore *lc; + uint32_t l4_len, l3_len, l2_len; + const struct ether_hdr *eth; + + lc = user_param; + l2_len = sizeof(*eth); + + RTE_SET_USED(lc); + + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); + + switch (tp) { + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + l3_len = get_ipv4_hdr_len(pkt[j], l2_len, + IPPROTO_TCP, 0); + l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len); + fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len); + adjust_ipv4_pktlen(pkt[j], l2_len); + tcp_stat_update(lc, pkt[j], l2_len, l3_len); + break; + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP); + l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len); + fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len); + adjust_ipv6_pktlen(pkt[j], l2_len); + tcp_stat_update(lc, pkt[j], l2_len, l3_len); + break; + default: + /* treat packet types as invalid. */ + pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; + break; + } + + } + + return nb_pkts; +} + +/* + * HW can recognize L2/L3/L4 and fragments (i40e). 
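+ * Fragments arrive already marked as RTE_PTYPE_L4_FRAG, so no software fragment detection is needed.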
+ */ +static uint16_t +type1_udp_rx_callback(uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, tp, x; + uint64_t cts; + struct netbe_lcore *lc; + uint32_t l2_len; + const struct ether_hdr *eth; + + lc = user_param; + cts = 0; + l2_len = sizeof(*eth); + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); + + switch (tp) { + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], l2_len, + UINT32_MAX, 0, sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], l2_len, + IPPROTO_UDP, sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], l2_len, + IPPROTO_UDP, 0, sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], l2_len, + IPPROTO_UDP, sizeof(struct udp_hdr)); + break; + default: + /* treat packet types as invalid. */ + pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; + break; + } + + DO_REASSEMBLE(IPPROTO_UDP); + } + + /* reassemble was invoked, cleanup its death-row. */ + if (cts != 0) + rte_ip_frag_free_death_row(&lc->death_row, 0); + + if (x == 0) + return nb_pkts; + + NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " + "%u non-reassembled fragments;\n", + __func__, port, queue, nb_pkts, x); + + return compress_pkt_list(pkt, nb_pkts, x); +} + +/* + * generic, assumes HW doesn't recognize any packet type. + */ +static uint16_t +typen_tcp_arp_rx_callback(uint8_t port, uint16_t queue, struct rte_mbuf *pkt[], + uint16_t nb_pkts, uint16_t max_pkts, void *user_param) +{ + uint32_t j, x; + struct netbe_lcore *lc; + + lc = user_param; + + RTE_SET_USED(queue); + RTE_SET_USED(max_pkts); + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + pkt[j] = fill_eth_tcp_arp_hdr_len(pkt[j], lc, port); + x += (pkt[j] == NULL); + } + + if (x == 0) + return nb_pkts; + + return compress_pkt_list(pkt, nb_pkts, x); +} + +static uint16_t +typen_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j; + struct netbe_lcore *lc; + + lc = user_param; + + RTE_SET_USED(lc); + + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + fill_eth_tcp_hdr_len(pkt[j]); + } + + return nb_pkts; +} + +static uint16_t +typen_udp_rx_callback(uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, x; + uint64_t cts; + struct netbe_lcore *lc; + + lc = user_param; + cts = 0; + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + fill_eth_udp_hdr_len(pkt[j]); + + DO_REASSEMBLE(IPPROTO_UDP); + } + + /* reassemble was invoked, cleanup its death-row. 
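i.e. free the mbufs of fragments dropped by the reassembly table. 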
*/ + if (cts != 0) + rte_ip_frag_free_death_row(&lc->death_row, 0); + + if (x == 0) + return nb_pkts; + + NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " + "%u non-reassembled fragments;\n", + __func__, port, queue, nb_pkts, x); + + return compress_pkt_list(pkt, nb_pkts, x); +} + +#include "pkt_dpdk_legacy.h" diff --git a/examples/udpfwd/pkt_dpdk_legacy.h b/examples/l4fwd/pkt_dpdk_legacy.h index c32f044..d840978 100644 --- a/examples/udpfwd/pkt_dpdk_legacy.h +++ b/examples/l4fwd/pkt_dpdk_legacy.h @@ -18,59 +18,39 @@ #include "dpdk_version.h" +struct ptype2cb { + uint32_t mask; + const char *name; + rte_rx_callback_fn fn; +}; + +enum { + ETHER_PTYPE = 0x1, + IPV4_PTYPE = 0x2, + IPV4_EXT_PTYPE = 0x4, + IPV6_PTYPE = 0x8, + IPV6_EXT_PTYPE = 0x10, + TCP_PTYPE = 0x20, + UDP_PTYPE = 0x40, +}; + #ifdef DPDK_VERSION_GE_1604 -int -setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc, - uint16_t qid) +static uint32_t +get_ptypes(const struct netbe_port *uprt) { - int32_t i, rc; uint32_t smask; - void *cb; - + int32_t i, rc; const uint32_t pmask = RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK; - enum { - ETHER_PTYPE = 0x1, - IPV4_PTYPE = 0x2, - IPV4_EXT_PTYPE = 0x4, - IPV6_PTYPE = 0x8, - IPV6_EXT_PTYPE = 0x10, - UDP_PTYPE = 0x20, - }; - - static const struct { - uint32_t mask; - const char *name; - rte_rx_callback_fn fn; - } ptype2cb[] = { - { - .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE | - IPV6_PTYPE | IPV6_EXT_PTYPE | UDP_PTYPE, - .name = "HW l2/l3x/l4 ptype", - .fn = type0_rx_callback, - }, - { - .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE | - UDP_PTYPE, - .name = "HW l2/l3/l4 ptype", - .fn = type1_rx_callback, - }, - { - .mask = 0, - .name = "no HW ptype", - .fn = typen_rx_callback, - }, - }; - smask = 0; rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, NULL, 0); if (rc < 0) { RTE_LOG(ERR, USER1, "%s(port=%u) failed to get supported ptypes;\n", __func__, uprt->id); - return rc; + return smask; } uint32_t ptype[rc]; @@ -95,13 +75,106 @@ setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc, case RTE_PTYPE_L3_IPV6_EXT: smask |= IPV6_EXT_PTYPE; break; + case RTE_PTYPE_L4_TCP: + smask |= TCP_PTYPE; + break; case RTE_PTYPE_L4_UDP: smask |= UDP_PTYPE; break; } } - for (i = 0; i != RTE_DIM(ptype2cb); i++) { + return smask; +} + +#else + +static uint32_t +get_ptypes(__rte_unused const struct netbe_port *uprt) +{ + return 0; +} + +#endif /* DPDK_VERSION_GE_1604 */ + +int +setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc, + uint16_t qid, uint32_t arp) +{ + int32_t rc; + uint32_t i, n, smask; + void *cb; + const struct ptype2cb *ptype2cb; + + static const struct ptype2cb tcp_ptype2cb[] = { + { + .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE | + IPV6_PTYPE | IPV6_EXT_PTYPE | TCP_PTYPE, + .name = "HW l2/l3x/l4-tcp ptype", + .fn = type0_tcp_rx_callback, + }, + { + .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE | + TCP_PTYPE, + .name = "HW l2/l3/l4-tcp ptype", + .fn = type1_tcp_rx_callback, + }, + { + .mask = 0, + .name = "tcp no HW ptype", + .fn = typen_tcp_rx_callback, + }, + }; + + static const struct ptype2cb tcp_arp_ptype2cb[] = { + { + .mask = 0, + .name = "tcp with arp no HW ptype", + .fn = typen_tcp_arp_rx_callback, + }, + }; + + static const struct ptype2cb udp_ptype2cb[] = { + { + .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE | + IPV6_PTYPE | IPV6_EXT_PTYPE | UDP_PTYPE, + .name = "HW l2/l3x/l4-udp ptype", + .fn = type0_udp_rx_callback, + }, + { + .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE | + UDP_PTYPE, + .name = "HW 
l2/l3/l4-udp ptype", + .fn = type1_udp_rx_callback, + }, + { + .mask = 0, + .name = "udp no HW ptype", + .fn = typen_udp_rx_callback, + }, + }; + + smask = get_ptypes(uprt); + + if (lc->proto == TLE_PROTO_TCP) { + if (arp != 0) { + ptype2cb = tcp_arp_ptype2cb; + n = RTE_DIM(tcp_arp_ptype2cb); + } else { + ptype2cb = tcp_ptype2cb; + n = RTE_DIM(tcp_ptype2cb); + } + } else if (lc->proto == TLE_PROTO_UDP) { + ptype2cb = udp_ptype2cb; + n = RTE_DIM(udp_ptype2cb); + } else { + RTE_LOG(ERR, USER1, + "%s(lc=%u) unsupported proto: %u\n", + __func__, lc->id, lc->proto); + return -EINVAL; + } + + for (i = 0; i != n; i++) { if ((smask & ptype2cb[i].mask) == ptype2cb[i].mask) { cb = rte_eth_add_rx_callback(uprt->id, qid, ptype2cb[i].fn, lc); @@ -121,25 +194,4 @@ setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc, return -ENOENT; } -#else - -int -setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc, - uint16_t qid) -{ - void *cb; - int32_t rc; - - cb = rte_eth_add_rx_callback(uprt->id, qid, typen_rx_callback, lc); - rc = -rte_errno; - RTE_LOG(ERR, USER1, - "%s(port=%u), setup RX callback \"%s\" " - "returns %p;\n", - __func__, uprt->id, "no HW ptype", cb); - - return ((cb == NULL) ? rc : 0); -} - -#endif /* DPDK_VERSION_GE_1604 */ - #endif /* PKT_DPDK_LEGACY_H_ */ diff --git a/examples/l4fwd/port.h b/examples/l4fwd/port.h new file mode 100644 index 0000000..bc13dca --- /dev/null +++ b/examples/l4fwd/port.h @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef PORT_H_ +#define PORT_H_ + +static void +prepare_hash_key(struct netbe_port *uprt, uint8_t key_size, uint16_t family) +{ + uint32_t align_nb_q; + + align_nb_q = rte_align32pow2(uprt->nb_lcore); + memset(uprt->hash_key, 0, RSS_HASH_KEY_LENGTH); + uprt->hash_key_size = key_size; + if (family == AF_INET) + uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV4] = align_nb_q; + else + uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV6] = align_nb_q; +} + +static int +update_rss_conf(struct netbe_port *uprt, + const struct rte_eth_dev_info *dev_info, + struct rte_eth_conf *port_conf, uint32_t proto) +{ + uint8_t hash_key_size; + + if (uprt->nb_lcore > 1) { + if (dev_info->hash_key_size > 0) + hash_key_size = dev_info->hash_key_size; + else { + RTE_LOG(ERR, USER1, + "%s: dev_info did not provide a valid hash " + "key size\n", __func__); + return -EINVAL; + } + + if (uprt->ipv4 != INADDR_ANY && + memcmp(&uprt->ipv6, &in6addr_any, + sizeof(uprt->ipv6)) != 0) { + RTE_LOG(ERR, USER1, + "%s: RSS for both IPv4 and IPv6 not " + "supported!\n", __func__); + return -EINVAL; + } else if (uprt->ipv4 != INADDR_ANY) { + prepare_hash_key(uprt, hash_key_size, AF_INET); + } else if (memcmp(&uprt->ipv6, &in6addr_any, sizeof(uprt->ipv6)) + != 0) { + prepare_hash_key(uprt, hash_key_size, AF_INET6); + } else { + RTE_LOG(ERR, USER1, + "%s: No IPv4 or IPv6 address is found!\n", + __func__); + return -EINVAL; + } + port_conf->rxmode.mq_mode = ETH_MQ_RX_RSS; + if (proto == TLE_PROTO_TCP) + port_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_TCP; + else + port_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_UDP; + port_conf->rx_adv_conf.rss_conf.rss_key_len = hash_key_size; + port_conf->rx_adv_conf.rss_conf.rss_key = uprt->hash_key; + } + + return 0; +} + +static uint32_t +qidx_from_hash_index(uint32_t hash, uint32_t align_nb_q) +{ + uint32_t i, nb_bit, q; + + nb_bit = (sizeof(uint32_t) * CHAR_BIT) - __builtin_clz(align_nb_q - 1); + q = (hash & 1); + for (i = 1; i < nb_bit; i++) { + hash >>= 1; + q <<= 1; + q |= (hash & 1); + } + + return q; +} + +static int +update_rss_reta(struct netbe_port *uprt, + const struct rte_eth_dev_info *dev_info) +{ + struct rte_eth_rss_reta_entry64 reta_conf[RSS_RETA_CONF_ARRAY_SIZE]; + int32_t i, rc, align_nb_q; + int32_t q_index, idx, shift; + + if (uprt->nb_lcore > 1) { + if (dev_info->reta_size == 0) { + RTE_LOG(ERR, USER1, + "%s: Redirection table size 0 is invalid for " + "RSS\n", __func__); + return -EINVAL; + } + RTE_LOG(NOTICE, USER1, + "%s: The reta size of port %d is %u\n", + __func__, uprt->id, dev_info->reta_size); + + if (dev_info->reta_size > ETH_RSS_RETA_SIZE_512) { + RTE_LOG(ERR, USER1, + "%s: More than %u entries of Reta not supported\n", + __func__, ETH_RSS_RETA_SIZE_512); + return -EINVAL; + } + + memset(reta_conf, 0, sizeof(reta_conf)); + align_nb_q = rte_align32pow2(uprt->nb_lcore); + for (i = 0; i < align_nb_q; i++) { + q_index = qidx_from_hash_index(i, align_nb_q) % + uprt->nb_lcore; + + idx = i / RTE_RETA_GROUP_SIZE; + shift = i % RTE_RETA_GROUP_SIZE; + reta_conf[idx].mask |= (1ULL << shift); + reta_conf[idx].reta[shift] = q_index; + RTE_LOG(NOTICE, USER1, + "%s: port=%u RSS reta conf: hash=%u, q=%u\n", + __func__, uprt->id, i, q_index); + } + + rc = rte_eth_dev_rss_reta_update(uprt->id, + reta_conf, dev_info->reta_size); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: Bad redirection table parameter, " + "rc = %d\n", __func__, rc); + return rc; + } + } + + return 0; +} + +/* + * Initialise the DPDK port. + * In the current version, multiple queues per port are used. 
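+ * One RX/TX queue pair is configured for every lcore that serves the port.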
+ */ +static int +port_init(struct netbe_port *uprt, uint32_t proto) +{ + int32_t rc; + struct rte_eth_conf port_conf; + struct rte_eth_dev_info dev_info; + + rte_eth_dev_info_get(uprt->id, &dev_info); + if ((dev_info.rx_offload_capa & uprt->rx_offload) != uprt->rx_offload) { + RTE_LOG(ERR, USER1, + "port#%u supported/requested RX offloads don't match, " + "supported: %#x, requested: %#x;\n", + uprt->id, dev_info.rx_offload_capa, uprt->rx_offload); + return -EINVAL; + } + if ((dev_info.tx_offload_capa & uprt->tx_offload) != uprt->tx_offload) { + RTE_LOG(ERR, USER1, + "port#%u supported/requested TX offloads don't match, " + "supported: %#x, requested: %#x;\n", + uprt->id, dev_info.tx_offload_capa, uprt->tx_offload); + return -EINVAL; + } + + port_conf = port_conf_default; + if ((uprt->rx_offload & RX_CSUM_OFFLOAD) != 0) { + RTE_LOG(ERR, USER1, "%s(%u): enabling RX csum offload;\n", + __func__, uprt->id); + port_conf.rxmode.hw_ip_checksum = 1; + } + port_conf.rxmode.max_rx_pkt_len = uprt->mtu + ETHER_CRC_LEN; + + rc = update_rss_conf(uprt, &dev_info, &port_conf, proto); + if (rc != 0) + return rc; + + rc = rte_eth_dev_configure(uprt->id, uprt->nb_lcore, uprt->nb_lcore, + &port_conf); + RTE_LOG(NOTICE, USER1, + "%s: rte_eth_dev_configure(prt_id=%u, nb_rxq=%u, nb_txq=%u) " + "returns %d;\n", __func__, uprt->id, uprt->nb_lcore, + uprt->nb_lcore, rc); + if (rc != 0) + return rc; + + return 0; +} + +static int +queue_init(struct netbe_port *uprt, struct rte_mempool *mp) +{ + int32_t socket, rc; + uint16_t q; + struct rte_eth_dev_info dev_info; + + rte_eth_dev_info_get(uprt->id, &dev_info); + + socket = rte_eth_dev_socket_id(uprt->id); + + dev_info.default_rxconf.rx_drop_en = 1; + + dev_info.default_txconf.tx_free_thresh = TX_RING_SIZE / 2; + if (uprt->tx_offload != 0) { + RTE_LOG(ERR, USER1, "%s(%u): enabling full featured TX;\n", + __func__, uprt->id); + dev_info.default_txconf.txq_flags = 0; + } + + for (q = 0; q < uprt->nb_lcore; q++) { + rc = rte_eth_rx_queue_setup(uprt->id, q, RX_RING_SIZE, + socket, &dev_info.default_rxconf, mp); + if (rc < 0) { + RTE_LOG(ERR, USER1, + "%s: rx queue=%u setup failed with error " + "code: %d\n", __func__, q, rc); + return rc; + } + } + + for (q = 0; q < uprt->nb_lcore; q++) { + rc = rte_eth_tx_queue_setup(uprt->id, q, TX_RING_SIZE, + socket, &dev_info.default_txconf); + if (rc < 0) { + RTE_LOG(ERR, USER1, + "%s: tx queue=%u setup failed with error " + "code: %d\n", __func__, q, rc); + return rc; + } + } + return 0; +} + +/* + * Check that lcore is enabled, not master, and not in use already. 
+ */ +static int +check_lcore(uint32_t lc) +{ + if (rte_lcore_is_enabled(lc) == 0) { + RTE_LOG(ERR, USER1, "lcore %u is not enabled\n", lc); + return -EINVAL; + } + if (rte_eal_get_lcore_state(lc) == RUNNING) { + RTE_LOG(ERR, USER1, "lcore %u already running %p\n", + lc, lcore_config[lc].f); + return -EINVAL; + } + return 0; +} + +static void +log_netbe_prt(const struct netbe_port *uprt) +{ + uint32_t i; + char corelist[2 * RTE_MAX_LCORE + 1]; + char hashkey[2 * RSS_HASH_KEY_LENGTH]; + + memset(corelist, 0, sizeof(corelist)); + memset(hashkey, 0, sizeof(hashkey)); + for (i = 0; i < uprt->nb_lcore; i++) + if (i < uprt->nb_lcore - 1) + sprintf(corelist + (2 * i), "%u,", uprt->lcore_id[i]); + else + sprintf(corelist + (2 * i), "%u", uprt->lcore_id[i]); + + for (i = 0; i < uprt->hash_key_size; i++) + sprintf(hashkey + (2 * i), "%02x", uprt->hash_key[i]); + + RTE_LOG(NOTICE, USER1, + "uprt %p = <id = %u, lcore = <%s>, mtu = %u, " + "rx_offload = %u, tx_offload = %u,\n" + "ipv4 = %#x, " + "ipv6 = %04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx, " + "mac = %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx>;\n" + "hashkey = %s;\n", + uprt, uprt->id, corelist, + uprt->mtu, uprt->rx_offload, uprt->tx_offload, + uprt->ipv4, + uprt->ipv6.s6_addr16[0], uprt->ipv6.s6_addr16[1], + uprt->ipv6.s6_addr16[2], uprt->ipv6.s6_addr16[3], + uprt->ipv6.s6_addr16[4], uprt->ipv6.s6_addr16[5], + uprt->ipv6.s6_addr16[6], uprt->ipv6.s6_addr16[7], + uprt->mac.addr_bytes[0], uprt->mac.addr_bytes[1], + uprt->mac.addr_bytes[2], uprt->mac.addr_bytes[3], + uprt->mac.addr_bytes[4], uprt->mac.addr_bytes[5], + hashkey); +} + +static void +log_netbe_cfg(const struct netbe_cfg *ucfg) +{ + uint32_t i; + + RTE_LOG(NOTICE, USER1, + "ucfg @ %p, prt_num = %u\n", ucfg, ucfg->prt_num); + + for (i = 0; i != ucfg->prt_num; i++) + log_netbe_prt(ucfg->prt + i); +} + +static int +pool_init(uint32_t sid) +{ + int32_t rc; + struct rte_mempool *mp; + char name[RTE_MEMPOOL_NAMESIZE]; + + snprintf(name, sizeof(name), "MP%u", sid); + mp = rte_pktmbuf_pool_create(name, MPOOL_NB_BUF, MPOOL_CACHE_SIZE, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, sid - 1); + if (mp == NULL) { + rc = -rte_errno; + RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n", + __func__, sid - 1, rc); + return rc; + } + + mpool[sid] = mp; + return 0; +} + +static int +frag_pool_init(uint32_t sid) +{ + int32_t rc; + struct rte_mempool *frag_mp; + char frag_name[RTE_MEMPOOL_NAMESIZE]; + + snprintf(frag_name, sizeof(frag_name), "frag_MP%u", sid); + frag_mp = rte_pktmbuf_pool_create(frag_name, MPOOL_NB_BUF, + MPOOL_CACHE_SIZE, 0, FRAG_MBUF_BUF_SIZE, sid - 1); + if (frag_mp == NULL) { + rc = -rte_errno; + RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n", + __func__, sid - 1, rc); + return rc; + } + + frag_mpool[sid] = frag_mp; + return 0; +} + +static struct netbe_lcore * +find_initilized_lcore(struct netbe_cfg *cfg, uint32_t lc_num) +{ + uint32_t i; + + for (i = 0; i < cfg->cpu_num; i++) + if (cfg->cpu[i].id == lc_num) + return &cfg->cpu[i]; + + return NULL; +} + +/* + * Setup all enabled ports. 
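+ * For each port: configure it, create mbuf pools on the sockets of its serving lcores, and init one RX/TX queue pair per serving lcore.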
+ */ +static int +netbe_port_init(struct netbe_cfg *cfg) +{ + int32_t rc; + uint32_t i, sid, j; + struct netbe_port *prt; + struct netbe_lcore *lc; + + for (i = 0; i != cfg->prt_num; i++) { + prt = cfg->prt + i; + rc = port_init(prt, cfg->proto); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: port=%u init failed with error code: %d\n", + __func__, prt->id, rc); + return rc; + } + rte_eth_macaddr_get(prt->id, &prt->mac); + if (cfg->promisc) + rte_eth_promiscuous_enable(prt->id); + + for (j = 0; j < prt->nb_lcore; j++) { + rc = check_lcore(prt->lcore_id[j]); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: processing failed with err: %d\n", + __func__, rc); + return rc; + } + + sid = rte_lcore_to_socket_id(prt->lcore_id[j]) + 1; + assert(sid < RTE_DIM(mpool)); + + if (mpool[sid] == NULL) { + rc = pool_init(sid); + if (rc != 0) + return rc; + } + + if (frag_mpool[sid] == NULL) { + rc = frag_pool_init(sid); + if (rc != 0) + return rc; + } + + rc = queue_init(prt, mpool[sid]); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: lcore=%u queue init failed with " + "err: %d\n", + __func__, prt->lcore_id[j], rc); + return rc; + } + + /* calculate number of queues and assign queue id + * per lcore. */ + lc = find_initilized_lcore(cfg, prt->lcore_id[j]); + if (lc == NULL) { + lc = &cfg->cpu[cfg->cpu_num]; + lc->id = prt->lcore_id[j]; + lc->proto = becfg.proto; + cfg->cpu_num++; + } + + lc->prtq = rte_realloc(lc->prtq, sizeof(*(lc->prtq)) * + (lc->prtq_num + 1), RTE_CACHE_LINE_SIZE); + if (lc->prtq == NULL) { + RTE_LOG(ERR, USER1, + "%s: failed to reallocate memory\n", + __func__); + return -ENOMEM; + } + lc->prtq[lc->prtq_num].rxqid = j; + lc->prtq[lc->prtq_num].txqid = j; + lc->prtq[lc->prtq_num].port = *prt; + lc->prtq_num++; + } + } + log_netbe_cfg(cfg); + + return 0; +} + +#endif /* PORT_H_ */ diff --git a/examples/l4fwd/tcp.h b/examples/l4fwd/tcp.h new file mode 100644 index 0000000..031ad8d --- /dev/null +++ b/examples/l4fwd/tcp.h @@ -0,0 +1,701 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TCP_H_ +#define TCP_H_ + +#define TCP_MAX_PROCESS 0x20 + +static inline void +netfe_stream_term_tcp(struct netfe_lcore *fe, struct netfe_stream *fes) +{ + fes->s = NULL; + fes->fwds = NULL; + memset(&fes->stat, 0, sizeof(fes->stat)); + netfe_put_stream(fe, &fe->free, fes); +} + +static inline void +netfe_stream_close_tcp(struct netfe_lcore *fe, struct netfe_stream *fes) +{ + tle_tcp_stream_close(fes->s); + netfe_stream_term_tcp(fe, fes); +} + +/* + * helper function: opens IPv4 and IPv6 streams for selected port. 
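+ * In server mode the rx event is reallocated on the SYN event queue, so incoming connection requests are reported there.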
+ */ +static struct netfe_stream * +netfe_stream_open_tcp(struct netfe_lcore *fe, struct netfe_sprm *sprm, + uint32_t lcore, uint16_t op, uint32_t bidx, uint8_t server_mode) +{ + int32_t rc; + struct netfe_stream *fes; + struct sockaddr_in *l4; + struct sockaddr_in6 *l6; + uint16_t errport; + struct tle_tcp_stream_param tprm; + + fes = netfe_get_stream(&fe->free); + if (fes == NULL) { + rte_errno = ENOBUFS; + return NULL; + } + + if (server_mode != 0) { + tle_event_free(fes->rxev); + fes->rxev = tle_event_alloc(fe->syneq, fes); + } + + if (fes->rxev == NULL) { + netfe_stream_close_tcp(fe, fes); + rte_errno = ENOMEM; + return NULL; + } + + /* activate rx, tx and err events for the stream */ + if (op == TXONLY || op == FWD) { + tle_event_active(fes->txev, TLE_SEV_DOWN); + fes->stat.txev[TLE_SEV_DOWN]++; + } + + if (op != TXONLY || server_mode != 0) { + tle_event_active(fes->rxev, TLE_SEV_DOWN); + fes->stat.rxev[TLE_SEV_DOWN]++; + } + tle_event_active(fes->erev, TLE_SEV_DOWN); + fes->stat.erev[TLE_SEV_DOWN]++; + + memset(&tprm, 0, sizeof(tprm)); + tprm.addr.local = sprm->local_addr; + tprm.addr.remote = sprm->remote_addr; + tprm.cfg.err_ev = fes->erev; + tprm.cfg.recv_ev = fes->rxev; + if (op != FWD) + tprm.cfg.send_ev = fes->txev; + + fes->s = tle_tcp_stream_open(becfg.cpu[bidx].ctx, &tprm); + + if (fes->s == NULL) { + rc = rte_errno; + netfe_stream_close_tcp(fe, fes); + rte_errno = rc; + + if (sprm->local_addr.ss_family == AF_INET) { + l4 = (struct sockaddr_in *) &sprm->local_addr; + errport = ntohs(l4->sin_port); + } else { + l6 = (struct sockaddr_in6 *) &sprm->local_addr; + errport = ntohs(l6->sin6_port); + } + + RTE_LOG(ERR, USER1, "stream open failed for port %u with error " + "code=%u, bidx=%u, lc=%u\n", + errport, rc, bidx, becfg.cpu[bidx].id); + return NULL; + } + + RTE_LOG(NOTICE, USER1, + "%s(%u)={s=%p, op=%hu, proto=%s, rxev=%p, txev=%p}, belc=%u\n", + __func__, lcore, fes->s, op, proto_name[becfg.proto], + fes->rxev, fes->txev, becfg.cpu[bidx].id); + + fes->op = op; + fes->proto = becfg.proto; + fes->family = sprm->local_addr.ss_family; + fes->laddr = sprm->local_addr; + netfe_put_stream(fe, &fe->use, fes); + + return fes; +} + +static int +netfe_lcore_init_tcp(const struct netfe_lcore_prm *prm) +{ + size_t sz; + int32_t rc; + uint32_t i, lcore, snum; + struct netfe_lcore *fe; + struct tle_evq_param eprm; + struct netfe_stream *fes; + struct netfe_sprm *sprm; + + lcore = rte_lcore_id(); + + snum = prm->max_streams; + RTE_LOG(NOTICE, USER1, "%s(lcore=%u, nb_streams=%u, max_streams=%u)\n", + __func__, lcore, prm->nb_streams, snum); + + memset(&eprm, 0, sizeof(eprm)); + eprm.socket_id = rte_lcore_to_socket_id(lcore); + eprm.max_events = snum; + + sz = sizeof(*fe) + snum * sizeof(struct netfe_stream); + fe = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, + rte_lcore_to_socket_id(lcore)); + + if (fe == NULL) { + RTE_LOG(ERR, USER1, "%s:%d failed to allocate %zu bytes\n", + __func__, __LINE__, sz); + return -ENOMEM; + } + + RTE_PER_LCORE(_fe) = fe; + + fe->snum = snum; + /* initialize the stream pool */ + LIST_INIT(&fe->free.head); + LIST_INIT(&fe->use.head); + + /* allocate the event queues */ + fe->syneq = tle_evq_create(&eprm); + fe->ereq = tle_evq_create(&eprm); + fe->rxeq = tle_evq_create(&eprm); + fe->txeq = tle_evq_create(&eprm); + + RTE_LOG(INFO, USER1, "%s(%u) synevq=%p, erevq=%p, rxevq=%p, txevq=%p\n", + __func__, lcore, fe->syneq, fe->ereq, fe->rxeq, fe->txeq); + if (fe->syneq == NULL || fe->ereq == NULL || fe->rxeq == NULL || + fe->txeq == NULL) + return -ENOMEM; + + fes 
= (struct netfe_stream *)(fe + 1); + for (i = 0; i != snum; i++) { + fes[i].rxev = tle_event_alloc(fe->rxeq, fes + i); + fes[i].txev = tle_event_alloc(fe->txeq, fes + i); + fes[i].erev = tle_event_alloc(fe->ereq, fes + i); + netfe_put_stream(fe, &fe->free, fes + i); + } + + + /* open all requested streams. */ + for (i = 0; i != prm->nb_streams; i++) { + sprm = &prm->stream[i].sprm; + fes = netfe_stream_open_tcp(fe, sprm, lcore, prm->stream[i].op, + sprm->bidx, becfg.server); + if (fes == NULL) { + rc = -rte_errno; + break; + } + + netfe_stream_dump(fes, &sprm->local_addr, &sprm->remote_addr); + + if (prm->stream[i].op == FWD) { + fes->fwdprm = prm->stream[i].fprm; + } else if (prm->stream[i].op == TXONLY) { + fes->txlen = prm->stream[i].txlen; + fes->raddr = prm->stream[i].sprm.remote_addr; + } + + if (becfg.server == 1) { + rc = tle_tcp_stream_listen(fes->s); + RTE_LOG(INFO, USER1, + "%s(%u) tle_tcp_stream_listen(stream=%p) " + "returns %d\n", + __func__, lcore, fes->s, rc); + if (rc != 0) + break; + } else { + rc = tle_tcp_stream_connect(fes->s, + (const struct sockaddr *)&sprm->remote_addr); + RTE_LOG(INFO, USER1, + "%s(%u) tle_tcp_stream_connect(stream=%p) " + "returns %d\n", + __func__, lcore, fes->s, rc); + if (rc != 0) + break; + } + } + + return rc; +} + +static inline struct netfe_stream * +netfe_create_fwd_stream(struct netfe_lcore *fe, struct netfe_stream *fes, + uint32_t lcore, uint32_t bidx) +{ + uint32_t rc; + struct netfe_stream *fws; + + fws = netfe_stream_open_tcp(fe, &fes->fwdprm, lcore, FWD, bidx, 0); + if (fws != NULL) { + rc = tle_tcp_stream_connect(fws->s, + (const struct sockaddr *)&fes->fwdprm.remote_addr); + NETFE_TRACE("%s(lc=%u, fes=%p): tle_tcp_stream_connect() " + "returns %d;\n", + __func__, rte_lcore_id(), fes, rc); + + if (rc != 0) { + netfe_stream_term_tcp(fe, fws); + fws = NULL; + } + } + + if (fws == NULL) + RTE_LOG(ERR, USER1, "%s(lc=%u fes=%p) failed to open " + "forwarding stream;\n", + __func__, rte_lcore_id(), fes); + + return fws; +} + +static inline void +netfe_fwd_tcp(uint32_t lcore, struct netfe_stream *fes) +{ + uint32_t i, k, n; + struct rte_mbuf **pkt; + struct netfe_stream *fed; + + RTE_SET_USED(lcore); + + n = fes->pbuf.num; + pkt = fes->pbuf.pkt; + + if (n == 0) + return; + + fed = fes->fwds; + + if (fed != NULL) { + + k = tle_tcp_stream_send(fed->s, pkt, n); + + NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) " + "returns %u\n", + __func__, lcore, proto_name[fes->proto], + fed->s, n, k); + + fed->stat.txp += k; + fed->stat.drops += n - k; + fes->stat.fwp += k; + + } else { + NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n", + __func__, lcore, fes->s, n); + for (k = 0; k != n; k++) { + NETFE_TRACE("%s(%u, %p): free(%p);\n", + __func__, lcore, fes->s, pkt[k]); + rte_pktmbuf_free(pkt[k]); + } + fes->stat.drops += n; + } + + /* copy unforwarded mbufs. 
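the n - k unsent packets are moved to the front of pbuf and retried on the next TX event. 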
*/
+	for (i = 0; i != n - k; i++)
+		pkt[i] = pkt[i + k];
+
+	fes->pbuf.num = i;
+
+	if (i != 0) {
+		tle_event_raise(fes->txev);
+		fes->stat.txev[TLE_SEV_UP]++;
+	}
+
+	if (n == RTE_DIM(fes->pbuf.pkt)) {
+		tle_event_active(fes->rxev, TLE_SEV_UP);
+		fes->stat.rxev[TLE_SEV_UP]++;
+	}
+}
+
+static inline void
+netfe_new_conn_tcp(struct netfe_lcore *fe, __rte_unused uint32_t lcore,
+	struct netfe_stream *fes)
+{
+	uint32_t i, k, n, rc;
+	struct tle_tcp_stream_cfg *prm;
+	struct tle_tcp_accept_param acpt_prm[MAX_PKT_BURST];
+	struct tle_stream *rs[MAX_PKT_BURST];
+	struct tle_syn_req syn_reqs[MAX_PKT_BURST];
+	struct netfe_stream *ts;
+	struct netfe_stream *fs[MAX_PKT_BURST];
+
+	static const struct tle_stream_cb zcb = {.func = NULL, .data = NULL};
+
+	/* check if any syn requests are waiting */
+	n = tle_tcp_stream_synreqs(fes->s, syn_reqs, RTE_DIM(syn_reqs));
+	if (n == 0)
+		return;
+
+	NETFE_TRACE("%s(%u): tle_tcp_stream_synreqs(%p, %u) returns %u\n",
+		__func__, lcore, fes->s, MAX_PKT_BURST, n);
+
+	/* get n free streams */
+	k = netfe_get_streams(&fe->free, fs, n);
+
+	/* fill accept params to accept k connection requests */
+	for (i = 0; i != k; i++) {
+		acpt_prm[i].syn = syn_reqs[i];
+		prm = &acpt_prm[i].cfg;
+		prm->nb_retries = 0;
+		prm->recv_ev = fs[i]->rxev;
+		prm->send_ev = fs[i]->txev;
+		prm->err_ev = fs[i]->erev;
+		tle_event_active(fs[i]->erev, TLE_SEV_DOWN);
+		prm->err_cb = zcb;
+		prm->recv_cb = zcb;
+		prm->send_cb = zcb;
+	}
+
+	/* accept k new connections */
+	rc = tle_tcp_stream_accept(fes->s, acpt_prm, rs, k);
+
+	NETFE_TRACE("%s(%u): tle_tcp_stream_accept(%p, %u) returns %u\n",
+		__func__, lcore, fes->s, k, rc);
+
+	if (rc != n) {
+		/* n - rc connections could not be accepted */
+		tle_tcp_reject(fes->s, syn_reqs + rc, n - rc);
+
+		/* put the k - rc unused streams back into the free list */
+		netfe_put_streams(fe, &fe->free, fs + rc, k - rc);
+	}
+
+	/* update the params for accepted streams */
+	for (i = 0; i != rc; i++) {
+
+		ts = fs[i];
+
+		ts->s = rs[i];
+		ts->op = fes->op;
+		ts->proto = fes->proto;
+		ts->family = fes->family;
+		ts->txlen = fes->txlen;
+
+		if (fes->op == TXONLY) {
+			tle_event_active(ts->txev, TLE_SEV_UP);
+			ts->stat.txev[TLE_SEV_UP]++;
+		} else {
+			tle_event_active(ts->rxev, TLE_SEV_DOWN);
+			ts->stat.rxev[TLE_SEV_DOWN]++;
+		}
+
+		netfe_put_stream(fe, &fe->use, ts);
+		NETFE_TRACE("%s(%u) accept (stream=%p, s=%p)\n",
+			__func__, lcore, ts, rs[i]);
+
+		/* create a new fwd stream if needed */
+		if (fes->op == FWD) {
+			tle_event_active(ts->txev, TLE_SEV_DOWN);
+			ts->stat.txev[TLE_SEV_DOWN]++;
+
+			ts->fwds = netfe_create_fwd_stream(fe, fes, lcore,
+				fes->fwdprm.bidx);
+			if (ts->fwds != NULL)
+				ts->fwds->fwds = ts;
+		}
+	}
+	fe->tcp_stat.acc += rc;
+	fe->tcp_stat.rej += n - rc;
+}
+
+static inline void
+netfe_lcore_tcp_req(void)
+{
+	struct netfe_lcore *fe;
+	uint32_t j, n, lcore;
+	struct netfe_stream *fs[MAX_PKT_BURST];
+
+	fe = RTE_PER_LCORE(_fe);
+	if (fe == NULL)
+		return;
+
+	/* look for syn events */
+	n = tle_evq_get(fe->syneq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+	if (n == 0)
+		return;
+
+	lcore = rte_lcore_id();
+
+	NETFE_TRACE("%s(%u): tle_evq_get(synevq=%p) returns %u\n",
+		__func__, lcore, fe->syneq, n);
+
+	for (j = 0; j != n; j++)
+		netfe_new_conn_tcp(fe, lcore, fs[j]);
+}
+
+static inline void
+netfe_lcore_tcp_rst(void)
+{
+	struct netfe_lcore *fe;
+	struct netfe_stream *fwds;
+	uint32_t j, n;
+	struct tle_stream *s[MAX_PKT_BURST];
+	struct netfe_stream *fs[MAX_PKT_BURST];
+	struct tle_event *rv[MAX_PKT_BURST];
+	struct tle_event *tv[MAX_PKT_BURST];
+	struct tle_event *ev[MAX_PKT_BURST];
+
+	fe = RTE_PER_LCORE(_fe);
+	if (fe == NULL)
+		return;
+
+	/* look for err events */
+	n = tle_evq_get(fe->ereq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+	if (n == 0)
+		return;
+
+	NETFE_TRACE("%s(%u): tle_evq_get(errevq=%p) returns %u\n",
+		__func__, rte_lcore_id(), fe->ereq, n);
+
+	for (j = 0; j != n; j++) {
+		if (verbose > VERBOSE_NONE) {
+			struct tle_tcp_stream_addr addr;
+			tle_tcp_stream_get_addr(fs[j]->s, &addr);
+			netfe_stream_dump(fs[j], &addr.local, &addr.remote);
+		}
+		s[j] = fs[j]->s;
+		rv[j] = fs[j]->rxev;
+		tv[j] = fs[j]->txev;
+		ev[j] = fs[j]->erev;
+	}
+
+	tle_evq_idle(fe->rxeq, rv, n);
+	tle_evq_idle(fe->txeq, tv, n);
+	tle_evq_idle(fe->ereq, ev, n);
+
+	tle_tcp_stream_close_bulk(s, n);
+
+	for (j = 0; j != n; j++) {
+
+		/*
+		 * if forwarding mode, send unsent packets and
+		 * signal peer stream to terminate too.
+		 */
+		fwds = fs[j]->fwds;
+		if (fwds != NULL && fwds->s != NULL) {
+
+			/* forward all unsent packets */
+			netfe_fwd_tcp(rte_lcore_id(), fs[j]);
+
+			fwds->fwds = NULL;
+			tle_event_raise(fwds->erev);
+			fs[j]->fwds = NULL;
+		}
+
+		/* now terminate the stream that received the rst event */
+		netfe_rem_stream(&fe->use, fs[j]);
+		netfe_stream_term_tcp(fe, fs[j]);
+		fe->tcp_stat.ter++;
+	}
+}
+
+static inline void
+netfe_rxtx_process_tcp(__rte_unused uint32_t lcore, struct netfe_stream *fes)
+{
+	uint32_t i, k, n;
+	struct rte_mbuf **pkt;
+
+	n = fes->pbuf.num;
+	pkt = fes->pbuf.pkt;
+
+	/* there is nothing to send. */
+	if (n == 0) {
+		tle_event_idle(fes->txev);
+		fes->stat.txev[TLE_SEV_IDLE]++;
+		return;
+	}
+
+
+	k = tle_tcp_stream_send(fes->s, pkt, n);
+
+	NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n",
+		__func__, lcore, proto_name[fes->proto],
+		fes->s, n, k);
+	fes->stat.txp += k;
+	fes->stat.drops += n - k;
+
+	/* not able to send anything. */
+	if (k == 0)
+		return;
+
+	if (n == RTE_DIM(fes->pbuf.pkt)) {
+		/* mark stream as readable */
+		tle_event_active(fes->rxev, TLE_SEV_UP);
+		fes->stat.rxev[TLE_SEV_UP]++;
+	}
+
+	/* adjust pbuf array. */
+	fes->pbuf.num = n - k;
+	for (i = 0; i != n - k; i++)
+		pkt[i] = pkt[i + k];
+}
+
+static inline void
+netfe_tx_process_tcp(uint32_t lcore, struct netfe_stream *fes)
+{
+	uint32_t i, k, n;
+
+	/* refill with new mbufs. */
+	pkt_buf_fill(lcore, &fes->pbuf, fes->txlen);
+
+	n = fes->pbuf.num;
+	if (n == 0)
+		return;
+
+	/**
+	 * TODO: cannot use function pointers for unequal param num.
+	 */
+	k = tle_tcp_stream_send(fes->s, fes->pbuf.pkt, n);
+
+	NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n",
+		__func__, lcore, proto_name[fes->proto], fes->s, n, k);
+	fes->stat.txp += k;
+	fes->stat.drops += n - k;
+
+	if (k == 0)
+		return;
+
+	/* adjust pbuf array. 
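The k mbufs accepted by the stream are dropped from the head of the array and the remaining n - k slide down, ready for the next refill/send cycle.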
*/
+	fes->pbuf.num = n - k;
+	for (i = k; i != n; i++)
+		fes->pbuf.pkt[i - k] = fes->pbuf.pkt[i];
+}
+
+static inline void
+netfe_lcore_tcp(void)
+{
+	struct netfe_lcore *fe;
+	uint32_t j, n, lcore;
+	struct netfe_stream *fs[MAX_PKT_BURST];
+
+	fe = RTE_PER_LCORE(_fe);
+	if (fe == NULL)
+		return;
+
+	lcore = rte_lcore_id();
+
+	/* look for rx events */
+	n = tle_evq_get(fe->rxeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+
+	if (n != 0) {
+		NETFE_TRACE("%s(%u): tle_evq_get(rxevq=%p) returns %u\n",
+			__func__, lcore, fe->rxeq, n);
+		for (j = 0; j != n; j++)
+			netfe_rx_process(lcore, fs[j]);
+	}
+
+	/* look for tx events */
+	n = tle_evq_get(fe->txeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+
+	if (n != 0) {
+		NETFE_TRACE("%s(%u): tle_evq_get(txevq=%p) returns %u\n",
+			__func__, lcore, fe->txeq, n);
+		for (j = 0; j != n; j++) {
+			if (fs[j]->op == RXTX)
+				netfe_rxtx_process_tcp(lcore, fs[j]);
+			else if (fs[j]->op == FWD)
+				netfe_fwd_tcp(lcore, fs[j]);
+			else if (fs[j]->op == TXONLY)
+				netfe_tx_process_tcp(lcore, fs[j]);
+		}
+	}
+}
+
+static void
+netfe_lcore_fini_tcp(void)
+{
+	struct netfe_lcore *fe;
+	uint32_t i, snum;
+	struct tle_tcp_stream_addr addr;
+	struct netfe_stream *fes;
+	uint32_t acc, rej, ter;
+
+	fe = RTE_PER_LCORE(_fe);
+	if (fe == NULL)
+		return;
+
+	snum = fe->use.num;
+	for (i = 0; i != snum; i++) {
+		fes = netfe_get_stream(&fe->use);
+		tle_tcp_stream_get_addr(fes->s, &addr);
+		netfe_stream_dump(fes, &addr.local, &addr.remote);
+		netfe_stream_close(fe, fes);
+	}
+
+	acc = fe->tcp_stat.acc;
+	rej = fe->tcp_stat.rej;
+	ter = fe->tcp_stat.ter;
+	RTE_LOG(NOTICE, USER1,
+		"tcp_stats={con_acc=%u,con_rej=%u,con_ter=%u};\n",
+		acc, rej, ter);
+
+	tle_evq_destroy(fe->txeq);
+	tle_evq_destroy(fe->rxeq);
+	tle_evq_destroy(fe->ereq);
+	tle_evq_destroy(fe->syneq);
+	RTE_PER_LCORE(_fe) = NULL;
+	rte_free(fe);
+}
+
+static inline void
+netbe_lcore_tcp(void)
+{
+	uint32_t i;
+	struct netbe_lcore *lc;
+
+	lc = RTE_PER_LCORE(_be);
+	if (lc == NULL)
+		return;
+
+	for (i = 0; i != lc->prtq_num; i++) {
+		netbe_rx(lc, i);
+		tle_tcp_process(lc->ctx, TCP_MAX_PROCESS);
+		netbe_tx(lc, i);
+	}
+}
+
+static int
+lcore_main_tcp(void *arg)
+{
+	int32_t rc;
+	uint32_t lcore;
+	struct lcore_prm *prm;
+
+	prm = arg;
+	lcore = rte_lcore_id();
+
+	RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n",
+		__func__, lcore);
+
+	rc = 0;
+
+	/* lcore FE init. */
+	if (prm->fe.max_streams != 0)
+		rc = netfe_lcore_init_tcp(&prm->fe);
+
+	/* lcore BE init. */
+	if (rc == 0 && prm->be.lc != NULL)
+		rc = netbe_lcore_setup(prm->be.lc);
+
+	if (rc != 0)
+		sig_handle(SIGQUIT);
+
+	while (force_quit == 0) {
+		netfe_lcore_tcp_req();
+		netfe_lcore_tcp_rst();
+		netfe_lcore_tcp();
+		netbe_lcore_tcp();
+	}
+
+	RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n",
+		__func__, lcore);
+
+	netfe_lcore_fini_tcp();
+	netbe_lcore_clear();
+
+	return rc;
+}
+
+#endif /* TCP_H_ */
diff --git a/examples/l4fwd/udp.h b/examples/l4fwd/udp.h
new file mode 100644
index 0000000..cdec6a5
--- /dev/null
+++ b/examples/l4fwd/udp.h
@@ -0,0 +1,588 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef UDP_H_ +#define UDP_H_ + +/* + * helper function: opens IPv4 and IPv6 streams for selected port. + */ +static struct netfe_stream * +netfe_stream_open_udp(struct netfe_lcore *fe, struct netfe_sprm *sprm, + uint32_t lcore, uint16_t op, uint32_t bidx) +{ + int32_t rc; + struct netfe_stream *fes; + struct sockaddr_in *l4; + struct sockaddr_in6 *l6; + uint16_t errport; + struct tle_udp_stream_param uprm; + + fes = netfe_get_stream(&fe->free); + if (fes == NULL) { + rte_errno = ENOBUFS; + return NULL; + } + + fes->rxev = tle_event_alloc(fe->rxeq, fes); + fes->txev = tle_event_alloc(fe->txeq, fes); + + if (fes->rxev == NULL || fes->txev == NULL) { + netfe_stream_close(fe, fes); + rte_errno = ENOMEM; + return NULL; + } + + if (op == TXONLY || op == FWD) { + tle_event_active(fes->txev, TLE_SEV_DOWN); + fes->stat.txev[TLE_SEV_DOWN]++; + } + + if (op != TXONLY) { + tle_event_active(fes->rxev, TLE_SEV_DOWN); + fes->stat.rxev[TLE_SEV_DOWN]++; + } + + memset(&uprm, 0, sizeof(uprm)); + uprm.local_addr = sprm->local_addr; + uprm.remote_addr = sprm->remote_addr; + uprm.recv_ev = fes->rxev; + if (op != FWD) + uprm.send_ev = fes->txev; + fes->s = tle_udp_stream_open(becfg.cpu[bidx].ctx, &uprm); + + if (fes->s == NULL) { + rc = rte_errno; + netfe_stream_close(fe, fes); + rte_errno = rc; + + if (sprm->local_addr.ss_family == AF_INET) { + l4 = (struct sockaddr_in *) &sprm->local_addr; + errport = ntohs(l4->sin_port); + } else { + l6 = (struct sockaddr_in6 *) &sprm->local_addr; + errport = ntohs(l6->sin6_port); + } + + RTE_LOG(ERR, USER1, "stream open failed for port %u with error " + "code=%u, bidx=%u, lc=%u\n", + errport, rc, bidx, becfg.cpu[bidx].id); + return NULL; + } + + RTE_LOG(NOTICE, USER1, + "%s(%u)={s=%p, op=%hu, proto=%s, rxev=%p, txev=%p}, belc=%u\n", + __func__, lcore, fes->s, op, proto_name[becfg.proto], + fes->rxev, fes->txev, becfg.cpu[bidx].id); + + fes->op = op; + fes->proto = becfg.proto; + fes->family = sprm->local_addr.ss_family; + + return fes; +} + +static int +netfe_lcore_init_udp(const struct netfe_lcore_prm *prm) +{ + size_t sz; + int32_t rc; + uint32_t i, lcore, snum; + struct netfe_lcore *fe; + struct tle_evq_param eprm; + struct netfe_stream *fes; + struct netfe_sprm *sprm; + + lcore = rte_lcore_id(); + + snum = prm->max_streams; + RTE_LOG(NOTICE, USER1, "%s(lcore=%u, nb_streams=%u, max_streams=%u)\n", + __func__, lcore, prm->nb_streams, snum); + + memset(&eprm, 0, sizeof(eprm)); + eprm.socket_id = rte_lcore_to_socket_id(lcore); + eprm.max_events = snum; + + sz = sizeof(*fe) + snum * sizeof(struct netfe_stream); + fe = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, + rte_lcore_to_socket_id(lcore)); + + if (fe == NULL) { + RTE_LOG(ERR, USER1, "%s:%d failed to allocate %zu bytes\n", + __func__, __LINE__, sz); + return -ENOMEM; + } + + RTE_PER_LCORE(_fe) = fe; + + fe->snum = snum; + /* initialize the stream pool */ + LIST_INIT(&fe->free.head); + LIST_INIT(&fe->use.head); + fes = (struct netfe_stream *)(fe + 1); + for (i = 0; i != snum; i++, fes++) + netfe_put_stream(fe, &fe->free, fes); + + /* allocate the event queues */ + fe->rxeq = tle_evq_create(&eprm); + fe->txeq = tle_evq_create(&eprm); + + RTE_LOG(INFO, USER1, "%s(%u) rx evq=%p, tx evq=%p\n", + __func__, lcore, fe->rxeq, fe->txeq); + if (fe->rxeq == NULL || fe->txeq == NULL) + return -ENOMEM; + + rc = fwd_tbl_init(fe, AF_INET, lcore); + RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n", + 
__func__, lcore, AF_INET, rc);
+	if (rc != 0)
+		return rc;
+
+	rc = fwd_tbl_init(fe, AF_INET6, lcore);
+	RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n",
+		__func__, lcore, AF_INET6, rc);
+	if (rc != 0)
+		return rc;
+
+	/* open all requested streams. */
+	for (i = 0; i != prm->nb_streams; i++) {
+		sprm = &prm->stream[i].sprm;
+		fes = netfe_stream_open_udp(fe, sprm, lcore, prm->stream[i].op,
+			sprm->bidx);
+		if (fes == NULL) {
+			rc = -rte_errno;
+			break;
+		}
+
+		netfe_stream_dump(fes, &sprm->local_addr, &sprm->remote_addr);
+
+		if (prm->stream[i].op == FWD) {
+			fes->fwdprm = prm->stream[i].fprm;
+			rc = fwd_tbl_add(fe,
+				prm->stream[i].fprm.remote_addr.ss_family,
+				(const struct sockaddr *)
+				&prm->stream[i].fprm.remote_addr,
+				fes);
+			if (rc != 0) {
+				netfe_stream_close(fe, fes);
+				break;
+			}
+		} else if (prm->stream[i].op == TXONLY) {
+			fes->txlen = prm->stream[i].txlen;
+			fes->raddr = prm->stream[i].sprm.remote_addr;
+		}
+	}
+
+	return rc;
+}
+
+static struct netfe_stream *
+find_fwd_dst_udp(uint32_t lcore, struct netfe_stream *fes,
+	const struct sockaddr *sa)
+{
+	uint32_t rc;
+	struct netfe_stream *fed;
+	struct netfe_lcore *fe;
+	struct tle_udp_stream_param uprm;
+
+	fe = RTE_PER_LCORE(_fe);
+
+	fed = fwd_tbl_lkp(fe, fes->family, sa);
+	if (fed != NULL)
+		return fed;
+
+	/* create a new stream and put it into the fwd table. */
+	memset(&uprm, 0, sizeof(uprm));
+	uprm.local_addr = fes->fwdprm.local_addr;
+	uprm.remote_addr = fes->fwdprm.remote_addr;
+
+	/* open forward stream with wildcard remote addr. */
+	memset(&uprm.remote_addr.ss_family + 1, 0,
+		sizeof(uprm.remote_addr) - sizeof(uprm.remote_addr.ss_family));
+
+	fed = netfe_stream_open_udp(fe, &fes->fwdprm, lcore, FWD,
+		fes->fwdprm.bidx);
+	if (fed == NULL)
+		return NULL;
+
+	rc = fwd_tbl_add(fe, fes->family, sa, fed);
+	if (rc != 0) {
+		netfe_stream_close(fe, fed);
+		return NULL;
+	}
+
+	fed->fwdprm.remote_addr = *(const struct sockaddr_storage *)sa;
+	return fed;
+}
+
+static inline int
+netfe_addr_eq(struct sockaddr_storage *l, struct sockaddr_storage *r,
+	uint16_t family)
+{
+	struct sockaddr_in *l4, *r4;
+	struct sockaddr_in6 *l6, *r6;
+
+	if (family == AF_INET) {
+		l4 = (struct sockaddr_in *)l;
+		r4 = (struct sockaddr_in *)r;
+		return (l4->sin_port == r4->sin_port &&
+			l4->sin_addr.s_addr == r4->sin_addr.s_addr);
+	} else {
+		l6 = (struct sockaddr_in6 *)l;
+		r6 = (struct sockaddr_in6 *)r;
+		return (l6->sin6_port == r6->sin6_port &&
+			memcmp(&l6->sin6_addr, &r6->sin6_addr,
+			sizeof(l6->sin6_addr)) == 0);
+	}
+}
+
+static inline void
+netfe_pkt_addr(const struct rte_mbuf *m, struct sockaddr_storage *ps,
+	uint16_t family)
+{
+	const struct ipv4_hdr *ip4h;
+	const struct ipv6_hdr *ip6h;
+	const struct udp_hdr *udph;
+	struct sockaddr_in *in4;
+	struct sockaddr_in6 *in6;
+
+	NETFE_PKT_DUMP(m);
+
+	udph = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, -m->l4_len);
+
+	if (family == AF_INET) {
+		in4 = (struct sockaddr_in *)ps;
+		ip4h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+			-(m->l4_len + m->l3_len));
+		in4->sin_port = udph->src_port;
+		in4->sin_addr.s_addr = ip4h->src_addr;
+	} else {
+		in6 = (struct sockaddr_in6 *)ps;
+		ip6h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+			-(m->l4_len + m->l3_len));
+		in6->sin6_port = udph->src_port;
+		rte_memcpy(&in6->sin6_addr, ip6h->src_addr,
+			sizeof(in6->sin6_addr));
+	}
+}
+
+static inline uint32_t
+pkt_eq_addr(struct rte_mbuf *pkt[], uint32_t num, uint16_t family,
+	struct sockaddr_storage *cur, struct sockaddr_storage *nxt)
+{
+	uint32_t i;
+
+	for (i = 0; i != num; i++) {
+		netfe_pkt_addr(pkt[i], nxt, family);
+		if (netfe_addr_eq(cur, nxt, family) == 0)
+			break;
+	}
+
+	return i;
+}
+
+static inline void
+netfe_fwd_udp(uint32_t lcore, struct netfe_stream *fes)
+{
+	uint32_t i, j, k, n, x;
+	uint16_t family;
+	void *pi0, *pi1, *pt;
+	struct rte_mbuf **pkt;
+	struct netfe_stream *fed;
+	struct sockaddr_storage in[2];
+
+	family = fes->family;
+	n = fes->pbuf.num;
+	pkt = fes->pbuf.pkt;
+
+	if (n == 0)
+		return;
+
+	in[0].ss_family = family;
+	in[1].ss_family = family;
+	pi0 = &in[0];
+	pi1 = &in[1];
+
+	netfe_pkt_addr(pkt[0], pi0, family);
+
+	x = 0;
+	for (i = 0; i != n; i = j) {
+
+		j = i + pkt_eq_addr(&pkt[i + 1],
+			n - i - 1, family, pi0, pi1) + 1;
+
+		fed = find_fwd_dst_udp(lcore, fes,
+			(const struct sockaddr *)pi0);
+		if (fed != NULL) {
+
+			/**
+			 * TODO: cannot use function pointers for unequal
+			 * number of params.
+			 */
+			k = tle_udp_stream_send(fed->s, pkt + i, j - i,
+				(const struct sockaddr *)
+				&fes->fwdprm.remote_addr);
+
+			NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) "
+				"returns %u\n",
+				__func__, lcore, proto_name[fes->proto],
+				fed->s, j - i, k);
+
+			fed->stat.txp += k;
+			fed->stat.drops += j - i - k;
+			fes->stat.fwp += k;
+
+		} else {
+			NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n",
+				__func__, lcore, fes->s, j - i);
+			for (k = 0; k != j - i; k++) {
+				NETFE_TRACE("%s(%u, %p): free(%p);\n",
+					__func__, lcore, fes->s, pkt[i + k]);
+				rte_pktmbuf_free(pkt[i + k]);
+			}
+			fes->stat.drops += j - i;
+		}
+
+		/* copy unforwarded mbufs. */
+		for (i += k; i != j; i++, x++)
+			pkt[x] = pkt[i];
+
+		/* swap the pointers */
+		pt = pi0;
+		pi0 = pi1;
+		pi1 = pt;
+	}
+
+	fes->pbuf.num = x;
+
+	if (x != 0) {
+		tle_event_raise(fes->txev);
+		fes->stat.txev[TLE_SEV_UP]++;
+	}
+
+	if (n == RTE_DIM(fes->pbuf.pkt)) {
+		tle_event_active(fes->rxev, TLE_SEV_UP);
+		fes->stat.rxev[TLE_SEV_UP]++;
+	}
+}
+
+static inline void
+netfe_rxtx_process_udp(__rte_unused uint32_t lcore, struct netfe_stream *fes)
+{
+	uint32_t i, j, k, n;
+	uint16_t family;
+	void *pi0, *pi1, *pt;
+	struct rte_mbuf **pkt;
+	struct sockaddr_storage in[2];
+
+	family = fes->family;
+	n = fes->pbuf.num;
+	pkt = fes->pbuf.pkt;
+
+	/* there is nothing to send. */
+	if (n == 0) {
+		tle_event_idle(fes->txev);
+		fes->stat.txev[TLE_SEV_IDLE]++;
+		return;
+	}
+
+	in[0].ss_family = family;
+	in[1].ss_family = family;
+	pi0 = &in[0];
+	pi1 = &in[1];
+
+	netfe_pkt_addr(pkt[0], pi0, family);
+
+	for (i = 0; i != n; i = j) {
+
+		j = i + pkt_eq_addr(&pkt[i + 1],
+			n - i - 1, family, pi0, pi1) + 1;
+
+		/**
+		 * TODO: cannot use function pointers for unequal param num.
+		 */
+		k = tle_udp_stream_send(fes->s, pkt + i, j - i,
+			(const struct sockaddr *)pi0);
+
+		NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n",
+			__func__, lcore, proto_name[fes->proto],
+			fes->s, j - i, k);
+		fes->stat.txp += k;
+		fes->stat.drops += j - i - k;
+
+		i += k;
+
+		/* stream send buffer is full */
+		if (i != j)
+			break;
+
+		/* swap the pointers */
+		pt = pi0;
+		pi0 = pi1;
+		pi1 = pt;
+	}
+
+	/* not able to send anything. */
+	if (i == 0)
+		return;
+
+	if (n == RTE_DIM(fes->pbuf.pkt)) {
+		/* mark stream as readable */
+		tle_event_active(fes->rxev, TLE_SEV_UP);
+		fes->stat.rxev[TLE_SEV_UP]++;
+	}
+
+	/* adjust pbuf array. */
+	fes->pbuf.num = n - i;
+	for (j = i; j != n; j++)
+		pkt[j - i] = pkt[j];
+}
+
+static inline void
+netfe_tx_process_udp(uint32_t lcore, struct netfe_stream *fes)
+{
+	uint32_t i, k, n;
+
+	/* refill with new mbufs. 
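pkt_buf_fill() tops the buffer up to a full burst, appending fes->txlen bytes of payload to each newly allocated mbuf.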
*/
+	pkt_buf_fill(lcore, &fes->pbuf, fes->txlen);
+
+	n = fes->pbuf.num;
+	if (n == 0)
+		return;
+
+	/**
+	 * TODO: cannot use function pointers for unequal param num.
+	 */
+	k = tle_udp_stream_send(fes->s, fes->pbuf.pkt, n, NULL);
+	NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n",
+		__func__, lcore, proto_name[fes->proto], fes->s, n, k);
+	fes->stat.txp += k;
+	fes->stat.drops += n - k;
+
+	if (k == 0)
+		return;
+
+	/* adjust pbuf array. */
+	fes->pbuf.num = n - k;
+	for (i = k; i != n; i++)
+		fes->pbuf.pkt[i - k] = fes->pbuf.pkt[i];
+}
+
+static inline void
+netfe_lcore_udp(void)
+{
+	struct netfe_lcore *fe;
+	uint32_t j, n, lcore;
+	struct netfe_stream *fs[MAX_PKT_BURST];
+
+	fe = RTE_PER_LCORE(_fe);
+	if (fe == NULL)
+		return;
+
+	lcore = rte_lcore_id();
+
+	/* look for rx events */
+	n = tle_evq_get(fe->rxeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+
+	if (n != 0) {
+		NETFE_TRACE("%s(%u): tle_evq_get(rxevq=%p) returns %u\n",
+			__func__, lcore, fe->rxeq, n);
+		for (j = 0; j != n; j++)
+			netfe_rx_process(lcore, fs[j]);
+	}
+
+	/* look for tx events */
+	n = tle_evq_get(fe->txeq, (const void **)(uintptr_t)fs, RTE_DIM(fs));
+
+	if (n != 0) {
+		NETFE_TRACE("%s(%u): tle_evq_get(txevq=%p) returns %u\n",
+			__func__, lcore, fe->txeq, n);
+		for (j = 0; j != n; j++) {
+			if (fs[j]->op == RXTX)
+				netfe_rxtx_process_udp(lcore, fs[j]);
+			else if (fs[j]->op == FWD)
+				netfe_fwd_udp(lcore, fs[j]);
+			else if (fs[j]->op == TXONLY)
+				netfe_tx_process_udp(lcore, fs[j]);
+		}
+	}
+}
+
+static void
+netfe_lcore_fini_udp(void)
+{
+	struct netfe_lcore *fe;
+	uint32_t i, snum;
+	struct tle_udp_stream_param uprm;
+	struct netfe_stream *fes;
+
+	fe = RTE_PER_LCORE(_fe);
+	if (fe == NULL)
+		return;
+
+	/* note: netfe_stream_close() shrinks use.num, so snapshot it first */
+	for (i = 0, snum = fe->use.num; i != snum; i++) {
+		fes = netfe_get_stream(&fe->use);
+		tle_udp_stream_get_param(fes->s, &uprm);
+		netfe_stream_dump(fes, &uprm.local_addr, &uprm.remote_addr);
+		netfe_stream_close(fe, fes);
+	}
+
+	tle_evq_destroy(fe->txeq);
+	tle_evq_destroy(fe->rxeq);
+	RTE_PER_LCORE(_fe) = NULL;
+	rte_free(fe);
+}
+
+static int
+lcore_main_udp(void *arg)
+{
+	int32_t rc;
+	uint32_t lcore;
+	struct lcore_prm *prm;
+
+	prm = arg;
+	lcore = rte_lcore_id();
+
+	RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n",
+		__func__, lcore);
+
+	rc = 0;
+
+	/* lcore FE init. */
+	if (prm->fe.max_streams != 0)
+		rc = netfe_lcore_init_udp(&prm->fe);
+
+	/* lcore BE init. */
+	if (rc == 0 && prm->be.lc != NULL)
+		rc = netbe_lcore_setup(prm->be.lc);
+
+	if (rc != 0)
+		sig_handle(SIGQUIT);
+
+	while (force_quit == 0) {
+		netfe_lcore_udp();
+		netbe_lcore();
+	}
+
+	RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n",
+		__func__, lcore);
+
+	netfe_lcore_fini_udp();
+	netbe_lcore_clear();
+
+	return rc;
+}
+
+#endif /* UDP_H_ */
diff --git a/examples/udpfwd/README b/examples/udpfwd/README
deleted file mode 100644
index 91b6e76..0000000
--- a/examples/udpfwd/README
+++ /dev/null
@@ -1,141 +0,0 @@
-Introduction
-============
-
-udpfwd is a sample application to demonstrate and test libtle_udp.
-Depending on configuration it can do simple send/recv or both over
-opened udp streams. Also it implements ability to do UDP datagram
-forwarding between different streams, so it is possible to use that
-application as some sort of 'UDP proxy'.
-The application can reassemble input fragmented IP packets,
-and fragment outgoing IP packets (if destination MTU is less then packet size).
-To build and run the application DPDK and TLDK libraries are required. 
- -Logically the application is divided into two parts: - -- Back End (BE) -BE is responsible for: - - RX over DPDK ports and feed them into UDP TLDK context(s) - (via tle_udp_rx_bulk). - - retrieve packets ready to be send out from UDP TLDK context(s) - and TX them over destined DPDK port. -Multiple RX/TX queues per port are supported by RSS. Right now the number of -TX is same as the number of RX queue. -Each BE lcore can serve multiple DPDK ports, TLDK UDP contexts. - -- Front End (FE) -FE responsibility is to open configured UDP streams and perform -send/recv over them. These streams can belong to different UDP contexts. - -Right now each lcore can act as BE and/or FE. - -Usage -===== - -udpfwd <EAL parameters> -- \ - -P | --promisc /* promiscuous mode enabled. */ \ - -R | --rbufs <num> /* max recv buffers per stream. */ \ - -S | --sbufs <num> /* max send buffers per stream. */ \ - -s | --streams <num> /* streams to open per context. */ \ - -b | --becfg <filename> /* backend configuration file. */ \ - -f | --fecfg <filename> /* frontend configuration file. */ \ - <port0_params> <port1_params> ... <portN_params> - -port_params: port=<uint>,lcore=<uint>[-<uint>],\ -[rx_offload=<uint>,tx_offload=<uint>,mtu=<uint>,ipv4=<ipv4>,ipv6=<ipv6>] - -port_params are used to configure the particular DPDK device (rte_ethdev port), -and specify BE lcore that will do RX/TX from/to the device and manage -BE part of corresponding UDP context. Multiple BE lcore can be specified. - -port - DPDK port id (multiple queues are supported when multiple lcore - is specified for a port). -lcore - EAL lcore id to do IO over that port (rx_burst/tx_burst). - several ports can be managed by the same lcore, and same port can - belong to more than one lcore. -rx_offload - RX HW offload capabilities to enable/use on this port. - (bitmask of DEV_RX_OFFLOAD_* values). -tx_offload - TX HW offload capabilities to enable/use on this port. - (bitmask of DEV_TX_OFFLOAD_* values). -mtu - MTU to be used on that port - ( = UDP data size + L2/L3/L4 headers sizes, default=1514). -ipv4 - ipv4 address to assign to that port. -ipv6 - ipv6 address to assign to that port. - -At least one of ipv4/ipv6 values have to be specified for each port. - -As an example: -udpfwd --lcores='3,6,8' -w 01:00.0 -- \ ---promisc --rbufs 0x1000 --sbufs 0x1000 --streams 0x100 \ ---fecfg ./fe.cfg --becfg ./be.cfg \ -port=0,lcore=6,lcore=8,rx_offload=0xf,tx_offload=0,\ -ipv4=192.168.1.233,ipv6=2001:4860:b002::28 - -Will create TLDK UDP context on lcore=6 and lcore=8 (BE lcore) to manage -DPDK port 0. Will assign IPv4 address 192.168.1.233 and IPv6 address -2001:4860:b002::28 to that port. -The following supported by DPDK RX HW offloads: - DEV_RX_OFFLOAD_VLAN_STRIP, - DEV_RX_OFFLOAD_IPV4_CKSUM, - DEV_RX_OFFLOAD_UDP_CKSUM, - DEV_RX_OFFLOAD_TCP_CKSUM -will be enabled on that port. -No HW TX offloads will be enabled. - -If multiple lcore is specified per DPDK port, the following RSS hash will -be enabled on that port: - ETH_RSS_UDP - - -Fornt-End (FE) and Back-End (BE) configuration files format: ------------------------------------------------------------- - - each record on a separate line. - - lines started with '#' are treated as comments. - - empty lines (containing whitespace chars only) are ignored. - - kvargs style format for each record. - - each FE record correspond to at least one stream to be opened - (could be multiple streams in case of op="fwd"). - - each BE record define a ipv4/ipv6 destination. 
- -FE config record format: ------------------------- - -lcore=<uint>,op=<"rx|tx|echo|fwd">,\ -laddr=<ip>,lport=<uint16>,raddr=<ip>,rport=<uint16>,\ -[txlen=<uint>,fwladdr=<ip>,fwlport=<uint16>,fwraddr=<ip>,fwrport=<uint16>,\ -belcore=<uint>] - -lcore - EAL lcore to manage that stream(s) in the FE. -op - operation to perform on that stream: - "rx" - do receive only on that stream. - "tx" - do send only on that stream. - "echo" - mimic recvfrom(..., &addr);sendto(..., &addr); - on that stream. - "fwd" - forward packets between streams. -laddr - local address for the stream to open. -lport - local port for the stream to open. -raddr - remote address for the stream to open. -rport - remote port for the stream to open. -txlen - data length to send with each packet ("tx" mode only). -fwladdr - local address for the forwarding stream(s) to open - ("fwd mode only). -fwlport - local port for the forwarding stream(s) to open - ("fwd mode only). -fwraddr - remote address for the forwarding stream(s) to open - ("fwd mode only). -fwrport - remote port for the forwarding stream(s) to open - ("fwd mode only). -belcore - EAL lcore to manage that stream(s) in the BE. - -Refer to fe.cfg for an example. - -BE config record format: ------------------------- - -port=<uint>,addr=<ipv4/ipv6>,masklen=<uint>,mac=<ether> - -port - port number to be used to send packets to the destination. -addr - destionation network address. -masklen - desitantion network prefix length. -mac - destination ethernet address. - -Refer to fe.cfg for an example. diff --git a/examples/udpfwd/main.c b/examples/udpfwd/main.c deleted file mode 100644 index 0463588..0000000 --- a/examples/udpfwd/main.c +++ /dev/null @@ -1,2134 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "netbe.h" -#include "parse.h" - -#define MAX_RULES 0x100 -#define MAX_TBL8 0x800 - -#define RX_RING_SIZE 0x400 -#define TX_RING_SIZE 0x800 - -#define MPOOL_CACHE_SIZE 0x100 -#define MPOOL_NB_BUF 0x20000 - -#define FRAG_MBUF_BUF_SIZE (RTE_PKTMBUF_HEADROOM + TLE_UDP_MAX_HDR) -#define FRAG_TTL MS_PER_S -#define FRAG_TBL_BUCKET_ENTRIES 16 - -#define FIRST_PORT 0x8000 - -#define RX_CSUM_OFFLOAD (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM) -#define TX_CSUM_OFFLOAD (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM) - -#define OPT_SHORT_SBULK 'B' -#define OPT_LONG_SBULK "sburst" - -#define OPT_SHORT_PROMISC 'P' -#define OPT_LONG_PROMISC "promisc" - -#define OPT_SHORT_RBUFS 'R' -#define OPT_LONG_RBUFS "rbufs" - -#define OPT_SHORT_SBUFS 'S' -#define OPT_LONG_SBUFS "sbufs" - -#define OPT_SHORT_STREAMS 's' -#define OPT_LONG_STREAMS "streams" - -#define OPT_SHORT_FECFG 'f' -#define OPT_LONG_FECFG "fecfg" - -#define OPT_SHORT_BECFG 'b' -#define OPT_LONG_BECFG "becfg" - -RTE_DEFINE_PER_LCORE(struct netbe_lcore *, _be); -RTE_DEFINE_PER_LCORE(struct netfe_lcore *, _fe); - -#include "fwdtbl.h" - -static const struct option long_opt[] = { - {OPT_LONG_BECFG, 1, 0, OPT_SHORT_BECFG}, - {OPT_LONG_FECFG, 1, 0, OPT_SHORT_FECFG}, - {OPT_LONG_PROMISC, 0, 0, OPT_SHORT_PROMISC}, - {OPT_LONG_RBUFS, 1, 0, OPT_SHORT_RBUFS}, - {OPT_LONG_SBUFS, 1, 0, OPT_SHORT_SBUFS}, - {OPT_LONG_SBULK, 1, 0, OPT_SHORT_SBULK}, - {OPT_LONG_STREAMS, 1, 0, OPT_SHORT_STREAMS}, - {NULL, 0, 0, 0} -}; - -/** - * IPv4 Input size in bytes for RSS hash key calculation. - * source address, destination address, source port, and destination port. - */ -#define IPV4_TUPLE_SIZE 12 - -/** - * IPv6 Input size in bytes for RSS hash key calculation. - * source address, destination address, source port, and destination port. - */ -#define IPV6_TUPLE_SIZE 36 - -/** - * Location to be modified to create the IPv4 hash key which helps - * to distribute packets based on the destination UDP port. - */ -#define RSS_HASH_KEY_DEST_PORT_LOC_IPV4 15 - -/* - * Location to be modified to create the IPv6 hash key which helps - * to distribute packets based on the destination UDP port. - */ -#define RSS_HASH_KEY_DEST_PORT_LOC_IPV6 39 - -/** - * Size of the rte_eth_rss_reta_entry64 array to update through - * rte_eth_dev_rss_reta_update. 
- */ -#define RSS_RETA_CONF_ARRAY_SIZE (ETH_RSS_RETA_SIZE_512/RTE_RETA_GROUP_SIZE) - -#define NETBE_REALLOC(loc, n) do { \ - (loc) = rte_realloc((loc), sizeof(*(loc)) * (n), RTE_CACHE_LINE_SIZE); \ - if ((loc) == NULL) { \ - RTE_LOG(ERR, USER1, \ - "%s: failed to reallocate memory\n", \ - __func__); \ - return -ENOMEM; \ - } \ -} while (0) - -static volatile int force_quit; - -static struct netbe_cfg becfg; -static struct rte_mempool *mpool[RTE_MAX_NUMA_NODES + 1]; -static struct rte_mempool *frag_mpool[RTE_MAX_NUMA_NODES + 1]; - -static const struct rte_eth_conf port_conf_default = { - .rxmode = { - .max_rx_pkt_len = ETHER_MAX_VLAN_FRAME_LEN, - .hw_vlan_strip = 1, - .jumbo_frame = 1, - }, -}; - -#include "parse.h" -#include "main_dpdk_legacy.h" - -static void -sig_handle(int signum) -{ - RTE_LOG(ERR, USER1, "%s(%d)\n", __func__, signum); - force_quit = 1; -} - -static void -prepare_hash_key(struct netbe_port *uprt, uint8_t key_size, uint16_t family) -{ - uint32_t align_nb_q; - - align_nb_q = rte_align32pow2(uprt->nb_lcore); - memset(uprt->hash_key, 0, RSS_HASH_KEY_LENGTH); - uprt->hash_key_size = key_size; - if (family == AF_INET) - uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV4] = align_nb_q; - else - uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV6] = align_nb_q; -} - -static uint32_t -qidx_from_hash_index(uint32_t hash, uint32_t align_nb_q) -{ - uint32_t i, nb_bit, q; - - nb_bit = (sizeof(uint32_t) * CHAR_BIT) - __builtin_clz(align_nb_q - 1); - q = (hash & 1); - for (i = 1; i < nb_bit; i++) { - hash >>= 1; - q <<= 1; - q |= (hash & 1); - } - - return q; -} - -static int -update_rss_conf(struct netbe_port *uprt, - const struct rte_eth_dev_info *dev_info, - struct rte_eth_conf *port_conf) -{ - uint8_t hash_key_size; - - if (uprt->nb_lcore > 1) { - if (dev_info->hash_key_size > 0) - hash_key_size = dev_info->hash_key_size; - else { - RTE_LOG(ERR, USER1, - "%s: dev_info did not provide a valid hash key size\n", - __func__); - return -EINVAL; - } - - if (uprt->ipv4 != INADDR_ANY && - memcmp(&uprt->ipv6, &in6addr_any, - sizeof(uprt->ipv6)) != 0) { - RTE_LOG(ERR, USER1, - "%s: RSS for both IPv4 and IPv6 not supported!\n", - __func__); - return -EINVAL; - } else if (uprt->ipv4 != INADDR_ANY) { - prepare_hash_key(uprt, hash_key_size, AF_INET); - } else if (memcmp(&uprt->ipv6, &in6addr_any, sizeof(uprt->ipv6)) - != 0) { - prepare_hash_key(uprt, hash_key_size, AF_INET6); - } else { - RTE_LOG(ERR, USER1, - "%s: No IPv4 or IPv6 address is found!\n", - __func__); - return -EINVAL; - } - port_conf->rxmode.mq_mode = ETH_MQ_RX_RSS; - port_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_UDP; - port_conf->rx_adv_conf.rss_conf.rss_key_len = hash_key_size; - port_conf->rx_adv_conf.rss_conf.rss_key = uprt->hash_key; - } - - return 0; -} - -static int -update_rss_reta(struct netbe_port *uprt, - const struct rte_eth_dev_info *dev_info) -{ - struct rte_eth_rss_reta_entry64 reta_conf[RSS_RETA_CONF_ARRAY_SIZE]; - int32_t i, rc, align_nb_q; - int32_t q_index, idx, shift; - - if (uprt->nb_lcore > 1) { - if (dev_info->reta_size == 0) { - RTE_LOG(ERR, USER1, - "%s: Redirection table size 0 is invalid for RSS\n", - __func__); - return -EINVAL; - } - RTE_LOG(NOTICE, USER1, - "%s: The reta size of port %d is %u\n", - __func__, uprt->id, dev_info->reta_size); - - if (dev_info->reta_size > ETH_RSS_RETA_SIZE_512) { - RTE_LOG(ERR, USER1, - "%s: More than %u entries of Reta not supported\n", - __func__, ETH_RSS_RETA_SIZE_512); - return -EINVAL; - } - - memset(reta_conf, 0, sizeof(reta_conf)); - align_nb_q = 
rte_align32pow2(uprt->nb_lcore); - for (i = 0; i < align_nb_q; i++) { - q_index = qidx_from_hash_index(i, align_nb_q) % - uprt->nb_lcore; - - idx = i / RTE_RETA_GROUP_SIZE; - shift = i % RTE_RETA_GROUP_SIZE; - reta_conf[idx].mask |= (1ULL << shift); - reta_conf[idx].reta[shift] = q_index; - RTE_LOG(NOTICE, USER1, - "%s: port=%u RSS reta conf: hash=%u, q=%u\n", - __func__, uprt->id, i, q_index); - } - - rc = rte_eth_dev_rss_reta_update(uprt->id, - reta_conf, dev_info->reta_size); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s: Bad redirection table parameter, rc = %d\n", - __func__, rc); - return rc; - } - } - - return 0; -} - -/* - * Initilise DPDK port. - * In current version, multi-queue per port is used. - */ -static int -port_init(struct netbe_port *uprt) -{ - int32_t rc; - struct rte_eth_conf port_conf; - struct rte_eth_dev_info dev_info; - - rte_eth_dev_info_get(uprt->id, &dev_info); - if ((dev_info.rx_offload_capa & uprt->rx_offload) != uprt->rx_offload) { - RTE_LOG(ERR, USER1, - "port#%u supported/requested RX offloads don't match, " - "supported: %#x, requested: %#x;\n", - uprt->id, dev_info.rx_offload_capa, uprt->rx_offload); - return -EINVAL; - } - if ((dev_info.tx_offload_capa & uprt->tx_offload) != uprt->tx_offload) { - RTE_LOG(ERR, USER1, - "port#%u supported/requested TX offloads don't match, " - "supported: %#x, requested: %#x;\n", - uprt->id, dev_info.tx_offload_capa, uprt->tx_offload); - return -EINVAL; - } - - port_conf = port_conf_default; - if ((uprt->rx_offload & RX_CSUM_OFFLOAD) != 0) { - RTE_LOG(ERR, USER1, "%s(%u): enabling RX csum offload;\n", - __func__, uprt->id); - port_conf.rxmode.hw_ip_checksum = 1; - } - port_conf.rxmode.max_rx_pkt_len = uprt->mtu + ETHER_CRC_LEN; - - rc = update_rss_conf(uprt, &dev_info, &port_conf); - if (rc != 0) - return rc; - - rc = rte_eth_dev_configure(uprt->id, uprt->nb_lcore, uprt->nb_lcore, - &port_conf); - RTE_LOG(NOTICE, USER1, - "%s: rte_eth_dev_configure(prt_id=%u, nb_rxq=%u, nb_txq=%u) " - "returns %d;\n", __func__, uprt->id, uprt->nb_lcore, - uprt->nb_lcore, rc); - if (rc != 0) - return rc; - - return 0; -} - -static int -queue_init(struct netbe_port *uprt, struct rte_mempool *mp) -{ - int32_t socket, rc; - uint16_t q; - struct rte_eth_dev_info dev_info; - - rte_eth_dev_info_get(uprt->id, &dev_info); - - socket = rte_eth_dev_socket_id(uprt->id); - - dev_info.default_rxconf.rx_drop_en = 1; - - dev_info.default_txconf.tx_free_thresh = TX_RING_SIZE / 2; - if (uprt->tx_offload != 0) { - RTE_LOG(ERR, USER1, "%s(%u): enabling full featured TX;\n", - __func__, uprt->id); - dev_info.default_txconf.txq_flags = 0; - } - - for (q = 0; q < uprt->nb_lcore; q++) { - rc = rte_eth_rx_queue_setup(uprt->id, q, RX_RING_SIZE, - socket, &dev_info.default_rxconf, mp); - if (rc < 0) { - RTE_LOG(ERR, USER1, - "%s: rx queue=%u setup failed with error code: %d\n", - __func__, q, rc); - return rc; - } - } - - for (q = 0; q < uprt->nb_lcore; q++) { - rc = rte_eth_tx_queue_setup(uprt->id, q, TX_RING_SIZE, - socket, &dev_info.default_txconf); - if (rc < 0) { - RTE_LOG(ERR, USER1, - "%s: tx queue=%u setup failed with error code: %d\n", - __func__, q, rc); - return rc; - } - } - return 0; -} - -/* - * Check that lcore is enabled, not master, and not in use already. 
- */ -static int -check_lcore(uint32_t lc) -{ - if (rte_lcore_is_enabled(lc) == 0) { - RTE_LOG(ERR, USER1, "lcore %u is not enabled\n", lc); - return -EINVAL; - } - if (rte_eal_get_lcore_state(lc) == RUNNING) { - RTE_LOG(ERR, USER1, "lcore %u already running %p\n", - lc, lcore_config[lc].f); - return -EINVAL; - } - return 0; -} - -static void -log_netbe_prt(const struct netbe_port *uprt) -{ - uint32_t i; - char corelist[2 * RTE_MAX_LCORE + 1]; - char hashkey[2 * RSS_HASH_KEY_LENGTH]; - - memset(corelist, 0, sizeof(corelist)); - memset(hashkey, 0, sizeof(hashkey)); - for (i = 0; i < uprt->nb_lcore; i++) - if (i < uprt->nb_lcore - 1) - sprintf(corelist + (2 * i), "%u,", uprt->lcore[i]); - else - sprintf(corelist + (2 * i), "%u", uprt->lcore[i]); - - for (i = 0; i < uprt->hash_key_size; i++) - sprintf(hashkey + (2 * i), "%02x", uprt->hash_key[i]); - - RTE_LOG(NOTICE, USER1, - "uprt %p = <id = %u, lcore = <%s>, mtu = %u, " - "rx_offload = %u, tx_offload = %u,\n" - "ipv4 = %#x, " - "ipv6 = %04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx, " - "mac = %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx>;\n" - "hashkey = %s;\n", - uprt, uprt->id, corelist, - uprt->mtu, uprt->rx_offload, uprt->tx_offload, - uprt->ipv4, - uprt->ipv6.s6_addr16[0], uprt->ipv6.s6_addr16[1], - uprt->ipv6.s6_addr16[2], uprt->ipv6.s6_addr16[3], - uprt->ipv6.s6_addr16[4], uprt->ipv6.s6_addr16[5], - uprt->ipv6.s6_addr16[6], uprt->ipv6.s6_addr16[7], - uprt->mac.addr_bytes[0], uprt->mac.addr_bytes[1], - uprt->mac.addr_bytes[2], uprt->mac.addr_bytes[3], - uprt->mac.addr_bytes[4], uprt->mac.addr_bytes[5], - hashkey); -} - -static void -log_netbe_cfg(const struct netbe_cfg *ucfg) -{ - uint32_t i; - - RTE_LOG(NOTICE, USER1, - "ucfg @ %p, prt_num = %u\n", ucfg, ucfg->prt_num); - - for (i = 0; i != ucfg->prt_num; i++) - log_netbe_prt(ucfg->prt + i); -} - -static int -pool_init(uint32_t sid) -{ - int32_t rc; - struct rte_mempool *mp; - char name[RTE_MEMPOOL_NAMESIZE]; - - snprintf(name, sizeof(name), "MP%u", sid); - mp = rte_pktmbuf_pool_create(name, MPOOL_NB_BUF, MPOOL_CACHE_SIZE, 0, - RTE_MBUF_DEFAULT_BUF_SIZE, sid - 1); - if (mp == NULL) { - rc = -rte_errno; - RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n", - __func__, sid - 1, rc); - return rc; - } - - mpool[sid] = mp; - return 0; -} - -static int -frag_pool_init(uint32_t sid) -{ - int32_t rc; - struct rte_mempool *frag_mp; - char frag_name[RTE_MEMPOOL_NAMESIZE]; - - snprintf(frag_name, sizeof(frag_name), "frag_MP%u", sid); - frag_mp = rte_pktmbuf_pool_create(frag_name, MPOOL_NB_BUF, - MPOOL_CACHE_SIZE, 0, FRAG_MBUF_BUF_SIZE, sid - 1); - if (frag_mp == NULL) { - rc = -rte_errno; - RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n", - __func__, sid - 1, rc); - return rc; - } - - frag_mpool[sid] = frag_mp; - return 0; -} - -static struct netbe_lcore * -find_initilized_lcore(struct netbe_cfg *cfg, uint32_t lc_num) -{ - uint32_t i; - - for (i = 0; i < cfg->cpu_num; i++) - if (cfg->cpu[i].id == lc_num) - return &cfg->cpu[i]; - - return NULL; -} - -/* - * Setup all enabled ports. 
- */ -static int -netbe_port_init(struct netbe_cfg *cfg, int argc, char *argv[]) -{ - int32_t rc; - uint32_t i, n, sid, j; - struct netbe_port *prt; - rte_cpuset_t cpuset; - uint32_t nc; - struct netbe_lcore *lc; - - n = (uint32_t)argc; - cfg->prt = rte_zmalloc(NULL, sizeof(struct netbe_port) * n, - RTE_CACHE_LINE_SIZE); - cfg->prt_num = n; - - rc = 0; - for (i = 0; i != n; i++) { - rc = parse_netbe_arg(cfg->prt + i, argv[i], &cpuset); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s: processing of \"%s\" failed with error code: %d\n", - __func__, argv[i], rc); - return rc; - } - } - - for (i = 0, nc = 0; i < RTE_MAX_LCORE; i++) - nc += CPU_ISSET(i, &cpuset); - cfg->cpu = rte_zmalloc(NULL, sizeof(struct netbe_lcore) * nc, - RTE_CACHE_LINE_SIZE); - - for (i = 0; i != cfg->prt_num; i++) { - prt = cfg->prt + i; - rc = port_init(prt); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s: port=%u init failed with error code: %d\n", - __func__, prt->id, rc); - return rc; - } - rte_eth_macaddr_get(prt->id, &prt->mac); - if (cfg->promisc) - rte_eth_promiscuous_enable(prt->id); - - for (j = 0; j < prt->nb_lcore; j++) { - rc = check_lcore(prt->lcore[j]); - if (rc != 0) - return rc; - - sid = rte_lcore_to_socket_id(prt->lcore[j]) + 1; - assert(sid < RTE_DIM(mpool)); - - if (mpool[sid] == NULL) { - rc = pool_init(sid); - if (rc != 0) - return rc; - } - - if (frag_mpool[sid] == NULL) { - rc = frag_pool_init(sid); - if (rc != 0) - return rc; - } - - rc = queue_init(prt, mpool[sid]); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s: lcore=%u queue init failed with err: %d\n", - __func__, prt->lcore[j], rc); - return rc; - } - - /* calculate number of queues and assign queue id per lcore. */ - lc = find_initilized_lcore(cfg, prt->lcore[j]); - if (lc == NULL) { - lc = &cfg->cpu[cfg->cpu_num]; - lc->id = prt->lcore[j]; - cfg->cpu_num++; - } - - NETBE_REALLOC(lc->prtq, lc->prtq_num + 1); - lc->prtq[lc->prtq_num].rxqid = j; - lc->prtq[lc->prtq_num].txqid = j; - lc->prtq[lc->prtq_num].port = *prt; - lc->prtq_num++; - } - } - log_netbe_cfg(cfg); - - return 0; -} - -/* - * UDP IPv6 destination lookup callback. 
- */ -static int -lpm6_dst_lookup(void *data, const struct in6_addr *addr, - struct tle_udp_dest *res) -{ - int32_t rc; - uint8_t idx; - struct netbe_lcore *lc; - struct tle_udp_dest *dst; - uintptr_t p; - - lc = data; - p = (uintptr_t)addr->s6_addr; - - rc = rte_lpm6_lookup(lc->lpm6, (uint8_t *)p, &idx); - if (rc == 0) { - dst = &lc->dst6[idx]; - rte_memcpy(res, dst, dst->l2_len + dst->l3_len + - offsetof(struct tle_udp_dest, hdr)); - } - return rc; -} - -static int -netbe_add_ipv4_route(struct netbe_lcore *lc, const struct netbe_dest *dst, - uint8_t idx) -{ - int32_t rc; - uint32_t addr, depth; - char str[INET_ADDRSTRLEN]; - - depth = dst->prfx; - addr = rte_be_to_cpu_32(dst->ipv4.s_addr); - - inet_ntop(AF_INET, &dst->ipv4, str, sizeof(str)); - rc = rte_lpm_add(lc->lpm4, addr, depth, idx); - RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p," - "ipv4=%s/%u,mtu=%u," - "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) " - "returns %d;\n", - __func__, lc->id, dst->port, lc->dst4[idx].dev, - str, depth, lc->dst4[idx].mtu, - dst->mac.addr_bytes[0], dst->mac.addr_bytes[1], - dst->mac.addr_bytes[2], dst->mac.addr_bytes[3], - dst->mac.addr_bytes[4], dst->mac.addr_bytes[5], - rc); - return rc; -} - -static int -netbe_add_ipv6_route(struct netbe_lcore *lc, const struct netbe_dest *dst, - uint8_t idx) -{ - int32_t rc; - uint32_t depth; - char str[INET6_ADDRSTRLEN]; - - depth = dst->prfx; - - rc = rte_lpm6_add(lc->lpm6, (uint8_t *)(uintptr_t)dst->ipv6.s6_addr, - depth, idx); - - inet_ntop(AF_INET6, &dst->ipv6, str, sizeof(str)); - RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p," - "ipv6=%s/%u,mtu=%u," - "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) " - "returns %d;\n", - __func__, lc->id, dst->port, lc->dst6[idx].dev, - str, depth, lc->dst4[idx].mtu, - dst->mac.addr_bytes[0], dst->mac.addr_bytes[1], - dst->mac.addr_bytes[2], dst->mac.addr_bytes[3], - dst->mac.addr_bytes[4], dst->mac.addr_bytes[5], - rc); - return rc; -} - -static void -fill_dst(struct tle_udp_dest *dst, struct netbe_dev *bed, - const struct netbe_dest *bdp, uint16_t l3_type, int32_t sid) -{ - struct ether_hdr *eth; - struct ipv4_hdr *ip4h; - struct ipv6_hdr *ip6h; - - static const struct ipv4_hdr ipv4_tmpl = { - .version_ihl = 4 << 4 | sizeof(*ip4h) / IPV4_IHL_MULTIPLIER, - .time_to_live = 64, - .next_proto_id = IPPROTO_UDP, - }; - - static const struct ipv6_hdr ipv6_tmpl = { - .vtc_flow = 6 << 4, - .proto = IPPROTO_UDP, - .hop_limits = 64, - }; - - dst->dev = bed->dev; - dst->head_mp = frag_mpool[sid + 1]; - dst->mtu = RTE_MIN(bdp->mtu, bed->port.mtu); - dst->l2_len = sizeof(*eth); - - eth = (struct ether_hdr *)dst->hdr; - - ether_addr_copy(&bed->port.mac, ð->s_addr); - ether_addr_copy(&bdp->mac, ð->d_addr); - eth->ether_type = rte_cpu_to_be_16(l3_type); - - if (l3_type == ETHER_TYPE_IPv4) { - dst->l3_len = sizeof(*ip4h); - ip4h = (struct ipv4_hdr *)(eth + 1); - ip4h[0] = ipv4_tmpl; - } else if (l3_type == ETHER_TYPE_IPv6) { - dst->l3_len = sizeof(*ip6h); - ip6h = (struct ipv6_hdr *)(eth + 1); - ip6h[0] = ipv6_tmpl; - } -} - -static int -create_context(struct netbe_lcore *lc, const struct tle_udp_ctx_param *ctx_prm) -{ - uint32_t rc = 0, sid; - uint64_t frag_cycles; - struct tle_udp_ctx_param cprm; - - if (lc->ctx == NULL) { - sid = rte_lcore_to_socket_id(lc->id); - - rc = lcore_lpm_init(lc); - if (rc != 0) - return rc; - - cprm = *ctx_prm; - cprm.socket_id = sid; - cprm.lookup4 = lpm4_dst_lookup; - cprm.lookup4_data = lc; - cprm.lookup6 = lpm6_dst_lookup; - cprm.lookup6_data = lc; - - /* to facilitate both IPv4 and IPv6. 
*/ - cprm.max_streams *= 2; - - frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / - MS_PER_S * FRAG_TTL; - - lc->ftbl = rte_ip_frag_table_create(cprm.max_streams, - FRAG_TBL_BUCKET_ENTRIES, cprm.max_streams, - frag_cycles, sid); - - RTE_LOG(NOTICE, USER1, "%s(lcore=%u): frag_tbl=%p;\n", - __func__, lc->id, lc->ftbl); - - lc->ctx = tle_udp_create(&cprm); - - RTE_LOG(NOTICE, USER1, "%s(lcore=%u): udp_ctx=%p;\n", - __func__, lc->id, lc->ctx); - - if (lc->ctx == NULL || lc->ftbl == NULL) - rc = ENOMEM; - } - - return rc; -} - -/* - * BE lcore setup routine. - */ -static int -lcore_init(struct netbe_lcore *lc, const struct tle_udp_ctx_param *ctx_prm, - const uint32_t prtqid, const uint16_t *bl_ports, uint32_t nb_bl_ports) -{ - int32_t rc = 0; - struct tle_udp_dev_param dprm; - - rc = create_context(lc, ctx_prm); - - if (lc->ctx != NULL) { - memset(&dprm, 0, sizeof(dprm)); - dprm.rx_offload = lc->prtq[prtqid].port.rx_offload; - dprm.tx_offload = lc->prtq[prtqid].port.tx_offload; - dprm.local_addr4.s_addr = lc->prtq[prtqid].port.ipv4; - memcpy(&dprm.local_addr6, &lc->prtq[prtqid].port.ipv6, - sizeof(lc->prtq[prtqid].port.ipv6)); - dprm.bl4.nb_port = nb_bl_ports; - dprm.bl4.port = bl_ports; - dprm.bl6.nb_port = nb_bl_ports; - dprm.bl6.port = bl_ports; - - lc->prtq[prtqid].dev = tle_udp_add_dev(lc->ctx, &dprm); - - RTE_LOG(NOTICE, USER1, - "%s(lcore=%u, port=%u, qid=%u), udp_dev: %p\n", - __func__, lc->id, lc->prtq[prtqid].port.id, - lc->prtq[prtqid].rxqid, lc->prtq[prtqid].dev); - - if (lc->prtq[prtqid].dev == NULL) - rc = -rte_errno; - - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s(lcore=%u) failed with error code: %d\n", - __func__, lc->id, rc); - tle_udp_destroy(lc->ctx); - rte_ip_frag_table_destroy(lc->ftbl); - rte_lpm_free(lc->lpm4); - rte_lpm6_free(lc->lpm6); - rte_free(lc->prtq[prtqid].port.lcore); - lc->prtq[prtqid].port.nb_lcore = 0; - rte_free(lc->prtq); - lc->prtq_num = 0; - return rc; - } - } - - return rc; -} - -static uint16_t -create_blocklist(const struct netbe_port *beprt, uint16_t *bl_ports, - uint32_t q) -{ - uint32_t i, j, qid, align_nb_q; - - align_nb_q = rte_align32pow2(beprt->nb_lcore); - for (i = 0, j = 0; i < (UINT16_MAX + 1); i++) { - qid = (i % align_nb_q) % beprt->nb_lcore; - if (qid != q) - bl_ports[j++] = i; - } - - return j; -} - -static int -netbe_lcore_init(struct netbe_cfg *cfg, - const struct tle_udp_ctx_param *ctx_prm) -{ - int32_t rc; - uint32_t i, j, nb_bl_ports = 0, sz; - struct netbe_lcore *lc; - static uint16_t *bl_ports; - - /* Create the udp context and attached queue for each lcore. 
*/ - rc = 0; - sz = sizeof(uint16_t) * UINT16_MAX; - bl_ports = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE); - for (i = 0; i < cfg->cpu_num; i++) { - lc = &cfg->cpu[i]; - for (j = 0; j < lc->prtq_num; j++) { - memset((uint8_t *)bl_ports, 0, sz); - /* create list of blocked ports based on q */ - nb_bl_ports = create_blocklist(&lc->prtq[j].port, - bl_ports, lc->prtq[j].rxqid); - RTE_LOG(NOTICE, USER1, - "lc=%u, q=%u, nb_bl_ports=%u\n", - lc->id, lc->prtq[j].rxqid, nb_bl_ports); - - rc = lcore_init(lc, ctx_prm, j, bl_ports, nb_bl_ports); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s: failed with error code: %d\n", - __func__, rc); - rte_free(bl_ports); - return rc; - } - } - } - rte_free(bl_ports); - - return 0; -} - -static void -netbe_lcore_fini(struct netbe_cfg *cfg) -{ - uint32_t i; - - for (i = 0; i != cfg->cpu_num; i++) { - tle_udp_destroy(cfg->cpu[i].ctx); - rte_ip_frag_table_destroy(cfg->cpu[i].ftbl); - rte_lpm_free(cfg->cpu[i].lpm4); - rte_lpm6_free(cfg->cpu[i].lpm6); - - rte_free(cfg->cpu[i].prtq); - cfg->cpu[i].prtq_num = 0; - } - - rte_free(cfg->cpu); - cfg->cpu_num = 0; - for (i = 0; i != cfg->prt_num; i++) { - rte_free(cfg->prt[i].lcore); - cfg->prt[i].nb_lcore = 0; - } - rte_free(cfg->prt); - cfg->prt_num = 0; -} - -static int -netbe_add_dest(struct netbe_lcore *lc, uint32_t dev_idx, uint16_t family, - const struct netbe_dest *dst, uint32_t dnum) -{ - int32_t rc, sid; - uint16_t l3_type; - uint32_t i, n, m; - struct tle_udp_dest *dp; - - if (family == AF_INET) { - n = lc->dst4_num; - dp = lc->dst4 + n; - m = RTE_DIM(lc->dst4); - l3_type = ETHER_TYPE_IPv4; - } else { - n = lc->dst6_num; - dp = lc->dst6 + n; - m = RTE_DIM(lc->dst6); - l3_type = ETHER_TYPE_IPv6; - } - - if (n + dnum >= m) { - RTE_LOG(ERR, USER1, "%s(lcore=%u, family=%hu, dnum=%u) exceeds " - "maximum allowed number of destinations(%u);\n", - __func__, lc->id, family, dnum, m); - return -ENOSPC; - } - - sid = rte_lcore_to_socket_id(lc->id); - rc = 0; - - for (i = 0; i != dnum && rc == 0; i++) { - fill_dst(dp + i, lc->prtq + dev_idx, dst + i, l3_type, sid); - if (family == AF_INET) - rc = netbe_add_ipv4_route(lc, dst + i, n + i); - else - rc = netbe_add_ipv6_route(lc, dst + i, n + i); - } - - if (family == AF_INET) - lc->dst4_num = n + i; - else - lc->dst6_num = n + i; - - return rc; -} - -static int -netbe_dest_init(const char *fname, struct netbe_cfg *cfg) -{ - int32_t rc; - uint32_t f, i, p; - uint32_t k, l, cnt; - struct netbe_lcore *lc; - struct netbe_dest_prm prm; - - rc = netbe_parse_dest(fname, &prm); - if (rc != 0) - return rc; - - rc = 0; - for (i = 0; i != prm.nb_dest; i++) { - - p = prm.dest[i].port; - f = prm.dest[i].family; - - cnt = 0; - for (k = 0; k != cfg->cpu_num; k++) { - lc = cfg->cpu + k; - for (l = 0; l != lc->prtq_num; l++) - if (lc->prtq[l].port.id == p) { - rc = netbe_add_dest(lc, l, f, - prm.dest + i, 1); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s(lcore=%u, family=%u) could not " - "add destinations(%u);\n", - __func__, lc->id, f, i); - return -ENOSPC; - } - cnt++; - } - } - - if (cnt == 0) { - RTE_LOG(ERR, USER1, "%s(%s) error at line %u: " - "port %u not managed by any lcore;\n", - __func__, fname, prm.dest[i].line, p); - break; - } - } - - free(prm.dest); - return rc; -} - -static void -netfe_stream_close(struct netfe_lcore *fe, uint32_t dec) -{ - uint32_t sidx; - - fe->sidx -= dec; - sidx = fe->sidx; - tle_event_free(fe->fs[sidx].txev); - tle_event_free(fe->fs[sidx].rxev); - tle_udp_stream_close(fe->fs[sidx].s); - memset(&fe->fs[sidx], 0, sizeof(fe->fs[sidx])); -} - -static void 
-netfe_stream_dump(const struct netfe_stream *fes) -{ - struct sockaddr_in *l4, *r4; - struct sockaddr_in6 *l6, *r6; - uint16_t lport, rport; - struct tle_udp_stream_param sprm; - char laddr[INET6_ADDRSTRLEN]; - char raddr[INET6_ADDRSTRLEN]; - - tle_udp_stream_get_param(fes->s, &sprm); - - if (sprm.local_addr.ss_family == AF_INET) { - - l4 = (struct sockaddr_in *)&sprm.local_addr; - r4 = (struct sockaddr_in *)&sprm.remote_addr; - - lport = l4->sin_port; - rport = r4->sin_port; - - } else if (sprm.local_addr.ss_family == AF_INET6) { - - l6 = (struct sockaddr_in6 *)&sprm.local_addr; - r6 = (struct sockaddr_in6 *)&sprm.remote_addr; - - lport = l6->sin6_port; - rport = r6->sin6_port; - - } else { - RTE_LOG(ERR, USER1, "stream@%p - unknown family=%hu\n", - fes->s, sprm.local_addr.ss_family); - return; - } - - format_addr(&sprm.local_addr, laddr, sizeof(laddr)); - format_addr(&sprm.remote_addr, raddr, sizeof(raddr)); - - RTE_LOG(INFO, USER1, - "stream@%p={" - "family=%hu,laddr=%s,lport=%hu,raddr=%s,rport=%hu," - "stats={" - "rxp=%" PRIu64 ",txp=%" PRIu64 ",drops=%" PRIu64 "," - "rxev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "]," - "txev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "]," - "}};\n", - fes->s, - sprm.local_addr.ss_family, - laddr, ntohs(lport), raddr, ntohs(rport), - fes->stat.rxp, fes->stat.txp, fes->stat.drops, - fes->stat.rxev[TLE_SEV_IDLE], - fes->stat.rxev[TLE_SEV_DOWN], - fes->stat.rxev[TLE_SEV_UP], - fes->stat.txev[TLE_SEV_IDLE], - fes->stat.txev[TLE_SEV_DOWN], - fes->stat.txev[TLE_SEV_UP]); -} - -/* - * helper function: opens IPv4 and IPv6 streams for selected port. - */ -static struct netfe_stream * -netfe_stream_open(struct netfe_lcore *fe, struct tle_udp_stream_param *sprm, - uint32_t lcore, uint16_t op, uint32_t bidx) -{ - int32_t rc; - uint32_t sidx; - struct netfe_stream *fes; - struct sockaddr_in *l4; - struct sockaddr_in6 *l6; - uint16_t errport; - - sidx = fe->sidx; - fes = fe->fs + sidx; - if (sidx >= fe->snum) { - rte_errno = ENOBUFS; - return NULL; - } - - fes->rxev = tle_event_alloc(fe->rxeq, &fe->fs[sidx]); - fes->txev = tle_event_alloc(fe->txeq, &fe->fs[sidx]); - sprm->recv_ev = fes->rxev; - if (op != FWD) - sprm->send_ev = fes->txev; - - RTE_LOG(ERR, USER1, - "%s(%u) [%u]={op=%hu, rxev=%p, txev=%p}, be_lc=%u\n", - __func__, lcore, sidx, op, fes->rxev, fes->txev, - becfg.cpu[bidx].id); - if (fes->rxev == NULL || fes->txev == NULL) { - netfe_stream_close(fe, 0); - rte_errno = ENOMEM; - return NULL; - } - - if (op == TXONLY || op == FWD) { - tle_event_active(fes->txev, TLE_SEV_DOWN); - fes->stat.txev[TLE_SEV_DOWN]++; - } - - if (op != TXONLY) { - tle_event_active(fes->rxev, TLE_SEV_DOWN); - fes->stat.rxev[TLE_SEV_DOWN]++; - } - - fes->s = tle_udp_stream_open(becfg.cpu[bidx].ctx, sprm); - if (fes->s == NULL) { - rc = rte_errno; - netfe_stream_close(fe, 0); - rte_errno = rc; - - if (sprm->local_addr.ss_family == AF_INET) { - l4 = (struct sockaddr_in *) &sprm->local_addr; - errport = ntohs(l4->sin_port); - } else { - l6 = (struct sockaddr_in6 *) &sprm->local_addr; - errport = ntohs(l6->sin6_port); - } - RTE_LOG(ERR, USER1, "stream open failed for port %u with error " - "code=%u, bidx=%u, lc=%u\n", - errport, rc, bidx, becfg.cpu[bidx].id); - return NULL; - } - - fes->op = op; - fes->family = sprm->local_addr.ss_family; - - fe->sidx = sidx + 1; - return fes; -} - -static inline int -netfe_addr_eq(struct sockaddr_storage *l, struct sockaddr_storage *r, - uint16_t family) -{ - struct sockaddr_in *l4, *r4; - struct sockaddr_in6 *l6, *r6; - - if 
(family == AF_INET) {
-		l4 = (struct sockaddr_in *)l;
-		r4 = (struct sockaddr_in *)r;
-		return (l4->sin_port == r4->sin_port &&
-			l4->sin_addr.s_addr == r4->sin_addr.s_addr);
-	} else {
-		l6 = (struct sockaddr_in6 *)l;
-		r6 = (struct sockaddr_in6 *)r;
-		return (l6->sin6_port == r6->sin6_port &&
-			memcmp(&l6->sin6_addr, &r6->sin6_addr,
-			sizeof(l6->sin6_addr)) == 0);
-	}
-}
-
-static inline void
-netfe_pkt_addr(const struct rte_mbuf *m, struct sockaddr_storage *ps,
-	uint16_t family)
-{
-	const struct ipv4_hdr *ip4h;
-	const struct ipv6_hdr *ip6h;
-	const struct udp_hdr *udph;
-	struct sockaddr_in *in4;
-	struct sockaddr_in6 *in6;
-
-	NETFE_PKT_DUMP(m);
-
-	udph = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, -m->l4_len);
-
-	if (family == AF_INET) {
-		in4 = (struct sockaddr_in *)ps;
-		ip4h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
-			-(m->l4_len + m->l3_len));
-		in4->sin_port = udph->src_port;
-		in4->sin_addr.s_addr = ip4h->src_addr;
-	} else {
-		in6 = (struct sockaddr_in6 *)ps;
-		ip6h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
-			-(m->l4_len + m->l3_len));
-		in6->sin6_port = udph->src_port;
-		rte_memcpy(&in6->sin6_addr, ip6h->src_addr,
-			sizeof(in6->sin6_addr));
-	}
-}
-
-static inline uint32_t
-pkt_eq_addr(struct rte_mbuf *pkt[], uint32_t num, uint16_t family,
-	struct sockaddr_storage *cur, struct sockaddr_storage *nxt)
-{
-	uint32_t i;
-
-	for (i = 0; i != num; i++) {
-		netfe_pkt_addr(pkt[i], nxt, family);
-		if (netfe_addr_eq(cur, nxt, family) == 0)
-			break;
-	}
-
-	return i;
-}
-
-static inline void
-pkt_buf_empty(struct pkt_buf *pb)
-{
-	uint32_t i;
-
-	for (i = 0; i != pb->num; i++)
-		rte_pktmbuf_free(pb->pkt[i]);
-
-	pb->num = 0;
-}
-
-static inline void
-pkt_buf_fill(uint32_t lcore, struct pkt_buf *pb, uint32_t dlen)
-{
-	uint32_t i;
-	int32_t sid;
-
-	sid = rte_lcore_to_socket_id(lcore) + 1;
-
-	for (i = pb->num; i != RTE_DIM(pb->pkt); i++) {
-		pb->pkt[i] = rte_pktmbuf_alloc(mpool[sid]);
-		if (pb->pkt[i] == NULL)
-			break;
-		rte_pktmbuf_append(pb->pkt[i], dlen);
-	}
-
-	pb->num = i;
-}
-
-static struct netfe_stream *
-find_fwd_dst(uint32_t lcore, struct netfe_stream *fes,
-	const struct sockaddr *sa)
-{
-	uint32_t rc;
-	struct netfe_stream *fed;
-	struct netfe_lcore *fe;
-	struct tle_udp_stream_param sprm;
-
-	fe = RTE_PER_LCORE(_fe);
-
-	fed = fwd_tbl_lkp(fe, fes->family, sa);
-	if (fed != NULL)
-		return fed;
-
-	/* create a new stream and put it into the fwd table. */
-
-	sprm = fes->fwdprm.prm;
-
-	/* open forward stream with wildcard remote addr. */
-	memset(&sprm.remote_addr.ss_family + 1, 0,
-		sizeof(sprm.remote_addr) - sizeof(sprm.remote_addr.ss_family));
-	fed = netfe_stream_open(fe, &sprm, lcore, FWD, fes->fwdprm.bidx);
-	if (fed == NULL)
-		return NULL;
-
-	rc = fwd_tbl_add(fe, fes->family, sa, fed);
-	if (rc != 0) {
-		netfe_stream_close(fe, 1);
-		return NULL;
-	}
-
-	fed->fwdprm.prm.remote_addr = *(const struct sockaddr_storage *)sa;
-	return fed;
-}
-
-static inline void
-netfe_tx_process(uint32_t lcore, struct netfe_stream *fes)
-{
-	uint32_t i, k, n;
-
-	/* refill with new mbufs. */
-	pkt_buf_fill(lcore, &fes->pbuf, fes->txlen);
-
-	n = fes->pbuf.num;
-	if (n == 0)
-		return;
-
-	k = tle_udp_stream_send(fes->s, fes->pbuf.pkt, n, NULL);
-	NETFE_TRACE("%s(%u): tle_udp_stream_send(%p, %u) returns %u\n",
-		__func__, lcore, fes->s, n, k);
-	fes->stat.txp += k;
-	fes->stat.drops += n - k;
-
-	if (k == 0)
-		return;
-
-	/* adjust pbuf array.
*/
-	fes->pbuf.num = n - k;
-	for (i = k; i != n; i++)
-		fes->pbuf.pkt[i - k] = fes->pbuf.pkt[i];
-}
-
-static inline void
-netfe_fwd(uint32_t lcore, struct netfe_stream *fes)
-{
-	uint32_t i, j, k, n, x;
-	uint16_t family;
-	void *pi0, *pi1, *pt;
-	struct rte_mbuf **pkt;
-	struct netfe_stream *fed;
-	struct sockaddr_storage in[2];
-
-	family = fes->family;
-	n = fes->pbuf.num;
-	pkt = fes->pbuf.pkt;
-
-	if (n == 0)
-		return;
-
-	in[0].ss_family = family;
-	in[1].ss_family = family;
-	pi0 = &in[0];
-	pi1 = &in[1];
-
-	netfe_pkt_addr(pkt[0], pi0, family);
-
-	x = 0;
-	for (i = 0; i != n; i = j) {
-
-		j = i + pkt_eq_addr(&pkt[i + 1],
-			n - i - 1, family, pi0, pi1) + 1;
-
-		fed = find_fwd_dst(lcore, fes, (const struct sockaddr *)pi0);
-		if (fed != NULL) {
-
-			k = tle_udp_stream_send(fed->s, pkt + i, j - i,
-				(const struct sockaddr *)
-				&fes->fwdprm.prm.remote_addr);
-
-			NETFE_TRACE("%s(%u): tle_udp_stream_send(%p, %u) "
-				"returns %u\n",
-				__func__, lcore, fed->s, j - i, k);
-			fed->stat.txp += k;
-			fed->stat.drops += j - i - k;
-			fes->stat.fwp += k;
-
-		} else {
-			NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n",
-				__func__, lcore, fes->s, j - i);
-			for (k = i; k != j; k++) {
-				NETFE_TRACE("%s(%u, %p): free(%p);\n",
-				__func__, lcore, fes->s, pkt[k]);
-				rte_pktmbuf_free(pkt[k]);
-			}
-			fes->stat.drops += j - i;
-			k = j - i;
-		}
-
-		/* copy unforwarded mbufs. */
-		for (i += k; i != j; i++, x++)
-			pkt[x] = pkt[i];
-
-		/* swap the pointers */
-		pt = pi0;
-		pi0 = pi1;
-		pi1 = pt;
-	}
-
-	fes->pbuf.num = x;
-
-	if (x != 0) {
-		tle_event_raise(fes->txev);
-		fes->stat.txev[TLE_SEV_UP]++;
-	}
-
-	if (n == RTE_DIM(fes->pbuf.pkt)) {
-		tle_event_active(fes->rxev, TLE_SEV_UP);
-		fes->stat.rxev[TLE_SEV_UP]++;
-	}
-}
-
-static inline void
-netfe_rx_process(__rte_unused uint32_t lcore, struct netfe_stream *fes)
-{
-	uint32_t k, n;
-
-	n = fes->pbuf.num;
-	k = RTE_DIM(fes->pbuf.pkt) - n;
-
-	/* packet buffer is full, can't receive any new packets. */
-	if (k == 0) {
-		tle_event_idle(fes->rxev);
-		fes->stat.rxev[TLE_SEV_IDLE]++;
-		return;
-	}
-
-	n = tle_udp_stream_recv(fes->s, fes->pbuf.pkt + n, k);
-	if (n == 0)
-		return;
-
-	NETFE_TRACE("%s(%u): tle_udp_stream_recv(%p, %u) returns %u\n",
-		__func__, lcore, fes->s, k, n);
-
-	fes->pbuf.num += n;
-	fes->stat.rxp += n;
-
-	/* free all received mbufs. */
-	if (fes->op == RXONLY)
-		pkt_buf_empty(&fes->pbuf);
-	/* mark stream as writable */
-	else if (k == RTE_DIM(fes->pbuf.pkt)) {
-		if (fes->op == RXTX) {
-			tle_event_active(fes->txev, TLE_SEV_UP);
-			fes->stat.txev[TLE_SEV_UP]++;
-		} else if (fes->op == FWD) {
-			tle_event_raise(fes->txev);
-			fes->stat.txev[TLE_SEV_UP]++;
-		}
-	}
-}
-
-static inline void
-netfe_rxtx_process(__rte_unused uint32_t lcore, struct netfe_stream *fes)
-{
-	uint32_t i, j, k, n;
-	uint16_t family;
-	void *pi0, *pi1, *pt;
-	struct rte_mbuf **pkt;
-	struct sockaddr_storage in[2];
-
-	family = fes->family;
-	n = fes->pbuf.num;
-	pkt = fes->pbuf.pkt;
-
-	/* there is nothing to send.
*/ - if (n == 0) { - tle_event_idle(fes->txev); - fes->stat.txev[TLE_SEV_IDLE]++; - return; - } - - in[0].ss_family = family; - in[1].ss_family = family; - pi0 = &in[0]; - pi1 = &in[1]; - - netfe_pkt_addr(pkt[0], pi0, family); - - for (i = 0; i != n; i = j) { - - j = i + pkt_eq_addr(&pkt[i + 1], - n - i - 1, family, pi0, pi1) + 1; - - k = tle_udp_stream_send(fes->s, pkt + i, j - i, - (const struct sockaddr *)pi0); - - NETFE_TRACE("%s(%u): tle_udp_stream_send(%p, %u) returns %u\n", - __func__, lcore, fes->s, j - i, k); - fes->stat.txp += k; - fes->stat.drops += j - i - k; - - i += k; - - /* stream send buffer is full */ - if (i != j) - break; - - /* swap the pointers */ - pt = pi0; - pi0 = pi1; - pi1 = pt; - } - - /* not able to send anything. */ - if (i == 0) - return; - - if (n == RTE_DIM(fes->pbuf.pkt)) { - /* mark stream as readable */ - tle_event_active(fes->rxev, TLE_SEV_UP); - fes->stat.rxev[TLE_SEV_UP]++; - } - - /* adjust pbuf array. */ - fes->pbuf.num = n - i; - for (j = i; j != n; j++) - pkt[j - i] = pkt[j]; -} - -static int -netfe_lcore_init(const struct netfe_lcore_prm *prm) -{ - size_t sz; - int32_t rc; - uint32_t i, lcore, snum; - struct netfe_lcore *fe; - struct tle_evq_param eprm; - struct tle_udp_stream_param sprm; - struct netfe_stream *fes; - - lcore = rte_lcore_id(); - - snum = prm->max_streams; - RTE_LOG(NOTICE, USER1, "%s(lcore=%u, nb_streams=%u, max_streams=%u)\n", - __func__, lcore, prm->nb_streams, snum); - - memset(&eprm, 0, sizeof(eprm)); - eprm.socket_id = rte_lcore_to_socket_id(lcore); - eprm.max_events = snum; - - sz = sizeof(*fe) + snum * sizeof(fe->fs[0]); - fe = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, - rte_lcore_to_socket_id(lcore)); - - if (fe == NULL) { - RTE_LOG(ERR, USER1, "%s:%d failed to allocate %zu bytes\n", - __func__, __LINE__, sz); - return -ENOMEM; - } - - RTE_PER_LCORE(_fe) = fe; - - fe->snum = snum; - fe->fs = (struct netfe_stream *)(fe + 1); - - fe->rxeq = tle_evq_create(&eprm); - fe->txeq = tle_evq_create(&eprm); - - RTE_LOG(INFO, USER1, "%s(%u) rx evq=%p, tx evq=%p\n", - __func__, lcore, fe->rxeq, fe->txeq); - if (fe->rxeq == NULL || fe->txeq == NULL) - return -ENOMEM; - - rc = fwd_tbl_init(fe, AF_INET, lcore); - RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n", - __func__, lcore, AF_INET, rc); - if (rc != 0) - return rc; - - rc = fwd_tbl_init(fe, AF_INET6, lcore); - RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n", - __func__, lcore, AF_INET6, rc); - if (rc != 0) - return rc; - - /* open all requested streams. 
*/ - for (i = 0; i != prm->nb_streams; i++) { - sprm = prm->stream[i].sprm.prm; - fes = netfe_stream_open(fe, &sprm, lcore, prm->stream[i].op, - prm->stream[i].sprm.bidx); - if (fes == NULL) { - rc = -rte_errno; - break; - } - - netfe_stream_dump(fes); - - if (prm->stream[i].op == FWD) { - fes->fwdprm = prm->stream[i].fprm; - rc = fwd_tbl_add(fe, - prm->stream[i].fprm.prm.remote_addr.ss_family, - (const struct sockaddr *) - &prm->stream[i].fprm.prm.remote_addr, - fes); - if (rc != 0) { - netfe_stream_close(fe, 1); - break; - } - } else if (prm->stream[i].op == TXONLY) { - fes->txlen = prm->stream[i].txlen; - fes->raddr = sprm.remote_addr; - } - } - - return rc; -} - -static inline void -netfe_lcore(void) -{ - struct netfe_lcore *fe; - uint32_t j, n, lcore; - struct netfe_stream *fs[MAX_PKT_BURST]; - - fe = RTE_PER_LCORE(_fe); - if (fe == NULL) - return; - - lcore = rte_lcore_id(); - - n = tle_evq_get(fe->rxeq, (const void **)(uintptr_t)fs, RTE_DIM(fs)); - - if (n != 0) { - NETFE_TRACE("%s(%u): tle_evq_get(rxevq=%p) returns %u\n", - __func__, lcore, fe->rxeq, n); - for (j = 0; j != n; j++) - netfe_rx_process(lcore, fs[j]); - } - - n = tle_evq_get(fe->txeq, (const void **)(uintptr_t)fs, RTE_DIM(fs)); - - if (n != 0) { - NETFE_TRACE("%s(%u): tle_evq_get(txevq=%p) returns %u\n", - __func__, lcore, fe->txeq, n); - for (j = 0; j != n; j++) { - if (fs[j]->op == RXTX) - netfe_rxtx_process(lcore, fs[j]); - else if (fs[j]->op == FWD) - netfe_fwd(lcore, fs[j]); - else if (fs[j]->op == TXONLY) - netfe_tx_process(lcore, fs[j]); - } - } -} - -static void -netfe_lcore_fini(void) -{ - struct netfe_lcore *fe; - uint32_t i; - - fe = RTE_PER_LCORE(_fe); - if (fe == NULL) - return; - - while (fe->sidx != 0) { - i = fe->sidx - 1; - netfe_stream_dump(fe->fs + i); - netfe_stream_close(fe, 1); - } - - tle_evq_destroy(fe->txeq); - tle_evq_destroy(fe->rxeq); - RTE_PER_LCORE(_fe) = NULL; - rte_free(fe); -} - -static inline void -netbe_rx(struct netbe_lcore *lc, uint32_t pidx) -{ - uint32_t j, k, n; - struct rte_mbuf *pkt[MAX_PKT_BURST]; - struct rte_mbuf *rp[MAX_PKT_BURST]; - int32_t rc[MAX_PKT_BURST]; - - n = rte_eth_rx_burst(lc->prtq[pidx].port.id, - lc->prtq[pidx].rxqid, pkt, RTE_DIM(pkt)); - if (n == 0) - return; - - lc->prtq[pidx].rx_stat.in += n; - NETBE_TRACE("%s(%u): rte_eth_rx_burst(%u, %u) returns %u\n", - __func__, lc->id, lc->prtq[pidx].port.id, lc->prtq[pidx].rxqid, - n); - - k = tle_udp_rx_bulk(lc->prtq[pidx].dev, pkt, rp, rc, n); - - lc->prtq[pidx].rx_stat.up += k; - lc->prtq[pidx].rx_stat.drop += n - k; - NETBE_TRACE("%s(%u): tle_udp_rx_bulk(%p, %u) returns %u\n", - __func__, lc->id, lc->prtq[pidx].dev, n, k); - - for (j = 0; j != n - k; j++) { - NETBE_TRACE("%s:%d(port=%u) rp[%u]={%p, %d};\n", - __func__, __LINE__, lc->prtq[pidx].port.id, - j, rp[j], rc[j]); - rte_pktmbuf_free(rp[j]); - } -} - -static inline void -netbe_tx(struct netbe_lcore *lc, uint32_t pidx) -{ - uint32_t j, k, n; - struct rte_mbuf **mb; - - n = lc->prtq[pidx].tx_buf.num; - k = RTE_DIM(lc->prtq[pidx].tx_buf.pkt) - n; - mb = lc->prtq[pidx].tx_buf.pkt; - - if (k >= RTE_DIM(lc->prtq[pidx].tx_buf.pkt) / 2) { - j = tle_udp_tx_bulk(lc->prtq[pidx].dev, mb + n, k); - n += j; - lc->prtq[pidx].tx_stat.down += j; - } - - if (n == 0) - return; - - NETBE_TRACE("%s(%u): tle_udp_tx_bulk(%p) returns %u,\n" - "total pkts to send: %u\n", - __func__, lc->id, lc->prtq[pidx].dev, j, n); - - for (j = 0; j != n; j++) - NETBE_PKT_DUMP(mb[j]); - - k = rte_eth_tx_burst(lc->prtq[pidx].port.id, - lc->prtq[pidx].txqid, mb, n); - - lc->prtq[pidx].tx_stat.out 
+= k;
-	lc->prtq[pidx].tx_stat.drop += n - k;
-	NETBE_TRACE("%s(%u): rte_eth_tx_burst(%u, %u, %u) returns %u\n",
-		__func__, lc->id, lc->prtq[pidx].port.id, lc->prtq[pidx].txqid,
-		n, k);
-
-	lc->prtq[pidx].tx_buf.num = n - k;
-	if (k != 0)
-		for (j = k; j != n; j++)
-			mb[j - k] = mb[j];
-}
-
-static int
-netbe_lcore_setup(struct netbe_lcore *lc)
-{
-	uint32_t i;
-	int32_t rc;
-
-	RTE_LOG(NOTICE, USER1, "%s(lcore=%u, udp_ctx: %p) start\n",
-		__func__, lc->id, lc->ctx);
-
-	/*
-	 * wait for FE lcores to start, so the BE doesn't drop any packets
-	 * because the corresponding streams are not yet opened by the FE.
-	 * useful when used with pcap PMDs.
-	 * TODO: find a better way, or make this timeout a cmdline parameter.
-	 */
-	rte_delay_ms(10);
-
-	rc = 0;
-	for (i = 0; i != lc->prtq_num && rc == 0; i++) {
-		RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, udp_dev: %p)\n",
-			__func__, i, lc->prtq[i].port.id, lc->prtq[i].dev);
-		rc = setup_rx_cb(&lc->prtq[i].port, lc, lc->prtq[i].rxqid);
-		if (rc < 0)
-			return rc;
-	}
-
-	if (rc == 0)
-		RTE_PER_LCORE(_be) = lc;
-	return rc;
-}
-
-static inline void
-netbe_lcore(void)
-{
-	uint32_t i;
-	struct netbe_lcore *lc;
-
-	lc = RTE_PER_LCORE(_be);
-	if (lc == NULL)
-		return;
-
-	for (i = 0; i != lc->prtq_num; i++) {
-		netbe_rx(lc, i);
-		netbe_tx(lc, i);
-	}
-}
-
-static void
-netbe_lcore_clear(void)
-{
-	uint32_t i, j;
-	struct netbe_lcore *lc;
-
-	lc = RTE_PER_LCORE(_be);
-	if (lc == NULL)
-		return;
-
-	RTE_LOG(NOTICE, USER1, "%s(lcore=%u, udp_ctx: %p) finish\n",
-		__func__, lc->id, lc->ctx);
-	for (i = 0; i != lc->prtq_num; i++) {
-		RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, lcore=%u, q=%u, dev=%p) "
-			"rx_stats={"
-			"in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "}, "
-			"tx_stats={"
-			"down=%" PRIu64 ",out=%" PRIu64 ",drop=%" PRIu64 "};\n",
-			__func__, i, lc->prtq[i].port.id, lc->id,
-			lc->prtq[i].rxqid,
-			lc->prtq[i].dev,
-			lc->prtq[i].rx_stat.in,
-			lc->prtq[i].rx_stat.up,
-			lc->prtq[i].rx_stat.drop,
-			lc->prtq[i].tx_stat.down,
-			lc->prtq[i].tx_stat.out,
-			lc->prtq[i].tx_stat.drop);
-	}
-
-	for (i = 0; i != lc->prtq_num; i++)
-		for (j = 0; j != lc->prtq[i].tx_buf.num; j++)
-			rte_pktmbuf_free(lc->prtq[i].tx_buf.pkt[j]);
-
-	RTE_PER_LCORE(_be) = NULL;
-}
-
-static int
-lcore_main(void *arg)
-{
-	int32_t rc;
-	uint32_t lcore;
-	struct lcore_prm *prm;
-
-	prm = arg;
-	lcore = rte_lcore_id();
-
-	RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n",
-		__func__, lcore);
-
-	rc = 0;
-
-	/* lcore FE init. */
-	if (prm->fe.max_streams != 0)
-		rc = netfe_lcore_init(&prm->fe);
-
-	/* lcore BE init.
*/ - if (rc == 0 && prm->be.lc != NULL) - rc = netbe_lcore_setup(prm->be.lc); - - if (rc != 0) - sig_handle(SIGQUIT); - - while (force_quit == 0) { - netfe_lcore(); - netbe_lcore(); - } - - RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n", - __func__, lcore); - - netfe_lcore_fini(); - netbe_lcore_clear(); - - return rc; -} - -static int -netfe_lcore_cmp(const void *s1, const void *s2) -{ - const struct netfe_stream_prm *p1, *p2; - - p1 = s1; - p2 = s2; - return p1->lcore - p2->lcore; -} - -static int -netbe_find6(const struct in6_addr *laddr, uint16_t lport, - const struct in6_addr *raddr, uint32_t be_lc) -{ - uint32_t i, j; - uint8_t idx; - struct netbe_lcore *bc; - - /* we have exactly one BE, use it for all traffic */ - if (becfg.cpu_num == 1) - return 0; - - /* search by provided be_lcore */ - if (be_lc != LCORE_ID_ANY) { - for (i = 0; i != becfg.cpu_num; i++) { - bc = becfg.cpu + i; - if (be_lc == bc->id) - return i; - } - RTE_LOG(NOTICE, USER1, "%s: no stream with be_lcore=%u\n", - __func__, be_lc); - return -ENOENT; - } - - /* search by local address */ - if (memcmp(laddr, &in6addr_any, sizeof(*laddr)) != 0) { - for (i = 0; i != becfg.cpu_num; i++) { - bc = becfg.cpu + i; - /* search by queue for the local port */ - for (j = 0; j != bc->prtq_num; j++) { - if (memcmp(laddr, &bc->prtq[j].port.ipv6, - sizeof(*laddr)) == 0) { - - if (lport == 0) - return i; - - if (verify_queue_for_port(bc->prtq + j, - lport) != 0) - return i; - } - } - } - } - - /* search by remote address */ - if (memcmp(raddr, &in6addr_any, sizeof(*raddr)) == 0) { - for (i = 0; i != becfg.cpu_num; i++) { - bc = becfg.cpu + i; - if (rte_lpm6_lookup(bc->lpm6, - (uint8_t *)(uintptr_t)raddr->s6_addr, - &idx) == 0) { - - if (lport == 0) - return i; - - /* search by queue for the local port */ - for (j = 0; j != bc->prtq_num; j++) - if (verify_queue_for_port(bc->prtq + j, - lport) != 0) - return i; - } - } - } - - return -ENOENT; -} - -static int -netbe_find(const struct tle_udp_stream_param *p, uint32_t be_lc) -{ - const struct sockaddr_in *l4, *r4; - const struct sockaddr_in6 *l6, *r6; - - if (p->local_addr.ss_family == AF_INET) { - l4 = (const struct sockaddr_in *)&p->local_addr; - r4 = (const struct sockaddr_in *)&p->remote_addr; - return netbe_find4(&l4->sin_addr, ntohs(l4->sin_port), - &r4->sin_addr, be_lc); - } else if (p->local_addr.ss_family == AF_INET6) { - l6 = (const struct sockaddr_in6 *)&p->local_addr; - r6 = (const struct sockaddr_in6 *)&p->remote_addr; - return netbe_find6(&l6->sin6_addr, ntohs(l6->sin6_port), - &r6->sin6_addr, be_lc); - } - return -EINVAL; -} - -static int -netfe_sprm_flll_be(struct netfe_sprm *sp, uint32_t line, uint32_t be_lc) -{ - int32_t bidx; - - bidx = netbe_find(&sp->prm, be_lc); - if (bidx < 0) { - RTE_LOG(ERR, USER1, "%s(line=%u): no BE for that stream\n", - __func__, line); - return -EINVAL; - } - sp->bidx = bidx; - return 0; -} - -/* start front-end processing. */ -static int -netfe_lcore_fill(struct lcore_prm prm[RTE_MAX_LCORE], - struct netfe_lcore_prm *lprm) -{ - uint32_t be_lc; - uint32_t i, j, lc, ln; - - /* determine on what BE each stream should be open. */ - for (i = 0; i != lprm->nb_streams; i++) { - lc = lprm->stream[i].lcore; - ln = lprm->stream[i].line; - be_lc = lprm->stream[i].be_lcore; - if (netfe_sprm_flll_be(&lprm->stream[i].sprm, ln, - be_lc) != 0 || - (lprm->stream[i].op == FWD && - netfe_sprm_flll_be(&lprm->stream[i].fprm, ln, - be_lc) != 0)) - return -EINVAL; - } - - /* group all fe parameters by lcore. 
*/ - - qsort(lprm->stream, lprm->nb_streams, sizeof(lprm->stream[0]), - netfe_lcore_cmp); - - for (i = 0; i != lprm->nb_streams; i = j) { - - lc = lprm->stream[i].lcore; - ln = lprm->stream[i].line; - - if (rte_lcore_is_enabled(lc) == 0) { - RTE_LOG(ERR, USER1, - "%s(line=%u): lcore %u is not enabled\n", - __func__, ln, lc); - return -EINVAL; - } - - if (rte_get_master_lcore() != lc && - rte_eal_get_lcore_state(lc) == RUNNING) { - RTE_LOG(ERR, USER1, - "%s(line=%u): lcore %u already in use\n", - __func__, ln, lc); - return -EINVAL; - } - - for (j = i + 1; j != lprm->nb_streams && - lc == lprm->stream[j].lcore; - j++) - ; - - prm[lc].fe.max_streams = lprm->max_streams; - prm[lc].fe.nb_streams = j - i; - prm[lc].fe.stream = lprm->stream + i; - } - - return 0; -} - -int -main(int argc, char *argv[]) -{ - int32_t opt, opt_idx, rc; - uint32_t i; - uint64_t v; - struct tle_udp_ctx_param ctx_prm; - struct netfe_lcore_prm feprm; - struct rte_eth_stats stats; - char fecfg_fname[PATH_MAX + 1]; - char becfg_fname[PATH_MAX + 1]; - struct lcore_prm prm[RTE_MAX_LCORE]; - struct rte_eth_dev_info dev_info; - - fecfg_fname[0] = 0; - becfg_fname[0] = 0; - memset(prm, 0, sizeof(prm)); - - rc = rte_eal_init(argc, argv); - if (rc < 0) - rte_exit(EXIT_FAILURE, - "%s: rte_eal_init failed with error code: %d\n", - __func__, rc); - - memset(&ctx_prm, 0, sizeof(ctx_prm)); - - argc -= rc; - argv += rc; - - optind = 0; - optarg = NULL; - while ((opt = getopt_long(argc, argv, "B:PR:S:b:f:s:", long_opt, - &opt_idx)) != EOF) { - if (opt == OPT_SHORT_SBULK) { - rc = parse_uint_val(NULL, optarg, &v); - if (rc < 0) - rte_exit(EXIT_FAILURE, "%s: invalid value: %s " - "for option: \'%c\'\n", - __func__, optarg, opt); - ctx_prm.send_bulk_size = v; - } else if (opt == OPT_SHORT_PROMISC) { - becfg.promisc = 1; - } else if (opt == OPT_SHORT_RBUFS) { - rc = parse_uint_val(NULL, optarg, &v); - if (rc < 0) - rte_exit(EXIT_FAILURE, "%s: invalid value: %s " - "for option: \'%c\'\n", - __func__, optarg, opt); - ctx_prm.max_stream_rbufs = v; - } else if (opt == OPT_SHORT_SBUFS) { - rc = parse_uint_val(NULL, optarg, &v); - if (rc < 0) - rte_exit(EXIT_FAILURE, "%s: invalid value: %s " - "for option: \'%c\'\n", - __func__, optarg, opt); - ctx_prm.max_stream_sbufs = v; - } else if (opt == OPT_SHORT_STREAMS) { - rc = parse_uint_val(NULL, optarg, &v); - if (rc < 0) - rte_exit(EXIT_FAILURE, "%s: invalid value: %s " - "for option: \'%c\'\n", - __func__, optarg, opt); - ctx_prm.max_streams = v; - } else if (opt == OPT_SHORT_BECFG) { - snprintf(becfg_fname, sizeof(becfg_fname), "%s", - optarg); - } else if (opt == OPT_SHORT_FECFG) { - snprintf(fecfg_fname, sizeof(fecfg_fname), "%s", - optarg); - } else { - rte_exit(EXIT_FAILURE, - "%s: unknown option: \'%c\'\n", - __func__, opt); - } - } - - signal(SIGINT, sig_handle); - - rc = netbe_port_init(&becfg, argc - optind, argv + optind); - if (rc != 0) - rte_exit(EXIT_FAILURE, - "%s: netbe_port_init failed with error code: %d\n", - __func__, rc); - - rc = netbe_lcore_init(&becfg, &ctx_prm); - if (rc != 0) - sig_handle(SIGQUIT); - - if ((rc = netbe_dest_init(becfg_fname, &becfg)) != 0) - sig_handle(SIGQUIT); - - for (i = 0; i != becfg.prt_num && rc == 0; i++) { - RTE_LOG(NOTICE, USER1, "%s: starting port %u\n", - __func__, becfg.prt[i].id); - rc = rte_eth_dev_start(becfg.prt[i].id); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s: rte_eth_dev_start(%u) returned " - "error code: %d\n", - __func__, becfg.prt[i].id, rc); - sig_handle(SIGQUIT); - } - rte_eth_dev_info_get(becfg.prt[i].id, &dev_info); - rc = 
update_rss_reta(&becfg.prt[i], &dev_info);
-		if (rc != 0)
-			sig_handle(SIGQUIT);
-	}
-
-	feprm.max_streams = ctx_prm.max_streams * becfg.cpu_num;
-	if (rc == 0 && (rc = netfe_parse_cfg(fecfg_fname, &feprm)) != 0)
-		sig_handle(SIGQUIT);
-
-	for (i = 0; rc == 0 && i != becfg.cpu_num; i++)
-		prm[becfg.cpu[i].id].be.lc = becfg.cpu + i;
-
-	if (rc == 0 && (rc = netfe_lcore_fill(prm, &feprm)) != 0)
-		sig_handle(SIGQUIT);
-
-	/* launch all slave lcores. */
-	RTE_LCORE_FOREACH_SLAVE(i) {
-		if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0)
-			rte_eal_remote_launch(lcore_main, prm + i, i);
-	}
-
-	/* launch master lcore. */
-	i = rte_get_master_lcore();
-	if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0)
-		lcore_main(prm + i);
-
-	rte_eal_mp_wait_lcore();
-
-	for (i = 0; i != becfg.prt_num; i++) {
-		RTE_LOG(NOTICE, USER1, "%s: stopping port %u\n",
-			__func__, becfg.prt[i].id);
-		rte_eth_stats_get(becfg.prt[i].id, &stats);
-		RTE_LOG(NOTICE, USER1, "port %u stats={\n"
-			"ipackets=%" PRIu64 ";"
-			"ibytes=%" PRIu64 ";"
-			"ierrors=%" PRIu64 ";\n"
-			"opackets=%" PRIu64 ";"
-			"obytes=%" PRIu64 ";"
-			"oerrors=%" PRIu64 ";\n"
-			"}\n",
-			becfg.prt[i].id,
-			stats.ipackets,
-			stats.ibytes,
-			stats.ierrors,
-			stats.opackets,
-			stats.obytes,
-			stats.oerrors);
-		rte_eth_dev_stop(becfg.prt[i].id);
-	}
-
-	netbe_lcore_fini(&becfg);
-
-	return 0;
-}
diff --git a/examples/udpfwd/pkt.c b/examples/udpfwd/pkt.c
deleted file mode 100644
index a68e428..0000000
--- a/examples/udpfwd/pkt.c
+++ /dev/null
@@ -1,509 +0,0 @@
-/*
- * Copyright (c) 2016 Intel Corporation.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "netbe.h"
-#include <netinet/ip6.h>
-
-static inline uint64_t
-_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
-	uint64_t ol3, uint64_t ol2)
-{
-	return il2 | il3 << 7 | il4 << 16 | tso << 24 | ol3 << 40 | ol2 << 49;
-}
-
-static inline void
-fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4)
-{
-	m->tx_offload = _mbuf_tx_offload(l2, l3, l4, 0, 0, 0);
-}
-
-static inline int
-is_ipv4_frag(const struct ipv4_hdr *iph)
-{
-	const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG);
-
-	return ((mask & iph->fragment_offset) != 0);
-}
-
-static inline void
-fill_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto,
-	uint32_t frag)
-{
-	const struct ipv4_hdr *iph;
-	int32_t dlen, len;
-
-	dlen = rte_pktmbuf_data_len(m);
-	dlen -= l2 + sizeof(struct udp_hdr);
-
-	iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2);
-	len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER;
-
-	if (frag != 0 && is_ipv4_frag(iph)) {
-		m->packet_type &= ~RTE_PTYPE_L4_MASK;
-		m->packet_type |= RTE_PTYPE_L4_FRAG;
-	}
-
-	if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto))
-		m->packet_type = RTE_PTYPE_UNKNOWN;
-	else
-		fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr));
-}
-
-static inline int
-ipv6x_hdr(uint32_t proto)
-{
-	return (proto == IPPROTO_HOPOPTS ||
-		proto == IPPROTO_ROUTING ||
-		proto == IPPROTO_FRAGMENT ||
-		proto == IPPROTO_AH ||
-		proto == IPPROTO_NONE ||
-		proto == IPPROTO_DSTOPTS);
-}
-
-static inline void
-fill_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto,
-	uint32_t fproto)
-{
-	const struct ip6_ext *ipx;
-	int32_t dlen, len, ofs;
-
-	len = sizeof(struct ipv6_hdr);
-
-	dlen = rte_pktmbuf_data_len(m);
-	dlen -= l2 + sizeof(struct udp_hdr);
-
-	ofs = l2 + len;
-	ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs);
-
-	while (ofs > 0 && len < dlen) {
-
-		switch (nproto) {
-		case IPPROTO_HOPOPTS:
-		case IPPROTO_ROUTING:
-		case IPPROTO_DSTOPTS:
-			ofs = (ipx->ip6e_len + 1) << 3;
-			break;
-		case IPPROTO_AH:
-			ofs = (ipx->ip6e_len + 2) << 2;
-			break;
-		case IPPROTO_FRAGMENT:
-			/*
-			 * tso_segsz is not used by RX, so use it as a
-			 * temporary buffer to store the fragment offset.
-			 */
-			m->tso_segsz = ofs;
-			ofs = sizeof(struct ip6_frag);
-			m->packet_type &= ~RTE_PTYPE_L4_MASK;
-			m->packet_type |= RTE_PTYPE_L4_FRAG;
-			break;
-		default:
-			ofs = 0;
-		}
-
-		if (ofs > 0) {
-			nproto = ipx->ip6e_nxt;
-			len += ofs;
-			ipx += ofs / sizeof(*ipx);
-		}
-	}
-
-	/* unrecognised or invalid packet. */
-	if ((ofs == 0 && nproto != fproto) || len > dlen)
-		m->packet_type = RTE_PTYPE_UNKNOWN;
-	else
-		fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr));
-}
-
-static inline void
-fill_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto)
-{
-	const struct ipv6_hdr *iph;
-
-	iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *,
-		sizeof(struct ether_hdr));
-
-	if (iph->proto == fproto)
-		fill_pkt_hdr_len(m, l2, sizeof(struct ipv6_hdr),
-			sizeof(struct udp_hdr));
-	else if (ipv6x_hdr(iph->proto) != 0)
-		fill_ipv6x_hdr_len(m, l2, iph->proto, fproto);
-}
-
-static inline void
-fill_eth_hdr_len(struct rte_mbuf *m)
-{
-	uint32_t dlen, l2;
-	uint16_t etp;
-	const struct ether_hdr *eth;
-
-	dlen = rte_pktmbuf_data_len(m);
-
-	/* check that first segment is at least 42B long.
*/
-	if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
-			sizeof(struct udp_hdr)) {
-		m->packet_type = RTE_PTYPE_UNKNOWN;
-		return;
-	}
-
-	l2 = sizeof(*eth);
-
-	eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
-	etp = eth->ether_type;
-	if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
-		l2 += sizeof(struct vlan_hdr);
-
-	if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
-		m->packet_type = RTE_PTYPE_L4_UDP |
-			RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-			RTE_PTYPE_L2_ETHER;
-		fill_ipv4_hdr_len(m, l2, IPPROTO_UDP, 1);
-	} else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
-			dlen >= l2 + sizeof(struct ipv6_hdr) +
-			sizeof(struct udp_hdr)) {
-		m->packet_type = RTE_PTYPE_L4_UDP |
-			RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-			RTE_PTYPE_L2_ETHER;
-		fill_ipv6_hdr_len(m, l2, IPPROTO_UDP);
-	} else
-		m->packet_type = RTE_PTYPE_UNKNOWN;
-}
-
-static inline uint16_t
-ipv4x_cksum(const void *iph, size_t len)
-{
-	uint16_t cksum;
-
-	cksum = rte_raw_cksum(iph, len);
-	return (cksum == 0xffff) ? cksum : ~cksum;
-}
-
-static inline void
-fix_reassembled(struct rte_mbuf *m, int32_t hwcsum)
-{
-	struct ipv4_hdr *iph;
-
-	/* update packet type. */
-	m->packet_type &= ~RTE_PTYPE_L4_MASK;
-	m->packet_type |= RTE_PTYPE_L4_UDP;
-
-	/* clear the TX checksum flag set by reassembly. */
-	m->ol_flags &= ~PKT_TX_IP_CKSUM;
-
-	/* fix l3_len after reassemble. */
-	if (RTE_ETH_IS_IPV6_HDR(m->packet_type))
-		m->l3_len = m->l3_len - sizeof(struct ipv6_extension_fragment);
-
-	/* recalculate ipv4 cksum after reassemble. */
-	else if (hwcsum == 0 && RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
-		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
-		iph->hdr_checksum = ipv4x_cksum(iph, m->l3_len);
-	}
-}
-
-static struct rte_mbuf *
-reassemble(struct rte_mbuf *m, struct netbe_lcore *lc, uint64_t tms,
-	uint8_t port)
-{
-	uint32_t l3cs;
-	struct rte_ip_frag_tbl *tbl;
-	struct rte_ip_frag_death_row *dr;
-
-	tbl = lc->ftbl;
-	dr = &lc->death_row;
-	l3cs = lc->prtq[port].port.rx_offload & DEV_RX_OFFLOAD_IPV4_CKSUM;
-
-	if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
-
-		struct ipv4_hdr *iph;
-
-		iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
-
-		/* process this fragment. */
-		m = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, iph);
-
-	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
-
-		struct ipv6_hdr *iph;
-		struct ipv6_extension_fragment *fhdr;
-
-		iph = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len);
-
-		/*
-		 * the fragment header offset was temporarily stored in
-		 * tso_segsz, just to avoid another scan of the ipv6 header.
-		 */
-		fhdr = rte_pktmbuf_mtod_offset(m,
-			struct ipv6_extension_fragment *, m->tso_segsz);
-		m->tso_segsz = 0;
-
-		/* process this fragment. */
-		m = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, iph, fhdr);
-
-	} else {
-		rte_pktmbuf_free(m);
-		m = NULL;
-	}
-
-	/* got reassembled packet. */
-	if (m != NULL)
-		fix_reassembled(m, l3cs);
-
-	return m;
-}
-
-/* exclude NULLs from the final list of packets. */
-static inline uint32_t
-compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero)
-{
-	uint32_t i, j, k, l;
-
-	for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) {
-
-		/* found a hole. */
-		if (pkt[j] == NULL) {
-
-			/* find how big it is. */
-			for (i = j; i-- != 0 && pkt[i] == NULL; )
-				;
-			/* fill the hole.
*/
-			for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++)
-				pkt[l] = pkt[k];
-
-			nb_pkt -= j - i;
-			nb_zero -= j - i;
-			j = i + 1;
-		}
-	}
-
-	return nb_pkt;
-}
-
-/*
- * HW can recognise L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k)
- */
-static uint16_t __rte_unused
-type0_rx_callback(uint8_t port, __rte_unused uint16_t queue,
-	struct rte_mbuf *pkt[], uint16_t nb_pkts,
-	__rte_unused uint16_t max_pkts, void *user_param)
-{
-	uint32_t j, tp, x;
-	uint64_t cts;
-	struct netbe_lcore *lc;
-
-	lc = user_param;
-	cts = 0;
-
-	x = 0;
-	for (j = 0; j != nb_pkts; j++) {
-
-		NETBE_PKT_DUMP(pkt[j]);
-
-		tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
-			RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
-
-		switch (tp) {
-		/* non fragmented udp packets. */
-		case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4 |
-				RTE_PTYPE_L2_ETHER):
-			fill_pkt_hdr_len(pkt[j], sizeof(struct ether_hdr),
-				sizeof(struct ipv4_hdr),
-				sizeof(struct udp_hdr));
-			break;
-		case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6 |
-				RTE_PTYPE_L2_ETHER):
-			fill_pkt_hdr_len(pkt[j], sizeof(struct ether_hdr),
-				sizeof(struct ipv6_hdr),
-				sizeof(struct udp_hdr));
-			break;
-		case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT |
-				RTE_PTYPE_L2_ETHER):
-			fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr),
-				UINT32_MAX, 0);
-			break;
-		case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT |
-				RTE_PTYPE_L2_ETHER):
-			fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr),
-				IPPROTO_UDP);
-			break;
-		/* possibly fragmented udp packets. */
-		case (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER):
-		case (RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER):
-			fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr),
-				IPPROTO_UDP, 1);
-			break;
-		case (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER):
-		case (RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER):
-			fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr),
-				IPPROTO_UDP);
-			break;
-		default:
-			/* treat packet types as invalid. */
-			pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
-			break;
-		}
-
-		/*
-		 * if it is a fragment, try to reassemble it;
-		 * if for some reason that can't be done, then
-		 * set the pkt[] entry to NULL.
-		 */
-		if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) ==
-				RTE_PTYPE_L4_FRAG) {
-			cts = (cts == 0) ? rte_rdtsc() : cts;
-			pkt[j] = reassemble(pkt[j], lc, cts, port);
-			x += (pkt[j] == NULL);
-		}
-	}
-
-	/* reassemble was invoked, cleanup its death-row. */
-	if (cts != 0)
-		rte_ip_frag_free_death_row(&lc->death_row, 0);
-
-	if (x == 0)
-		return nb_pkts;
-
-	NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
-		"%u non-reassembled fragments;\n",
-		__func__, port, queue, nb_pkts, x);
-
-	return compress_pkt_list(pkt, nb_pkts, x);
-}
-
-/*
- * HW can recognise L2/L3/L4 and fragments (i40e).
- */
-static uint16_t __rte_unused
-type1_rx_callback(uint8_t port, __rte_unused uint16_t queue,
-	struct rte_mbuf *pkt[], uint16_t nb_pkts,
-	__rte_unused uint16_t max_pkts, void *user_param)
-{
-	uint32_t j, tp, x;
-	uint64_t cts;
-	struct netbe_lcore *lc;
-
-	lc = user_param;
-	cts = 0;
-
-	x = 0;
-	for (j = 0; j != nb_pkts; j++) {
-
-		NETBE_PKT_DUMP(pkt[j]);
-
-		tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
-			RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
-
-		switch (tp) {
-		case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-				RTE_PTYPE_L2_ETHER):
-			fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr),
-				UINT32_MAX, 0);
-			break;
-		case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-				RTE_PTYPE_L2_ETHER):
-			fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr),
-				IPPROTO_UDP);
-			break;
-		case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
-				RTE_PTYPE_L2_ETHER):
-			fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr),
-				IPPROTO_UDP, 0);
-			break;
-		case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
-				RTE_PTYPE_L2_ETHER):
-			fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr),
-				IPPROTO_UDP);
-			break;
-		default:
-			/* treat packet types as invalid. */
-			pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
-			break;
-		}
-
-		/*
-		 * if it is a fragment, try to reassemble it;
-		 * if for some reason that can't be done, then
-		 * set the pkt[] entry to NULL.
-		 */
-		if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) ==
-				RTE_PTYPE_L4_FRAG) {
-			cts = (cts == 0) ? rte_rdtsc() : cts;
-			pkt[j] = reassemble(pkt[j], lc, cts, port);
-			x += (pkt[j] == NULL);
-		}
-	}
-
-	/* reassemble was invoked, cleanup its death-row. */
-	if (cts != 0)
-		rte_ip_frag_free_death_row(&lc->death_row, 0);
-
-	if (x == 0)
-		return nb_pkts;
-
-	NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
-		"%u non-reassembled fragments;\n",
-		__func__, port, queue, nb_pkts, x);
-
-	return compress_pkt_list(pkt, nb_pkts, x);
-}
-
-/*
- * generic, assumes HW doesn't recognise any packet type.
- */
-static uint16_t
-typen_rx_callback(uint8_t port, __rte_unused uint16_t queue,
-	struct rte_mbuf *pkt[], uint16_t nb_pkts,
-	__rte_unused uint16_t max_pkts, void *user_param)
-{
-	uint32_t j, x;
-	uint64_t cts;
-	struct netbe_lcore *lc;
-
-	lc = user_param;
-	cts = 0;
-
-	x = 0;
-	for (j = 0; j != nb_pkts; j++) {
-
-		NETBE_PKT_DUMP(pkt[j]);
-		fill_eth_hdr_len(pkt[j]);
-
-		/*
-		 * if it is a fragment, try to reassemble it;
-		 * if for some reason that can't be done, then
-		 * set the pkt[] entry to NULL.
-		 */
-		if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) ==
-				RTE_PTYPE_L4_FRAG) {
-			cts = (cts == 0) ? rte_rdtsc() : cts;
-			pkt[j] = reassemble(pkt[j], lc, cts, port);
-			x += (pkt[j] == NULL);
-		}
-	}
-
-	/* reassemble was invoked, cleanup its death-row. */
-	if (cts != 0)
-		rte_ip_frag_free_death_row(&lc->death_row, 0);
-
-	if (x == 0)
-		return nb_pkts;
-
-	NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
-		"%u non-reassembled fragments;\n",
-		__func__, port, queue, nb_pkts, x);
-
-	return compress_pkt_list(pkt, nb_pkts, x);
-}
-
-#include "pkt_dpdk_legacy.h"
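
Note: the type0/type1/typen callbacks above are meant to be installed per RX
queue, so header-length parsing and IP reassembly run before tle_udp_rx_bulk()
ever sees the packets; the actual selection and hook-up logic lives in
pkt_dpdk_legacy.h (via setup_rx_cb(), not shown in this diff). A minimal
sketch of such a registration with the DPDK API of that era, assuming an
already configured port/queue pair and a netbe_lcore to pass as user data
(attach_rx_cb is a hypothetical helper, not part of the sources):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* hypothetical helper: attach the generic parser callback to one RX queue;
 * typen_rx_callback and struct netbe_lcore come from pkt.c above. */
static int
attach_rx_cb(uint8_t port, uint16_t queue, struct netbe_lcore *lc)
{
	const void *cb;

	/* rte_eth_add_rx_callback() returns an opaque handle,
	 * or NULL on failure. */
	cb = rte_eth_add_rx_callback(port, queue, typen_rx_callback, lc);
	return (cb == NULL) ? -1 : 0;
}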
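
Note: _mbuf_tx_offload() in the deleted pkt.c packs the header lengths into
the single 64-bit mbuf tx_offload field by hand; the shift amounts
(7, 16, 24, 40, 49) mirror the widths of the mbuf bit-fields l2_len(7),
l3_len(9), l4_len(8), tso_segsz(16), outer_l3_len(9) and outer_l2_len(7).
A small self-check of that packing, assuming the little-endian bit-field
layout DPDK relies on (pack_tx_offload/tx_offload_self_check are
illustrative names, not part of the sources):

#include <assert.h>
#include <stdint.h>
#include <rte_mbuf.h>

/* mirror of _mbuf_tx_offload() from pkt.c, inner headers only */
static inline uint64_t
pack_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4)
{
	return il2 | il3 << 7 | il4 << 16;
}

static void
tx_offload_self_check(void)
{
	struct rte_mbuf m;

	/* Ether(14) + IPv4(20) + UDP(8) */
	m.tx_offload = pack_tx_offload(14, 20, 8);
	assert(m.l2_len == 14 && m.l3_len == 20 && m.l4_len == 8);
}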
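
Note: compress_pkt_list() squeezes out the NULL entries that reassemble()
leaves behind while preserving packet order; it walks the array backwards so
it can stop as soon as all nb_zero holes are closed. The contract is easy to
demonstrate without DPDK at all; a self-contained sketch using a toy
forward-walking equivalent (squeeze() is illustrative, not from the sources):

#include <stdio.h>

/* toy stand-in for compress_pkt_list(): drop NULLs from p[],
 * keep relative order, return the new element count. */
static unsigned int
squeeze(const char *p[], unsigned int n)
{
	unsigned int i, k;

	for (i = 0, k = 0; i != n; i++)
		if (p[i] != NULL)
			p[k++] = p[i];
	return k;
}

int
main(void)
{
	const char *p[] = { "a", NULL, "b", NULL, NULL, "c" };
	unsigned int i, n;

	n = squeeze(p, 6);
	for (i = 0; i != n; i++)
		printf("%s ", p[i]);	/* prints: a b c */
	putchar('\n');
	return 0;
}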