From aa97dd1ce910b839fed46ad55d1e70e403f5a930 Mon Sep 17 00:00:00 2001 From: Konstantin Ananyev Date: Tue, 21 Feb 2017 18:12:20 +0000 Subject: Introduce first version of TCP code. Supported functionality: - open/close - listen/accept/connect - send/recv In order to achieve that libtle_udp library was reworked into libtle_l4p library that supports both TCP and UDP protocols. New libtle_timer library was introduced (thanks to Cisco guys and Dave Barach for sharing their timer code with us). Sample application was also reworked significantly to support both TCP and UDP traffic handling. New UT were introduced. Change-Id: I806b05011f521e89b58db403cfdd484a37beb775 Signed-off-by: Mohammad Abdul Awal Signed-off-by: Karol Latecki Signed-off-by: Daniel Mrzyglod Signed-off-by: Konstantin Ananyev --- examples/Makefile | 2 +- examples/l4fwd/Makefile | 47 + examples/l4fwd/README | 346 ++++++ examples/l4fwd/be.cfg | 5 + examples/l4fwd/common.h | 662 +++++++++++ examples/l4fwd/dpdk_legacy.h | 160 +++ examples/l4fwd/dpdk_version.h | 33 + examples/l4fwd/fe.cfg | 24 + examples/l4fwd/fwdtbl.h | 117 ++ examples/l4fwd/gen_fe_cfg.py | 200 ++++ examples/l4fwd/lcore.h | 370 +++++++ examples/l4fwd/main.c | 313 ++++++ examples/l4fwd/netbe.h | 330 ++++++ examples/l4fwd/parse.c | 839 ++++++++++++++ examples/l4fwd/parse.h | 69 ++ examples/l4fwd/pkt.c | 872 +++++++++++++++ examples/l4fwd/pkt_dpdk_legacy.h | 197 ++++ examples/l4fwd/port.h | 453 ++++++++ examples/l4fwd/tcp.h | 701 ++++++++++++ examples/l4fwd/udp.h | 588 ++++++++++ examples/udpfwd/Makefile | 46 - examples/udpfwd/README | 141 --- examples/udpfwd/be.cfg | 5 - examples/udpfwd/dpdk_version.h | 33 - examples/udpfwd/fe.cfg | 24 - examples/udpfwd/fwdtbl.h | 117 -- examples/udpfwd/gen_fe_cfg.py | 199 ---- examples/udpfwd/main.c | 2134 ------------------------------------ examples/udpfwd/main_dpdk_legacy.h | 176 --- examples/udpfwd/netbe.h | 266 ----- examples/udpfwd/parse.c | 638 ----------- examples/udpfwd/parse.h | 81 -- examples/udpfwd/pkt.c | 509 --------- examples/udpfwd/pkt_dpdk_legacy.h | 145 --- 34 files changed, 6327 insertions(+), 4515 deletions(-) create mode 100644 examples/l4fwd/Makefile create mode 100644 examples/l4fwd/README create mode 100644 examples/l4fwd/be.cfg create mode 100644 examples/l4fwd/common.h create mode 100644 examples/l4fwd/dpdk_legacy.h create mode 100644 examples/l4fwd/dpdk_version.h create mode 100644 examples/l4fwd/fe.cfg create mode 100644 examples/l4fwd/fwdtbl.h create mode 100755 examples/l4fwd/gen_fe_cfg.py create mode 100644 examples/l4fwd/lcore.h create mode 100644 examples/l4fwd/main.c create mode 100644 examples/l4fwd/netbe.h create mode 100644 examples/l4fwd/parse.c create mode 100644 examples/l4fwd/parse.h create mode 100644 examples/l4fwd/pkt.c create mode 100644 examples/l4fwd/pkt_dpdk_legacy.h create mode 100644 examples/l4fwd/port.h create mode 100644 examples/l4fwd/tcp.h create mode 100644 examples/l4fwd/udp.h delete mode 100644 examples/udpfwd/Makefile delete mode 100644 examples/udpfwd/README delete mode 100644 examples/udpfwd/be.cfg delete mode 100644 examples/udpfwd/dpdk_version.h delete mode 100644 examples/udpfwd/fe.cfg delete mode 100644 examples/udpfwd/fwdtbl.h delete mode 100755 examples/udpfwd/gen_fe_cfg.py delete mode 100644 examples/udpfwd/main.c delete mode 100644 examples/udpfwd/main_dpdk_legacy.h delete mode 100644 examples/udpfwd/netbe.h delete mode 100644 examples/udpfwd/parse.c delete mode 100644 examples/udpfwd/parse.h delete mode 100644 examples/udpfwd/pkt.c delete mode 100644 examples/udpfwd/pkt_dpdk_legacy.h (limited to 'examples') diff --git a/examples/Makefile b/examples/Makefile index bed34ac..cf13574 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -21,6 +21,6 @@ endif include $(RTE_SDK)/mk/rte.vars.mk -DIRS-y += udpfwd +DIRS-y += l4fwd include $(TLDK_ROOT)/mk/tle.subdir.mk diff --git a/examples/l4fwd/Makefile b/examples/l4fwd/Makefile new file mode 100644 index 0000000..f18b622 --- /dev/null +++ b/examples/l4fwd/Makefile @@ -0,0 +1,47 @@ +# Copyright (c) 2016 Intel Corporation. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +ifeq ($(RTE_TARGET),) +$(error "Please define RTE_TARGET environment variable") +endif + +ifeq ($(TLDK_ROOT),) +$(error "Please define TLDK_ROOT environment variable") +endif + +include $(RTE_SDK)/mk/rte.vars.mk + +# binary name +APP = l4fwd + +# all source are stored in SRCS-y +SRCS-y += parse.c +SRCS-y += pkt.c +SRCS-y += main.c + +CFLAGS += $(WERROR_FLAGS) +CFLAGS += -I$(RTE_OUTPUT)/include + +LDLIBS += -L$(RTE_OUTPUT)/lib +LDLIBS += -ltle_l4p +LDLIBS += -ltle_timer + +EXTRA_CFLAGS += -O3 +CFLAGS_parse.o += -D_GNU_SOURCE +CFLAGS_main.o += -D_GNU_SOURCE + +include $(TLDK_ROOT)/mk/tle.app.mk diff --git a/examples/l4fwd/README b/examples/l4fwd/README new file mode 100644 index 0000000..658fe3a --- /dev/null +++ b/examples/l4fwd/README @@ -0,0 +1,346 @@ +1. INTRODUCTION + + l4fwd is a sample application to demonstrate and test TLDK TCP/UDP + functionalities. Depending on configuration it can do simple send, recv or + both over opened TCP/UDP streams. Also it implements ability to do TCP/UDP + packet forwarding between different streams, so it is possible to use the + l4fwd application as a TCP/UDP proxy. + + The l4fwd application logically is divided into two parts, Back End (BE) and + Front End (FE). + +1.1 Back End (BE) + + BE is responsible for: + - RX over DPDK ports and feed them into TCP/UDP TLDK context(s) + (via tle_*_rx_bulk). + + - retrieve packets ready to be send out from TCP/UDP TLDK context(s) and TX + them over destined DPDK port. + + - Multiple RX/TX queues per port are supported by RSS. Right now the number + of TX is same as the number of RX queue. + +- Each BE lcore can serve multiple DPDK ports, TLDK TCP/UDP contexts. + + BE configuration record format: + + port=,addr=,masklen=,mac= + + port - DPDK port id to be used to send packets to the destination. + It is an mandatory option. + addr - destination network address. It is an mandatory option. + masklen - destination network prefix length. It is an mandatory option. + mac - destination Ethernet address. It is an mandatory option. + mtu - MTU to be used on that port (= application data size + L2/L3/L4 + headers sizes, default=1514). It is an optional option. + + Below are some example of BE entries + + port=0,masklen=16,addr=192.168.0.0,mac=01:de:ad:be:ef:01 + port=0,addr=2001:4860:b002::,masklen=64,mac=01:de:ad:be:ef:01 + + These examples are also available in be.cfg file. + +1.2 Front End (FE) + + FE is responsible for: + - to open configured TCP/UDP streams and perform send/recv over them. + These streams can belong to different TCP/UDP contexts. + + Each lcore can act as BE and/or FE. + + In UDP mode the application can reassemble input fragmented IP packets, and + fragment outgoing IP packets (if destination MTU is less then packet size). + + FE configuration record format: + + lcore=,op=<"rx|tx|echo|fwd">,\ + laddr=,lport=,raddr=,rport=,\ + [txlen=,fwladdr=,fwlport=,fwraddr=,fwrport=,\ + belcore=] + + lcore - EAL lcore to manage that stream(s) in the FE. It is an mandatory + option. + belcore - EAL lcore to manage that stream(s) in the BE. It is an optional + option. lcore and belcore can specify the same cpu core. + op - operation to perform on that stream: + "rx" - do receive only on that stream. + "tx" - do send only on that stream. + "echo" - mimic recvfrom(..., &addr);sendto(..., &addr); + on that stream. + "fwd" - forward packets between streams. + It is an mandatory option. + laddr - local address for the stream to open. It is an mandatory option. + lport - local port for the stream to open. It is an mandatory option. + raddr - remote address for the stream to open. It is an mandatory option. + rport - remote port for the stream to open. It is an mandatory option. + txlen - data length sending in each packet (mandatory for "tx" mode only). + fwladdr - local address for the forwarding stream(s) to open + (mandatory for "fwd" mode only). + fwlport - local port for the forwarding stream(s) to open + (mandatory for "fwd" mode only). + fwraddr - remote address for the forwarding stream(s) to open + (mandatory for "fwd" mode only). + fwrport - remote port for the forwarding stream(s) to open + (mandatory for "fwd" mode only). + + Below are some example of FE entries + + lcore=3,op=echo,laddr=192.168.1.233,lport=0x8000,raddr=0.0.0.0,rport=0 + + lcore=3,op=tx,laddr=192.168.1.233,lport=0x8001,raddr=192.168.1.56,\ + rport=0x200,txlen=72 + + lcore=3,op=rx,laddr=::,lport=0x200,raddr=::,rport=0,txlen=72 + + lcore=3,op=fwd,laddr=0.0.0.0,lport=11211,raddr=0.0.0.0,rport=0,\ + fwladdr=::,fwlport=0,fwraddr=2001:4860:b002::56,fwrport=11211 + + These examples are also available in fe.cfg file with some more explanation. + +1.3 Configuration files format + + - each record on a separate line. + - lines started with '#' are treated as comments. + - empty lines (containing whitespace chars only) are ignored. + - kvargs style format for each record. + - each FE record correspond to at least one stream to be opened + (could be multiple streams in case of op="fwd"). + - each BE record define a ipv4/ipv6 destination. + +2. REQUIREMENTS + + DPDK libraries (16.11 or higher) + TLDK libraries (1.0) + Back-End (BE) configuration file + Front-End(FE) configuration file + +3. USAGE + + l4fwd -- \ + -P | --promisc /* promiscuous mode enabled. */ \ + -R | --rbufs /* max recv buffers per stream. */ \ + -S | --sbufs /* max send buffers per stream. */ \ + -s | --streams /* streams to open per context. */ \ + -b | --becfg /* backend configuration file. */ \ + -f | --fecfg /* frontend configuration file. */ \ + -U | --udp /* run the app to handle UDP streams only. */ \ + -T | --tcp /* run the app to handle TCP streams only. */ \ + -L | --listen /* open TCP streams in server mode (listen). */ \ + -a | --enable-arp /* enable arp responses (request not supported) */ \ + -v | --verbose /* different level of verbose mode */ \ + ... + + Note that: options -U and -T cannot be used together. + Option -L can be used only with option -T. + + portX_params: port=,lcore=[-],[lcore=[-],]\ + [rx_offload=,tx_offload=,mtu=,ipv4=,ipv6=] + + portX_params are used to configure the particular DPDK device + (rte_ethdev port), and specify BE lcore that will handle RX/TX from/to the + device and manage BE part of corresponding TCP/UDP context. + Multiple BE lcore can be specified. + + port - DPDK port id (RSS are supported when multiple lcores are + specified for a port). It is an mandatory option. + lcore - EAL lcore id to handle IO over that port (rx_burst/tx_burst). + several ports can be managed by the same lcore, and same port + can be managed by more than one lcore. + It is an mandatory option. At least one lcore option has to be + specified. lcore range can be specified in one lcore option. + e.g. lcore=2-3,lcore=6 will enable lcores 2, 3, and 6 to + handle BE. + rx_offload - RX HW offload capabilities to enable/use on this port. + (bitmask of DEV_RX_OFFLOAD_* values). It is an optional option. + tx_offload - TX HW offload capabilities to enable/use on this port. + (bitmask of DEV_TX_OFFLOAD_* values). + mtu - MTU to be used on that port (= application data size + L2/L3/L4 + headers sizes, default=1514). + ipv4 - ipv4 address to assign to that port. + ipv6 - ipv6 address to assign to that port. + + At least one of ipv4/ipv6 values have to be specified for each port. + +3.1 RSS + + If multiple lcore is specified per DPDK port, the following RSS hash will + be enabled on that port: + ETH_RSS_UDP, or ETH_RSS_TCP + + The RSS queue qid will handle the stream according to the TCP/UDP source + ports of the stream. The qid can be calculated as below + + qid = (src_port % power_of_2(n)) % n + + where n is number of lcore used to mane the DPDK port. + +4. EXAMPLES + +4.1 Sample testbed + ++----------------------------+ +-------------------------------+ +| TLDK Box | | Linux Box | +| | | | +| port 0 +----------------+ port 0 | +| 192.168.1.1 | | 192.168.1.2 | +| 2001:4860:b002::1 | | 2001:4860:b002::2 | +| AA:BB:CC:DD:EE:F1 | | AA:BB:CC:DD:EE:F2 | ++----------------------------+ +-------------------------------+ + +4.2 UDP, "rx" mode, IPv4-only, Single core + + This example shows receiving data from a IPv4 stream. The TLDK UDP server + runs on single core where both BE and FE run on cpu core 3. + + be.cfg file contains: + + port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2 + + fe.cfg file contains (udp server listening to port 6000): + + lcore=3,op=rx,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0 + + run the l4fwd application as below (DPDK port 0 (pci 01:00.0)): + + l4fwd --lcores='3' -w 01:00.0 -- \ + --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \ + --becfg be.cfg -U port=0,lcore=3,ipv4=192.168.1.1 + + This will create TLDK UDP context on lcore=3 (BE lcore) to manage + DPDK port 0. The port 0 will have IPv4 address 192.168.1.1. + All the streams will be in server mode and also managed by lcore 3. + +4.3 UDP, "echo" mode, IPv6-only, Multicore + + This example shows receiving data from a IPv6 stream and sending the data + back through the same IPv6 stream. The TLDK UDP server runs on multicore + where BE runs on cpu core 2 and FE runs on cpu core 3. + + be.cfg file contains: + + port=0,masklen=64,addr=2001:4860:b002::,mac=AA:BB:CC:DD:EE:F2 + + fe.cfg file contains (udp server listening to port 6000): + + lcore=3,op=rx,laddr=2001:4860:b002::1,lport=6000,raddr=::,rport=0 + + run the l4fwd application as below (DPDK port 0 (pci 01:00.0)): + + l4fwd --lcores='2,3' -w 01:00.0 -- \ + --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \ + --becfg be.cfg -U port=0,lcore=2,ipv6=2001:4860:b002::1 + + This will create TLDK UDP context on lcore=2 (BE lcore) to manage + DPDK port 0. The port 0 will have IPv4 address 2001:4860:b002::1. + All the streams will be in server mode and managed by lcore 3 (FE lcore). + In this case, the UDP server will send the incoming data back to the sender. + +4.4 TCP, "echo" mode, IPv4-only, Multicore, RX-Offload + + This example shows receiving data from a IPv4 stream and sending the data + back through the same IPv4 stream. The TLDK TCP server runs on multicore + where BE runs on cpu core 2 and FE runs on cpu core 3. The BE also uses + receive offload features of the NIC. + + be.cfg file contains: + + port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2 + + fe.cfg file contains (tcp server listening to port 6000): + + lcore=3,op=echo,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0 + + run the l4fwd application as below (DPDK port 0 (pci 01:00.0)): + + l4fwd --lcores='2,3' -w 01:00.0 -- \ + --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \ + --becfg be.cfg -T -L port=0,lcore=2,rx_offload=0xf,tx_offload=0,\ + ipv4=192.168.1.1 + + This will create TLDK TCP context on lcore=2 (BE lcore) to manage + DPDK port 0. The port 0 will have IPv4 address 192.168.1.1. The following + DPDK RX HW offloads will be enabled on that port. + DEV_RX_OFFLOAD_VLAN_STRIP, + DEV_RX_OFFLOAD_IPV4_CKSUM, + DEV_RX_OFFLOAD_UDP_CKSUM, + DEV_RX_OFFLOAD_TCP_CKSUM + No HW TX offloads will be enabled. + All the streams will be in server mode and managed by lcore 3 (FE core). + In this case, the TCP server will send the incoming data back to the sender. + +4.5 TCP, "fwd" (proxy) mode, IPv4-to-IPv6, Multi-core, RX-Offload + + This example shows receiving data from a IPv4 stream and forwarding the + data to a IPv6 stream. The TLDK TCP server runs on multicore + where BE runs on cpu core 2 and FE runs on cpu core 3. The BE also uses + receive offload features of the NIC. + + be.cfg file contains: + + port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2 + + fe.cfg file contains (tcp server listening to port 6000): + + lcore=3,op=fwd,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0,\ + rladdr=::,lport=0,raddr=2001:4860:b002::2,rport=7000 + + run the l4fwd application as below (DPDK port 0 (pci 01:00.0)): + + l4fwd --lcores='2,3' -w 01:00.0 -- \ + --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \ + --becfg be.cfg -T -L port=0,lcore=2,rx_offload=0xf,tx_offload=0,\ + ipv4=192.168.1.1,ipv6=2001:4860:b002::1 + + This will create TLDK TCP context on lcore=2 (BE lcore) to manage + DPDK port 0. The port 0 will have IPv4 address 192.168.1.1. The following + DPDK RX HW offloads will be enabled on that port. + DEV_RX_OFFLOAD_VLAN_STRIP, + DEV_RX_OFFLOAD_IPV4_CKSUM, + DEV_RX_OFFLOAD_UDP_CKSUM, + DEV_RX_OFFLOAD_TCP_CKSUM + No HW TX offloads will be enabled. + All the streams will be in server mode and managed by lcore 3 (FE core). + In this case, the IPv4 TCP server will forward the incoming data to the IPv6 + TCP server 2001:4860:b002::2 listening to port 7000. + +4.6 TCP, "echo" mode, RSS, IPv4-only, Multicore, RX-Offload + + This example shows receiving data from a IPv4 stream and sending the data + back through the same IPv4 stream. The TLDK TCP server runs on multicore + where BE runs on cpu cores 1-2 and FE runs on cpu core 3. As BE runs on + multicore, Receive Side Scaling (RSS) feature will be automatically enabled. + The BE also uses receive offload features of the NIC. + + be.cfg file contains: + + port=0,masklen=24,addr=192.168.1.0,mac=AA:BB:CC:DD:EE:F2 + + fe.cfg file contains (tcp server listening to port 6000): + + lcore=3,op=echo,laddr=192.168.1.1,lport=6000,raddr=0.0.0.0,rport=0 + lcore=3,op=echo,laddr=192.168.1.1,lport=6001,raddr=0.0.0.0,rport=0 + + run the l4fwd application as below (DPDK port 0 (pci 01:00.0)): + + l4fwd --lcores='1,2,3' -w 01:00.0 -- \ + --promisc --rbufs 0x100 --sbufs 0x100 --streams 0x100 --fecfg fe.cfg \ + --becfg be.cfg -T -L port=0,lcore="1-2",rx_offload=0xf,tx_offload=0,\ + ipv4=192.168.1.1 + + This will create TLDK TCP context on lcore=1-2 (BE lcore) to manage + DPDK port 0. The port 0 will have IPv4 address 192.168.1.1. The following + DPDK RX HW offloads will be enabled on that port. + DEV_RX_OFFLOAD_VLAN_STRIP, + DEV_RX_OFFLOAD_IPV4_CKSUM, + DEV_RX_OFFLOAD_UDP_CKSUM, + DEV_RX_OFFLOAD_TCP_CKSUM + No HW TX offloads will be enabled. + All the streams will be in server mode and managed by lcore 3 (FE core). + In this case, the TCP server will send the incoming data back to the sender. + + As RSS is enabled, all the packets with destination port 6000 and 6001 will + be managed by HW queue 0 and queue 1 respectively. Please note that RSS + is not supported on the interface when both IPv4 and IPv6 are enabled. + Only one of IPv4 or IPv6 has to be enabled in the port. diff --git a/examples/l4fwd/be.cfg b/examples/l4fwd/be.cfg new file mode 100644 index 0000000..8e6d983 --- /dev/null +++ b/examples/l4fwd/be.cfg @@ -0,0 +1,5 @@ +# +# l4fwd BE config file example +# +port=0,masklen=16,addr=192.168.0.0,mac=01:de:ad:be:ef:01 +port=0,addr=2001:4860:b002::,masklen=64,mac=01:de:ad:be:ef:01 diff --git a/examples/l4fwd/common.h b/examples/l4fwd/common.h new file mode 100644 index 0000000..ff8ee7a --- /dev/null +++ b/examples/l4fwd/common.h @@ -0,0 +1,662 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef COMMON_H_ +#define COMMON_H_ + +#include + +static void +sig_handle(int signum) +{ + RTE_LOG(ERR, USER1, "%s(%d)\n", __func__, signum); + force_quit = 1; +} + +static void +netfe_stream_dump(const struct netfe_stream *fes, struct sockaddr_storage *la, + struct sockaddr_storage *ra) +{ + struct sockaddr_in *l4, *r4; + struct sockaddr_in6 *l6, *r6; + uint16_t lport, rport; + char laddr[INET6_ADDRSTRLEN]; + char raddr[INET6_ADDRSTRLEN]; + + if (la->ss_family == AF_INET) { + + l4 = (struct sockaddr_in *)la; + r4 = (struct sockaddr_in *)ra; + + lport = l4->sin_port; + rport = r4->sin_port; + + } else if (la->ss_family == AF_INET6) { + + l6 = (struct sockaddr_in6 *)la; + r6 = (struct sockaddr_in6 *)ra; + + lport = l6->sin6_port; + rport = r6->sin6_port; + + } else { + RTE_LOG(ERR, USER1, "stream@%p - unknown family=%hu\n", + fes->s, la->ss_family); + return; + } + + format_addr(la, laddr, sizeof(laddr)); + format_addr(ra, raddr, sizeof(raddr)); + + RTE_LOG(INFO, USER1, "stream@%p={s=%p," + "family=%hu,proto=%s,laddr=%s,lport=%hu,raddr=%s,rport=%hu;" + "stats={" + "rxp=%" PRIu64 ",rxb=%" PRIu64 + ",txp=%" PRIu64 ",txb=%" PRIu64 + ",drops=%" PRIu64 "," + "rxev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "]," + "txev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "]" + "};};\n", + fes, fes->s, la->ss_family, proto_name[fes->proto], + laddr, ntohs(lport), raddr, ntohs(rport), + fes->stat.rxp, fes->stat.rxb, + fes->stat.txp, fes->stat.txb, + fes->stat.drops, + fes->stat.rxev[TLE_SEV_IDLE], + fes->stat.rxev[TLE_SEV_DOWN], + fes->stat.rxev[TLE_SEV_UP], + fes->stat.txev[TLE_SEV_IDLE], + fes->stat.txev[TLE_SEV_DOWN], + fes->stat.txev[TLE_SEV_UP]); +} + +static inline uint32_t +netfe_get_streams(struct netfe_stream_list *list, struct netfe_stream *rs[], + uint32_t num) +{ + struct netfe_stream *s; + uint32_t i, n; + + n = RTE_MIN(list->num, num); + for (i = 0, s = LIST_FIRST(&list->head); + i != n; + i++, s = LIST_NEXT(s, link)) { + rs[i] = s; + } + + if (s == NULL) + /* we retrieved all free entries */ + LIST_INIT(&list->head); + else + LIST_FIRST(&list->head) = s; + + list->num -= n; + + return n; +} + +static inline struct netfe_stream * +netfe_get_stream(struct netfe_stream_list *list) +{ + struct netfe_stream *s; + + s = NULL; + if (list->num == 0) + return s; + + netfe_get_streams(list, &s, 1); + + return s; +} + +static inline void +netfe_put_streams(struct netfe_lcore *fe, struct netfe_stream_list *list, + struct netfe_stream *fs[], uint32_t num) +{ + uint32_t i, n; + + n = RTE_MIN(fe->snum - list->num, num); + if (n != num) + RTE_LOG(ERR, USER1, "%s: list overflow by %u\n", __func__, + num - n); + + for (i = 0; i != n; i++) + LIST_INSERT_HEAD(&list->head, fs[i], link); + list->num += n; +} + +static inline void +netfe_put_stream(struct netfe_lcore *fe, struct netfe_stream_list *list, + struct netfe_stream *s) +{ + if (list->num == fe->snum) { + RTE_LOG(ERR, USER1, "%s: list is full\n", __func__); + return; + } + + netfe_put_streams(fe, list, &s, 1); +} + +static inline void +netfe_rem_stream(struct netfe_stream_list *list, struct netfe_stream *s) +{ + LIST_REMOVE(s, link); + list->num--; +} + +static void +netfe_stream_close(struct netfe_lcore *fe, struct netfe_stream *fes) +{ + tle_stream_close(fes->s); + tle_event_free(fes->txev); + tle_event_free(fes->rxev); + tle_event_free(fes->erev); + memset(fes, 0, sizeof(*fes)); + netfe_put_stream(fe, &fe->free, fes); +} + +/* + * Helper functions, verify the queue for corresponding UDP port. + */ +static uint8_t +verify_queue_for_port(const struct netbe_dev *prtq, const uint16_t lport) +{ + uint32_t align_nb_q, qid; + + align_nb_q = rte_align32pow2(prtq->port.nb_lcore); + qid = (lport % align_nb_q) % prtq->port.nb_lcore; + if (prtq->rxqid == qid) + return 1; + + return 0; +} + +static inline size_t +pkt_buf_empty(struct pkt_buf *pb) +{ + uint32_t i; + size_t x; + + x = 0; + for (i = 0; i != pb->num; i++) { + x += pb->pkt[i]->pkt_len; + NETFE_PKT_DUMP(pb->pkt[i]); + rte_pktmbuf_free(pb->pkt[i]); + } + + pb->num = 0; + return x; +} + +static inline void +pkt_buf_fill(uint32_t lcore, struct pkt_buf *pb, uint32_t dlen) +{ + uint32_t i; + int32_t sid; + + sid = rte_lcore_to_socket_id(lcore) + 1; + + for (i = pb->num; i != RTE_DIM(pb->pkt); i++) { + pb->pkt[i] = rte_pktmbuf_alloc(mpool[sid]); + if (pb->pkt[i] == NULL) + break; + rte_pktmbuf_append(pb->pkt[i], dlen); + } + + pb->num = i; +} + +static int +netbe_lcore_setup(struct netbe_lcore *lc) +{ + uint32_t i; + int32_t rc; + + RTE_LOG(NOTICE, USER1, "%s:(lcore=%u, proto=%s, ctx=%p) start\n", + __func__, lc->id, proto_name[lc->proto], lc->ctx); + + /* + * ??????? + * wait for FE lcores to start, so BE dont' drop any packets + * because corresponding streams not opened yet by FE. + * useful when used with pcap PMDS. + * think better way, or should this timeout be a cmdlien parameter. + * ??????? + */ + rte_delay_ms(10); + + rc = 0; + for (i = 0; i != lc->prtq_num && rc == 0; i++) { + RTE_LOG(NOTICE, USER1, + "%s:%u(port=%u, q=%u, proto=%s, dev=%p)\n", + __func__, i, lc->prtq[i].port.id, lc->prtq[i].rxqid, + proto_name[lc->proto], lc->prtq[i].dev); + + rc = setup_rx_cb(&lc->prtq[i].port, lc, lc->prtq[i].rxqid, + becfg.arp); + if (rc < 0) + return rc; + } + + if (rc == 0) + RTE_PER_LCORE(_be) = lc; + return rc; +} + +static void +netbe_lcore_clear(void) +{ + uint32_t i, j; + struct netbe_lcore *lc; + + lc = RTE_PER_LCORE(_be); + if (lc == NULL) + return; + + RTE_LOG(NOTICE, USER1, "%s(lcore=%u, proto=%s, ctx: %p) finish\n", + __func__, lc->id, proto_name[lc->proto], lc->ctx); + for (i = 0; i != lc->prtq_num; i++) { + RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, q=%u, lcore=%u, dev=%p) " + "rx_stats={" + "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "}, " + "tx_stats={" + "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "};\n", + __func__, i, lc->prtq[i].port.id, lc->prtq[i].rxqid, + lc->id, + lc->prtq[i].dev, + lc->prtq[i].rx_stat.in, + lc->prtq[i].rx_stat.up, + lc->prtq[i].rx_stat.drop, + lc->prtq[i].tx_stat.down, + lc->prtq[i].tx_stat.out, + lc->prtq[i].tx_stat.drop); + } + + RTE_LOG(NOTICE, USER1, "tcp_stat={\n"); + for (i = 0; i != RTE_DIM(lc->tcp_stat.flags); i++) { + if (lc->tcp_stat.flags[i] != 0) + RTE_LOG(NOTICE, USER1, "[flag=%#x]==%" PRIu64 ";\n", + i, lc->tcp_stat.flags[i]); + } + RTE_LOG(NOTICE, USER1, "};\n"); + + for (i = 0; i != lc->prtq_num; i++) + for (j = 0; j != lc->prtq[i].tx_buf.num; j++) + rte_pktmbuf_free(lc->prtq[i].tx_buf.pkt[j]); + + RTE_PER_LCORE(_be) = NULL; +} + +static int +netbe_add_ipv4_route(struct netbe_lcore *lc, const struct netbe_dest *dst, + uint8_t idx) +{ + int32_t rc; + uint32_t addr, depth; + char str[INET_ADDRSTRLEN]; + + depth = dst->prfx; + addr = rte_be_to_cpu_32(dst->ipv4.s_addr); + + inet_ntop(AF_INET, &dst->ipv4, str, sizeof(str)); + rc = rte_lpm_add(lc->lpm4, addr, depth, idx); + RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p," + "ipv4=%s/%u,mtu=%u," + "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) " + "returns %d;\n", + __func__, lc->id, dst->port, lc->dst4[idx].dev, + str, depth, lc->dst4[idx].mtu, + dst->mac.addr_bytes[0], dst->mac.addr_bytes[1], + dst->mac.addr_bytes[2], dst->mac.addr_bytes[3], + dst->mac.addr_bytes[4], dst->mac.addr_bytes[5], + rc); + return rc; +} + +static int +netbe_add_ipv6_route(struct netbe_lcore *lc, const struct netbe_dest *dst, + uint8_t idx) +{ + int32_t rc; + uint32_t depth; + char str[INET6_ADDRSTRLEN]; + + depth = dst->prfx; + + rc = rte_lpm6_add(lc->lpm6, (uint8_t *)(uintptr_t)dst->ipv6.s6_addr, + depth, idx); + + inet_ntop(AF_INET6, &dst->ipv6, str, sizeof(str)); + RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p," + "ipv6=%s/%u,mtu=%u," + "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) " + "returns %d;\n", + __func__, lc->id, dst->port, lc->dst6[idx].dev, + str, depth, lc->dst4[idx].mtu, + dst->mac.addr_bytes[0], dst->mac.addr_bytes[1], + dst->mac.addr_bytes[2], dst->mac.addr_bytes[3], + dst->mac.addr_bytes[4], dst->mac.addr_bytes[5], + rc); + return rc; +} + +static void +fill_dst(struct tle_dest *dst, struct netbe_dev *bed, + const struct netbe_dest *bdp, uint16_t l3_type, int32_t sid, + uint8_t proto_id) +{ + struct ether_hdr *eth; + struct ipv4_hdr *ip4h; + struct ipv6_hdr *ip6h; + + dst->dev = bed->dev; + dst->head_mp = frag_mpool[sid + 1]; + dst->mtu = RTE_MIN(bdp->mtu, bed->port.mtu); + dst->l2_len = sizeof(*eth); + + eth = (struct ether_hdr *)dst->hdr; + + ether_addr_copy(&bed->port.mac, ð->s_addr); + ether_addr_copy(&bdp->mac, ð->d_addr); + eth->ether_type = rte_cpu_to_be_16(l3_type); + + if (l3_type == ETHER_TYPE_IPv4) { + dst->l3_len = sizeof(*ip4h); + ip4h = (struct ipv4_hdr *)(eth + 1); + ip4h->version_ihl = 4 << 4 | + sizeof(*ip4h) / IPV4_IHL_MULTIPLIER; + ip4h->time_to_live = 64; + ip4h->next_proto_id = proto_id; + } else if (l3_type == ETHER_TYPE_IPv6) { + dst->l3_len = sizeof(*ip6h); + ip6h = (struct ipv6_hdr *)(eth + 1); + ip6h->vtc_flow = 6 << 4; + ip6h->proto = proto_id; + ip6h->hop_limits = 64; + } +} + +static int +netbe_add_dest(struct netbe_lcore *lc, uint32_t dev_idx, uint16_t family, + const struct netbe_dest *dst, uint32_t dnum) +{ + int32_t rc, sid; + uint8_t proto; + uint16_t l3_type; + uint32_t i, n, m; + struct tle_dest *dp; + + if (family == AF_INET) { + n = lc->dst4_num; + dp = lc->dst4 + n; + m = RTE_DIM(lc->dst4); + l3_type = ETHER_TYPE_IPv4; + } else { + n = lc->dst6_num; + dp = lc->dst6 + n; + m = RTE_DIM(lc->dst6); + l3_type = ETHER_TYPE_IPv6; + } + + if (n + dnum >= m) { + RTE_LOG(ERR, USER1, "%s(lcore=%u, family=%hu, dnum=%u) exceeds " + "maximum allowed number of destinations(%u);\n", + __func__, lc->id, family, dnum, m); + return -ENOSPC; + } + + sid = rte_lcore_to_socket_id(lc->id); + proto = (becfg.proto == TLE_PROTO_UDP) ? IPPROTO_UDP : IPPROTO_TCP; + rc = 0; + + for (i = 0; i != dnum && rc == 0; i++) { + fill_dst(dp + i, lc->prtq + dev_idx, dst + i, l3_type, sid, + proto); + if (family == AF_INET) + rc = netbe_add_ipv4_route(lc, dst + i, n + i); + else + rc = netbe_add_ipv6_route(lc, dst + i, n + i); + } + + if (family == AF_INET) + lc->dst4_num = n + i; + else + lc->dst6_num = n + i; + + return rc; +} + +static inline void +fill_arp_reply(struct netbe_dev *dev, struct rte_mbuf *m) +{ + struct ether_hdr *eth; + struct arp_hdr *ahdr; + struct arp_ipv4 *adata; + uint32_t tip; + + /* set up the ethernet data */ + eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + eth->d_addr = eth->s_addr; + eth->s_addr = dev->port.mac; + + /* set up the arp data */ + ahdr = rte_pktmbuf_mtod_offset(m, struct arp_hdr *, m->l2_len); + adata = &ahdr->arp_data; + + ahdr->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY); + + tip = adata->arp_tip; + adata->arp_tip = adata->arp_sip; + adata->arp_sip = tip; + + adata->arp_tha = adata->arp_sha; + adata->arp_sha = dev->port.mac; +} + +/* this is a semi ARP response implementation of RFC 826 + * in RFC, it algo is as below + * + * ?Do I have the hardware type in ar$hrd? + * Yes: (almost definitely) + * [optionally check the hardware length ar$hln] + * ?Do I speak the protocol in ar$pro? + * Yes: + * [optionally check the protocol length ar$pln] + * Merge_flag := false + * If the pair is + * already in my translation table, update the sender + * hardware address field of the entry with the new + * information in the packet and set Merge_flag to true. + * ?Am I the target protocol address? + * Yes: + * If Merge_flag is false, add the triplet to + * the translation table. + * ?Is the opcode ares_op$REQUEST? (NOW look at the opcode!!) + * Yes: + * Swap hardware and protocol fields, putting the local + * hardware and protocol addresses in the sender fields. + * Set the ar$op field to ares_op$REPLY + * Send the packet to the (new) target hardware address on + * the same hardware on which the request was received. + * + * So, in our implementation we skip updating the local cache, + * we assume that local cache is ok, so we just reply the packet. + */ + +static inline void +send_arp_reply(struct netbe_dev *dev, struct pkt_buf *pb) +{ + uint32_t i, n, num; + struct rte_mbuf **m; + + m = pb->pkt; + num = pb->num; + for (i = 0; i != num; i++) { + fill_arp_reply(dev, m[i]); + NETBE_PKT_DUMP(m[i]); + } + + n = rte_eth_tx_burst(dev->port.id, dev->txqid, m, num); + NETBE_TRACE("%s: sent n=%u arp replies\n", __func__, n); + + /* free mbufs with unsent arp response */ + for (i = n; i != num; i++) + rte_pktmbuf_free(m[i]); + + pb->num = 0; +} + +static inline void +netbe_rx(struct netbe_lcore *lc, uint32_t pidx) +{ + uint32_t j, k, n; + struct rte_mbuf *pkt[MAX_PKT_BURST]; + struct rte_mbuf *rp[MAX_PKT_BURST]; + int32_t rc[MAX_PKT_BURST]; + struct pkt_buf *abuf; + + n = rte_eth_rx_burst(lc->prtq[pidx].port.id, + lc->prtq[pidx].rxqid, pkt, RTE_DIM(pkt)); + + if (n != 0) { + lc->prtq[pidx].rx_stat.in += n; + NETBE_TRACE("%s(%u): rte_eth_rx_burst(%u, %u) returns %u\n", + __func__, lc->id, lc->prtq[pidx].port.id, + lc->prtq[pidx].rxqid, n); + + k = tle_rx_bulk(lc->prtq[pidx].dev, pkt, rp, rc, n); + + lc->prtq[pidx].rx_stat.up += k; + lc->prtq[pidx].rx_stat.drop += n - k; + NETBE_TRACE("%s(%u): tle_%s_rx_bulk(%p, %u) returns %u\n", + __func__, lc->id, proto_name[lc->proto], + lc->prtq[pidx].dev, n, k); + + for (j = 0; j != n - k; j++) { + NETBE_TRACE("%s:%d(port=%u) rp[%u]={%p, %d};\n", + __func__, __LINE__, lc->prtq[pidx].port.id, + j, rp[j], rc[j]); + rte_pktmbuf_free(rp[j]); + } + } + + /* respond to incoming arp requests */ + abuf = &lc->prtq[pidx].arp_buf; + if (abuf->num == 0) + return; + + send_arp_reply(&lc->prtq[pidx], abuf); +} + +static inline void +netbe_tx(struct netbe_lcore *lc, uint32_t pidx) +{ + uint32_t j, k, n; + struct rte_mbuf **mb; + + n = lc->prtq[pidx].tx_buf.num; + k = RTE_DIM(lc->prtq[pidx].tx_buf.pkt) - n; + mb = lc->prtq[pidx].tx_buf.pkt; + + if (k >= RTE_DIM(lc->prtq[pidx].tx_buf.pkt) / 2) { + j = tle_tx_bulk(lc->prtq[pidx].dev, mb + n, k); + n += j; + lc->prtq[pidx].tx_stat.down += j; + } + + if (n == 0) + return; + + NETBE_TRACE("%s(%u): tle_%s_tx_bulk(%p) returns %u,\n" + "total pkts to send: %u\n", + __func__, lc->id, proto_name[lc->proto], + lc->prtq[pidx].dev, j, n); + + for (j = 0; j != n; j++) + NETBE_PKT_DUMP(mb[j]); + + k = rte_eth_tx_burst(lc->prtq[pidx].port.id, + lc->prtq[pidx].txqid, mb, n); + + lc->prtq[pidx].tx_stat.out += k; + lc->prtq[pidx].tx_stat.drop += n - k; + NETBE_TRACE("%s(%u): rte_eth_tx_burst(%u, %u, %u) returns %u\n", + __func__, lc->id, lc->prtq[pidx].port.id, lc->prtq[pidx].txqid, + n, k); + + lc->prtq[pidx].tx_buf.num = n - k; + if (k != 0) + for (j = k; j != n; j++) + mb[j - k] = mb[j]; +} + +static inline void +netbe_lcore(void) +{ + uint32_t i; + struct netbe_lcore *lc; + + lc = RTE_PER_LCORE(_be); + if (lc == NULL) + return; + + for (i = 0; i != lc->prtq_num; i++) { + netbe_rx(lc, i); + netbe_tx(lc, i); + } +} + +static inline void +netfe_rx_process(__rte_unused uint32_t lcore, struct netfe_stream *fes) +{ + uint32_t k, n; + + n = fes->pbuf.num; + k = RTE_DIM(fes->pbuf.pkt) - n; + + /* packet buffer is full, can't receive any new packets. */ + if (k == 0) { + tle_event_idle(fes->rxev); + fes->stat.rxev[TLE_SEV_IDLE]++; + return; + } + + n = tle_stream_recv(fes->s, fes->pbuf.pkt + n, k); + if (n == 0) + return; + + NETFE_TRACE("%s(%u): tle_%s_stream_recv(%p, %u) returns %u\n", + __func__, lcore, proto_name[fes->proto], fes->s, k, n); + + fes->pbuf.num += n; + fes->stat.rxp += n; + + /* free all received mbufs. */ + if (fes->op == RXONLY) + fes->stat.rxb += pkt_buf_empty(&fes->pbuf); + /* mark stream as writable */ + else if (k == RTE_DIM(fes->pbuf.pkt)) { + if (fes->op == RXTX) { + tle_event_active(fes->txev, TLE_SEV_UP); + fes->stat.txev[TLE_SEV_UP]++; + } else if (fes->op == FWD) { + tle_event_raise(fes->txev); + fes->stat.txev[TLE_SEV_UP]++; + } + } +} + +#endif /* COMMON_H_ */ diff --git a/examples/l4fwd/dpdk_legacy.h b/examples/l4fwd/dpdk_legacy.h new file mode 100644 index 0000000..84fab17 --- /dev/null +++ b/examples/l4fwd/dpdk_legacy.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MAIN_DPDK_LEGACY_H_ +#define MAIN_DPDK_LEGACY_H_ + +#include "dpdk_version.h" + +/* + * UDP IPv4 destination lookup callback. + */ +static int +lpm4_dst_lookup(void *data, const struct in_addr *addr, + struct tle_dest *res) +{ + int32_t rc; +#ifdef DPDK_VERSION_GE_1604 + uint32_t idx; +#else + uint8_t idx; +#endif + struct netbe_lcore *lc; + struct tle_dest *dst; + + lc = data; + + rc = rte_lpm_lookup(lc->lpm4, rte_be_to_cpu_32(addr->s_addr), &idx); + if (rc == 0) { + dst = &lc->dst4[idx]; + rte_memcpy(res, dst, dst->l2_len + dst->l3_len + + offsetof(struct tle_dest, hdr)); + } + return rc; +} + +static int +lcore_lpm_init(struct netbe_lcore *lc) +{ + int32_t sid; + char str[RTE_LPM_NAMESIZE]; +#ifdef DPDK_VERSION_GE_1604 + const struct rte_lpm_config lpm4_cfg = { + .max_rules = MAX_RULES, + .number_tbl8s = MAX_TBL8, + }; +#endif + const struct rte_lpm6_config lpm6_cfg = { + .max_rules = MAX_RULES, + .number_tbl8s = MAX_TBL8, + }; + + sid = rte_lcore_to_socket_id(lc->id); + + snprintf(str, sizeof(str), "LPM4%u\n", lc->id); +#ifdef DPDK_VERSION_GE_1604 + lc->lpm4 = rte_lpm_create(str, sid, &lpm4_cfg); +#else + lc->lpm4 = rte_lpm_create(str, sid, MAX_RULES, 0); +#endif + RTE_LOG(NOTICE, USER1, "%s(lcore=%u): lpm4=%p;\n", + __func__, lc->id, lc->lpm4); + if (lc->lpm4 == NULL) + return -ENOMEM; + + snprintf(str, sizeof(str), "LPM6%u\n", lc->id); + lc->lpm6 = rte_lpm6_create(str, sid, &lpm6_cfg); + RTE_LOG(NOTICE, USER1, "%s(lcore=%u): lpm6=%p;\n", + __func__, lc->id, lc->lpm6); + if (lc->lpm6 == NULL) + return -ENOMEM; + + return 0; +} + +/* + * Helper functions, finds BE by given local and remote addresses. + */ +static int +netbe_find4(const struct in_addr *laddr, const uint16_t lport, + const struct in_addr *raddr, const uint32_t belc) +{ + uint32_t i, j; +#ifdef DPDK_VERSION_GE_1604 + uint32_t idx; +#else + uint8_t idx; +#endif + struct netbe_lcore *bc; + + /* we have exactly one BE, use it for all traffic */ + if (becfg.cpu_num == 1) + return 0; + + /* search by provided be_lcore */ + if (belc != LCORE_ID_ANY) { + for (i = 0; i != becfg.cpu_num; i++) { + bc = becfg.cpu + i; + if (belc == bc->id) + return i; + } + RTE_LOG(NOTICE, USER1, "%s: no stream with be_lcore=%u\n", + __func__, belc); + return -ENOENT; + } + + /* search by local address */ + if (laddr->s_addr != INADDR_ANY) { + for (i = 0; i != becfg.cpu_num; i++) { + bc = becfg.cpu + i; + /* search by queue for the local port */ + for (j = 0; j != bc->prtq_num; j++) { + if (laddr->s_addr == bc->prtq[j].port.ipv4) { + + if (lport == 0) + return i; + + if (verify_queue_for_port(bc->prtq + j, + lport) != 0) + return i; + } + } + } + } + + /* search by remote address */ + if (raddr->s_addr != INADDR_ANY) { + for (i = 0; i != becfg.cpu_num; i++) { + bc = becfg.cpu + i; + if (rte_lpm_lookup(bc->lpm4, + rte_be_to_cpu_32(raddr->s_addr), + &idx) == 0) { + + if (lport == 0) + return i; + + /* search by queue for the local port */ + for (j = 0; j != bc->prtq_num; j++) + if (verify_queue_for_port(bc->prtq + j, + lport) != 0) + return i; + } + } + } + + return -ENOENT; +} + +#endif /* MAIN_DPDK_LEGACY_H_ */ diff --git a/examples/l4fwd/dpdk_version.h b/examples/l4fwd/dpdk_version.h new file mode 100644 index 0000000..43235c8 --- /dev/null +++ b/examples/l4fwd/dpdk_version.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DPDK_VERSION_H_ +#define DPDK_VERSION_H_ + +#include + +#ifdef RTE_VER_MAJOR +#if RTE_VER_MAJOR >= 16 && RTE_VER_MINOR >= 4 +#define DPDK_VERSION_GE_1604 +#endif +#elif defined(RTE_VER_YEAR) +#if RTE_VERSION_NUM(16, 4, 0, 0) <= RTE_VERSION +#define DPDK_VERSION_GE_1604 +#endif +#else +#error "RTE_VER_MAJOR and RTE_VER_YEAR are undefined!" +#endif + +#endif /* DPDK_VERSION_H_ */ diff --git a/examples/l4fwd/fe.cfg b/examples/l4fwd/fe.cfg new file mode 100644 index 0000000..2706323 --- /dev/null +++ b/examples/l4fwd/fe.cfg @@ -0,0 +1,24 @@ +# +# udpfwd FE config file example +# + +# open IPv4 stream with local_addr=192.168.1.233:32768, +# and remote_addr as wildcard (any remote addressi/port allowed). +# use it echo mode - for any received packet - send it back to the source +lcore=3,op=echo,laddr=192.168.1.233,lport=0x8000,raddr=0.0.0.0,rport=0 + +# open IPv4 stream with specified local/remote address/port and +# do send only over that stream. +lcore=3,op=tx,laddr=192.168.1.233,lport=0x8001,raddr=192.168.1.56,rport=0x200,txlen=72 + +# open IPv6 stream with specified local port (512) probably over multiple +# eth ports, and do recv only over that stream. +lcore=3,op=rx,laddr=::,lport=0x200,raddr=::,rport=0,txlen=72 + +# fwd mode example. +# open IPv4 stream on local port 11211 (memcached) over all possible ports. +# for each new flow, sort of tunnel will be created, i.e: +# new stream will be opend to communcate with forwarding remote address, +# so all packets with will be forwarded to +# and visa-versa. +lcore=3,op=fwd,laddr=0.0.0.0,lport=11211,raddr=0.0.0.0,rport=0,fwladdr=::,fwlport=0,fwraddr=2001:4860:b002::56,fwrport=11211 diff --git a/examples/l4fwd/fwdtbl.h b/examples/l4fwd/fwdtbl.h new file mode 100644 index 0000000..1c4265e --- /dev/null +++ b/examples/l4fwd/fwdtbl.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __FWDTBL_H__ +#define __FWDTBL_H__ + +struct fwd4_key { + uint32_t port; + struct in_addr addr; +} __attribute__((__packed__)); + +struct fwd6_key { + uint32_t port; + struct in6_addr addr; +} __attribute__((__packed__)); + +union fwd_key { + struct fwd4_key k4; + struct fwd6_key k6; +}; + +static struct rte_hash * +fwd_tbl_key_prep(const struct netfe_lcore *fe, uint16_t family, + const struct sockaddr *sa, union fwd_key *key) +{ + struct rte_hash *h; + const struct sockaddr_in *sin4; + const struct sockaddr_in6 *sin6; + + if (family == AF_INET) { + h = fe->fw4h; + sin4 = (const struct sockaddr_in *)sa; + key->k4.port = sin4->sin_port; + key->k4.addr = sin4->sin_addr; + } else { + h = fe->fw6h; + sin6 = (const struct sockaddr_in6 *)sa; + key->k6.port = sin6->sin6_port; + key->k6.addr = sin6->sin6_addr; + } + + return h; +} + +static int +fwd_tbl_add(struct netfe_lcore *fe, uint16_t family, const struct sockaddr *sa, + struct netfe_stream *data) +{ + int32_t rc; + struct rte_hash *h; + union fwd_key key; + + h = fwd_tbl_key_prep(fe, family, sa, &key); + rc = rte_hash_add_key_data(h, &key, data); + return rc; +} + +static struct netfe_stream * +fwd_tbl_lkp(struct netfe_lcore *fe, uint16_t family, const struct sockaddr *sa) +{ + int rc; + void *d; + struct rte_hash *h; + union fwd_key key; + + h = fwd_tbl_key_prep(fe, family, sa, &key); + rc = rte_hash_lookup_data(h, &key, &d); + if (rc < 0) + d = NULL; + return d; +} + +static int +fwd_tbl_init(struct netfe_lcore *fe, uint16_t family, uint32_t lcore) +{ + int32_t rc; + struct rte_hash **h; + struct rte_hash_parameters hprm; + char buf[RTE_HASH_NAMESIZE]; + + if (family == AF_INET) { + snprintf(buf, sizeof(buf), "fwd4tbl@%u", lcore); + h = &fe->fw4h; + hprm.key_len = sizeof(struct fwd4_key); + } else { + snprintf(buf, sizeof(buf), "fwd6tbl@%u", lcore); + h = &fe->fw6h; + hprm.key_len = sizeof(struct fwd6_key); + } + + hprm.name = buf; + hprm.entries = RTE_MAX(2 * fe->snum, 0x10U); + hprm.socket_id = rte_lcore_to_socket_id(lcore); + hprm.hash_func = NULL; + hprm.hash_func_init_val = 0; + + *h = rte_hash_create(&hprm); + if (*h == NULL) + rc = (rte_errno != 0) ? -rte_errno : -ENOMEM; + else + rc = 0; + return rc; +} + +#endif /* __FWDTBL_H__ */ diff --git a/examples/l4fwd/gen_fe_cfg.py b/examples/l4fwd/gen_fe_cfg.py new file mode 100755 index 0000000..67a8d5c --- /dev/null +++ b/examples/l4fwd/gen_fe_cfg.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python +# +# Copyright (c) 2016 Intel Corporation. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# +# generate FE config script +# + +import sys, optparse, math + +def print_usage (): + print "Usage: " + sys.argv[0] + " [-f fe_lcore_list] [-b be_lcore_list]" + print " [-p start_port] [-n number_of_streams]" + print " [-m mode] [-q local_address] [-r remote_address]" + print " [-s fwd_local_address] [-t fwd_remote_address]" + print " [-l txlen] [-4/-6] [-H]" + print + print "Options" + print " -f, --fe_lcore_list: list of lcores used for FE. Multiple " \ + "lcores are comma-separated, within double quote" + print " -b, --be_lcore_list: list of lcores used for BE. Multiple " \ + "lcores are comma-separated, within double quote" + print " -p, --start_port: starting TCP/UDP port number" + print " -n, --number_of_streams: number of streams to be generated" + print " -m, --mode: mode of the application. [echo, rx, tx, fwd]" + print " -q, --local_address: local address of the stream" + print " -r, --remote_address: remote address of the stream" + print " -s, --fwd_local_address: forwarding local address of the stream" + print " -t, --fwd_remote_address: forwarding remote address of the " \ + "stream" + print " -l, --txlen: transmission length for rx/tx mode" + print " -H: if port number to printed in hex format" + print " -4/-6: if default ip addresses to be printed in ipv4/ipv6 format" + +def align_power_of_2(x): + return 2**(x-1).bit_length() + +def print_stream(mode, la, ra, fwd_la, fwd_ra, lcore, belcore, lport, + fwrport, txlen): + if support_hex != 0: + lport_str = str(format(lport, '#x')) + fwrport_str = str(format(fwrport, '#x')) + else: + lport_str = str(lport) + fwrport_str = str(fwrport) + + stream = "lcore=" + str(lcore) + ",belcore=" + str(belcore) + stream += ",op=" + mode + stream += ",laddr=" + la + ",lport=" + lport_str + stream += ",raddr=" + ra + ",rport=0" + + if mode == 'fwd': + stream += ",fwladdr=" + fwd_la + ",fwlport=0,fwraddr=" + fwd_ra + stream += ",fwrport=" + fwrport_str + + if mode == 'rx' or mode == 'tx': + stream += ",txlen=" + str(txlen) + + print stream + +parser = optparse.OptionParser() +parser.add_option("-b", "--be_lcore_list", dest = "be_lcore_list", + help = "BE lcore lists.") +parser.add_option("-f", "--fe_lcore_list", dest = "fe_lcore_list", + help = "FE lcore lists.") +parser.add_option("-p", "--start_port", dest = "start_port", + help = "start port.") +parser.add_option("-n", "--number_of_streams", dest = "number_of_streams", + help = "number of streams.") +parser.add_option("-m", "--mode", dest = "mode", + help = "mode (op, rx, tx, fwd).") +parser.add_option("-q", "--local_address", dest = "local_address", + help = "local_address.") +parser.add_option("-r", "--remote_address", dest = "remote_address", + help = "remote_address.") +parser.add_option("-s", "--fwd_local_address", dest = "fwd_local_address", + help = "fwd_local_address.") +parser.add_option("-t", "--fwd_remote_address", dest = "fwd_remote_address", + help = "fwd_remote_address.") +parser.add_option("-l", "--txlen", dest = "txlen", help = "txlen.") +parser.add_option("-4", action = "store_false", dest = "ipv6", + help = "IPv4/IPv6") +parser.add_option("-6", action = "store_true", dest = "ipv6", + help = "IPv4/IPv6") +parser.add_option("-H", action = "store_true", dest = "support_hex", + help = "print ports in hexa format") + +(options, args) = parser.parse_args() + +if len(sys.argv) == 1: + print + print_usage() + print + sys.exit() + +supported_modes = ['echo', 'rx', 'tx', 'fwd'] +support_hex = 0 +txlen = 72 + +if options.ipv6 == True: + la = '::' + ra = '::' + fwd_la = '::' + fwd_ra = '::' +else: + la = '0.0.0.0' + ra = '0.0.0.0' + fwd_la = '0.0.0.0' + fwd_ra = '0.0.0.0' + +if options.support_hex == True: + support_hex = 1 + +if options.txlen != None: + txlen = options.txlen + +if options.fe_lcore_list != None: + felcore_list = list(map(int, options.fe_lcore_list.split(","))) + felcore_list.sort() +else: + felcore_list = [] + +if options.be_lcore_list != None: + belcore_list = list(map(int, options.be_lcore_list.split(","))) + belcore_list.sort() +else: + print "default BE lcore list = [ 1 ]" + belcore_list = [1] + +if options.mode != None: + mode = options.mode + if mode not in supported_modes: + print "Supported modes: " + str(supported_modes) + print "Provided mode \"" + mode + "\" is not supported. Terminating..." + sys.exit() +else: + print "default mode = echo" + mode = "echo" + +if options.start_port != None: + start_port = int(options.start_port) +else: + print "default start_port = 32768" + start_port = 32768 + +if options.number_of_streams != None: + number_of_streams = int(options.number_of_streams) +else: + print "default number_of_streams = 1" + number_of_streams = 1 + +fwd_start_port = 53248 + +if options.local_address != None: + la = options.local_address + +if options.remote_address != None: + ra = options.remote_address + +if options.fwd_local_address != None: + fwd_la = options.fwd_local_address + +if options.fwd_remote_address != None: + fwd_ra = options.fwd_remote_address + +belcore_count = len(belcore_list) +align_belcore_count = align_power_of_2(belcore_count) +nb_bits = int(math.log(align_belcore_count, 2)) + +felcore_count = len(felcore_list) +if felcore_count != 0: + if number_of_streams % felcore_count == 0: + nb_stream_per_flc = number_of_streams / felcore_count + else: + nb_stream_per_flc = (number_of_streams / felcore_count) + 1 + +for i in range(start_port, start_port + number_of_streams): + k = i - start_port + align_belcore_count = align_power_of_2(belcore_count) + blc_indx = (i % align_belcore_count) % belcore_count + belcore = belcore_list[blc_indx] + if felcore_count != 0: + flc_indx = k / nb_stream_per_flc + felcore = felcore_list[flc_indx] + else: + felcore = belcore + + print_stream(mode, la, ra, fwd_la, fwd_ra, felcore, belcore, i, + fwd_start_port + k, txlen) diff --git a/examples/l4fwd/lcore.h b/examples/l4fwd/lcore.h new file mode 100644 index 0000000..d88e434 --- /dev/null +++ b/examples/l4fwd/lcore.h @@ -0,0 +1,370 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef LCORE_H_ +#define LCORE_H_ + +#include "dpdk_legacy.h" + +/* + * IPv6 destination lookup callback. + */ +static int +lpm6_dst_lookup(void *data, const struct in6_addr *addr, + struct tle_dest *res) +{ + int32_t rc; + uint8_t idx; + struct netbe_lcore *lc; + struct tle_dest *dst; + uintptr_t p; + + lc = data; + p = (uintptr_t)addr->s6_addr; + + rc = rte_lpm6_lookup(lc->lpm6, (uint8_t *)p, &idx); + if (rc == 0) { + dst = &lc->dst6[idx]; + rte_memcpy(res, dst, dst->l2_len + dst->l3_len + + offsetof(struct tle_dest, hdr)); + } + return rc; +} + +static int +create_context(struct netbe_lcore *lc, const struct tle_ctx_param *ctx_prm) +{ + uint32_t rc = 0, sid; + uint64_t frag_cycles; + struct tle_ctx_param cprm; + + if (lc->ctx == NULL) { + sid = rte_lcore_to_socket_id(lc->id); + + rc = lcore_lpm_init(lc); + if (rc != 0) + return rc; + + cprm = *ctx_prm; + cprm.socket_id = sid; + cprm.proto = lc->proto; + cprm.lookup4 = lpm4_dst_lookup; + cprm.lookup4_data = lc; + cprm.lookup6 = lpm6_dst_lookup; + cprm.lookup6_data = lc; + + frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / + MS_PER_S * FRAG_TTL; + + lc->ftbl = rte_ip_frag_table_create(cprm.max_streams, + FRAG_TBL_BUCKET_ENTRIES, cprm.max_streams, + frag_cycles, sid); + + RTE_LOG(NOTICE, USER1, "%s(lcore=%u): frag_tbl=%p;\n", + __func__, lc->id, lc->ftbl); + + lc->ctx = tle_ctx_create(&cprm); + + RTE_LOG(NOTICE, USER1, "%s(lcore=%u): proto=%s, ctx=%p;\n", + __func__, lc->id, proto_name[lc->proto], lc->ctx); + + if (lc->ctx == NULL || lc->ftbl == NULL) + rc = ENOMEM; + } + + return rc; +} + +/* + * BE lcore setup routine. + */ +static int +lcore_init(struct netbe_lcore *lc, const struct tle_ctx_param *ctx_prm, + const uint32_t prtqid, const uint16_t *bl_ports, uint32_t nb_bl_ports) +{ + int32_t rc = 0; + struct tle_dev_param dprm; + + rc = create_context(lc, ctx_prm); + + if (rc == 0 && lc->ctx != NULL) { + memset(&dprm, 0, sizeof(dprm)); + dprm.rx_offload = lc->prtq[prtqid].port.rx_offload; + dprm.tx_offload = lc->prtq[prtqid].port.tx_offload; + dprm.local_addr4.s_addr = lc->prtq[prtqid].port.ipv4; + memcpy(&dprm.local_addr6, &lc->prtq[prtqid].port.ipv6, + sizeof(lc->prtq[prtqid].port.ipv6)); + dprm.bl4.nb_port = nb_bl_ports; + dprm.bl4.port = bl_ports; + dprm.bl6.nb_port = nb_bl_ports; + dprm.bl6.port = bl_ports; + + lc->prtq[prtqid].dev = tle_add_dev(lc->ctx, &dprm); + + RTE_LOG(NOTICE, USER1, + "%s(lcore=%u, port=%u, qid=%u), dev: %p\n", + __func__, lc->id, lc->prtq[prtqid].port.id, + lc->prtq[prtqid].rxqid, lc->prtq[prtqid].dev); + + if (lc->prtq[prtqid].dev == NULL) + rc = -rte_errno; + + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s(lcore=%u) failed with error code: %d\n", + __func__, lc->id, rc); + tle_ctx_destroy(lc->ctx); + rte_ip_frag_table_destroy(lc->ftbl); + rte_lpm_free(lc->lpm4); + rte_lpm6_free(lc->lpm6); + rte_free(lc->prtq[prtqid].port.lcore_id); + lc->prtq[prtqid].port.nb_lcore = 0; + rte_free(lc->prtq); + lc->prtq_num = 0; + return rc; + } + } + + return rc; +} + +static uint16_t +create_blocklist(const struct netbe_port *beprt, uint16_t *bl_ports, + uint32_t q) +{ + uint32_t i, j, qid, align_nb_q; + + align_nb_q = rte_align32pow2(beprt->nb_lcore); + for (i = 0, j = 0; i < (UINT16_MAX + 1); i++) { + qid = (i % align_nb_q) % beprt->nb_lcore; + if (qid != q) + bl_ports[j++] = i; + } + + return j; +} + +static int +netbe_lcore_init(struct netbe_cfg *cfg, const struct tle_ctx_param *ctx_prm) +{ + int32_t rc; + uint32_t i, j, nb_bl_ports = 0, sz; + struct netbe_lcore *lc; + static uint16_t *bl_ports; + + /* Create the context and attached queue for each lcore. */ + rc = 0; + sz = sizeof(uint16_t) * UINT16_MAX; + bl_ports = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE); + for (i = 0; i < cfg->cpu_num; i++) { + lc = &cfg->cpu[i]; + for (j = 0; j < lc->prtq_num; j++) { + memset((uint8_t *)bl_ports, 0, sz); + /* create list of blocked ports based on q */ + nb_bl_ports = create_blocklist(&lc->prtq[j].port, + bl_ports, lc->prtq[j].rxqid); + RTE_LOG(NOTICE, USER1, + "lc=%u, q=%u, nb_bl_ports=%u\n", + lc->id, lc->prtq[j].rxqid, nb_bl_ports); + + rc = lcore_init(lc, ctx_prm, j, bl_ports, nb_bl_ports); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: failed with error code: %d\n", + __func__, rc); + rte_free(bl_ports); + return rc; + } + } + } + rte_free(bl_ports); + + return 0; +} + +static int +netfe_lcore_cmp(const void *s1, const void *s2) +{ + const struct netfe_stream_prm *p1, *p2; + + p1 = s1; + p2 = s2; + return p1->lcore - p2->lcore; +} + +static int +netbe_find6(const struct in6_addr *laddr, uint16_t lport, + const struct in6_addr *raddr, uint32_t belc) +{ + uint32_t i, j; + uint8_t idx; + struct netbe_lcore *bc; + + /* we have exactly one BE, use it for all traffic */ + if (becfg.cpu_num == 1) + return 0; + + /* search by provided be_lcore */ + if (belc != LCORE_ID_ANY) { + for (i = 0; i != becfg.cpu_num; i++) { + bc = becfg.cpu + i; + if (belc == bc->id) + return i; + } + RTE_LOG(NOTICE, USER1, "%s: no stream with belcore=%u\n", + __func__, belc); + return -ENOENT; + } + + /* search by local address */ + if (memcmp(laddr, &in6addr_any, sizeof(*laddr)) != 0) { + for (i = 0; i != becfg.cpu_num; i++) { + bc = becfg.cpu + i; + /* search by queue for the local port */ + for (j = 0; j != bc->prtq_num; j++) { + if (memcmp(laddr, &bc->prtq[j].port.ipv6, + sizeof(*laddr)) == 0) { + + if (lport == 0) + return i; + + if (verify_queue_for_port(bc->prtq + j, + lport) != 0) + return i; + } + } + } + } + + /* search by remote address */ + if (memcmp(raddr, &in6addr_any, sizeof(*raddr)) == 0) { + for (i = 0; i != becfg.cpu_num; i++) { + bc = becfg.cpu + i; + if (rte_lpm6_lookup(bc->lpm6, + (uint8_t *)(uintptr_t)raddr->s6_addr, + &idx) == 0) { + + if (lport == 0) + return i; + + /* search by queue for the local port */ + for (j = 0; j != bc->prtq_num; j++) + if (verify_queue_for_port(bc->prtq + j, + lport) != 0) + return i; + } + } + } + + return -ENOENT; +} + +static int +netbe_find(const struct sockaddr_storage *la, + const struct sockaddr_storage *ra, + uint32_t belc) +{ + const struct sockaddr_in *l4, *r4; + const struct sockaddr_in6 *l6, *r6; + + if (la->ss_family == AF_INET) { + l4 = (const struct sockaddr_in *)la; + r4 = (const struct sockaddr_in *)ra; + return netbe_find4(&l4->sin_addr, ntohs(l4->sin_port), + &r4->sin_addr, belc); + } else if (la->ss_family == AF_INET6) { + l6 = (const struct sockaddr_in6 *)la; + r6 = (const struct sockaddr_in6 *)ra; + return netbe_find6(&l6->sin6_addr, ntohs(l6->sin6_port), + &r6->sin6_addr, belc); + } + return -EINVAL; +} + +static int +netfe_sprm_flll_be(struct netfe_sprm *sp, uint32_t line, uint32_t belc) +{ + int32_t bidx; + + bidx = netbe_find(&sp->local_addr, &sp->remote_addr, belc); + + if (bidx < 0) { + RTE_LOG(ERR, USER1, "%s(line=%u): no BE for that stream\n", + __func__, line); + return -EINVAL; + } + sp->bidx = bidx; + return 0; +} + +/* start front-end processing. */ +static int +netfe_lcore_fill(struct lcore_prm prm[RTE_MAX_LCORE], + struct netfe_lcore_prm *lprm) +{ + uint32_t belc; + uint32_t i, j, lc, ln; + struct netfe_stream_prm *s; + + /* determine on what BE each stream should be open. */ + for (i = 0; i != lprm->nb_streams; i++) { + s = lprm->stream + i; + ln = s->line; + belc = s->belcore; + if (netfe_sprm_flll_be(&s->sprm, ln, belc) != 0 || + (s->op == FWD && + netfe_sprm_flll_be(&s->fprm, ln, belc) != 0)) + return -EINVAL; + } + + /* group all fe parameters by lcore. */ + + qsort(lprm->stream, lprm->nb_streams, sizeof(lprm->stream[0]), + netfe_lcore_cmp); + + for (i = 0; i != lprm->nb_streams; i = j) { + + lc = lprm->stream[i].lcore; + ln = lprm->stream[i].line; + + if (rte_lcore_is_enabled(lc) == 0) { + RTE_LOG(ERR, USER1, + "%s(line=%u): lcore %u is not enabled\n", + __func__, ln, lc); + return -EINVAL; + } + + if (rte_get_master_lcore() != lc && + rte_eal_get_lcore_state(lc) == RUNNING) { + RTE_LOG(ERR, USER1, + "%s(line=%u): lcore %u already in use\n", + __func__, ln, lc); + return -EINVAL; + } + + for (j = i + 1; j != lprm->nb_streams && + lc == lprm->stream[j].lcore; + j++) + ; + + prm[lc].fe.max_streams = lprm->max_streams; + prm[lc].fe.nb_streams = j - i; + prm[lc].fe.stream = lprm->stream + i; + } + + return 0; +} + +#endif /* LCORE_H_ */ diff --git a/examples/l4fwd/main.c b/examples/l4fwd/main.c new file mode 100644 index 0000000..37bd03e --- /dev/null +++ b/examples/l4fwd/main.c @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "netbe.h" +#include "parse.h" + +#define MAX_RULES 0x100 +#define MAX_TBL8 0x800 + +#define RX_RING_SIZE 0x400 +#define TX_RING_SIZE 0x800 + +#define MPOOL_CACHE_SIZE 0x100 +#define MPOOL_NB_BUF 0x20000 + +#define FRAG_MBUF_BUF_SIZE (RTE_PKTMBUF_HEADROOM + TLE_DST_MAX_HDR) +#define FRAG_TTL MS_PER_S +#define FRAG_TBL_BUCKET_ENTRIES 16 + +#define FIRST_PORT 0x8000 + +#define RX_CSUM_OFFLOAD (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM) +#define TX_CSUM_OFFLOAD (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM) + +RTE_DEFINE_PER_LCORE(struct netbe_lcore *, _be); +RTE_DEFINE_PER_LCORE(struct netfe_lcore *, _fe); + +#include "fwdtbl.h" + +/** + * Location to be modified to create the IPv4 hash key which helps + * to distribute packets based on the destination TCP/UDP port. + */ +#define RSS_HASH_KEY_DEST_PORT_LOC_IPV4 15 + +/** + * Location to be modified to create the IPv6 hash key which helps + * to distribute packets based on the destination TCP/UDP port. + */ +#define RSS_HASH_KEY_DEST_PORT_LOC_IPV6 39 + +/** + * Size of the rte_eth_rss_reta_entry64 array to update through + * rte_eth_dev_rss_reta_update. + */ +#define RSS_RETA_CONF_ARRAY_SIZE (ETH_RSS_RETA_SIZE_512/RTE_RETA_GROUP_SIZE) + +static volatile int force_quit; + +static struct netbe_cfg becfg; +static struct rte_mempool *mpool[RTE_MAX_NUMA_NODES + 1]; +static struct rte_mempool *frag_mpool[RTE_MAX_NUMA_NODES + 1]; +static char proto_name[3][10] = {"udp", "tcp", ""}; + +static const struct rte_eth_conf port_conf_default = { + .rxmode = { + .max_rx_pkt_len = ETHER_MAX_VLAN_FRAME_LEN, + .hw_vlan_strip = 1, + .jumbo_frame = 1, + }, +}; + +/* function pointers */ +static TLE_RX_BULK_FUNCTYPE tle_rx_bulk; +static TLE_TX_BULK_FUNCTYPE tle_tx_bulk; +static TLE_STREAM_RECV_FUNCTYPE tle_stream_recv; +static TLE_STREAM_CLOSE_FUNCTYPE tle_stream_close; + +static LCORE_MAIN_FUNCTYPE lcore_main; + +#include "common.h" +#include "parse.h" +#include "lcore.h" +#include "port.h" +#include "tcp.h" +#include "udp.h" + +int verbose = VERBOSE_NONE; + +static void +netbe_lcore_fini(struct netbe_cfg *cfg) +{ + uint32_t i; + + for (i = 0; i != cfg->cpu_num; i++) { + tle_ctx_destroy(cfg->cpu[i].ctx); + rte_ip_frag_table_destroy(cfg->cpu[i].ftbl); + rte_lpm_free(cfg->cpu[i].lpm4); + rte_lpm6_free(cfg->cpu[i].lpm6); + + rte_free(cfg->cpu[i].prtq); + cfg->cpu[i].prtq_num = 0; + } + + rte_free(cfg->cpu); + cfg->cpu_num = 0; + for (i = 0; i != cfg->prt_num; i++) { + rte_free(cfg->prt[i].lcore_id); + cfg->prt[i].nb_lcore = 0; + } + rte_free(cfg->prt); + cfg->prt_num = 0; +} + +static int +netbe_dest_init(const char *fname, struct netbe_cfg *cfg) +{ + int32_t rc; + uint32_t f, i, p; + uint32_t k, l, cnt; + struct netbe_lcore *lc; + struct netbe_dest_prm prm; + + rc = netbe_parse_dest(fname, &prm); + if (rc != 0) + return rc; + + rc = 0; + for (i = 0; i != prm.nb_dest; i++) { + + p = prm.dest[i].port; + f = prm.dest[i].family; + + cnt = 0; + for (k = 0; k != cfg->cpu_num; k++) { + lc = cfg->cpu + k; + for (l = 0; l != lc->prtq_num; l++) + if (lc->prtq[l].port.id == p) { + rc = netbe_add_dest(lc, l, f, + prm.dest + i, 1); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s(lc=%u, family=%u) " + "could not add " + "destinations(%u)\n", + __func__, lc->id, f, i); + return -ENOSPC; + } + cnt++; + } + } + + if (cnt == 0) { + RTE_LOG(ERR, USER1, "%s(%s) error at line %u: " + "port %u not managed by any lcore;\n", + __func__, fname, prm.dest[i].line, p); + break; + } + } + + free(prm.dest); + return rc; +} + +static void +func_ptrs_init(uint32_t proto) { + if (proto == TLE_PROTO_TCP) { + tle_rx_bulk = tle_tcp_rx_bulk; + tle_tx_bulk = tle_tcp_tx_bulk; + tle_stream_recv = tle_tcp_stream_recv; + tle_stream_close = tle_tcp_stream_close; + + lcore_main = lcore_main_tcp; + + } else { + tle_rx_bulk = tle_udp_rx_bulk; + tle_tx_bulk = tle_udp_tx_bulk; + tle_stream_recv = tle_udp_stream_recv; + tle_stream_close = tle_udp_stream_close; + + lcore_main = lcore_main_udp; + } +} + +int +main(int argc, char *argv[]) +{ + int32_t rc; + uint32_t i; + struct tle_ctx_param ctx_prm; + struct netfe_lcore_prm feprm; + struct rte_eth_stats stats; + char fecfg_fname[PATH_MAX + 1]; + char becfg_fname[PATH_MAX + 1]; + struct lcore_prm prm[RTE_MAX_LCORE]; + struct rte_eth_dev_info dev_info; + + fecfg_fname[0] = 0; + becfg_fname[0] = 0; + memset(prm, 0, sizeof(prm)); + + rc = rte_eal_init(argc, argv); + if (rc < 0) + rte_exit(EXIT_FAILURE, + "%s: rte_eal_init failed with error code: %d\n", + __func__, rc); + + memset(&ctx_prm, 0, sizeof(ctx_prm)); + + signal(SIGINT, sig_handle); + + argc -= rc; + argv += rc; + + rc = parse_app_options(argc, argv, &becfg, &ctx_prm, + fecfg_fname, becfg_fname); + if (rc != 0) + rte_exit(EXIT_FAILURE, + "%s: parse_app_options failed with error code: %d\n", + __func__, rc); + + /* init all the function pointer */ + func_ptrs_init(becfg.proto); + + rc = netbe_port_init(&becfg); + if (rc != 0) + rte_exit(EXIT_FAILURE, + "%s: netbe_port_init failed with error code: %d\n", + __func__, rc); + + rc = netbe_lcore_init(&becfg, &ctx_prm); + if (rc != 0) + sig_handle(SIGQUIT); + + rc = netbe_dest_init(becfg_fname, &becfg); + if (rc != 0) + sig_handle(SIGQUIT); + + for (i = 0; i != becfg.prt_num && rc == 0; i++) { + RTE_LOG(NOTICE, USER1, "%s: starting port %u\n", + __func__, becfg.prt[i].id); + rc = rte_eth_dev_start(becfg.prt[i].id); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: rte_eth_dev_start(%u) returned " + "error code: %d\n", + __func__, becfg.prt[i].id, rc); + sig_handle(SIGQUIT); + } + rte_eth_dev_info_get(becfg.prt[i].id, &dev_info); + rc = update_rss_reta(&becfg.prt[i], &dev_info); + if (rc != 0) + sig_handle(SIGQUIT); + } + + feprm.max_streams = ctx_prm.max_streams * becfg.cpu_num; + + rc = (rc != 0) ? rc : netfe_parse_cfg(fecfg_fname, &feprm); + if (rc != 0) + sig_handle(SIGQUIT); + + for (i = 0; rc == 0 && i != becfg.cpu_num; i++) + prm[becfg.cpu[i].id].be.lc = becfg.cpu + i; + + rc = (rc != 0) ? rc : netfe_lcore_fill(prm, &feprm); + if (rc != 0) + sig_handle(SIGQUIT); + + /* launch all slave lcores. */ + RTE_LCORE_FOREACH_SLAVE(i) { + if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0) + rte_eal_remote_launch(lcore_main, prm + i, i); + } + + /* launch master lcore. */ + i = rte_get_master_lcore(); + if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0) + lcore_main(prm + i); + + rte_eal_mp_wait_lcore(); + + for (i = 0; i != becfg.prt_num; i++) { + RTE_LOG(NOTICE, USER1, "%s: stoping port %u\n", + __func__, becfg.prt[i].id); + rte_eth_stats_get(becfg.prt[i].id, &stats); + RTE_LOG(NOTICE, USER1, "port %u stats={\n" + "ipackets=%" PRIu64 ";" + "ibytes=%" PRIu64 ";" + "ierrors=%" PRIu64 ";" + "imissed=%" PRIu64 ";\n" + "opackets=%" PRIu64 ";" + "obytes=%" PRIu64 ";" + "oerrors=%" PRIu64 ";\n" + "}\n", + becfg.prt[i].id, + stats.ipackets, + stats.ibytes, + stats.ierrors, + stats.imissed, + stats.opackets, + stats.obytes, + stats.oerrors); + rte_eth_dev_stop(becfg.prt[i].id); + } + + netbe_lcore_fini(&becfg); + + return 0; +} diff --git a/examples/l4fwd/netbe.h b/examples/l4fwd/netbe.h new file mode 100644 index 0000000..6d25603 --- /dev/null +++ b/examples/l4fwd/netbe.h @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __NETBE_H__ +#define __NETBE_H__ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAX_PKT_BURST 0x20 + +/* Used to allocate the memory for hash key. */ +#define RSS_HASH_KEY_LENGTH 64 + +/* + * global variables + */ + +enum { + VERBOSE_NONE = 0, + VERBOSE_NUM = 9 +}; + +extern int verbose; + +/* + * BE related structures. + */ + +struct netbe_port { + uint32_t id; + uint32_t nb_lcore; + uint32_t *lcore_id; + uint32_t mtu; + uint32_t rx_offload; + uint32_t tx_offload; + uint32_t ipv4; + struct in6_addr ipv6; + struct ether_addr mac; + uint32_t hash_key_size; + uint8_t hash_key[RSS_HASH_KEY_LENGTH]; +}; + +struct netbe_dest { + uint32_t line; + uint32_t port; + uint32_t mtu; + uint32_t prfx; + uint16_t family; + union { + struct in_addr ipv4; + struct in6_addr ipv6; + }; + struct ether_addr mac; +}; + +struct netbe_dest_prm { + uint32_t nb_dest; + struct netbe_dest *dest; +}; + +struct pkt_buf { + uint32_t num; + struct rte_mbuf *pkt[2 * MAX_PKT_BURST]; +}; + +struct netbe_dev { + uint16_t rxqid; + uint16_t txqid; + struct netbe_port port; + struct tle_dev *dev; + struct { + uint64_t in; + uint64_t up; + uint64_t drop; + } rx_stat; + struct { + uint64_t down; + uint64_t out; + uint64_t drop; + } tx_stat; + struct pkt_buf tx_buf; + struct pkt_buf arp_buf; +}; + +/* 8 bit LPM user data. */ +#define LCORE_MAX_DST (UINT8_MAX + 1) + +struct netbe_lcore { + uint32_t id; + uint32_t proto; /**< L4 proto to handle. */ + struct rte_lpm *lpm4; + struct rte_lpm6 *lpm6; + struct rte_ip_frag_tbl *ftbl; + struct tle_ctx *ctx; + uint32_t prtq_num; + uint32_t dst4_num; + uint32_t dst6_num; + struct netbe_dev *prtq; + struct tle_dest dst4[LCORE_MAX_DST]; + struct tle_dest dst6[LCORE_MAX_DST]; + struct rte_ip_frag_death_row death_row; + struct { + uint64_t flags[UINT8_MAX + 1]; + } tcp_stat; +}; + +struct netbe_cfg { + uint32_t promisc; + uint32_t proto; + uint32_t server; + uint32_t arp; + uint32_t prt_num; + uint32_t cpu_num; + struct netbe_port *prt; + struct netbe_lcore *cpu; +}; + +/* + * FE related structures. + */ + +enum { + RXONLY, + TXONLY, + RXTX, + FWD, +}; + +struct netfe_sprm { + uint32_t bidx; /* BE index to use. */ + struct sockaddr_storage local_addr; /**< stream local address. */ + struct sockaddr_storage remote_addr; /**< stream remote address. */ +}; + +struct netfe_stream_prm { + uint32_t lcore; + uint32_t belcore; + uint16_t line; + uint16_t op; + uint16_t txlen; /* valid/used only for TXONLY op. */ + struct netfe_sprm sprm; + struct netfe_sprm fprm; /* valid/used only for FWD op. */ +}; + +struct netfe_lcore_prm { + uint32_t max_streams; + uint32_t nb_streams; + struct netfe_stream_prm *stream; +}; + +struct netfe_stream { + struct tle_stream *s; + struct tle_event *erev; + struct tle_event *rxev; + struct tle_event *txev; + uint16_t op; + uint16_t proto; + uint16_t family; + uint16_t txlen; + struct { + uint64_t rxp; + uint64_t rxb; + uint64_t txp; + uint64_t txb; + uint64_t fwp; + uint64_t drops; + uint64_t rxev[TLE_SEV_NUM]; + uint64_t txev[TLE_SEV_NUM]; + uint64_t erev[TLE_SEV_NUM]; + } stat; + struct pkt_buf pbuf; + struct sockaddr_storage laddr; + struct sockaddr_storage raddr; + struct netfe_sprm fwdprm; + struct netfe_stream *fwds; + LIST_ENTRY(netfe_stream) link; +}; + +struct netfe_stream_list { + uint32_t num; + LIST_HEAD(, netfe_stream) head; +}; + +struct netfe_lcore { + uint32_t snum; /* max number of streams */ + struct tle_evq *syneq; + struct tle_evq *ereq; + struct tle_evq *rxeq; + struct tle_evq *txeq; + struct rte_hash *fw4h; + struct rte_hash *fw6h; + struct { + uint64_t acc; + uint64_t rej; + uint64_t ter; + } tcp_stat; + struct netfe_stream_list free; + struct netfe_stream_list use; +}; + +struct lcore_prm { + struct { + struct netbe_lcore *lc; + } be; + struct netfe_lcore_prm fe; +}; + +/* + * debug/trace macros. + */ + +#define DUMMY_MACRO do {} while (0) + +#ifdef NETFE_DEBUG +#define NETFE_TRACE(fmt, arg...) printf(fmt, ##arg) +#define NETFE_PKT_DUMP(p) rte_pktmbuf_dump(stdout, (p), 64) +#else +#define NETFE_TRACE(fmt, arg...) DUMMY_MACRO +#define NETFE_PKT_DUMP(p) DUMMY_MACRO +#endif + +#ifdef NETBE_DEBUG +#define NETBE_TRACE(fmt, arg...) printf(fmt, ##arg) +#define NETBE_PKT_DUMP(p) rte_pktmbuf_dump(stdout, (p), 64) +#else +#define NETBE_TRACE(fmt, arg...) DUMMY_MACRO +#define NETBE_PKT_DUMP(p) DUMMY_MACRO +#endif + +#define FUNC_STAT(v, c) do { \ + static uint64_t nb_call, nb_data; \ + nb_call++; \ + nb_data += (v); \ + if ((nb_call & ((c) - 1)) == 0) { \ + printf("%s#%d@%u: nb_call=%lu, avg(" #v ")=%#Lf\n", \ + __func__, __LINE__, rte_lcore_id(), nb_call, \ + (long double)nb_data / nb_call); \ + nb_call = 0; \ + nb_data = 0; \ + } \ +} while (0) + +#define FUNC_TM_STAT(v, c) do { \ + static uint64_t nb_call, nb_data; \ + static uint64_t cts, pts, sts; \ + cts = rte_rdtsc(); \ + if (pts != 0) \ + sts += cts - pts; \ + pts = cts; \ + nb_call++; \ + nb_data += (v); \ + if ((nb_call & ((c) - 1)) == 0) { \ + printf("%s#%d@%u: nb_call=%lu, " \ + "avg(" #v ")=%#Lf, " \ + "avg(cycles)=%#Lf, " \ + "avg(cycles/" #v ")=%#Lf\n", \ + __func__, __LINE__, rte_lcore_id(), nb_call, \ + (long double)nb_data / nb_call, \ + (long double)sts / nb_call, \ + (long double)sts / nb_data); \ + nb_call = 0; \ + nb_data = 0; \ + sts = 0; \ + } \ +} while (0) + +int setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc, + uint16_t qid, uint32_t arp); + +/* + * application function pointers + */ + +typedef int (*LCORE_MAIN_FUNCTYPE)(void *arg); + +/* + * tle_l4p lib function pointers + */ + +typedef uint16_t (*TLE_RX_BULK_FUNCTYPE) + (struct tle_dev *dev, struct rte_mbuf *pkt[], + struct rte_mbuf *rp[], int32_t rc[], uint16_t num); + +typedef uint16_t (*TLE_TX_BULK_FUNCTYPE) + (struct tle_dev *dev, struct rte_mbuf *pkt[], uint16_t num); + +typedef uint16_t (*TLE_STREAM_RECV_FUNCTYPE) + (struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num); + +typedef int (*TLE_STREAM_CLOSE_FUNCTYPE)(struct tle_stream *s); + +#endif /* __NETBE_H__ */ diff --git a/examples/l4fwd/parse.c b/examples/l4fwd/parse.c new file mode 100644 index 0000000..6593221 --- /dev/null +++ b/examples/l4fwd/parse.c @@ -0,0 +1,839 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "netbe.h" +#include "parse.h" + +#define DEF_LINE_NUM 0x400 + +static const struct { + const char *name; + uint16_t op; +} name2feop[] = { + { .name = "rx", .op = RXONLY,}, + { .name = "tx", .op = TXONLY,}, + { .name = "echo", .op = RXTX,}, + { .name = "fwd", .op = FWD,}, +}; + +#define OPT_SHORT_ARP 'a' +#define OPT_LONG_ARP "enable-arp" + +#define OPT_SHORT_SBULK 'B' +#define OPT_LONG_SBULK "sburst" + +#define OPT_SHORT_PROMISC 'P' +#define OPT_LONG_PROMISC "promisc" + +#define OPT_SHORT_RBUFS 'R' +#define OPT_LONG_RBUFS "rbufs" + +#define OPT_SHORT_SBUFS 'S' +#define OPT_LONG_SBUFS "sbufs" + +#define OPT_SHORT_BECFG 'b' +#define OPT_LONG_BECFG "becfg" + +#define OPT_SHORT_FECFG 'f' +#define OPT_LONG_FECFG "fecfg" + +#define OPT_SHORT_STREAMS 's' +#define OPT_LONG_STREAMS "streams" + +#define OPT_SHORT_UDP 'U' +#define OPT_LONG_UDP "udp" + +#define OPT_SHORT_TCP 'T' +#define OPT_LONG_TCP "tcp" + +#define OPT_SHORT_LISTEN 'L' +#define OPT_LONG_LISTEN "listen" + +#define OPT_SHORT_VERBOSE 'v' +#define OPT_LONG_VERBOSE "verbose" + +static const struct option long_opt[] = { + {OPT_LONG_ARP, 1, 0, OPT_SHORT_ARP}, + {OPT_LONG_SBULK, 1, 0, OPT_SHORT_SBULK}, + {OPT_LONG_PROMISC, 0, 0, OPT_SHORT_PROMISC}, + {OPT_LONG_RBUFS, 1, 0, OPT_SHORT_RBUFS}, + {OPT_LONG_SBUFS, 1, 0, OPT_SHORT_SBUFS}, + {OPT_LONG_BECFG, 1, 0, OPT_SHORT_BECFG}, + {OPT_LONG_FECFG, 1, 0, OPT_SHORT_FECFG}, + {OPT_LONG_STREAMS, 1, 0, OPT_SHORT_STREAMS}, + {OPT_LONG_UDP, 0, 0, OPT_SHORT_UDP}, + {OPT_LONG_TCP, 0, 0, OPT_SHORT_TCP}, + {OPT_LONG_LISTEN, 0, 0, OPT_SHORT_LISTEN}, + {OPT_LONG_VERBOSE, 1, 0, OPT_SHORT_VERBOSE}, + {NULL, 0, 0, 0} +}; + +static int +parse_uint_val(__rte_unused const char *key, const char *val, void *prm) +{ + union parse_val *rv; + unsigned long v; + char *end; + + rv = prm; + errno = 0; + v = strtoul(val, &end, 0); + if (errno != 0 || end[0] != 0 || v > UINT32_MAX) + return -EINVAL; + + rv->u64 = v; + return 0; +} + +static int +parse_ipv4_val(__rte_unused const char *key, const char *val, void *prm) +{ + union parse_val *rv; + + rv = prm; + if (inet_pton(AF_INET, val, &rv->in.addr4) != 1) + return -EINVAL; + rv->in.family = AF_INET; + return 0; +} + +static int +parse_ipv6_val(__rte_unused const char *key, const char *val, void *prm) +{ + union parse_val *rv; + + rv = prm; + if (inet_pton(AF_INET6, val, &rv->in.addr6) != 1) + return -EINVAL; + rv->in.family = AF_INET6; + return 0; +} + +static int +parse_ip_val(__rte_unused const char *key, const char *val, void *prm) +{ + if (parse_ipv6_val(key, val, prm) != 0 && + parse_ipv4_val(key, val, prm) != 0) + return -EINVAL; + return 0; +} + +#define PARSE_UINT8x16(s, v, l) \ +do { \ + char *end; \ + unsigned long t; \ + errno = 0; \ + t = strtoul((s), &end, 16); \ + if (errno != 0 || end[0] != (l) || t > UINT8_MAX) \ + return -EINVAL; \ + (s) = end + 1; \ + (v) = t; \ +} while (0) + +static int +parse_mac_val(__rte_unused const char *key, const char *val, void *prm) +{ + union parse_val *rv; + const char *s; + + rv = prm; + s = val; + + PARSE_UINT8x16(s, rv->mac.addr_bytes[0], ':'); + PARSE_UINT8x16(s, rv->mac.addr_bytes[1], ':'); + PARSE_UINT8x16(s, rv->mac.addr_bytes[2], ':'); + PARSE_UINT8x16(s, rv->mac.addr_bytes[3], ':'); + PARSE_UINT8x16(s, rv->mac.addr_bytes[4], ':'); + PARSE_UINT8x16(s, rv->mac.addr_bytes[5], 0); + return 0; +} + +static int +parse_feop_val(__rte_unused const char *key, const char *val, void *prm) +{ + uint32_t i; + union parse_val *rv; + + rv = prm; + for (i = 0; i != RTE_DIM(name2feop); i++) { + if (strcmp(val, name2feop[i].name) == 0) { + rv->u64 = name2feop[i].op; + return 0; + } + } + + return -EINVAL; +} + +static int +parse_lcore_list_val(__rte_unused const char *key, const char *val, void *prm) +{ + union parse_val *rv; + unsigned long a, b; + uint32_t i; + char *end; + + rv = prm; + + errno = 0; + a = strtoul(val, &end, 0); + if (errno != 0 || (end[0] != 0 && end[0] != '-') || a > UINT32_MAX) + return -EINVAL; + + if (end[0] == '-') { + val = end + 1; + errno = 0; + b = strtoul(val, &end, 0); + if (errno != 0 || end[0] != 0 || b > UINT32_MAX) + return -EINVAL; + } else + b = a; + + if (a <= b) { + for (i = a; i <= b; i++) + CPU_SET(i, &rv->cpuset); + } else { + RTE_LOG(ERR, USER1, + "%s: lcores not in ascending order\n", __func__); + return -EINVAL; + } + + return 0; +} + +static int +parse_kvargs(const char *arg, const char *keys_man[], uint32_t nb_man, + const char *keys_opt[], uint32_t nb_opt, + const arg_handler_t hndl[], union parse_val val[]) +{ + uint32_t j, k; + struct rte_kvargs *kvl; + + kvl = rte_kvargs_parse(arg, NULL); + if (kvl == NULL) { + RTE_LOG(ERR, USER1, + "%s: invalid parameter: %s\n", + __func__, arg); + return -EINVAL; + } + + for (j = 0; j != nb_man; j++) { + if (rte_kvargs_count(kvl, keys_man[j]) == 0) { + RTE_LOG(ERR, USER1, + "%s: %s missing mandatory key: %s\n", + __func__, arg, keys_man[j]); + rte_kvargs_free(kvl); + return -EINVAL; + } + } + + for (j = 0; j != nb_man; j++) { + if (rte_kvargs_process(kvl, keys_man[j], hndl[j], + val + j) != 0) { + RTE_LOG(ERR, USER1, + "%s: %s invalid value for man key: %s\n", + __func__, arg, keys_man[j]); + rte_kvargs_free(kvl); + return -EINVAL; + } + } + + for (j = 0; j != nb_opt; j++) { + k = j + nb_man; + if (rte_kvargs_process(kvl, keys_opt[j], hndl[k], + val + k) != 0) { + RTE_LOG(ERR, USER1, + "%s: %s invalid value for opt key: %s\n", + __func__, arg, keys_opt[j]); + rte_kvargs_free(kvl); + return -EINVAL; + } + } + + rte_kvargs_free(kvl); + return 0; +} + +int +parse_netbe_arg(struct netbe_port *prt, const char *arg, rte_cpuset_t *pcpu) +{ + int32_t rc; + uint32_t i, j, nc; + + static const char *keys_man[] = { + "port", + "lcore", + }; + + static const char *keys_opt[] = { + "mtu", + "rx_offload", + "tx_offload", + "ipv4", + "ipv6", + }; + + static const arg_handler_t hndl[] = { + parse_uint_val, + parse_lcore_list_val, + parse_uint_val, + parse_uint_val, + parse_uint_val, + parse_ipv4_val, + parse_ipv6_val, + }; + + union parse_val val[RTE_DIM(hndl)]; + + memset(val, 0, sizeof(val)); + val[2].u64 = ETHER_MAX_VLAN_FRAME_LEN - ETHER_CRC_LEN; + + rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man), + keys_opt, RTE_DIM(keys_opt), hndl, val); + if (rc != 0) + return rc; + + prt->id = val[0].u64; + + for (i = 0, nc = 0; i < RTE_MAX_LCORE; i++) + nc += CPU_ISSET(i, &val[1].cpuset); + prt->lcore_id = rte_zmalloc(NULL, nc * sizeof(prt->lcore_id[0]), + RTE_CACHE_LINE_SIZE); + prt->nb_lcore = nc; + + for (i = 0, j = 0; i < RTE_MAX_LCORE; i++) + if (CPU_ISSET(i, &val[1].cpuset)) + prt->lcore_id[j++] = i; + CPU_OR(pcpu, pcpu, &val[1].cpuset); + + prt->mtu = val[2].u64; + prt->rx_offload = val[3].u64; + prt->tx_offload = val[4].u64; + prt->ipv4 = val[5].in.addr4.s_addr; + prt->ipv6 = val[6].in.addr6; + + return 0; +} + +static int +check_netbe_dest(const struct netbe_dest *dst) +{ + if (dst->port >= RTE_MAX_ETHPORTS) { + RTE_LOG(ERR, USER1, "%s(line=%u) invalid port=%u", + __func__, dst->line, dst->port); + return -EINVAL; + } else if ((dst->family == AF_INET && + dst->prfx > sizeof(struct in_addr) * CHAR_BIT) || + (dst->family == AF_INET6 && + dst->prfx > sizeof(struct in6_addr) * CHAR_BIT)) { + RTE_LOG(ERR, USER1, "%s(line=%u) invalid masklen=%u", + __func__, dst->line, dst->prfx); + return -EINVAL; + } else if (dst->mtu > ETHER_MAX_JUMBO_FRAME_LEN - ETHER_CRC_LEN) { + RTE_LOG(ERR, USER1, "%s(line=%u) invalid mtu=%u", + __func__, dst->line, dst->mtu); + return -EINVAL; + } + return 0; +} + +static int +parse_netbe_dest(struct netbe_dest *dst, const char *arg) +{ + int32_t rc; + + static const char *keys_man[] = { + "port", + "addr", + "masklen", + "mac", + }; + + static const char *keys_opt[] = { + "mtu", + }; + + static const arg_handler_t hndl[] = { + parse_uint_val, + parse_ip_val, + parse_uint_val, + parse_mac_val, + parse_uint_val, + }; + + union parse_val val[RTE_DIM(hndl)]; + + /* set default values. */ + memset(val, 0, sizeof(val)); + val[4].u64 = ETHER_MAX_JUMBO_FRAME_LEN - ETHER_CRC_LEN; + + rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man), + keys_opt, RTE_DIM(keys_opt), hndl, val); + if (rc != 0) + return rc; + + dst->port = val[0].u64; + dst->family = val[1].in.family; + if (val[1].in.family == AF_INET) + dst->ipv4 = val[1].in.addr4; + else + dst->ipv6 = val[1].in.addr6; + dst->prfx = val[2].u64; + memcpy(&dst->mac, &val[3].mac, sizeof(dst->mac)); + dst->mtu = val[4].u64; + + return 0; +} + +int +netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm) +{ + uint32_t i, ln, n, num; + int32_t rc; + size_t sz; + char *s; + FILE *f; + struct netbe_dest *dp; + char line[LINE_MAX]; + + f = fopen(fname, "r"); + if (f == NULL) { + RTE_LOG(ERR, USER1, "%s failed to open file \"%s\"\n", + __func__, fname); + return -EINVAL; + } + + n = 0; + num = 0; + dp = NULL; + rc = 0; + for (ln = 0; fgets(line, sizeof(line), f) != NULL; ln++) { + + /* skip spaces at the start. */ + for (s = line; isspace(s[0]); s++) + ; + + /* skip comment line. */ + if (s[0] == '#' || s[0] == 0) + continue; + + /* skip spaces at the end. */ + for (i = strlen(s); i-- != 0 && isspace(s[i]); s[i] = 0) + ; + + if (n == num) { + num += DEF_LINE_NUM; + sz = sizeof(dp[0]) * num; + dp = realloc(dp, sizeof(dp[0]) * num); + if (dp == NULL) { + RTE_LOG(ERR, USER1, + "%s(%s) allocation of %zu bytes " + "failed\n", + __func__, fname, sz); + rc = -ENOMEM; + break; + } + memset(&dp[n], 0, sizeof(dp[0]) * (num - n)); + } + + dp[n].line = ln + 1; + rc = parse_netbe_dest(dp + n, s); + rc = (rc != 0) ? rc : check_netbe_dest(dp + n); + if (rc != 0) { + RTE_LOG(ERR, USER1, "%s(%s) failed to parse line %u\n", + __func__, fname, dp[n].line); + break; + } + n++; + } + + fclose(f); + + if (rc != 0) { + free(dp); + dp = NULL; + n = 0; + } + + prm->dest = dp; + prm->nb_dest = n; + return rc; +} + +static void +pv2saddr(struct sockaddr_storage *ss, const union parse_val *pva, + const union parse_val *pvp) +{ + ss->ss_family = pva->in.family; + if (pva->in.family == AF_INET) { + struct sockaddr_in *si = (struct sockaddr_in *)ss; + si->sin_addr = pva->in.addr4; + si->sin_port = rte_cpu_to_be_16((uint16_t)pvp->u64); + } else { + struct sockaddr_in6 *si = (struct sockaddr_in6 *)ss; + si->sin6_addr = pva->in.addr6; + si->sin6_port = rte_cpu_to_be_16((uint16_t)pvp->u64); + } +} + +static int +parse_netfe_arg(struct netfe_stream_prm *sp, const char *arg) +{ + int32_t rc; + + static const char *keys_man[] = { + "lcore", + "op", + "laddr", + "lport", + "raddr", + "rport", + }; + + static const char *keys_opt[] = { + "txlen", + "fwladdr", + "fwlport", + "fwraddr", + "fwrport", + "belcore", + }; + + static const arg_handler_t hndl[] = { + parse_uint_val, + parse_feop_val, + parse_ip_val, + parse_uint_val, + parse_ip_val, + parse_uint_val, + parse_uint_val, + parse_ip_val, + parse_uint_val, + parse_ip_val, + parse_uint_val, + parse_uint_val, + }; + + union parse_val val[RTE_DIM(hndl)]; + + memset(val, 0, sizeof(val)); + val[11].u64 = LCORE_ID_ANY; + rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man), + keys_opt, RTE_DIM(keys_opt), hndl, val); + if (rc != 0) + return rc; + sp->lcore = val[0].u64; + sp->op = val[1].u64; + pv2saddr(&sp->sprm.local_addr, val + 2, val + 3); + pv2saddr(&sp->sprm.remote_addr, val + 4, val + 5); + sp->txlen = val[6].u64; + pv2saddr(&sp->fprm.local_addr, val + 7, val + 8); + pv2saddr(&sp->fprm.remote_addr, val + 9, val + 10); + sp->belcore = val[11].u64; + + return 0; +} + +static const char * +format_feop(uint16_t op) +{ + uint32_t i; + + for (i = 0; i != RTE_DIM(name2feop); i++) { + if (name2feop[i].op == op) + return name2feop[i].name; + } + + return NULL; +} + +static int +is_addr_wc(const struct sockaddr_storage *sp) +{ + const struct sockaddr_in *i4; + const struct sockaddr_in6 *i6; + + if (sp->ss_family == AF_INET) { + i4 = (const struct sockaddr_in *)sp; + return (i4->sin_addr.s_addr == INADDR_ANY); + } else if (sp->ss_family == AF_INET6) { + i6 = (const struct sockaddr_in6 *)sp; + return (memcmp(&i6->sin6_addr, &in6addr_any, + sizeof(i6->sin6_addr)) == 0); + } + return 0; +} + +static int +check_netfe_arg(const struct netfe_stream_prm *sp) +{ + char buf[INET6_ADDRSTRLEN]; + + if (sp->sprm.local_addr.ss_family != + sp->sprm.remote_addr.ss_family) { + RTE_LOG(ERR, USER1, "invalid arg at line %u: " + "laddr and raddr for different protocols\n", + sp->line); + return -EINVAL; + } + + if (sp->op == TXONLY) { + if (sp->txlen > RTE_MBUF_DEFAULT_DATAROOM || sp->txlen == 0) { + RTE_LOG(ERR, USER1, "invalid arg at line %u: txlen=%u " + "exceeds allowed values: (0, %u]\n", + sp->line, sp->txlen, RTE_MBUF_DEFAULT_DATAROOM); + return -EINVAL; + } else if (is_addr_wc(&sp->sprm.remote_addr)) { + RTE_LOG(ERR, USER1, "invalid arg at line %u: " + "raddr=%s are not allowed for op=%s;\n", + sp->line, + format_addr(&sp->sprm.remote_addr, + buf, sizeof(buf)), + format_feop(sp->op)); + return -EINVAL; + } + } else if (sp->op == FWD) { + if (sp->fprm.local_addr.ss_family != + sp->fprm.remote_addr.ss_family) { + RTE_LOG(ERR, USER1, "invalid arg at line %u: " + "fwladdr and fwraddr for different protocols\n", + sp->line); + return -EINVAL; + } else if (is_addr_wc(&sp->fprm.remote_addr)) { + RTE_LOG(ERR, USER1, "invalid arg at line %u: " + "fwaddr=%s are not allowed for op=%s;\n", + sp->line, + format_addr(&sp->fprm.remote_addr, + buf, sizeof(buf)), + format_feop(sp->op)); + return -EINVAL; + } + } + + return 0; +} + +int +netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp) +{ + uint32_t i, ln, n, num; + int32_t rc; + size_t sz; + char *s; + FILE *f; + struct netfe_stream_prm *sp; + char line[LINE_MAX]; + + f = fopen(fname, "r"); + if (f == NULL) { + RTE_LOG(ERR, USER1, "%s failed to open file \"%s\"\n", + __func__, fname); + return -EINVAL; + } + + n = 0; + num = 0; + sp = NULL; + rc = 0; + for (ln = 0; fgets(line, sizeof(line), f) != NULL; ln++) { + + /* skip spaces at the start. */ + for (s = line; isspace(s[0]); s++) + ; + + /* skip comment line. */ + if (s[0] == '#' || s[0] == 0) + continue; + + /* skip spaces at the end. */ + for (i = strlen(s); i-- != 0 && isspace(s[i]); s[i] = 0) + ; + + if (n == lp->max_streams) { + RTE_LOG(ERR, USER1, + "%s(%s) number of entries exceed max streams " + "value: %u\n", + __func__, fname, n); + rc = -EINVAL; + break; + } + + if (n == num) { + num += DEF_LINE_NUM; + sz = sizeof(sp[0]) * num; + sp = realloc(sp, sizeof(sp[0]) * num); + if (sp == NULL) { + RTE_LOG(ERR, USER1, + "%s(%s) allocation of %zu bytes " + "failed\n", + __func__, fname, sz); + rc = -ENOMEM; + break; + } + memset(&sp[n], 0, sizeof(sp[0]) * (num - n)); + } + + sp[n].line = ln + 1; + rc = parse_netfe_arg(sp + n, s); + rc = (rc != 0) ? rc : check_netfe_arg(sp + n); + if (rc != 0) { + RTE_LOG(ERR, USER1, "%s(%s) failed to parse line %u\n", + __func__, fname, sp[n].line); + break; + } + n++; + } + + fclose(f); + + if (rc != 0) { + free(sp); + sp = NULL; + n = 0; + } + + lp->stream = sp; + lp->nb_streams = n; + return rc; +} + +int +parse_app_options(int argc, char **argv, struct netbe_cfg *cfg, + struct tle_ctx_param *ctx_prm, + char *fecfg_fname, char *becfg_fname) +{ + int32_t opt, opt_idx, rc; + uint64_t v; + uint32_t i, j, n, nc; + rte_cpuset_t cpuset; + uint32_t udp = 0, tcp = 0, listen = 0; + + optind = 0; + optarg = NULL; + while ((opt = getopt_long(argc, argv, "aB:LPR:S:TUb:f:s:v:", long_opt, + &opt_idx)) != EOF) { + if (opt == OPT_SHORT_ARP) { + cfg->arp = 1; + } else if (opt == OPT_SHORT_SBULK) { + rc = parse_uint_val(NULL, optarg, &v); + if (rc < 0) + rte_exit(EXIT_FAILURE, "%s: invalid value: %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + ctx_prm->send_bulk_size = v; + } else if (opt == OPT_SHORT_PROMISC) { + cfg->promisc = 1; + } else if (opt == OPT_SHORT_RBUFS) { + rc = parse_uint_val(NULL, optarg, &v); + if (rc < 0) + rte_exit(EXIT_FAILURE, "%s: invalid value: %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + ctx_prm->max_stream_rbufs = v; + } else if (opt == OPT_SHORT_SBUFS) { + rc = parse_uint_val(NULL, optarg, &v); + if (rc < 0) + rte_exit(EXIT_FAILURE, "%s: invalid value: %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + ctx_prm->max_stream_sbufs = v; + } else if (opt == OPT_SHORT_STREAMS) { + rc = parse_uint_val(NULL, optarg, &v); + if (rc < 0) + rte_exit(EXIT_FAILURE, "%s: invalid value: %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + ctx_prm->max_streams = v; + } else if (opt == OPT_SHORT_VERBOSE) { + rc = parse_uint_val(NULL, optarg, &v); + if (rc < 0) + rte_exit(EXIT_FAILURE, "%s: invalid value: %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + verbose = (v > VERBOSE_NUM) ? VERBOSE_NUM : v; + } else if (opt == OPT_SHORT_BECFG) { + snprintf(becfg_fname, PATH_MAX, "%s", + optarg); + } else if (opt == OPT_SHORT_FECFG) { + snprintf(fecfg_fname, PATH_MAX, "%s", + optarg); + } else if (opt == OPT_SHORT_UDP) { + udp = 1; + cfg->proto = TLE_PROTO_UDP; + } else if (opt == OPT_SHORT_TCP) { + tcp = 1; + cfg->proto = TLE_PROTO_TCP; + } else if (opt == OPT_SHORT_LISTEN) { + listen = 1; + cfg->server = 1; + } else { + rte_exit(EXIT_FAILURE, + "%s: unknown option: \'%c\'\n", + __func__, opt); + } + } + + if (!udp && !tcp) + rte_exit(EXIT_FAILURE, "%s: either UDP or TCP option has to be " + "provided\n", __func__); + + if (udp && tcp) + rte_exit(EXIT_FAILURE, "%s: both UDP and TCP options are not " + "allowed\n", __func__); + + if (udp && listen) + rte_exit(EXIT_FAILURE, + "%s: listen mode cannot be opened with UDP\n", + __func__); + + if (udp && cfg->arp) + rte_exit(EXIT_FAILURE, + "%s: arp cannot be enabled with UDP\n", + __func__); + + /* parse port params */ + argc -= optind; + argv += optind; + + /* allocate memory for number of ports defined */ + n = (uint32_t)argc; + cfg->prt = rte_zmalloc(NULL, sizeof(struct netbe_port) * n, + RTE_CACHE_LINE_SIZE); + cfg->prt_num = n; + + rc = 0; + for (i = 0; i != n; i++) { + rc = parse_netbe_arg(cfg->prt + i, argv[i], &cpuset); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: processing of \"%s\" failed with error " + "code: %d\n", __func__, argv[i], rc); + for (j = 0; j != i; j++) + rte_free(cfg->prt[j].lcore_id); + rte_free(cfg->prt); + return rc; + } + } + + /* count the number of CPU defined in ports */ + for (i = 0, nc = 0; i < RTE_MAX_LCORE; i++) + nc += CPU_ISSET(i, &cpuset); + + /* allocate memory for number of CPU defined */ + cfg->cpu = rte_zmalloc(NULL, sizeof(struct netbe_lcore) * nc, + RTE_CACHE_LINE_SIZE); + + return 0; +} diff --git a/examples/l4fwd/parse.h b/examples/l4fwd/parse.h new file mode 100644 index 0000000..4303623 --- /dev/null +++ b/examples/l4fwd/parse.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __PARSE_H__ +#define __PARSE_H__ + +#include + +#define PARSE_LIST_DELIM "-" + +union parse_val { + uint64_t u64; + struct { + uint16_t family; + union { + struct in_addr addr4; + struct in6_addr addr6; + }; + } in; + struct ether_addr mac; + rte_cpuset_t cpuset; +}; + +static const char * +format_addr(const struct sockaddr_storage *sp, char buf[], size_t len) +{ + const struct sockaddr_in *i4; + const struct sockaddr_in6 *i6; + const void *addr; + + if (sp->ss_family == AF_INET) { + i4 = (const struct sockaddr_in *)sp; + addr = &i4->sin_addr; + } else if (sp->ss_family == AF_INET6) { + i6 = (const struct sockaddr_in6 *)sp; + addr = &i6->sin6_addr; + } else + return NULL; + + + return inet_ntop(sp->ss_family, addr, buf, len); +} + +int parse_netbe_arg(struct netbe_port *prt, const char *arg, + rte_cpuset_t *pcpu); + +int netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm); + +int netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp); + +int +parse_app_options(int argc, char **argv, struct netbe_cfg *cfg, + struct tle_ctx_param *ctx_prm, + char *fecfg_fname, char *becfg_fname); + +#endif /* __PARSE_H__ */ + diff --git a/examples/l4fwd/pkt.c b/examples/l4fwd/pkt.c new file mode 100644 index 0000000..660e618 --- /dev/null +++ b/examples/l4fwd/pkt.c @@ -0,0 +1,872 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "netbe.h" + +static inline uint64_t +_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso, + uint64_t ol3, uint64_t ol2) +{ + return il2 | il3 << 7 | il4 << 16 | tso << 24 | ol3 << 40 | ol2 << 49; +} + +static inline void +fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4) +{ + m->tx_offload = _mbuf_tx_offload(l2, l3, l4, 0, 0, 0); +} + +static inline int +is_ipv4_frag(const struct ipv4_hdr *iph) +{ + const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG); + + return ((mask & iph->fragment_offset) != 0); +} + +static inline uint32_t +get_tcp_header_size(struct rte_mbuf *m, uint32_t l2_len, uint32_t l3_len) +{ + const struct tcp_hdr *tcp; + + tcp = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len); + return (tcp->data_off >> 4) * 4; +} + +static inline void +adjust_ipv4_pktlen(struct rte_mbuf *m, uint32_t l2_len) +{ + uint32_t plen, trim; + const struct ipv4_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2_len); + plen = rte_be_to_cpu_16(iph->total_length) + l2_len; + if (plen < m->pkt_len) { + trim = m->pkt_len - plen; + rte_pktmbuf_trim(m, trim); + } +} + +static inline void +adjust_ipv6_pktlen(struct rte_mbuf *m, uint32_t l2_len) +{ + uint32_t plen, trim; + const struct ipv6_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, l2_len); + plen = rte_be_to_cpu_16(iph->payload_len) + sizeof(*iph) + l2_len; + if (plen < m->pkt_len) { + trim = m->pkt_len - plen; + rte_pktmbuf_trim(m, trim); + } +} + +static inline void +tcp_stat_update(struct netbe_lcore *lc, const struct rte_mbuf *m, + uint32_t l2_len, uint32_t l3_len) +{ + const struct tcp_hdr *th; + + th = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len); + lc->tcp_stat.flags[th->tcp_flags]++; +} + +static inline uint32_t +get_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, uint32_t frag) +{ + const struct ipv4_hdr *iph; + int32_t dlen, len; + + dlen = rte_pktmbuf_data_len(m); + dlen -= l2; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2); + len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER; + + if (frag != 0 && is_ipv4_frag(iph)) { + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_FRAG; + } + + if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto)) + m->packet_type = RTE_PTYPE_UNKNOWN; + + return len; +} + +static inline void +fill_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, + uint32_t frag, uint32_t l4_len) +{ + uint32_t len; + + len = get_ipv4_hdr_len(m, l2, proto, frag); + fill_pkt_hdr_len(m, l2, len, l4_len); + adjust_ipv4_pktlen(m, l2); +} + +static inline int +ipv6x_hdr(uint32_t proto) +{ + return (proto == IPPROTO_HOPOPTS || + proto == IPPROTO_ROUTING || + proto == IPPROTO_FRAGMENT || + proto == IPPROTO_AH || + proto == IPPROTO_NONE || + proto == IPPROTO_DSTOPTS); +} + +static inline uint32_t +get_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto, + uint32_t fproto) +{ + const struct ip6_ext *ipx; + int32_t dlen, len, ofs; + + len = sizeof(struct ipv6_hdr); + + dlen = rte_pktmbuf_data_len(m); + dlen -= l2; + + ofs = l2 + len; + ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs); + + while (ofs > 0 && len < dlen) { + + switch (nproto) { + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + ofs = (ipx->ip6e_len + 1) << 3; + break; + case IPPROTO_AH: + ofs = (ipx->ip6e_len + 2) << 2; + break; + case IPPROTO_FRAGMENT: + /* + * tso_segsz is not used by RX, so use it as temporary + * buffer to store the fragment offset. + */ + m->tso_segsz = ofs; + ofs = sizeof(struct ip6_frag); + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_FRAG; + break; + default: + ofs = 0; + } + + if (ofs > 0) { + nproto = ipx->ip6e_nxt; + len += ofs; + ipx += ofs / sizeof(*ipx); + } + } + + /* unrecognized or invalid packet. */ + if ((ofs == 0 && nproto != fproto) || len > dlen) + m->packet_type = RTE_PTYPE_UNKNOWN; + + return len; +} + +static inline uint32_t +get_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto) +{ + const struct ipv6_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + if (iph->proto == fproto) + return sizeof(struct ipv6_hdr); + else if (ipv6x_hdr(iph->proto) != 0) + return get_ipv6x_hdr_len(m, l2, iph->proto, fproto); + + m->packet_type = RTE_PTYPE_UNKNOWN; + return 0; +} + +static inline void +fill_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto, + uint32_t l4_len) +{ + uint32_t len; + + len = get_ipv6_hdr_len(m, l2, fproto); + fill_pkt_hdr_len(m, l2, len, l4_len); + adjust_ipv6_pktlen(m, l2); +} + +static inline struct rte_mbuf * +handle_arp(struct rte_mbuf *m, struct netbe_lcore *lc, uint8_t port, + uint32_t l2len) +{ + const struct arp_hdr *ahdr; + struct pkt_buf *abuf; + + ahdr = rte_pktmbuf_mtod_offset(m, const struct arp_hdr *, l2len); + + if (ahdr->arp_hrd != rte_be_to_cpu_16(ARP_HRD_ETHER) || + ahdr->arp_pro != rte_be_to_cpu_16(ETHER_TYPE_IPv4) || + ahdr->arp_op != rte_be_to_cpu_16(ARP_OP_REQUEST)) { + + m->packet_type = RTE_PTYPE_UNKNOWN; + return m; + } + + m->l2_len = l2len; + abuf = &lc->prtq[port].arp_buf; + if (abuf->num >= RTE_DIM(abuf->pkt)) + return m; + + abuf->pkt[abuf->num++] = m; + + return NULL; +} + +static inline struct rte_mbuf * +fill_eth_tcp_arp_hdr_len(struct rte_mbuf *m, struct netbe_lcore *lc, + uint8_t port) +{ + uint32_t dlen, l2_len, l3_len, l4_len; + uint16_t etp; + const struct ether_hdr *eth; + + dlen = rte_pktmbuf_data_len(m); + + /* check that first segment is at least 54B long. */ + if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + + sizeof(struct tcp_hdr)) { + m->packet_type = RTE_PTYPE_UNKNOWN; + return m; + } + + l2_len = sizeof(*eth); + + eth = rte_pktmbuf_mtod(m, const struct ether_hdr *); + etp = eth->ether_type; + if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN)) + l2_len += sizeof(struct vlan_hdr); + + if (etp == rte_be_to_cpu_16(ETHER_TYPE_ARP)) + return handle_arp(m, lc, port, l2_len); + + if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) { + m->packet_type = RTE_PTYPE_L4_TCP | + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_TCP, 1); + l4_len = get_tcp_header_size(m, l2_len, l3_len); + fill_pkt_hdr_len(m, l2_len, l3_len, l4_len); + adjust_ipv4_pktlen(m, l2_len); + } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) && + dlen >= l2_len + sizeof(struct ipv6_hdr) + + sizeof(struct tcp_hdr)) { + m->packet_type = RTE_PTYPE_L4_TCP | + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP); + l4_len = get_tcp_header_size(m, l2_len, l3_len); + fill_pkt_hdr_len(m, l2_len, l3_len, l4_len); + adjust_ipv6_pktlen(m, l2_len); + } else + m->packet_type = RTE_PTYPE_UNKNOWN; + + return m; +} + +static inline void +fill_eth_tcp_hdr_len(struct rte_mbuf *m) +{ + uint32_t dlen, l2_len, l3_len, l4_len; + uint16_t etp; + const struct ether_hdr *eth; + + dlen = rte_pktmbuf_data_len(m); + + /* check that first segment is at least 54B long. */ + if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + + sizeof(struct tcp_hdr)) { + m->packet_type = RTE_PTYPE_UNKNOWN; + return; + } + + l2_len = sizeof(*eth); + + eth = rte_pktmbuf_mtod(m, const struct ether_hdr *); + etp = eth->ether_type; + if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN)) + l2_len += sizeof(struct vlan_hdr); + + if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) { + m->packet_type = RTE_PTYPE_L4_TCP | + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_TCP, 1); + l4_len = get_tcp_header_size(m, l2_len, l3_len); + fill_pkt_hdr_len(m, l2_len, l3_len, l4_len); + adjust_ipv4_pktlen(m, l2_len); + } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) && + dlen >= l2_len + sizeof(struct ipv6_hdr) + + sizeof(struct tcp_hdr)) { + m->packet_type = RTE_PTYPE_L4_TCP | + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP); + l4_len = get_tcp_header_size(m, l2_len, l3_len); + fill_pkt_hdr_len(m, l2_len, l3_len, l4_len); + adjust_ipv6_pktlen(m, l2_len); + } else + m->packet_type = RTE_PTYPE_UNKNOWN; +} + +static inline void +fill_eth_udp_hdr_len(struct rte_mbuf *m) +{ + uint32_t dlen, l2_len; + uint16_t etp; + const struct ether_hdr *eth; + + dlen = rte_pktmbuf_data_len(m); + + /* check that first segment is at least 42B long. */ + if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + + sizeof(struct udp_hdr)) { + m->packet_type = RTE_PTYPE_UNKNOWN; + return; + } + + l2_len = sizeof(*eth); + + eth = rte_pktmbuf_mtod(m, const struct ether_hdr *); + etp = eth->ether_type; + if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN)) + l2_len += sizeof(struct vlan_hdr); + + if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) { + m->packet_type = RTE_PTYPE_L4_UDP | + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + fill_ipv4_hdr_len(m, l2_len, IPPROTO_UDP, 1, + sizeof(struct udp_hdr)); + } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) && + dlen >= l2_len + sizeof(struct ipv6_hdr) + + sizeof(struct udp_hdr)) { + m->packet_type = RTE_PTYPE_L4_UDP | + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + fill_ipv6_hdr_len(m, l2_len, IPPROTO_UDP, + sizeof(struct udp_hdr)); + } else + m->packet_type = RTE_PTYPE_UNKNOWN; +} + +static inline uint16_t +ipv4x_cksum(const void *iph, size_t len) +{ + uint16_t cksum; + + cksum = rte_raw_cksum(iph, len); + return (cksum == 0xffff) ? cksum : ~cksum; +} + +static inline void +fix_reassembled(struct rte_mbuf *m, int32_t hwcsum, uint32_t proto) +{ + struct ipv4_hdr *iph; + + /* update packet type. */ + m->packet_type &= ~RTE_PTYPE_L4_MASK; + + if (proto == IPPROTO_TCP) + m->packet_type |= RTE_PTYPE_L4_TCP; + else + m->packet_type |= RTE_PTYPE_L4_UDP; + + /* fix reassemble setting TX flags. */ + m->ol_flags &= ~PKT_TX_IP_CKSUM; + + /* fix l3_len after reassemble. */ + if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) + m->l3_len = m->l3_len - sizeof(struct ipv6_extension_fragment); + + /* recalculate ipv4 cksum after reassemble. */ + else if (hwcsum == 0 && RTE_ETH_IS_IPV4_HDR(m->packet_type)) { + iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); + iph->hdr_checksum = ipv4x_cksum(iph, m->l3_len); + } +} + +static struct rte_mbuf * +reassemble(struct rte_mbuf *m, struct netbe_lcore *lc, uint64_t tms, + uint8_t port, uint32_t proto) +{ + uint32_t l3cs; + struct rte_ip_frag_tbl *tbl; + struct rte_ip_frag_death_row *dr; + + tbl = lc->ftbl; + dr = &lc->death_row; + l3cs = lc->prtq[port].port.rx_offload & DEV_RX_OFFLOAD_IPV4_CKSUM; + + if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { + + struct ipv4_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); + + /* process this fragment. */ + m = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, iph); + + } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { + + struct ipv6_hdr *iph; + struct ipv6_extension_fragment *fhdr; + + iph = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len); + + /* + * we store fragment header offset in tso_segsz before + * temporary, just to avoid another scan of ipv6 header. + */ + fhdr = rte_pktmbuf_mtod_offset(m, + struct ipv6_extension_fragment *, m->tso_segsz); + m->tso_segsz = 0; + + /* process this fragment. */ + m = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, iph, fhdr); + + } else { + rte_pktmbuf_free(m); + m = NULL; + } + + /* got reassembled packet. */ + if (m != NULL) + fix_reassembled(m, l3cs, proto); + + return m; +} + +/* exclude NULLs from the final list of packets. */ +static inline uint32_t +compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero) +{ + uint32_t i, j, k, l; + + for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) { + + /* found a hole. */ + if (pkt[j] == NULL) { + + /* find how big is it. */ + for (i = j; i-- != 0 && pkt[i] == NULL; ) + ; + /* fill the hole. */ + for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++) + pkt[l] = pkt[k]; + + nb_pkt -= j - i; + nb_zero -= j - i; + j = i + 1; + } + } + + return nb_pkt; +} + +/* + * if it is a fragment, try to reassemble it, + * if by some reason it can't be done, then + * set pkt[] entry to NULL. + */ +#define DO_REASSEMBLE(proto) \ +do { \ + if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == \ + RTE_PTYPE_L4_FRAG) { \ + cts = (cts == 0) ? rte_rdtsc() : cts; \ + pkt[j] = reassemble(pkt[j], lc, cts, port, (proto)); \ + x += (pkt[j] == NULL); \ + } \ +} while (0) + +/* + * HW can recognize L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k) + */ +static uint16_t +type0_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, tp; + struct netbe_lcore *lc; + uint32_t l4_len, l3_len, l2_len; + const struct ether_hdr *eth; + + lc = user_param; + l2_len = sizeof(*eth); + + RTE_SET_USED(lc); + + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); + + switch (tp) { + /* non fragmented tcp packets. */ + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4 | + RTE_PTYPE_L2_ETHER): + l4_len = get_tcp_header_size(pkt[j], l2_len, + sizeof(struct ipv4_hdr)); + fill_pkt_hdr_len(pkt[j], l2_len, + sizeof(struct ipv4_hdr), l4_len); + adjust_ipv4_pktlen(pkt[j], l2_len); + break; + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6 | + RTE_PTYPE_L2_ETHER): + l4_len = get_tcp_header_size(pkt[j], l2_len, + sizeof(struct ipv6_hdr)); + fill_pkt_hdr_len(pkt[j], l2_len, + sizeof(struct ipv6_hdr), l4_len); + adjust_ipv6_pktlen(pkt[j], l2_len); + break; + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT | + RTE_PTYPE_L2_ETHER): + l3_len = get_ipv4_hdr_len(pkt[j], l2_len, + IPPROTO_TCP, 0); + l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len); + fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len); + adjust_ipv4_pktlen(pkt[j], l2_len); + break; + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT | + RTE_PTYPE_L2_ETHER): + l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP); + l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len); + fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len); + adjust_ipv6_pktlen(pkt[j], l2_len); + break; + default: + /* treat packet types as invalid. */ + pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; + break; + } + } + + return nb_pkts; +} + +/* + * HW can recognize L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k) + */ +static uint16_t +type0_udp_rx_callback(uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, tp, x; + uint64_t cts; + struct netbe_lcore *lc; + uint32_t l2_len; + const struct ether_hdr *eth; + + lc = user_param; + cts = 0; + l2_len = sizeof(*eth); + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); + + switch (tp) { + /* non fragmented udp packets. */ + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4 | + RTE_PTYPE_L2_ETHER): + fill_pkt_hdr_len(pkt[j], l2_len, + sizeof(struct ipv4_hdr), + sizeof(struct udp_hdr)); + adjust_ipv4_pktlen(pkt[j], l2_len); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6 | + RTE_PTYPE_L2_ETHER): + fill_pkt_hdr_len(pkt[j], l2_len, + sizeof(struct ipv6_hdr), + sizeof(struct udp_hdr)); + adjust_ipv6_pktlen(pkt[j], l2_len); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT | + RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], l2_len, + UINT32_MAX, 0, sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT | + RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], l2_len, + IPPROTO_UDP, sizeof(struct udp_hdr)); + break; + /* possibly fragmented udp packets. */ + case (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], l2_len, + IPPROTO_UDP, 1, sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], l2_len, + IPPROTO_UDP, sizeof(struct udp_hdr)); + break; + default: + /* treat packet types as invalid. */ + pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; + break; + } + + DO_REASSEMBLE(IPPROTO_UDP); + } + + /* reassemble was invoked, cleanup its death-row. */ + if (cts != 0) + rte_ip_frag_free_death_row(&lc->death_row, 0); + + if (x == 0) + return nb_pkts; + + NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " + "%u non-reassembled fragments;\n", + __func__, port, queue, nb_pkts, x); + + return compress_pkt_list(pkt, nb_pkts, x); +} + +/* + * HW can recognize L2/L3/L4 and fragments (i40e). + */ +static uint16_t +type1_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, tp; + struct netbe_lcore *lc; + uint32_t l4_len, l3_len, l2_len; + const struct ether_hdr *eth; + + lc = user_param; + l2_len = sizeof(*eth); + + RTE_SET_USED(lc); + + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); + + switch (tp) { + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + l3_len = get_ipv4_hdr_len(pkt[j], l2_len, + IPPROTO_TCP, 0); + l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len); + fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len); + adjust_ipv4_pktlen(pkt[j], l2_len); + tcp_stat_update(lc, pkt[j], l2_len, l3_len); + break; + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP); + l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len); + fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len); + adjust_ipv6_pktlen(pkt[j], l2_len); + tcp_stat_update(lc, pkt[j], l2_len, l3_len); + break; + default: + /* treat packet types as invalid. */ + pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; + break; + } + + } + + return nb_pkts; +} + +/* + * HW can recognize L2/L3/L4 and fragments (i40e). + */ +static uint16_t +type1_udp_rx_callback(uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, tp, x; + uint64_t cts; + struct netbe_lcore *lc; + uint32_t l2_len; + const struct ether_hdr *eth; + + lc = user_param; + cts = 0; + l2_len = sizeof(*eth); + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); + + switch (tp) { + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], l2_len, + UINT32_MAX, 0, sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], l2_len, + IPPROTO_UDP, sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], l2_len, + IPPROTO_UDP, 0, sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], l2_len, + IPPROTO_UDP, sizeof(struct udp_hdr)); + break; + default: + /* treat packet types as invalid. */ + pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; + break; + } + + DO_REASSEMBLE(IPPROTO_UDP); + } + + /* reassemble was invoked, cleanup its death-row. */ + if (cts != 0) + rte_ip_frag_free_death_row(&lc->death_row, 0); + + if (x == 0) + return nb_pkts; + + NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " + "%u non-reassembled fragments;\n", + __func__, port, queue, nb_pkts, x); + + return compress_pkt_list(pkt, nb_pkts, x); +} + +/* + * generic, assumes HW doesn't recognize any packet type. + */ +static uint16_t +typen_tcp_arp_rx_callback(uint8_t port, uint16_t queue, struct rte_mbuf *pkt[], + uint16_t nb_pkts, uint16_t max_pkts, void *user_param) +{ + uint32_t j, x; + struct netbe_lcore *lc; + + lc = user_param; + + RTE_SET_USED(queue); + RTE_SET_USED(max_pkts); + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + pkt[j] = fill_eth_tcp_arp_hdr_len(pkt[j], lc, port); + x += (pkt[j] == NULL); + } + + if (x == 0) + return nb_pkts; + + return compress_pkt_list(pkt, nb_pkts, x); +} + +static uint16_t +typen_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j; + struct netbe_lcore *lc; + + lc = user_param; + + RTE_SET_USED(lc); + + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + fill_eth_tcp_hdr_len(pkt[j]); + } + + return nb_pkts; +} + +static uint16_t +typen_udp_rx_callback(uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, x; + uint64_t cts; + struct netbe_lcore *lc; + + lc = user_param; + cts = 0; + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + fill_eth_udp_hdr_len(pkt[j]); + + DO_REASSEMBLE(IPPROTO_UDP); + } + + /* reassemble was invoked, cleanup its death-row. */ + if (cts != 0) + rte_ip_frag_free_death_row(&lc->death_row, 0); + + if (x == 0) + return nb_pkts; + + NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " + "%u non-reassembled fragments;\n", + __func__, port, queue, nb_pkts, x); + + return compress_pkt_list(pkt, nb_pkts, x); +} + +#include "pkt_dpdk_legacy.h" diff --git a/examples/l4fwd/pkt_dpdk_legacy.h b/examples/l4fwd/pkt_dpdk_legacy.h new file mode 100644 index 0000000..d840978 --- /dev/null +++ b/examples/l4fwd/pkt_dpdk_legacy.h @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PKT_DPDK_LEGACY_H_ +#define PKT_DPDK_LEGACY_H_ + +#include "dpdk_version.h" + +struct ptype2cb { + uint32_t mask; + const char *name; + rte_rx_callback_fn fn; +}; + +enum { + ETHER_PTYPE = 0x1, + IPV4_PTYPE = 0x2, + IPV4_EXT_PTYPE = 0x4, + IPV6_PTYPE = 0x8, + IPV6_EXT_PTYPE = 0x10, + TCP_PTYPE = 0x20, + UDP_PTYPE = 0x40, +}; + +#ifdef DPDK_VERSION_GE_1604 + +static uint32_t +get_ptypes(const struct netbe_port *uprt) +{ + uint32_t smask; + int32_t i, rc; + const uint32_t pmask = RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK | + RTE_PTYPE_L4_MASK; + + smask = 0; + rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, NULL, 0); + if (rc < 0) { + RTE_LOG(ERR, USER1, + "%s(port=%u) failed to get supported ptypes;\n", + __func__, uprt->id); + return smask; + } + + uint32_t ptype[rc]; + rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, ptype, rc); + + for (i = 0; i != rc; i++) { + switch (ptype[i]) { + case RTE_PTYPE_L2_ETHER: + smask |= ETHER_PTYPE; + break; + case RTE_PTYPE_L3_IPV4: + case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN: + smask |= IPV4_PTYPE; + break; + case RTE_PTYPE_L3_IPV4_EXT: + smask |= IPV4_EXT_PTYPE; + break; + case RTE_PTYPE_L3_IPV6: + case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN: + smask |= IPV6_PTYPE; + break; + case RTE_PTYPE_L3_IPV6_EXT: + smask |= IPV6_EXT_PTYPE; + break; + case RTE_PTYPE_L4_TCP: + smask |= TCP_PTYPE; + break; + case RTE_PTYPE_L4_UDP: + smask |= UDP_PTYPE; + break; + } + } + + return smask; +} + +#else + +static uint32_t +get_ptypes(__rte_unused const struct netbe_port *uprt) +{ + return 0; +} + +#endif /* DPDK_VERSION_GE_1604 */ + +int +setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc, + uint16_t qid, uint32_t arp) +{ + int32_t rc; + uint32_t i, n, smask; + void *cb; + const struct ptype2cb *ptype2cb; + + static const struct ptype2cb tcp_ptype2cb[] = { + { + .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE | + IPV6_PTYPE | IPV6_EXT_PTYPE | TCP_PTYPE, + .name = "HW l2/l3x/l4-tcp ptype", + .fn = type0_tcp_rx_callback, + }, + { + .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE | + TCP_PTYPE, + .name = "HW l2/l3/l4-tcp ptype", + .fn = type1_tcp_rx_callback, + }, + { + .mask = 0, + .name = "tcp no HW ptype", + .fn = typen_tcp_rx_callback, + }, + }; + + static const struct ptype2cb tcp_arp_ptype2cb[] = { + { + .mask = 0, + .name = "tcp with arp no HW ptype", + .fn = typen_tcp_arp_rx_callback, + }, + }; + + static const struct ptype2cb udp_ptype2cb[] = { + { + .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE | + IPV6_PTYPE | IPV6_EXT_PTYPE | UDP_PTYPE, + .name = "HW l2/l3x/l4-udp ptype", + .fn = type0_udp_rx_callback, + }, + { + .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE | + UDP_PTYPE, + .name = "HW l2/l3/l4-udp ptype", + .fn = type1_udp_rx_callback, + }, + { + .mask = 0, + .name = "udp no HW ptype", + .fn = typen_udp_rx_callback, + }, + }; + + smask = get_ptypes(uprt); + + if (lc->proto == TLE_PROTO_TCP) { + if (arp != 0) { + ptype2cb = tcp_arp_ptype2cb; + n = RTE_DIM(tcp_arp_ptype2cb); + } else { + ptype2cb = tcp_ptype2cb; + n = RTE_DIM(tcp_ptype2cb); + } + } else if (lc->proto == TLE_PROTO_UDP) { + ptype2cb = udp_ptype2cb; + n = RTE_DIM(udp_ptype2cb); + } else { + RTE_LOG(ERR, USER1, + "%s(lc=%u) unsupported proto: %u\n", + __func__, lc->id, lc->proto); + return -EINVAL; + } + + for (i = 0; i != n; i++) { + if ((smask & ptype2cb[i].mask) == ptype2cb[i].mask) { + cb = rte_eth_add_rx_callback(uprt->id, qid, + ptype2cb[i].fn, lc); + rc = -rte_errno; + RTE_LOG(ERR, USER1, + "%s(port=%u), setup RX callback \"%s\" " + "returns %p;\n", + __func__, uprt->id, ptype2cb[i].name, cb); + return ((cb == NULL) ? rc : 0); + } + } + + /* no proper callback found. */ + RTE_LOG(ERR, USER1, + "%s(port=%u) failed to find an appropriate callback;\n", + __func__, uprt->id); + return -ENOENT; +} + +#endif /* PKT_DPDK_LEGACY_H_ */ diff --git a/examples/l4fwd/port.h b/examples/l4fwd/port.h new file mode 100644 index 0000000..bc13dca --- /dev/null +++ b/examples/l4fwd/port.h @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef PORT_H_ +#define PORT_H_ + +static void +prepare_hash_key(struct netbe_port *uprt, uint8_t key_size, uint16_t family) +{ + uint32_t align_nb_q; + + align_nb_q = rte_align32pow2(uprt->nb_lcore); + memset(uprt->hash_key, 0, RSS_HASH_KEY_LENGTH); + uprt->hash_key_size = key_size; + if (family == AF_INET) + uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV4] = align_nb_q; + else + uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV6] = align_nb_q; +} + +static int +update_rss_conf(struct netbe_port *uprt, + const struct rte_eth_dev_info *dev_info, + struct rte_eth_conf *port_conf, uint32_t proto) +{ + uint8_t hash_key_size; + + if (uprt->nb_lcore > 1) { + if (dev_info->hash_key_size > 0) + hash_key_size = dev_info->hash_key_size; + else { + RTE_LOG(ERR, USER1, + "%s: dev_info did not provide a valid hash " + "key size\n", __func__); + return -EINVAL; + } + + if (uprt->ipv4 != INADDR_ANY && + memcmp(&uprt->ipv6, &in6addr_any, + sizeof(uprt->ipv6)) != 0) { + RTE_LOG(ERR, USER1, + "%s: RSS for both IPv4 and IPv6 not " + "supported!\n", __func__); + return -EINVAL; + } else if (uprt->ipv4 != INADDR_ANY) { + prepare_hash_key(uprt, hash_key_size, AF_INET); + } else if (memcmp(&uprt->ipv6, &in6addr_any, sizeof(uprt->ipv6)) + != 0) { + prepare_hash_key(uprt, hash_key_size, AF_INET6); + } else { + RTE_LOG(ERR, USER1, + "%s: No IPv4 or IPv6 address is found!\n", + __func__); + return -EINVAL; + } + port_conf->rxmode.mq_mode = ETH_MQ_RX_RSS; + if (proto == TLE_PROTO_TCP) + port_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_TCP; + else + port_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_UDP; + port_conf->rx_adv_conf.rss_conf.rss_key_len = hash_key_size; + port_conf->rx_adv_conf.rss_conf.rss_key = uprt->hash_key; + } + + return 0; +} + +static uint32_t +qidx_from_hash_index(uint32_t hash, uint32_t align_nb_q) +{ + uint32_t i, nb_bit, q; + + nb_bit = (sizeof(uint32_t) * CHAR_BIT) - __builtin_clz(align_nb_q - 1); + q = (hash & 1); + for (i = 1; i < nb_bit; i++) { + hash >>= 1; + q <<= 1; + q |= (hash & 1); + } + + return q; +} + +static int +update_rss_reta(struct netbe_port *uprt, + const struct rte_eth_dev_info *dev_info) +{ + struct rte_eth_rss_reta_entry64 reta_conf[RSS_RETA_CONF_ARRAY_SIZE]; + int32_t i, rc, align_nb_q; + int32_t q_index, idx, shift; + + if (uprt->nb_lcore > 1) { + if (dev_info->reta_size == 0) { + RTE_LOG(ERR, USER1, + "%s: Redirection table size 0 is invalid for " + "RSS\n", __func__); + return -EINVAL; + } + RTE_LOG(NOTICE, USER1, + "%s: The reta size of port %d is %u\n", + __func__, uprt->id, dev_info->reta_size); + + if (dev_info->reta_size > ETH_RSS_RETA_SIZE_512) { + RTE_LOG(ERR, USER1, + "%s: More than %u entries of Reta not supported\n", + __func__, ETH_RSS_RETA_SIZE_512); + return -EINVAL; + } + + memset(reta_conf, 0, sizeof(reta_conf)); + align_nb_q = rte_align32pow2(uprt->nb_lcore); + for (i = 0; i < align_nb_q; i++) { + q_index = qidx_from_hash_index(i, align_nb_q) % + uprt->nb_lcore; + + idx = i / RTE_RETA_GROUP_SIZE; + shift = i % RTE_RETA_GROUP_SIZE; + reta_conf[idx].mask |= (1ULL << shift); + reta_conf[idx].reta[shift] = q_index; + RTE_LOG(NOTICE, USER1, + "%s: port=%u RSS reta conf: hash=%u, q=%u\n", + __func__, uprt->id, i, q_index); + } + + rc = rte_eth_dev_rss_reta_update(uprt->id, + reta_conf, dev_info->reta_size); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: Bad redirection table parameter, " + "rc = %d\n", __func__, rc); + return rc; + } + } + + return 0; +} + +/* + * Initilise DPDK port. + * In current version, multi-queue per port is used. + */ +static int +port_init(struct netbe_port *uprt, uint32_t proto) +{ + int32_t rc; + struct rte_eth_conf port_conf; + struct rte_eth_dev_info dev_info; + + rte_eth_dev_info_get(uprt->id, &dev_info); + if ((dev_info.rx_offload_capa & uprt->rx_offload) != uprt->rx_offload) { + RTE_LOG(ERR, USER1, + "port#%u supported/requested RX offloads don't match, " + "supported: %#x, requested: %#x;\n", + uprt->id, dev_info.rx_offload_capa, uprt->rx_offload); + return -EINVAL; + } + if ((dev_info.tx_offload_capa & uprt->tx_offload) != uprt->tx_offload) { + RTE_LOG(ERR, USER1, + "port#%u supported/requested TX offloads don't match, " + "supported: %#x, requested: %#x;\n", + uprt->id, dev_info.tx_offload_capa, uprt->tx_offload); + return -EINVAL; + } + + port_conf = port_conf_default; + if ((uprt->rx_offload & RX_CSUM_OFFLOAD) != 0) { + RTE_LOG(ERR, USER1, "%s(%u): enabling RX csum offload;\n", + __func__, uprt->id); + port_conf.rxmode.hw_ip_checksum = 1; + } + port_conf.rxmode.max_rx_pkt_len = uprt->mtu + ETHER_CRC_LEN; + + rc = update_rss_conf(uprt, &dev_info, &port_conf, proto); + if (rc != 0) + return rc; + + rc = rte_eth_dev_configure(uprt->id, uprt->nb_lcore, uprt->nb_lcore, + &port_conf); + RTE_LOG(NOTICE, USER1, + "%s: rte_eth_dev_configure(prt_id=%u, nb_rxq=%u, nb_txq=%u) " + "returns %d;\n", __func__, uprt->id, uprt->nb_lcore, + uprt->nb_lcore, rc); + if (rc != 0) + return rc; + + return 0; +} + +static int +queue_init(struct netbe_port *uprt, struct rte_mempool *mp) +{ + int32_t socket, rc; + uint16_t q; + struct rte_eth_dev_info dev_info; + + rte_eth_dev_info_get(uprt->id, &dev_info); + + socket = rte_eth_dev_socket_id(uprt->id); + + dev_info.default_rxconf.rx_drop_en = 1; + + dev_info.default_txconf.tx_free_thresh = TX_RING_SIZE / 2; + if (uprt->tx_offload != 0) { + RTE_LOG(ERR, USER1, "%s(%u): enabling full featured TX;\n", + __func__, uprt->id); + dev_info.default_txconf.txq_flags = 0; + } + + for (q = 0; q < uprt->nb_lcore; q++) { + rc = rte_eth_rx_queue_setup(uprt->id, q, RX_RING_SIZE, + socket, &dev_info.default_rxconf, mp); + if (rc < 0) { + RTE_LOG(ERR, USER1, + "%s: rx queue=%u setup failed with error " + "code: %d\n", __func__, q, rc); + return rc; + } + } + + for (q = 0; q < uprt->nb_lcore; q++) { + rc = rte_eth_tx_queue_setup(uprt->id, q, TX_RING_SIZE, + socket, &dev_info.default_txconf); + if (rc < 0) { + RTE_LOG(ERR, USER1, + "%s: tx queue=%u setup failed with error " + "code: %d\n", __func__, q, rc); + return rc; + } + } + return 0; +} + +/* + * Check that lcore is enabled, not master, and not in use already. + */ +static int +check_lcore(uint32_t lc) +{ + if (rte_lcore_is_enabled(lc) == 0) { + RTE_LOG(ERR, USER1, "lcore %u is not enabled\n", lc); + return -EINVAL; + } + if (rte_eal_get_lcore_state(lc) == RUNNING) { + RTE_LOG(ERR, USER1, "lcore %u already running %p\n", + lc, lcore_config[lc].f); + return -EINVAL; + } + return 0; +} + +static void +log_netbe_prt(const struct netbe_port *uprt) +{ + uint32_t i; + char corelist[2 * RTE_MAX_LCORE + 1]; + char hashkey[2 * RSS_HASH_KEY_LENGTH]; + + memset(corelist, 0, sizeof(corelist)); + memset(hashkey, 0, sizeof(hashkey)); + for (i = 0; i < uprt->nb_lcore; i++) + if (i < uprt->nb_lcore - 1) + sprintf(corelist + (2 * i), "%u,", uprt->lcore_id[i]); + else + sprintf(corelist + (2 * i), "%u", uprt->lcore_id[i]); + + for (i = 0; i < uprt->hash_key_size; i++) + sprintf(hashkey + (2 * i), "%02x", uprt->hash_key[i]); + + RTE_LOG(NOTICE, USER1, + "uprt %p = , mtu = %u, " + "rx_offload = %u, tx_offload = %u,\n" + "ipv4 = %#x, " + "ipv6 = %04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx, " + "mac = %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx>;\n" + "hashkey = %s;\n", + uprt, uprt->id, corelist, + uprt->mtu, uprt->rx_offload, uprt->tx_offload, + uprt->ipv4, + uprt->ipv6.s6_addr16[0], uprt->ipv6.s6_addr16[1], + uprt->ipv6.s6_addr16[2], uprt->ipv6.s6_addr16[3], + uprt->ipv6.s6_addr16[4], uprt->ipv6.s6_addr16[5], + uprt->ipv6.s6_addr16[6], uprt->ipv6.s6_addr16[7], + uprt->mac.addr_bytes[0], uprt->mac.addr_bytes[1], + uprt->mac.addr_bytes[2], uprt->mac.addr_bytes[3], + uprt->mac.addr_bytes[4], uprt->mac.addr_bytes[5], + hashkey); +} + +static void +log_netbe_cfg(const struct netbe_cfg *ucfg) +{ + uint32_t i; + + RTE_LOG(NOTICE, USER1, + "ucfg @ %p, prt_num = %u\n", ucfg, ucfg->prt_num); + + for (i = 0; i != ucfg->prt_num; i++) + log_netbe_prt(ucfg->prt + i); +} + +static int +pool_init(uint32_t sid) +{ + int32_t rc; + struct rte_mempool *mp; + char name[RTE_MEMPOOL_NAMESIZE]; + + snprintf(name, sizeof(name), "MP%u", sid); + mp = rte_pktmbuf_pool_create(name, MPOOL_NB_BUF, MPOOL_CACHE_SIZE, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, sid - 1); + if (mp == NULL) { + rc = -rte_errno; + RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n", + __func__, sid - 1, rc); + return rc; + } + + mpool[sid] = mp; + return 0; +} + +static int +frag_pool_init(uint32_t sid) +{ + int32_t rc; + struct rte_mempool *frag_mp; + char frag_name[RTE_MEMPOOL_NAMESIZE]; + + snprintf(frag_name, sizeof(frag_name), "frag_MP%u", sid); + frag_mp = rte_pktmbuf_pool_create(frag_name, MPOOL_NB_BUF, + MPOOL_CACHE_SIZE, 0, FRAG_MBUF_BUF_SIZE, sid - 1); + if (frag_mp == NULL) { + rc = -rte_errno; + RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n", + __func__, sid - 1, rc); + return rc; + } + + frag_mpool[sid] = frag_mp; + return 0; +} + +static struct netbe_lcore * +find_initilized_lcore(struct netbe_cfg *cfg, uint32_t lc_num) +{ + uint32_t i; + + for (i = 0; i < cfg->cpu_num; i++) + if (cfg->cpu[i].id == lc_num) + return &cfg->cpu[i]; + + return NULL; +} + +/* + * Setup all enabled ports. + */ +static int +netbe_port_init(struct netbe_cfg *cfg) +{ + int32_t rc; + uint32_t i, sid, j; + struct netbe_port *prt; + struct netbe_lcore *lc; + + for (i = 0; i != cfg->prt_num; i++) { + prt = cfg->prt + i; + rc = port_init(prt, cfg->proto); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: port=%u init failed with error code: %d\n", + __func__, prt->id, rc); + return rc; + } + rte_eth_macaddr_get(prt->id, &prt->mac); + if (cfg->promisc) + rte_eth_promiscuous_enable(prt->id); + + for (j = 0; j < prt->nb_lcore; j++) { + rc = check_lcore(prt->lcore_id[j]); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: processing failed with err: %d\n", + __func__, rc); + return rc; + } + + sid = rte_lcore_to_socket_id(prt->lcore_id[j]) + 1; + assert(sid < RTE_DIM(mpool)); + + if (mpool[sid] == NULL) { + rc = pool_init(sid); + if (rc != 0) + return rc; + } + + if (frag_mpool[sid] == NULL) { + rc = frag_pool_init(sid); + if (rc != 0) + return rc; + } + + rc = queue_init(prt, mpool[sid]); + if (rc != 0) { + RTE_LOG(ERR, USER1, + "%s: lcore=%u queue init failed with " + "err: %d\n", + __func__, prt->lcore_id[j], rc); + return rc; + } + + /* calculate number of queues and assign queue id + * per lcore. */ + lc = find_initilized_lcore(cfg, prt->lcore_id[j]); + if (lc == NULL) { + lc = &cfg->cpu[cfg->cpu_num]; + lc->id = prt->lcore_id[j]; + lc->proto = becfg.proto; + cfg->cpu_num++; + } + + lc->prtq = rte_realloc(lc->prtq, sizeof(*(lc->prtq)) * + (lc->prtq_num + 1), RTE_CACHE_LINE_SIZE); + if (lc->prtq == NULL) { + RTE_LOG(ERR, USER1, + "%s: failed to reallocate memory\n", + __func__); + return -ENOMEM; + } + lc->prtq[lc->prtq_num].rxqid = j; + lc->prtq[lc->prtq_num].txqid = j; + lc->prtq[lc->prtq_num].port = *prt; + lc->prtq_num++; + } + } + log_netbe_cfg(cfg); + + return 0; +} + +#endif /* PORT_H_ */ diff --git a/examples/l4fwd/tcp.h b/examples/l4fwd/tcp.h new file mode 100644 index 0000000..031ad8d --- /dev/null +++ b/examples/l4fwd/tcp.h @@ -0,0 +1,701 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TCP_H_ +#define TCP_H_ + +#define TCP_MAX_PROCESS 0x20 + +static inline void +netfe_stream_term_tcp(struct netfe_lcore *fe, struct netfe_stream *fes) +{ + fes->s = NULL; + fes->fwds = NULL; + memset(&fes->stat, 0, sizeof(fes->stat)); + netfe_put_stream(fe, &fe->free, fes); +} + +static inline void +netfe_stream_close_tcp(struct netfe_lcore *fe, struct netfe_stream *fes) +{ + tle_tcp_stream_close(fes->s); + netfe_stream_term_tcp(fe, fes); +} + +/* + * helper function: opens IPv4 and IPv6 streams for selected port. + */ +static struct netfe_stream * +netfe_stream_open_tcp(struct netfe_lcore *fe, struct netfe_sprm *sprm, + uint32_t lcore, uint16_t op, uint32_t bidx, uint8_t server_mode) +{ + int32_t rc; + struct netfe_stream *fes; + struct sockaddr_in *l4; + struct sockaddr_in6 *l6; + uint16_t errport; + struct tle_tcp_stream_param tprm; + + fes = netfe_get_stream(&fe->free); + if (fes == NULL) { + rte_errno = ENOBUFS; + return NULL; + } + + if (server_mode != 0) { + tle_event_free(fes->rxev); + fes->rxev = tle_event_alloc(fe->syneq, fes); + } + + if (fes->rxev == NULL) { + netfe_stream_close_tcp(fe, fes); + rte_errno = ENOMEM; + return NULL; + } + + /* activate rx, tx and err events for the stream */ + if (op == TXONLY || op == FWD) { + tle_event_active(fes->txev, TLE_SEV_DOWN); + fes->stat.txev[TLE_SEV_DOWN]++; + } + + if (op != TXONLY || server_mode != 0) { + tle_event_active(fes->rxev, TLE_SEV_DOWN); + fes->stat.rxev[TLE_SEV_DOWN]++; + } + tle_event_active(fes->erev, TLE_SEV_DOWN); + fes->stat.erev[TLE_SEV_DOWN]++; + + memset(&tprm, 0, sizeof(tprm)); + tprm.addr.local = sprm->local_addr; + tprm.addr.remote = sprm->remote_addr; + tprm.cfg.err_ev = fes->erev; + tprm.cfg.recv_ev = fes->rxev; + if (op != FWD) + tprm.cfg.send_ev = fes->txev; + + fes->s = tle_tcp_stream_open(becfg.cpu[bidx].ctx, &tprm); + + if (fes->s == NULL) { + rc = rte_errno; + netfe_stream_close_tcp(fe, fes); + rte_errno = rc; + + if (sprm->local_addr.ss_family == AF_INET) { + l4 = (struct sockaddr_in *) &sprm->local_addr; + errport = ntohs(l4->sin_port); + } else { + l6 = (struct sockaddr_in6 *) &sprm->local_addr; + errport = ntohs(l6->sin6_port); + } + + RTE_LOG(ERR, USER1, "stream open failed for port %u with error " + "code=%u, bidx=%u, lc=%u\n", + errport, rc, bidx, becfg.cpu[bidx].id); + return NULL; + } + + RTE_LOG(NOTICE, USER1, + "%s(%u)={s=%p, op=%hu, proto=%s, rxev=%p, txev=%p}, belc=%u\n", + __func__, lcore, fes->s, op, proto_name[becfg.proto], + fes->rxev, fes->txev, becfg.cpu[bidx].id); + + fes->op = op; + fes->proto = becfg.proto; + fes->family = sprm->local_addr.ss_family; + fes->laddr = sprm->local_addr; + netfe_put_stream(fe, &fe->use, fes); + + return fes; +} + +static int +netfe_lcore_init_tcp(const struct netfe_lcore_prm *prm) +{ + size_t sz; + int32_t rc; + uint32_t i, lcore, snum; + struct netfe_lcore *fe; + struct tle_evq_param eprm; + struct netfe_stream *fes; + struct netfe_sprm *sprm; + + lcore = rte_lcore_id(); + + snum = prm->max_streams; + RTE_LOG(NOTICE, USER1, "%s(lcore=%u, nb_streams=%u, max_streams=%u)\n", + __func__, lcore, prm->nb_streams, snum); + + memset(&eprm, 0, sizeof(eprm)); + eprm.socket_id = rte_lcore_to_socket_id(lcore); + eprm.max_events = snum; + + sz = sizeof(*fe) + snum * sizeof(struct netfe_stream); + fe = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, + rte_lcore_to_socket_id(lcore)); + + if (fe == NULL) { + RTE_LOG(ERR, USER1, "%s:%d failed to allocate %zu bytes\n", + __func__, __LINE__, sz); + return -ENOMEM; + } + + RTE_PER_LCORE(_fe) = fe; + + fe->snum = snum; + /* initialize the stream pool */ + LIST_INIT(&fe->free.head); + LIST_INIT(&fe->use.head); + + /* allocate the event queues */ + fe->syneq = tle_evq_create(&eprm); + fe->ereq = tle_evq_create(&eprm); + fe->rxeq = tle_evq_create(&eprm); + fe->txeq = tle_evq_create(&eprm); + + RTE_LOG(INFO, USER1, "%s(%u) synevq=%p, erevq=%p, rxevq=%p, txevq=%p\n", + __func__, lcore, fe->syneq, fe->ereq, fe->rxeq, fe->txeq); + if (fe->syneq == NULL || fe->ereq == NULL || fe->rxeq == NULL || + fe->txeq == NULL) + return -ENOMEM; + + fes = (struct netfe_stream *)(fe + 1); + for (i = 0; i != snum; i++) { + fes[i].rxev = tle_event_alloc(fe->rxeq, fes + i); + fes[i].txev = tle_event_alloc(fe->txeq, fes + i); + fes[i].erev = tle_event_alloc(fe->ereq, fes + i); + netfe_put_stream(fe, &fe->free, fes + i); + } + + + /* open all requested streams. */ + for (i = 0; i != prm->nb_streams; i++) { + sprm = &prm->stream[i].sprm; + fes = netfe_stream_open_tcp(fe, sprm, lcore, prm->stream[i].op, + sprm->bidx, becfg.server); + if (fes == NULL) { + rc = -rte_errno; + break; + } + + netfe_stream_dump(fes, &sprm->local_addr, &sprm->remote_addr); + + if (prm->stream[i].op == FWD) { + fes->fwdprm = prm->stream[i].fprm; + } else if (prm->stream[i].op == TXONLY) { + fes->txlen = prm->stream[i].txlen; + fes->raddr = prm->stream[i].sprm.remote_addr; + } + + if (becfg.server == 1) { + rc = tle_tcp_stream_listen(fes->s); + RTE_LOG(INFO, USER1, + "%s(%u) tle_tcp_stream_listen(stream=%p) " + "returns %d\n", + __func__, lcore, fes->s, rc); + if (rc != 0) + break; + } else { + rc = tle_tcp_stream_connect(fes->s, + (const struct sockaddr *)&sprm->remote_addr); + RTE_LOG(INFO, USER1, + "%s(%u) tle_tcp_stream_connect(stream=%p) " + "returns %d\n", + __func__, lcore, fes->s, rc); + if (rc != 0) + break; + } + } + + return rc; +} + +static inline struct netfe_stream * +netfe_create_fwd_stream(struct netfe_lcore *fe, struct netfe_stream *fes, + uint32_t lcore, uint32_t bidx) +{ + uint32_t rc; + struct netfe_stream *fws; + + fws = netfe_stream_open_tcp(fe, &fes->fwdprm, lcore, FWD, bidx, 0); + if (fws != NULL) { + rc = tle_tcp_stream_connect(fws->s, + (const struct sockaddr *)&fes->fwdprm.remote_addr); + NETFE_TRACE("%s(lc=%u, fes=%p): tle_tcp_stream_connect() " + "returns %d;\n", + __func__, rte_lcore_id(), fes, rc); + + if (rc != 0) { + netfe_stream_term_tcp(fe, fws); + fws = NULL; + } + } + + if (fws == NULL) + RTE_LOG(ERR, USER1, "%s(lc=%u fes=%p) failed to open " + "forwarding stream;\n", + __func__, rte_lcore_id(), fes); + + return fws; +} + +static inline void +netfe_fwd_tcp(uint32_t lcore, struct netfe_stream *fes) +{ + uint32_t i, k, n; + struct rte_mbuf **pkt; + struct netfe_stream *fed; + + RTE_SET_USED(lcore); + + n = fes->pbuf.num; + pkt = fes->pbuf.pkt; + + if (n == 0) + return; + + fed = fes->fwds; + + if (fed != NULL) { + + k = tle_tcp_stream_send(fed->s, pkt, n); + + NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) " + "returns %u\n", + __func__, lcore, proto_name[fes->proto], + fed->s, n, k); + + fed->stat.txp += k; + fed->stat.drops += n - k; + fes->stat.fwp += k; + + } else { + NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n", + __func__, lcore, fes->s, n); + for (k = 0; k != n; k++) { + NETFE_TRACE("%s(%u, %p): free(%p);\n", + __func__, lcore, fes->s, pkt[k]); + rte_pktmbuf_free(pkt[k]); + } + fes->stat.drops += n; + } + + /* copy unforwarded mbufs. */ + for (i = 0; i != n - k; i++) + pkt[i] = pkt[i + k]; + + fes->pbuf.num = i; + + if (i != 0) { + tle_event_raise(fes->txev); + fes->stat.txev[TLE_SEV_UP]++; + } + + if (n == RTE_DIM(fes->pbuf.pkt)) { + tle_event_active(fes->rxev, TLE_SEV_UP); + fes->stat.rxev[TLE_SEV_UP]++; + } +} + +static inline void +netfe_new_conn_tcp(struct netfe_lcore *fe, __rte_unused uint32_t lcore, + struct netfe_stream *fes) +{ + uint32_t i, k, n, rc; + struct tle_tcp_stream_cfg *prm; + struct tle_tcp_accept_param acpt_prm[MAX_PKT_BURST]; + struct tle_stream *rs[MAX_PKT_BURST]; + struct tle_syn_req syn_reqs[MAX_PKT_BURST]; + struct netfe_stream *ts; + struct netfe_stream *fs[MAX_PKT_BURST]; + + static const struct tle_stream_cb zcb = {.func = NULL, .data = NULL}; + + /* check if any syn requests are waiting */ + n = tle_tcp_stream_synreqs(fes->s, syn_reqs, RTE_DIM(syn_reqs)); + if (n == 0) + return; + + NETFE_TRACE("%s(%u): tle_tcp_stream_synreqs(%p, %u) returns %u\n", + __func__, lcore, fes->s, MAX_PKT_BURST, n); + + /* get n free streams */ + k = netfe_get_streams(&fe->free, fs, n); + + /* fill accept params to accept k connection requests*/ + for (i = 0; i != k; i++) { + acpt_prm[i].syn = syn_reqs[i]; + prm = &acpt_prm[i].cfg; + prm->nb_retries = 0; + prm->recv_ev = fs[i]->rxev; + prm->send_ev = fs[i]->txev; + prm->err_ev = fs[i]->erev; + tle_event_active(fs[i]->erev, TLE_SEV_DOWN); + prm->err_cb = zcb; + prm->recv_cb = zcb; + prm->send_cb = zcb; + } + + /* accept k new connections */ + rc = tle_tcp_stream_accept(fes->s, acpt_prm, rs, k); + + NETFE_TRACE("%s(%u): tle_tcp_stream_accept(%p, %u) returns %u\n", + __func__, lcore, fes->s, k, rc); + + if (rc != n) { + /* n - rc connections could not be accepted */ + tle_tcp_reject(fes->s, syn_reqs + rc, n - rc); + + /* put back k - rc streams free list */ + netfe_put_streams(fe, &fe->free, fs + rc, k - rc); + } + + /* update the params for accepted streams */ + for (i = 0; i != rc; i++) { + + ts = fs[i]; + + ts->s = rs[i]; + ts->op = fes->op; + ts->proto = fes->proto; + ts->family = fes->family; + ts->txlen = fes->txlen; + + if (fes->op == TXONLY) { + tle_event_active(ts->txev, TLE_SEV_UP); + ts->stat.txev[TLE_SEV_UP]++; + } else { + tle_event_active(ts->rxev, TLE_SEV_DOWN); + ts->stat.rxev[TLE_SEV_DOWN]++; + } + + netfe_put_stream(fe, &fe->use, ts); + NETFE_TRACE("%s(%u) accept (stream=%p, s=%p)\n", + __func__, lcore, ts, rs[i]); + + /* create a new fwd stream if needed */ + if (fes->op == FWD) { + tle_event_active(ts->txev, TLE_SEV_DOWN); + ts->stat.txev[TLE_SEV_DOWN]++; + + ts->fwds = netfe_create_fwd_stream(fe, fes, lcore, + fes->fwdprm.bidx); + if (ts->fwds != NULL) + ts->fwds->fwds = ts; + } + } + fe->tcp_stat.acc += rc; + fe->tcp_stat.rej += n - rc; +} + +static inline void +netfe_lcore_tcp_req(void) +{ + struct netfe_lcore *fe; + uint32_t j, n, lcore; + struct netfe_stream *fs[MAX_PKT_BURST]; + + fe = RTE_PER_LCORE(_fe); + if (fe == NULL) + return; + + /* look for syn events */ + n = tle_evq_get(fe->syneq, (const void **)(uintptr_t)fs, RTE_DIM(fs)); + if (n == 0) + return; + + lcore = rte_lcore_id(); + + NETFE_TRACE("%s(%u): tle_evq_get(synevq=%p) returns %u\n", + __func__, lcore, fe->syneq, n); + + for (j = 0; j != n; j++) + netfe_new_conn_tcp(fe, lcore, fs[j]); +} + +static inline void +netfe_lcore_tcp_rst(void) +{ + struct netfe_lcore *fe; + struct netfe_stream *fwds; + uint32_t j, n; + struct tle_stream *s[MAX_PKT_BURST]; + struct netfe_stream *fs[MAX_PKT_BURST]; + struct tle_event *rv[MAX_PKT_BURST]; + struct tle_event *tv[MAX_PKT_BURST]; + struct tle_event *ev[MAX_PKT_BURST]; + + fe = RTE_PER_LCORE(_fe); + if (fe == NULL) + return; + + /* look for err events */ + n = tle_evq_get(fe->ereq, (const void **)(uintptr_t)fs, RTE_DIM(fs)); + if (n == 0) + return; + + NETFE_TRACE("%s(%u): tle_evq_get(errevq=%p) returns %u\n", + __func__, rte_lcore_id(), fe->ereq, n); + + for (j = 0; j != n; j++) { + if (verbose > VERBOSE_NONE) { + struct tle_tcp_stream_addr addr; + tle_tcp_stream_get_addr(fs[j]->s, &addr); + netfe_stream_dump(fs[j], &addr.local, &addr.remote); + } + s[j] = fs[j]->s; + rv[j] = fs[j]->rxev; + tv[j] = fs[j]->txev; + ev[j] = fs[j]->erev; + } + + tle_evq_idle(fe->rxeq, rv, n); + tle_evq_idle(fe->txeq, tv, n); + tle_evq_idle(fe->ereq, ev, n); + + tle_tcp_stream_close_bulk(s, n); + + for (j = 0; j != n; j++) { + + /* + * if forwarding mode, send unsent packets and + * signal peer stream to terminate too. + */ + fwds = fs[j]->fwds; + if (fwds != NULL && fwds->s != NULL) { + + /* forward all unsent packets */ + netfe_fwd_tcp(rte_lcore_id(), fs[j]); + + fwds->fwds = NULL; + tle_event_raise(fwds->erev); + fs[j]->fwds = NULL; + } + + /* now terminate the stream receiving rst event*/ + netfe_rem_stream(&fe->use, fs[j]); + netfe_stream_term_tcp(fe, fs[j]); + fe->tcp_stat.ter++; + } +} + +static inline void +netfe_rxtx_process_tcp(__rte_unused uint32_t lcore, struct netfe_stream *fes) +{ + uint32_t i, k, n; + struct rte_mbuf **pkt; + + n = fes->pbuf.num; + pkt = fes->pbuf.pkt; + + /* there is nothing to send. */ + if (n == 0) { + tle_event_idle(fes->txev); + fes->stat.txev[TLE_SEV_IDLE]++; + return; + } + + + k = tle_tcp_stream_send(fes->s, pkt, n); + + NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n", + __func__, lcore, proto_name[fes->proto], + fes->s, n, k); + fes->stat.txp += k; + fes->stat.drops += n - k; + + /* not able to send anything. */ + if (k == 0) + return; + + if (n == RTE_DIM(fes->pbuf.pkt)) { + /* mark stream as readable */ + tle_event_active(fes->rxev, TLE_SEV_UP); + fes->stat.rxev[TLE_SEV_UP]++; + } + + /* adjust pbuf array. */ + fes->pbuf.num = n - k; + for (i = 0; i != n - k; i++) + pkt[i] = pkt[i + k]; +} + +static inline void +netfe_tx_process_tcp(uint32_t lcore, struct netfe_stream *fes) +{ + uint32_t i, k, n; + + /* refill with new mbufs. */ + pkt_buf_fill(lcore, &fes->pbuf, fes->txlen); + + n = fes->pbuf.num; + if (n == 0) + return; + + /** + * TODO: cannot use function pointers for unequal param num. + */ + k = tle_tcp_stream_send(fes->s, fes->pbuf.pkt, n); + + NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n", + __func__, lcore, proto_name[fes->proto], fes->s, n, k); + fes->stat.txp += k; + fes->stat.drops += n - k; + + if (k == 0) + return; + + /* adjust pbuf array. */ + fes->pbuf.num = n - k; + for (i = k; i != n; i++) + fes->pbuf.pkt[i - k] = fes->pbuf.pkt[i]; +} + +static inline void +netfe_lcore_tcp(void) +{ + struct netfe_lcore *fe; + uint32_t j, n, lcore; + struct netfe_stream *fs[MAX_PKT_BURST]; + + fe = RTE_PER_LCORE(_fe); + if (fe == NULL) + return; + + lcore = rte_lcore_id(); + + /* look for rx events */ + n = tle_evq_get(fe->rxeq, (const void **)(uintptr_t)fs, RTE_DIM(fs)); + + if (n != 0) { + NETFE_TRACE("%s(%u): tle_evq_get(rxevq=%p) returns %u\n", + __func__, lcore, fe->rxeq, n); + for (j = 0; j != n; j++) + netfe_rx_process(lcore, fs[j]); + } + + /* look for tx events */ + n = tle_evq_get(fe->txeq, (const void **)(uintptr_t)fs, RTE_DIM(fs)); + + if (n != 0) { + NETFE_TRACE("%s(%u): tle_evq_get(txevq=%p) returns %u\n", + __func__, lcore, fe->txeq, n); + for (j = 0; j != n; j++) { + if (fs[j]->op == RXTX) + netfe_rxtx_process_tcp(lcore, fs[j]); + else if (fs[j]->op == FWD) + netfe_fwd_tcp(lcore, fs[j]); + else if (fs[j]->op == TXONLY) + netfe_tx_process_tcp(lcore, fs[j]); + } + } +} + +static void +netfe_lcore_fini_tcp(void) +{ + struct netfe_lcore *fe; + uint32_t i, snum; + struct tle_tcp_stream_addr addr; + struct netfe_stream *fes; + uint32_t acc, rej, ter; + + fe = RTE_PER_LCORE(_fe); + if (fe == NULL) + return; + + snum = fe->use.num; + for (i = 0; i != snum; i++) { + fes = netfe_get_stream(&fe->use); + tle_tcp_stream_get_addr(fes->s, &addr); + netfe_stream_dump(fes, &addr.local, &addr.remote); + netfe_stream_close(fe, fes); + } + + acc = fe->tcp_stat.acc; + rej = fe->tcp_stat.rej; + ter = fe->tcp_stat.ter; + RTE_LOG(NOTICE, USER1, + "tcp_stats={con_acc=%u,con_rej=%u,con_ter=%u};\n", + acc, rej, ter); + + tle_evq_destroy(fe->txeq); + tle_evq_destroy(fe->rxeq); + tle_evq_destroy(fe->ereq); + tle_evq_destroy(fe->syneq); + RTE_PER_LCORE(_fe) = NULL; + rte_free(fe); +} + +static inline void +netbe_lcore_tcp(void) +{ + uint32_t i; + struct netbe_lcore *lc; + + lc = RTE_PER_LCORE(_be); + if (lc == NULL) + return; + + for (i = 0; i != lc->prtq_num; i++) { + netbe_rx(lc, i); + tle_tcp_process(lc->ctx, TCP_MAX_PROCESS); + netbe_tx(lc, i); + } +} + +static int +lcore_main_tcp(void *arg) +{ + int32_t rc; + uint32_t lcore; + struct lcore_prm *prm; + + prm = arg; + lcore = rte_lcore_id(); + + RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n", + __func__, lcore); + + rc = 0; + + /* lcore FE init. */ + if (prm->fe.max_streams != 0) + rc = netfe_lcore_init_tcp(&prm->fe); + + /* lcore FE init. */ + if (rc == 0 && prm->be.lc != NULL) + rc = netbe_lcore_setup(prm->be.lc); + + if (rc != 0) + sig_handle(SIGQUIT); + + while (force_quit == 0) { + netfe_lcore_tcp_req(); + netfe_lcore_tcp_rst(); + netfe_lcore_tcp(); + netbe_lcore_tcp(); + } + + RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n", + __func__, lcore); + + netfe_lcore_fini_tcp(); + netbe_lcore_clear(); + + return rc; +} + +#endif /* TCP_H_ */ diff --git a/examples/l4fwd/udp.h b/examples/l4fwd/udp.h new file mode 100644 index 0000000..cdec6a5 --- /dev/null +++ b/examples/l4fwd/udp.h @@ -0,0 +1,588 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef UDP_H_ +#define UDP_H_ + +/* + * helper function: opens IPv4 and IPv6 streams for selected port. + */ +static struct netfe_stream * +netfe_stream_open_udp(struct netfe_lcore *fe, struct netfe_sprm *sprm, + uint32_t lcore, uint16_t op, uint32_t bidx) +{ + int32_t rc; + struct netfe_stream *fes; + struct sockaddr_in *l4; + struct sockaddr_in6 *l6; + uint16_t errport; + struct tle_udp_stream_param uprm; + + fes = netfe_get_stream(&fe->free); + if (fes == NULL) { + rte_errno = ENOBUFS; + return NULL; + } + + fes->rxev = tle_event_alloc(fe->rxeq, fes); + fes->txev = tle_event_alloc(fe->txeq, fes); + + if (fes->rxev == NULL || fes->txev == NULL) { + netfe_stream_close(fe, fes); + rte_errno = ENOMEM; + return NULL; + } + + if (op == TXONLY || op == FWD) { + tle_event_active(fes->txev, TLE_SEV_DOWN); + fes->stat.txev[TLE_SEV_DOWN]++; + } + + if (op != TXONLY) { + tle_event_active(fes->rxev, TLE_SEV_DOWN); + fes->stat.rxev[TLE_SEV_DOWN]++; + } + + memset(&uprm, 0, sizeof(uprm)); + uprm.local_addr = sprm->local_addr; + uprm.remote_addr = sprm->remote_addr; + uprm.recv_ev = fes->rxev; + if (op != FWD) + uprm.send_ev = fes->txev; + fes->s = tle_udp_stream_open(becfg.cpu[bidx].ctx, &uprm); + + if (fes->s == NULL) { + rc = rte_errno; + netfe_stream_close(fe, fes); + rte_errno = rc; + + if (sprm->local_addr.ss_family == AF_INET) { + l4 = (struct sockaddr_in *) &sprm->local_addr; + errport = ntohs(l4->sin_port); + } else { + l6 = (struct sockaddr_in6 *) &sprm->local_addr; + errport = ntohs(l6->sin6_port); + } + + RTE_LOG(ERR, USER1, "stream open failed for port %u with error " + "code=%u, bidx=%u, lc=%u\n", + errport, rc, bidx, becfg.cpu[bidx].id); + return NULL; + } + + RTE_LOG(NOTICE, USER1, + "%s(%u)={s=%p, op=%hu, proto=%s, rxev=%p, txev=%p}, belc=%u\n", + __func__, lcore, fes->s, op, proto_name[becfg.proto], + fes->rxev, fes->txev, becfg.cpu[bidx].id); + + fes->op = op; + fes->proto = becfg.proto; + fes->family = sprm->local_addr.ss_family; + + return fes; +} + +static int +netfe_lcore_init_udp(const struct netfe_lcore_prm *prm) +{ + size_t sz; + int32_t rc; + uint32_t i, lcore, snum; + struct netfe_lcore *fe; + struct tle_evq_param eprm; + struct netfe_stream *fes; + struct netfe_sprm *sprm; + + lcore = rte_lcore_id(); + + snum = prm->max_streams; + RTE_LOG(NOTICE, USER1, "%s(lcore=%u, nb_streams=%u, max_streams=%u)\n", + __func__, lcore, prm->nb_streams, snum); + + memset(&eprm, 0, sizeof(eprm)); + eprm.socket_id = rte_lcore_to_socket_id(lcore); + eprm.max_events = snum; + + sz = sizeof(*fe) + snum * sizeof(struct netfe_stream); + fe = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, + rte_lcore_to_socket_id(lcore)); + + if (fe == NULL) { + RTE_LOG(ERR, USER1, "%s:%d failed to allocate %zu bytes\n", + __func__, __LINE__, sz); + return -ENOMEM; + } + + RTE_PER_LCORE(_fe) = fe; + + fe->snum = snum; + /* initialize the stream pool */ + LIST_INIT(&fe->free.head); + LIST_INIT(&fe->use.head); + fes = (struct netfe_stream *)(fe + 1); + for (i = 0; i != snum; i++, fes++) + netfe_put_stream(fe, &fe->free, fes); + + /* allocate the event queues */ + fe->rxeq = tle_evq_create(&eprm); + fe->txeq = tle_evq_create(&eprm); + + RTE_LOG(INFO, USER1, "%s(%u) rx evq=%p, tx evq=%p\n", + __func__, lcore, fe->rxeq, fe->txeq); + if (fe->rxeq == NULL || fe->txeq == NULL) + return -ENOMEM; + + rc = fwd_tbl_init(fe, AF_INET, lcore); + RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n", + __func__, lcore, AF_INET, rc); + if (rc != 0) + return rc; + + rc = fwd_tbl_init(fe, AF_INET6, lcore); + RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n", + __func__, lcore, AF_INET6, rc); + if (rc != 0) + return rc; + + /* open all requested streams. */ + for (i = 0; i != prm->nb_streams; i++) { + sprm = &prm->stream[i].sprm; + fes = netfe_stream_open_udp(fe, sprm, lcore, prm->stream[i].op, + sprm->bidx); + if (fes == NULL) { + rc = -rte_errno; + break; + } + + netfe_stream_dump(fes, &sprm->local_addr, &sprm->remote_addr); + + if (prm->stream[i].op == FWD) { + fes->fwdprm = prm->stream[i].fprm; + rc = fwd_tbl_add(fe, + prm->stream[i].fprm.remote_addr.ss_family, + (const struct sockaddr *) + &prm->stream[i].fprm.remote_addr, + fes); + if (rc != 0) { + netfe_stream_close(fe, fes); + break; + } + } else if (prm->stream[i].op == TXONLY) { + fes->txlen = prm->stream[i].txlen; + fes->raddr = prm->stream[i].sprm.remote_addr; + } + } + + return rc; +} + +static struct netfe_stream * +find_fwd_dst_udp(uint32_t lcore, struct netfe_stream *fes, + const struct sockaddr *sa) +{ + uint32_t rc; + struct netfe_stream *fed; + struct netfe_lcore *fe; + struct tle_udp_stream_param uprm; + + fe = RTE_PER_LCORE(_fe); + + fed = fwd_tbl_lkp(fe, fes->family, sa); + if (fed != NULL) + return fed; + + /* create a new stream and put it into the fwd table. */ + memset(&uprm, 0, sizeof(uprm)); + uprm.local_addr = fes->fwdprm.local_addr; + uprm.remote_addr = fes->fwdprm.remote_addr; + + /* open forward stream with wildcard remote addr. */ + memset(&uprm.remote_addr.ss_family + 1, 0, + sizeof(uprm.remote_addr) - sizeof(uprm.remote_addr.ss_family)); + + fed = netfe_stream_open_udp(fe, &fes->fwdprm, lcore, FWD, + fes->fwdprm.bidx); + if (fed == NULL) + return NULL; + + rc = fwd_tbl_add(fe, fes->family, sa, fed); + if (rc != 0) { + netfe_stream_close(fe, fed); + fed = NULL; + } + + fed->fwdprm.remote_addr = *(const struct sockaddr_storage *)sa; + return fed; +} + +static inline int +netfe_addr_eq(struct sockaddr_storage *l, struct sockaddr_storage *r, + uint16_t family) +{ + struct sockaddr_in *l4, *r4; + struct sockaddr_in6 *l6, *r6; + + if (family == AF_INET) { + l4 = (struct sockaddr_in *)l; + r4 = (struct sockaddr_in *)r; + return (l4->sin_port == r4->sin_port && + l4->sin_addr.s_addr == r4->sin_addr.s_addr); + } else { + l6 = (struct sockaddr_in6 *)l; + r6 = (struct sockaddr_in6 *)r; + return (l6->sin6_port == r6->sin6_port && + memcmp(&l6->sin6_addr, &r6->sin6_addr, + sizeof(l6->sin6_addr))); + } +} + +static inline void +netfe_pkt_addr(const struct rte_mbuf *m, struct sockaddr_storage *ps, + uint16_t family) +{ + const struct ipv4_hdr *ip4h; + const struct ipv6_hdr *ip6h; + const struct udp_hdr *udph; + struct sockaddr_in *in4; + struct sockaddr_in6 *in6; + + NETFE_PKT_DUMP(m); + + udph = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, -m->l4_len); + + if (family == AF_INET) { + in4 = (struct sockaddr_in *)ps; + ip4h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, + -(m->l4_len + m->l3_len)); + in4->sin_port = udph->src_port; + in4->sin_addr.s_addr = ip4h->src_addr; + } else { + in6 = (struct sockaddr_in6 *)ps; + ip6h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, + -(m->l4_len + m->l3_len)); + in6->sin6_port = udph->src_port; + rte_memcpy(&in6->sin6_addr, ip6h->src_addr, + sizeof(in6->sin6_addr)); + } +} + +static inline uint32_t +pkt_eq_addr(struct rte_mbuf *pkt[], uint32_t num, uint16_t family, + struct sockaddr_storage *cur, struct sockaddr_storage *nxt) +{ + uint32_t i; + + for (i = 0; i != num; i++) { + netfe_pkt_addr(pkt[i], nxt, family); + if (netfe_addr_eq(cur, nxt, family) == 0) + break; + } + + return i; +} + +static inline void +netfe_fwd_udp(uint32_t lcore, struct netfe_stream *fes) +{ + uint32_t i, j, k, n, x; + uint16_t family; + void *pi0, *pi1, *pt; + struct rte_mbuf **pkt; + struct netfe_stream *fed; + struct sockaddr_storage in[2]; + + family = fes->family; + n = fes->pbuf.num; + pkt = fes->pbuf.pkt; + + if (n == 0) + return; + + in[0].ss_family = family; + in[1].ss_family = family; + pi0 = &in[0]; + pi1 = &in[1]; + + netfe_pkt_addr(pkt[0], pi0, family); + + x = 0; + for (i = 0; i != n; i = j) { + + j = i + pkt_eq_addr(&pkt[i + 1], + n - i - 1, family, pi0, pi1) + 1; + + fed = find_fwd_dst_udp(lcore, fes, + (const struct sockaddr *)pi0); + if (fed != NULL) { + + /** + * TODO: cannot use function pointers for unequal + * number of params. + */ + k = tle_udp_stream_send(fed->s, pkt + i, j - i, + (const struct sockaddr *) + &fes->fwdprm.remote_addr); + + NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) " + "returns %u\n", + __func__, lcore, proto_name[fes->proto], + fed->s, j - i, k); + + fed->stat.txp += k; + fed->stat.drops += j - i - k; + fes->stat.fwp += k; + + } else { + NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n", + __func__, lcore, fes->s, j - i); + for (k = i; k != j; k++) { + NETFE_TRACE("%s(%u, %p): free(%p);\n", + __func__, lcore, fes->s, pkt[k]); + rte_pktmbuf_free(pkt[j]); + } + fes->stat.drops += j - i; + } + + /* copy unforwarded mbufs. */ + for (i += k; i != j; i++, x++) + pkt[x] = pkt[i]; + + /* swap the pointers */ + pt = pi0; + pi0 = pi1; + pi1 = pt; + } + + fes->pbuf.num = x; + + if (x != 0) { + tle_event_raise(fes->txev); + fes->stat.txev[TLE_SEV_UP]++; + } + + if (n == RTE_DIM(fes->pbuf.pkt)) { + tle_event_active(fes->rxev, TLE_SEV_UP); + fes->stat.rxev[TLE_SEV_UP]++; + } +} + +static inline void +netfe_rxtx_process_udp(__rte_unused uint32_t lcore, struct netfe_stream *fes) +{ + uint32_t i, j, k, n; + uint16_t family; + void *pi0, *pi1, *pt; + struct rte_mbuf **pkt; + struct sockaddr_storage in[2]; + + family = fes->family; + n = fes->pbuf.num; + pkt = fes->pbuf.pkt; + + /* there is nothing to send. */ + if (n == 0) { + tle_event_idle(fes->txev); + fes->stat.txev[TLE_SEV_IDLE]++; + return; + } + + in[0].ss_family = family; + in[1].ss_family = family; + pi0 = &in[0]; + pi1 = &in[1]; + + netfe_pkt_addr(pkt[0], pi0, family); + + for (i = 0; i != n; i = j) { + + j = i + pkt_eq_addr(&pkt[i + 1], + n - i - 1, family, pi0, pi1) + 1; + + /** + * TODO: cannot use function pointers for unequal param num. + */ + k = tle_udp_stream_send(fes->s, pkt + i, j - i, + (const struct sockaddr *)pi0); + + NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n", + __func__, lcore, proto_name[fes->proto], + fes->s, j - i, k); + fes->stat.txp += k; + fes->stat.drops += j - i - k; + + i += k; + + /* stream send buffer is full */ + if (i != j) + break; + + /* swap the pointers */ + pt = pi0; + pi0 = pi1; + pi1 = pt; + } + + /* not able to send anything. */ + if (i == 0) + return; + + if (n == RTE_DIM(fes->pbuf.pkt)) { + /* mark stream as readable */ + tle_event_active(fes->rxev, TLE_SEV_UP); + fes->stat.rxev[TLE_SEV_UP]++; + } + + /* adjust pbuf array. */ + fes->pbuf.num = n - i; + for (j = i; j != n; j++) + pkt[j - i] = pkt[j]; +} + +static inline void +netfe_tx_process_udp(uint32_t lcore, struct netfe_stream *fes) +{ + uint32_t i, k, n; + + /* refill with new mbufs. */ + pkt_buf_fill(lcore, &fes->pbuf, fes->txlen); + + n = fes->pbuf.num; + if (n == 0) + return; + + /** + * TODO: cannot use function pointers for unequal param num. + */ + k = tle_udp_stream_send(fes->s, fes->pbuf.pkt, n, NULL); + NETFE_TRACE("%s(%u): tle_%s_stream_send(%p, %u) returns %u\n", + __func__, lcore, proto_name[fes->proto], fes->s, n, k); + fes->stat.txp += k; + fes->stat.drops += n - k; + + if (k == 0) + return; + + /* adjust pbuf array. */ + fes->pbuf.num = n - k; + for (i = k; i != n; i++) + fes->pbuf.pkt[i - k] = fes->pbuf.pkt[i]; +} + +static inline void +netfe_lcore_udp(void) +{ + struct netfe_lcore *fe; + uint32_t j, n, lcore; + struct netfe_stream *fs[MAX_PKT_BURST]; + + fe = RTE_PER_LCORE(_fe); + if (fe == NULL) + return; + + lcore = rte_lcore_id(); + + /* look for rx events */ + n = tle_evq_get(fe->rxeq, (const void **)(uintptr_t)fs, RTE_DIM(fs)); + + if (n != 0) { + NETFE_TRACE("%s(%u): tle_evq_get(rxevq=%p) returns %u\n", + __func__, lcore, fe->rxeq, n); + for (j = 0; j != n; j++) + netfe_rx_process(lcore, fs[j]); + } + + /* look for tx events */ + n = tle_evq_get(fe->txeq, (const void **)(uintptr_t)fs, RTE_DIM(fs)); + + if (n != 0) { + NETFE_TRACE("%s(%u): tle_evq_get(txevq=%p) returns %u\n", + __func__, lcore, fe->txeq, n); + for (j = 0; j != n; j++) { + if (fs[j]->op == RXTX) + netfe_rxtx_process_udp(lcore, fs[j]); + else if (fs[j]->op == FWD) + netfe_fwd_udp(lcore, fs[j]); + else if (fs[j]->op == TXONLY) + netfe_tx_process_udp(lcore, fs[j]); + } + } +} + +static void +netfe_lcore_fini_udp(void) +{ + struct netfe_lcore *fe; + uint32_t i; + struct tle_udp_stream_param uprm; + struct netfe_stream *fes; + + fe = RTE_PER_LCORE(_fe); + if (fe == NULL) + return; + + for (i = 0; i != fe->use.num; i++) { + fes = netfe_get_stream(&fe->use); + tle_udp_stream_get_param(fes->s, &uprm); + netfe_stream_dump(fes, &uprm.local_addr, &uprm.remote_addr); + netfe_stream_close(fe, fes); + } + + tle_evq_destroy(fe->txeq); + tle_evq_destroy(fe->rxeq); + RTE_PER_LCORE(_fe) = NULL; + rte_free(fe); +} + +static int +lcore_main_udp(void *arg) +{ + int32_t rc; + uint32_t lcore; + struct lcore_prm *prm; + + prm = arg; + lcore = rte_lcore_id(); + + RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n", + __func__, lcore); + + rc = 0; + + /* lcore FE init. */ + if (prm->fe.max_streams != 0) + rc = netfe_lcore_init_udp(&prm->fe); + + /* lcore FE init. */ + if (rc == 0 && prm->be.lc != NULL) + rc = netbe_lcore_setup(prm->be.lc); + + if (rc != 0) + sig_handle(SIGQUIT); + + while (force_quit == 0) { + netfe_lcore_udp(); + netbe_lcore(); + } + + RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n", + __func__, lcore); + + netfe_lcore_fini_udp(); + netbe_lcore_clear(); + + return rc; +} + +#endif /* UDP_H_ */ diff --git a/examples/udpfwd/Makefile b/examples/udpfwd/Makefile deleted file mode 100644 index fae1c34..0000000 --- a/examples/udpfwd/Makefile +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2016 Intel Corporation. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -ifeq ($(RTE_SDK),) -$(error "Please define RTE_SDK environment variable") -endif - -ifeq ($(RTE_TARGET),) -$(error "Please define RTE_TARGET environment variable") -endif - -ifeq ($(TLDK_ROOT),) -$(error "Please define TLDK_ROOT environment variable") -endif - -include $(RTE_SDK)/mk/rte.vars.mk - -# binary name -APP = udpfwd - -# all source are stored in SRCS-y -SRCS-y += parse.c -SRCS-y += pkt.c -SRCS-y += main.c - -CFLAGS += $(WERROR_FLAGS) -CFLAGS += -I$(RTE_OUTPUT)/include - -LDLIBS += -L$(RTE_OUTPUT)/lib -LDLIBS += -ltle_udp - -EXTRA_CFLAGS += -O3 -CFLAGS_parse.o += -D_GNU_SOURCE -CFLAGS_main.o += -D_GNU_SOURCE - -include $(TLDK_ROOT)/mk/tle.app.mk diff --git a/examples/udpfwd/README b/examples/udpfwd/README deleted file mode 100644 index 91b6e76..0000000 --- a/examples/udpfwd/README +++ /dev/null @@ -1,141 +0,0 @@ -Introduction -============ - -udpfwd is a sample application to demonstrate and test libtle_udp. -Depending on configuration it can do simple send/recv or both over -opened udp streams. Also it implements ability to do UDP datagram -forwarding between different streams, so it is possible to use that -application as some sort of 'UDP proxy'. -The application can reassemble input fragmented IP packets, -and fragment outgoing IP packets (if destination MTU is less then packet size). -To build and run the application DPDK and TLDK libraries are required. - -Logically the application is divided into two parts: - -- Back End (BE) -BE is responsible for: - - RX over DPDK ports and feed them into UDP TLDK context(s) - (via tle_udp_rx_bulk). - - retrieve packets ready to be send out from UDP TLDK context(s) - and TX them over destined DPDK port. -Multiple RX/TX queues per port are supported by RSS. Right now the number of -TX is same as the number of RX queue. -Each BE lcore can serve multiple DPDK ports, TLDK UDP contexts. - -- Front End (FE) -FE responsibility is to open configured UDP streams and perform -send/recv over them. These streams can belong to different UDP contexts. - -Right now each lcore can act as BE and/or FE. - -Usage -===== - -udpfwd -- \ - -P | --promisc /* promiscuous mode enabled. */ \ - -R | --rbufs /* max recv buffers per stream. */ \ - -S | --sbufs /* max send buffers per stream. */ \ - -s | --streams /* streams to open per context. */ \ - -b | --becfg /* backend configuration file. */ \ - -f | --fecfg /* frontend configuration file. */ \ - ... - -port_params: port=,lcore=[-],\ -[rx_offload=,tx_offload=,mtu=,ipv4=,ipv6=] - -port_params are used to configure the particular DPDK device (rte_ethdev port), -and specify BE lcore that will do RX/TX from/to the device and manage -BE part of corresponding UDP context. Multiple BE lcore can be specified. - -port - DPDK port id (multiple queues are supported when multiple lcore - is specified for a port). -lcore - EAL lcore id to do IO over that port (rx_burst/tx_burst). - several ports can be managed by the same lcore, and same port can - belong to more than one lcore. -rx_offload - RX HW offload capabilities to enable/use on this port. - (bitmask of DEV_RX_OFFLOAD_* values). -tx_offload - TX HW offload capabilities to enable/use on this port. - (bitmask of DEV_TX_OFFLOAD_* values). -mtu - MTU to be used on that port - ( = UDP data size + L2/L3/L4 headers sizes, default=1514). -ipv4 - ipv4 address to assign to that port. -ipv6 - ipv6 address to assign to that port. - -At least one of ipv4/ipv6 values have to be specified for each port. - -As an example: -udpfwd --lcores='3,6,8' -w 01:00.0 -- \ ---promisc --rbufs 0x1000 --sbufs 0x1000 --streams 0x100 \ ---fecfg ./fe.cfg --becfg ./be.cfg \ -port=0,lcore=6,lcore=8,rx_offload=0xf,tx_offload=0,\ -ipv4=192.168.1.233,ipv6=2001:4860:b002::28 - -Will create TLDK UDP context on lcore=6 and lcore=8 (BE lcore) to manage -DPDK port 0. Will assign IPv4 address 192.168.1.233 and IPv6 address -2001:4860:b002::28 to that port. -The following supported by DPDK RX HW offloads: - DEV_RX_OFFLOAD_VLAN_STRIP, - DEV_RX_OFFLOAD_IPV4_CKSUM, - DEV_RX_OFFLOAD_UDP_CKSUM, - DEV_RX_OFFLOAD_TCP_CKSUM -will be enabled on that port. -No HW TX offloads will be enabled. - -If multiple lcore is specified per DPDK port, the following RSS hash will -be enabled on that port: - ETH_RSS_UDP - - -Fornt-End (FE) and Back-End (BE) configuration files format: ------------------------------------------------------------- - - each record on a separate line. - - lines started with '#' are treated as comments. - - empty lines (containing whitespace chars only) are ignored. - - kvargs style format for each record. - - each FE record correspond to at least one stream to be opened - (could be multiple streams in case of op="fwd"). - - each BE record define a ipv4/ipv6 destination. - -FE config record format: ------------------------- - -lcore=,op=<"rx|tx|echo|fwd">,\ -laddr=,lport=,raddr=,rport=,\ -[txlen=,fwladdr=,fwlport=,fwraddr=,fwrport=,\ -belcore=] - -lcore - EAL lcore to manage that stream(s) in the FE. -op - operation to perform on that stream: - "rx" - do receive only on that stream. - "tx" - do send only on that stream. - "echo" - mimic recvfrom(..., &addr);sendto(..., &addr); - on that stream. - "fwd" - forward packets between streams. -laddr - local address for the stream to open. -lport - local port for the stream to open. -raddr - remote address for the stream to open. -rport - remote port for the stream to open. -txlen - data length to send with each packet ("tx" mode only). -fwladdr - local address for the forwarding stream(s) to open - ("fwd mode only). -fwlport - local port for the forwarding stream(s) to open - ("fwd mode only). -fwraddr - remote address for the forwarding stream(s) to open - ("fwd mode only). -fwrport - remote port for the forwarding stream(s) to open - ("fwd mode only). -belcore - EAL lcore to manage that stream(s) in the BE. - -Refer to fe.cfg for an example. - -BE config record format: ------------------------- - -port=,addr=,masklen=,mac= - -port - port number to be used to send packets to the destination. -addr - destionation network address. -masklen - desitantion network prefix length. -mac - destination ethernet address. - -Refer to fe.cfg for an example. diff --git a/examples/udpfwd/be.cfg b/examples/udpfwd/be.cfg deleted file mode 100644 index 5c1d173..0000000 --- a/examples/udpfwd/be.cfg +++ /dev/null @@ -1,5 +0,0 @@ -# -# udpfwd BE cconfig file exaple -# -port=0,masklen=16,addr=192.168.0.0,mac=01:de:ad:be:ef:01 -port=0,addr=2001:4860:b002::,masklen=64,mac=01:de:ad:be:ef:01 diff --git a/examples/udpfwd/dpdk_version.h b/examples/udpfwd/dpdk_version.h deleted file mode 100644 index 4f6bdfb..0000000 --- a/examples/udpfwd/dpdk_version.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef DPDK_VERSION_H_ -#define DPDK_VERSION_H_ - -#include - -#ifdef RTE_VER_MAJOR -#if RTE_VER_MAJOR >= 16 && RTE_VER_MINOR >= 4 -#define DPDK_VERSION_GE_1604 -#endif -#elif defined(RTE_VER_YEAR) -#if RTE_VERSION >= RTE_VERSION_NUM(16,4,0,0) -#define DPDK_VERSION_GE_1604 -#endif -#else -#error "RTE_VER_MAJOR and RTE_VER_YEAR are undefined!" -#endif - -#endif /* DPDK_VERSION_H_ */ diff --git a/examples/udpfwd/fe.cfg b/examples/udpfwd/fe.cfg deleted file mode 100644 index 2706323..0000000 --- a/examples/udpfwd/fe.cfg +++ /dev/null @@ -1,24 +0,0 @@ -# -# udpfwd FE config file example -# - -# open IPv4 stream with local_addr=192.168.1.233:32768, -# and remote_addr as wildcard (any remote addressi/port allowed). -# use it echo mode - for any received packet - send it back to the source -lcore=3,op=echo,laddr=192.168.1.233,lport=0x8000,raddr=0.0.0.0,rport=0 - -# open IPv4 stream with specified local/remote address/port and -# do send only over that stream. -lcore=3,op=tx,laddr=192.168.1.233,lport=0x8001,raddr=192.168.1.56,rport=0x200,txlen=72 - -# open IPv6 stream with specified local port (512) probably over multiple -# eth ports, and do recv only over that stream. -lcore=3,op=rx,laddr=::,lport=0x200,raddr=::,rport=0,txlen=72 - -# fwd mode example. -# open IPv4 stream on local port 11211 (memcached) over all possible ports. -# for each new flow, sort of tunnel will be created, i.e: -# new stream will be opend to communcate with forwarding remote address, -# so all packets with will be forwarded to -# and visa-versa. -lcore=3,op=fwd,laddr=0.0.0.0,lport=11211,raddr=0.0.0.0,rport=0,fwladdr=::,fwlport=0,fwraddr=2001:4860:b002::56,fwrport=11211 diff --git a/examples/udpfwd/fwdtbl.h b/examples/udpfwd/fwdtbl.h deleted file mode 100644 index 1c4265e..0000000 --- a/examples/udpfwd/fwdtbl.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __FWDTBL_H__ -#define __FWDTBL_H__ - -struct fwd4_key { - uint32_t port; - struct in_addr addr; -} __attribute__((__packed__)); - -struct fwd6_key { - uint32_t port; - struct in6_addr addr; -} __attribute__((__packed__)); - -union fwd_key { - struct fwd4_key k4; - struct fwd6_key k6; -}; - -static struct rte_hash * -fwd_tbl_key_prep(const struct netfe_lcore *fe, uint16_t family, - const struct sockaddr *sa, union fwd_key *key) -{ - struct rte_hash *h; - const struct sockaddr_in *sin4; - const struct sockaddr_in6 *sin6; - - if (family == AF_INET) { - h = fe->fw4h; - sin4 = (const struct sockaddr_in *)sa; - key->k4.port = sin4->sin_port; - key->k4.addr = sin4->sin_addr; - } else { - h = fe->fw6h; - sin6 = (const struct sockaddr_in6 *)sa; - key->k6.port = sin6->sin6_port; - key->k6.addr = sin6->sin6_addr; - } - - return h; -} - -static int -fwd_tbl_add(struct netfe_lcore *fe, uint16_t family, const struct sockaddr *sa, - struct netfe_stream *data) -{ - int32_t rc; - struct rte_hash *h; - union fwd_key key; - - h = fwd_tbl_key_prep(fe, family, sa, &key); - rc = rte_hash_add_key_data(h, &key, data); - return rc; -} - -static struct netfe_stream * -fwd_tbl_lkp(struct netfe_lcore *fe, uint16_t family, const struct sockaddr *sa) -{ - int rc; - void *d; - struct rte_hash *h; - union fwd_key key; - - h = fwd_tbl_key_prep(fe, family, sa, &key); - rc = rte_hash_lookup_data(h, &key, &d); - if (rc < 0) - d = NULL; - return d; -} - -static int -fwd_tbl_init(struct netfe_lcore *fe, uint16_t family, uint32_t lcore) -{ - int32_t rc; - struct rte_hash **h; - struct rte_hash_parameters hprm; - char buf[RTE_HASH_NAMESIZE]; - - if (family == AF_INET) { - snprintf(buf, sizeof(buf), "fwd4tbl@%u", lcore); - h = &fe->fw4h; - hprm.key_len = sizeof(struct fwd4_key); - } else { - snprintf(buf, sizeof(buf), "fwd6tbl@%u", lcore); - h = &fe->fw6h; - hprm.key_len = sizeof(struct fwd6_key); - } - - hprm.name = buf; - hprm.entries = RTE_MAX(2 * fe->snum, 0x10U); - hprm.socket_id = rte_lcore_to_socket_id(lcore); - hprm.hash_func = NULL; - hprm.hash_func_init_val = 0; - - *h = rte_hash_create(&hprm); - if (*h == NULL) - rc = (rte_errno != 0) ? -rte_errno : -ENOMEM; - else - rc = 0; - return rc; -} - -#endif /* __FWDTBL_H__ */ diff --git a/examples/udpfwd/gen_fe_cfg.py b/examples/udpfwd/gen_fe_cfg.py deleted file mode 100755 index dbb500b..0000000 --- a/examples/udpfwd/gen_fe_cfg.py +++ /dev/null @@ -1,199 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (c) 2016 Intel Corporation. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# -# generate FE config script -# - -import sys, optparse, math - -def print_usage (): - print "Usage: " + sys.argv[0] + " [-f fe_lcore_list] [-b be_lcore_list]" - print " [-p start_port] [-n number_of_streams]" - print " [-m mode] [-q local_address] [-r remote_address]" - print " [-s fwd_local_address] [-t fwd_remote_address]" - print " [-l txlen] [-4/-6] [-H]" - print - print "Options" - print " -f, --fe_lcore_list: list of lcores used for FE. Multiple " \ - "lcores are comma-separated, within double quote" - print " -b, --be_lcore_list: list of lcores used for BE. Multiple " \ - "lcores are comma-separated, within double quote" - print " -p, --start_port: starting UDP port number" - print " -n, --number_of_streams: number of streams to be generated" - print " -m, --mode: mode of the application. [echo, rx, tx, fwd]" - print " -q, --local_address: local address of the stream" - print " -r, --remote_address: remote address of the stream" - print " -s, --fwd_local_address: forwarding local address of the stream" - print " -t, --fwd_remote_address: forwarding remote address of the " \ - "stream" - print " -l, --txlen: transmission length for rx/tx mode" - print " -H: if port number to printed in hex format" - print " -4/-6: if default ip addresses to be printed in ipv4/ipv6 format" - -def align_power_of_2(x): - return 2**(x-1).bit_length() - -def print_stream(mode, la, ra, fwd_la, fwd_ra, lcore, belcore, lport, - fwrport, txlen): - if support_hex != 0: - lport_str = str(format(lport, '#x')) - fwrport_str = str(format(fwrport, '#x')) - else: - lport_str = str(lport) - fwrport_str = str(fwrport) - - stream = "lcore=" + str(lcore) + ",belcore=" + str(belcore) + ",op=" + mode - stream += ",laddr=" + la + ",lport=" + lport_str - stream += ",raddr=" + ra + ",rport=0" - - if mode == 'fwd': - stream += ",fwladdr=" + fwd_la + ",fwlport=0,fwraddr=" + fwd_ra - stream += ",fwrport=" + fwrport_str - - if mode == 'rx' or mode == 'tx': - stream += ",txlen=" + str(txlen) - - print stream - -parser = optparse.OptionParser() -parser.add_option("-b", "--be_lcore_list", dest = "be_lcore_list", - help = "BE lcore lists.") -parser.add_option("-f", "--fe_lcore_list", dest = "fe_lcore_list", - help = "FE lcore lists.") -parser.add_option("-p", "--start_port", dest = "start_port", - help = "start port.") -parser.add_option("-n", "--number_of_streams", dest = "number_of_streams", - help = "number of streams.") -parser.add_option("-m", "--mode", dest = "mode", - help = "mode (op, rx, tx, fwd).") -parser.add_option("-q", "--local_address", dest = "local_address", - help = "local_address.") -parser.add_option("-r", "--remote_address", dest = "remote_address", - help = "remote_address.") -parser.add_option("-s", "--fwd_local_address", dest = "fwd_local_address", - help = "fwd_local_address.") -parser.add_option("-t", "--fwd_remote_address", dest = "fwd_remote_address", - help = "fwd_remote_address.") -parser.add_option("-l", "--txlen", dest = "txlen", help = "txlen.") -parser.add_option("-4", action = "store_false", dest = "ipv6", - help = "IPv4/IPv6") -parser.add_option("-6", action = "store_true", dest = "ipv6", - help = "IPv4/IPv6") -parser.add_option("-H", action = "store_true", dest = "support_hex", - help = "print ports in hexa format") - -(options, args) = parser.parse_args() - -if len(sys.argv) == 1: - print - print_usage() - print - sys.exit() - -supported_modes = ['echo', 'rx', 'tx', 'fwd'] -support_hex = 0 -txlen = 72 - -if options.ipv6 == True: - la = '::' - ra = '::' - fwd_la = '::' - fwd_ra = '::' -else: - la = '0.0.0.0' - ra = '0.0.0.0' - fwd_la = '0.0.0.0' - fwd_ra = '0.0.0.0' - -if options.support_hex == True: - support_hex = 1 - -if options.txlen != None: - txlen = options.txlen - -if options.fe_lcore_list != None: - felcore_list = list(map(int, options.fe_lcore_list.split(","))) - felcore_list.sort() -else: - felcore_list = [] - -if options.be_lcore_list != None: - belcore_list = list(map(int, options.be_lcore_list.split(","))) - belcore_list.sort() -else: - print "default BE lcore list = [ 1 ]" - belcore_list = [1] - -if options.mode != None: - mode = options.mode - if mode not in supported_modes: - print "Supported modes: " + str(supported_modes) - print "Provided mode \"" + mode + "\" is not supported. Terminating..." - sys.exit() -else: - print "default mode = echo" - mode = "echo" - -if options.start_port != None: - start_port = int(options.start_port) -else: - print "default start_port = 32768" - start_port = 32768 - -if options.number_of_streams != None: - number_of_streams = int(options.number_of_streams) -else: - print "default number_of_streams = 1" - number_of_streams = 1 - -fwd_start_port = 53248 - -if options.local_address != None: - la = options.local_address - -if options.remote_address != None: - ra = options.remote_address - -if options.fwd_local_address != None: - fwd_la = options.fwd_local_address - -if options.fwd_remote_address != None: - fwd_ra = options.fwd_remote_address - -belcore_count = len(belcore_list) -align_belcore_count = align_power_of_2(belcore_count) -nb_bits = int(math.log(align_belcore_count, 2)) - -felcore_count = len(felcore_list) -if felcore_count != 0: - if number_of_streams % felcore_count == 0: - nb_stream_per_flc = number_of_streams / felcore_count - else: - nb_stream_per_flc = (number_of_streams / felcore_count) + 1 - -for i in range(start_port, start_port + number_of_streams): - k = i - start_port - align_belcore_count = align_power_of_2(belcore_count) - blc_indx = (i % align_belcore_count) % belcore_count - belcore = belcore_list[blc_indx] - if felcore_count != 0: - flc_indx = k / nb_stream_per_flc - felcore = felcore_list[flc_indx] - else: - felcore = belcore - - print_stream(mode, la, ra, fwd_la, fwd_ra, felcore, belcore, i, - fwd_start_port + k, txlen) diff --git a/examples/udpfwd/main.c b/examples/udpfwd/main.c deleted file mode 100644 index 0463588..0000000 --- a/examples/udpfwd/main.c +++ /dev/null @@ -1,2134 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "netbe.h" -#include "parse.h" - -#define MAX_RULES 0x100 -#define MAX_TBL8 0x800 - -#define RX_RING_SIZE 0x400 -#define TX_RING_SIZE 0x800 - -#define MPOOL_CACHE_SIZE 0x100 -#define MPOOL_NB_BUF 0x20000 - -#define FRAG_MBUF_BUF_SIZE (RTE_PKTMBUF_HEADROOM + TLE_UDP_MAX_HDR) -#define FRAG_TTL MS_PER_S -#define FRAG_TBL_BUCKET_ENTRIES 16 - -#define FIRST_PORT 0x8000 - -#define RX_CSUM_OFFLOAD (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM) -#define TX_CSUM_OFFLOAD (DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM) - -#define OPT_SHORT_SBULK 'B' -#define OPT_LONG_SBULK "sburst" - -#define OPT_SHORT_PROMISC 'P' -#define OPT_LONG_PROMISC "promisc" - -#define OPT_SHORT_RBUFS 'R' -#define OPT_LONG_RBUFS "rbufs" - -#define OPT_SHORT_SBUFS 'S' -#define OPT_LONG_SBUFS "sbufs" - -#define OPT_SHORT_STREAMS 's' -#define OPT_LONG_STREAMS "streams" - -#define OPT_SHORT_FECFG 'f' -#define OPT_LONG_FECFG "fecfg" - -#define OPT_SHORT_BECFG 'b' -#define OPT_LONG_BECFG "becfg" - -RTE_DEFINE_PER_LCORE(struct netbe_lcore *, _be); -RTE_DEFINE_PER_LCORE(struct netfe_lcore *, _fe); - -#include "fwdtbl.h" - -static const struct option long_opt[] = { - {OPT_LONG_BECFG, 1, 0, OPT_SHORT_BECFG}, - {OPT_LONG_FECFG, 1, 0, OPT_SHORT_FECFG}, - {OPT_LONG_PROMISC, 0, 0, OPT_SHORT_PROMISC}, - {OPT_LONG_RBUFS, 1, 0, OPT_SHORT_RBUFS}, - {OPT_LONG_SBUFS, 1, 0, OPT_SHORT_SBUFS}, - {OPT_LONG_SBULK, 1, 0, OPT_SHORT_SBULK}, - {OPT_LONG_STREAMS, 1, 0, OPT_SHORT_STREAMS}, - {NULL, 0, 0, 0} -}; - -/** - * IPv4 Input size in bytes for RSS hash key calculation. - * source address, destination address, source port, and destination port. - */ -#define IPV4_TUPLE_SIZE 12 - -/** - * IPv6 Input size in bytes for RSS hash key calculation. - * source address, destination address, source port, and destination port. - */ -#define IPV6_TUPLE_SIZE 36 - -/** - * Location to be modified to create the IPv4 hash key which helps - * to distribute packets based on the destination UDP port. - */ -#define RSS_HASH_KEY_DEST_PORT_LOC_IPV4 15 - -/* - * Location to be modified to create the IPv6 hash key which helps - * to distribute packets based on the destination UDP port. - */ -#define RSS_HASH_KEY_DEST_PORT_LOC_IPV6 39 - -/** - * Size of the rte_eth_rss_reta_entry64 array to update through - * rte_eth_dev_rss_reta_update. - */ -#define RSS_RETA_CONF_ARRAY_SIZE (ETH_RSS_RETA_SIZE_512/RTE_RETA_GROUP_SIZE) - -#define NETBE_REALLOC(loc, n) do { \ - (loc) = rte_realloc((loc), sizeof(*(loc)) * (n), RTE_CACHE_LINE_SIZE); \ - if ((loc) == NULL) { \ - RTE_LOG(ERR, USER1, \ - "%s: failed to reallocate memory\n", \ - __func__); \ - return -ENOMEM; \ - } \ -} while (0) - -static volatile int force_quit; - -static struct netbe_cfg becfg; -static struct rte_mempool *mpool[RTE_MAX_NUMA_NODES + 1]; -static struct rte_mempool *frag_mpool[RTE_MAX_NUMA_NODES + 1]; - -static const struct rte_eth_conf port_conf_default = { - .rxmode = { - .max_rx_pkt_len = ETHER_MAX_VLAN_FRAME_LEN, - .hw_vlan_strip = 1, - .jumbo_frame = 1, - }, -}; - -#include "parse.h" -#include "main_dpdk_legacy.h" - -static void -sig_handle(int signum) -{ - RTE_LOG(ERR, USER1, "%s(%d)\n", __func__, signum); - force_quit = 1; -} - -static void -prepare_hash_key(struct netbe_port *uprt, uint8_t key_size, uint16_t family) -{ - uint32_t align_nb_q; - - align_nb_q = rte_align32pow2(uprt->nb_lcore); - memset(uprt->hash_key, 0, RSS_HASH_KEY_LENGTH); - uprt->hash_key_size = key_size; - if (family == AF_INET) - uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV4] = align_nb_q; - else - uprt->hash_key[RSS_HASH_KEY_DEST_PORT_LOC_IPV6] = align_nb_q; -} - -static uint32_t -qidx_from_hash_index(uint32_t hash, uint32_t align_nb_q) -{ - uint32_t i, nb_bit, q; - - nb_bit = (sizeof(uint32_t) * CHAR_BIT) - __builtin_clz(align_nb_q - 1); - q = (hash & 1); - for (i = 1; i < nb_bit; i++) { - hash >>= 1; - q <<= 1; - q |= (hash & 1); - } - - return q; -} - -static int -update_rss_conf(struct netbe_port *uprt, - const struct rte_eth_dev_info *dev_info, - struct rte_eth_conf *port_conf) -{ - uint8_t hash_key_size; - - if (uprt->nb_lcore > 1) { - if (dev_info->hash_key_size > 0) - hash_key_size = dev_info->hash_key_size; - else { - RTE_LOG(ERR, USER1, - "%s: dev_info did not provide a valid hash key size\n", - __func__); - return -EINVAL; - } - - if (uprt->ipv4 != INADDR_ANY && - memcmp(&uprt->ipv6, &in6addr_any, - sizeof(uprt->ipv6)) != 0) { - RTE_LOG(ERR, USER1, - "%s: RSS for both IPv4 and IPv6 not supported!\n", - __func__); - return -EINVAL; - } else if (uprt->ipv4 != INADDR_ANY) { - prepare_hash_key(uprt, hash_key_size, AF_INET); - } else if (memcmp(&uprt->ipv6, &in6addr_any, sizeof(uprt->ipv6)) - != 0) { - prepare_hash_key(uprt, hash_key_size, AF_INET6); - } else { - RTE_LOG(ERR, USER1, - "%s: No IPv4 or IPv6 address is found!\n", - __func__); - return -EINVAL; - } - port_conf->rxmode.mq_mode = ETH_MQ_RX_RSS; - port_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_UDP; - port_conf->rx_adv_conf.rss_conf.rss_key_len = hash_key_size; - port_conf->rx_adv_conf.rss_conf.rss_key = uprt->hash_key; - } - - return 0; -} - -static int -update_rss_reta(struct netbe_port *uprt, - const struct rte_eth_dev_info *dev_info) -{ - struct rte_eth_rss_reta_entry64 reta_conf[RSS_RETA_CONF_ARRAY_SIZE]; - int32_t i, rc, align_nb_q; - int32_t q_index, idx, shift; - - if (uprt->nb_lcore > 1) { - if (dev_info->reta_size == 0) { - RTE_LOG(ERR, USER1, - "%s: Redirection table size 0 is invalid for RSS\n", - __func__); - return -EINVAL; - } - RTE_LOG(NOTICE, USER1, - "%s: The reta size of port %d is %u\n", - __func__, uprt->id, dev_info->reta_size); - - if (dev_info->reta_size > ETH_RSS_RETA_SIZE_512) { - RTE_LOG(ERR, USER1, - "%s: More than %u entries of Reta not supported\n", - __func__, ETH_RSS_RETA_SIZE_512); - return -EINVAL; - } - - memset(reta_conf, 0, sizeof(reta_conf)); - align_nb_q = rte_align32pow2(uprt->nb_lcore); - for (i = 0; i < align_nb_q; i++) { - q_index = qidx_from_hash_index(i, align_nb_q) % - uprt->nb_lcore; - - idx = i / RTE_RETA_GROUP_SIZE; - shift = i % RTE_RETA_GROUP_SIZE; - reta_conf[idx].mask |= (1ULL << shift); - reta_conf[idx].reta[shift] = q_index; - RTE_LOG(NOTICE, USER1, - "%s: port=%u RSS reta conf: hash=%u, q=%u\n", - __func__, uprt->id, i, q_index); - } - - rc = rte_eth_dev_rss_reta_update(uprt->id, - reta_conf, dev_info->reta_size); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s: Bad redirection table parameter, rc = %d\n", - __func__, rc); - return rc; - } - } - - return 0; -} - -/* - * Initilise DPDK port. - * In current version, multi-queue per port is used. - */ -static int -port_init(struct netbe_port *uprt) -{ - int32_t rc; - struct rte_eth_conf port_conf; - struct rte_eth_dev_info dev_info; - - rte_eth_dev_info_get(uprt->id, &dev_info); - if ((dev_info.rx_offload_capa & uprt->rx_offload) != uprt->rx_offload) { - RTE_LOG(ERR, USER1, - "port#%u supported/requested RX offloads don't match, " - "supported: %#x, requested: %#x;\n", - uprt->id, dev_info.rx_offload_capa, uprt->rx_offload); - return -EINVAL; - } - if ((dev_info.tx_offload_capa & uprt->tx_offload) != uprt->tx_offload) { - RTE_LOG(ERR, USER1, - "port#%u supported/requested TX offloads don't match, " - "supported: %#x, requested: %#x;\n", - uprt->id, dev_info.tx_offload_capa, uprt->tx_offload); - return -EINVAL; - } - - port_conf = port_conf_default; - if ((uprt->rx_offload & RX_CSUM_OFFLOAD) != 0) { - RTE_LOG(ERR, USER1, "%s(%u): enabling RX csum offload;\n", - __func__, uprt->id); - port_conf.rxmode.hw_ip_checksum = 1; - } - port_conf.rxmode.max_rx_pkt_len = uprt->mtu + ETHER_CRC_LEN; - - rc = update_rss_conf(uprt, &dev_info, &port_conf); - if (rc != 0) - return rc; - - rc = rte_eth_dev_configure(uprt->id, uprt->nb_lcore, uprt->nb_lcore, - &port_conf); - RTE_LOG(NOTICE, USER1, - "%s: rte_eth_dev_configure(prt_id=%u, nb_rxq=%u, nb_txq=%u) " - "returns %d;\n", __func__, uprt->id, uprt->nb_lcore, - uprt->nb_lcore, rc); - if (rc != 0) - return rc; - - return 0; -} - -static int -queue_init(struct netbe_port *uprt, struct rte_mempool *mp) -{ - int32_t socket, rc; - uint16_t q; - struct rte_eth_dev_info dev_info; - - rte_eth_dev_info_get(uprt->id, &dev_info); - - socket = rte_eth_dev_socket_id(uprt->id); - - dev_info.default_rxconf.rx_drop_en = 1; - - dev_info.default_txconf.tx_free_thresh = TX_RING_SIZE / 2; - if (uprt->tx_offload != 0) { - RTE_LOG(ERR, USER1, "%s(%u): enabling full featured TX;\n", - __func__, uprt->id); - dev_info.default_txconf.txq_flags = 0; - } - - for (q = 0; q < uprt->nb_lcore; q++) { - rc = rte_eth_rx_queue_setup(uprt->id, q, RX_RING_SIZE, - socket, &dev_info.default_rxconf, mp); - if (rc < 0) { - RTE_LOG(ERR, USER1, - "%s: rx queue=%u setup failed with error code: %d\n", - __func__, q, rc); - return rc; - } - } - - for (q = 0; q < uprt->nb_lcore; q++) { - rc = rte_eth_tx_queue_setup(uprt->id, q, TX_RING_SIZE, - socket, &dev_info.default_txconf); - if (rc < 0) { - RTE_LOG(ERR, USER1, - "%s: tx queue=%u setup failed with error code: %d\n", - __func__, q, rc); - return rc; - } - } - return 0; -} - -/* - * Check that lcore is enabled, not master, and not in use already. - */ -static int -check_lcore(uint32_t lc) -{ - if (rte_lcore_is_enabled(lc) == 0) { - RTE_LOG(ERR, USER1, "lcore %u is not enabled\n", lc); - return -EINVAL; - } - if (rte_eal_get_lcore_state(lc) == RUNNING) { - RTE_LOG(ERR, USER1, "lcore %u already running %p\n", - lc, lcore_config[lc].f); - return -EINVAL; - } - return 0; -} - -static void -log_netbe_prt(const struct netbe_port *uprt) -{ - uint32_t i; - char corelist[2 * RTE_MAX_LCORE + 1]; - char hashkey[2 * RSS_HASH_KEY_LENGTH]; - - memset(corelist, 0, sizeof(corelist)); - memset(hashkey, 0, sizeof(hashkey)); - for (i = 0; i < uprt->nb_lcore; i++) - if (i < uprt->nb_lcore - 1) - sprintf(corelist + (2 * i), "%u,", uprt->lcore[i]); - else - sprintf(corelist + (2 * i), "%u", uprt->lcore[i]); - - for (i = 0; i < uprt->hash_key_size; i++) - sprintf(hashkey + (2 * i), "%02x", uprt->hash_key[i]); - - RTE_LOG(NOTICE, USER1, - "uprt %p = , mtu = %u, " - "rx_offload = %u, tx_offload = %u,\n" - "ipv4 = %#x, " - "ipv6 = %04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx:%04hx, " - "mac = %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx>;\n" - "hashkey = %s;\n", - uprt, uprt->id, corelist, - uprt->mtu, uprt->rx_offload, uprt->tx_offload, - uprt->ipv4, - uprt->ipv6.s6_addr16[0], uprt->ipv6.s6_addr16[1], - uprt->ipv6.s6_addr16[2], uprt->ipv6.s6_addr16[3], - uprt->ipv6.s6_addr16[4], uprt->ipv6.s6_addr16[5], - uprt->ipv6.s6_addr16[6], uprt->ipv6.s6_addr16[7], - uprt->mac.addr_bytes[0], uprt->mac.addr_bytes[1], - uprt->mac.addr_bytes[2], uprt->mac.addr_bytes[3], - uprt->mac.addr_bytes[4], uprt->mac.addr_bytes[5], - hashkey); -} - -static void -log_netbe_cfg(const struct netbe_cfg *ucfg) -{ - uint32_t i; - - RTE_LOG(NOTICE, USER1, - "ucfg @ %p, prt_num = %u\n", ucfg, ucfg->prt_num); - - for (i = 0; i != ucfg->prt_num; i++) - log_netbe_prt(ucfg->prt + i); -} - -static int -pool_init(uint32_t sid) -{ - int32_t rc; - struct rte_mempool *mp; - char name[RTE_MEMPOOL_NAMESIZE]; - - snprintf(name, sizeof(name), "MP%u", sid); - mp = rte_pktmbuf_pool_create(name, MPOOL_NB_BUF, MPOOL_CACHE_SIZE, 0, - RTE_MBUF_DEFAULT_BUF_SIZE, sid - 1); - if (mp == NULL) { - rc = -rte_errno; - RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n", - __func__, sid - 1, rc); - return rc; - } - - mpool[sid] = mp; - return 0; -} - -static int -frag_pool_init(uint32_t sid) -{ - int32_t rc; - struct rte_mempool *frag_mp; - char frag_name[RTE_MEMPOOL_NAMESIZE]; - - snprintf(frag_name, sizeof(frag_name), "frag_MP%u", sid); - frag_mp = rte_pktmbuf_pool_create(frag_name, MPOOL_NB_BUF, - MPOOL_CACHE_SIZE, 0, FRAG_MBUF_BUF_SIZE, sid - 1); - if (frag_mp == NULL) { - rc = -rte_errno; - RTE_LOG(ERR, USER1, "%s(%d) failed with error code: %d\n", - __func__, sid - 1, rc); - return rc; - } - - frag_mpool[sid] = frag_mp; - return 0; -} - -static struct netbe_lcore * -find_initilized_lcore(struct netbe_cfg *cfg, uint32_t lc_num) -{ - uint32_t i; - - for (i = 0; i < cfg->cpu_num; i++) - if (cfg->cpu[i].id == lc_num) - return &cfg->cpu[i]; - - return NULL; -} - -/* - * Setup all enabled ports. - */ -static int -netbe_port_init(struct netbe_cfg *cfg, int argc, char *argv[]) -{ - int32_t rc; - uint32_t i, n, sid, j; - struct netbe_port *prt; - rte_cpuset_t cpuset; - uint32_t nc; - struct netbe_lcore *lc; - - n = (uint32_t)argc; - cfg->prt = rte_zmalloc(NULL, sizeof(struct netbe_port) * n, - RTE_CACHE_LINE_SIZE); - cfg->prt_num = n; - - rc = 0; - for (i = 0; i != n; i++) { - rc = parse_netbe_arg(cfg->prt + i, argv[i], &cpuset); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s: processing of \"%s\" failed with error code: %d\n", - __func__, argv[i], rc); - return rc; - } - } - - for (i = 0, nc = 0; i < RTE_MAX_LCORE; i++) - nc += CPU_ISSET(i, &cpuset); - cfg->cpu = rte_zmalloc(NULL, sizeof(struct netbe_lcore) * nc, - RTE_CACHE_LINE_SIZE); - - for (i = 0; i != cfg->prt_num; i++) { - prt = cfg->prt + i; - rc = port_init(prt); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s: port=%u init failed with error code: %d\n", - __func__, prt->id, rc); - return rc; - } - rte_eth_macaddr_get(prt->id, &prt->mac); - if (cfg->promisc) - rte_eth_promiscuous_enable(prt->id); - - for (j = 0; j < prt->nb_lcore; j++) { - rc = check_lcore(prt->lcore[j]); - if (rc != 0) - return rc; - - sid = rte_lcore_to_socket_id(prt->lcore[j]) + 1; - assert(sid < RTE_DIM(mpool)); - - if (mpool[sid] == NULL) { - rc = pool_init(sid); - if (rc != 0) - return rc; - } - - if (frag_mpool[sid] == NULL) { - rc = frag_pool_init(sid); - if (rc != 0) - return rc; - } - - rc = queue_init(prt, mpool[sid]); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s: lcore=%u queue init failed with err: %d\n", - __func__, prt->lcore[j], rc); - return rc; - } - - /* calculate number of queues and assign queue id per lcore. */ - lc = find_initilized_lcore(cfg, prt->lcore[j]); - if (lc == NULL) { - lc = &cfg->cpu[cfg->cpu_num]; - lc->id = prt->lcore[j]; - cfg->cpu_num++; - } - - NETBE_REALLOC(lc->prtq, lc->prtq_num + 1); - lc->prtq[lc->prtq_num].rxqid = j; - lc->prtq[lc->prtq_num].txqid = j; - lc->prtq[lc->prtq_num].port = *prt; - lc->prtq_num++; - } - } - log_netbe_cfg(cfg); - - return 0; -} - -/* - * UDP IPv6 destination lookup callback. - */ -static int -lpm6_dst_lookup(void *data, const struct in6_addr *addr, - struct tle_udp_dest *res) -{ - int32_t rc; - uint8_t idx; - struct netbe_lcore *lc; - struct tle_udp_dest *dst; - uintptr_t p; - - lc = data; - p = (uintptr_t)addr->s6_addr; - - rc = rte_lpm6_lookup(lc->lpm6, (uint8_t *)p, &idx); - if (rc == 0) { - dst = &lc->dst6[idx]; - rte_memcpy(res, dst, dst->l2_len + dst->l3_len + - offsetof(struct tle_udp_dest, hdr)); - } - return rc; -} - -static int -netbe_add_ipv4_route(struct netbe_lcore *lc, const struct netbe_dest *dst, - uint8_t idx) -{ - int32_t rc; - uint32_t addr, depth; - char str[INET_ADDRSTRLEN]; - - depth = dst->prfx; - addr = rte_be_to_cpu_32(dst->ipv4.s_addr); - - inet_ntop(AF_INET, &dst->ipv4, str, sizeof(str)); - rc = rte_lpm_add(lc->lpm4, addr, depth, idx); - RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p," - "ipv4=%s/%u,mtu=%u," - "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) " - "returns %d;\n", - __func__, lc->id, dst->port, lc->dst4[idx].dev, - str, depth, lc->dst4[idx].mtu, - dst->mac.addr_bytes[0], dst->mac.addr_bytes[1], - dst->mac.addr_bytes[2], dst->mac.addr_bytes[3], - dst->mac.addr_bytes[4], dst->mac.addr_bytes[5], - rc); - return rc; -} - -static int -netbe_add_ipv6_route(struct netbe_lcore *lc, const struct netbe_dest *dst, - uint8_t idx) -{ - int32_t rc; - uint32_t depth; - char str[INET6_ADDRSTRLEN]; - - depth = dst->prfx; - - rc = rte_lpm6_add(lc->lpm6, (uint8_t *)(uintptr_t)dst->ipv6.s6_addr, - depth, idx); - - inet_ntop(AF_INET6, &dst->ipv6, str, sizeof(str)); - RTE_LOG(NOTICE, USER1, "%s(lcore=%u,port=%u,dev=%p," - "ipv6=%s/%u,mtu=%u," - "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) " - "returns %d;\n", - __func__, lc->id, dst->port, lc->dst6[idx].dev, - str, depth, lc->dst4[idx].mtu, - dst->mac.addr_bytes[0], dst->mac.addr_bytes[1], - dst->mac.addr_bytes[2], dst->mac.addr_bytes[3], - dst->mac.addr_bytes[4], dst->mac.addr_bytes[5], - rc); - return rc; -} - -static void -fill_dst(struct tle_udp_dest *dst, struct netbe_dev *bed, - const struct netbe_dest *bdp, uint16_t l3_type, int32_t sid) -{ - struct ether_hdr *eth; - struct ipv4_hdr *ip4h; - struct ipv6_hdr *ip6h; - - static const struct ipv4_hdr ipv4_tmpl = { - .version_ihl = 4 << 4 | sizeof(*ip4h) / IPV4_IHL_MULTIPLIER, - .time_to_live = 64, - .next_proto_id = IPPROTO_UDP, - }; - - static const struct ipv6_hdr ipv6_tmpl = { - .vtc_flow = 6 << 4, - .proto = IPPROTO_UDP, - .hop_limits = 64, - }; - - dst->dev = bed->dev; - dst->head_mp = frag_mpool[sid + 1]; - dst->mtu = RTE_MIN(bdp->mtu, bed->port.mtu); - dst->l2_len = sizeof(*eth); - - eth = (struct ether_hdr *)dst->hdr; - - ether_addr_copy(&bed->port.mac, ð->s_addr); - ether_addr_copy(&bdp->mac, ð->d_addr); - eth->ether_type = rte_cpu_to_be_16(l3_type); - - if (l3_type == ETHER_TYPE_IPv4) { - dst->l3_len = sizeof(*ip4h); - ip4h = (struct ipv4_hdr *)(eth + 1); - ip4h[0] = ipv4_tmpl; - } else if (l3_type == ETHER_TYPE_IPv6) { - dst->l3_len = sizeof(*ip6h); - ip6h = (struct ipv6_hdr *)(eth + 1); - ip6h[0] = ipv6_tmpl; - } -} - -static int -create_context(struct netbe_lcore *lc, const struct tle_udp_ctx_param *ctx_prm) -{ - uint32_t rc = 0, sid; - uint64_t frag_cycles; - struct tle_udp_ctx_param cprm; - - if (lc->ctx == NULL) { - sid = rte_lcore_to_socket_id(lc->id); - - rc = lcore_lpm_init(lc); - if (rc != 0) - return rc; - - cprm = *ctx_prm; - cprm.socket_id = sid; - cprm.lookup4 = lpm4_dst_lookup; - cprm.lookup4_data = lc; - cprm.lookup6 = lpm6_dst_lookup; - cprm.lookup6_data = lc; - - /* to facilitate both IPv4 and IPv6. */ - cprm.max_streams *= 2; - - frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / - MS_PER_S * FRAG_TTL; - - lc->ftbl = rte_ip_frag_table_create(cprm.max_streams, - FRAG_TBL_BUCKET_ENTRIES, cprm.max_streams, - frag_cycles, sid); - - RTE_LOG(NOTICE, USER1, "%s(lcore=%u): frag_tbl=%p;\n", - __func__, lc->id, lc->ftbl); - - lc->ctx = tle_udp_create(&cprm); - - RTE_LOG(NOTICE, USER1, "%s(lcore=%u): udp_ctx=%p;\n", - __func__, lc->id, lc->ctx); - - if (lc->ctx == NULL || lc->ftbl == NULL) - rc = ENOMEM; - } - - return rc; -} - -/* - * BE lcore setup routine. - */ -static int -lcore_init(struct netbe_lcore *lc, const struct tle_udp_ctx_param *ctx_prm, - const uint32_t prtqid, const uint16_t *bl_ports, uint32_t nb_bl_ports) -{ - int32_t rc = 0; - struct tle_udp_dev_param dprm; - - rc = create_context(lc, ctx_prm); - - if (lc->ctx != NULL) { - memset(&dprm, 0, sizeof(dprm)); - dprm.rx_offload = lc->prtq[prtqid].port.rx_offload; - dprm.tx_offload = lc->prtq[prtqid].port.tx_offload; - dprm.local_addr4.s_addr = lc->prtq[prtqid].port.ipv4; - memcpy(&dprm.local_addr6, &lc->prtq[prtqid].port.ipv6, - sizeof(lc->prtq[prtqid].port.ipv6)); - dprm.bl4.nb_port = nb_bl_ports; - dprm.bl4.port = bl_ports; - dprm.bl6.nb_port = nb_bl_ports; - dprm.bl6.port = bl_ports; - - lc->prtq[prtqid].dev = tle_udp_add_dev(lc->ctx, &dprm); - - RTE_LOG(NOTICE, USER1, - "%s(lcore=%u, port=%u, qid=%u), udp_dev: %p\n", - __func__, lc->id, lc->prtq[prtqid].port.id, - lc->prtq[prtqid].rxqid, lc->prtq[prtqid].dev); - - if (lc->prtq[prtqid].dev == NULL) - rc = -rte_errno; - - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s(lcore=%u) failed with error code: %d\n", - __func__, lc->id, rc); - tle_udp_destroy(lc->ctx); - rte_ip_frag_table_destroy(lc->ftbl); - rte_lpm_free(lc->lpm4); - rte_lpm6_free(lc->lpm6); - rte_free(lc->prtq[prtqid].port.lcore); - lc->prtq[prtqid].port.nb_lcore = 0; - rte_free(lc->prtq); - lc->prtq_num = 0; - return rc; - } - } - - return rc; -} - -static uint16_t -create_blocklist(const struct netbe_port *beprt, uint16_t *bl_ports, - uint32_t q) -{ - uint32_t i, j, qid, align_nb_q; - - align_nb_q = rte_align32pow2(beprt->nb_lcore); - for (i = 0, j = 0; i < (UINT16_MAX + 1); i++) { - qid = (i % align_nb_q) % beprt->nb_lcore; - if (qid != q) - bl_ports[j++] = i; - } - - return j; -} - -static int -netbe_lcore_init(struct netbe_cfg *cfg, - const struct tle_udp_ctx_param *ctx_prm) -{ - int32_t rc; - uint32_t i, j, nb_bl_ports = 0, sz; - struct netbe_lcore *lc; - static uint16_t *bl_ports; - - /* Create the udp context and attached queue for each lcore. */ - rc = 0; - sz = sizeof(uint16_t) * UINT16_MAX; - bl_ports = rte_zmalloc(NULL, sz, RTE_CACHE_LINE_SIZE); - for (i = 0; i < cfg->cpu_num; i++) { - lc = &cfg->cpu[i]; - for (j = 0; j < lc->prtq_num; j++) { - memset((uint8_t *)bl_ports, 0, sz); - /* create list of blocked ports based on q */ - nb_bl_ports = create_blocklist(&lc->prtq[j].port, - bl_ports, lc->prtq[j].rxqid); - RTE_LOG(NOTICE, USER1, - "lc=%u, q=%u, nb_bl_ports=%u\n", - lc->id, lc->prtq[j].rxqid, nb_bl_ports); - - rc = lcore_init(lc, ctx_prm, j, bl_ports, nb_bl_ports); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s: failed with error code: %d\n", - __func__, rc); - rte_free(bl_ports); - return rc; - } - } - } - rte_free(bl_ports); - - return 0; -} - -static void -netbe_lcore_fini(struct netbe_cfg *cfg) -{ - uint32_t i; - - for (i = 0; i != cfg->cpu_num; i++) { - tle_udp_destroy(cfg->cpu[i].ctx); - rte_ip_frag_table_destroy(cfg->cpu[i].ftbl); - rte_lpm_free(cfg->cpu[i].lpm4); - rte_lpm6_free(cfg->cpu[i].lpm6); - - rte_free(cfg->cpu[i].prtq); - cfg->cpu[i].prtq_num = 0; - } - - rte_free(cfg->cpu); - cfg->cpu_num = 0; - for (i = 0; i != cfg->prt_num; i++) { - rte_free(cfg->prt[i].lcore); - cfg->prt[i].nb_lcore = 0; - } - rte_free(cfg->prt); - cfg->prt_num = 0; -} - -static int -netbe_add_dest(struct netbe_lcore *lc, uint32_t dev_idx, uint16_t family, - const struct netbe_dest *dst, uint32_t dnum) -{ - int32_t rc, sid; - uint16_t l3_type; - uint32_t i, n, m; - struct tle_udp_dest *dp; - - if (family == AF_INET) { - n = lc->dst4_num; - dp = lc->dst4 + n; - m = RTE_DIM(lc->dst4); - l3_type = ETHER_TYPE_IPv4; - } else { - n = lc->dst6_num; - dp = lc->dst6 + n; - m = RTE_DIM(lc->dst6); - l3_type = ETHER_TYPE_IPv6; - } - - if (n + dnum >= m) { - RTE_LOG(ERR, USER1, "%s(lcore=%u, family=%hu, dnum=%u) exceeds " - "maximum allowed number of destinations(%u);\n", - __func__, lc->id, family, dnum, m); - return -ENOSPC; - } - - sid = rte_lcore_to_socket_id(lc->id); - rc = 0; - - for (i = 0; i != dnum && rc == 0; i++) { - fill_dst(dp + i, lc->prtq + dev_idx, dst + i, l3_type, sid); - if (family == AF_INET) - rc = netbe_add_ipv4_route(lc, dst + i, n + i); - else - rc = netbe_add_ipv6_route(lc, dst + i, n + i); - } - - if (family == AF_INET) - lc->dst4_num = n + i; - else - lc->dst6_num = n + i; - - return rc; -} - -static int -netbe_dest_init(const char *fname, struct netbe_cfg *cfg) -{ - int32_t rc; - uint32_t f, i, p; - uint32_t k, l, cnt; - struct netbe_lcore *lc; - struct netbe_dest_prm prm; - - rc = netbe_parse_dest(fname, &prm); - if (rc != 0) - return rc; - - rc = 0; - for (i = 0; i != prm.nb_dest; i++) { - - p = prm.dest[i].port; - f = prm.dest[i].family; - - cnt = 0; - for (k = 0; k != cfg->cpu_num; k++) { - lc = cfg->cpu + k; - for (l = 0; l != lc->prtq_num; l++) - if (lc->prtq[l].port.id == p) { - rc = netbe_add_dest(lc, l, f, - prm.dest + i, 1); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s(lcore=%u, family=%u) could not " - "add destinations(%u);\n", - __func__, lc->id, f, i); - return -ENOSPC; - } - cnt++; - } - } - - if (cnt == 0) { - RTE_LOG(ERR, USER1, "%s(%s) error at line %u: " - "port %u not managed by any lcore;\n", - __func__, fname, prm.dest[i].line, p); - break; - } - } - - free(prm.dest); - return rc; -} - -static void -netfe_stream_close(struct netfe_lcore *fe, uint32_t dec) -{ - uint32_t sidx; - - fe->sidx -= dec; - sidx = fe->sidx; - tle_event_free(fe->fs[sidx].txev); - tle_event_free(fe->fs[sidx].rxev); - tle_udp_stream_close(fe->fs[sidx].s); - memset(&fe->fs[sidx], 0, sizeof(fe->fs[sidx])); -} - -static void -netfe_stream_dump(const struct netfe_stream *fes) -{ - struct sockaddr_in *l4, *r4; - struct sockaddr_in6 *l6, *r6; - uint16_t lport, rport; - struct tle_udp_stream_param sprm; - char laddr[INET6_ADDRSTRLEN]; - char raddr[INET6_ADDRSTRLEN]; - - tle_udp_stream_get_param(fes->s, &sprm); - - if (sprm.local_addr.ss_family == AF_INET) { - - l4 = (struct sockaddr_in *)&sprm.local_addr; - r4 = (struct sockaddr_in *)&sprm.remote_addr; - - lport = l4->sin_port; - rport = r4->sin_port; - - } else if (sprm.local_addr.ss_family == AF_INET6) { - - l6 = (struct sockaddr_in6 *)&sprm.local_addr; - r6 = (struct sockaddr_in6 *)&sprm.remote_addr; - - lport = l6->sin6_port; - rport = r6->sin6_port; - - } else { - RTE_LOG(ERR, USER1, "stream@%p - unknown family=%hu\n", - fes->s, sprm.local_addr.ss_family); - return; - } - - format_addr(&sprm.local_addr, laddr, sizeof(laddr)); - format_addr(&sprm.remote_addr, raddr, sizeof(raddr)); - - RTE_LOG(INFO, USER1, - "stream@%p={" - "family=%hu,laddr=%s,lport=%hu,raddr=%s,rport=%hu," - "stats={" - "rxp=%" PRIu64 ",txp=%" PRIu64 ",drops=%" PRIu64 "," - "rxev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "]," - "txev[IDLE, DOWN, UP]=[%" PRIu64 ", %" PRIu64 ", %" PRIu64 "]," - "}};\n", - fes->s, - sprm.local_addr.ss_family, - laddr, ntohs(lport), raddr, ntohs(rport), - fes->stat.rxp, fes->stat.txp, fes->stat.drops, - fes->stat.rxev[TLE_SEV_IDLE], - fes->stat.rxev[TLE_SEV_DOWN], - fes->stat.rxev[TLE_SEV_UP], - fes->stat.txev[TLE_SEV_IDLE], - fes->stat.txev[TLE_SEV_DOWN], - fes->stat.txev[TLE_SEV_UP]); -} - -/* - * helper function: opens IPv4 and IPv6 streams for selected port. - */ -static struct netfe_stream * -netfe_stream_open(struct netfe_lcore *fe, struct tle_udp_stream_param *sprm, - uint32_t lcore, uint16_t op, uint32_t bidx) -{ - int32_t rc; - uint32_t sidx; - struct netfe_stream *fes; - struct sockaddr_in *l4; - struct sockaddr_in6 *l6; - uint16_t errport; - - sidx = fe->sidx; - fes = fe->fs + sidx; - if (sidx >= fe->snum) { - rte_errno = ENOBUFS; - return NULL; - } - - fes->rxev = tle_event_alloc(fe->rxeq, &fe->fs[sidx]); - fes->txev = tle_event_alloc(fe->txeq, &fe->fs[sidx]); - sprm->recv_ev = fes->rxev; - if (op != FWD) - sprm->send_ev = fes->txev; - - RTE_LOG(ERR, USER1, - "%s(%u) [%u]={op=%hu, rxev=%p, txev=%p}, be_lc=%u\n", - __func__, lcore, sidx, op, fes->rxev, fes->txev, - becfg.cpu[bidx].id); - if (fes->rxev == NULL || fes->txev == NULL) { - netfe_stream_close(fe, 0); - rte_errno = ENOMEM; - return NULL; - } - - if (op == TXONLY || op == FWD) { - tle_event_active(fes->txev, TLE_SEV_DOWN); - fes->stat.txev[TLE_SEV_DOWN]++; - } - - if (op != TXONLY) { - tle_event_active(fes->rxev, TLE_SEV_DOWN); - fes->stat.rxev[TLE_SEV_DOWN]++; - } - - fes->s = tle_udp_stream_open(becfg.cpu[bidx].ctx, sprm); - if (fes->s == NULL) { - rc = rte_errno; - netfe_stream_close(fe, 0); - rte_errno = rc; - - if (sprm->local_addr.ss_family == AF_INET) { - l4 = (struct sockaddr_in *) &sprm->local_addr; - errport = ntohs(l4->sin_port); - } else { - l6 = (struct sockaddr_in6 *) &sprm->local_addr; - errport = ntohs(l6->sin6_port); - } - RTE_LOG(ERR, USER1, "stream open failed for port %u with error " - "code=%u, bidx=%u, lc=%u\n", - errport, rc, bidx, becfg.cpu[bidx].id); - return NULL; - } - - fes->op = op; - fes->family = sprm->local_addr.ss_family; - - fe->sidx = sidx + 1; - return fes; -} - -static inline int -netfe_addr_eq(struct sockaddr_storage *l, struct sockaddr_storage *r, - uint16_t family) -{ - struct sockaddr_in *l4, *r4; - struct sockaddr_in6 *l6, *r6; - - if (family == AF_INET) { - l4 = (struct sockaddr_in *)l; - r4 = (struct sockaddr_in *)r; - return (l4->sin_port == r4->sin_port && - l4->sin_addr.s_addr == r4->sin_addr.s_addr); - } else { - l6 = (struct sockaddr_in6 *)l; - r6 = (struct sockaddr_in6 *)r; - return (l6->sin6_port == r6->sin6_port && - memcmp(&l6->sin6_addr, &r6->sin6_addr, - sizeof(l6->sin6_addr))); - } -} - -static inline void -netfe_pkt_addr(const struct rte_mbuf *m, struct sockaddr_storage *ps, - uint16_t family) -{ - const struct ipv4_hdr *ip4h; - const struct ipv6_hdr *ip6h; - const struct udp_hdr *udph; - struct sockaddr_in *in4; - struct sockaddr_in6 *in6; - - NETFE_PKT_DUMP(m); - - udph = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, -m->l4_len); - - if (family == AF_INET) { - in4 = (struct sockaddr_in *)ps; - ip4h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, - -(m->l4_len + m->l3_len)); - in4->sin_port = udph->src_port; - in4->sin_addr.s_addr = ip4h->src_addr; - } else { - in6 = (struct sockaddr_in6 *)ps; - ip6h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, - -(m->l4_len + m->l3_len)); - in6->sin6_port = udph->src_port; - rte_memcpy(&in6->sin6_addr, ip6h->src_addr, - sizeof(in6->sin6_addr)); - } -} - -static inline uint32_t -pkt_eq_addr(struct rte_mbuf *pkt[], uint32_t num, uint16_t family, - struct sockaddr_storage *cur, struct sockaddr_storage *nxt) -{ - uint32_t i; - - for (i = 0; i != num; i++) { - netfe_pkt_addr(pkt[i], nxt, family); - if (netfe_addr_eq(cur, nxt, family) == 0) - break; - } - - return i; -} - -static inline void -pkt_buf_empty(struct pkt_buf *pb) -{ - uint32_t i; - - for (i = 0; i != pb->num; i++) - rte_pktmbuf_free(pb->pkt[i]); - - pb->num = 0; -} - -static inline void -pkt_buf_fill(uint32_t lcore, struct pkt_buf *pb, uint32_t dlen) -{ - uint32_t i; - int32_t sid; - - sid = rte_lcore_to_socket_id(lcore) + 1; - - for (i = pb->num; i != RTE_DIM(pb->pkt); i++) { - pb->pkt[i] = rte_pktmbuf_alloc(mpool[sid]); - if (pb->pkt[i] == NULL) - break; - rte_pktmbuf_append(pb->pkt[i], dlen); - } - - pb->num = i; -} - -static struct netfe_stream * -find_fwd_dst(uint32_t lcore, struct netfe_stream *fes, - const struct sockaddr *sa) -{ - uint32_t rc; - struct netfe_stream *fed; - struct netfe_lcore *fe; - struct tle_udp_stream_param sprm; - - fe = RTE_PER_LCORE(_fe); - - fed = fwd_tbl_lkp(fe, fes->family, sa); - if (fed != NULL) - return fed; - - /* create a new stream and put it into the fwd table. */ - - sprm = fes->fwdprm.prm; - - /* open forward stream with wildcard remote addr. */ - memset(&sprm.remote_addr.ss_family + 1, 0, - sizeof(sprm.remote_addr) - sizeof(sprm.remote_addr.ss_family)); - fed = netfe_stream_open(fe, &sprm, lcore, FWD, fes->fwdprm.bidx); - if (fed == NULL) - return NULL; - - rc = fwd_tbl_add(fe, fes->family, sa, fed); - if (rc != 0) { - netfe_stream_close(fe, 1); - fed = NULL; - } - - fed->fwdprm.prm.remote_addr = *(const struct sockaddr_storage *)sa; - return fed; -} - -static inline void -netfe_tx_process(uint32_t lcore, struct netfe_stream *fes) -{ - uint32_t i, k, n; - - /* refill with new mbufs. */ - pkt_buf_fill(lcore, &fes->pbuf, fes->txlen); - - n = fes->pbuf.num; - if (n == 0) - return; - - k = tle_udp_stream_send(fes->s, fes->pbuf.pkt, n, NULL); - NETFE_TRACE("%s(%u): tle_udp_stream_send(%p, %u) returns %u\n", - __func__, lcore, fes->s, n, k); - fes->stat.txp += k; - fes->stat.drops += n - k; - - if (k == 0) - return; - - /* adjust pbuf array. */ - fes->pbuf.num = n - k; - for (i = k; i != n; i++) - fes->pbuf.pkt[i - k] = fes->pbuf.pkt[i]; -} - -static inline void -netfe_fwd(uint32_t lcore, struct netfe_stream *fes) -{ - uint32_t i, j, k, n, x; - uint16_t family; - void *pi0, *pi1, *pt; - struct rte_mbuf **pkt; - struct netfe_stream *fed; - struct sockaddr_storage in[2]; - - family = fes->family; - n = fes->pbuf.num; - pkt = fes->pbuf.pkt; - - if (n == 0) - return; - - in[0].ss_family = family; - in[1].ss_family = family; - pi0 = &in[0]; - pi1 = &in[1]; - - netfe_pkt_addr(pkt[0], pi0, family); - - x = 0; - for (i = 0; i != n; i = j) { - - j = i + pkt_eq_addr(&pkt[i + 1], - n - i - 1, family, pi0, pi1) + 1; - - fed = find_fwd_dst(lcore, fes, (const struct sockaddr *)pi0); - if (fed != NULL) { - - k = tle_udp_stream_send(fed->s, pkt + i, j - i, - (const struct sockaddr *) - &fes->fwdprm.prm.remote_addr); - - NETFE_TRACE("%s(%u): tle_udp_stream_send(%p, %u) " - "returns %u\n", - __func__, lcore, fed->s, j - i, k); - fed->stat.txp += k; - fed->stat.drops += j - i - k; - fes->stat.fwp += k; - - } else { - NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n", - __func__, lcore, fes->s, j - i); - for (k = i; k != j; k++) { - NETFE_TRACE("%s(%u, %p): free(%p);\n", - __func__, lcore, fes->s, pkt[k]); - rte_pktmbuf_free(pkt[j]); - } - fes->stat.drops += j - i; - } - - /* copy unforwarded mbufs. */ - for (i += k; i != j; i++, x++) - pkt[x] = pkt[i]; - - /* swap the pointers */ - pt = pi0; - pi0 = pi1; - pi1 = pt; - } - - fes->pbuf.num = x; - - if (x != 0) { - tle_event_raise(fes->txev); - fes->stat.txev[TLE_SEV_UP]++; - } - - if (n == RTE_DIM(fes->pbuf.pkt)) { - tle_event_active(fes->rxev, TLE_SEV_UP); - fes->stat.rxev[TLE_SEV_UP]++; - } -} - -static inline void -netfe_rx_process(__rte_unused uint32_t lcore, struct netfe_stream *fes) -{ - uint32_t k, n; - - n = fes->pbuf.num; - k = RTE_DIM(fes->pbuf.pkt) - n; - - /* packet buffer is full, can't receive any new packets. */ - if (k == 0) { - tle_event_idle(fes->rxev); - fes->stat.rxev[TLE_SEV_IDLE]++; - return; - } - - n = tle_udp_stream_recv(fes->s, fes->pbuf.pkt + n, k); - if (n == 0) - return; - - NETFE_TRACE("%s(%u): tle_udp_stream_recv(%p, %u) returns %u\n", - __func__, lcore, fes->s, k, n); - - fes->pbuf.num += n; - fes->stat.rxp += n; - - /* free all received mbufs. */ - if (fes->op == RXONLY) - pkt_buf_empty(&fes->pbuf); - /* mark stream as writable */ - else if (k == RTE_DIM(fes->pbuf.pkt)) { - if (fes->op == RXTX) { - tle_event_active(fes->txev, TLE_SEV_UP); - fes->stat.txev[TLE_SEV_UP]++; - } else if (fes->op == FWD) { - tle_event_raise(fes->txev); - fes->stat.txev[TLE_SEV_UP]++; - } - } -} - -static inline void -netfe_rxtx_process(__rte_unused uint32_t lcore, struct netfe_stream *fes) -{ - uint32_t i, j, k, n; - uint16_t family; - void *pi0, *pi1, *pt; - struct rte_mbuf **pkt; - struct sockaddr_storage in[2]; - - family = fes->family; - n = fes->pbuf.num; - pkt = fes->pbuf.pkt; - - /* there is nothing to send. */ - if (n == 0) { - tle_event_idle(fes->txev); - fes->stat.txev[TLE_SEV_IDLE]++; - return; - } - - in[0].ss_family = family; - in[1].ss_family = family; - pi0 = &in[0]; - pi1 = &in[1]; - - netfe_pkt_addr(pkt[0], pi0, family); - - for (i = 0; i != n; i = j) { - - j = i + pkt_eq_addr(&pkt[i + 1], - n - i - 1, family, pi0, pi1) + 1; - - k = tle_udp_stream_send(fes->s, pkt + i, j - i, - (const struct sockaddr *)pi0); - - NETFE_TRACE("%s(%u): tle_udp_stream_send(%p, %u) returns %u\n", - __func__, lcore, fes->s, j - i, k); - fes->stat.txp += k; - fes->stat.drops += j - i - k; - - i += k; - - /* stream send buffer is full */ - if (i != j) - break; - - /* swap the pointers */ - pt = pi0; - pi0 = pi1; - pi1 = pt; - } - - /* not able to send anything. */ - if (i == 0) - return; - - if (n == RTE_DIM(fes->pbuf.pkt)) { - /* mark stream as readable */ - tle_event_active(fes->rxev, TLE_SEV_UP); - fes->stat.rxev[TLE_SEV_UP]++; - } - - /* adjust pbuf array. */ - fes->pbuf.num = n - i; - for (j = i; j != n; j++) - pkt[j - i] = pkt[j]; -} - -static int -netfe_lcore_init(const struct netfe_lcore_prm *prm) -{ - size_t sz; - int32_t rc; - uint32_t i, lcore, snum; - struct netfe_lcore *fe; - struct tle_evq_param eprm; - struct tle_udp_stream_param sprm; - struct netfe_stream *fes; - - lcore = rte_lcore_id(); - - snum = prm->max_streams; - RTE_LOG(NOTICE, USER1, "%s(lcore=%u, nb_streams=%u, max_streams=%u)\n", - __func__, lcore, prm->nb_streams, snum); - - memset(&eprm, 0, sizeof(eprm)); - eprm.socket_id = rte_lcore_to_socket_id(lcore); - eprm.max_events = snum; - - sz = sizeof(*fe) + snum * sizeof(fe->fs[0]); - fe = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, - rte_lcore_to_socket_id(lcore)); - - if (fe == NULL) { - RTE_LOG(ERR, USER1, "%s:%d failed to allocate %zu bytes\n", - __func__, __LINE__, sz); - return -ENOMEM; - } - - RTE_PER_LCORE(_fe) = fe; - - fe->snum = snum; - fe->fs = (struct netfe_stream *)(fe + 1); - - fe->rxeq = tle_evq_create(&eprm); - fe->txeq = tle_evq_create(&eprm); - - RTE_LOG(INFO, USER1, "%s(%u) rx evq=%p, tx evq=%p\n", - __func__, lcore, fe->rxeq, fe->txeq); - if (fe->rxeq == NULL || fe->txeq == NULL) - return -ENOMEM; - - rc = fwd_tbl_init(fe, AF_INET, lcore); - RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n", - __func__, lcore, AF_INET, rc); - if (rc != 0) - return rc; - - rc = fwd_tbl_init(fe, AF_INET6, lcore); - RTE_LOG(ERR, USER1, "%s(%u) fwd_tbl_init(%u) returns %d\n", - __func__, lcore, AF_INET6, rc); - if (rc != 0) - return rc; - - /* open all requested streams. */ - for (i = 0; i != prm->nb_streams; i++) { - sprm = prm->stream[i].sprm.prm; - fes = netfe_stream_open(fe, &sprm, lcore, prm->stream[i].op, - prm->stream[i].sprm.bidx); - if (fes == NULL) { - rc = -rte_errno; - break; - } - - netfe_stream_dump(fes); - - if (prm->stream[i].op == FWD) { - fes->fwdprm = prm->stream[i].fprm; - rc = fwd_tbl_add(fe, - prm->stream[i].fprm.prm.remote_addr.ss_family, - (const struct sockaddr *) - &prm->stream[i].fprm.prm.remote_addr, - fes); - if (rc != 0) { - netfe_stream_close(fe, 1); - break; - } - } else if (prm->stream[i].op == TXONLY) { - fes->txlen = prm->stream[i].txlen; - fes->raddr = sprm.remote_addr; - } - } - - return rc; -} - -static inline void -netfe_lcore(void) -{ - struct netfe_lcore *fe; - uint32_t j, n, lcore; - struct netfe_stream *fs[MAX_PKT_BURST]; - - fe = RTE_PER_LCORE(_fe); - if (fe == NULL) - return; - - lcore = rte_lcore_id(); - - n = tle_evq_get(fe->rxeq, (const void **)(uintptr_t)fs, RTE_DIM(fs)); - - if (n != 0) { - NETFE_TRACE("%s(%u): tle_evq_get(rxevq=%p) returns %u\n", - __func__, lcore, fe->rxeq, n); - for (j = 0; j != n; j++) - netfe_rx_process(lcore, fs[j]); - } - - n = tle_evq_get(fe->txeq, (const void **)(uintptr_t)fs, RTE_DIM(fs)); - - if (n != 0) { - NETFE_TRACE("%s(%u): tle_evq_get(txevq=%p) returns %u\n", - __func__, lcore, fe->txeq, n); - for (j = 0; j != n; j++) { - if (fs[j]->op == RXTX) - netfe_rxtx_process(lcore, fs[j]); - else if (fs[j]->op == FWD) - netfe_fwd(lcore, fs[j]); - else if (fs[j]->op == TXONLY) - netfe_tx_process(lcore, fs[j]); - } - } -} - -static void -netfe_lcore_fini(void) -{ - struct netfe_lcore *fe; - uint32_t i; - - fe = RTE_PER_LCORE(_fe); - if (fe == NULL) - return; - - while (fe->sidx != 0) { - i = fe->sidx - 1; - netfe_stream_dump(fe->fs + i); - netfe_stream_close(fe, 1); - } - - tle_evq_destroy(fe->txeq); - tle_evq_destroy(fe->rxeq); - RTE_PER_LCORE(_fe) = NULL; - rte_free(fe); -} - -static inline void -netbe_rx(struct netbe_lcore *lc, uint32_t pidx) -{ - uint32_t j, k, n; - struct rte_mbuf *pkt[MAX_PKT_BURST]; - struct rte_mbuf *rp[MAX_PKT_BURST]; - int32_t rc[MAX_PKT_BURST]; - - n = rte_eth_rx_burst(lc->prtq[pidx].port.id, - lc->prtq[pidx].rxqid, pkt, RTE_DIM(pkt)); - if (n == 0) - return; - - lc->prtq[pidx].rx_stat.in += n; - NETBE_TRACE("%s(%u): rte_eth_rx_burst(%u, %u) returns %u\n", - __func__, lc->id, lc->prtq[pidx].port.id, lc->prtq[pidx].rxqid, - n); - - k = tle_udp_rx_bulk(lc->prtq[pidx].dev, pkt, rp, rc, n); - - lc->prtq[pidx].rx_stat.up += k; - lc->prtq[pidx].rx_stat.drop += n - k; - NETBE_TRACE("%s(%u): tle_udp_rx_bulk(%p, %u) returns %u\n", - __func__, lc->id, lc->prtq[pidx].dev, n, k); - - for (j = 0; j != n - k; j++) { - NETBE_TRACE("%s:%d(port=%u) rp[%u]={%p, %d};\n", - __func__, __LINE__, lc->prtq[pidx].port.id, - j, rp[j], rc[j]); - rte_pktmbuf_free(rp[j]); - } -} - -static inline void -netbe_tx(struct netbe_lcore *lc, uint32_t pidx) -{ - uint32_t j, k, n; - struct rte_mbuf **mb; - - n = lc->prtq[pidx].tx_buf.num; - k = RTE_DIM(lc->prtq[pidx].tx_buf.pkt) - n; - mb = lc->prtq[pidx].tx_buf.pkt; - - if (k >= RTE_DIM(lc->prtq[pidx].tx_buf.pkt) / 2) { - j = tle_udp_tx_bulk(lc->prtq[pidx].dev, mb + n, k); - n += j; - lc->prtq[pidx].tx_stat.down += j; - } - - if (n == 0) - return; - - NETBE_TRACE("%s(%u): tle_udp_tx_bulk(%p) returns %u,\n" - "total pkts to send: %u\n", - __func__, lc->id, lc->prtq[pidx].dev, j, n); - - for (j = 0; j != n; j++) - NETBE_PKT_DUMP(mb[j]); - - k = rte_eth_tx_burst(lc->prtq[pidx].port.id, - lc->prtq[pidx].txqid, mb, n); - - lc->prtq[pidx].tx_stat.out += k; - lc->prtq[pidx].tx_stat.drop += n - k; - NETBE_TRACE("%s(%u): rte_eth_tx_burst(%u, %u, %u) returns %u\n", - __func__, lc->id, lc->prtq[pidx].port.id, lc->prtq[pidx].txqid, - n, k); - - lc->prtq[pidx].tx_buf.num = n - k; - if (k != 0) - for (j = k; j != n; j++) - mb[j - k] = mb[j]; -} - -static int -netbe_lcore_setup(struct netbe_lcore *lc) -{ - uint32_t i; - int32_t rc; - - RTE_LOG(NOTICE, USER1, "%s(lcore=%u, udp_ctx: %p) start\n", - __func__, lc->id, lc->ctx); - - /* - * ??????? - * wait for FE lcores to start, so BE dont' drop any packets - * because corresponding streams not opened yet by FE. - * useful when used with pcap PMDS. - * think better way, or should this timeout be a cmdlien parameter. - * ??????? - */ - rte_delay_ms(10); - - rc = 0; - for (i = 0; i != lc->prtq_num && rc == 0; i++) { - RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, udp_dev: %p)\n", - __func__, i, lc->prtq[i].port.id, lc->prtq[i].dev); - rc = setup_rx_cb(&lc->prtq[i].port, lc, lc->prtq[i].rxqid); - if (rc < 0) - return rc; - } - - if (rc == 0) - RTE_PER_LCORE(_be) = lc; - return rc; -} - -static inline void -netbe_lcore(void) -{ - uint32_t i; - struct netbe_lcore *lc; - - lc = RTE_PER_LCORE(_be); - if (lc == NULL) - return; - - for (i = 0; i != lc->prtq_num; i++) { - netbe_rx(lc, i); - netbe_tx(lc, i); - } -} - -static void -netbe_lcore_clear(void) -{ - uint32_t i, j; - struct netbe_lcore *lc; - - lc = RTE_PER_LCORE(_be); - if (lc == NULL) - return; - - RTE_LOG(NOTICE, USER1, "%s(lcore=%u, udp_ctx: %p) finish\n", - __func__, lc->id, lc->ctx); - for (i = 0; i != lc->prtq_num; i++) { - RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, lcore=%u, q=%u, dev=%p) " - "rx_stats={" - "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "}, " - "tx_stats={" - "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "};\n", - __func__, i, lc->prtq[i].port.id, lc->id, - lc->prtq[i].rxqid, - lc->prtq[i].dev, - lc->prtq[i].rx_stat.in, - lc->prtq[i].rx_stat.up, - lc->prtq[i].rx_stat.drop, - lc->prtq[i].tx_stat.down, - lc->prtq[i].tx_stat.out, - lc->prtq[i].tx_stat.drop); - } - - for (i = 0; i != lc->prtq_num; i++) - for (j = 0; j != lc->prtq[i].tx_buf.num; j++) - rte_pktmbuf_free(lc->prtq[i].tx_buf.pkt[j]); - - RTE_PER_LCORE(_be) = NULL; -} - -static int -lcore_main(void *arg) -{ - int32_t rc; - uint32_t lcore; - struct lcore_prm *prm; - - prm = arg; - lcore = rte_lcore_id(); - - RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n", - __func__, lcore); - - rc = 0; - - /* lcore FE init. */ - if (prm->fe.max_streams != 0) - rc = netfe_lcore_init(&prm->fe); - - /* lcore FE init. */ - if (rc == 0 && prm->be.lc != NULL) - rc = netbe_lcore_setup(prm->be.lc); - - if (rc != 0) - sig_handle(SIGQUIT); - - while (force_quit == 0) { - netfe_lcore(); - netbe_lcore(); - } - - RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n", - __func__, lcore); - - netfe_lcore_fini(); - netbe_lcore_clear(); - - return rc; -} - -static int -netfe_lcore_cmp(const void *s1, const void *s2) -{ - const struct netfe_stream_prm *p1, *p2; - - p1 = s1; - p2 = s2; - return p1->lcore - p2->lcore; -} - -static int -netbe_find6(const struct in6_addr *laddr, uint16_t lport, - const struct in6_addr *raddr, uint32_t be_lc) -{ - uint32_t i, j; - uint8_t idx; - struct netbe_lcore *bc; - - /* we have exactly one BE, use it for all traffic */ - if (becfg.cpu_num == 1) - return 0; - - /* search by provided be_lcore */ - if (be_lc != LCORE_ID_ANY) { - for (i = 0; i != becfg.cpu_num; i++) { - bc = becfg.cpu + i; - if (be_lc == bc->id) - return i; - } - RTE_LOG(NOTICE, USER1, "%s: no stream with be_lcore=%u\n", - __func__, be_lc); - return -ENOENT; - } - - /* search by local address */ - if (memcmp(laddr, &in6addr_any, sizeof(*laddr)) != 0) { - for (i = 0; i != becfg.cpu_num; i++) { - bc = becfg.cpu + i; - /* search by queue for the local port */ - for (j = 0; j != bc->prtq_num; j++) { - if (memcmp(laddr, &bc->prtq[j].port.ipv6, - sizeof(*laddr)) == 0) { - - if (lport == 0) - return i; - - if (verify_queue_for_port(bc->prtq + j, - lport) != 0) - return i; - } - } - } - } - - /* search by remote address */ - if (memcmp(raddr, &in6addr_any, sizeof(*raddr)) == 0) { - for (i = 0; i != becfg.cpu_num; i++) { - bc = becfg.cpu + i; - if (rte_lpm6_lookup(bc->lpm6, - (uint8_t *)(uintptr_t)raddr->s6_addr, - &idx) == 0) { - - if (lport == 0) - return i; - - /* search by queue for the local port */ - for (j = 0; j != bc->prtq_num; j++) - if (verify_queue_for_port(bc->prtq + j, - lport) != 0) - return i; - } - } - } - - return -ENOENT; -} - -static int -netbe_find(const struct tle_udp_stream_param *p, uint32_t be_lc) -{ - const struct sockaddr_in *l4, *r4; - const struct sockaddr_in6 *l6, *r6; - - if (p->local_addr.ss_family == AF_INET) { - l4 = (const struct sockaddr_in *)&p->local_addr; - r4 = (const struct sockaddr_in *)&p->remote_addr; - return netbe_find4(&l4->sin_addr, ntohs(l4->sin_port), - &r4->sin_addr, be_lc); - } else if (p->local_addr.ss_family == AF_INET6) { - l6 = (const struct sockaddr_in6 *)&p->local_addr; - r6 = (const struct sockaddr_in6 *)&p->remote_addr; - return netbe_find6(&l6->sin6_addr, ntohs(l6->sin6_port), - &r6->sin6_addr, be_lc); - } - return -EINVAL; -} - -static int -netfe_sprm_flll_be(struct netfe_sprm *sp, uint32_t line, uint32_t be_lc) -{ - int32_t bidx; - - bidx = netbe_find(&sp->prm, be_lc); - if (bidx < 0) { - RTE_LOG(ERR, USER1, "%s(line=%u): no BE for that stream\n", - __func__, line); - return -EINVAL; - } - sp->bidx = bidx; - return 0; -} - -/* start front-end processing. */ -static int -netfe_lcore_fill(struct lcore_prm prm[RTE_MAX_LCORE], - struct netfe_lcore_prm *lprm) -{ - uint32_t be_lc; - uint32_t i, j, lc, ln; - - /* determine on what BE each stream should be open. */ - for (i = 0; i != lprm->nb_streams; i++) { - lc = lprm->stream[i].lcore; - ln = lprm->stream[i].line; - be_lc = lprm->stream[i].be_lcore; - if (netfe_sprm_flll_be(&lprm->stream[i].sprm, ln, - be_lc) != 0 || - (lprm->stream[i].op == FWD && - netfe_sprm_flll_be(&lprm->stream[i].fprm, ln, - be_lc) != 0)) - return -EINVAL; - } - - /* group all fe parameters by lcore. */ - - qsort(lprm->stream, lprm->nb_streams, sizeof(lprm->stream[0]), - netfe_lcore_cmp); - - for (i = 0; i != lprm->nb_streams; i = j) { - - lc = lprm->stream[i].lcore; - ln = lprm->stream[i].line; - - if (rte_lcore_is_enabled(lc) == 0) { - RTE_LOG(ERR, USER1, - "%s(line=%u): lcore %u is not enabled\n", - __func__, ln, lc); - return -EINVAL; - } - - if (rte_get_master_lcore() != lc && - rte_eal_get_lcore_state(lc) == RUNNING) { - RTE_LOG(ERR, USER1, - "%s(line=%u): lcore %u already in use\n", - __func__, ln, lc); - return -EINVAL; - } - - for (j = i + 1; j != lprm->nb_streams && - lc == lprm->stream[j].lcore; - j++) - ; - - prm[lc].fe.max_streams = lprm->max_streams; - prm[lc].fe.nb_streams = j - i; - prm[lc].fe.stream = lprm->stream + i; - } - - return 0; -} - -int -main(int argc, char *argv[]) -{ - int32_t opt, opt_idx, rc; - uint32_t i; - uint64_t v; - struct tle_udp_ctx_param ctx_prm; - struct netfe_lcore_prm feprm; - struct rte_eth_stats stats; - char fecfg_fname[PATH_MAX + 1]; - char becfg_fname[PATH_MAX + 1]; - struct lcore_prm prm[RTE_MAX_LCORE]; - struct rte_eth_dev_info dev_info; - - fecfg_fname[0] = 0; - becfg_fname[0] = 0; - memset(prm, 0, sizeof(prm)); - - rc = rte_eal_init(argc, argv); - if (rc < 0) - rte_exit(EXIT_FAILURE, - "%s: rte_eal_init failed with error code: %d\n", - __func__, rc); - - memset(&ctx_prm, 0, sizeof(ctx_prm)); - - argc -= rc; - argv += rc; - - optind = 0; - optarg = NULL; - while ((opt = getopt_long(argc, argv, "B:PR:S:b:f:s:", long_opt, - &opt_idx)) != EOF) { - if (opt == OPT_SHORT_SBULK) { - rc = parse_uint_val(NULL, optarg, &v); - if (rc < 0) - rte_exit(EXIT_FAILURE, "%s: invalid value: %s " - "for option: \'%c\'\n", - __func__, optarg, opt); - ctx_prm.send_bulk_size = v; - } else if (opt == OPT_SHORT_PROMISC) { - becfg.promisc = 1; - } else if (opt == OPT_SHORT_RBUFS) { - rc = parse_uint_val(NULL, optarg, &v); - if (rc < 0) - rte_exit(EXIT_FAILURE, "%s: invalid value: %s " - "for option: \'%c\'\n", - __func__, optarg, opt); - ctx_prm.max_stream_rbufs = v; - } else if (opt == OPT_SHORT_SBUFS) { - rc = parse_uint_val(NULL, optarg, &v); - if (rc < 0) - rte_exit(EXIT_FAILURE, "%s: invalid value: %s " - "for option: \'%c\'\n", - __func__, optarg, opt); - ctx_prm.max_stream_sbufs = v; - } else if (opt == OPT_SHORT_STREAMS) { - rc = parse_uint_val(NULL, optarg, &v); - if (rc < 0) - rte_exit(EXIT_FAILURE, "%s: invalid value: %s " - "for option: \'%c\'\n", - __func__, optarg, opt); - ctx_prm.max_streams = v; - } else if (opt == OPT_SHORT_BECFG) { - snprintf(becfg_fname, sizeof(becfg_fname), "%s", - optarg); - } else if (opt == OPT_SHORT_FECFG) { - snprintf(fecfg_fname, sizeof(fecfg_fname), "%s", - optarg); - } else { - rte_exit(EXIT_FAILURE, - "%s: unknown option: \'%c\'\n", - __func__, opt); - } - } - - signal(SIGINT, sig_handle); - - rc = netbe_port_init(&becfg, argc - optind, argv + optind); - if (rc != 0) - rte_exit(EXIT_FAILURE, - "%s: netbe_port_init failed with error code: %d\n", - __func__, rc); - - rc = netbe_lcore_init(&becfg, &ctx_prm); - if (rc != 0) - sig_handle(SIGQUIT); - - if ((rc = netbe_dest_init(becfg_fname, &becfg)) != 0) - sig_handle(SIGQUIT); - - for (i = 0; i != becfg.prt_num && rc == 0; i++) { - RTE_LOG(NOTICE, USER1, "%s: starting port %u\n", - __func__, becfg.prt[i].id); - rc = rte_eth_dev_start(becfg.prt[i].id); - if (rc != 0) { - RTE_LOG(ERR, USER1, - "%s: rte_eth_dev_start(%u) returned " - "error code: %d\n", - __func__, becfg.prt[i].id, rc); - sig_handle(SIGQUIT); - } - rte_eth_dev_info_get(becfg.prt[i].id, &dev_info); - rc = update_rss_reta(&becfg.prt[i], &dev_info); - if (rc != 0) - sig_handle(SIGQUIT); - } - - feprm.max_streams = ctx_prm.max_streams * becfg.cpu_num; - if (rc == 0 && (rc = netfe_parse_cfg(fecfg_fname, &feprm)) != 0) - sig_handle(SIGQUIT); - - for (i = 0; rc == 0 && i != becfg.cpu_num; i++) - prm[becfg.cpu[i].id].be.lc = becfg.cpu + i; - - if (rc == 0 && (rc = netfe_lcore_fill(prm, &feprm)) != 0) - sig_handle(SIGQUIT); - - /* launch all slave lcores. */ - RTE_LCORE_FOREACH_SLAVE(i) { - if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0) - rte_eal_remote_launch(lcore_main, prm + i, i); - } - - /* launch master lcore. */ - i = rte_get_master_lcore(); - if (prm[i].be.lc != NULL || prm[i].fe.max_streams != 0) - lcore_main(prm + i); - - rte_eal_mp_wait_lcore(); - - for (i = 0; i != becfg.prt_num; i++) { - RTE_LOG(NOTICE, USER1, "%s: stoping port %u\n", - __func__, becfg.prt[i].id); - rte_eth_stats_get(becfg.prt[i].id, &stats); - RTE_LOG(NOTICE, USER1, "port %u stats={\n" - "ipackets=%" PRIu64 ";" - "ibytes=%" PRIu64 ";" - "ierrors=%" PRIu64 ";\n" - "opackets=%" PRIu64 ";" - "obytes=%" PRIu64 ";" - "oerrors=%" PRIu64 ";\n" - "}\n", - becfg.prt[i].id, - stats.ipackets, - stats.ibytes, - stats.ierrors, - stats.opackets, - stats.obytes, - stats.oerrors); - rte_eth_dev_stop(becfg.prt[i].id); - } - - netbe_lcore_fini(&becfg); - - return 0; -} diff --git a/examples/udpfwd/main_dpdk_legacy.h b/examples/udpfwd/main_dpdk_legacy.h deleted file mode 100644 index e4bff24..0000000 --- a/examples/udpfwd/main_dpdk_legacy.h +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef MAIN_DPDK_LEGACY_H_ -#define MAIN_DPDK_LEGACY_H_ - -#include "dpdk_version.h" - -/* - * Helper functions, verify the queue for corresponding UDP port. - */ -static uint8_t -verify_queue_for_port(const struct netbe_dev *prtq, const uint16_t lport) -{ - uint32_t align_nb_q, qid; - - align_nb_q = rte_align32pow2(prtq->port.nb_lcore); - qid = (lport % align_nb_q) % prtq->port.nb_lcore; - if (prtq->rxqid == qid) - return 1; - - return 0; -} - -/* - * UDP IPv4 destination lookup callback. - */ -static int -lpm4_dst_lookup(void *data, const struct in_addr *addr, - struct tle_udp_dest *res) -{ - int32_t rc; -#ifdef DPDK_VERSION_GE_1604 - uint32_t idx; -#else - uint8_t idx; -#endif - struct netbe_lcore *lc; - struct tle_udp_dest *dst; - - lc = data; - - rc = rte_lpm_lookup(lc->lpm4, rte_be_to_cpu_32(addr->s_addr), &idx); - if (rc == 0) { - dst = &lc->dst4[idx]; - rte_memcpy(res, dst, dst->l2_len + dst->l3_len + - offsetof(struct tle_udp_dest, hdr)); - } - return rc; -} - -static int -lcore_lpm_init(struct netbe_lcore *lc) -{ - int32_t sid; - char str[RTE_LPM_NAMESIZE]; -#ifdef DPDK_VERSION_GE_1604 - const struct rte_lpm_config lpm4_cfg = { - .max_rules = MAX_RULES, - .number_tbl8s = MAX_TBL8, - }; -#endif - const struct rte_lpm6_config lpm6_cfg = { - .max_rules = MAX_RULES, - .number_tbl8s = MAX_TBL8, - }; - - sid = rte_lcore_to_socket_id(lc->id); - - snprintf(str, sizeof(str), "LPM4%u\n", lc->id); -#ifdef DPDK_VERSION_GE_1604 - lc->lpm4 = rte_lpm_create(str, sid, &lpm4_cfg); -#else - lc->lpm4 = rte_lpm_create(str, sid, MAX_RULES, 0); -#endif - RTE_LOG(NOTICE, USER1, "%s(lcore=%u): lpm4=%p;\n", - __func__, lc->id, lc->lpm4); - if (lc->lpm4 == NULL) - return -ENOMEM; - - snprintf(str, sizeof(str), "LPM6%u\n", lc->id); - lc->lpm6 = rte_lpm6_create(str, sid, &lpm6_cfg); - RTE_LOG(NOTICE, USER1, "%s(lcore=%u): lpm6=%p;\n", - __func__, lc->id, lc->lpm6); - if (lc->lpm6 == NULL) - return -ENOMEM; - - return 0; -} - -/* - * Helper functions, finds BE by given local and remote addresses. - */ -static int -netbe_find4(const struct in_addr *laddr, const uint16_t lport, - const struct in_addr *raddr, const uint32_t be_lc) -{ - uint32_t i, j; -#ifdef DPDK_VERSION_GE_1604 - uint32_t idx; -#else - uint8_t idx; -#endif - struct netbe_lcore *bc; - - /* we have exactly one BE, use it for all traffic */ - if (becfg.cpu_num == 1) - return 0; - - /* search by provided be_lcore */ - if (be_lc != LCORE_ID_ANY) { - for (i = 0; i != becfg.cpu_num; i++) { - bc = becfg.cpu + i; - if (be_lc == bc->id) - return i; - } - RTE_LOG(NOTICE, USER1, "%s: no stream with be_lcore=%u\n", - __func__, be_lc); - return -ENOENT; - } - - /* search by local address */ - if (laddr->s_addr != INADDR_ANY) { - for (i = 0; i != becfg.cpu_num; i++) { - bc = becfg.cpu + i; - /* search by queue for the local port */ - for (j = 0; j != bc->prtq_num; j++) { - if (laddr->s_addr == bc->prtq[j].port.ipv4) { - - if (lport == 0) - return i; - - if (verify_queue_for_port(bc->prtq + j, - lport) != 0) - return i; - } - } - } - } - - /* search by remote address */ - if (raddr->s_addr != INADDR_ANY) { - for (i = 0; i != becfg.cpu_num; i++) { - bc = becfg.cpu + i; - if (rte_lpm_lookup(bc->lpm4, - rte_be_to_cpu_32(raddr->s_addr), - &idx) == 0) { - - if (lport == 0) - return i; - - /* search by queue for the local port */ - for (j = 0; j != bc->prtq_num; j++) - if (verify_queue_for_port(bc->prtq + j, - lport) != 0) - return i; - } - } - } - - return -ENOENT; -} - -#endif /* MAIN_DPDK_LEGACY_H_ */ diff --git a/examples/udpfwd/netbe.h b/examples/udpfwd/netbe.h deleted file mode 100644 index 1e5d9a7..0000000 --- a/examples/udpfwd/netbe.h +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __NETBE_H__ -#define __NETBE_H__ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define MAX_PKT_BURST 0x20 - -/* Used to allocate the memory for hash key. */ -#define RSS_HASH_KEY_LENGTH 64 - -/* - * BE related structures. - */ - -struct netbe_port { - uint32_t id; - uint32_t nb_lcore; - uint32_t *lcore; - uint32_t mtu; - uint32_t rx_offload; - uint32_t tx_offload; - uint32_t ipv4; - struct in6_addr ipv6; - struct ether_addr mac; - uint32_t hash_key_size; - uint8_t hash_key[RSS_HASH_KEY_LENGTH]; -}; - -struct netbe_dest { - uint32_t line; - uint32_t port; - uint32_t mtu; - uint32_t prfx; - uint16_t family; - union { - struct in_addr ipv4; - struct in6_addr ipv6; - }; - struct ether_addr mac; -}; - -struct netbe_dest_prm { - uint32_t nb_dest; - struct netbe_dest *dest; -}; - -struct pkt_buf { - uint32_t num; - struct rte_mbuf *pkt[2 * MAX_PKT_BURST]; -}; - -struct netbe_dev { - uint16_t rxqid; - uint16_t txqid; - struct netbe_port port; - struct tle_udp_dev *dev; - struct { - uint64_t in; - uint64_t up; - uint64_t drop; - } rx_stat; - struct { - uint64_t down; - uint64_t out; - uint64_t drop; - } tx_stat; - struct pkt_buf tx_buf; -}; - -/* 8 bit LPM user data. */ -#define LCORE_MAX_DST (UINT8_MAX + 1) - -struct netbe_lcore { - uint32_t id; - struct rte_lpm *lpm4; - struct rte_lpm6 *lpm6; - struct rte_ip_frag_tbl *ftbl; - struct tle_udp_ctx *ctx; - uint32_t prtq_num; - uint32_t dst4_num; - uint32_t dst6_num; - struct netbe_dev *prtq; - struct tle_udp_dest dst4[LCORE_MAX_DST]; - struct tle_udp_dest dst6[LCORE_MAX_DST]; - struct rte_ip_frag_death_row death_row; -}; - -struct netbe_cfg { - uint32_t promisc; - uint32_t prt_num; - uint32_t cpu_num; - struct netbe_port *prt; - struct netbe_lcore *cpu; -}; - -/* - * FE related structures. - */ - -enum { - RXONLY, - TXONLY, - RXTX, - FWD, -}; - -struct netfe_sprm { - uint32_t bidx; /* BE index to use. */ - struct tle_udp_stream_param prm; -}; - -struct netfe_stream_prm { - uint32_t lcore; - uint32_t be_lcore; - uint32_t line; - uint16_t op; - uint16_t txlen; /* valid/used only for TXONLY op. */ - struct netfe_sprm sprm; - struct netfe_sprm fprm; /* valid/used only for FWD op. */ -}; - -struct netfe_lcore_prm { - uint32_t max_streams; - uint32_t nb_streams; - struct netfe_stream_prm *stream; -}; - -struct netfe_stream { - struct tle_udp_stream *s; - struct tle_event *rxev; - struct tle_event *txev; - uint16_t op; - uint16_t family; - uint16_t txlen; - struct { - uint64_t rxp; - uint64_t txp; - uint64_t fwp; - uint64_t drops; - uint64_t rxev[TLE_SEV_NUM]; - uint64_t txev[TLE_SEV_NUM]; - } stat; - struct pkt_buf pbuf; - struct sockaddr_storage raddr; - struct netfe_sprm fwdprm; -}; - -struct netfe_lcore { - uint32_t snum; /* max number of streams */ - uint32_t sidx; /* last open stream index */ - struct tle_evq *rxeq; - struct tle_evq *txeq; - struct rte_hash *fw4h; - struct rte_hash *fw6h; - struct netfe_stream *fs; -}; - -struct lcore_prm { - struct { - struct netbe_lcore *lc; - } be; - struct netfe_lcore_prm fe; -}; - -/* - * debug/trace macros. - */ - -#define DUMMY_MACRO do {} while (0) - -#ifdef NETFE_DEBUG -#define NETFE_TRACE(fmt, arg...) printf(fmt, ##arg) -#define NETFE_PKT_DUMP(p) rte_pktmbuf_dump(stdout, (p), 64) -#else -#define NETFE_TRACE(fmt, arg...) DUMMY_MACRO -#define NETFE_PKT_DUMP(p) DUMMY_MACRO -#endif - -#ifdef NETBE_DEBUG -#define NETBE_TRACE(fmt, arg...) printf(fmt, ##arg) -#define NETBE_PKT_DUMP(p) rte_pktmbuf_dump(stdout, (p), 64) -#else -#define NETBE_TRACE(fmt, arg...) DUMMY_MACRO -#define NETBE_PKT_DUMP(p) DUMMY_MACRO -#endif - -#define FUNC_STAT(v, c) do { \ - static uint64_t nb_call, nb_data; \ - nb_call++; \ - nb_data += (v); \ - if ((nb_call & ((c) - 1)) == 0) { \ - printf("%s#%d@%u: nb_call=%lu, avg(" #v ")=%#Lf\n", \ - __func__, __LINE__, rte_lcore_id(), nb_call, \ - (long double)nb_data / nb_call); \ - nb_call = 0; \ - nb_data = 0; \ - } \ -} while (0) - -#define FUNC_TM_STAT(v, c) do { \ - static uint64_t nb_call, nb_data; \ - static uint64_t cts, pts, sts; \ - cts = rte_rdtsc(); \ - if (pts != 0) \ - sts += cts - pts; \ - pts = cts; \ - nb_call++; \ - nb_data += (v); \ - if ((nb_call & ((c) - 1)) == 0) { \ - printf("%s#%d@%u: nb_call=%lu, " \ - "avg(" #v ")=%#Lf, " \ - "avg(cycles)=%#Lf, " \ - "avg(cycles/" #v ")=%#Lf\n", \ - __func__, __LINE__, rte_lcore_id(), nb_call, \ - (long double)nb_data / nb_call, \ - (long double)sts / nb_call, \ - (long double)sts / nb_data); \ - nb_call = 0; \ - nb_data = 0; \ - sts = 0; \ - } \ -} while (0) - -int setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc, - uint16_t qid); - -#endif /* __NETBE_H__ */ diff --git a/examples/udpfwd/parse.c b/examples/udpfwd/parse.c deleted file mode 100644 index f46c7df..0000000 --- a/examples/udpfwd/parse.c +++ /dev/null @@ -1,638 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "netbe.h" -#include "parse.h" - -#define DEF_LINE_NUM 0x400 - -static const struct { - const char *name; - uint16_t op; -} name2feop[] = { - { .name = "rx", .op = RXONLY,}, - { .name = "tx", .op = TXONLY,}, - { .name = "echo", .op = RXTX,}, - { .name = "fwd", .op = FWD,}, -}; - -static int -parse_ipv4_val(__rte_unused const char *key, const char *val, void *prm) -{ - union parse_val *rv; - - rv = prm; - if (inet_pton(AF_INET, val, &rv->in.addr4) != 1) - return -EINVAL; - rv->in.family = AF_INET; - return 0; -} - -static int -parse_ipv6_val(__rte_unused const char *key, const char *val, void *prm) -{ - union parse_val *rv; - - rv = prm; - if (inet_pton(AF_INET6, val, &rv->in.addr6) != 1) - return -EINVAL; - rv->in.family = AF_INET6; - return 0; -} - -static int -parse_ip_val(__rte_unused const char *key, const char *val, void *prm) -{ - if (parse_ipv6_val(key, val, prm) != 0 && - parse_ipv4_val(key, val, prm) != 0) - return -EINVAL; - return 0; -} - -#define PARSE_UINT8x16(s, v, l) \ -do { \ - char *end; \ - unsigned long t; \ - errno = 0; \ - t = strtoul((s), &end, 16); \ - if (errno != 0 || end[0] != (l) || t > UINT8_MAX) \ - return -EINVAL; \ - (s) = end + 1; \ - (v) = t; \ -} while (0) - -static int -parse_mac_val(__rte_unused const char *key, const char *val, void *prm) -{ - union parse_val *rv; - const char *s; - - rv = prm; - s = val; - - PARSE_UINT8x16(s, rv->mac.addr_bytes[0], ':'); - PARSE_UINT8x16(s, rv->mac.addr_bytes[1], ':'); - PARSE_UINT8x16(s, rv->mac.addr_bytes[2], ':'); - PARSE_UINT8x16(s, rv->mac.addr_bytes[3], ':'); - PARSE_UINT8x16(s, rv->mac.addr_bytes[4], ':'); - PARSE_UINT8x16(s, rv->mac.addr_bytes[5], 0); - return 0; -} - -static int -parse_feop_val(__rte_unused const char *key, const char *val, void *prm) -{ - uint32_t i; - union parse_val *rv; - - rv = prm; - for (i = 0; i != RTE_DIM(name2feop); i++) { - if (strcmp(val, name2feop[i].name) == 0) { - rv->u64 = name2feop[i].op; - return 0; - } - } - - return -EINVAL; -} - -static int -parse_lcore_list_val(__rte_unused const char *key, const char *val, void *prm) -{ - union parse_val *rv; - unsigned long a, b; - uint32_t i; - char *end; - - rv = prm; - errno = 0; - a = strtoul(val, &end, 0); - if (errno != 0 || (end[0] != 0 && end[0] != '-') || a > UINT32_MAX) - return -EINVAL; - - if (end[0] == '-') { - val = end + 1; - errno = 0; - b = strtoul(val, &end, 0); - if (errno != 0 || end[0] != 0 || b > UINT32_MAX) - return -EINVAL; - } else - b = a; - - if (a <= b) { - for (i = a; i <= b; i++) - CPU_SET(i, &rv->cpuset); - } else { - RTE_LOG(ERR, USER1, - "%s: lcores not in ascending order\n", __func__); - return -EINVAL; - } - - return 0; -} - -static int -parse_kvargs(const char *arg, const char *keys_man[], uint32_t nb_man, - const char *keys_opt[], uint32_t nb_opt, - const arg_handler_t hndl[], union parse_val val[]) -{ - uint32_t j, k; - struct rte_kvargs *kvl; - - kvl = rte_kvargs_parse(arg, NULL); - if (kvl == NULL) { - RTE_LOG(ERR, USER1, - "%s: invalid parameter: %s\n", - __func__, arg); - return -EINVAL; - } - - for (j = 0; j != nb_man; j++) { - if (rte_kvargs_count(kvl, keys_man[j]) == 0) { - RTE_LOG(ERR, USER1, - "%s: %s missing mandatory key: %s\n", - __func__, arg, keys_man[j]); - rte_kvargs_free(kvl); - return -EINVAL; - } - } - - for (j = 0; j != nb_man; j++) { - if (rte_kvargs_process(kvl, keys_man[j], hndl[j], - val + j) != 0) { - RTE_LOG(ERR, USER1, - "%s: %s invalid value for man key: %s\n", - __func__, arg, keys_man[j]); - rte_kvargs_free(kvl); - return -EINVAL; - } - } - - for (j = 0; j != nb_opt; j++) { - k = j + nb_man; - if (rte_kvargs_process(kvl, keys_opt[j], hndl[k], - val + k) != 0) { - RTE_LOG(ERR, USER1, - "%s: %s invalid value for opt key: %s\n", - __func__, arg, keys_opt[j]); - rte_kvargs_free(kvl); - return -EINVAL; - } - } - - rte_kvargs_free(kvl); - return 0; -} - -int -parse_netbe_arg(struct netbe_port *prt, const char *arg, rte_cpuset_t *cpuset) -{ - int32_t rc; - uint32_t i, j, nc; - - static const char *keys_man[] = { - "port", - "lcore", - }; - - static const char *keys_opt[] = { - "mtu", - "rx_offload", - "tx_offload", - "ipv4", - "ipv6", - }; - - static const arg_handler_t hndl[] = { - parse_uint_val, - parse_lcore_list_val, - parse_uint_val, - parse_uint_val, - parse_uint_val, - parse_ipv4_val, - parse_ipv6_val, - }; - - union parse_val val[RTE_DIM(hndl)]; - - memset(val, 0, sizeof(val)); - val[2].u64 = ETHER_MAX_VLAN_FRAME_LEN - ETHER_CRC_LEN; - - rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man), - keys_opt, RTE_DIM(keys_opt), hndl, val); - if (rc != 0) - return rc; - - prt->id = val[0].u64; - - for (i = 0, nc = 0; i < RTE_MAX_LCORE; i++) - nc += CPU_ISSET(i, &val[1].cpuset); - prt->lcore = rte_zmalloc(NULL, nc * sizeof(prt->lcore[0]), - RTE_CACHE_LINE_SIZE); - prt->nb_lcore = nc; - - for (i = 0, j = 0; i < RTE_MAX_LCORE; i++) - if (CPU_ISSET(i, &val[1].cpuset)) - prt->lcore[j++] = i; - CPU_OR(cpuset, cpuset, &val[1].cpuset); - - prt->mtu = val[2].u64; - prt->rx_offload = val[3].u64; - prt->tx_offload = val[4].u64; - prt->ipv4 = val[5].in.addr4.s_addr; - prt->ipv6 = val[6].in.addr6; - - return 0; -} - -static int -check_netbe_dest(const struct netbe_dest *dst) -{ - if (dst->port >= RTE_MAX_ETHPORTS) { - RTE_LOG(ERR, USER1, "%s(line=%u) invalid port=%u", - __func__, dst->line, dst->port); - return -EINVAL; - } else if ((dst->family == AF_INET && - dst->prfx > sizeof(struct in_addr) * CHAR_BIT) || - (dst->family == AF_INET6 && - dst->prfx > sizeof(struct in6_addr) * CHAR_BIT)) { - RTE_LOG(ERR, USER1, "%s(line=%u) invalid masklen=%u", - __func__, dst->line, dst->prfx); - return -EINVAL; - } else if (dst->mtu > ETHER_MAX_JUMBO_FRAME_LEN - ETHER_CRC_LEN) { - RTE_LOG(ERR, USER1, "%s(line=%u) invalid mtu=%u", - __func__, dst->line, dst->mtu); - return -EINVAL; - } - return 0; -} - -static int -parse_netbe_dest(struct netbe_dest *dst, const char *arg) -{ - int32_t rc; - - static const char *keys_man[] = { - "port", - "addr", - "masklen", - "mac", - }; - - static const char *keys_opt[] = { - "mtu", - }; - - static const arg_handler_t hndl[] = { - parse_uint_val, - parse_ip_val, - parse_uint_val, - parse_mac_val, - parse_uint_val, - }; - - union parse_val val[RTE_DIM(hndl)]; - - /* set default values. */ - memset(val, 0, sizeof(val)); - val[4].u64 = ETHER_MAX_JUMBO_FRAME_LEN - ETHER_CRC_LEN; - - rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man), - keys_opt, RTE_DIM(keys_opt), hndl, val); - if (rc != 0) - return rc; - - dst->port = val[0].u64; - dst->family = val[1].in.family; - if (val[1].in.family == AF_INET) - dst->ipv4 = val[1].in.addr4; - else - dst->ipv6 = val[1].in.addr6; - dst->prfx = val[2].u64; - memcpy(&dst->mac, &val[3].mac, sizeof(dst->mac)); - dst->mtu = val[4].u64; - - return 0; -} - -int -netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm) -{ - uint32_t i, ln, n, num; - int32_t rc; - size_t sz; - char *s; - FILE *f; - struct netbe_dest *dp; - char line[LINE_MAX]; - - f = fopen(fname, "r"); - if (f == NULL) { - RTE_LOG(ERR, USER1, "%s failed to open file \"%s\"\n", - __func__, fname); - return -EINVAL; - } - - n = 0; - num = 0; - dp = NULL; - - for (ln = 0; fgets(line, sizeof(line), f) != NULL; ln++) { - - /* skip spaces at the start. */ - for (s = line; isspace(s[0]); s++) - ; - - /* skip comment line. */ - if (s[0] == '#' || s[0] == 0) - continue; - - /* skip spaces at the end. */ - for (i = strlen(s); i-- != 0 && isspace(s[i]); s[i] = 0) - ; - - if (n == num) { - num += DEF_LINE_NUM; - sz = sizeof(dp[0]) * num; - dp = realloc(dp, sizeof(dp[0]) * num); - if (dp == NULL) { - RTE_LOG(ERR, USER1, - "%s(%s) allocation of %zu bytes " - "failed\n", - __func__, fname, sz); - rc = -ENOMEM; - break; - } - memset(&dp[n], 0, sizeof(dp[0]) * (num - n)); - } - - dp[n].line = ln + 1; - if ((rc = parse_netbe_dest(dp + n, s)) != 0 || - (rc = check_netbe_dest(dp + n)) != 0) { - RTE_LOG(ERR, USER1, "%s(%s) failed to parse line %u\n", - __func__, fname, dp[n].line); - break; - } - n++; - } - - fclose(f); - - if (rc != 0) { - free(dp); - dp = NULL; - n = 0; - } - - prm->dest = dp; - prm->nb_dest = n; - return rc; -} - -static void -pv2saddr(struct sockaddr_storage *ss, const union parse_val *pva, - const union parse_val *pvp) -{ - ss->ss_family = pva->in.family; - if (pva->in.family == AF_INET) { - struct sockaddr_in *si = (struct sockaddr_in *)ss; - si->sin_addr = pva->in.addr4; - si->sin_port = rte_cpu_to_be_16((uint16_t)pvp->u64); - } else { - struct sockaddr_in6 *si = (struct sockaddr_in6 *)ss; - si->sin6_addr = pva->in.addr6; - si->sin6_port = rte_cpu_to_be_16((uint16_t)pvp->u64); - } -} - -static int -parse_netfe_arg(struct netfe_stream_prm *sp, const char *arg) -{ - int32_t rc; - - static const char *keys_man[] = { - "lcore", - "op", - "laddr", - "lport", - "raddr", - "rport", - }; - - static const char *keys_opt[] = { - "txlen", - "fwladdr", - "fwlport", - "fwraddr", - "fwrport", - "belcore", - }; - - static const arg_handler_t hndl[] = { - parse_uint_val, - parse_feop_val, - parse_ip_val, - parse_uint_val, - parse_ip_val, - parse_uint_val, - parse_uint_val, - parse_ip_val, - parse_uint_val, - parse_ip_val, - parse_uint_val, - parse_uint_val, - }; - - union parse_val val[RTE_DIM(hndl)]; - - memset(val, 0, sizeof(val)); - val[11].u64 = LCORE_ID_ANY; - rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man), - keys_opt, RTE_DIM(keys_opt), hndl, val); - if (rc != 0) - return rc; - sp->lcore = val[0].u64; - sp->op = val[1].u64; - pv2saddr(&sp->sprm.prm.local_addr, val + 2, val + 3); - pv2saddr(&sp->sprm.prm.remote_addr, val + 4, val + 5); - sp->txlen = val[6].u64; - pv2saddr(&sp->fprm.prm.local_addr, val + 7, val + 8); - pv2saddr(&sp->fprm.prm.remote_addr, val + 9, val + 10); - sp->be_lcore = val[11].u64; - - return 0; -} - -static const char * -format_feop(uint16_t op) -{ - uint32_t i; - - for (i = 0; i != RTE_DIM(name2feop); i++) { - if (name2feop[i].op == op) - return name2feop[i].name; - } - - return NULL; -} - -static int -is_addr_wc(const struct sockaddr_storage *sp) -{ - const struct sockaddr_in *i4; - const struct sockaddr_in6 *i6; - - if (sp->ss_family == AF_INET) { - i4 = (const struct sockaddr_in *)sp; - return (i4->sin_addr.s_addr == INADDR_ANY); - } else if (sp->ss_family == AF_INET6) { - i6 = (const struct sockaddr_in6 *)sp; - return (memcmp(&i6->sin6_addr, &in6addr_any, - sizeof(i6->sin6_addr)) == 0); - } - return 0; -} - -static int -check_netfe_arg(const struct netfe_stream_prm *sp) -{ - char buf[INET6_ADDRSTRLEN]; - - if (sp->sprm.prm.local_addr.ss_family != - sp->sprm.prm.remote_addr.ss_family) { - RTE_LOG(ERR, USER1, "invalid arg at line %u: " - "laddr and raddr for different protocols\n", - sp->line); - return -EINVAL; - } - - if (sp->op == TXONLY) { - if (sp->txlen > RTE_MBUF_DEFAULT_DATAROOM || sp->txlen == 0) { - RTE_LOG(ERR, USER1, "invalid arg at line %u: txlen=%u " - "exceeds allowed values: (0, %u]\n", - sp->line, sp->txlen, RTE_MBUF_DEFAULT_DATAROOM); - return -EINVAL; - } else if (is_addr_wc(&sp->sprm.prm.remote_addr)) { - RTE_LOG(ERR, USER1, "invalid arg at line %u: " - "raddr=%s are not allowed for op=%s;\n", - sp->line, - format_addr(&sp->sprm.prm.remote_addr, - buf, sizeof(buf)), - format_feop(sp->op)); - return -EINVAL; - } - } else if (sp->op == FWD) { - if (sp->fprm.prm.local_addr.ss_family != - sp->fprm.prm.remote_addr.ss_family) { - RTE_LOG(ERR, USER1, "invalid arg at line %u: " - "fwladdr and fwraddr for different protocols\n", - sp->line); - return -EINVAL; - } else if (is_addr_wc(&sp->fprm.prm.remote_addr)) { - RTE_LOG(ERR, USER1, "invalid arg at line %u: " - "fwaddr=%s are not allowed for op=%s;\n", - sp->line, - format_addr(&sp->fprm.prm.remote_addr, - buf, sizeof(buf)), - format_feop(sp->op)); - return -EINVAL; - } - } - - return 0; -} - -int -netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp) -{ - uint32_t i, ln, n, num; - int32_t rc; - size_t sz; - char *s; - FILE *f; - struct netfe_stream_prm *sp; - char line[LINE_MAX]; - - f = fopen(fname, "r"); - if (f == NULL) { - RTE_LOG(ERR, USER1, "%s failed to open file \"%s\"\n", - __func__, fname); - return -EINVAL; - } - - n = 0; - num = 0; - sp = NULL; - - for (ln = 0; fgets(line, sizeof(line), f) != NULL; ln++) { - - /* skip spaces at the start. */ - for (s = line; isspace(s[0]); s++) - ; - - /* skip comment line. */ - if (s[0] == '#' || s[0] == 0) - continue; - - /* skip spaces at the end. */ - for (i = strlen(s); i-- != 0 && isspace(s[i]); s[i] = 0) - ; - - if (n == lp->max_streams) { - RTE_LOG(ERR, USER1, - "%s(%s) number of entries exceed max streams " - "value: %u\n", - __func__, fname, n); - rc = -EINVAL; - break; - } - - if (n == num) { - num += DEF_LINE_NUM; - sz = sizeof(sp[0]) * num; - sp = realloc(sp, sizeof(sp[0]) * num); - if (sp == NULL) { - RTE_LOG(ERR, USER1, - "%s(%s) allocation of %zu bytes " - "failed\n", - __func__, fname, sz); - rc = -ENOMEM; - break; - } - memset(&sp[n], 0, sizeof(sp[0]) * (num - n)); - } - - sp[n].line = ln + 1; - if ((rc = parse_netfe_arg(sp + n, s)) != 0 || - (rc = check_netfe_arg(sp + n)) != 0) { - RTE_LOG(ERR, USER1, "%s(%s) failed to parse line %u\n", - __func__, fname, sp[n].line); - break; - } - n++; - } - - fclose(f); - - if (rc != 0) { - free(sp); - sp = NULL; - n = 0; - } - - lp->stream = sp; - lp->nb_streams = n; - return rc; -} diff --git a/examples/udpfwd/parse.h b/examples/udpfwd/parse.h deleted file mode 100644 index 7df7671..0000000 --- a/examples/udpfwd/parse.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __PARSE_H__ -#define __PARSE_H__ - -#include - -#define PARSE_LIST_DELIM "-" - -union parse_val { - uint64_t u64; - struct { - uint16_t family; - union { - struct in_addr addr4; - struct in6_addr addr6; - }; - } in; - struct ether_addr mac; - rte_cpuset_t cpuset; -}; - -static int -parse_uint_val(__rte_unused const char *key, const char *val, void *prm) -{ - union parse_val *rv; - unsigned long v; - char *end; - - rv = prm; - errno = 0; - v = strtoul(val, &end, 0); - if (errno != 0 || end[0] != 0 || v > UINT32_MAX) - return -EINVAL; - - rv->u64 = v; - return 0; -} - -static const char * -format_addr(const struct sockaddr_storage *sp, char buf[], size_t len) -{ - const struct sockaddr_in *i4; - const struct sockaddr_in6 *i6; - const void *addr; - - if (sp->ss_family == AF_INET) { - i4 = (const struct sockaddr_in *)sp; - addr = &i4->sin_addr; - } else if (sp->ss_family == AF_INET6) { - i6 = (const struct sockaddr_in6 *)sp; - addr = &i6->sin6_addr; - } else - return NULL; - - - return inet_ntop(sp->ss_family, addr, buf, len); -} - -int parse_netbe_arg(struct netbe_port *prt, const char *arg, - rte_cpuset_t *cpuset); - -int netbe_parse_dest(const char *fname, struct netbe_dest_prm *prm); - -int netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp); - -#endif /* __PARSE_H__ */ - diff --git a/examples/udpfwd/pkt.c b/examples/udpfwd/pkt.c deleted file mode 100644 index a68e428..0000000 --- a/examples/udpfwd/pkt.c +++ /dev/null @@ -1,509 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "netbe.h" -#include - -static inline uint64_t -_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso, - uint64_t ol3, uint64_t ol2) -{ - return il2 | il3 << 7 | il4 << 16 | tso << 24 | ol3 << 40 | ol2 << 49; -} - -static inline void -fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4) -{ - m->tx_offload = _mbuf_tx_offload(l2, l3, l4, 0, 0, 0); -} - -static inline int -is_ipv4_frag(const struct ipv4_hdr *iph) -{ - const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG); - - return ((mask & iph->fragment_offset) != 0); -} - -static inline void -fill_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, - uint32_t frag) -{ - const struct ipv4_hdr *iph; - int32_t dlen, len; - - dlen = rte_pktmbuf_data_len(m); - dlen -= l2 + sizeof(struct udp_hdr); - - iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2); - len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER; - - if (frag != 0 && is_ipv4_frag(iph)) { - m->packet_type &= ~RTE_PTYPE_L4_MASK; - m->packet_type |= RTE_PTYPE_L4_FRAG; - } - - if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto)) - m->packet_type = RTE_PTYPE_UNKNOWN; - else - fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr)); -} - -static inline int -ipv6x_hdr(uint32_t proto) -{ - return (proto == IPPROTO_HOPOPTS || - proto == IPPROTO_ROUTING || - proto == IPPROTO_FRAGMENT || - proto == IPPROTO_AH || - proto == IPPROTO_NONE || - proto == IPPROTO_DSTOPTS); -} - -static inline void -fill_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto, - uint32_t fproto) -{ - const struct ip6_ext *ipx; - int32_t dlen, len, ofs; - - len = sizeof(struct ipv6_hdr); - - dlen = rte_pktmbuf_data_len(m); - dlen -= l2 + sizeof(struct udp_hdr); - - ofs = l2 + len; - ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs); - - while (ofs > 0 && len < dlen) { - - switch (nproto) { - case IPPROTO_HOPOPTS: - case IPPROTO_ROUTING: - case IPPROTO_DSTOPTS: - ofs = (ipx->ip6e_len + 1) << 3; - break; - case IPPROTO_AH: - ofs = (ipx->ip6e_len + 2) << 2; - break; - case IPPROTO_FRAGMENT: - /* - * tso_segsz is not used by RX, so suse it as temporary - * buffer to store the fragment offset. - */ - m->tso_segsz = ofs; - ofs = sizeof(struct ip6_frag); - m->packet_type &= ~RTE_PTYPE_L4_MASK; - m->packet_type |= RTE_PTYPE_L4_FRAG; - break; - default: - ofs = 0; - } - - if (ofs > 0) { - nproto = ipx->ip6e_nxt; - len += ofs; - ipx += ofs / sizeof(*ipx); - } - } - - /* undercognised or invalid packet. */ - if ((ofs == 0 && nproto != fproto) || len > dlen) - m->packet_type = RTE_PTYPE_UNKNOWN; - else - fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr)); -} - -static inline void -fill_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto) -{ - const struct ipv6_hdr *iph; - - iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, - sizeof(struct ether_hdr)); - - if (iph->proto == fproto) - fill_pkt_hdr_len(m, l2, sizeof(struct ipv6_hdr), - sizeof(struct udp_hdr)); - else if (ipv6x_hdr(iph->proto) != 0) - fill_ipv6x_hdr_len(m, l2, iph->proto, fproto); -} - -static inline void -fill_eth_hdr_len(struct rte_mbuf *m) -{ - uint32_t dlen, l2; - uint16_t etp; - const struct ether_hdr *eth; - - dlen = rte_pktmbuf_data_len(m); - - /* check that first segment is at least 42B long. */ - if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + - sizeof(struct udp_hdr)) { - m->packet_type = RTE_PTYPE_UNKNOWN; - return; - } - - l2 = sizeof(*eth); - - eth = rte_pktmbuf_mtod(m, const struct ether_hdr *); - etp = eth->ether_type; - if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN)) - l2 += sizeof(struct vlan_hdr); - - if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) { - m->packet_type = RTE_PTYPE_L4_UDP | - RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L2_ETHER; - fill_ipv4_hdr_len(m, l2, IPPROTO_UDP, 1); - } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) && - dlen >= l2 + sizeof(struct ipv6_hdr) + - sizeof(struct udp_hdr)) { - m->packet_type = RTE_PTYPE_L4_UDP | - RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L2_ETHER; - fill_ipv6_hdr_len(m, l2, IPPROTO_UDP); - } else - m->packet_type = RTE_PTYPE_UNKNOWN; -} - -static inline uint16_t -ipv4x_cksum(const void *iph, size_t len) -{ - uint16_t cksum; - - cksum = rte_raw_cksum(iph, len); - return (cksum == 0xffff) ? cksum : ~cksum; -} - -static inline void -fix_reassembled(struct rte_mbuf *m, int32_t hwcsum) -{ - struct ipv4_hdr *iph; - - /* update packet type. */ - m->packet_type &= ~RTE_PTYPE_L4_MASK; - m->packet_type |= RTE_PTYPE_L4_UDP; - - /* fix reassemble setting TX flags. */ - m->ol_flags &= ~PKT_TX_IP_CKSUM; - - /* fix l3_len after reassemble. */ - if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) - m->l3_len = m->l3_len - sizeof(struct ipv6_extension_fragment); - - /* recalculate ipv4 cksum after reassemble. */ - else if (hwcsum == 0 && RTE_ETH_IS_IPV4_HDR(m->packet_type)) { - iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); - iph->hdr_checksum = ipv4x_cksum(iph, m->l3_len); - } -} - -static struct rte_mbuf * -reassemble(struct rte_mbuf *m, struct netbe_lcore *lc, uint64_t tms, - uint8_t port) -{ - uint32_t l3cs; - struct rte_ip_frag_tbl *tbl; - struct rte_ip_frag_death_row *dr; - - tbl = lc->ftbl; - dr = &lc->death_row; - l3cs = lc->prtq[port].port.rx_offload & DEV_RX_OFFLOAD_IPV4_CKSUM; - - if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { - - struct ipv4_hdr *iph; - - iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); - - /* process this fragment. */ - m = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, iph); - - } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { - - struct ipv6_hdr *iph; - struct ipv6_extension_fragment *fhdr; - - iph = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len); - - /* - * we store fragment header offset in tso_segsz before - * temporary, just to avoid another scan of ipv6 header. - */ - fhdr = rte_pktmbuf_mtod_offset(m, - struct ipv6_extension_fragment *, m->tso_segsz); - m->tso_segsz = 0; - - /* process this fragment. */ - m = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, iph, fhdr); - - } else { - rte_pktmbuf_free(m); - m = NULL; - } - - /* got reassembled packet. */ - if (m != NULL) - fix_reassembled(m, l3cs); - - return m; -} - -/* exclude NULLs from the final list of packets. */ -static inline uint32_t -compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero) -{ - uint32_t i, j, k, l; - - for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) { - - /* found a hole. */ - if (pkt[j] == NULL) { - - /* find how big is it. */ - for (i = j; i-- != 0 && pkt[i] == NULL; ) - ; - /* fill the hole. */ - for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++) - pkt[l] = pkt[k]; - - nb_pkt -= j - i; - nb_zero -= j - i; - j = i + 1; - } - } - - return nb_pkt; -} - -/* - * HW can recognise L2/L3 with/without extentions/L4 (ixgbe/igb/fm10k) - */ -static uint16_t __rte_unused -type0_rx_callback(uint8_t port, __rte_unused uint16_t queue, - struct rte_mbuf *pkt[], uint16_t nb_pkts, - __rte_unused uint16_t max_pkts, void *user_param) -{ - uint32_t j, tp, x; - uint64_t cts; - struct netbe_lcore *lc; - - lc = user_param; - cts = 0; - - x = 0; - for (j = 0; j != nb_pkts; j++) { - - NETBE_PKT_DUMP(pkt[j]); - - tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | - RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); - - switch (tp) { - /* non fragmented udp packets. */ - case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4 | - RTE_PTYPE_L2_ETHER): - fill_pkt_hdr_len(pkt[j], sizeof(struct ether_hdr), - sizeof(struct ipv4_hdr), - sizeof(struct udp_hdr)); - break; - case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6 | - RTE_PTYPE_L2_ETHER): - fill_pkt_hdr_len(pkt[j], sizeof(struct ether_hdr), - sizeof(struct ipv6_hdr), - sizeof(struct udp_hdr)); - break; - case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT | - RTE_PTYPE_L2_ETHER): - fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr), - UINT32_MAX, 0); - break; - case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT | - RTE_PTYPE_L2_ETHER): - fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr), - IPPROTO_UDP); - break; - /* possibly fragmented udp packets. */ - case (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER): - case (RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER): - fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr), - IPPROTO_UDP, 1); - break; - case (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER): - case (RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER): - fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr), - IPPROTO_UDP); - break; - default: - /* treat packet types as invalid. */ - pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; - break; - } - - /* - * if it is a fragment, try to reassemble it, - * if by some reason it can't be done, then - * set pkt[] entry to NULL. - */ - if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == - RTE_PTYPE_L4_FRAG) { - cts = (cts == 0) ? rte_rdtsc() : cts; - pkt[j] = reassemble(pkt[j], lc, cts, port); - x += (pkt[j] == NULL); - } - } - - /* reassemble was invoked, cleanup its death-row. */ - if (cts != 0) - rte_ip_frag_free_death_row(&lc->death_row, 0); - - if (x == 0) - return nb_pkts; - - NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " - "%u non-reassembled fragments;\n", - __func__, port, queue, nb_pkts, x); - - return compress_pkt_list(pkt, nb_pkts, x); -} - -/* - * HW can recognise L2/L3/L4 and fragments (i40e). - */ -static uint16_t __rte_unused -type1_rx_callback(uint8_t port, __rte_unused uint16_t queue, - struct rte_mbuf *pkt[], uint16_t nb_pkts, - __rte_unused uint16_t max_pkts, void *user_param) -{ - uint32_t j, tp, x; - uint64_t cts; - struct netbe_lcore *lc; - - lc = user_param; - cts = 0; - - x = 0; - for (j = 0; j != nb_pkts; j++) { - - NETBE_PKT_DUMP(pkt[j]); - - tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | - RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); - - switch (tp) { - case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L2_ETHER): - fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr), - UINT32_MAX, 0); - break; - case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L2_ETHER): - fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr), - IPPROTO_UDP); - break; - case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L2_ETHER): - fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr), - IPPROTO_UDP, 0); - break; - case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L2_ETHER): - fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr), - IPPROTO_UDP); - break; - default: - /* treat packet types as invalid. */ - pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; - break; - } - - /* - * if it is a fragment, try to reassemble it, - * if by some reason it can't be done, then - * set pkt[] entry to NULL. - */ - if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == - RTE_PTYPE_L4_FRAG) { - cts = (cts == 0) ? rte_rdtsc() : cts; - pkt[j] = reassemble(pkt[j], lc, cts, port); - x += (pkt[j] == NULL); - } - } - - /* reassemble was invoked, cleanup its death-row. */ - if (cts != 0) - rte_ip_frag_free_death_row(&lc->death_row, 0); - - if (x == 0) - return nb_pkts; - - NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " - "%u non-reassembled fragments;\n", - __func__, port, queue, nb_pkts, x); - - return compress_pkt_list(pkt, nb_pkts, x); -} - -/* - * generic, assumes HW doesn't recognise any packet type. - */ -static uint16_t -typen_rx_callback(uint8_t port, __rte_unused uint16_t queue, - struct rte_mbuf *pkt[], uint16_t nb_pkts, - __rte_unused uint16_t max_pkts, void *user_param) -{ - uint32_t j, x; - uint64_t cts; - struct netbe_lcore *lc; - - lc = user_param; - cts = 0; - - x = 0; - for (j = 0; j != nb_pkts; j++) { - - NETBE_PKT_DUMP(pkt[j]); - fill_eth_hdr_len(pkt[j]); - - /* - * if it is a fragment, try to reassemble it, - * if by some reason it can't be done, then - * set pkt[] entry to NULL. - */ - if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == - RTE_PTYPE_L4_FRAG) { - cts = (cts == 0) ? rte_rdtsc() : cts; - pkt[j] = reassemble(pkt[j], lc, cts, port); - x += (pkt[j] == NULL); - } - } - - /* reassemble was invoked, cleanup its death-row. */ - if (cts != 0) - rte_ip_frag_free_death_row(&lc->death_row, 0); - - if (x == 0) - return nb_pkts; - - NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " - "%u non-reassembled fragments;\n", - __func__, port, queue, nb_pkts, x); - - return compress_pkt_list(pkt, nb_pkts, x); -} - -#include "pkt_dpdk_legacy.h" diff --git a/examples/udpfwd/pkt_dpdk_legacy.h b/examples/udpfwd/pkt_dpdk_legacy.h deleted file mode 100644 index c32f044..0000000 --- a/examples/udpfwd/pkt_dpdk_legacy.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2016 Intel Corporation. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef PKT_DPDK_LEGACY_H_ -#define PKT_DPDK_LEGACY_H_ - -#include "dpdk_version.h" - -#ifdef DPDK_VERSION_GE_1604 - -int -setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc, - uint16_t qid) -{ - int32_t i, rc; - uint32_t smask; - void *cb; - - const uint32_t pmask = RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK | - RTE_PTYPE_L4_MASK; - - enum { - ETHER_PTYPE = 0x1, - IPV4_PTYPE = 0x2, - IPV4_EXT_PTYPE = 0x4, - IPV6_PTYPE = 0x8, - IPV6_EXT_PTYPE = 0x10, - UDP_PTYPE = 0x20, - }; - - static const struct { - uint32_t mask; - const char *name; - rte_rx_callback_fn fn; - } ptype2cb[] = { - { - .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE | - IPV6_PTYPE | IPV6_EXT_PTYPE | UDP_PTYPE, - .name = "HW l2/l3x/l4 ptype", - .fn = type0_rx_callback, - }, - { - .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE | - UDP_PTYPE, - .name = "HW l2/l3/l4 ptype", - .fn = type1_rx_callback, - }, - { - .mask = 0, - .name = "no HW ptype", - .fn = typen_rx_callback, - }, - }; - - smask = 0; - rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, NULL, 0); - if (rc < 0) { - RTE_LOG(ERR, USER1, - "%s(port=%u) failed to get supported ptypes;\n", - __func__, uprt->id); - return rc; - } - - uint32_t ptype[rc]; - rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, ptype, rc); - - for (i = 0; i != rc; i++) { - switch (ptype[i]) { - case RTE_PTYPE_L2_ETHER: - smask |= ETHER_PTYPE; - break; - case RTE_PTYPE_L3_IPV4: - case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN: - smask |= IPV4_PTYPE; - break; - case RTE_PTYPE_L3_IPV4_EXT: - smask |= IPV4_EXT_PTYPE; - break; - case RTE_PTYPE_L3_IPV6: - case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN: - smask |= IPV6_PTYPE; - break; - case RTE_PTYPE_L3_IPV6_EXT: - smask |= IPV6_EXT_PTYPE; - break; - case RTE_PTYPE_L4_UDP: - smask |= UDP_PTYPE; - break; - } - } - - for (i = 0; i != RTE_DIM(ptype2cb); i++) { - if ((smask & ptype2cb[i].mask) == ptype2cb[i].mask) { - cb = rte_eth_add_rx_callback(uprt->id, qid, - ptype2cb[i].fn, lc); - rc = -rte_errno; - RTE_LOG(ERR, USER1, - "%s(port=%u), setup RX callback \"%s\" " - "returns %p;\n", - __func__, uprt->id, ptype2cb[i].name, cb); - return ((cb == NULL) ? rc : 0); - } - } - - /* no proper callback found. */ - RTE_LOG(ERR, USER1, - "%s(port=%u) failed to find an appropriate callback;\n", - __func__, uprt->id); - return -ENOENT; -} - -#else - -int -setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc, - uint16_t qid) -{ - void *cb; - int32_t rc; - - cb = rte_eth_add_rx_callback(uprt->id, qid, typen_rx_callback, lc); - rc = -rte_errno; - RTE_LOG(ERR, USER1, - "%s(port=%u), setup RX callback \"%s\" " - "returns %p;\n", - __func__, uprt->id, "no HW ptype", cb); - - return ((cb == NULL) ? rc : 0); -} - -#endif /* DPDK_VERSION_GE_1604 */ - -#endif /* PKT_DPDK_LEGACY_H_ */ -- cgit 1.2.3-korg