author    Konstantin Ananyev <konstantin.ananyev@intel.com>    2017-02-21 18:12:20 +0000
committer Konstantin Ananyev <konstantin.ananyev@intel.com>    2017-02-24 16:37:08 +0000
commit    aa97dd1ce910b839fed46ad55d1e70e403f5a930 (patch)
tree      f6f0fd494eaf499859bff9f20f5ddfac9ab99233 /lib
parent    f5f10013ffef8e4ac1071087b8492fe6380d98fe (diff)
Introduce first version of TCP code.

Supported functionality:
- open/close
- listen/accept/connect
- send/recv

In order to achieve that, the libtle_udp library was reworked into the
libtle_l4p library, which supports both TCP and UDP protocols.
A new libtle_timer library was introduced (thanks to the Cisco guys and
Dave Barach <dbarach@cisco.com> for sharing their timer code with us).
The sample application was also reworked significantly to support both
TCP and UDP traffic handling.
New unit tests were introduced.

Change-Id: I806b05011f521e89b58db403cfdd484a37beb775
Signed-off-by: Mohammad Abdul Awal <mohammad.abdul.awal@intel.com>
Signed-off-by: Karol Latecki <karolx.latecki@intel.com>
Signed-off-by: Daniel Mrzyglod <danielx.t.mrzyglod@intel.com>
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
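As a hypothetical illustration of the advertised call flow: the tle_ctx_* functions appear in ctx.c below, while the tle_tcp_stream_* names are inferred from the commit description and the new tle_tcp.h header (its full content is not shown here), so the exact signatures are assumptions, not verified usage:

    /* hedged sketch of a server-side stream lifecycle; names and
     * signatures assumed, error handling omitted. */
    static void
    echo_once(struct tle_ctx *ctx, const struct tle_tcp_stream_param *prm)
    {
        struct tle_stream *ls, *cs;
        struct rte_mbuf *pkt[MAX_PKT_BURST];
        uint16_t n;

        ls = tle_tcp_stream_open(ctx, prm);              /* open */
        tle_tcp_stream_listen(ls);                       /* listen */

        /* a backend loop is expected to feed the context with
         * tle_tcp_rx_bulk()/tle_tcp_tx_bulk() in the meantime. */
        while (tle_tcp_stream_accept(ls, &cs, 1) != 1)   /* accept */
            ;

        n = tle_tcp_stream_recv(cs, pkt, RTE_DIM(pkt));  /* recv */
        tle_tcp_stream_send(cs, pkt, n);                 /* send */

        tle_tcp_stream_close(cs);                        /* close */
        tle_tcp_stream_close(ls);
    }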
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Makefile | 3
-rw-r--r-- | lib/libtle_dring/tle_dring.h | 6
-rw-r--r-- | lib/libtle_l4p/Makefile (renamed from lib/libtle_udp/Makefile) | 16
-rw-r--r-- | lib/libtle_l4p/ctx.c | 527
-rw-r--r-- | lib/libtle_l4p/ctx.h | 86
-rw-r--r-- | lib/libtle_l4p/debug.h | 81
-rw-r--r-- | lib/libtle_l4p/event.c (renamed from lib/libtle_udp/event.c) | 0
-rw-r--r-- | lib/libtle_l4p/misc.h (renamed from lib/libtle_udp/misc.h) | 116
-rw-r--r-- | lib/libtle_l4p/net_misc.h | 78
-rw-r--r-- | lib/libtle_l4p/osdep.h (renamed from lib/libtle_udp/osdep.h) | 6
-rw-r--r-- | lib/libtle_l4p/port_bitmap.h (renamed from lib/libtle_udp/port_bitmap.h) | 14
-rw-r--r-- | lib/libtle_l4p/stream.h | 170
-rw-r--r-- | lib/libtle_l4p/stream_table.c | 80
-rw-r--r-- | lib/libtle_l4p/stream_table.h | 260
-rw-r--r-- | lib/libtle_l4p/syncookie.h | 194
-rw-r--r-- | lib/libtle_l4p/tcp_ctl.h | 120
-rw-r--r-- | lib/libtle_l4p/tcp_misc.h | 462
-rw-r--r-- | lib/libtle_l4p/tcp_ofo.c | 85
-rw-r--r-- | lib/libtle_l4p/tcp_ofo.h | 249
-rw-r--r-- | lib/libtle_l4p/tcp_rxq.h | 149
-rw-r--r-- | lib/libtle_l4p/tcp_rxtx.c | 2431
-rw-r--r-- | lib/libtle_l4p/tcp_stream.c | 522
-rw-r--r-- | lib/libtle_l4p/tcp_stream.h | 170
-rw-r--r-- | lib/libtle_l4p/tcp_timer.h | 126
-rw-r--r-- | lib/libtle_l4p/tcp_txq.h | 122
-rw-r--r-- | lib/libtle_l4p/tle_ctx.h | 233
-rw-r--r-- | lib/libtle_l4p/tle_event.h (renamed from lib/libtle_udp/tle_event.h) | 21
-rw-r--r-- | lib/libtle_l4p/tle_tcp.h | 395
-rw-r--r-- | lib/libtle_l4p/tle_udp.h | 187
-rw-r--r-- | lib/libtle_l4p/udp_rxtx.c (renamed from lib/libtle_udp/udp_rxtx.c) | 219
-rw-r--r-- | lib/libtle_l4p/udp_stream.c | 346
-rw-r--r-- | lib/libtle_l4p/udp_stream.h | 79
-rw-r--r-- | lib/libtle_timer/Makefile | 38
-rw-r--r-- | lib/libtle_timer/timer.c | 364
-rw-r--r-- | lib/libtle_timer/tle_timer.h | 94
-rw-r--r-- | lib/libtle_udp/tle_udp_impl.h | 384
-rw-r--r-- | lib/libtle_udp/udp_ctl.c | 794
-rw-r--r-- | lib/libtle_udp/udp_impl.h | 166
38 files changed, 7868 insertions, 1525 deletions
diff --git a/lib/Makefile b/lib/Makefile
index 8ce9bac..201f078 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,6 +22,7 @@ endif
include $(RTE_SDK)/mk/rte.vars.mk
DIRS-y += libtle_dring
-DIRS-y += libtle_udp
+DIRS-y += libtle_timer
+DIRS-y += libtle_l4p
include $(TLDK_ROOT)/mk/tle.subdir.mk
diff --git a/lib/libtle_dring/tle_dring.h b/lib/libtle_dring/tle_dring.h
index e89679d..f589ece 100644
--- a/lib/libtle_dring/tle_dring.h
+++ b/lib/libtle_dring/tle_dring.h
@@ -81,6 +81,12 @@ struct tle_dring {
struct tle_drb dummy; /**< dummy block */
};
+static inline uint32_t
+tle_dring_count(const struct tle_dring *dr)
+{
+ return dr->prod.tail - dr->cons.tail;
+}
+
/*
* helper routine, to copy objects to/from the ring.
*/
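The new tle_dring_count() leans on unsigned modular arithmetic: with free-running uint32_t producer/consumer tails, prod.tail - cons.tail yields the number of queued objects even after either counter wraps. A standalone illustration of that arithmetic (not TLDK code):

    #include <stdint.h>
    #include <assert.h>

    int
    main(void)
    {
        uint32_t prod = UINT32_MAX - 1;    /* about to wrap */
        uint32_t cons = UINT32_MAX - 5;

        assert(prod - cons == 4);          /* 4 objects in flight */
        prod += 6;                         /* wraps around to 4 */
        assert(prod - cons == 10);         /* difference still correct */
        return 0;
    }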
diff --git a/lib/libtle_udp/Makefile b/lib/libtle_l4p/Makefile
index 44cb6aa..c0d3e80 100644
--- a/lib/libtle_udp/Makefile
+++ b/lib/libtle_l4p/Makefile
@@ -21,25 +21,33 @@ RTE_TARGET ?= x86_64-native-linuxapp-gcc
include $(RTE_SDK)/mk/rte.vars.mk
# library name
-LIB = libtle_udp.a
+LIB = libtle_l4p.a
CFLAGS += -O3
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
-EXPORT_MAP := tle_udp_version.map
+EXPORT_MAP := tle_l4p_version.map
LIBABIVER := 1
#source files
+SRCS-y += ctx.c
SRCS-y += event.c
-SRCS-y += udp_ctl.c
+SRCS-y += stream_table.c
+SRCS-y += tcp_ofo.c
+SRCS-y += tcp_stream.c
+SRCS-y += tcp_rxtx.c
+SRCS-y += udp_stream.c
SRCS-y += udp_rxtx.c
# install this header file
-SYMLINK-y-include += tle_udp_impl.h
+SYMLINK-y-include += tle_ctx.h
SYMLINK-y-include += tle_event.h
+SYMLINK-y-include += tle_tcp.h
+SYMLINK-y-include += tle_udp.h
# this lib dependencies
DEPDIRS-y += lib/libtle_dring
+DEPDIRS-y += lib/libtle_timer
include $(TLDK_ROOT)/mk/tle.lib.mk
diff --git a/lib/libtle_l4p/ctx.c b/lib/libtle_l4p/ctx.c
new file mode 100644
index 0000000..7ebef9d
--- /dev/null
+++ b/lib/libtle_l4p/ctx.c
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_ip.h>
+
+#include "stream.h"
+#include "misc.h"
+
+#define LPORT_START 0x8000
+#define LPORT_END MAX_PORT_NUM
+
+#define LPORT_START_BLK PORT_BLK(LPORT_START)
+#define LPORT_END_BLK PORT_BLK(LPORT_END)
+
+const struct in6_addr tle_ipv6_any = IN6ADDR_ANY_INIT;
+const struct in6_addr tle_ipv6_none = {
+ {
+ .__u6_addr32 = {
+ UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX
+ },
+ },
+};
+
+struct stream_ops tle_stream_ops[TLE_PROTO_NUM] = {};
+
+static int
+check_dev_prm(const struct tle_dev_param *dev_prm)
+{
+ /* no valid IPv4/IPv6 addresses provided. */
+ if (dev_prm->local_addr4.s_addr == INADDR_ANY &&
+ memcmp(&dev_prm->local_addr6, &tle_ipv6_any,
+ sizeof(tle_ipv6_any)) == 0)
+ return -EINVAL;
+
+ if (dev_prm->bl4.nb_port > UINT16_MAX ||
+ (dev_prm->bl4.nb_port != 0 &&
+ dev_prm->bl4.port == NULL))
+ return -EINVAL;
+
+ if (dev_prm->bl6.nb_port > UINT16_MAX ||
+ (dev_prm->bl6.nb_port != 0 &&
+ dev_prm->bl6.port == NULL))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+check_ctx_prm(const struct tle_ctx_param *prm)
+{
+ if (prm->proto >= TLE_PROTO_NUM)
+ return -EINVAL;
+ return 0;
+}
+
+struct tle_ctx *
+tle_ctx_create(const struct tle_ctx_param *ctx_prm)
+{
+ struct tle_ctx *ctx;
+ size_t sz;
+ uint32_t i;
+ int32_t rc;
+
+ if (ctx_prm == NULL || check_ctx_prm(ctx_prm) != 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ sz = sizeof(*ctx);
+ ctx = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ ctx_prm->socket_id);
+ if (ctx == NULL) {
+ UDP_LOG(ERR, "allocation of %zu bytes for new ctx "
+ "on socket %d failed\n",
+ sz, ctx_prm->socket_id);
+ return NULL;
+ }
+
+ ctx->prm = *ctx_prm;
+
+ rc = tle_stream_ops[ctx_prm->proto].init_streams(ctx);
+ if (rc != 0) {
+ UDP_LOG(ERR, "init_streams(ctx=%p, proto=%u) failed "
+ "with error code: %d;\n",
+ ctx, ctx_prm->proto, rc);
+ tle_ctx_destroy(ctx);
+ rte_errno = -rc;
+ return NULL;
+ }
+
+ for (i = 0; i != RTE_DIM(ctx->use); i++)
+ tle_pbm_init(ctx->use + i, LPORT_START_BLK);
+
+ ctx->streams.nb_free = ctx->prm.max_streams;
+ return ctx;
+}
+
+void
+tle_ctx_destroy(struct tle_ctx *ctx)
+{
+ uint32_t i;
+
+ if (ctx == NULL) {
+ rte_errno = EINVAL;
+ return;
+ }
+
+ for (i = 0; i != RTE_DIM(ctx->dev); i++)
+ tle_del_dev(ctx->dev + i);
+
+ tle_stream_ops[ctx->prm.proto].fini_streams(ctx);
+ rte_free(ctx);
+}
+
+void
+tle_ctx_invalidate(struct tle_ctx *ctx)
+{
+ RTE_SET_USED(ctx);
+}
+
+static void
+fill_pbm(struct tle_pbm *pbm, const struct tle_bl_port *blp)
+{
+ uint32_t i;
+
+ for (i = 0; i != blp->nb_port; i++)
+ tle_pbm_set(pbm, blp->port[i]);
+}
+
+static int
+init_dev_proto(struct tle_dev *dev, uint32_t idx, int32_t socket_id,
+ const struct tle_bl_port *blp)
+{
+ size_t sz;
+
+ sz = sizeof(*dev->dp[idx]);
+ dev->dp[idx] = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ socket_id);
+
+ if (dev->dp[idx] == NULL) {
+ UDP_LOG(ERR, "allocation of %zu bytes on "
+ "socket %d for %u-th device failed\n",
+ sz, socket_id, idx);
+ return ENOMEM;
+ }
+
+ tle_pbm_init(&dev->dp[idx]->use, LPORT_START_BLK);
+ fill_pbm(&dev->dp[idx]->use, blp);
+ return 0;
+}
+
+static struct tle_dev *
+find_free_dev(struct tle_ctx *ctx)
+{
+ uint32_t i;
+
+ if (ctx->nb_dev < RTE_DIM(ctx->dev)) {
+ for (i = 0; i != RTE_DIM(ctx->dev); i++) {
+ if (ctx->dev[i].ctx != ctx)
+ return ctx->dev + i;
+ }
+ }
+
+ rte_errno = ENODEV;
+ return NULL;
+}
+
+struct tle_dev *
+tle_add_dev(struct tle_ctx *ctx, const struct tle_dev_param *dev_prm)
+{
+ int32_t rc;
+ struct tle_dev *dev;
+
+ if (ctx == NULL || dev_prm == NULL || check_dev_prm(dev_prm) != 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ dev = find_free_dev(ctx);
+ if (dev == NULL)
+ return NULL;
+ rc = 0;
+
+ /* device can handle IPv4 traffic */
+ if (dev_prm->local_addr4.s_addr != INADDR_ANY) {
+ rc = init_dev_proto(dev, TLE_V4, ctx->prm.socket_id,
+ &dev_prm->bl4);
+ if (rc == 0)
+ fill_pbm(&ctx->use[TLE_V4], &dev_prm->bl4);
+ }
+
+ /* device can handle IPv6 traffic */
+ if (rc == 0 && memcmp(&dev_prm->local_addr6, &tle_ipv6_any,
+ sizeof(tle_ipv6_any)) != 0) {
+ rc = init_dev_proto(dev, TLE_V6, ctx->prm.socket_id,
+ &dev_prm->bl6);
+ if (rc == 0)
+ fill_pbm(&ctx->use[TLE_V6], &dev_prm->bl6);
+ }
+
+ if (rc != 0) {
+ /* cleanup and return an error. */
+ rte_free(dev->dp[TLE_V4]);
+ rte_free(dev->dp[TLE_V6]);
+ rte_errno = rc;
+ return NULL;
+ }
+
+ /* setup RX data. */
+ if (dev_prm->local_addr4.s_addr != INADDR_ANY &&
+ (dev_prm->rx_offload & DEV_RX_OFFLOAD_IPV4_CKSUM) == 0)
+ dev->rx.ol_flags[TLE_V4] |= PKT_RX_IP_CKSUM_BAD;
+
+ if (((dev_prm->rx_offload & DEV_RX_OFFLOAD_UDP_CKSUM) == 0 &&
+ ctx->prm.proto == TLE_PROTO_UDP) ||
+ ((dev_prm->rx_offload &
+ DEV_RX_OFFLOAD_TCP_CKSUM) == 0 &&
+ ctx->prm.proto == TLE_PROTO_TCP)) {
+ dev->rx.ol_flags[TLE_V4] |= PKT_RX_L4_CKSUM_BAD;
+ dev->rx.ol_flags[TLE_V6] |= PKT_RX_L4_CKSUM_BAD;
+ }
+
+ /* setup TX data. */
+ tle_dring_reset(&dev->tx.dr);
+
+ if ((dev_prm->tx_offload & DEV_TX_OFFLOAD_UDP_CKSUM) != 0 &&
+ ctx->prm.proto == TLE_PROTO_UDP) {
+ dev->tx.ol_flags[TLE_V4] |= PKT_TX_IPV4 | PKT_TX_UDP_CKSUM;
+ dev->tx.ol_flags[TLE_V6] |= PKT_TX_IPV6 | PKT_TX_UDP_CKSUM;
+ } else if ((dev_prm->tx_offload & DEV_TX_OFFLOAD_TCP_CKSUM) != 0 &&
+ ctx->prm.proto == TLE_PROTO_TCP) {
+ dev->tx.ol_flags[TLE_V4] |= PKT_TX_IPV4 | PKT_TX_TCP_CKSUM;
+ dev->tx.ol_flags[TLE_V6] |= PKT_TX_IPV6 | PKT_TX_TCP_CKSUM;
+ }
+
+ if ((dev_prm->tx_offload & DEV_TX_OFFLOAD_IPV4_CKSUM) != 0)
+ dev->tx.ol_flags[TLE_V4] |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
+
+ dev->prm = *dev_prm;
+ dev->ctx = ctx;
+ ctx->nb_dev++;
+
+ return dev;
+}
+
+static void
+empty_dring(struct tle_dring *dr, uint32_t proto)
+{
+ uint32_t i, k, n;
+ struct tle_stream *s;
+ struct rte_mbuf *pkt[MAX_PKT_BURST];
+ struct tle_drb *drb[MAX_PKT_BURST];
+
+ do {
+ k = RTE_DIM(drb);
+ n = tle_dring_sc_dequeue(dr, (const void **)(uintptr_t)pkt,
+ RTE_DIM(pkt), drb, &k);
+
+ /* free mbufs */
+ for (i = 0; i != n; i++)
+ rte_pktmbuf_free(pkt[i]);
+ /* free drbs */
+ for (i = 0; i != k; i++) {
+ s = drb[i]->udata;
+ tle_stream_ops[proto].free_drbs(s, drb + i, 1);
+ }
+ } while (n != 0);
+}
+
+int
+tle_del_dev(struct tle_dev *dev)
+{
+ uint32_t p;
+ struct tle_ctx *ctx;
+
+ if (dev == NULL || dev->ctx == NULL)
+ return -EINVAL;
+
+ ctx = dev->ctx;
+ p = dev - ctx->dev;
+
+ if (p >= RTE_DIM(ctx->dev) ||
+ (dev->dp[TLE_V4] == NULL &&
+ dev->dp[TLE_V6] == NULL))
+ return -EINVAL;
+
+ /* empty TX queues. */
+ empty_dring(&dev->tx.dr, ctx->prm.proto);
+
+ rte_free(dev->dp[TLE_V4]);
+ rte_free(dev->dp[TLE_V6]);
+ memset(dev, 0, sizeof(*dev));
+ ctx->nb_dev--;
+ return 0;
+}
+
+static struct tle_dev *
+find_ipv4_dev(struct tle_ctx *ctx, const struct in_addr *addr)
+{
+ uint32_t i;
+
+ for (i = 0; i != RTE_DIM(ctx->dev); i++) {
+ if (ctx->dev[i].prm.local_addr4.s_addr == addr->s_addr &&
+ ctx->dev[i].dp[TLE_V4] != NULL)
+ return ctx->dev + i;
+ }
+
+ return NULL;
+}
+
+static struct tle_dev *
+find_ipv6_dev(struct tle_ctx *ctx, const struct in6_addr *addr)
+{
+ uint32_t i;
+
+ for (i = 0; i != RTE_DIM(ctx->dev); i++) {
+ if (memcmp(&ctx->dev[i].prm.local_addr6, addr,
+ sizeof(*addr)) == 0 &&
+ ctx->dev[i].dp[TLE_V6] != NULL)
+ return ctx->dev + i;
+ }
+
+ return NULL;
+}
+
+static int
+stream_fill_dev(struct tle_ctx *ctx, struct tle_stream *s,
+ const struct sockaddr *addr)
+{
+ struct tle_dev *dev;
+ struct tle_pbm *pbm;
+ const struct sockaddr_in *lin4;
+ const struct sockaddr_in6 *lin6;
+ uint32_t i, p, sp, t;
+
+ if (addr->sa_family == AF_INET) {
+ lin4 = (const struct sockaddr_in *)addr;
+ t = TLE_V4;
+ p = lin4->sin_port;
+ } else if (addr->sa_family == AF_INET6) {
+ lin6 = (const struct sockaddr_in6 *)addr;
+ t = TLE_V6;
+ p = lin6->sin6_port;
+ } else
+ return EINVAL;
+
+ p = ntohs(p);
+
+ /* if local address is not wildcard, find device it belongs to. */
+ if (t == TLE_V4 && lin4->sin_addr.s_addr != INADDR_ANY) {
+ dev = find_ipv4_dev(ctx, &lin4->sin_addr);
+ if (dev == NULL)
+ return ENODEV;
+ } else if (t == TLE_V6 && memcmp(&tle_ipv6_any, &lin6->sin6_addr,
+ sizeof(tle_ipv6_any)) != 0) {
+ dev = find_ipv6_dev(ctx, &lin6->sin6_addr);
+ if (dev == NULL)
+ return ENODEV;
+ } else
+ dev = NULL;
+
+ if (dev != NULL)
+ pbm = &dev->dp[t]->use;
+ else
+ pbm = &ctx->use[t];
+
+ /* try to acquire local port number. */
+ if (p == 0) {
+ p = tle_pbm_find_range(pbm, pbm->blk, LPORT_END_BLK);
+ if (p == 0 && pbm->blk > LPORT_START_BLK)
+ p = tle_pbm_find_range(pbm, LPORT_START_BLK, pbm->blk);
+ } else if (tle_pbm_check(pbm, p) != 0)
+ return EEXIST;
+
+ if (p == 0)
+ return ENFILE;
+
+ /* fill socket's dst port and type */
+
+ sp = htons(p);
+ s->type = t;
+ s->port.dst = sp;
+
+ /* mark port as in-use */
+
+ tle_pbm_set(&ctx->use[t], p);
+ if (dev != NULL) {
+ tle_pbm_set(pbm, p);
+ dev->dp[t]->streams[sp] = s;
+ } else {
+ for (i = 0; i != RTE_DIM(ctx->dev); i++) {
+ if (ctx->dev[i].dp[t] != NULL) {
+ tle_pbm_set(&ctx->dev[i].dp[t]->use, p);
+ ctx->dev[i].dp[t]->streams[sp] = s;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int
+stream_clear_dev(struct tle_ctx *ctx, const struct tle_stream *s)
+{
+ struct tle_dev *dev;
+ uint32_t i, p, sp, t;
+
+ t = s->type;
+ sp = s->port.dst;
+ p = ntohs(sp);
+
+ /* if local address is not wildcard, find device it belongs to. */
+ if (t == TLE_V4 && s->ipv4.addr.dst != INADDR_ANY) {
+ dev = find_ipv4_dev(ctx,
+ (const struct in_addr *)&s->ipv4.addr.dst);
+ if (dev == NULL)
+ return ENODEV;
+ } else if (t == TLE_V6 && memcmp(&tle_ipv6_any, &s->ipv6.addr.dst,
+ sizeof(tle_ipv6_any)) != 0) {
+ dev = find_ipv6_dev(ctx,
+ (const struct in6_addr *)&s->ipv6.addr.dst);
+ if (dev == NULL)
+ return ENODEV;
+ } else
+ dev = NULL;
+
+ tle_pbm_clear(&ctx->use[t], p);
+ if (dev != NULL) {
+ if (dev->dp[t]->streams[sp] == s) {
+ tle_pbm_clear(&dev->dp[t]->use, p);
+ dev->dp[t]->streams[sp] = NULL;
+ }
+ } else {
+ for (i = 0; i != RTE_DIM(ctx->dev); i++) {
+ if (ctx->dev[i].dp[t] != NULL &&
+ ctx->dev[i].dp[t]->streams[sp] == s) {
+ tle_pbm_clear(&ctx->dev[i].dp[t]->use, p);
+ ctx->dev[i].dp[t]->streams[sp] = NULL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static void
+fill_ipv4_am(const struct sockaddr_in *in, uint32_t *addr, uint32_t *mask)
+{
+ *addr = in->sin_addr.s_addr;
+ *mask = (*addr == INADDR_ANY) ? INADDR_ANY : INADDR_NONE;
+}
+
+static void
+fill_ipv6_am(const struct sockaddr_in6 *in, rte_xmm_t *addr, rte_xmm_t *mask)
+{
+ const struct in6_addr *pm;
+
+ memcpy(addr, &in->sin6_addr, sizeof(*addr));
+ if (memcmp(&tle_ipv6_any, addr, sizeof(*addr)) == 0)
+ pm = &tle_ipv6_any;
+ else
+ pm = &tle_ipv6_none;
+
+ memcpy(mask, pm, sizeof(*mask));
+}
+
+int
+stream_fill_ctx(struct tle_ctx *ctx, struct tle_stream *s,
+ const struct sockaddr *laddr, const struct sockaddr *raddr)
+{
+ const struct sockaddr_in *rin;
+ int32_t rc;
+
+ /* setup ports and port mask fields (except dst port). */
+ rin = (const struct sockaddr_in *)raddr;
+ s->port.src = rin->sin_port;
+ s->pmsk.src = (s->port.src == 0) ? 0 : UINT16_MAX;
+ s->pmsk.dst = UINT16_MAX;
+
+ /* setup src and dst addresses. */
+ if (laddr->sa_family == AF_INET) {
+ fill_ipv4_am((const struct sockaddr_in *)laddr,
+ &s->ipv4.addr.dst, &s->ipv4.mask.dst);
+ fill_ipv4_am((const struct sockaddr_in *)raddr,
+ &s->ipv4.addr.src, &s->ipv4.mask.src);
+ } else if (laddr->sa_family == AF_INET6) {
+ fill_ipv6_am((const struct sockaddr_in6 *)laddr,
+ &s->ipv6.addr.dst, &s->ipv6.mask.dst);
+ fill_ipv6_am((const struct sockaddr_in6 *)raddr,
+ &s->ipv6.addr.src, &s->ipv6.mask.src);
+ }
+
+ rte_spinlock_lock(&ctx->dev_lock);
+ rc = stream_fill_dev(ctx, s, laddr);
+ rte_spinlock_unlock(&ctx->dev_lock);
+
+ return rc;
+}
+
+/* free stream's destination port */
+int
+stream_clear_ctx(struct tle_ctx *ctx, struct tle_stream *s)
+{
+ int32_t rc;
+
+ rte_spinlock_lock(&ctx->dev_lock);
+ rc = stream_clear_dev(ctx, s);
+ rte_spinlock_unlock(&ctx->dev_lock);
+
+ return rc;
+}
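Taken together, the functions above form the context/device lifecycle. A hedged setup sketch, assuming the TCP stream module has registered its callbacks in tle_stream_ops, with error handling trimmed (the address and offload values are purely illustrative):

    #include <string.h>
    #include <arpa/inet.h>
    #include <tle_ctx.h>

    static struct tle_ctx *
    setup_ctx(void)
    {
        struct tle_ctx_param cprm;
        struct tle_dev_param dprm;
        struct tle_ctx *ctx;

        memset(&cprm, 0, sizeof(cprm));
        cprm.socket_id = SOCKET_ID_ANY;
        cprm.proto = TLE_PROTO_TCP;
        cprm.max_streams = 0x400;
        /* cprm.lookup4/.lookup6 must also be set for TX routing */

        ctx = tle_ctx_create(&cprm);

        memset(&dprm, 0, sizeof(dprm));
        dprm.local_addr4.s_addr = inet_addr("192.0.2.1");
        dprm.rx_offload = DEV_RX_OFFLOAD_IPV4_CKSUM;
        tle_add_dev(ctx, &dprm);
        return ctx;
    }

Note how tle_add_dev() derives per-packet ol_flags once at setup time: when the NIC cannot verify a given checksum type in HW, the matching PKT_RX_*_CKSUM_BAD flag is recorded so the RX path knows to verify that checksum in SW.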
diff --git a/lib/libtle_l4p/ctx.h b/lib/libtle_l4p/ctx.h
new file mode 100644
index 0000000..cc32081
--- /dev/null
+++ b/lib/libtle_l4p/ctx.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _CTX_H_
+#define _CTX_H_
+
+#include <rte_spinlock.h>
+#include <rte_vect.h>
+#include <tle_dring.h>
+#include <tle_ctx.h>
+
+#include "port_bitmap.h"
+#include "osdep.h"
+#include "net_misc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct tle_dport {
+ struct tle_pbm use; /* ports in use. */
+ struct tle_stream *streams[MAX_PORT_NUM]; /* port to stream. */
+};
+
+struct tle_dev {
+ struct tle_ctx *ctx;
+ struct {
+ uint64_t ol_flags[TLE_VNUM];
+ } rx;
+ struct {
+ /* used by FE. */
+ uint64_t ol_flags[TLE_VNUM];
+ rte_atomic32_t packet_id[TLE_VNUM];
+
+ /* used by FE & BE. */
+ struct tle_dring dr;
+ } tx;
+ struct tle_dev_param prm; /* copy of device parameters. */
+ struct tle_dport *dp[TLE_VNUM]; /* device L4 ports */
+};
+
+struct tle_ctx {
+ struct tle_ctx_param prm;
+ struct {
+ rte_spinlock_t lock;
+ uint32_t nb_free; /* number of free streams. */
+ STAILQ_HEAD(, tle_stream) free;
+ void *buf; /* space allocated for streams */
+ } streams;
+
+ rte_spinlock_t dev_lock;
+ uint32_t nb_dev;
+ struct tle_pbm use[TLE_VNUM]; /* all ports in use. */
+ struct tle_dev dev[RTE_MAX_ETHPORTS];
+};
+
+struct stream_ops {
+ int (*init_streams)(struct tle_ctx *);
+ void (*fini_streams)(struct tle_ctx *);
+ void (*free_drbs)(struct tle_stream *, struct tle_drb *[], uint32_t);
+};
+
+extern struct stream_ops tle_stream_ops[TLE_PROTO_NUM];
+
+int stream_fill_ctx(struct tle_ctx *ctx, struct tle_stream *s,
+ const struct sockaddr *laddr, const struct sockaddr *raddr);
+
+int stream_clear_ctx(struct tle_ctx *ctx, struct tle_stream *s);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CTX_H_ */
diff --git a/lib/libtle_l4p/debug.h b/lib/libtle_l4p/debug.h
new file mode 100644
index 0000000..b2a8b52
--- /dev/null
+++ b/lib/libtle_l4p/debug.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _DEBUG_H_
+#define _DEBUG_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FUNC_SEQ_VERIFY(v) do { \
+ static uint64_t nb_call; \
+ static typeof(v) x; \
+ if (nb_call++ != 0) \
+ RTE_VERIFY(tcp_seq_leq(x, v)); \
+ x = (v); \
+} while (0)
+
+#define FUNC_VERIFY(e, c) do { \
+ static uint64_t nb_call; \
+ if ((e) == 0) \
+ nb_call++; \
+ else \
+ nb_call = 0; \
+ RTE_VERIFY(nb_call != (c)); \
+} while (0)
+
+#define FUNC_STAT(v, c) do { \
+ static uint64_t nb_call, nb_data; \
+ nb_call++; \
+ nb_data += (v); \
+ if ((nb_call & ((c) - 1)) == 0) { \
+ printf("%s#%d@%u: nb_call=%lu, avg(" #v ")=%#Lf\n", \
+ __func__, __LINE__, rte_lcore_id(), nb_call, \
+ (long double)nb_data / nb_call); \
+ nb_call = 0; \
+ nb_data = 0; \
+ } \
+} while (0)
+
+#define FUNC_TM_STAT(v, c) do { \
+ static uint64_t nb_call, nb_data; \
+ static uint64_t cts, pts, sts; \
+ cts = rte_rdtsc(); \
+ if (pts != 0) \
+ sts += cts - pts; \
+ pts = cts; \
+ nb_call++; \
+ nb_data += (v); \
+ if ((nb_call & ((c) - 1)) == 0) { \
+ printf("%s#%d@%u: nb_call=%lu, " \
+ "avg(" #v ")=%#Lf, " \
+ "avg(cycles)=%#Lf, " \
+ "avg(cycles/" #v ")=%#Lf\n", \
+ __func__, __LINE__, rte_lcore_id(), nb_call, \
+ (long double)nb_data / nb_call, \
+ (long double)sts / nb_call, \
+ (long double)sts / nb_data); \
+ nb_call = 0; \
+ nb_data = 0; \
+ sts = 0; \
+ } \
+} while (0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DEBUG_H_ */
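All of these macros keep their counters in function-local statics, so every expansion site tracks its own statistics; the period argument c must be a power of two, since the report trigger is nb_call & ((c) - 1). A usage sketch with a hypothetical helper:

    /* prints the average dequeued burst size every 2^16 calls. */
    static uint32_t
    ring_deq_stat(struct rte_ring *r, struct rte_mbuf *mb[], uint32_t num)
    {
        uint32_t n;

        n = rte_ring_dequeue_burst(r, (void **)mb, num);
        FUNC_STAT(n, 0x10000);
        return n;
    }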
diff --git a/lib/libtle_udp/event.c b/lib/libtle_l4p/event.c
index 66c5a3b..66c5a3b 100644
--- a/lib/libtle_udp/event.c
+++ b/lib/libtle_l4p/event.c
diff --git a/lib/libtle_udp/misc.h b/lib/libtle_l4p/misc.h
index ffe665f..55dca10 100644
--- a/lib/libtle_udp/misc.h
+++ b/lib/libtle_l4p/misc.h
@@ -21,6 +21,30 @@ extern "C" {
#endif
static inline int
+xmm_cmp(const rte_xmm_t *da, const rte_xmm_t *sa)
+{
+ uint64_t ret;
+
+ ret = (sa->u64[0] ^ da->u64[0]) |
+ (sa->u64[1] ^ da->u64[1]);
+
+ return (ret != 0);
+}
+
+static inline int
+ymm_cmp(const _ymm_t *da, const _ymm_t *sa)
+{
+ uint64_t ret;
+
+ ret = (sa->u64[0] ^ da->u64[0]) |
+ (sa->u64[1] ^ da->u64[1]) |
+ (sa->u64[2] ^ da->u64[2]) |
+ (sa->u64[3] ^ da->u64[3]);
+
+ return (ret != 0);
+}
+
+static inline int
ymm_mask_cmp(const _ymm_t *da, const _ymm_t *sa, const _ymm_t *sm)
{
uint64_t ret;
@@ -75,11 +99,11 @@ _tx_offload_l4_offset(uint64_t ofl)
/**
* Process the non-complemented checksum of a buffer.
- * Similar to rte_raw_cksum(), but provide better perfomance
+ * Similar to rte_raw_cksum(), but provide better performance
* (at least on IA platforms).
* @param buf
* Pointer to the buffer.
- * @param len
+ * @param size
* Length of the buffer.
* @return
* The non-complemented checksum.
@@ -89,7 +113,7 @@ __raw_cksum(const uint8_t *buf, uint32_t size)
{
uint64_t s, sum;
uint32_t i, n;
- uint32_t dw1, dw2;
+ uint32_t dw1, dw2;
uint16_t w1, w2;
const uint64_t *b;
@@ -124,7 +148,6 @@ __raw_cksum(const uint8_t *buf, uint32_t size)
return w1;
}
-
/**
* Process UDP or TCP checksum over possibly multi-segmented packet.
* @param mb
@@ -223,7 +246,7 @@ _ipv4_udptcp_mbuf_cksum(const struct rte_mbuf *mb, uint16_t l4_ofs,
uint32_t cksum;
cksum = _ipv4x_phdr_cksum(ipv4_hdr, mb->l3_len, 0);
- cksum = __udptcp_mbuf_cksum(mb, l4_ofs, cksum);
+ cksum = __udptcp_mbuf_cksum(mb, l4_ofs, cksum);
return cksum;
}
@@ -247,7 +270,7 @@ _ipv6_udptcp_mbuf_cksum(const struct rte_mbuf *mb, uint16_t l4_ofs,
uint32_t cksum;
cksum = rte_ipv6_phdr_cksum(ipv6_hdr, 0);
- cksum = __udptcp_mbuf_cksum(mb, l4_ofs, cksum);
+ cksum = __udptcp_mbuf_cksum(mb, l4_ofs, cksum);
return cksum;
}
@@ -261,6 +284,46 @@ _ipv4x_cksum(const void *iph, size_t len)
return (cksum == 0xffff) ? cksum : ~cksum;
}
+static inline int
+check_pkt_csum(const struct rte_mbuf *m, uint64_t ol_flags, uint32_t type,
+ uint32_t proto)
+{
+ const struct ipv4_hdr *l3h4;
+ const struct ipv6_hdr *l3h6;
+ const struct udp_hdr *l4h;
+ int32_t ret;
+ uint16_t csum;
+
+ ret = 0;
+ l3h4 = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, m->l2_len);
+ l3h6 = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, m->l2_len);
+
+ if ((ol_flags & PKT_RX_IP_CKSUM_BAD) != 0) {
+ csum = _ipv4x_cksum(l3h4, m->l3_len);
+ ret = (csum != UINT16_MAX);
+ }
+
+ if (ret == 0 && (ol_flags & PKT_RX_L4_CKSUM_BAD) != 0) {
+
+ /*
+ * for IPv4 it is allowed to have zero UDP cksum,
+ * for IPv6 valid UDP cksum is mandatory.
+ */
+ if (type == TLE_V4) {
+ l4h = (const struct udp_hdr *)((uintptr_t)l3h4 +
+ m->l3_len);
+ csum = (proto == IPPROTO_UDP && l4h->dgram_cksum == 0) ?
+ UINT16_MAX : _ipv4_udptcp_mbuf_cksum(m,
+ m->l2_len + m->l3_len, l3h4);
+ } else
+ csum = _ipv6_udptcp_mbuf_cksum(m,
+ m->l2_len + m->l3_len, l3h6);
+
+ ret = (csum != UINT16_MAX);
+ }
+
+ return ret;
+}
/*
* Analog of read-write locks, very much in favour of read side.
@@ -304,6 +367,47 @@ rwl_up(rte_atomic32_t *p)
rte_atomic32_sub(p, INT32_MIN);
}
+/* exclude NULLs from the final list of packets. */
+static inline uint32_t
+compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero)
+{
+ uint32_t i, j, k, l;
+
+ for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) {
+
+ /* found a hole. */
+ if (pkt[j] == NULL) {
+
+ /* find how big is it. */
+ for (i = j; i-- != 0 && pkt[i] == NULL; )
+ ;
+ /* fill the hole. */
+ for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++)
+ pkt[l] = pkt[k];
+
+ nb_pkt -= j - i;
+ nb_zero -= j - i;
+ j = i + 1;
+ }
+ }
+
+ return nb_pkt;
+}
+
+/* empty ring and free queued mbufs */
+static inline void
+empty_mbuf_ring(struct rte_ring *r)
+{
+ uint32_t i, n;
+ struct rte_mbuf *mb[MAX_PKT_BURST];
+
+ do {
+ n = rte_ring_dequeue_burst(r, (void **)mb, RTE_DIM(mb));
+ for (i = 0; i != n; i++)
+ rte_pktmbuf_free(mb[i]);
+ } while (n != 0);
+}
+
#ifdef __cplusplus
}
#endif
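compress_pkt_list() walks the array from the tail and collapses each run of NULLs with a single shift. An illustrative call, assuming a, b and c are valid mbuf pointers:

    static uint32_t
    compact_demo(struct rte_mbuf *a, struct rte_mbuf *b, struct rte_mbuf *c)
    {
        struct rte_mbuf *pkt[6] = { a, NULL, NULL, b, c, NULL };

        /* squeezes out the 3 NULLs in place:
         * returns 3 and leaves pkt[0..2] == { a, b, c }. */
        return compress_pkt_list(pkt, 6, 3);
    }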
diff --git a/lib/libtle_l4p/net_misc.h b/lib/libtle_l4p/net_misc.h
new file mode 100644
index 0000000..2d8dac2
--- /dev/null
+++ b/lib/libtle_l4p/net_misc.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _NET_MISC_H_
+#define _NET_MISC_H_
+
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include "osdep.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define PKT_L234_HLEN(m) (_tx_offload_l4_offset(m->tx_offload))
+#define PKT_L4_PLEN(m) ((m)->pkt_len - PKT_L234_HLEN(m))
+
+/*
+ * Definitions of some network protocol related structures.
+ * Their main purpose is to simplify (and optimise) processing and
+ * representation of protocol related data.
+ */
+
+enum {
+ TLE_V4,
+ TLE_V6,
+ TLE_VNUM
+};
+
+extern const struct in6_addr tle_ipv6_any;
+extern const struct in6_addr tle_ipv6_none;
+
+union l4_ports {
+ uint32_t raw;
+ struct {
+ uint16_t src;
+ uint16_t dst;
+ };
+};
+
+union ipv4_addrs {
+ uint64_t raw;
+ struct {
+ uint32_t src;
+ uint32_t dst;
+ };
+};
+
+union ipv6_addrs {
+ _ymm_t raw;
+ struct {
+ rte_xmm_t src;
+ rte_xmm_t dst;
+ };
+};
+
+union ip_addrs {
+ union ipv4_addrs v4;
+ union ipv6_addrs v6;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _NET_MISC_H_ */
diff --git a/lib/libtle_udp/osdep.h b/lib/libtle_l4p/osdep.h
index 8e91964..ed7e883 100644
--- a/lib/libtle_udp/osdep.h
+++ b/lib/libtle_l4p/osdep.h
@@ -17,6 +17,8 @@
#define _OSDEP_H_
#include <rte_vect.h>
+#include <rte_memcpy.h>
+#include <rte_spinlock.h>
#include <rte_log.h>
#ifdef __cplusplus
@@ -36,6 +38,8 @@ extern "C" {
#define UDP_LOG(lvl, fmt, args...) RTE_LOG(lvl, USER1, fmt, ##args)
+#define TCP_LOG(lvl, fmt, args...) RTE_LOG(lvl, USER1, fmt, ##args)
+
/*
* if no AVX support, define _ymm_t here.
*/
@@ -60,6 +64,8 @@ typedef union _ymm {
#endif /* __AVX__ */
+#include "debug.h"
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/libtle_udp/port_bitmap.h b/lib/libtle_l4p/port_bitmap.h
index 6aff4e6..c0420d5 100644
--- a/lib/libtle_udp/port_bitmap.h
+++ b/lib/libtle_l4p/port_bitmap.h
@@ -21,7 +21,7 @@ extern "C" {
#endif
/*
- * Simple implementation of bitmap for all possible UDP ports [0-UINT16_MAX].
+ * Simple implementation of bitmap for all possible L4 ports [0-UINT16_MAX].
*/
#define MAX_PORT_NUM (UINT16_MAX + 1)
@@ -31,14 +31,14 @@ extern "C" {
#define MAX_PORT_BLK PORT_BLK(MAX_PORT_NUM)
-struct udp_pbm {
+struct tle_pbm {
uint32_t nb_set; /* number of bits set. */
uint32_t blk; /* last block with free entry. */
uint32_t bm[MAX_PORT_BLK];
};
static inline void
-udp_pbm_init(struct udp_pbm *pbm, uint32_t blk)
+tle_pbm_init(struct tle_pbm *pbm, uint32_t blk)
{
pbm->bm[0] = 1;
pbm->nb_set = 1;
@@ -46,7 +46,7 @@ udp_pbm_init(struct udp_pbm *pbm, uint32_t blk)
}
static inline void
-udp_pbm_set(struct udp_pbm *pbm, uint16_t port)
+tle_pbm_set(struct tle_pbm *pbm, uint16_t port)
{
uint32_t i, b, v;
@@ -58,7 +58,7 @@ udp_pbm_set(struct udp_pbm *pbm, uint16_t port)
}
static inline void
-udp_pbm_clear(struct udp_pbm *pbm, uint16_t port)
+tle_pbm_clear(struct tle_pbm *pbm, uint16_t port)
{
uint32_t i, b, v;
@@ -71,7 +71,7 @@ udp_pbm_clear(struct udp_pbm *pbm, uint16_t port)
static inline uint32_t
-udp_pbm_check(const struct udp_pbm *pbm, uint16_t port)
+tle_pbm_check(const struct tle_pbm *pbm, uint16_t port)
{
uint32_t i, v;
@@ -81,7 +81,7 @@ udp_pbm_check(const struct udp_pbm *pbm, uint16_t port)
}
static inline uint16_t
-udp_pbm_find_range(struct udp_pbm *pbm, uint32_t start_blk, uint32_t end_blk)
+tle_pbm_find_range(struct tle_pbm *pbm, uint32_t start_blk, uint32_t end_blk)
{
uint32_t i, v;
uint16_t p;
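The bitmap keeps blk pointing at the last block known to have a free entry, which is what makes the two-pass search in ctx.c cheap. A sketch of the ephemeral-port allocation pattern used by stream_fill_dev(), with the LPORT range as defined in ctx.c:

    static uint16_t
    alloc_ephemeral(struct tle_pbm *pbm)
    {
        uint16_t p;

        /* search from the last block with free entries to the end,
         * then wrap back to the start of the ephemeral range. */
        p = tle_pbm_find_range(pbm, pbm->blk, MAX_PORT_BLK);
        if (p == 0 && pbm->blk > PORT_BLK(0x8000))
            p = tle_pbm_find_range(pbm, PORT_BLK(0x8000), pbm->blk);

        if (p != 0)
            tle_pbm_set(pbm, p);    /* mark the port as in use */
        return p;
    }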
diff --git a/lib/libtle_l4p/stream.h b/lib/libtle_l4p/stream.h
new file mode 100644
index 0000000..f3b5828
--- /dev/null
+++ b/lib/libtle_l4p/stream.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _STREAM_H_
+#define _STREAM_H_
+
+#include "ctx.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Common structure that must be present as the first field in all
+ * particular L4 (UDP/TCP, etc.) stream implementations.
+ */
+struct tle_stream {
+
+ STAILQ_ENTRY(tle_stream) link;
+ struct tle_ctx *ctx;
+
+ uint8_t type; /* TLE_V4 or TLE_V6 */
+
+ /* Stream address information. */
+ union l4_ports port;
+ union l4_ports pmsk;
+
+ union {
+ struct {
+ union ipv4_addrs addr;
+ union ipv4_addrs mask;
+ } ipv4;
+ struct {
+ union ipv6_addrs addr;
+ union ipv6_addrs mask;
+ } ipv6;
+ };
+};
+
+static inline uint32_t
+get_streams(struct tle_ctx *ctx, struct tle_stream *s[], uint32_t num)
+{
+ struct tle_stream *p;
+ uint32_t i, n;
+
+ rte_spinlock_lock(&ctx->streams.lock);
+
+ n = RTE_MIN(ctx->streams.nb_free, num);
+ for (i = 0, p = STAILQ_FIRST(&ctx->streams.free);
+ i != n;
+ i++, p = STAILQ_NEXT(p, link))
+ s[i] = p;
+
+ if (p == NULL)
+ /* we retrieved all free entries */
+ STAILQ_INIT(&ctx->streams.free);
+ else
+ STAILQ_FIRST(&ctx->streams.free) = p;
+
+ ctx->streams.nb_free -= n;
+ rte_spinlock_unlock(&ctx->streams.lock);
+ return n;
+}
+
+static inline struct tle_stream *
+get_stream(struct tle_ctx *ctx)
+{
+ struct tle_stream *s;
+
+ s = NULL;
+ if (ctx->streams.nb_free == 0)
+ return s;
+
+ get_streams(ctx, &s, 1);
+ return s;
+}
+
+static inline void
+put_stream(struct tle_ctx *ctx, struct tle_stream *s, int32_t head)
+{
+ s->type = TLE_VNUM;
+ rte_spinlock_lock(&ctx->streams.lock);
+ if (head != 0)
+ STAILQ_INSERT_HEAD(&ctx->streams.free, s, link);
+ else
+ STAILQ_INSERT_TAIL(&ctx->streams.free, s, link);
+ ctx->streams.nb_free++;
+ rte_spinlock_unlock(&ctx->streams.lock);
+}
+
+/* calculate number of drbs per stream. */
+static inline uint32_t
+calc_stream_drb_num(const struct tle_ctx *ctx, uint32_t obj_num)
+{
+ uint32_t num;
+
+ num = (ctx->prm.max_stream_sbufs + obj_num - 1) / obj_num;
+ num = num + num / 2;
+ num = RTE_MAX(num, RTE_DIM(ctx->dev) + 1);
+ return num;
+}
+
+static inline uint32_t
+drb_nb_elem(const struct tle_ctx *ctx)
+{
+ return (ctx->prm.send_bulk_size != 0) ?
+ ctx->prm.send_bulk_size : MAX_PKT_BURST;
+}
+
+static inline int32_t
+stream_get_dest(struct tle_stream *s, const void *dst_addr,
+ struct tle_dest *dst)
+{
+ int32_t rc;
+ const struct in_addr *d4;
+ const struct in6_addr *d6;
+ struct tle_ctx *ctx;
+ struct tle_dev *dev;
+
+ ctx = s->ctx;
+
+ /* it is here just to keep gcc happy. */
+ d4 = NULL;
+
+ if (s->type == TLE_V4) {
+ d4 = dst_addr;
+ rc = ctx->prm.lookup4(ctx->prm.lookup4_data, d4, dst);
+ } else if (s->type == TLE_V6) {
+ d6 = dst_addr;
+ rc = ctx->prm.lookup6(ctx->prm.lookup6_data, d6, dst);
+ } else
+ rc = -ENOENT;
+
+ if (rc < 0 || dst->dev == NULL || dst->dev->ctx != ctx)
+ return -ENOENT;
+
+ dev = dst->dev;
+ if (s->type == TLE_V4) {
+ struct ipv4_hdr *l3h;
+ l3h = (struct ipv4_hdr *)(dst->hdr + dst->l2_len);
+ l3h->src_addr = dev->prm.local_addr4.s_addr;
+ l3h->dst_addr = d4->s_addr;
+ } else {
+ struct ipv6_hdr *l3h;
+ l3h = (struct ipv6_hdr *)(dst->hdr + dst->l2_len);
+ rte_memcpy(l3h->src_addr, &dev->prm.local_addr6,
+ sizeof(l3h->src_addr));
+ rte_memcpy(l3h->dst_addr, d6, sizeof(l3h->dst_addr));
+ }
+
+ return dev - ctx->dev;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _STREAM_H_ */
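As a worked example of calc_stream_drb_num(): with max_stream_sbufs = 1024 and obj_num = 32, the ceiling division yields 32 drbs, the 50% slack raises that to 48, and the RTE_MAX() against RTE_DIM(ctx->dev) + 1 (33 when RTE_MAX_ETHPORTS is 32) leaves 48, so every device can hold a drb while the stream still has spares.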
diff --git a/lib/libtle_l4p/stream_table.c b/lib/libtle_l4p/stream_table.c
new file mode 100644
index 0000000..5a89553
--- /dev/null
+++ b/lib/libtle_l4p/stream_table.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <string.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+
+#include "stream_table.h"
+
+void
+stbl_fini(struct stbl *st)
+{
+ uint32_t i;
+
+ for (i = 0; i != RTE_DIM(st->ht); i++) {
+ rte_hash_free(st->ht[i].t);
+ rte_free(st->ht[i].ent);
+ }
+
+ memset(st, 0, sizeof(*st));
+}
+
+int
+stbl_init(struct stbl *st, uint32_t num, int32_t socket)
+{
+ int32_t rc;
+ size_t i, sz;
+ struct rte_hash_parameters hprm;
+ char buf[RTE_HASH_NAMESIZE];
+
+ num = RTE_MAX(5 * num / 4, 0x10U);
+
+ memset(&hprm, 0, sizeof(hprm));
+ hprm.name = buf;
+ hprm.entries = num;
+ hprm.socket_id = socket;
+
+ rc = 0;
+
+ snprintf(buf, sizeof(buf), "stbl4@%p", st);
+ hprm.key_len = sizeof(struct stbl4_key);
+ st->ht[TLE_V4].t = rte_hash_create(&hprm);
+ if (st->ht[TLE_V4].t == NULL)
+ rc = (rte_errno != 0) ? -rte_errno : -ENOMEM;
+
+ if (rc == 0) {
+ snprintf(buf, sizeof(buf), "stbl6@%p", st);
+ hprm.key_len = sizeof(struct stbl6_key);
+ st->ht[TLE_V6].t = rte_hash_create(&hprm);
+ if (st->ht[TLE_V6].t == NULL)
+ rc = (rte_errno != 0) ? -rte_errno : -ENOMEM;
+ }
+
+ for (i = 0; i != RTE_DIM(st->ht) && rc == 0; i++) {
+
+ sz = sizeof(*st->ht[i].ent) * num;
+ st->ht[i].ent = rte_zmalloc_socket(NULL, sz,
+ RTE_CACHE_LINE_SIZE, socket);
+ if (st->ht[i].ent == NULL)
+ rc = -ENOMEM;
+ else
+ st->ht[i].nb_ent = num;
+ }
+
+ if (rc != 0)
+ stbl_fini(st);
+
+ return rc;
+}
diff --git a/lib/libtle_l4p/stream_table.h b/lib/libtle_l4p/stream_table.h
new file mode 100644
index 0000000..8ad1103
--- /dev/null
+++ b/lib/libtle_l4p/stream_table.h
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _STREAM_TABLE_H_
+#define _STREAM_TABLE_H_
+
+#include <rte_hash.h>
+#include "tcp_misc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* current stbl entry contains packet. */
+#define STE_PKT 1
+
+struct stbl_entry {
+ void *data;
+};
+
+struct shtbl {
+ uint32_t nb_ent; /* max number of entries in the table. */
+ rte_spinlock_t l; /* lock to protect the hash table */
+ struct rte_hash *t;
+ struct stbl_entry *ent;
+} __rte_cache_aligned;
+
+struct stbl {
+ struct shtbl ht[TLE_VNUM];
+};
+
+struct stbl4_key {
+ union l4_ports port;
+ union ipv4_addrs addr;
+} __attribute__((__packed__));
+
+struct stbl6_key {
+ union l4_ports port;
+ union ipv6_addrs addr;
+} __attribute__((__packed__));
+
+struct stbl_key {
+ union l4_ports port;
+ union {
+ union ipv4_addrs addr4;
+ union ipv6_addrs addr6;
+ };
+} __attribute__((__packed__));
+
+extern void stbl_fini(struct stbl *st);
+
+extern int stbl_init(struct stbl *st, uint32_t num, int32_t socket);
+
+static inline void
+stbl_pkt_fill_key(struct stbl_key *k, const union pkt_info *pi, uint32_t type)
+{
+ static const struct stbl_key zero = {
+ .port.raw = 0,
+ };
+
+ k->port = pi->port;
+ if (type == TLE_V4)
+ k->addr4 = pi->addr4;
+ else if (type == TLE_V6)
+ k->addr6 = *pi->addr6;
+ else
+ *k = zero;
+}
+
+static inline void
+stbl_lock(struct stbl *st, uint32_t type)
+{
+ rte_spinlock_lock(&st->ht[type].l);
+}
+
+static inline void
+stbl_unlock(struct stbl *st, uint32_t type)
+{
+ rte_spinlock_unlock(&st->ht[type].l);
+}
+
+static inline struct stbl_entry *
+stbl_add_entry(struct stbl *st, const union pkt_info *pi)
+{
+ int32_t rc;
+ uint32_t type;
+ struct shtbl *ht;
+ struct stbl_key k;
+
+ type = pi->tf.type;
+ stbl_pkt_fill_key(&k, pi, type);
+ ht = st->ht + type;
+
+ rc = rte_hash_add_key(ht->t, &k);
+ if ((uint32_t)rc >= ht->nb_ent)
+ return NULL;
+ return ht->ent + rc;
+}
+
+static inline struct stbl_entry *
+stbl_add_pkt(struct stbl *st, const union pkt_info *pi, const void *pkt)
+{
+ struct stbl_entry *se;
+
+ se = stbl_add_entry(st, pi);
+ if (se != NULL)
+ se->data = (void *)((uintptr_t)pkt | STE_PKT);
+ return se;
+}
+
+static inline struct stbl_entry *
+stbl_find_entry(struct stbl *st, const union pkt_info *pi)
+{
+ int32_t rc;
+ uint32_t type;
+ struct shtbl *ht;
+ struct stbl_key k;
+
+ type = pi->tf.type;
+ stbl_pkt_fill_key(&k, pi, type);
+ ht = st->ht + type;
+
+ rc = rte_hash_lookup(ht->t, &k);
+ if ((uint32_t)rc >= ht->nb_ent)
+ return NULL;
+ return ht->ent + rc;
+}
+
+static inline int
+stbl_data_pkt(const void *p)
+{
+ return ((uintptr_t)p & STE_PKT);
+}
+
+static inline void *
+stbl_get_pkt(const struct stbl_entry *se)
+{
+ return (void *)((uintptr_t)se->data ^ STE_PKT);
+}
+
+static inline void *
+stbl_find_data(struct stbl *st, const union pkt_info *pi)
+{
+ struct stbl_entry *ent;
+
+ ent = stbl_find_entry(st, pi);
+ return (ent == NULL) ? NULL : ent->data;
+}
+
+static inline void
+stbl_del_pkt(struct stbl *st, struct stbl_entry *se, const union pkt_info *pi)
+{
+ uint32_t type;
+ struct stbl_key k;
+
+ se->data = NULL;
+
+ type = pi->tf.type;
+ stbl_pkt_fill_key(&k, pi, type);
+ rte_hash_del_key(st->ht[type].t, &k);
+}
+
+static inline void
+stbl_del_pkt_lock(struct stbl *st, struct stbl_entry *se,
+ const union pkt_info *pi)
+{
+ uint32_t type;
+ struct stbl_key k;
+
+ se->data = NULL;
+
+ type = pi->tf.type;
+ stbl_pkt_fill_key(&k, pi, type);
+ stbl_lock(st, type);
+ rte_hash_del_key(st->ht[type].t, &k);
+ stbl_unlock(st, type);
+}
+
+#include "tcp_stream.h"
+
+static inline void
+stbl_stream_fill_key(struct stbl_key *k, const struct tle_stream *s,
+ uint32_t type)
+{
+ static const struct stbl_key zero = {
+ .port.raw = 0,
+ };
+
+ k->port = s->port;
+ if (type == TLE_V4)
+ k->addr4 = s->ipv4.addr;
+ else if (type == TLE_V6)
+ k->addr6 = s->ipv6.addr;
+ else
+ *k = zero;
+}
+
+static inline struct stbl_entry *
+stbl_add_stream_lock(struct stbl *st, const struct tle_tcp_stream *s)
+{
+ uint32_t type;
+ struct stbl_key k;
+ struct stbl_entry *se;
+ struct shtbl *ht;
+ int32_t rc;
+
+ type = s->s.type;
+ stbl_stream_fill_key(&k, &s->s, type);
+ ht = st->ht + type;
+
+ stbl_lock(st, type);
+ rc = rte_hash_add_key(ht->t, &k);
+ stbl_unlock(st, type);
+
+ if ((uint32_t)rc >= ht->nb_ent)
+ return NULL;
+
+ se = ht->ent + rc;
+ if (se != NULL)
+ se->data = (void *)(uintptr_t)s;
+
+ return se;
+}
+
+static inline void
+stbl_del_stream_lock(struct stbl *st, struct stbl_entry *se,
+ const struct tle_tcp_stream *s)
+{
+ uint32_t type;
+ struct stbl_key k;
+
+ if (se == NULL)
+ return;
+
+ se->data = NULL;
+
+ type = s->s.type;
+ stbl_stream_fill_key(&k, &s->s, type);
+ stbl_lock(st, type);
+ rte_hash_del_key(st->ht[type].t, &k);
+ stbl_unlock(st, type);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _STREAM_TABLE_H_ */
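stbl_entry relies on low-bit pointer tagging: the stored pointers (mbufs, streams) are at least 2-byte aligned, so bit 0 is free to record whether an entry holds a packet. A standalone round trip of the scheme (not TLDK code):

    /* tag, test and untag a pointer the way stbl_add_pkt(),
     * stbl_data_pkt() and stbl_get_pkt() do. */
    static inline struct rte_mbuf *
    tag_roundtrip(struct rte_mbuf *mb)
    {
        void *data = (void *)((uintptr_t)mb | STE_PKT);

        if (((uintptr_t)data & STE_PKT) != 0)
            return (void *)((uintptr_t)data ^ STE_PKT);
        return NULL;
    }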
diff --git a/lib/libtle_l4p/syncookie.h b/lib/libtle_l4p/syncookie.h
new file mode 100644
index 0000000..276d45a
--- /dev/null
+++ b/lib/libtle_l4p/syncookie.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _SYNCOOKIE_H_
+#define _SYNCOOKIE_H_
+
+#include "tcp_misc.h"
+#include <rte_jhash.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SYNC_SEED0 0x736f6d65
+#define SYNC_SEED1 0x646f7261
+
+struct sync_in4 {
+ uint32_t seq;
+ union l4_ports port;
+ union ipv4_addrs addr;
+};
+
+static const rte_xmm_t mss4len = {
+ .u32 = {
+ TCP4_MIN_MSS, /* 536 */
+ 1300,
+ TCP4_OP_MSS, /* 1440 */
+ TCP4_NOP_MSS, /* 1460 */
+ },
+};
+
+static const rte_xmm_t mss6len = {
+ .u32 = {
+ TCP6_MIN_MSS, /* 1220 */
+ TCP6_OP_MSS, /* 1420 */
+ TCP6_NOP_MSS, /* 1440 */
+ 8940,
+ },
+};
+
+#define SYNC_MSS_BITS 2
+#define SYNC_MSS_MASK ((1 << SYNC_MSS_BITS) - 1)
+
+#define SYNC_TMS_WSCALE_BITS 4
+#define SYNC_TMS_WSCALE_MASK ((1 << SYNC_TMS_WSCALE_BITS) - 1)
+
+#define SYNC_TMS_RESERVE_BITS 2
+
+#define SYNC_TMS_OPT_BITS (SYNC_TMS_WSCALE_BITS + SYNC_TMS_RESERVE_BITS)
+#define SYNC_TMS_OPT_MASK ((1 << SYNC_TMS_OPT_BITS) - 1)
+
+/* allow around 2 minutes for 3-way handshake. */
+#define SYNC_MAX_TMO 0x20000
+
+
+/* ??? use SipHash as FreeBSD does. ??? */
+static inline uint32_t
+sync_hash4(const union pkt_info *pi, uint32_t seq)
+{
+ uint32_t v0, v1;
+ struct sync_in4 in4;
+
+ in4.seq = seq;
+ in4.port = pi->port;
+ in4.addr = pi->addr4;
+
+ v0 = SYNC_SEED0;
+ v1 = SYNC_SEED1;
+ rte_jhash_32b_2hashes(&in4.seq, sizeof(in4) / sizeof(uint32_t),
+ &v0, &v1);
+ return v0 + v1;
+}
+
+static inline uint32_t
+sync_hash6(const union pkt_info *pi, uint32_t seq)
+{
+ uint32_t v0, v1;
+
+ v0 = SYNC_SEED0;
+ v1 = SYNC_SEED1;
+ rte_jhash_32b_2hashes(pi->addr6->raw.u32,
+ sizeof(*pi->addr6) / sizeof(uint32_t), &v0, &v1);
+ return rte_jhash_3words(v0, seq, pi->port.raw, v1);
+}
+
+static inline uint32_t
+sync_mss2idx(uint16_t mss, const rte_xmm_t *msl)
+{
+ if (mss >= msl->u32[2])
+ return (mss >= msl->u32[3]) ? 3 : 2;
+ else
+ return (mss >= msl->u32[1]) ? 1 : 0;
+}
+
+static inline uint32_t
+sync_gen_seq(const union pkt_info *pi, uint32_t seq, uint32_t ts, uint16_t mss)
+{
+ uint32_t h, mi;
+
+ if (pi->tf.type == TLE_V4) {
+ h = sync_hash4(pi, seq);
+ mi = sync_mss2idx(mss, &mss4len);
+ } else {
+ h = sync_hash6(pi, seq);
+ mi = sync_mss2idx(mss, &mss6len);
+ }
+
+ h += (ts & ~SYNC_MSS_MASK) | mi;
+ return h;
+}
+
+static inline uint32_t
+sync_gen_ts(uint32_t ts, uint32_t wscale)
+{
+ ts = (ts - (SYNC_TMS_OPT_MASK + 1)) & ~SYNC_TMS_OPT_MASK;
+ ts |= wscale;
+ return ts;
+}
+
+static inline int
+sync_check_ack(const union pkt_info *pi, uint32_t seq, uint32_t ack,
+ uint32_t ts)
+{
+ uint32_t h, mi, pts;
+
+ h = (pi->tf.type == TLE_V4) ? sync_hash4(pi, seq) : sync_hash6(pi, seq);
+
+ h = ack - h;
+ pts = h & ~SYNC_MSS_MASK;
+ mi = h & SYNC_MSS_MASK;
+
+ if (ts - pts > SYNC_MAX_TMO)
+ return -ERANGE;
+
+ return (pi->tf.type == TLE_V4) ? mss4len.u32[mi] : mss6len.u32[mi];
+}
+
+static inline void
+sync_get_opts(struct syn_opts *so, uintptr_t p, uint32_t len)
+{
+ so->ts = get_tms_opts(p, len);
+ so->wscale = so->ts.ecr & SYNC_TMS_WSCALE_MASK;
+}
+
+static inline void
+sync_fill_tcb(struct tcb *tcb, const union seg_info *si,
+ const struct rte_mbuf *mb)
+{
+ const struct tcp_hdr *th;
+
+ th = rte_pktmbuf_mtod_offset(mb, const struct tcp_hdr *,
+ mb->l2_len + mb->l3_len);
+
+ tcb->rcv.nxt = si->seq;
+ tcb->rcv.irs = si->seq - 1;
+
+ tcb->snd.nxt = si->ack;
+ tcb->snd.una = si->ack;
+ tcb->snd.iss = si->ack - 1;
+ tcb->snd.rcvr = tcb->snd.iss;
+
+ tcb->snd.wu.wl1 = si->seq;
+ tcb->snd.wu.wl2 = si->ack;
+
+ get_syn_opts(&tcb->so, (uintptr_t)(th + 1), mb->l4_len - sizeof(*th));
+
+ tcb->snd.wscale = tcb->so.wscale;
+ tcb->snd.mss = tcb->so.mss;
+ tcb->snd.wnd = si->wnd << tcb->snd.wscale;
+
+ tcb->snd.ts = tcb->so.ts.ecr;
+ tcb->rcv.ts = tcb->so.ts.val;
+
+ tcb->rcv.wscale = (tcb->so.wscale == TCP_WSCALE_NONE) ?
+ TCP_WSCALE_NONE : TCP_WSCALE_DEFAULT;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYNCOOKIE_H_ */
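The cookie packs the negotiated MSS index into the two low bits of the ISS and a rounded timestamp into the remaining bits, so sync_check_ack() can recover both without keeping per-connection state. The bit-level round trip in isolation (illustrative values; in the real path the 4-tuple hash is added on encode and subtracted on decode):

    uint32_t ts = 0x12340;                      /* rounded timestamp */
    uint32_t mi = 2;                            /* MSS index, 0..3 */

    uint32_t ck = (ts & ~SYNC_MSS_MASK) | mi;   /* encode: 0x12342 */

    uint32_t pts = ck & ~SYNC_MSS_MASK;         /* decode ts: 0x12340 */
    uint32_t idx = ck & SYNC_MSS_MASK;          /* decode index: 2 */
    /* then: too old if ts_now - pts > SYNC_MAX_TMO,
     * otherwise MSS = mss4len.u32[idx] (or mss6len for IPv6). */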
diff --git a/lib/libtle_l4p/tcp_ctl.h b/lib/libtle_l4p/tcp_ctl.h
new file mode 100644
index 0000000..dcb9c3e
--- /dev/null
+++ b/lib/libtle_l4p/tcp_ctl.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Some helper stream control functions definitions.
+ */
+
+#ifndef _TCP_CTL_H_
+#define _TCP_CTL_H_
+
+#include "tcp_stream.h"
+#include "tcp_ofo.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void
+tcp_stream_down(struct tle_tcp_stream *s)
+{
+ rwl_down(&s->rx.use);
+ rwl_down(&s->tx.use);
+}
+
+static inline void
+tcp_stream_up(struct tle_tcp_stream *s)
+{
+ rwl_up(&s->rx.use);
+ rwl_up(&s->tx.use);
+}
+
+/* empty stream's receive queue */
+static void
+empty_rq(struct tle_tcp_stream *s)
+{
+ empty_mbuf_ring(s->rx.q);
+ tcp_ofo_reset(s->rx.ofo);
+}
+
+/* empty stream's listen queue */
+static void
+empty_lq(struct tle_tcp_stream *s, struct stbl *st)
+{
+ uint32_t i, n;
+ struct rte_mbuf *mb;
+ union pkt_info pi;
+ union seg_info si;
+ struct stbl_entry *se[MAX_PKT_BURST];
+
+ do {
+ n = rte_ring_dequeue_burst(s->rx.q, (void **)se, RTE_DIM(se));
+ for (i = 0; i != n; i++) {
+ mb = stbl_get_pkt(se[i]);
+ get_pkt_info(mb, &pi, &si);
+ stbl_del_pkt_lock(st, se[i], &pi);
+ rte_pktmbuf_free(mb);
+ }
+ } while (n != 0);
+}
+
+static inline void
+tcp_stream_reset(struct tle_ctx *ctx, struct tle_tcp_stream *s)
+{
+ struct stbl *st;
+ uint16_t uop;
+
+ st = CTX_TCP_STLB(ctx);
+
+ /* reset TX armed */
+ rte_atomic32_set(&s->tx.arm, 0);
+
+ /* reset TCB */
+ uop = s->tcb.uop & (TCP_OP_LISTEN | TCP_OP_CONNECT);
+ memset(&s->tcb, 0, sizeof(s->tcb));
+
+ /* reset cached destination */
+ memset(&s->tx.dst, 0, sizeof(s->tx.dst));
+
+ if (uop != 0) {
+ /* free stream's destination port */
+ stream_clear_ctx(ctx, &s->s);
+ if (uop == TCP_OP_LISTEN)
+ empty_lq(s, st);
+ }
+
+ if (s->ste != NULL) {
+ /* remove entry from RX streams table */
+ stbl_del_stream_lock(st, s->ste, s);
+ s->ste = NULL;
+ empty_rq(s);
+ }
+
+ /* empty TX queue */
+ empty_mbuf_ring(s->tx.q);
+
+ /*
+ * mark the stream as free again.
+ * if there still are pkts queued for TX,
+ * then put this stream to the tail of free list.
+ */
+ put_stream(ctx, &s->s, TCP_STREAM_TX_FINISHED(s));
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_CTL_H_ */
diff --git a/lib/libtle_l4p/tcp_misc.h b/lib/libtle_l4p/tcp_misc.h
new file mode 100644
index 0000000..beb6699
--- /dev/null
+++ b/lib/libtle_l4p/tcp_misc.h
@@ -0,0 +1,462 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_MISC_H_
+#define _TCP_MISC_H_
+
+#include "net_misc.h"
+#include <rte_tcp.h>
+#include <rte_cycles.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Definitions of TCP related structures/functions.
+ * Their main purpose is to simplify (and optimise) processing and
+ * representation of protocol related data.
+ */
+
+#define TCP_WSCALE_DEFAULT 7
+#define TCP_WSCALE_NONE 0
+
+#define TCP_TX_HDR_MAX (sizeof(struct tcp_hdr) + TCP_TX_OPT_LEN_MAX)
+
+/* max header size for normal data+ack packet */
+#define TCP_TX_HDR_DACK (sizeof(struct tcp_hdr) + TCP_TX_OPT_LEN_TMS)
+
+#define TCP4_MIN_MSS 536
+
+#define TCP6_MIN_MSS 1220
+
+/* default MTU, no TCP options. */
+#define TCP4_NOP_MSS \
+ (ETHER_MTU - sizeof(struct ipv4_hdr) - sizeof(struct tcp_hdr))
+
+#define TCP6_NOP_MSS \
+ (ETHER_MTU - sizeof(struct ipv6_hdr) - sizeof(struct tcp_hdr))
+
+/* default MTU, TCP options present */
+#define TCP4_OP_MSS (TCP4_NOP_MSS - TCP_TX_OPT_LEN_MAX)
+
+#define TCP6_OP_MSS (TCP6_NOP_MSS - TCP_TX_OPT_LEN_MAX)
+
+/*
+ * TCP flags
+ */
+#define TCP_FLAG_FIN 0x01
+#define TCP_FLAG_SYN 0x02
+#define TCP_FLAG_RST 0x04
+#define TCP_FLAG_PSH 0x08
+#define TCP_FLAG_ACK 0x10
+#define TCP_FLAG_URG 0x20
+
+/* TCP flags mask. */
+#define TCP_FLAG_MASK UINT8_MAX
+
+union typflg {
+ uint16_t raw;
+ struct {
+ uint8_t type; /* TLE_V4/TLE_V6 */
+ uint8_t flags; /* TCP header flags */
+ };
+};
+
+union pkt_info {
+ rte_xmm_t raw;
+ struct {
+ union typflg tf;
+ uint16_t csf; /* checksum flags */
+ union l4_ports port;
+ union {
+ union ipv4_addrs addr4;
+ const union ipv6_addrs *addr6;
+ };
+ };
+};
+
+union seg_info {
+ rte_xmm_t raw;
+ struct {
+ uint32_t seq;
+ uint32_t ack;
+ uint16_t hole1;
+ uint16_t wnd;
+ };
+};
+
+union seqlen {
+ uint64_t raw;
+ struct {
+ uint32_t seq;
+ uint32_t len;
+ };
+};
+
+#define TCP_DATA_ALIGN 4
+
+#define TCP_DATA_OFFSET 4
+
+/*
+ * recognizable options.
+ */
+#define TCP_OPT_KIND_EOL 0x00
+#define TCP_OPT_KIND_NOP 0x01
+#define TCP_OPT_KIND_MSS 0x02
+#define TCP_OPT_KIND_WSC 0x03
+#define TCP_OPT_KIND_TMS 0x08
+
+#define TCP_OPT_LEN_EOL 0x01
+#define TCP_OPT_LEN_NOP 0x01
+#define TCP_OPT_LEN_MSS 0x04
+#define TCP_OPT_LEN_WSC 0x03
+#define TCP_OPT_LEN_TMS 0x0a
+
+#define TCP_TX_OPT_LEN_MAX \
+ RTE_ALIGN_CEIL(TCP_OPT_LEN_MSS + TCP_OPT_LEN_WSC + TCP_OPT_LEN_TMS + \
+ TCP_OPT_LEN_EOL, TCP_DATA_ALIGN)
+
+/*
+ * recommended format for TSOPT from RFC 1323, appendix A:
+ * +--------+--------+--------+--------+
+ * | NOP | NOP | TSopt | 10 |
+ * +--------+--------+--------+--------+
+ * | TSval timestamp |
+ * +--------+--------+--------+--------+
+ * | TSecr timestamp |
+ * +--------+--------+--------+--------+
+ */
+#define TCP_TX_OPT_LEN_TMS (TCP_OPT_LEN_TMS + 2 * TCP_OPT_LEN_NOP)
+
+#define TCP_OPT_TMS_HDR (rte_be_to_cpu_32( \
+ TCP_OPT_KIND_NOP << 3 * CHAR_BIT | \
+ TCP_OPT_KIND_NOP << 2 * CHAR_BIT | \
+ TCP_OPT_KIND_TMS << CHAR_BIT | \
+ TCP_OPT_LEN_TMS))
+
+#define TCP_OPT_KL(k, l) (rte_be_to_cpu_16((k) << CHAR_BIT | (l)))
+
+#define TCP_OPT_KL_MSS TCP_OPT_KL(TCP_OPT_KIND_MSS, TCP_OPT_LEN_MSS)
+#define TCP_OPT_KL_WSC TCP_OPT_KL(TCP_OPT_KIND_WSC, TCP_OPT_LEN_WSC)
+#define TCP_OPT_KL_TMS TCP_OPT_KL(TCP_OPT_KIND_TMS, TCP_OPT_LEN_TMS)
+
+/*
+ * Timestamp option.
+ */
+union tsopt {
+ uint64_t raw;
+ struct {
+ uint32_t val;
+ uint32_t ecr;
+ };
+};
+
+struct tcpopt {
+ union {
+ uint16_t raw;
+ struct {
+ uint8_t kind;
+ uint8_t len;
+ };
+ } kl;
+ union {
+ uint16_t mss;
+ uint8_t wscale;
+ union tsopt ts;
+ };
+} __attribute__((__packed__));
+
+struct syn_opts {
+ uint16_t mss;
+ uint8_t wscale;
+ union tsopt ts;
+};
+
+struct resp_info {
+ uint32_t flags;
+};
+
+
+/* window update information (RFC 793 WL1, WL2) */
+union wui {
+ uint64_t raw;
+ struct {
+ uint32_t wl1;
+ uint32_t wl2;
+ };
+};
+
+/*
+ * helper structure: holds aggregated information about a group
+ * of processed data+ack packets.
+ */
+struct dack_info {
+ struct { /* # of received segments with: */
+ uint32_t data; /* incoming data */
+ uint32_t ack; /* newly acked data */
+ uint32_t dup; /* duplicate acks */
+ uint32_t badseq; /* bad seq/ack */
+ uint32_t ofo; /* OFO incoming data */
+ } segs;
+ uint32_t ack; /* highest received ACK */
+ union tsopt ts; /* TS of highest ACK */
+ union wui wu; /* window update information */
+ uint32_t wnd;
+ struct { /* 3 duplicate ACKs were observed after */
+ uint32_t seg; /* # of meaningful ACK segments */
+ uint32_t ack; /* ACK sequence */
+ } dup3;
+};
+
+/* get current timestamp in ms */
+static inline uint32_t
+tcp_get_tms(void)
+{
+ uint64_t ts, ms;
+ ms = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S;
+ ts = rte_get_tsc_cycles() / ms;
+ return ts;
+}
+
+static inline int
+tcp_seq_lt(uint32_t l, uint32_t r)
+{
+ return (int32_t)(l - r) < 0;
+}
+
+static inline int
+tcp_seq_leq(uint32_t l, uint32_t r)
+{
+ return (int32_t)(l - r) <= 0;
+}
+
+
+static inline void
+get_seg_info(const struct tcp_hdr *th, union seg_info *si)
+{
+ __m128i v;
+ const __m128i bswap_mask = _mm_set_epi8(15, 14, 13, 12, 10, 11, 9, 8,
+ 4, 5, 6, 7, 0, 1, 2, 3);
+
+ v = _mm_loadu_si128((const __m128i *)&th->sent_seq);
+ si->raw.x = _mm_shuffle_epi8(v, bswap_mask);
+}
+
+static inline void
+get_syn_opts(struct syn_opts *so, uintptr_t p, uint32_t len)
+{
+ uint32_t i, kind;
+ const struct tcpopt *opt;
+
+ memset(so, 0, sizeof(*so));
+
+ i = 0;
+ while (i < len) {
+ opt = (const struct tcpopt *)(p + i);
+ kind = opt->kl.kind;
+ if (kind == TCP_OPT_KIND_EOL)
+ return;
+ else if (kind == TCP_OPT_KIND_NOP)
+ i += sizeof(opt->kl.kind);
+ else {
+ i += opt->kl.len;
+ if (i <= len) {
+ if (opt->kl.raw == TCP_OPT_KL_MSS)
+ so->mss = rte_be_to_cpu_16(opt->mss);
+ else if (opt->kl.raw == TCP_OPT_KL_WSC)
+ so->wscale = opt->wscale;
+ else if (opt->kl.raw == TCP_OPT_KL_TMS) {
+ so->ts.val =
+ rte_be_to_cpu_32(opt->ts.val);
+ so->ts.ecr =
+ rte_be_to_cpu_32(opt->ts.ecr);
+ }
+ }
+ }
+ }
+}
+
+/*
+ * generates SYN options, assumes that there are
+ * at least TCP_TX_OPT_LEN_MAX bytes available.
+ */
+static inline void
+fill_syn_opts(void *p, const struct syn_opts *so)
+{
+ uint8_t *to;
+ struct tcpopt *opt;
+
+ to = (uint8_t *)p;
+
+ /* setup MSS*/
+ opt = (struct tcpopt *)to;
+ opt->kl.raw = TCP_OPT_KL_MSS;
+ opt->mss = rte_cpu_to_be_16(so->mss);
+
+ to += TCP_OPT_LEN_MSS;
+ opt = (struct tcpopt *)to;
+
+ /* setup TMS*/
+ if (so->ts.val != 0) {
+
+ opt->kl.raw = TCP_OPT_KL_TMS;
+ opt->ts.val = rte_cpu_to_be_32(so->ts.val);
+ opt->ts.ecr = rte_cpu_to_be_32(so->ts.ecr);
+
+ to += TCP_OPT_LEN_TMS;
+ opt = (struct tcpopt *)to;
+ }
+
+	/* setup WSC (window scale) */
+ if (so->wscale != 0) {
+
+ opt->kl.raw = TCP_OPT_KL_WSC;
+ opt->wscale = so->wscale;
+
+ to += TCP_OPT_LEN_WSC;
+ opt = (struct tcpopt *)to;
+ }
+
+ to[0] = TCP_OPT_KIND_EOL;
+}
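+
+/*
+ * Resulting layout sketch when all SYN options are in use:
+ *   MSS(4 bytes) | TS(10) | WSCALE(3) | EOL(1) = 18 bytes,
+ * which TCP_TX_OPT_LEN_MAX rounds up to the TCP_DATA_ALIGN boundary.
+ */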
+
+/*
+ * generates the TMS option for a non-SYN packet; make sure
+ * there are at least TCP_TX_OPT_LEN_TMS bytes available.
+ */
+static inline void
+fill_tms_opts(void *p, uint32_t val, uint32_t ecr)
+{
+ uint32_t *opt;
+
+ opt = (uint32_t *)p;
+ opt[0] = TCP_OPT_TMS_HDR;
+ opt[1] = rte_cpu_to_be_32(val);
+ opt[2] = rte_cpu_to_be_32(ecr);
+}
+
+static inline union tsopt
+get_tms_opts(uintptr_t p, uint32_t len)
+{
+ union tsopt ts;
+ uint32_t i, kind;
+ const uint32_t *opt;
+ const struct tcpopt *to;
+
+ opt = (const uint32_t *)p;
+
+	/* TS option is present in the recommended layout */
+ if (len >= TCP_TX_OPT_LEN_TMS && opt[0] == TCP_OPT_TMS_HDR) {
+ ts.val = rte_be_to_cpu_32(opt[1]);
+ ts.ecr = rte_be_to_cpu_32(opt[2]);
+ return ts;
+ }
+
+ /* parse through whole list of options. */
+ ts.raw = 0;
+ i = 0;
+ while (i < len) {
+ to = (const struct tcpopt *)(p + i);
+ kind = to->kl.kind;
+ if (kind == TCP_OPT_KIND_EOL)
+ break;
+ else if (kind == TCP_OPT_KIND_NOP)
+ i += sizeof(to->kl.kind);
+ else {
+ i += to->kl.len;
+ if (i <= len && to->kl.raw == TCP_OPT_KL_TMS) {
+ ts.val = rte_be_to_cpu_32(to->ts.val);
+ ts.ecr = rte_be_to_cpu_32(to->ts.ecr);
+ break;
+ }
+ }
+ }
+
+ return ts;
+}
+
+static inline uint8_t
+get_pkt_type(const struct rte_mbuf *m)
+{
+ uint32_t v;
+
+ v = m->packet_type &
+ (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_MASK);
+ if (v == (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP))
+ return TLE_V4;
+ else if (v == (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP))
+ return TLE_V6;
+ else
+ return TLE_VNUM;
+}
+
+static inline void
+get_pkt_info(const struct rte_mbuf *m, union pkt_info *pi, union seg_info *si)
+{
+ uint32_t len, type;
+ const struct tcp_hdr *tcph;
+ const union l4_ports *prt;
+ const union ipv4_addrs *pa4;
+
+ type = get_pkt_type(m);
+ len = m->l2_len;
+
+ /*
+ * this line is here just to avoid gcc warning:
+ * error: .<U6098>.<U6000>.addr4.raw may be used uninitialized.
+ */
+ pi->addr4.raw = 0;
+
+ if (type == TLE_V4) {
+ pa4 = rte_pktmbuf_mtod_offset(m, const union ipv4_addrs *,
+ len + offsetof(struct ipv4_hdr, src_addr));
+ pi->addr4.raw = pa4->raw;
+ } else if (type == TLE_V6) {
+ pi->addr6 = rte_pktmbuf_mtod_offset(m, const union ipv6_addrs *,
+ len + offsetof(struct ipv6_hdr, src_addr));
+ }
+
+ len += m->l3_len;
+ tcph = rte_pktmbuf_mtod_offset(m, const struct tcp_hdr *, len);
+ prt = (const union l4_ports *)
+ ((uintptr_t)tcph + offsetof(struct tcp_hdr, src_port));
+ pi->tf.flags = tcph->tcp_flags;
+ pi->tf.type = type;
+ pi->csf = m->ol_flags & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD);
+ pi->port.raw = prt->raw;
+
+ get_seg_info(tcph, si);
+}
+
+static inline uint32_t
+tcp_mbuf_seq_free(struct rte_mbuf *mb[], uint32_t num)
+{
+ uint32_t i, len;
+
+ len = 0;
+ for (i = 0; i != num; i++) {
+ len += mb[i]->pkt_len;
+ rte_pktmbuf_free(mb[i]);
+ }
+
+ return len;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_MISC_H_ */
diff --git a/lib/libtle_l4p/tcp_ofo.c b/lib/libtle_l4p/tcp_ofo.c
new file mode 100644
index 0000000..1565445
--- /dev/null
+++ b/lib/libtle_l4p/tcp_ofo.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <rte_malloc.h>
+#include <rte_errno.h>
+
+#include "tcp_stream.h"
+#include "tcp_rxq.h"
+
+#define OFO_FRACTION 4
+
+#define OFO_DB_MAX 0x20U
+
+#define OFODB_OBJ_MIN 8U
+#define OFODB_OBJ_MAX 0x20U
+
+#define OFO_OBJ_MAX (OFODB_OBJ_MAX * OFO_DB_MAX)
+
+void
+tcp_ofo_free(struct ofo *ofo)
+{
+ rte_free(ofo);
+}
+
+static void
+calc_ofo_elems(uint32_t nbufs, uint32_t *nobj, uint32_t *ndb)
+{
+ uint32_t n, nd, no;
+
+ n = nbufs / OFO_FRACTION;
+ n = RTE_MAX(n, OFODB_OBJ_MIN);
+ n = RTE_MIN(n, OFO_OBJ_MAX);
+
+ no = OFODB_OBJ_MIN / 2;
+ do {
+ no *= 2;
+ nd = n / no;
+ } while (nd > OFO_DB_MAX);
+
+ *nobj = no;
+ *ndb = nd;
+}
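+
+/*
+ * Worked example for calc_ofo_elems(): with nbufs == 1024,
+ * n = 1024 / OFO_FRACTION = 256 objects in total; the do/while picks
+ * the smallest power-of-2 block size that keeps the block count
+ * within OFO_DB_MAX: no = 8 gives nd = 256 / 8 = 32 == OFO_DB_MAX,
+ * i.e. 32 blocks of 8 mbuf slots each.
+ */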
+
+struct ofo *
+tcp_ofo_alloc(uint32_t nbufs, int32_t socket)
+{
+ uint32_t i, ndb, nobj;
+ size_t dsz, osz, sz;
+ struct ofo *ofo;
+ struct rte_mbuf **obj;
+
+ calc_ofo_elems(nbufs, &nobj, &ndb);
+ osz = sizeof(*ofo) + sizeof(ofo->db[0]) * ndb;
+ dsz = sizeof(ofo->db[0].obj[0]) * nobj * ndb;
+ sz = osz + dsz;
+
+ ofo = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, socket);
+ if (ofo == NULL) {
+ TCP_LOG(ERR, "%s: allocation of %zu bytes on socket %d "
+ "failed with error code: %d\n",
+ __func__, sz, socket, rte_errno);
+ return NULL;
+ }
+
+ obj = (struct rte_mbuf **)&ofo->db[ndb];
+ for (i = 0; i != ndb; i++) {
+ ofo->db[i].nb_max = nobj;
+ ofo->db[i].obj = obj + i * nobj;
+ }
+
+ ofo->nb_max = ndb;
+ return ofo;
+}
+
diff --git a/lib/libtle_l4p/tcp_ofo.h b/lib/libtle_l4p/tcp_ofo.h
new file mode 100644
index 0000000..4f3bdab
--- /dev/null
+++ b/lib/libtle_l4p/tcp_ofo.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_OFO_H_
+#define _TCP_OFO_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ofodb {
+ uint32_t nb_elem;
+ uint32_t nb_max;
+ union seqlen sl;
+ struct rte_mbuf **obj;
+};
+
+struct ofo {
+ uint32_t nb_elem;
+ uint32_t nb_max;
+ struct ofodb db[];
+};
+
+static inline void
+_ofodb_free(struct ofodb *db)
+{
+ uint32_t i;
+
+ for (i = 0; i != db->nb_elem; i++)
+ rte_pktmbuf_free(db->obj[i]);
+}
+
+static inline void
+_ofo_remove(struct ofo *ofo, uint32_t pos, uint32_t num)
+{
+ uint32_t i, n;
+
+ n = ofo->nb_elem - num - pos;
+ for (i = 0; i != n; i++)
+ ofo->db[pos + i] = ofo->db[pos + num + i];
+ ofo->nb_elem -= num;
+}
+
+static inline void
+tcp_ofo_reset(struct ofo *ofo)
+{
+ uint32_t i;
+
+ for (i = 0; i != ofo->nb_elem; i++)
+ _ofodb_free(&ofo->db[i]);
+
+ _ofo_remove(ofo, 0, ofo->nb_elem);
+}
+
+static inline uint32_t
+_ofo_insert_new(struct ofo *ofo, uint32_t pos, union seqlen *sl,
+ struct rte_mbuf *mb[], uint32_t num)
+{
+ uint32_t i, n, plen;
+ struct ofodb *db;
+
+ n = ofo->nb_elem;
+
+ /* out of space */
+ if (n == ofo->nb_max)
+ return 0;
+
+ /* allocate new one */
+ db = ofo->db + n;
+ ofo->nb_elem = n + 1;
+
+ /* insert into a proper position. */
+ for (i = n; i != pos; i--)
+ ofo->db[i] = ofo->db[i - 1];
+
+ /* fill new block */
+ n = RTE_MIN(db->nb_max, num);
+ for (i = 0; i != n; i++)
+ db->obj[i] = mb[i];
+
+ /* can't queue some packets. */
+ plen = 0;
+ for (i = n; i != num; i++)
+ plen += mb[i]->pkt_len;
+
+ db->nb_elem = n;
+ db->sl.seq = sl->seq;
+ db->sl.len = sl->len - plen;
+
+ sl->seq += db->sl.len;
+ sl->len -= db->sl.len;
+ return n;
+}
+
+static inline uint32_t
+_ofo_insert_right(struct ofo *ofo, uint32_t pos, union seqlen *sl,
+ struct rte_mbuf *mb[], uint32_t num)
+{
+ uint32_t i, j, k, n;
+ uint32_t end, plen, skip;
+ struct ofodb *db;
+
+ db = ofo->db + pos;
+ end = db->sl.seq + db->sl.len;
+
+ skip = end - sl->seq;
+
+ /* skip overlapping packets */
+ for (i = 0, n = skip; i != num && n != 0; i++, n -= plen) {
+
+ plen = mb[i]->pkt_len;
+ if (n < plen) {
+ /* adjust partially overlapped packet. */
+ rte_pktmbuf_adj(mb[i], plen - n);
+ break;
+ }
+ }
+
+ /* free totally overlapped packets. */
+ for (j = 0; j != i; j++)
+ rte_pktmbuf_free(mb[j]);
+
+ /* copy non-overlapping mbufs */
+ k = db->nb_elem;
+ n = RTE_MIN(db->nb_max - k, num - i);
+
+ plen = 0;
+ for (j = 0; j != n; j++) {
+ db->obj[k + j] = mb[i + j];
+ plen += mb[i + j]->pkt_len;
+ }
+
+ db->nb_elem += n;
+ db->sl.len += plen;
+
+ plen += skip;
+ sl->len -= plen;
+ sl->seq += plen;
+ return n + i;
+}
+
+static inline uint32_t
+_ofo_step(struct ofo *ofo, union seqlen *sl, struct rte_mbuf *mb[],
+ uint32_t num)
+{
+ uint32_t i, n, end, lo, ro;
+ struct ofodb *db;
+
+ db = NULL;
+ end = sl->seq + sl->len;
+ n = ofo->nb_elem;
+
+ /*
+ * start from the right side, assume that after some gap,
+ * we keep receiving packets in order.
+ */
+ for (i = n; i-- != 0; ) {
+ db = ofo->db + i;
+ if (tcp_seq_leq(db->sl.seq, sl->seq))
+ break;
+ }
+
+ /* new db required */
+ if ((int32_t)i < 0 || tcp_seq_lt(db->sl.seq + db->sl.len, sl->seq))
+ return _ofo_insert_new(ofo, i + 1, sl, mb, num);
+
+ /* new one is right adjacent, or overlap */
+
+ ro = sl->seq - db->sl.seq;
+ lo = end - db->sl.seq;
+
+ /* new one is completely overlapped by old one */
+ if (lo <= db->sl.len)
+ return 0;
+
+ /* either overlap OR (adjacent AND some free space remains) */
+ if (ro < db->sl.len || db->nb_elem != db->nb_max)
+ return _ofo_insert_right(ofo, i, sl, mb, num);
+
+ /* adjacent, no free space in current block */
+ return _ofo_insert_new(ofo, i + 1, sl, mb, num);
+}
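+
+/*
+ * Example of the three cases handled by _ofo_step(), assuming one
+ * existing block covering seq range [100, 200):
+ * - new data at seq 250: gap to the right -> _ofo_insert_new();
+ * - new data at seq 150: overlaps the block -> _ofo_insert_right(),
+ *   which trims the first 50 overlapping bytes and appends the rest;
+ * - new data at seq 120 with len 50: fully covered -> dropped
+ *   (returns 0).
+ */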
+
+static inline void
+_ofo_compact(struct ofo *ofo)
+{
+ uint32_t i, j, n, ro;
+ struct ofodb *db;
+
+ for (i = 0; i < ofo->nb_elem; i = j) {
+
+ for (j = i + 1; j != ofo->nb_elem; j++) {
+
+ /* no intersection */
+ ro = ofo->db[j].sl.seq - ofo->db[i].sl.seq;
+ if (ro > ofo->db[i].sl.len)
+ break;
+
+ db = ofo->db + j;
+ n = _ofo_insert_right(ofo, i, &db->sl, db->obj,
+ db->nb_elem);
+ if (n < db->nb_elem) {
+ db->nb_elem -= n;
+ break;
+ }
+ }
+
+ n = j - i - 1;
+ if (n != 0)
+ _ofo_remove(ofo, i + 1, n);
+ }
+}
+
+static inline uint32_t
+_ofodb_enqueue(struct rte_ring *r, const struct ofodb *db, union seqlen *sl)
+{
+ uint32_t n, num;
+
+ num = db->nb_elem;
+ sl->raw = db->sl.raw;
+ n = rte_ring_enqueue_burst(r, (void * const *)db->obj, num);
+
+ sl->len -= tcp_mbuf_seq_free(db->obj + n, num - n);
+ return num - n;
+}
+
+struct ofo *
+tcp_ofo_alloc(uint32_t nbufs, int32_t socket);
+
+void
+tcp_ofo_free(struct ofo *ofo);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_OFO_H_ */
diff --git a/lib/libtle_l4p/tcp_rxq.h b/lib/libtle_l4p/tcp_rxq.h
new file mode 100644
index 0000000..90e657f
--- /dev/null
+++ b/lib/libtle_l4p/tcp_rxq.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_RXQ_H_
+#define _TCP_RXQ_H_
+
+#include "tcp_ofo.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline uint32_t
+rx_ofo_enqueue(struct tle_tcp_stream *s, union seqlen *sl,
+ struct rte_mbuf *mb[], uint32_t num)
+{
+ uint32_t i, n;
+
+ n = 0;
+ do {
+ i = _ofo_step(s->rx.ofo, sl, mb + n, num - n);
+ n += i;
+ } while (i != 0 && n != num);
+
+ _ofo_compact(s->rx.ofo);
+ return n;
+}
+
+static inline uint32_t
+rx_ofo_reduce(struct tle_tcp_stream *s)
+{
+ uint32_t i, n, end, seq;
+ struct ofo *ofo;
+ struct ofodb *db;
+ union seqlen sl;
+
+ seq = s->tcb.rcv.nxt;
+ ofo = s->rx.ofo;
+
+ n = 0;
+ for (i = 0; i != ofo->nb_elem; i++) {
+
+ db = ofo->db + i;
+
+ /* gap still present */
+ if (tcp_seq_lt(seq, db->sl.seq))
+ break;
+
+ end = db->sl.seq + db->sl.len;
+
+		/* this db is fully overlapped */
+		if (tcp_seq_leq(end, seq)) {
+			_ofodb_free(db);
+		} else {
+			n += _ofodb_enqueue(s->rx.q, db, &sl);
+			seq = sl.seq + sl.len;
+		}
+ }
+
+ s->tcb.rcv.nxt = seq;
+ _ofo_remove(ofo, 0, i);
+ return n;
+}
+
+static inline uint32_t
+rx_ino_enqueue(struct tle_tcp_stream *s, union seqlen *sl,
+ struct rte_mbuf *mb[], uint32_t num)
+{
+ uint32_t i, n;
+
+ n = rte_ring_enqueue_burst(s->rx.q, (void * const *)mb, num);
+
+	/* error: can't queue some packets into receive buffer. */
+ for (i = n; i != num; i++)
+ sl->len -= mb[i]->pkt_len;
+
+ s->tcb.rcv.nxt = sl->seq + sl->len;
+ return n;
+}
+
+static inline uint32_t
+rx_data_enqueue(struct tle_tcp_stream *s, uint32_t seq, uint32_t len,
+ struct rte_mbuf *mb[], uint32_t num)
+{
+ uint32_t n, r, t;
+ union seqlen sl;
+
+ sl.seq = seq;
+ sl.len = len;
+
+ r = rte_ring_count(s->rx.q);
+
+ /* in order packets, ready to be delivered */
+ if (seq == s->tcb.rcv.nxt) {
+
+ t = rx_ino_enqueue(s, &sl, mb, num);
+
+ /* failed to queue all input in-order packets */
+ if (t != num)
+ TCP_LOG(DEBUG,
+ "%s(s=%p, seq=%u, len=%u, num=%u) failed to queue "
+ "%u packets;\n",
+ __func__, s, seq, len, num, num - t);
+
+		/* try to consume some out-of-order packets */
+ else {
+ n = rx_ofo_reduce(s);
+ if (n != 0)
+ TCP_LOG(DEBUG,
+ "%s(s=%p, rcv.nxt=%u) failed to queue %u "
+ "OFO packets;\n",
+ __func__, s, s->tcb.rcv.nxt, n);
+ }
+
+ /* queue out of order packets */
+ } else {
+ t = rx_ofo_enqueue(s, &sl, mb, num);
+ }
+
+ n = rte_ring_count(s->rx.q);
+ if (r != n) {
+ /* raise RX event */
+ if (s->rx.ev != NULL)
+ tle_event_raise(s->rx.ev);
+ /* if RX queue was empty invoke RX notification callback. */
+ else if (s->rx.cb.func != NULL && r == 0)
+ s->rx.cb.func(s->rx.cb.data, &s->s);
+ }
+
+ return t;
+}
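+
+/*
+ * Note on the notification logic above: the RX event is raised
+ * whenever the ring count grew during this call, while the callback
+ * (if any) fires only on the empty -> non-empty transition (r == 0),
+ * so a user polling via callbacks gets one wakeup per data burst.
+ */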
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_RXQ_H_ */
diff --git a/lib/libtle_l4p/tcp_rxtx.c b/lib/libtle_l4p/tcp_rxtx.c
new file mode 100644
index 0000000..4e43730
--- /dev/null
+++ b/lib/libtle_l4p/tcp_rxtx.c
@@ -0,0 +1,2431 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_ip.h>
+#include <rte_ip_frag.h>
+#include <rte_tcp.h>
+
+#include "tcp_stream.h"
+#include "tcp_timer.h"
+#include "stream_table.h"
+#include "syncookie.h"
+#include "misc.h"
+#include "tcp_ctl.h"
+#include "tcp_rxq.h"
+#include "tcp_txq.h"
+
+#define TCP_MAX_PKT_SEG 0x20
+
+/*
+ * checks if input TCP ports and IP addresses match given stream.
+ * returns zero on success.
+ */
+static inline int
+rx_check_stream(const struct tle_tcp_stream *s, const union pkt_info *pi)
+{
+ int32_t rc;
+
+ if (pi->tf.type == TLE_V4)
+ rc = (pi->port.raw & s->s.pmsk.raw) != s->s.port.raw ||
+ (pi->addr4.raw & s->s.ipv4.mask.raw) !=
+ s->s.ipv4.addr.raw;
+ else
+ rc = (pi->port.raw & s->s.pmsk.raw) != s->s.port.raw ||
+ ymm_mask_cmp(&pi->addr6->raw, &s->s.ipv6.addr.raw,
+ &s->s.ipv6.mask.raw) != 0;
+
+ return rc;
+}
+
+static inline struct tle_tcp_stream *
+rx_obtain_listen_stream(const struct tle_dev *dev, const union pkt_info *pi,
+ uint32_t type)
+{
+ struct tle_tcp_stream *s;
+
+ s = (struct tle_tcp_stream *)dev->dp[type]->streams[pi->port.dst];
+ if (s == NULL || rwl_acquire(&s->rx.use) < 0)
+ return NULL;
+
+ /* check that we have a proper stream. */
+ if (s->tcb.state != TCP_ST_LISTEN) {
+ rwl_release(&s->rx.use);
+ s = NULL;
+ }
+
+ return s;
+}
+
+static inline struct tle_tcp_stream *
+rx_obtain_stream(const struct tle_dev *dev, struct stbl *st,
+ const union pkt_info *pi, uint32_t type)
+{
+ struct tle_tcp_stream *s;
+
+ s = stbl_find_data(st, pi);
+ if (s == NULL) {
+ if (pi->tf.flags == TCP_FLAG_ACK)
+ return rx_obtain_listen_stream(dev, pi, type);
+ return NULL;
+ }
+
+ if (stbl_data_pkt(s) || rwl_acquire(&s->rx.use) < 0)
+ return NULL;
+ /* check that we have a proper stream. */
+ else if (s->tcb.state == TCP_ST_CLOSED) {
+ rwl_release(&s->rx.use);
+ s = NULL;
+ }
+
+ return s;
+}
+
+/*
+ * Consider 2 pkt_info *equal* if their:
+ * - types (IPv4/IPv6)
+ * - TCP flags
+ * - checksum flags
+ * - TCP src and dst ports
+ * - IP src and dst addresses
+ * are equal.
+ */
+static inline int
+pkt_info_bulk_eq(const union pkt_info pi[], uint32_t num)
+{
+ uint32_t i;
+
+ i = 1;
+
+ if (pi[0].tf.type == TLE_V4) {
+ while (i != num && xmm_cmp(&pi[0].raw, &pi[i].raw) == 0)
+ i++;
+
+ } else if (pi[0].tf.type == TLE_V6) {
+ while (i != num &&
+ pi[0].raw.u64[0] == pi[i].raw.u64[0] &&
+ ymm_cmp(&pi[0].addr6->raw,
+ &pi[i].addr6->raw) == 0)
+ i++;
+ }
+
+ return i;
+}
+
+static inline int
+pkt_info_bulk_syneq(const union pkt_info pi[], uint32_t num)
+{
+ uint32_t i;
+
+ i = 1;
+
+ if (pi[0].tf.type == TLE_V4) {
+ while (i != num && pi[0].tf.raw == pi[i].tf.raw &&
+ pi[0].port.dst == pi[i].port.dst &&
+ pi[0].addr4.dst == pi[i].addr4.dst)
+ i++;
+
+ } else if (pi[0].tf.type == TLE_V6) {
+ while (i != num && pi[0].tf.raw == pi[i].tf.raw &&
+ pi[0].port.dst == pi[i].port.dst &&
+ xmm_cmp(&pi[0].addr6->dst,
+ &pi[i].addr6->dst) == 0)
+ i++;
+ }
+
+ return i;
+}
+
+static inline void
+stream_drb_free(struct tle_tcp_stream *s, struct tle_drb *drbs[],
+ uint32_t nb_drb)
+{
+ rte_ring_enqueue_burst(s->tx.drb.r, (void **)drbs, nb_drb);
+}
+
+static inline uint32_t
+stream_drb_alloc(struct tle_tcp_stream *s, struct tle_drb *drbs[],
+ uint32_t nb_drb)
+{
+ return rte_ring_dequeue_burst(s->tx.drb.r, (void **)drbs, nb_drb);
+}
+
+static inline void
+fill_tcph(struct tcp_hdr *l4h, const struct tcb *tcb, union l4_ports port,
+ uint32_t seq, uint8_t hlen, uint8_t flags)
+{
+ uint16_t wnd;
+
+ l4h->src_port = port.dst;
+ l4h->dst_port = port.src;
+
+ wnd = (flags & TCP_FLAG_SYN) ?
+ RTE_MAX(TCP4_MIN_MSS, tcb->so.mss) :
+ tcb->rcv.wnd >> tcb->rcv.wscale;
+
+ /* ??? use sse shuffle to hton all remaining 16 bytes at once. ??? */
+ l4h->sent_seq = rte_cpu_to_be_32(seq);
+ l4h->recv_ack = rte_cpu_to_be_32(tcb->rcv.nxt);
+ l4h->data_off = hlen / TCP_DATA_ALIGN << TCP_DATA_OFFSET;
+ l4h->tcp_flags = flags;
+ l4h->rx_win = rte_cpu_to_be_16(wnd);
+ l4h->cksum = 0;
+ l4h->tcp_urp = 0;
+
+ if (flags & TCP_FLAG_SYN)
+ fill_syn_opts(l4h + 1, &tcb->so);
+ else if ((flags & TCP_FLAG_RST) == 0 && tcb->so.ts.raw != 0)
+ fill_tms_opts(l4h + 1, tcb->snd.ts, tcb->rcv.ts);
+}
+
+static inline int
+tcp_fill_mbuf(struct rte_mbuf *m, const struct tle_tcp_stream *s,
+ const struct tle_dest *dst, uint64_t ol_flags,
+ union l4_ports port, uint32_t seq, uint32_t flags,
+ uint32_t pid, uint32_t swcsm)
+{
+ uint32_t l4, len, plen;
+ struct tcp_hdr *l4h;
+ char *l2h;
+
+ len = dst->l2_len + dst->l3_len;
+ plen = m->pkt_len;
+
+ if (flags & TCP_FLAG_SYN)
+ l4 = sizeof(*l4h) + TCP_TX_OPT_LEN_MAX;
+ else if ((flags & TCP_FLAG_RST) == 0 && s->tcb.rcv.ts != 0)
+ l4 = sizeof(*l4h) + TCP_TX_OPT_LEN_TMS;
+ else
+ l4 = sizeof(*l4h);
+
+ /* adjust mbuf to put L2/L3/L4 headers into it. */
+ l2h = rte_pktmbuf_prepend(m, len + l4);
+ if (l2h == NULL)
+ return -EINVAL;
+
+ /* copy L2/L3 header */
+ rte_memcpy(l2h, dst->hdr, len);
+
+ /* setup TCP header & options */
+ l4h = (struct tcp_hdr *)(l2h + len);
+ fill_tcph(l4h, &s->tcb, port, seq, l4, flags);
+
+ /* setup mbuf TX offload related fields. */
+ m->tx_offload = _mbuf_tx_offload(dst->l2_len, dst->l3_len, l4, 0, 0, 0);
+ m->ol_flags |= ol_flags;
+
+ /* update proto specific fields. */
+
+ if (s->s.type == TLE_V4) {
+ struct ipv4_hdr *l3h;
+ l3h = (struct ipv4_hdr *)(l2h + dst->l2_len);
+ l3h->packet_id = rte_cpu_to_be_16(pid);
+ l3h->total_length = rte_cpu_to_be_16(plen + dst->l3_len + l4);
+
+ if ((ol_flags & PKT_TX_TCP_CKSUM) != 0)
+ l4h->cksum = _ipv4x_phdr_cksum(l3h, m->l3_len,
+ ol_flags);
+ else if (swcsm != 0)
+ l4h->cksum = _ipv4_udptcp_mbuf_cksum(m, len, l3h);
+
+ if ((ol_flags & PKT_TX_IP_CKSUM) == 0 && swcsm != 0)
+ l3h->hdr_checksum = _ipv4x_cksum(l3h, m->l3_len);
+ } else {
+ struct ipv6_hdr *l3h;
+ l3h = (struct ipv6_hdr *)(l2h + dst->l2_len);
+ l3h->payload_len = rte_cpu_to_be_16(plen + l4);
+ if ((ol_flags & PKT_TX_TCP_CKSUM) != 0)
+ l4h->cksum = rte_ipv6_phdr_cksum(l3h, ol_flags);
+ else if (swcsm != 0)
+ l4h->cksum = _ipv6_udptcp_mbuf_cksum(m, len, l3h);
+ }
+
+ return 0;
+}
+
+/*
+ * This function is supposed to be used only for data packets.
+ * Assumes that L2/L3/L4 headers and mbuf fields are already set up properly.
+ * - updates tcp SEG.SEQ, SEG.ACK, TS.VAL, TS.ECR.
+ * - if no HW cksum offloads are enabled, calculates TCP checksum.
+ */
+static inline void
+tcp_update_mbuf(struct rte_mbuf *m, uint32_t type, const struct tcb *tcb,
+ uint32_t seq, uint32_t pid)
+{
+ struct tcp_hdr *l4h;
+ uint32_t len;
+
+ len = m->l2_len + m->l3_len;
+ l4h = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, len);
+
+ l4h->sent_seq = rte_cpu_to_be_32(seq);
+ l4h->recv_ack = rte_cpu_to_be_32(tcb->rcv.nxt);
+
+ if (tcb->so.ts.raw != 0)
+ fill_tms_opts(l4h + 1, tcb->snd.ts, tcb->rcv.ts);
+
+ if (type == TLE_V4) {
+ struct ipv4_hdr *l3h;
+ l3h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
+ l3h->hdr_checksum = 0;
+ l3h->packet_id = rte_cpu_to_be_16(pid);
+ if ((m->ol_flags & PKT_TX_IP_CKSUM) == 0)
+ l3h->hdr_checksum = _ipv4x_cksum(l3h, m->l3_len);
+ }
+
+ /* have to calculate TCP checksum in SW */
+ if ((m->ol_flags & PKT_TX_TCP_CKSUM) == 0) {
+
+ l4h->cksum = 0;
+
+ if (type == TLE_V4) {
+ struct ipv4_hdr *l3h;
+ l3h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+ m->l2_len);
+ l4h->cksum = _ipv4_udptcp_mbuf_cksum(m, len, l3h);
+
+ } else {
+ struct ipv6_hdr *l3h;
+ l3h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+ m->l2_len);
+ l4h->cksum = _ipv6_udptcp_mbuf_cksum(m, len, l3h);
+ }
+ }
+}
+
+/* Send data packets that need to be ACK-ed by peer */
+static inline uint32_t
+tx_data_pkts(struct tle_tcp_stream *s, struct rte_mbuf *const m[], uint32_t num)
+{
+ uint32_t bsz, i, nb, nbm;
+ struct tle_dev *dev;
+ struct tle_drb *drb[num];
+
+	/* calculate how many drbs are needed. */
+ bsz = s->tx.drb.nb_elem;
+ nbm = (num + bsz - 1) / bsz;
+
+ /* allocate drbs, adjust number of packets. */
+ nb = stream_drb_alloc(s, drb, nbm);
+
+ /* drb ring is empty. */
+ if (nb == 0)
+ return 0;
+
+ else if (nb != nbm)
+ num = nb * bsz;
+
+ dev = s->tx.dst.dev;
+
+ /* enqueue pkts for TX. */
+ nbm = nb;
+ i = tle_dring_mp_enqueue(&dev->tx.dr, (const void * const*)m,
+ num, drb, &nb);
+
+ /* free unused drbs. */
+ if (nb != 0)
+ stream_drb_free(s, drb + nbm - nb, nb);
+
+ return i;
+}
+
+static inline uint32_t
+tx_data_bulk(struct tle_tcp_stream *s, union seqlen *sl, struct rte_mbuf *mi[],
+ uint32_t num)
+{
+ uint32_t fail, i, k, n, mss, pid, plen, sz, tn, type;
+ struct tle_dev *dev;
+ struct rte_mbuf *mb;
+ struct rte_mbuf *mo[MAX_PKT_BURST + TCP_MAX_PKT_SEG];
+
+ mss = s->tcb.snd.mss;
+ type = s->s.type;
+
+ dev = s->tx.dst.dev;
+ pid = rte_atomic32_add_return(&dev->tx.packet_id[type], num) - num;
+
+ k = 0;
+ tn = 0;
+ fail = 0;
+ for (i = 0; i != num && sl->len != 0 && fail == 0; i++) {
+
+ mb = mi[i];
+ sz = RTE_MIN(sl->len, mss);
+ plen = PKT_L4_PLEN(mb);
+
+		/* fast path, no need to use indirect mbufs. */
+ if (plen <= sz) {
+
+ /* update pkt TCP header */
+ tcp_update_mbuf(mb, type, &s->tcb, sl->seq, pid + i);
+
+ /* keep mbuf till ACK is received. */
+ rte_pktmbuf_refcnt_update(mb, 1);
+ sl->len -= plen;
+ sl->seq += plen;
+ mo[k++] = mb;
+		/* remaining snd.wnd is less than MSS, send nothing */
+ } else if (sz < mss)
+ break;
+ /* packet indirection needed */
+ else
+ RTE_VERIFY(0);
+
+ if (k >= MAX_PKT_BURST) {
+ n = tx_data_pkts(s, mo, k);
+ fail = k - n;
+ tn += n;
+ k = 0;
+ }
+ }
+
+ if (k != 0) {
+ n = tx_data_pkts(s, mo, k);
+ fail = k - n;
+ tn += n;
+ }
+
+ if (fail != 0) {
+ sz = tcp_mbuf_seq_free(mo + n, fail);
+ sl->seq -= sz;
+ sl->len += sz;
+ }
+
+ return tn;
+}
+
+/*
+ * gets data from stream send buffer, updates it and
+ * queues it into TX device queue.
+ * Note that this function is not MT safe.
+ */
+static inline uint32_t
+tx_nxt_data(struct tle_tcp_stream *s, uint32_t tms)
+{
+ uint32_t n, num, tn, wnd;
+ struct rte_mbuf **mi;
+ union seqlen sl;
+
+ tn = 0;
+ wnd = s->tcb.snd.wnd - (uint32_t)(s->tcb.snd.nxt - s->tcb.snd.una);
+ sl.seq = s->tcb.snd.nxt;
+ sl.len = RTE_MIN(wnd, s->tcb.snd.cwnd);
+
+ if (sl.len == 0)
+ return tn;
+
+ /* update send timestamp */
+ s->tcb.snd.ts = tms;
+
+ do {
+ /* get group of packets */
+ mi = tcp_txq_get_nxt_objs(s, &num);
+
+ /* stream send buffer is empty */
+ if (num == 0)
+ break;
+
+ /* queue data packets for TX */
+ n = tx_data_bulk(s, &sl, mi, num);
+ tn += n;
+
+ /* update consumer head */
+ tcp_txq_set_nxt_head(s, n);
+ } while (n == num);
+
+ s->tcb.snd.nxt += sl.seq - (uint32_t)s->tcb.snd.nxt;
+ return tn;
+}
+
+static inline void
+free_una_data(struct tle_tcp_stream *s, uint32_t len)
+{
+ uint32_t i, n, num, plen;
+ struct rte_mbuf **mi;
+
+ n = 0;
+ plen = 0;
+
+ do {
+ /* get group of packets */
+ mi = tcp_txq_get_una_objs(s, &num);
+
+ if (num == 0)
+ break;
+
+ /* free acked data */
+ for (i = 0; i != num && n != len; i++, n = plen) {
+ plen += PKT_L4_PLEN(mi[i]);
+ if (plen > len) {
+ /* keep SND.UNA at the start of the packet */
+ len -= RTE_MIN(len, plen - len);
+ break;
+ }
+ rte_pktmbuf_free(mi[i]);
+ }
+
+ /* update consumer tail */
+ tcp_txq_set_una_tail(s, i);
+ } while (plen < len);
+
+ s->tcb.snd.una += len;
+
+ /*
+ * that could happen in case of retransmit,
+ * adjust SND.NXT with SND.UNA.
+ */
+ if (s->tcb.snd.una > s->tcb.snd.nxt) {
+ tcp_txq_rst_nxt_head(s);
+ s->tcb.snd.nxt = s->tcb.snd.una;
+ }
+}
+
+static inline uint16_t
+calc_smss(uint16_t mss, const struct tle_dest *dst)
+{
+ uint16_t n;
+
+ n = dst->mtu - dst->l2_len - dst->l3_len - TCP_TX_HDR_DACK;
+ mss = RTE_MIN(n, mss);
+ return mss;
+}
+
+/*
+ * RFC 5681 3.1
+ * If SMSS > 2190 bytes:
+ * IW = 2 * SMSS bytes and MUST NOT be more than 2 segments
+ * If (SMSS > 1095 bytes) and (SMSS <= 2190 bytes):
+ * IW = 3 * SMSS bytes and MUST NOT be more than 3 segments
+ * if SMSS <= 1095 bytes:
+ * IW = 4 * SMSS bytes and MUST NOT be more than 4 segments
+ */
+static inline uint32_t
+initial_cwnd(uint16_t smss)
+{
+ if (smss > 2190)
+ return 2 * smss;
+ else if (smss > 1095)
+ return 3 * smss;
+ return 4 * smss;
+}
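+
+/*
+ * e.g. for a typical Ethernet path with SMSS = 1460 the middle clause
+ * applies and IW = 3 * 1460 = 4380 bytes.
+ */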
+
+/*
+ * queues a standalone packet to the particular output device.
+ * It assumes that:
+ * - L2/L3/L4 headers should be already set.
+ * - packet fits into one segment.
+ */
+static inline int
+send_pkt(struct tle_tcp_stream *s, struct tle_dev *dev, struct rte_mbuf *m)
+{
+ uint32_t n, nb;
+ struct tle_drb *drb;
+
+ if (stream_drb_alloc(s, &drb, 1) == 0)
+ return -ENOBUFS;
+
+ /* enqueue pkt for TX. */
+ nb = 1;
+ n = tle_dring_mp_enqueue(&dev->tx.dr, (const void * const*)&m, 1,
+ &drb, &nb);
+
+ /* free unused drbs. */
+ if (nb != 0)
+ stream_drb_free(s, &drb, 1);
+
+ return (n == 1) ? 0 : -ENOBUFS;
+}
+
+static inline int
+send_ctrl_pkt(struct tle_tcp_stream *s, struct rte_mbuf *m, uint32_t seq,
+ uint32_t flags)
+{
+ const struct tle_dest *dst;
+ uint32_t pid, type;
+ int32_t rc;
+
+ dst = &s->tx.dst;
+ type = s->s.type;
+ pid = rte_atomic32_add_return(&dst->dev->tx.packet_id[type], 1) - 1;
+
+ rc = tcp_fill_mbuf(m, s, dst, 0, s->s.port, seq, flags, pid, 1);
+ if (rc == 0)
+ rc = send_pkt(s, dst->dev, m);
+
+ return rc;
+}
+
+static inline int
+send_rst(struct tle_tcp_stream *s, uint32_t seq)
+{
+ struct rte_mbuf *m;
+ int32_t rc;
+
+ m = rte_pktmbuf_alloc(s->tx.dst.head_mp);
+ if (m == NULL)
+ return -ENOMEM;
+
+ rc = send_ctrl_pkt(s, m, seq, TCP_FLAG_RST);
+ if (rc != 0)
+ rte_pktmbuf_free(m);
+
+ return rc;
+}
+
+static inline int
+send_ack(struct tle_tcp_stream *s, uint32_t tms, uint32_t flags)
+{
+ struct rte_mbuf *m;
+ uint32_t seq;
+ int32_t rc;
+
+ m = rte_pktmbuf_alloc(s->tx.dst.head_mp);
+ if (m == NULL)
+ return -ENOMEM;
+
+ seq = s->tcb.snd.nxt - ((flags & (TCP_FLAG_FIN | TCP_FLAG_SYN)) != 0);
+ s->tcb.snd.ts = tms;
+
+ rc = send_ctrl_pkt(s, m, seq, flags);
+ if (rc != 0) {
+ rte_pktmbuf_free(m);
+ return rc;
+ }
+
+ s->tcb.snd.ack = s->tcb.rcv.nxt;
+ return 0;
+}
+
+
+static int
+sync_ack(struct tle_tcp_stream *s, const union pkt_info *pi,
+ const union seg_info *si, uint32_t ts, struct rte_mbuf *m)
+{
+ uint16_t len;
+ int32_t rc;
+ uint32_t pid, seq, type;
+ struct tle_dev *dev;
+ const void *da;
+ struct tle_dest dst;
+ const struct tcp_hdr *th;
+
+ type = s->s.type;
+
+ /* get destination information. */
+ if (type == TLE_V4)
+ da = &pi->addr4.src;
+ else
+ da = &pi->addr6->src;
+
+ rc = stream_get_dest(&s->s, da, &dst);
+ if (rc < 0)
+ return rc;
+
+ th = rte_pktmbuf_mtod_offset(m, const struct tcp_hdr *,
+ m->l2_len + m->l3_len);
+ get_syn_opts(&s->tcb.so, (uintptr_t)(th + 1), m->l4_len - sizeof(*th));
+
+ s->tcb.rcv.nxt = si->seq + 1;
+ seq = sync_gen_seq(pi, s->tcb.rcv.nxt, ts, s->tcb.so.mss);
+ s->tcb.so.ts.ecr = s->tcb.so.ts.val;
+ s->tcb.so.ts.val = sync_gen_ts(ts, s->tcb.so.wscale);
+ s->tcb.so.wscale = (s->tcb.so.wscale == TCP_WSCALE_NONE) ?
+ TCP_WSCALE_NONE : TCP_WSCALE_DEFAULT;
+ s->tcb.so.mss = calc_smss(dst.mtu, &dst);
+
+ /* reset mbuf's data contents. */
+ len = m->l2_len + m->l3_len + m->l4_len;
+ m->tx_offload = 0;
+ if (rte_pktmbuf_adj(m, len) == NULL)
+ return -EINVAL;
+
+ dev = dst.dev;
+ pid = rte_atomic32_add_return(&dev->tx.packet_id[type], 1) - 1;
+
+ rc = tcp_fill_mbuf(m, s, &dst, 0, pi->port, seq,
+ TCP_FLAG_SYN | TCP_FLAG_ACK, pid, 1);
+ if (rc == 0)
+ rc = send_pkt(s, dev, m);
+
+ return rc;
+}
+
+/*
+ * RFC 793:
+ * There are four cases for the acceptability test for an incoming segment:
+ * Segment Receive Test
+ * Length Window
+ * ------- ------- -------------------------------------------
+ * 0 0 SEG.SEQ = RCV.NXT
+ * 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
+ * >0 0 not acceptable
+ * >0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND
+ * or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND
+ */
+static inline int
+check_seqn(const struct tcb *tcb, uint32_t seqn, uint32_t len)
+{
+ uint32_t n;
+
+ n = seqn + len;
+ if (seqn - tcb->rcv.nxt >= tcb->rcv.wnd &&
+ n - tcb->rcv.nxt > tcb->rcv.wnd)
+ return -ERANGE;
+
+ return 0;
+}
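+
+/*
+ * Example: with RCV.NXT = 1000 and RCV.WND = 500 the window is
+ * [1000, 1500); a segment (seqn = 900, len = 200) is accepted since
+ * its tail lands inside the window, while (seqn = 1600, len = 100)
+ * fails both checks above and is rejected with -ERANGE.
+ */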
+
+static inline union tsopt
+rx_tms_opt(const struct tcb *tcb, const struct rte_mbuf *mb)
+{
+ union tsopt ts;
+ uintptr_t opt;
+ const struct tcp_hdr *th;
+
+ if (tcb->so.ts.val != 0) {
+ opt = rte_pktmbuf_mtod_offset(mb, uintptr_t,
+ mb->l2_len + mb->l3_len + sizeof(*th));
+ ts = get_tms_opts(opt, mb->l4_len - sizeof(*th));
+ } else
+ ts.raw = 0;
+
+ return ts;
+}
+
+/*
+ * PAWS and sequence check.
+ * RFC 1323 4.2.1
+ */
+static inline int
+rx_check_seq(struct tcb *tcb, uint32_t seq, uint32_t len, const union tsopt ts)
+{
+ int32_t rc;
+
+ /* RFC 1323 4.2.1 R2 */
+ rc = check_seqn(tcb, seq, len);
+ if (rc < 0)
+ return rc;
+
+ if (ts.raw != 0) {
+
+ /* RFC 1323 4.2.1 R1 */
+ if (tcp_seq_lt(ts.val, tcb->rcv.ts))
+ return -ERANGE;
+
+ /* RFC 1323 4.2.1 R3 */
+ if (tcp_seq_leq(seq, tcb->snd.ack) &&
+ tcp_seq_lt(tcb->snd.ack, seq + len))
+ tcb->rcv.ts = ts.val;
+ }
+
+ return rc;
+}
+
+static inline int
+rx_check_ack(const struct tcb *tcb, uint32_t ack)
+{
+ uint32_t max;
+
+ max = (uint32_t)RTE_MAX(tcb->snd.nxt, tcb->snd.rcvr);
+
+ if (tcp_seq_leq(tcb->snd.una, ack) && tcp_seq_leq(ack, max))
+ return 0;
+
+ return -ERANGE;
+}
+
+static inline int
+rx_check_seqack(struct tcb *tcb, uint32_t seq, uint32_t ack, uint32_t len,
+ const union tsopt ts)
+{
+ int32_t rc;
+
+ rc = rx_check_seq(tcb, seq, len, ts);
+ rc |= rx_check_ack(tcb, ack);
+ return rc;
+}
+
+static inline int
+restore_syn_pkt(const union pkt_info *pi, const union seg_info *si,
+ uint32_t ts, struct rte_mbuf *mb)
+{
+ int32_t rc;
+ uint32_t len;
+ struct tcp_hdr *th;
+ struct syn_opts so;
+
+	/* check that ACK, etc. fields are what we expected. */
+ rc = sync_check_ack(pi, si->seq, si->ack - 1, ts);
+ if (rc < 0)
+ return rc;
+
+ so.mss = rc;
+
+ th = rte_pktmbuf_mtod_offset(mb, struct tcp_hdr *,
+ mb->l2_len + mb->l3_len);
+ len = mb->l4_len - sizeof(*th);
+ sync_get_opts(&so, (uintptr_t)(th + 1), len);
+
+ /* reconstruct SYN options, extend header size if necessary */
+ if (len < TCP_TX_OPT_LEN_MAX) {
+ len = TCP_TX_OPT_LEN_MAX - len;
+ th->data_off = TCP_TX_OPT_LEN_MAX / TCP_DATA_ALIGN <<
+ TCP_DATA_OFFSET;
+ mb->pkt_len += len;
+ mb->data_len += len;
+ mb->l4_len += len;
+ }
+
+ fill_syn_opts(th + 1, &so);
+ return 0;
+}
+
+static inline int
+rx_ack_listen(struct tle_tcp_stream *s, struct stbl *st,
+ const union pkt_info *pi, const union seg_info *si,
+ uint32_t ts, struct rte_mbuf *mb)
+{
+ int32_t rc;
+ struct stbl_entry *se;
+
+ if (pi->tf.flags != TCP_FLAG_ACK || rx_check_stream(s, pi) != 0)
+ return -EINVAL;
+
+ /* ACK for new connection request. */
+
+ rc = restore_syn_pkt(pi, si, ts, mb);
+ if (rc < 0)
+ return rc;
+
+ se = stbl_add_pkt(st, pi, mb);
+ if (se == NULL)
+ return -ENOBUFS;
+
+ /* put new connection requests into stream listen queue */
+ if (rte_ring_enqueue_burst(s->rx.q,
+ (void * const *)&se, 1) != 1) {
+ stbl_del_pkt(st, se, pi);
+ return -ENOBUFS;
+ }
+
+ return 0;
+}
+
+static inline void
+stream_term(struct tle_tcp_stream *s)
+{
+ struct sdr *dr;
+
+ s->tcb.state = TCP_ST_CLOSED;
+ rte_smp_wmb();
+
+ timer_stop(s);
+
+ /* close() was already invoked, schedule final cleanup */
+ if ((s->tcb.uop & TCP_OP_CLOSE) != 0) {
+
+ dr = CTX_TCP_SDR(s->s.ctx);
+ STAILQ_INSERT_TAIL(&dr->be, &s->s, link);
+
+	/* notify user that stream needs to be closed */
+ } else if (s->err.ev != NULL)
+ tle_event_raise(s->err.ev);
+ else if (s->err.cb.func != NULL)
+ s->err.cb.func(s->err.cb.data, &s->s);
+}
+
+static inline int
+data_pkt_adjust(const struct tcb *tcb, struct rte_mbuf *mb, uint32_t hlen,
+ uint32_t *seqn, uint32_t *plen)
+{
+ uint32_t len, n, seq;
+
+ seq = *seqn;
+ len = *plen;
+
+ rte_pktmbuf_adj(mb, hlen);
+ if (len == 0)
+ return -ENODATA;
+ /* cut off the start of the packet */
+ else if (tcp_seq_lt(seq, tcb->rcv.nxt)) {
+ n = tcb->rcv.nxt - seq;
+ if (n >= len)
+ return -ENODATA;
+
+ rte_pktmbuf_adj(mb, n);
+ *seqn = seq + n;
+ *plen = len - n;
+ }
+
+ return 0;
+}
+
+static inline uint32_t
+rx_ackdata(struct tle_tcp_stream *s, uint32_t ack)
+{
+ uint32_t k, n;
+
+ n = ack - (uint32_t)s->tcb.snd.una;
+
+ /* some more data was acked. */
+ if (n != 0) {
+
+ /* advance SND.UNA and free related packets. */
+ k = rte_ring_free_count(s->tx.q);
+ free_una_data(s, n);
+
+ /* mark the stream as available for writing */
+ if (rte_ring_free_count(s->tx.q) != 0) {
+ if (s->tx.ev != NULL)
+ tle_event_raise(s->tx.ev);
+ else if (k == 0 && s->tx.cb.func != NULL)
+ s->tx.cb.func(s->tx.cb.data, &s->s);
+ }
+ }
+
+ return n;
+}
+
+static void
+rx_fin_state(struct tle_tcp_stream *s, struct resp_info *rsp)
+{
+ uint32_t state;
+ int32_t ackfin;
+
+ s->tcb.rcv.nxt += 1;
+
+ ackfin = (s->tcb.snd.una == s->tcb.snd.fss);
+ state = s->tcb.state;
+
+ if (state == TCP_ST_ESTABLISHED) {
+ s->tcb.state = TCP_ST_CLOSE_WAIT;
+ /* raise err.ev & err.cb */
+ if (s->err.ev != NULL)
+ tle_event_raise(s->err.ev);
+ else if (s->err.cb.func != NULL)
+ s->err.cb.func(s->err.cb.data, &s->s);
+ } else if (state == TCP_ST_FIN_WAIT_1 || state == TCP_ST_CLOSING) {
+ rsp->flags |= TCP_FLAG_ACK;
+ if (ackfin != 0) {
+ s->tcb.state = TCP_ST_TIME_WAIT;
+ s->tcb.snd.rto = TCP_RTO_2MSL;
+ timer_reset(s);
+ } else
+ s->tcb.state = TCP_ST_CLOSING;
+ } else if (state == TCP_ST_FIN_WAIT_2) {
+ rsp->flags |= TCP_FLAG_ACK;
+ s->tcb.state = TCP_ST_TIME_WAIT;
+ s->tcb.snd.rto = TCP_RTO_2MSL;
+ timer_reset(s);
+ } else if (state == TCP_ST_LAST_ACK && ackfin != 0) {
+ stream_term(s);
+ }
+}
+
+/*
+ * FIN process for ESTABLISHED state
+ * returns:
+ * < 0 - error occurred
+ *   0 - FIN was processed OK, and mbuf can be freed/reused.
+ * > 0 - FIN was processed OK and mbuf can't be freed/reused.
+ */
+static inline int
+rx_fin(struct tle_tcp_stream *s, uint32_t state,
+ const union seg_info *si, struct rte_mbuf *mb,
+ struct resp_info *rsp)
+{
+ uint32_t hlen, plen, seq;
+ int32_t ret;
+ union tsopt ts;
+
+ hlen = PKT_L234_HLEN(mb);
+ plen = mb->pkt_len - hlen;
+ seq = si->seq;
+
+ ts = rx_tms_opt(&s->tcb, mb);
+ ret = rx_check_seqack(&s->tcb, seq, si->ack, plen, ts);
+ if (ret != 0)
+ return ret;
+
+ if (state < TCP_ST_ESTABLISHED)
+ return -EINVAL;
+
+ if (plen != 0) {
+
+ ret = data_pkt_adjust(&s->tcb, mb, hlen, &seq, &plen);
+ if (ret != 0)
+ return ret;
+ if (rx_data_enqueue(s, seq, plen, &mb, 1) != 1)
+ return -ENOBUFS;
+ }
+
+ /* process ack here */
+ rx_ackdata(s, si->ack);
+
+ /* some fragments still missing */
+ if (seq + plen != s->tcb.rcv.nxt) {
+ s->tcb.rcv.frs.seq = seq + plen;
+ s->tcb.rcv.frs.on = 1;
+ } else
+ rx_fin_state(s, rsp);
+
+ return plen;
+}
+
+static inline int
+rx_rst(struct tle_tcp_stream *s, uint32_t state, uint32_t flags,
+ const union seg_info *si)
+{
+ int32_t rc;
+
+ /*
+ * RFC 793: In all states except SYN-SENT, all reset (RST) segments
+ * are validated by checking their SEQ-fields.
+ * A reset is valid if its sequence number is in the window.
+ * In the SYN-SENT state (a RST received in response to an initial SYN),
+ * the RST is acceptable if the ACK field acknowledges the SYN.
+ */
+ if (state == TCP_ST_SYN_SENT) {
+ rc = ((flags & TCP_FLAG_ACK) == 0 ||
+ si->ack != s->tcb.snd.nxt) ?
+ -ERANGE : 0;
+ }
+
+ else
+ rc = check_seqn(&s->tcb, si->seq, 0);
+
+ if (rc == 0)
+ stream_term(s);
+
+ return rc;
+}
+
+/*
+ * check whether we have a FIN that was received out-of-order.
+ * if yes, try to process it now.
+ */
+static inline void
+rx_ofo_fin(struct tle_tcp_stream *s, struct resp_info *rsp)
+{
+ if (s->tcb.rcv.frs.on != 0 && s->tcb.rcv.nxt == s->tcb.rcv.frs.seq)
+ rx_fin_state(s, rsp);
+}
+
+static inline void
+dack_info_init(struct dack_info *tack, const struct tcb *tcb)
+{
+ memset(tack, 0, sizeof(*tack));
+ tack->ack = tcb->snd.una;
+ tack->segs.dup = tcb->rcv.dupack;
+ tack->wu.raw = tcb->snd.wu.raw;
+ tack->wnd = tcb->snd.wnd >> tcb->snd.wscale;
+}
+
+static inline void
+ack_window_update(struct tcb *tcb, const struct dack_info *tack)
+{
+ tcb->snd.wu.raw = tack->wu.raw;
+ tcb->snd.wnd = tack->wnd << tcb->snd.wscale;
+}
+
+static inline void
+ack_cwnd_update(struct tcb *tcb, uint32_t acked, const struct dack_info *tack)
+{
+ uint32_t n;
+
+ n = tack->segs.ack * tcb->snd.mss;
+
+ /* slow start phase, RFC 5681 3.1 (2) */
+ if (tcb->snd.cwnd < tcb->snd.ssthresh)
+ tcb->snd.cwnd += RTE_MIN(acked, n);
+ /* congestion avoidance phase, RFC 5681 3.1 (3) */
+ else
+ tcb->snd.cwnd += RTE_MAX(1U, n * tcb->snd.mss / tcb->snd.cwnd);
+}
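+
+/*
+ * e.g. with mss = 1000 and cwnd = 4000 in slow start, a bulk ACKing
+ * 3000 bytes across 3 segments grows cwnd by min(3000, 3 * 1000);
+ * once cwnd >= ssthresh the same bulk adds only
+ * max(1, 3 * 1000 * 1000 / cwnd) bytes per call.
+ */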
+
+static inline void
+rto_ssthresh_update(struct tcb *tcb)
+{
+ uint32_t k, n;
+
+ /* RFC 5681 3.1 (4) */
+ n = (tcb->snd.nxt - tcb->snd.una) / 2;
+ k = 2 * tcb->snd.mss;
+ tcb->snd.ssthresh = RTE_MAX(n, k);
+}
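+
+/*
+ * e.g. with 10000 bytes in flight (snd.nxt - snd.una) and
+ * mss = 1460: ssthresh = max(10000 / 2, 2 * 1460) = 5000 bytes.
+ */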
+
+static inline void
+rto_cwnd_update(struct tcb *tcb)
+{
+
+ if (tcb->snd.nb_retx == 0)
+ rto_ssthresh_update(tcb);
+
+ /*
+ * RFC 5681 3.1: upon a timeout cwnd MUST be set to
+ * no more than 1 full-sized segment.
+ */
+ tcb->snd.cwnd = tcb->snd.mss;
+}
+
+static inline void
+ack_info_update(struct dack_info *tack, const union seg_info *si,
+ int32_t badseq, uint32_t dlen, const union tsopt ts)
+{
+ if (badseq != 0) {
+ tack->segs.badseq++;
+ return;
+ }
+
+	/* segment with incoming data */
+ tack->segs.data += (dlen != 0);
+
+ /* segment with newly acked data */
+ if (tcp_seq_lt(tack->ack, si->ack)) {
+ tack->segs.dup = 0;
+ tack->segs.ack++;
+ tack->ack = si->ack;
+ tack->ts = ts;
+
+ /*
+ * RFC 5681: An acknowledgment is considered a "duplicate" when:
+ * (a) the receiver of the ACK has outstanding data
+ * (b) the incoming acknowledgment carries no data
+ * (c) the SYN and FIN bits are both off
+ * (d) the acknowledgment number is equal to the TCP.UNA
+ * (e) the advertised window in the incoming acknowledgment equals the
+ * advertised window in the last incoming acknowledgment.
+ *
+	 * Here we have to check only for (b), (d), (e).
+ * (a) will be checked later for the whole bulk of packets,
+ * (c) should never happen here.
+ */
+ } else if (dlen == 0 && si->wnd == tack->wnd && ++tack->segs.dup == 3) {
+ tack->dup3.seg = tack->segs.ack + 1;
+ tack->dup3.ack = tack->ack;
+ }
+
+ /*
+ * RFC 793:
+ * If SND.UNA < SEG.ACK =< SND.NXT, the send window should be
+ * updated. If (SND.WL1 < SEG.SEQ or (SND.WL1 = SEG.SEQ and
+ * SND.WL2 =< SEG.ACK)), set SND.WND <- SEG.WND, set
+ * SND.WL1 <- SEG.SEQ, and set SND.WL2 <- SEG.ACK.
+ */
+ if (tcp_seq_lt(tack->wu.wl1, si->seq) ||
+ (si->seq == tack->wu.wl1 &&
+ tcp_seq_leq(tack->wu.wl2, si->ack))) {
+
+ tack->wu.wl1 = si->seq;
+ tack->wu.wl2 = si->ack;
+ tack->wnd = si->wnd;
+ }
+}
+
+static inline uint32_t
+rx_data_ack(struct tle_tcp_stream *s, struct dack_info *tack,
+ const union seg_info si[], struct rte_mbuf *mb[], struct rte_mbuf *rp[],
+ int32_t rc[], uint32_t num)
+{
+ uint32_t i, j, k, n, t;
+ uint32_t hlen, plen, seq, tlen;
+ int32_t ret;
+ union tsopt ts;
+
+ k = 0;
+ for (i = 0; i != num; i = j) {
+
+ hlen = PKT_L234_HLEN(mb[i]);
+ plen = mb[i]->pkt_len - hlen;
+ seq = si[i].seq;
+
+ ts = rx_tms_opt(&s->tcb, mb[i]);
+ ret = rx_check_seqack(&s->tcb, seq, si[i].ack, plen, ts);
+
+		/* account for segment received */
+ ack_info_update(tack, &si[i], ret != 0, plen, ts);
+
+ if (ret == 0) {
+ /* skip duplicate data, if any */
+ ret = data_pkt_adjust(&s->tcb, mb[i], hlen,
+ &seq, &plen);
+ }
+
+ j = i + 1;
+ if (ret != 0) {
+ rp[k] = mb[i];
+ rc[k] = -ret;
+ k++;
+ continue;
+ }
+
+ /* group sequential packets together. */
+ for (tlen = plen; j != num; tlen += plen, j++) {
+
+ hlen = PKT_L234_HLEN(mb[j]);
+ plen = mb[j]->pkt_len - hlen;
+
+ /* not consecutive packet */
+ if (plen == 0 || seq + tlen != si[j].seq)
+ break;
+
+ /* check SEQ/ACK */
+ ts = rx_tms_opt(&s->tcb, mb[j]);
+ ret = rx_check_seqack(&s->tcb, si[j].seq, si[j].ack,
+ plen, ts);
+
+ /* account for segment received */
+ ack_info_update(tack, &si[j], ret != 0, plen, ts);
+
+ if (ret != 0) {
+ rp[k] = mb[j];
+ rc[k] = -ret;
+ k++;
+ break;
+ }
+ rte_pktmbuf_adj(mb[j], hlen);
+ }
+
+ n = j - i;
+ j += (ret != 0);
+
+ /* account for OFO data */
+ if (seq != s->tcb.rcv.nxt)
+ tack->segs.ofo += n;
+
+ /* enqueue packets */
+ t = rx_data_enqueue(s, seq, tlen, mb + i, n);
+
+ /* if we are out of space in stream recv buffer. */
+ for (; t != n; t++) {
+ rp[k] = mb[i + t];
+ rc[k] = -ENOBUFS;
+ k++;
+ }
+ }
+
+ return num - k;
+}
+
+static inline void
+start_fast_retransmit(struct tle_tcp_stream *s)
+{
+ struct tcb *tcb;
+
+ tcb = &s->tcb;
+
+ /* RFC 6582 3.2.2 */
+ tcb->snd.rcvr = tcb->snd.nxt;
+ tcb->snd.fastack = 1;
+
+ /* RFC 5681 3.2.2 */
+ rto_ssthresh_update(tcb);
+
+ /* RFC 5681 3.2.3 */
+ tcp_txq_rst_nxt_head(s);
+ tcb->snd.nxt = tcb->snd.una;
+ tcb->snd.cwnd = tcb->snd.ssthresh + 3 * tcb->snd.mss;
+}
+
+static inline void
+stop_fast_retransmit(struct tle_tcp_stream *s)
+{
+ struct tcb *tcb;
+ uint32_t n;
+
+ tcb = &s->tcb;
+ n = tcb->snd.nxt - tcb->snd.una;
+ tcb->snd.cwnd = RTE_MIN(tcb->snd.ssthresh,
+ RTE_MAX(n, tcb->snd.mss) + tcb->snd.mss);
+ tcb->snd.fastack = 0;
+}
+
+static inline int
+in_fast_retransmit(struct tle_tcp_stream *s, uint32_t ack_len, uint32_t ack_num,
+ uint32_t dup_num)
+{
+ uint32_t n;
+ struct tcb *tcb;
+
+ tcb = &s->tcb;
+
+	/* RFC 6582 3.2 partial ACK */
+ if (ack_len != 0) {
+
+ n = ack_num * tcb->snd.mss;
+ if (ack_len >= n)
+ tcb->snd.cwnd -= ack_len - n;
+ else
+ tcb->snd.cwnd -= ack_len % tcb->snd.mss;
+
+ /*
+ * For the first partial ACK that arrives
+ * during fast recovery, also reset the
+ * retransmit timer.
+ */
+ if (tcb->snd.fastack == 1)
+ timer_reset(s);
+
+ tcb->snd.fastack += ack_num;
+ return 1;
+
+ /* RFC 5681 3.2.4 */
+ } else if (dup_num > 3) {
+ s->tcb.snd.cwnd += (dup_num - 3) * tcb->snd.mss;
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline int
+process_ack(struct tle_tcp_stream *s, uint32_t acked,
+ const struct dack_info *tack)
+{
+ int32_t send;
+
+ send = 0;
+
+ /* normal mode */
+ if (s->tcb.snd.fastack == 0) {
+
+ send = 1;
+
+ /* RFC 6582 3.2.2 switch to fast retransmit mode */
+ if (tack->dup3.seg != 0 && s->tcb.snd.una != s->tcb.snd.nxt &&
+ s->tcb.snd.una >= s->tcb.snd.rcvr) {
+
+ start_fast_retransmit(s);
+ in_fast_retransmit(s,
+ tack->ack - tack->dup3.ack,
+ tack->segs.ack - tack->dup3.seg - 1,
+ tack->segs.dup);
+
+ /* remain in normal mode */
+ } else if (acked != 0) {
+ ack_cwnd_update(&s->tcb, acked, tack);
+ timer_stop(s);
+ }
+
+ /* fast retransmit mode */
+ } else {
+
+ /* remain in fast retransmit mode */
+ if (s->tcb.snd.una < s->tcb.snd.rcvr) {
+
+ send = in_fast_retransmit(s, acked, tack->segs.ack,
+ tack->segs.dup);
+ } else {
+			/* RFC 6582 3.2 full ACK */
+ stop_fast_retransmit(s);
+ timer_stop(s);
+
+ /* if we have another series of dup ACKs */
+ if (tack->dup3.seg != 0 &&
+ s->tcb.snd.una != s->tcb.snd.nxt &&
+ tcp_seq_leq((uint32_t)s->tcb.snd.rcvr,
+ tack->dup3.ack)) {
+
+ /* restart fast retransmit again. */
+ start_fast_retransmit(s);
+ send = in_fast_retransmit(s,
+ tack->ack - tack->dup3.ack,
+ tack->segs.ack - tack->dup3.seg - 1,
+ tack->segs.dup);
+ }
+ }
+ }
+
+ return send;
+}
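+
+/*
+ * Example flow through process_ack():
+ * 1) three duplicate ACKs arrive -> tack->dup3.seg != 0 ->
+ *    start_fast_retransmit(): snd.nxt is rewound to snd.una and
+ *    cwnd = ssthresh + 3 * SMSS;
+ * 2) a partial ACK (snd.una still below snd.rcvr) keeps the stream
+ *    in fast retransmit via in_fast_retransmit();
+ * 3) a full ACK (snd.una reaches snd.rcvr) -> stop_fast_retransmit()
+ *    deflates cwnd back towards ssthresh.
+ */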
+
+/*
+ * our FIN was acked, stop rto timer, change stream state,
+ * and possibly close the stream.
+ */
+static inline void
+rx_ackfin(struct tle_tcp_stream *s)
+{
+ uint32_t state;
+
+ s->tcb.snd.una = s->tcb.snd.fss;
+ empty_mbuf_ring(s->tx.q);
+
+ state = s->tcb.state;
+ if (state == TCP_ST_LAST_ACK)
+ stream_term(s);
+ else if (state == TCP_ST_FIN_WAIT_1) {
+ timer_stop(s);
+ s->tcb.state = TCP_ST_FIN_WAIT_2;
+ } else if (state == TCP_ST_CLOSING) {
+ s->tcb.state = TCP_ST_TIME_WAIT;
+ s->tcb.snd.rto = TCP_RTO_2MSL;
+ timer_reset(s);
+ }
+}
+
+static inline void
+rx_process_ack(struct tle_tcp_stream *s, uint32_t ts,
+ const struct dack_info *tack)
+{
+ int32_t send;
+ uint32_t n;
+
+ s->tcb.rcv.dupack = tack->segs.dup;
+
+ n = rx_ackdata(s, tack->ack);
+ send = process_ack(s, n, tack);
+
+ /* try to send more data. */
+ if ((n != 0 || send != 0) && tcp_txq_nxt_cnt(s) != 0)
+ txs_enqueue(s->s.ctx, s);
+
+ /* restart RTO timer. */
+ if (s->tcb.snd.nxt != s->tcb.snd.una)
+ timer_start(s);
+
+	/* update rto; if a fresh timestamp echo is present, calculate rtt */
+ if (tack->ts.ecr != 0)
+ rto_estimate(&s->tcb, ts - tack->ts.ecr);
+}
+
+/*
+ * process <SYN,ACK>
+ * returns negative value on failure, or zero on success.
+ */
+static inline int
+rx_synack(struct tle_tcp_stream *s, uint32_t ts, uint32_t state,
+ const union seg_info *si, struct rte_mbuf *mb,
+ struct resp_info *rsp)
+{
+ struct syn_opts so;
+ struct tcp_hdr *th;
+
+ if (state != TCP_ST_SYN_SENT)
+ return -EINVAL;
+
+	/* invalid SEG.ACK */
+ if (si->ack != (uint32_t)s->tcb.snd.nxt) {
+ rsp->flags = TCP_FLAG_RST;
+ return 0;
+ }
+
+ th = rte_pktmbuf_mtod_offset(mb, struct tcp_hdr *,
+ mb->l2_len + mb->l3_len);
+ get_syn_opts(&so, (uintptr_t)(th + 1), mb->l4_len - sizeof(*th));
+
+ s->tcb.so = so;
+
+ s->tcb.snd.una = s->tcb.snd.nxt;
+ s->tcb.snd.mss = so.mss;
+ s->tcb.snd.wnd = si->wnd << so.wscale;
+ s->tcb.snd.wu.wl1 = si->seq;
+ s->tcb.snd.wu.wl2 = si->ack;
+ s->tcb.snd.wscale = so.wscale;
+
+ /* setup congestion variables */
+ s->tcb.snd.cwnd = initial_cwnd(s->tcb.snd.mss);
+ s->tcb.snd.ssthresh = s->tcb.snd.wnd;
+
+ s->tcb.rcv.ts = so.ts.val;
+ s->tcb.rcv.irs = si->seq;
+ s->tcb.rcv.nxt = si->seq + 1;
+
+ /* calculate initial rto */
+ rto_estimate(&s->tcb, ts - s->tcb.snd.ts);
+
+ rsp->flags |= TCP_FLAG_ACK;
+
+ timer_stop(s);
+ s->tcb.state = TCP_ST_ESTABLISHED;
+ rte_smp_wmb();
+
+ if (s->tx.ev != NULL)
+ tle_event_raise(s->tx.ev);
+ else if (s->tx.cb.func != NULL)
+ s->tx.cb.func(s->tx.cb.data, &s->s);
+
+ return 0;
+}
+
+static inline uint32_t
+rx_stream(struct tle_tcp_stream *s, uint32_t ts,
+ const union pkt_info *pi, const union seg_info si[],
+ struct rte_mbuf *mb[], struct rte_mbuf *rp[], int32_t rc[],
+ uint32_t num)
+{
+ uint32_t i, k, n, state;
+ int32_t ret;
+ struct resp_info rsp;
+ struct dack_info tack;
+
+ k = 0;
+ rsp.flags = 0;
+
+ state = s->tcb.state;
+
+ /*
+ * first check for the states/flags where we don't
+ * expect groups of packets.
+ */
+
+ /* process RST */
+ if ((pi->tf.flags & TCP_FLAG_RST) != 0) {
+ for (i = 0;
+ i != num &&
+ rx_rst(s, state, pi->tf.flags, &si[i]);
+ i++)
+ ;
+ i = 0;
+
+ /* RFC 793: if the ACK bit is off drop the segment and return */
+ } else if ((pi->tf.flags & TCP_FLAG_ACK) == 0) {
+ i = 0;
+
+ /* process <SYN,ACK> */
+ } else if ((pi->tf.flags & TCP_FLAG_SYN) != 0) {
+ ret = 0;
+ for (i = 0; i != num; i++) {
+ ret = rx_synack(s, ts, state, &si[i], mb[i], &rsp);
+ if (ret == 0)
+ break;
+
+ rc[k] = -ret;
+ rp[k] = mb[i];
+ k++;
+ }
+
+ /* process FIN */
+ } else if ((pi->tf.flags & TCP_FLAG_FIN) != 0) {
+ ret = 0;
+ for (i = 0; i != num; i++) {
+ ret = rx_fin(s, state, &si[i], mb[i], &rsp);
+ if (ret >= 0)
+ break;
+
+ rc[k] = -ret;
+ rp[k] = mb[i];
+ k++;
+ }
+ i += (ret > 0);
+
+ /* normal data/ack packets */
+ } else if (state >= TCP_ST_ESTABLISHED && state <= TCP_ST_LAST_ACK) {
+
+ /* process incoming data packets. */
+ dack_info_init(&tack, &s->tcb);
+ n = rx_data_ack(s, &tack, si, mb, rp, rc, num);
+
+ /* follow up actions based on aggregated information */
+
+ /* update SND.WND */
+ ack_window_update(&s->tcb, &tack);
+
+ /*
+ * fast-path: all data & FIN was already sent out
+ * and now is acknowledged.
+ */
+ if (s->tcb.snd.fss == s->tcb.snd.nxt &&
+ tack.ack == (uint32_t) s->tcb.snd.nxt)
+ rx_ackfin(s);
+ else
+ rx_process_ack(s, ts, &tack);
+
+ /*
+ * send an immediate ACK if either:
+ * - received segment with invalid seq/ack number
+ * - received segment with OFO data
+ * - received segment with INO data and no TX is scheduled
+ * for that stream.
+ */
+ if (tack.segs.badseq != 0 || tack.segs.ofo != 0 ||
+ (tack.segs.data != 0 &&
+ rte_atomic32_read(&s->tx.arm) == 0))
+ rsp.flags |= TCP_FLAG_ACK;
+
+ rx_ofo_fin(s, &rsp);
+
+ k += num - n;
+ i = num;
+
+ /* unhandled state, drop all packets. */
+ } else
+ i = 0;
+
+ /* we have a response packet to send. */
+ if (rsp.flags == TCP_FLAG_RST) {
+ send_rst(s, si[i].ack);
+ stream_term(s);
+ } else if (rsp.flags != 0) {
+ send_ack(s, ts, rsp.flags);
+
+ /* start the timer for FIN packet */
+ if ((rsp.flags & TCP_FLAG_FIN) != 0)
+ timer_reset(s);
+ }
+
+ /* unprocessed packets */
+ for (; i != num; i++, k++) {
+ rc[k] = EINVAL;
+ rp[k] = mb[i];
+ }
+
+ return num - k;
+}
+
+static inline uint32_t
+rx_postsyn(struct tle_dev *dev, struct stbl *st, uint32_t type, uint32_t ts,
+ const union pkt_info pi[], const union seg_info si[],
+ struct rte_mbuf *mb[], struct rte_mbuf *rp[], int32_t rc[],
+ uint32_t num)
+{
+ struct tle_tcp_stream *s;
+ uint32_t i, k, state;
+ int32_t ret;
+
+ s = rx_obtain_stream(dev, st, &pi[0], type);
+ if (s == NULL) {
+ for (i = 0; i != num; i++) {
+ rc[i] = ENOENT;
+ rp[i] = mb[i];
+ }
+ return 0;
+ }
+
+ k = 0;
+ state = s->tcb.state;
+
+ if (state == TCP_ST_LISTEN) {
+
+ /* one connection per flow */
+ ret = EINVAL;
+ for (i = 0; i != num && ret != 0; i++) {
+ ret = rx_ack_listen(s, st, pi, &si[i], ts, mb[i]);
+ if (ret != 0) {
+ rc[k] = -ret;
+ rp[k] = mb[i];
+ k++;
+ }
+ }
+		/* remaining (duplicate) connection requests */
+ for (; i != num; i++, k++) {
+ rc[k] = EINVAL;
+ rp[k] = mb[i];
+ }
+
+ if (k != num && s->rx.ev != NULL)
+ tle_event_raise(s->rx.ev);
+ else if (s->rx.cb.func != NULL && rte_ring_count(s->rx.q) == 1)
+ s->rx.cb.func(s->rx.cb.data, &s->s);
+
+ } else {
+ i = rx_stream(s, ts, pi, si, mb, rp, rc, num);
+ k = num - i;
+ }
+
+ rwl_release(&s->rx.use);
+ return num - k;
+}
+
+
+static inline uint32_t
+rx_syn(struct tle_dev *dev, uint32_t type, uint32_t ts,
+ const union pkt_info pi[], const union seg_info si[],
+ struct rte_mbuf *mb[], struct rte_mbuf *rp[], int32_t rc[],
+ uint32_t num)
+{
+ struct tle_tcp_stream *s;
+ uint32_t i, k;
+ int32_t ret;
+
+ s = rx_obtain_listen_stream(dev, &pi[0], type);
+ if (s == NULL) {
+ for (i = 0; i != num; i++) {
+ rc[i] = ENOENT;
+ rp[i] = mb[i];
+ }
+ return 0;
+ }
+
+ k = 0;
+ for (i = 0; i != num; i++) {
+
+ /* check that this remote is allowed to connect */
+ if (rx_check_stream(s, &pi[i]) != 0)
+ ret = -ENOENT;
+ else
+			/* syncookie: reply with <SYN,ACK> */
+ ret = sync_ack(s, &pi[i], &si[i], ts, mb[i]);
+
+ if (ret != 0) {
+ rc[k] = -ret;
+ rp[k] = mb[i];
+ k++;
+ }
+ }
+
+ rwl_release(&s->rx.use);
+ return num - k;
+}
+
+uint16_t
+tle_tcp_rx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[],
+ struct rte_mbuf *rp[], int32_t rc[], uint16_t num)
+{
+ struct stbl *st;
+ uint32_t i, j, k, n, t, ts;
+ uint64_t csf;
+ union pkt_info pi[num];
+ union seg_info si[num];
+ union {
+ uint8_t t[TLE_VNUM];
+ uint32_t raw;
+ } stu;
+
+ ts = tcp_get_tms();
+ st = CTX_TCP_STLB(dev->ctx);
+
+ stu.raw = 0;
+
+ /* extract packet info and check the L3/L4 csums */
+ for (i = 0; i != num; i++) {
+
+ get_pkt_info(pkt[i], &pi[i], &si[i]);
+
+ t = pi[i].tf.type;
+ csf = dev->rx.ol_flags[t] &
+ (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD);
+
+ /* check csums in SW */
+ if (pi[i].csf == 0 && csf != 0 && check_pkt_csum(pkt[i], csf,
+ pi[i].tf.type, IPPROTO_TCP) != 0)
+ pi[i].csf = csf;
+
+ stu.t[t] = 1;
+ }
+
+ if (stu.t[TLE_V4] != 0)
+ stbl_lock(st, TLE_V4);
+ if (stu.t[TLE_V6] != 0)
+ stbl_lock(st, TLE_V6);
+
+ k = 0;
+ for (i = 0; i != num; i += j) {
+
+ t = pi[i].tf.type;
+
+		/* basic checks for incoming packet */
+ if (t >= TLE_VNUM || pi[i].csf != 0 || dev->dp[t] == NULL) {
+ rc[k] = EINVAL;
+ rp[k] = pkt[i];
+ j = 1;
+ k++;
+ /* process input SYN packets */
+ } else if (pi[i].tf.flags == TCP_FLAG_SYN) {
+ j = pkt_info_bulk_syneq(pi + i, num - i);
+ n = rx_syn(dev, t, ts, pi + i, si + i, pkt + i,
+ rp + k, rc + k, j);
+ k += j - n;
+ } else {
+ j = pkt_info_bulk_eq(pi + i, num - i);
+ n = rx_postsyn(dev, st, t, ts, pi + i, si + i, pkt + i,
+ rp + k, rc + k, j);
+ k += j - n;
+ }
+ }
+
+ if (stu.t[TLE_V4] != 0)
+ stbl_unlock(st, TLE_V4);
+ if (stu.t[TLE_V6] != 0)
+ stbl_unlock(st, TLE_V6);
+
+ return num - k;
+}
+
+uint16_t
+tle_tcp_stream_synreqs(struct tle_stream *ts, struct tle_syn_req rq[],
+ uint32_t num)
+{
+ uint32_t i, n;
+ struct tle_tcp_stream *s;
+ struct stbl_entry *se[num];
+
+ s = TCP_STREAM(ts);
+ n = rte_ring_mc_dequeue_burst(s->rx.q, (void **)se, num);
+ if (n == 0)
+ return 0;
+
+ for (i = 0; i != n; i++) {
+ rq[i].pkt = stbl_get_pkt(se[i]);
+ rq[i].opaque = se[i];
+ }
+
+ /*
+ * if we still have packets to read,
+ * then rearm stream RX event.
+ */
+ if (n == num && rte_ring_count(s->rx.q) != 0) {
+ if (rwl_try_acquire(&s->rx.use) > 0 && s->rx.ev != NULL)
+ tle_event_raise(s->rx.ev);
+ rwl_release(&s->rx.use);
+ }
+
+ return n;
+}
+
+static inline int
+stream_fill_dest(struct tle_tcp_stream *s)
+{
+ int32_t rc;
+ const void *da;
+
+ if (s->s.type == TLE_V4)
+ da = &s->s.ipv4.addr.src;
+ else
+ da = &s->s.ipv6.addr.src;
+
+ rc = stream_get_dest(&s->s, da, &s->tx.dst);
+ return (rc < 0) ? rc : 0;
+}
+
+/*
+ * helper function, prepares an accepted stream.
+ */
+static int
+accept_fill_stream(struct tle_tcp_stream *ps, struct tle_tcp_stream *cs,
+ const struct tle_tcp_accept_param *prm, uint32_t tms,
+ const union pkt_info *pi, const union seg_info *si)
+{
+ int32_t rc;
+ uint32_t rtt;
+
+ /* some TX still pending for that stream. */
+ if (TCP_STREAM_TX_PENDING(cs))
+ return -EAGAIN;
+
+ /* setup L4 ports and L3 addresses fields. */
+ cs->s.port.raw = pi->port.raw;
+ cs->s.pmsk.raw = UINT32_MAX;
+
+ if (pi->tf.type == TLE_V4) {
+ cs->s.ipv4.addr = pi->addr4;
+ cs->s.ipv4.mask.src = INADDR_NONE;
+ cs->s.ipv4.mask.dst = INADDR_NONE;
+ } else if (pi->tf.type == TLE_V6) {
+ cs->s.ipv6.addr = *pi->addr6;
+ rte_memcpy(&cs->s.ipv6.mask.src, &tle_ipv6_none,
+ sizeof(cs->s.ipv6.mask.src));
+ rte_memcpy(&cs->s.ipv6.mask.dst, &tle_ipv6_none,
+ sizeof(cs->s.ipv6.mask.dst));
+ }
+
+ /* setup TCB */
+ sync_fill_tcb(&cs->tcb, si, prm->syn.pkt);
+ cs->tcb.rcv.wnd = cs->rx.q->prod.mask << cs->tcb.rcv.wscale;
+
+	/* setup stream notification mechanism */
+ cs->rx.ev = prm->cfg.recv_ev;
+ cs->rx.cb = prm->cfg.recv_cb;
+ cs->tx.ev = prm->cfg.send_ev;
+ cs->tx.cb = prm->cfg.send_cb;
+ cs->err.ev = prm->cfg.err_ev;
+ cs->err.cb = prm->cfg.err_cb;
+
+ /* store other params */
+ cs->tcb.snd.nb_retm = (prm->cfg.nb_retries != 0) ? prm->cfg.nb_retries :
+ TLE_TCP_DEFAULT_RETRIES;
+
+ /*
+ * estimate the rto
+	 * for now rtt is calculated based on the TCP timestamp (TMS) option;
+	 * later add a real-time measurement
+ */
+ if (cs->tcb.so.ts.ecr) {
+ rtt = tms - cs->tcb.so.ts.ecr;
+ rto_estimate(&cs->tcb, rtt);
+ } else
+ cs->tcb.snd.rto = TCP_RTO_DEFAULT;
+
+ tcp_stream_up(cs);
+
+	/* copy stream type. */
+ cs->s.type = ps->s.type;
+
+	/* retrieve and cache destination information. */
+ rc = stream_fill_dest(cs);
+ if (rc != 0)
+ return rc;
+
+ /* update snd.mss with SMSS value */
+ cs->tcb.snd.mss = calc_smss(cs->tcb.snd.mss, &cs->tx.dst);
+
+ /* setup congestion variables */
+ cs->tcb.snd.cwnd = initial_cwnd(cs->tcb.snd.mss);
+ cs->tcb.snd.ssthresh = cs->tcb.snd.wnd;
+
+ cs->tcb.state = TCP_ST_ESTABLISHED;
+ cs->tcb.uop |= TCP_OP_ACCEPT;
+
+ /* add stream to the table */
+ cs->ste = prm->syn.opaque;
+ rte_smp_wmb();
+ cs->ste->data = cs;
+ return 0;
+}
+
+/*
+ * !!!
+ * Right now a new stream's rcv.wnd is set to zero.
+ * That simplifies handling of new connection establishment
+ * (as no data segments could be received),
+ * but has to be addressed.
+ * Possible ways:
+ * - send an ACK after accept creates the new stream with the new rcv.wnd
+ *   value. The problem with that approach is that a single ACK is not
+ *   delivered reliably (could be lost), plus it might slow down connection
+ *   establishment (an extra packet per connection that the client has to
+ *   wait for).
+ * - allocate the new stream at the ACK receive stage.
+ *   As a drawback, the whole stream allocation/connection establishment
+ *   will be done in the BE.
+ * !!!
+ */
+int
+tle_tcp_stream_accept(struct tle_stream *ts,
+ const struct tle_tcp_accept_param prm[], struct tle_stream *rs[],
+ uint32_t num)
+{
+ struct tle_tcp_stream *cs, *s;
+ struct tle_ctx *ctx;
+ uint32_t i, j, n, tms;
+ int32_t rc;
+ union pkt_info pi[num];
+ union seg_info si[num];
+
+ tms = tcp_get_tms();
+ s = TCP_STREAM(ts);
+
+ for (i = 0; i != num; i++)
+ get_pkt_info(prm[i].syn.pkt, &pi[i], &si[i]);
+
+ /* mark stream as not closable */
+ if (rwl_acquire(&s->rx.use) < 0)
+ return -EINVAL;
+
+ ctx = s->s.ctx;
+ n = get_streams(ctx, rs, num);
+
+ rc = 0;
+ for (i = 0; i != n; i++) {
+
+ /* prepare new stream */
+ cs = TCP_STREAM(rs[i]);
+ rc = accept_fill_stream(s, cs, prm + i, tms, pi + i, si + i);
+ if (rc != 0)
+ break;
+ }
+
+ rwl_release(&s->rx.use);
+
+ /* free 'SYN' mbufs. */
+ for (j = 0; j != i; j++)
+ rte_pktmbuf_free(prm[j].syn.pkt);
+
+ /* close failed stream, put unused streams back to the free list. */
+ if (rc != 0) {
+ tle_tcp_stream_close(rs[i]);
+ for (j = i + 1; j != n; j++) {
+ cs = TCP_STREAM(rs[j]);
+ put_stream(ctx, rs[j], TCP_STREAM_TX_PENDING(cs));
+ }
+ rte_errno = -rc;
+
+ /* not enough streams are available */
+ } else if (n != num)
+ rte_errno = ENFILE;
+
+ return i;
+}
+
+/*
+ * !!! implement a proper one, or delete !!!
+ * need to make sure no race conditions with add/lookup stream table.
+ */
+void
+tle_tcp_reject(struct tle_stream *s, const struct tle_syn_req rq[],
+ uint32_t num)
+{
+ uint32_t i;
+ struct rte_mbuf *mb;
+ struct stbl *st;
+ union pkt_info pi;
+ union seg_info si;
+
+ st = CTX_TCP_STLB(s->ctx);
+
+ for (i = 0; i != num; i++) {
+ mb = rq[i].pkt;
+ get_pkt_info(mb, &pi, &si);
+ if (pi.tf.type < TLE_VNUM)
+ stbl_del_pkt_lock(st, rq[i].opaque, &pi);
+
+ /* !!! send RST pkt to the peer !!! */
+ rte_pktmbuf_free(mb);
+ }
+}
+
+uint16_t
+tle_tcp_tx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[], uint16_t num)
+{
+ uint32_t i, j, k, n;
+ struct tle_drb *drb[num];
+ struct tle_tcp_stream *s;
+
+ /* extract packets from device TX queue. */
+
+ k = num;
+ n = tle_dring_sc_dequeue(&dev->tx.dr, (const void **)(uintptr_t)pkt,
+ num, drb, &k);
+
+ if (n == 0)
+ return 0;
+
+ /* free empty drbs and notify related streams. */
+
+ for (i = 0; i != k; i = j) {
+ s = drb[i]->udata;
+ for (j = i + 1; j != k && s == drb[j]->udata; j++)
+ ;
+ stream_drb_free(s, drb + i, j - i);
+ }
+
+ return n;
+}
+
+static inline void
+stream_fill_pkt_info(const struct tle_tcp_stream *s, union pkt_info *pi)
+{
+ if (s->s.type == TLE_V4)
+ pi->addr4 = s->s.ipv4.addr;
+ else
+ pi->addr6 = &s->s.ipv6.addr;
+
+ pi->port = s->s.port;
+ pi->tf.type = s->s.type;
+}
+
+static int
+stream_fill_addr(struct tle_tcp_stream *s, const struct sockaddr *addr)
+{
+ const struct sockaddr_in *in4;
+ const struct sockaddr_in6 *in6;
+ const struct tle_dev_param *prm;
+ int32_t rc;
+
+ rc = 0;
+ s->s.pmsk.raw = UINT32_MAX;
+
+ /* setup L4 src ports and src address fields. */
+ if (s->s.type == TLE_V4) {
+ in4 = (const struct sockaddr_in *)addr;
+ if (in4->sin_addr.s_addr == INADDR_ANY || in4->sin_port == 0)
+ return -EINVAL;
+
+ s->s.port.src = in4->sin_port;
+ s->s.ipv4.addr.src = in4->sin_addr.s_addr;
+ s->s.ipv4.mask.src = INADDR_NONE;
+ s->s.ipv4.mask.dst = INADDR_NONE;
+
+ } else if (s->s.type == TLE_V6) {
+ in6 = (const struct sockaddr_in6 *)addr;
+ if (memcmp(&in6->sin6_addr, &tle_ipv6_any,
+ sizeof(tle_ipv6_any)) == 0 ||
+ in6->sin6_port == 0)
+ return -EINVAL;
+
+ s->s.port.src = in6->sin6_port;
+ rte_memcpy(&s->s.ipv6.addr.src, &in6->sin6_addr,
+ sizeof(s->s.ipv6.addr.src));
+ rte_memcpy(&s->s.ipv6.mask.src, &tle_ipv6_none,
+ sizeof(s->s.ipv6.mask.src));
+ rte_memcpy(&s->s.ipv6.mask.dst, &tle_ipv6_none,
+ sizeof(s->s.ipv6.mask.dst));
+ }
+
+ /* setup the destination device. */
+ rc = stream_fill_dest(s);
+ if (rc != 0)
+ return rc;
+
+ /* setup L4 dst address from device param */
+ prm = &s->tx.dst.dev->prm;
+ if (s->s.type == TLE_V4) {
+ if (s->s.ipv4.addr.dst == INADDR_ANY)
+ s->s.ipv4.addr.dst = prm->local_addr4.s_addr;
+ } else if (memcmp(&s->s.ipv6.addr.dst, &tle_ipv6_any,
+ sizeof(tle_ipv6_any)) == 0)
+ memcpy(&s->s.ipv6.addr.dst, &prm->local_addr6,
+ sizeof(s->s.ipv6.addr.dst));
+
+ return rc;
+}
+
+static inline int
+tx_syn(struct tle_tcp_stream *s, const struct sockaddr *addr)
+{
+ int32_t rc;
+ uint32_t tms, seq;
+ union pkt_info pi;
+ struct stbl *st;
+ struct stbl_entry *se;
+
+ /* fill stream address */
+ rc = stream_fill_addr(s, addr);
+ if (rc != 0)
+ return rc;
+
+ /* fill pkt info to generate seq.*/
+ stream_fill_pkt_info(s, &pi);
+
+ tms = tcp_get_tms();
+ s->tcb.so.ts.val = tms;
+ s->tcb.so.ts.ecr = 0;
+ s->tcb.so.wscale = TCP_WSCALE_DEFAULT;
+ s->tcb.so.mss = calc_smss(s->tx.dst.mtu, &s->tx.dst);
+
+ /* note that rcv.nxt is 0 here for sync_gen_seq.*/
+ seq = sync_gen_seq(&pi, s->tcb.rcv.nxt, tms, s->tcb.so.mss);
+ s->tcb.snd.iss = seq;
+ s->tcb.snd.rcvr = seq;
+ s->tcb.snd.una = seq;
+ s->tcb.snd.nxt = seq + 1;
+ s->tcb.snd.rto = TCP_RTO_DEFAULT;
+ s->tcb.snd.ts = tms;
+
+ s->tcb.rcv.mss = s->tcb.so.mss;
+ s->tcb.rcv.wscale = TCP_WSCALE_DEFAULT;
+ s->tcb.rcv.wnd = s->rx.q->prod.mask << s->tcb.rcv.wscale;
+ s->tcb.rcv.ts = 0;
+
+ /* add the stream in stream table */
+ st = CTX_TCP_STLB(s->s.ctx);
+ se = stbl_add_stream_lock(st, s);
+ if (se == NULL)
+ return -ENOBUFS;
+ s->ste = se;
+
+ /* put stream into the to-send queue */
+ txs_enqueue(s->s.ctx, s);
+
+ return 0;
+}
+
+int
+tle_tcp_stream_connect(struct tle_stream *ts, const struct sockaddr *addr)
+{
+ struct tle_tcp_stream *s;
+ uint32_t type;
+ int32_t rc;
+
+ if (ts == NULL || addr == NULL)
+ return -EINVAL;
+
+ s = TCP_STREAM(ts);
+ type = s->s.type;
+ if (type >= TLE_VNUM)
+ return -EINVAL;
+
+ if (rwl_try_acquire(&s->tx.use) > 0) {
+ rc = rte_atomic16_cmpset(&s->tcb.state, TCP_ST_CLOSED,
+ TCP_ST_SYN_SENT);
+ rc = (rc == 0) ? -EDEADLK : 0;
+ } else
+ rc = -EINVAL;
+
+ if (rc != 0) {
+ rwl_release(&s->tx.use);
+ return rc;
+ }
+
+ /* fill stream, prepare and transmit syn pkt */
+ s->tcb.uop |= TCP_OP_CONNECT;
+ rc = tx_syn(s, addr);
+ rwl_release(&s->tx.use);
+
+ /* error happened, do a cleanup */
+ if (rc != 0)
+ tle_tcp_stream_close(ts);
+
+ return rc;
+}
+
+uint16_t
+tle_tcp_stream_recv(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num)
+{
+ uint32_t n;
+ struct tle_tcp_stream *s;
+
+ s = TCP_STREAM(ts);
+ n = rte_ring_mc_dequeue_burst(s->rx.q, (void **)pkt, num);
+ if (n == 0)
+ return 0;
+
+ /*
+ * if we still have packets to read,
+ * then rearm stream RX event.
+ */
+ if (n == num && rte_ring_count(s->rx.q) != 0) {
+ if (rwl_try_acquire(&s->rx.use) > 0 && s->rx.ev != NULL)
+ tle_event_raise(s->rx.ev);
+ rwl_release(&s->rx.use);
+ }
+
+ return n;
+}
+
+uint16_t
+tle_tcp_stream_send(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num)
+{
+ uint32_t i, j, mss, n, state, type;
+ uint64_t ol_flags;
+ struct tle_tcp_stream *s;
+ struct tle_dev *dev;
+
+ s = TCP_STREAM(ts);
+
+ /* mark stream as not closable. */
+ if (rwl_acquire(&s->tx.use) < 0) {
+ rte_errno = EAGAIN;
+ return 0;
+ }
+
+ state = s->tcb.state;
+ if (state != TCP_ST_ESTABLISHED && state != TCP_ST_CLOSE_WAIT) {
+ rte_errno = ENOTCONN;
+ n = 0;
+ } else {
+ mss = s->tcb.snd.mss;
+ dev = s->tx.dst.dev;
+ type = s->s.type;
+ ol_flags = dev->tx.ol_flags[type];
+
+ /* prepare and check for TX */
+ for (i = 0; i != num; i++) {
+
+ /* !!! need to be modified !!! */
+ if (pkt[i]->pkt_len > mss ||
+ pkt[i]->nb_segs > TCP_MAX_PKT_SEG) {
+ rte_errno = EBADMSG;
+ break;
+ } else if (tcp_fill_mbuf(pkt[i], s, &s->tx.dst,
+ ol_flags, s->s.port, 0, TCP_FLAG_ACK,
+ 0, 0) != 0)
+ break;
+ }
+
+		/* queue packets for further transmission. */
+ n = rte_ring_mp_enqueue_burst(s->tx.q, (void **)pkt, i);
+
+ /* notify BE about more data to send */
+ if (n != 0)
+ txs_enqueue(s->s.ctx, s);
+
+		/*
+		 * for unsent, but already modified packets:
+		 * remove pkt l2/l3/l4 headers, restore ol_flags
+		 */
+ if (n != i) {
+ ol_flags = ~dev->tx.ol_flags[type];
+ for (j = n; j != i; j++) {
+ rte_pktmbuf_adj(pkt[j], pkt[j]->l2_len +
+ pkt[j]->l3_len + pkt[j]->l4_len);
+ pkt[j]->ol_flags &= ol_flags;
+ }
+ /* if possible, rearm stream write event. */
+ } else if (rte_ring_free_count(s->tx.q) != 0 &&
+ s->tx.ev != NULL)
+ tle_event_raise(s->tx.ev);
+ }
+
+ rwl_release(&s->tx.use);
+ return n;
+}
+
+/* send data and FIN (if needed) */
+static inline void
+tx_data_fin(struct tle_tcp_stream *s, uint32_t tms, uint32_t state)
+{
+ /* try to send some data */
+ tx_nxt_data(s, tms);
+
+ /* we also have to send a FIN */
+ if (state != TCP_ST_ESTABLISHED &&
+ state != TCP_ST_CLOSE_WAIT &&
+ tcp_txq_nxt_cnt(s) == 0 &&
+ s->tcb.snd.fss != s->tcb.snd.nxt) {
+ s->tcb.snd.fss = ++s->tcb.snd.nxt;
+ send_ack(s, tms, TCP_FLAG_FIN | TCP_FLAG_ACK);
+ }
+}
+
+static inline void
+tx_stream(struct tle_tcp_stream *s, uint32_t tms)
+{
+ uint32_t state;
+
+ state = s->tcb.state;
+
+ if (state == TCP_ST_SYN_SENT) {
+ /* send the SYN, start the rto timer */
+ send_ack(s, tms, TCP_FLAG_SYN);
+ timer_start(s);
+
+ } else if (state >= TCP_ST_ESTABLISHED && state <= TCP_ST_LAST_ACK) {
+
+ tx_data_fin(s, tms, state);
+
+ /* start RTO timer. */
+ if (s->tcb.snd.nxt != s->tcb.snd.una)
+ timer_start(s);
+ }
+}
+
+static inline void
+rto_stream(struct tle_tcp_stream *s, uint32_t tms)
+{
+ uint32_t state;
+
+ state = s->tcb.state;
+
+ TCP_LOG(DEBUG, "%s(%p, tms=%u): state=%u, "
+ "retx=%u, retm=%u, "
+ "rto=%u, snd.ts=%u, tmo=%u, "
+ "snd.nxt=%lu, snd.una=%lu, flight_size=%lu, "
+ "snd.rcvr=%lu, snd.fastack=%u, "
+ "wnd=%u, cwnd=%u, ssthresh=%u, "
+ "bytes sent=%lu, pkt remain=%u;\n",
+ __func__, s, tms, s->tcb.state,
+ s->tcb.snd.nb_retx, s->tcb.snd.nb_retm,
+ s->tcb.snd.rto, s->tcb.snd.ts, tms - s->tcb.snd.ts,
+ s->tcb.snd.nxt, s->tcb.snd.una, s->tcb.snd.nxt - s->tcb.snd.una,
+ s->tcb.snd.rcvr, s->tcb.snd.fastack,
+ s->tcb.snd.wnd, s->tcb.snd.cwnd, s->tcb.snd.ssthresh,
+ s->tcb.snd.nxt - s->tcb.snd.iss, tcp_txq_nxt_cnt(s));
+
+ if (s->tcb.snd.nb_retx < s->tcb.snd.nb_retm) {
+
+ if (state >= TCP_ST_ESTABLISHED && state <= TCP_ST_LAST_ACK) {
+
+			/* update SND.CWND and SND.SSTHRESH */
+ rto_cwnd_update(&s->tcb);
+
+ /* RFC 6582 3.2.4 */
+ s->tcb.snd.rcvr = s->tcb.snd.nxt;
+ s->tcb.snd.fastack = 0;
+
+ /* restart from last acked data */
+ tcp_txq_rst_nxt_head(s);
+ s->tcb.snd.nxt = s->tcb.snd.una;
+
+ tx_data_fin(s, tms, state);
+
+ } else if (state == TCP_ST_SYN_SENT) {
+ /* resending SYN */
+ s->tcb.so.ts.val = tms;
+ send_ack(s, tms, TCP_FLAG_SYN);
+
+ } else if (state == TCP_ST_TIME_WAIT) {
+ stream_term(s);
+ }
+
+ /* RFC6298:5.5 back off the timer */
+ s->tcb.snd.rto = rto_roundup(2 * s->tcb.snd.rto);
+ s->tcb.snd.nb_retx++;
+ timer_restart(s);
+
+ } else {
+ send_rst(s, s->tcb.snd.una);
+ stream_term(s);
+ }
+}
+
+int
+tle_tcp_process(struct tle_ctx *ctx, uint32_t num)
+{
+ uint32_t i, k, tms;
+ struct sdr *dr;
+ struct tle_timer_wheel *tw;
+ struct tle_stream *p;
+ struct tle_tcp_stream *s, *rs[num];
+
+	/* process streams with expired RTO */
+
+ tw = CTX_TCP_TMWHL(ctx);
+ tms = tcp_get_tms();
+ tle_timer_expire(tw, tms);
+
+ k = tle_timer_get_expired_bulk(tw, (void **)rs, RTE_DIM(rs));
+
+ for (i = 0; i != k; i++) {
+
+ s = rs[i];
+ s->timer.handle = NULL;
+ if (rwl_try_acquire(&s->tx.use) > 0)
+ rto_stream(s, tms);
+ rwl_release(&s->tx.use);
+ }
+
+ /* process streams from to-send queue */
+
+ k = txs_dequeue_bulk(ctx, rs, RTE_DIM(rs));
+
+ for (i = 0; i != k; i++) {
+
+ s = rs[i];
+ if (rwl_try_acquire(&s->tx.use) > 0 &&
+ rte_atomic32_read(&s->tx.arm) > 0) {
+ rte_atomic32_set(&s->tx.arm, 0);
+ tx_stream(s, tms);
+ }
+ rwl_release(&s->tx.use);
+ }
+
+ /* collect streams to close from the death row */
+
+ dr = CTX_TCP_SDR(ctx);
+ for (k = 0, p = STAILQ_FIRST(&dr->be);
+ k != num && p != NULL;
+ k++, p = STAILQ_NEXT(p, link))
+ rs[k] = TCP_STREAM(p);
+
+ if (p == NULL)
+ STAILQ_INIT(&dr->be);
+ else
+ STAILQ_FIRST(&dr->be) = p;
+
+ /* cleanup closed streams */
+ for (i = 0; i != k; i++) {
+ s = rs[i];
+ tcp_stream_down(s);
+ tcp_stream_reset(ctx, s);
+ }
+
+ return 0;
+}
diff --git a/lib/libtle_l4p/tcp_stream.c b/lib/libtle_l4p/tcp_stream.c
new file mode 100644
index 0000000..67ed66b
--- /dev/null
+++ b/lib/libtle_l4p/tcp_stream.c
@@ -0,0 +1,522 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
+#include "tcp_stream.h"
+#include "tcp_timer.h"
+#include "stream_table.h"
+#include "misc.h"
+#include "tcp_ctl.h"
+#include "tcp_ofo.h"
+#include "tcp_txq.h"
+
+
+static void
+unuse_stream(struct tle_tcp_stream *s)
+{
+ s->s.type = TLE_VNUM;
+ rte_atomic32_set(&s->rx.use, INT32_MIN);
+ rte_atomic32_set(&s->tx.use, INT32_MIN);
+}
+
+static void
+fini_stream(struct tle_tcp_stream *s)
+{
+ if (s != NULL) {
+ rte_free(s->rx.q);
+ tcp_ofo_free(s->rx.ofo);
+ rte_free(s->tx.q);
+ rte_free(s->tx.drb.r);
+ }
+}
+
+static void
+tcp_fini_streams(struct tle_ctx *ctx)
+{
+ uint32_t i;
+ struct tcp_streams *ts;
+
+ ts = CTX_TCP_STREAMS(ctx);
+ if (ts != NULL) {
+ stbl_fini(&ts->st);
+ for (i = 0; i != ctx->prm.max_streams; i++)
+ fini_stream(&ts->s[i]);
+
+ /* free the timer wheel */
+ tle_timer_free(ts->tmr);
+ rte_free(ts->tsq);
+
+ STAILQ_INIT(&ts->dr.fe);
+ STAILQ_INIT(&ts->dr.be);
+ }
+
+ rte_free(ts);
+ ctx->streams.buf = NULL;
+ STAILQ_INIT(&ctx->streams.free);
+}
+
+static struct rte_ring *
+alloc_ring(uint32_t n, uint32_t flags, int32_t socket)
+{
+ struct rte_ring *r;
+ size_t sz;
+ char name[RTE_RING_NAMESIZE];
+
+ n = rte_align32pow2(n);
+ sz = sizeof(*r) + n * sizeof(r->ring[0]);
+
+ r = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, socket);
+ if (r == NULL) {
+ TCP_LOG(ERR, "%s: allocation of %zu bytes on socket %d "
+ "failed with error code: %d\n",
+ __func__, sz, socket, rte_errno);
+ return NULL;
+ }
+
+ snprintf(name, sizeof(name), "%p@%zu", r, sz);
+ rte_ring_init(r, name, n, flags);
+ return r;
+}
+
+static int
+init_stream(struct tle_ctx *ctx, struct tle_tcp_stream *s)
+{
+ size_t bsz, rsz, sz;
+ uint32_t i, k, n, nb;
+ struct tle_drb *drb;
+ char name[RTE_RING_NAMESIZE];
+
+ /* init RX part. */
+
+ n = RTE_MAX(ctx->prm.max_stream_rbufs, 1U);
+ s->rx.q = alloc_ring(n, RING_F_SP_ENQ, ctx->prm.socket_id);
+ if (s->rx.q == NULL)
+ return -ENOMEM;
+
+ s->rx.ofo = tcp_ofo_alloc(n, ctx->prm.socket_id);
+ if (s->rx.ofo == NULL)
+ return -ENOMEM;
+
+ /* init TX part. */
+
+ n = RTE_MAX(ctx->prm.max_stream_sbufs, 1U);
+ s->tx.q = alloc_ring(n, RING_F_SC_DEQ, ctx->prm.socket_id);
+ if (s->tx.q == NULL)
+ return -ENOMEM;
+
+ nb = drb_nb_elem(ctx);
+ k = calc_stream_drb_num(ctx, nb);
+ n = rte_align32pow2(k);
+
+ /* size of the drbs ring */
+ rsz = sizeof(*s->tx.drb.r) + n * sizeof(s->tx.drb.r->ring[0]);
+ rsz = RTE_ALIGN_CEIL(rsz, RTE_CACHE_LINE_SIZE);
+
+ /* size of the drb. */
+ bsz = tle_drb_calc_size(nb);
+
+ /* total stream drbs size. */
+ sz = rsz + bsz * k;
+
+ s->tx.drb.r = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ ctx->prm.socket_id);
+ if (s->tx.drb.r == NULL) {
+ TCP_LOG(ERR, "%s(%p): allocation of %zu bytes on socket %d "
+ "failed with error code: %d\n",
+ __func__, s, sz, ctx->prm.socket_id, rte_errno);
+ return -ENOMEM;
+ }
+
+ snprintf(name, sizeof(name), "%p@%zu", s, sz);
+ rte_ring_init(s->tx.drb.r, name, n, 0);
+
+ for (i = 0; i != k; i++) {
+ drb = (struct tle_drb *)((uintptr_t)s->tx.drb.r +
+ rsz + bsz * i);
+ drb->udata = s;
+ drb->size = nb;
+ rte_ring_enqueue(s->tx.drb.r, drb);
+ }
+
+ s->tx.drb.nb_elem = nb;
+ s->tx.drb.nb_max = k;
+
+	/* mark stream as available to use. */
+
+ s->s.ctx = ctx;
+ unuse_stream(s);
+ STAILQ_INSERT_TAIL(&ctx->streams.free, &s->s, link);
+
+ return 0;
+}
+
+static void
+tcp_free_drbs(struct tle_stream *s, struct tle_drb *drb[], uint32_t nb_drb)
+{
+ struct tle_tcp_stream *us;
+
+ us = (struct tle_tcp_stream *)s;
+ rte_ring_enqueue_burst(us->tx.drb.r, (void **)drb, nb_drb);
+}
+
+static struct tle_timer_wheel *
+alloc_timers(uint32_t num, int32_t socket)
+{
+ struct tle_timer_wheel_args twprm;
+
+ twprm.tick_size = TCP_RTO_GRANULARITY;
+ twprm.max_timer = num;
+ twprm.socket_id = socket;
+ return tle_timer_create(&twprm, tcp_get_tms());
+}
+
+static int
+tcp_init_streams(struct tle_ctx *ctx)
+{
+ size_t sz;
+ uint32_t i;
+ int32_t rc;
+ struct tcp_streams *ts;
+
+ sz = sizeof(*ts) + sizeof(ts->s[0]) * ctx->prm.max_streams;
+ ts = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ ctx->prm.socket_id);
+ if (ts == NULL) {
+ TCP_LOG(ERR, "allocation of %zu bytes on socket %d "
+ "for %u tcp_streams failed\n",
+ sz, ctx->prm.socket_id, ctx->prm.max_streams);
+ return -ENOMEM;
+ }
+
+ STAILQ_INIT(&ts->dr.fe);
+ STAILQ_INIT(&ts->dr.be);
+
+ ctx->streams.buf = ts;
+ STAILQ_INIT(&ctx->streams.free);
+
+ ts->tmr = alloc_timers(ctx->prm.max_streams, ctx->prm.socket_id);
+ if (ts->tmr == NULL) {
+ TCP_LOG(ERR, "alloc_timers(ctx=%p) failed with error=%d\n",
+ ctx, rte_errno);
+ rc = -ENOMEM;
+ } else {
+ ts->tsq = alloc_ring(ctx->prm.max_streams,
+ RING_F_SC_DEQ, ctx->prm.socket_id);
+ if (ts->tsq == NULL)
+ rc = -ENOMEM;
+ else
+ rc = stbl_init(&ts->st, ctx->prm.max_streams,
+ ctx->prm.socket_id);
+ }
+
+ for (i = 0; rc == 0 && i != ctx->prm.max_streams; i++)
+ rc = init_stream(ctx, &ts->s[i]);
+
+ if (rc != 0) {
+		TCP_LOG(ERR, "initialisation of %u-th stream failed", i);
+ tcp_fini_streams(ctx);
+ }
+
+ return rc;
+}
+
+static void __attribute__((constructor))
+tcp_stream_setup(void)
+{
+ static const struct stream_ops tcp_ops = {
+ .init_streams = tcp_init_streams,
+ .fini_streams = tcp_fini_streams,
+ .free_drbs = tcp_free_drbs,
+ };
+
+ tle_stream_ops[TLE_PROTO_TCP] = tcp_ops;
+}
+
+/*
+ * Helper routine, check that input event and callback are mutually exclusive.
+ */
+static int
+check_cbev(const struct tle_event *ev, const struct tle_stream_cb *cb)
+{
+ if (ev != NULL && cb->func != NULL)
+ return -EINVAL;
+ return 0;
+}
+
+static int
+check_stream_prm(const struct tle_ctx *ctx,
+ const struct tle_tcp_stream_param *prm)
+{
+ if ((prm->addr.local.ss_family != AF_INET &&
+ prm->addr.local.ss_family != AF_INET6) ||
+ prm->addr.local.ss_family != prm->addr.remote.ss_family)
+ return -EINVAL;
+
+	/* callback and event notification mechanisms are mutually exclusive */
+	if (check_cbev(prm->cfg.recv_ev, &prm->cfg.recv_cb) != 0 ||
+	    check_cbev(prm->cfg.send_ev, &prm->cfg.send_cb) != 0 ||
+ check_cbev(prm->cfg.err_ev, &prm->cfg.err_cb) != 0)
+ return -EINVAL;
+
+	/* check that the context supports the desired address family. */
+ if ((prm->addr.local.ss_family == AF_INET &&
+ ctx->prm.lookup4 == NULL) ||
+ (prm->addr.local.ss_family == AF_INET6 &&
+ ctx->prm.lookup6 == NULL))
+ return -EINVAL;
+
+ return 0;
+}
+
+struct tle_stream *
+tle_tcp_stream_open(struct tle_ctx *ctx,
+ const struct tle_tcp_stream_param *prm)
+{
+ struct tle_tcp_stream *s;
+ int32_t rc;
+
+ if (ctx == NULL || prm == NULL || check_stream_prm(ctx, prm) != 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ s = (struct tle_tcp_stream *)get_stream(ctx);
+ if (s == NULL) {
+ rte_errno = ENFILE;
+ return NULL;
+
+ /* some TX still pending for that stream. */
+ } else if (TCP_STREAM_TX_PENDING(s)) {
+ put_stream(ctx, &s->s, 0);
+ rte_errno = EAGAIN;
+ return NULL;
+ }
+
+ /* setup L4 ports and L3 addresses fields. */
+ rc = stream_fill_ctx(ctx, &s->s,
+ (const struct sockaddr *)&prm->addr.local,
+ (const struct sockaddr *)&prm->addr.remote);
+
+ if (rc != 0) {
+ put_stream(ctx, &s->s, 1);
+ rte_errno = rc;
+ return NULL;
+ }
+
+	/* setup stream notification mechanism */
+ s->rx.ev = prm->cfg.recv_ev;
+ s->rx.cb = prm->cfg.recv_cb;
+ s->tx.ev = prm->cfg.send_ev;
+ s->tx.cb = prm->cfg.send_cb;
+ s->err.ev = prm->cfg.err_ev;
+ s->err.cb = prm->cfg.err_cb;
+
+ /* store other params */
+ s->tcb.snd.nb_retm = (prm->cfg.nb_retries != 0) ? prm->cfg.nb_retries :
+ TLE_TCP_DEFAULT_RETRIES;
+
+ tcp_stream_up(s);
+ return &s->s;
+}
+
+/*
+ * Helper functions, used by close API.
+ */
+static inline int
+stream_close(struct tle_ctx *ctx, struct tle_tcp_stream *s)
+{
+ uint16_t uop;
+ uint32_t state;
+ static const struct tle_stream_cb zcb;
+
+	/* check whether close() was already invoked */
+ uop = s->tcb.uop;
+ if ((uop & TCP_OP_CLOSE) != 0)
+ return -EDEADLK;
+
+ /* record that close() was already invoked */
+ if (rte_atomic16_cmpset(&s->tcb.uop, uop, uop | TCP_OP_CLOSE) == 0)
+ return -EDEADLK;
+
+	/* mark stream as unavailable for RX/TX. */
+ tcp_stream_down(s);
+
+ /* reset events/callbacks */
+ s->rx.ev = NULL;
+ s->tx.ev = NULL;
+ s->err.ev = NULL;
+
+ s->rx.cb = zcb;
+ s->tx.cb = zcb;
+ s->err.cb = zcb;
+
+ state = s->tcb.state;
+
+	/* CLOSED, LISTEN, SYN_SENT - we can close the stream straight away */
+ if (state <= TCP_ST_SYN_SENT) {
+ tcp_stream_reset(ctx, s);
+ return 0;
+ }
+
+ /* generate FIN and proceed with normal connection termination */
+ if (state == TCP_ST_ESTABLISHED || state == TCP_ST_CLOSE_WAIT) {
+
+ /* change state */
+ s->tcb.state = (state == TCP_ST_ESTABLISHED) ?
+ TCP_ST_FIN_WAIT_1 : TCP_ST_LAST_ACK;
+
+ /* mark stream as writable/readable again */
+ tcp_stream_up(s);
+
+ /* queue stream into to-send queue */
+ txs_enqueue(ctx, s);
+ return 0;
+ }
+
+ /*
+	 * according to the state above, close() was already invoked;
+	 * we should never reach this point.
+ */
+ RTE_ASSERT(0);
+ return -EINVAL;
+}
+
+uint32_t
+tle_tcp_stream_close_bulk(struct tle_stream *ts[], uint32_t num)
+{
+ int32_t rc;
+ uint32_t i;
+ struct tle_ctx *ctx;
+ struct tle_tcp_stream *s;
+
+ rc = 0;
+
+ for (i = 0; i != num; i++) {
+
+ s = TCP_STREAM(ts[i]);
+ if (ts[i] == NULL || s->s.type >= TLE_VNUM) {
+ rc = EINVAL;
+ break;
+ }
+
+ ctx = s->s.ctx;
+ rc = stream_close(ctx, s);
+ if (rc != 0)
+ break;
+ }
+
+ if (rc != 0)
+ rte_errno = -rc;
+ return i;
+}
+
+int
+tle_tcp_stream_close(struct tle_stream *ts)
+{
+ struct tle_ctx *ctx;
+ struct tle_tcp_stream *s;
+
+ s = TCP_STREAM(ts);
+ if (ts == NULL || s->s.type >= TLE_VNUM)
+ return -EINVAL;
+
+ ctx = s->s.ctx;
+
+ /* reset stream events if any. */
+ if (s->rx.ev != NULL)
+ tle_event_idle(s->rx.ev);
+ if (s->tx.ev != NULL)
+ tle_event_idle(s->tx.ev);
+ if (s->err.ev != NULL)
+ tle_event_idle(s->err.ev);
+
+ return stream_close(ctx, s);
+}
+
+int
+tle_tcp_stream_get_addr(const struct tle_stream *ts,
+ struct tle_tcp_stream_addr *addr)
+{
+ struct sockaddr_in *lin4, *rin4;
+ struct sockaddr_in6 *lin6, *rin6;
+ struct tle_tcp_stream *s;
+
+ s = TCP_STREAM(ts);
+ if (addr == NULL || ts == NULL || s->s.type >= TLE_VNUM)
+ return -EINVAL;
+
+ if (s->s.type == TLE_V4) {
+
+ lin4 = (struct sockaddr_in *)&addr->local;
+ rin4 = (struct sockaddr_in *)&addr->remote;
+
+ addr->local.ss_family = AF_INET;
+ addr->remote.ss_family = AF_INET;
+
+ lin4->sin_port = s->s.port.dst;
+ rin4->sin_port = s->s.port.src;
+ lin4->sin_addr.s_addr = s->s.ipv4.addr.dst;
+ rin4->sin_addr.s_addr = s->s.ipv4.addr.src;
+
+ } else if (s->s.type == TLE_V6) {
+
+ lin6 = (struct sockaddr_in6 *)&addr->local;
+ rin6 = (struct sockaddr_in6 *)&addr->remote;
+
+ addr->local.ss_family = AF_INET6;
+ addr->remote.ss_family = AF_INET6;
+
+ lin6->sin6_port = s->s.port.dst;
+ rin6->sin6_port = s->s.port.src;
+ memcpy(&lin6->sin6_addr, &s->s.ipv6.addr.dst,
+ sizeof(lin6->sin6_addr));
+ memcpy(&rin6->sin6_addr, &s->s.ipv6.addr.src,
+ sizeof(rin6->sin6_addr));
+ }
+
+ return 0;
+}
+
+int
+tle_tcp_stream_listen(struct tle_stream *ts)
+{
+ struct tle_tcp_stream *s;
+ int32_t rc;
+
+ s = TCP_STREAM(ts);
+ if (ts == NULL || s->s.type >= TLE_VNUM)
+ return -EINVAL;
+
+ /* mark stream as not closable. */
+ if (rwl_try_acquire(&s->rx.use) > 0) {
+ rc = rte_atomic16_cmpset(&s->tcb.state, TCP_ST_CLOSED,
+ TCP_ST_LISTEN);
+ if (rc != 0) {
+ s->tcb.uop |= TCP_OP_LISTEN;
+ rc = 0;
+ } else
+ rc = -EDEADLK;
+ } else
+ rc = -EINVAL;
+
+ rwl_release(&s->rx.use);
+ return rc;
+}
diff --git a/lib/libtle_l4p/tcp_stream.h b/lib/libtle_l4p/tcp_stream.h
new file mode 100644
index 0000000..04c2f88
--- /dev/null
+++ b/lib/libtle_l4p/tcp_stream.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_STREAM_H_
+#define _TCP_STREAM_H_
+
+#include <rte_vect.h>
+#include <tle_dring.h>
+#include <tle_tcp.h>
+#include <tle_event.h>
+
+#include "stream.h"
+#include "misc.h"
+#include "tcp_misc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+ TCP_ST_CLOSED,
+ TCP_ST_LISTEN,
+ TCP_ST_SYN_SENT,
+ TCP_ST_SYN_RCVD,
+ TCP_ST_ESTABLISHED,
+ TCP_ST_FIN_WAIT_1,
+ TCP_ST_FIN_WAIT_2,
+ TCP_ST_CLOSE_WAIT,
+ TCP_ST_CLOSING,
+ TCP_ST_LAST_ACK,
+ TCP_ST_TIME_WAIT,
+ TCP_ST_NUM
+};
+
+enum {
+ TCP_OP_LISTEN = 0x1,
+ TCP_OP_ACCEPT = 0x2,
+ TCP_OP_CONNECT = 0x4,
+ TCP_OP_CLOSE = 0x8,
+};
+
+struct tcb {
+ volatile uint16_t state;
+	volatile uint16_t uop; /* operations performed by user */
+ struct {
+ uint32_t nxt;
+ uint32_t irs; /* initial received sequence */
+ uint32_t wnd;
+ uint32_t ts;
+ struct {
+ uint32_t seq;
+ uint32_t on;
+ } frs;
+ uint32_t srtt; /* smoothed round trip time (scaled by >> 3) */
+ uint32_t rttvar; /* rtt variance */
+ uint16_t mss;
+ uint8_t wscale;
+ uint8_t dupack;
+ } rcv;
+ struct {
+ uint64_t nxt;
+ uint64_t una;
+ uint64_t rcvr; /* recover RFC 6582 */
+ uint64_t fss; /* FIN sequence # */
+ uint32_t fastack; /* # of partial acks in fast retransmit */
+ uint32_t wnd;
+ union wui wu; /* window update */
+ uint32_t ack; /* last sent ack */
+ uint32_t ts;
+ uint32_t cwnd; /* congestion window */
+ uint32_t ssthresh; /* slow start threshold */
+ uint32_t rto; /* retransmission timeout */
+ uint32_t iss; /* initial send sequence */
+ uint16_t mss;
+ uint8_t wscale;
+		uint8_t nb_retx; /* number of retransmissions */
+ uint8_t nb_retm; /**< max number of retx attempts. */
+ } snd;
+ struct syn_opts so; /* initial syn options. */
+};
+
+
+struct tle_tcp_stream {
+
+ struct tle_stream s;
+
+ struct stbl_entry *ste; /* entry in streams table. */
+ struct tcb tcb;
+
+ struct {
+ void *handle;
+ } timer;
+
+ struct {
+ struct tle_event *ev;
+ struct tle_stream_cb cb;
+ } err;
+
+ struct {
+ rte_atomic32_t use;
+ struct rte_ring *q; /* listen (syn) queue */
+ struct ofo *ofo;
+ struct tle_event *ev; /* user provided recv event. */
+ struct tle_stream_cb cb; /* user provided recv callback. */
+ } rx __rte_cache_aligned;
+
+ struct {
+ rte_atomic32_t use;
+ rte_atomic32_t arm; /* when > 0 stream is in to-send queue */
+ struct {
+ uint32_t nb_elem; /* number of objects per drb. */
+ uint32_t nb_max; /* number of drbs per stream. */
+ struct rte_ring *r;
+ } drb;
+ struct rte_ring *q; /* (re)tx queue */
+ struct tle_event *ev;
+ struct tle_stream_cb cb;
+ struct tle_dest dst;
+ } tx __rte_cache_aligned;
+
+} __rte_cache_aligned;
+
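+/*
+ * container_of-style cast: given a pointer to the embedded generic
+ * tle_stream, recover the enclosing tle_tcp_stream.
+ */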
+#define TCP_STREAM(p) \
+((struct tle_tcp_stream *)((uintptr_t)(p) - offsetof(struct tle_tcp_stream, s)))
+
+#define TCP_STREAM_TX_PENDING(s) \
+ ((s)->tx.drb.nb_max != rte_ring_count((s)->tx.drb.r))
+
+#define TCP_STREAM_TX_FINISHED(s) \
+ ((s)->tx.drb.nb_max == rte_ring_count((s)->tx.drb.r))
+
+#include "stream_table.h"
+
+struct sdr {
+ rte_spinlock_t lock;
+ STAILQ_HEAD(, tle_stream) fe;
+ STAILQ_HEAD(, tle_stream) be;
+};
+
+struct tcp_streams {
+ struct stbl st;
+ struct tle_timer_wheel *tmr; /* timer wheel */
+ struct rte_ring *tsq; /* to-send streams queue */
+ struct sdr dr; /* death row for zombie streams */
+ struct tle_tcp_stream s[]; /* array of allocated streams. */
+};
+
+#define CTX_TCP_STREAMS(ctx) ((struct tcp_streams *)(ctx)->streams.buf)
+#define CTX_TCP_STLB(ctx) (&CTX_TCP_STREAMS(ctx)->st)
+#define CTX_TCP_TMWHL(ctx) (CTX_TCP_STREAMS(ctx)->tmr)
+#define CTX_TCP_TSQ(ctx) (CTX_TCP_STREAMS(ctx)->tsq)
+#define CTX_TCP_SDR(ctx) (&CTX_TCP_STREAMS(ctx)->dr)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_STREAM_H_ */
diff --git a/lib/libtle_l4p/tcp_timer.h b/lib/libtle_l4p/tcp_timer.h
new file mode 100644
index 0000000..8faefb3
--- /dev/null
+++ b/lib/libtle_l4p/tcp_timer.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_TIMER_H_
+#define _TCP_TIMER_H_
+
+#include <tle_timer.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * internal defines.
+ * all RTO values are in ms.
+ */
+#define TCP_RTO_MAX 60000U /* RFC 6298 (2.5) */
+#define TCP_RTO_MIN 1000U /* RFC 6298 (2.4) */
+#define TCP_RTO_2MSL (2 * TCP_RTO_MAX)
+#define TCP_RTO_DEFAULT TCP_RTO_MIN /* RFC 6298 (2.1)*/
+#define TCP_RTO_GRANULARITY 100U
+
+
+static inline void
+timer_stop(struct tle_tcp_stream *s)
+{
+ struct tle_timer_wheel *tw;
+
+ if (s->timer.handle != NULL) {
+ tw = CTX_TCP_TMWHL(s->s.ctx);
+ tle_timer_stop(tw, s->timer.handle);
+ s->timer.handle = NULL;
+ }
+}
+
+static inline void
+timer_start(struct tle_tcp_stream *s)
+{
+ struct tle_timer_wheel *tw;
+
+ if (s->timer.handle == NULL) {
+ tw = CTX_TCP_TMWHL(s->s.ctx);
+ s->timer.handle = tle_timer_start(tw, s, s->tcb.snd.rto);
+ s->tcb.snd.nb_retx = 0;
+ }
+}
+
+static inline void
+timer_restart(struct tle_tcp_stream *s)
+{
+ struct tle_timer_wheel *tw;
+
+ tw = CTX_TCP_TMWHL(s->s.ctx);
+ s->timer.handle = tle_timer_start(tw, s, s->tcb.snd.rto);
+}
+
+
+/*
+ * reset number of retransmissions and restart RTO timer.
+ */
+static inline void
+timer_reset(struct tle_tcp_stream *s)
+{
+ timer_stop(s);
+ timer_start(s);
+}
+
+static inline uint32_t
+rto_roundup(uint32_t rto)
+{
+ rto = RTE_MAX(rto, TCP_RTO_MIN);
+ rto = RTE_MIN(rto, TCP_RTO_MAX);
+ return rto;
+}
+
+/*
+ * RFC6298: Computing TCP's Retransmission Timer
+ * RTTVAR <- (1 - beta) * RTTVAR + beta * |SRTT - R'|
+ * SRTT <- (1 - alpha) * SRTT + alpha * R'
+ * RTO <- SRTT + max (G, K*RTTVAR)
+ * the following computation is based on Jacobson'88 paper referenced
+ * in the RFC6298
+*/
+static inline void
+rto_estimate(struct tcb *tcb, int32_t rtt)
+{
+ uint32_t rto;
+
+ if (!rtt)
+ rtt = 1;
+ if (tcb->rcv.srtt) {
+ rtt -= (tcb->rcv.srtt >> 3); /* alpha = 1/8 */
+ tcb->rcv.srtt += rtt;
+
+ if (rtt < 0)
+ rtt = -rtt;
+ rtt -= (tcb->rcv.rttvar >> 2); /* beta = 1/4 */
+ tcb->rcv.rttvar += rtt;
+
+ } else {
+ tcb->rcv.srtt = rtt << 3;
+ tcb->rcv.rttvar = rtt << 1;
+ }
+
+ rto = (tcb->rcv.srtt >> 3) +
+ RTE_MAX(TCP_RTO_GRANULARITY, tcb->rcv.rttvar);
+ tcb->snd.rto = rto_roundup(rto);
+}
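+
+/*
+ * Illustrative worked example (not part of the code path): srtt above is
+ * kept scaled by 8 and rttvar by 4, so adding rttvar directly yields
+ * K*RTTVAR with K=4. For a first RTT sample of 80ms:
+ * srtt = 80 << 3 = 640, rttvar = 80 << 1 = 160 (RTTVAR = R/2 = 40ms,
+ * scaled by 4), so rto = (640 >> 3) + max(100, 160) = 240ms,
+ * which rto_roundup() then clamps up to TCP_RTO_MIN (1000ms).
+ */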
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_TIMER_H_ */
diff --git a/lib/libtle_l4p/tcp_txq.h b/lib/libtle_l4p/tcp_txq.h
new file mode 100644
index 0000000..0b199ba
--- /dev/null
+++ b/lib/libtle_l4p/tcp_txq.h
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_TXQ_H_
+#define _TCP_TXQ_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
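+/*
+ * The helpers below peek into rte_ring internals directly; roughly:
+ * [cons.tail, cons.head) holds mbufs sent but not yet acknowledged,
+ * [cons.head, prod.tail) holds mbufs queued but not yet sent.
+ */
+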
+static inline struct rte_mbuf **
+tcp_txq_get_nxt_objs(const struct tle_tcp_stream *s, uint32_t *num)
+{
+ uint32_t cnt, head, mask, sz, tail;
+ struct rte_ring *r;
+
+ r = s->tx.q;
+ sz = r->prod.size;
+ mask = r->prod.mask;
+ head = r->cons.head & mask;
+ tail = r->prod.tail & mask;
+
+ cnt = (tail >= head) ? tail - head : sz - head;
+
+ *num = cnt;
+ return (struct rte_mbuf **)(r->ring + head);
+}
+
+static inline struct rte_mbuf **
+tcp_txq_get_una_objs(const struct tle_tcp_stream *s, uint32_t *num)
+{
+ uint32_t cnt, head, mask, sz, tail;
+ struct rte_ring *r;
+
+ r = s->tx.q;
+ sz = r->prod.size;
+ mask = r->prod.mask;
+ head = r->prod.tail & mask;
+ tail = r->cons.tail & mask;
+
+ cnt = (head >= tail) ? head - tail : sz - tail;
+
+ *num = cnt;
+ return (struct rte_mbuf **)(r->ring + tail);
+}
+
+static inline void
+tcp_txq_set_nxt_head(struct tle_tcp_stream *s, uint32_t num)
+{
+ struct rte_ring *r;
+
+ r = s->tx.q;
+ r->cons.head += num;
+}
+
+static inline void
+tcp_txq_rst_nxt_head(struct tle_tcp_stream *s)
+{
+ struct rte_ring *r;
+
+ r = s->tx.q;
+ r->cons.head = r->cons.tail;
+}
+
+static inline void
+tcp_txq_set_una_tail(struct tle_tcp_stream *s, uint32_t num)
+{
+ struct rte_ring *r;
+
+ r = s->tx.q;
+ rte_smp_rmb();
+ r->cons.tail += num;
+}
+
+static inline uint32_t
+tcp_txq_nxt_cnt(struct tle_tcp_stream *s)
+{
+ struct rte_ring *r;
+
+ r = s->tx.q;
+ return (r->prod.tail - r->cons.head) & r->prod.mask;
+}
+
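+/*
+ * Arm the stream for TX processing; only the 0->1 transition of tx.arm
+ * enqueues the stream, so it appears at most once in the to-send queue
+ * no matter how many times it is armed before tle_tcp_process() runs.
+ */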
+static inline void
+txs_enqueue(struct tle_ctx *ctx, struct tle_tcp_stream *s)
+{
+ struct rte_ring *r;
+ uint32_t n;
+
+ if (rte_atomic32_add_return(&s->tx.arm, 1) == 1) {
+ r = CTX_TCP_TSQ(ctx);
+ n = rte_ring_enqueue_burst(r, (void * const *)&s, 1);
+ RTE_VERIFY(n == 1);
+ }
+}
+
+static inline uint32_t
+txs_dequeue_bulk(struct tle_ctx *ctx, struct tle_tcp_stream *s[], uint32_t num)
+{
+ struct rte_ring *r;
+
+ r = CTX_TCP_TSQ(ctx);
+ return rte_ring_dequeue_burst(r, (void **)s, num);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_TXQ_H_ */
diff --git a/lib/libtle_l4p/tle_ctx.h b/lib/libtle_l4p/tle_ctx.h
new file mode 100644
index 0000000..a3516bf
--- /dev/null
+++ b/lib/libtle_l4p/tle_ctx.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TLE_CTX_H_
+#define _TLE_CTX_H_
+
+#include <stdint.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <rte_common.h>
+#include <rte_mbuf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * <tle_ctx> - each such ctx represents an 'independent copy of the stack'.
+ * It owns set of <stream>s and <dev>s entities and provides
+ * (de)multiplexing input/output packets from/into devices into/from streams.
+ * <dev> is an abstraction for the underlying device, that is able
+ * to RX/TX packets and may provide some HW offload capabilities.
+ * It is the user's responsibility to add to the <ctx> all <dev>s,
+ * that context has to manage, before starting to do stream operations
+ * (open/send/recv,close) over that context.
+ * Right now adding/deleting <dev>s to the context with open
+ * streams is not supported.
+ * <stream> represents an L4(UDP/TCP, etc.) endpoint <addr, port> and
+ * is an analogy to socket entity.
+ * As with a socket, there is the ability to do recv/send over it.
+ * <stream> belongs to particular <ctx> but is visible globally across
+ * the process, i.e. any thread within the process can do recv/send over it
+ * without any further synchronisation.
+ * While 'upper' layer API is thread safe, lower layer API (rx_bulk/tx_bulk)
+ * is not thread safe and is not supposed to be run on multiple threads
+ * in parallel.
+ * So single thread can drive multiple <ctx>s and do IO for them,
+ * but multiple threads can't drive same <ctx> without some
+ * explicit synchronization.
+ */
+
+struct tle_ctx;
+struct tle_dev;
+
+/**
+ * Blocked L4 ports info.
+ */
+struct tle_bl_port {
+ uint32_t nb_port; /**< number of blocked ports. */
+ const uint16_t *port; /**< list of blocked ports. */
+};
+
+
+/**
+ * device parameters.
+ */
+struct tle_dev_param {
+ uint32_t rx_offload; /**< DEV_RX_OFFLOAD_* supported. */
+ uint32_t tx_offload; /**< DEV_TX_OFFLOAD_* supported. */
+ struct in_addr local_addr4; /**< local IPv4 address assigned. */
+ struct in6_addr local_addr6; /**< local IPv6 address assigned. */
+ struct tle_bl_port bl4; /**< blocked ports for IPv4 address. */
+	struct tle_bl_port bl6;     /**< blocked ports for IPv6 address. */
+};
+
+#define TLE_DST_MAX_HDR 0x60
+
+struct tle_dest {
+ struct rte_mempool *head_mp;
+ /**< MP for fragment headers and control packets. */
+ struct tle_dev *dev; /**< device to send packets through. */
+ uint16_t mtu; /**< MTU for given destination. */
+ uint8_t l2_len; /**< L2 header length. */
+ uint8_t l3_len; /**< L3 header length. */
+ uint8_t hdr[TLE_DST_MAX_HDR]; /**< L2/L3 headers. */
+};
+
+/**
+ * context creation parameters.
+ */
+
+enum {
+ TLE_PROTO_UDP,
+ TLE_PROTO_TCP,
+ TLE_PROTO_NUM
+};
+
+struct tle_ctx_param {
+ int32_t socket_id; /**< socket ID to allocate memory for. */
+ uint32_t proto; /**< L4 proto to handle. */
+ uint32_t max_streams; /**< max number of streams in context. */
+ uint32_t max_stream_rbufs; /**< max recv mbufs per stream. */
+ uint32_t max_stream_sbufs; /**< max send mbufs per stream. */
+ uint32_t send_bulk_size; /**< expected # of packets per send call. */
+
+ int (*lookup4)(void *opaque, const struct in_addr *addr,
+ struct tle_dest *res);
+ /**< will be called by send() to get IPv4 packet destination info. */
+ void *lookup4_data;
+ /**< opaque data pointer for lookup4() callback. */
+
+ int (*lookup6)(void *opaque, const struct in6_addr *addr,
+ struct tle_dest *res);
+ /**< will be called by send() to get IPv6 packet destination info. */
+ void *lookup6_data;
+ /**< opaque data pointer for lookup6() callback. */
+};
+
+/**
+ * create L4 processing context.
+ * @param ctx_prm
+ * Parameters used to create and initialise the L4 context.
+ * @return
+ * Pointer to context structure that can be used in future operations,
+ * or NULL on error, with error code set in rte_errno.
+ *
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ * - ENOMEM - out of memory
+ */
+struct tle_ctx *
+tle_ctx_create(const struct tle_ctx_param *ctx_prm);
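+
+/*
+ * Illustrative sketch (not part of the API; my_lookup4, app_route_lookup4
+ * and app_route_table are application-side placeholders): minimal TCP
+ * context creation with an IPv4 route lookup callback that fills *res
+ * (dev, mtu, l2/l3 headers) for the given destination address.
+ *
+ *	static int
+ *	my_lookup4(void *opaque, const struct in_addr *addr,
+ *		struct tle_dest *res)
+ *	{
+ *		return app_route_lookup4(opaque, addr, res);
+ *	}
+ *
+ *	struct tle_ctx_param cprm = {
+ *		.socket_id = SOCKET_ID_ANY,
+ *		.proto = TLE_PROTO_TCP,
+ *		.max_streams = 0x1000,
+ *		.max_stream_rbufs = 0x100,
+ *		.max_stream_sbufs = 0x100,
+ *		.lookup4 = my_lookup4,
+ *		.lookup4_data = app_route_table,
+ *	};
+ *	struct tle_ctx *ctx = tle_ctx_create(&cprm);
+ *	if (ctx == NULL)
+ *		rte_exit(EXIT_FAILURE, "ctx create: %d\n", rte_errno);
+ */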
+
+/**
+ * Destroy given context.
+ *
+ * @param ctx
+ * context to destroy
+ */
+void tle_ctx_destroy(struct tle_ctx *ctx);
+
+/**
+ * Add new device into the given context.
+ * This function is not multi-thread safe.
+ *
+ * @param ctx
+ * context to add new device into.
+ * @param dev_prm
+ * Parameters used to create and initialise new device inside the context.
+ * @return
+ * Pointer to device structure that can be used in future operations,
+ * or NULL on error, with error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ * - ENODEV - max possible value of open devices is reached
+ * - ENOMEM - out of memory
+ */
+struct tle_dev *
+tle_add_dev(struct tle_ctx *ctx, const struct tle_dev_param *dev_prm);
+
+/**
+ * Remove and destroy previously added device from the given context.
+ * This function is not multi-thread safe.
+ *
+ * @param dev
+ * device to remove and destroy.
+ * @return
+ * zero on successful completion.
+ * - -EINVAL - invalid parameter passed to function
+ */
+int tle_del_dev(struct tle_dev *dev);
+
+/**
+ * Signals to the context that destination info might have changed,
+ * so if it has any destination data cached,
+ * it has to be invalidated.
+ * @param ctx
+ * context to invalidate.
+ */
+void tle_ctx_invalidate(struct tle_ctx *ctx);
+
+/**
+ * Stream asynchronous notification mechanisms:
+ * a) recv/send callback.
+ * Stream recv/send notification callbacks behaviour is edge-triggered (ET).
+ * recv callback will be invoked if stream receive buffer was empty and
+ * new packet(s) have arrived.
+ * send callback will be invoked when stream send buffer was full,
+ * and some packets belonging to that stream were sent
+ * (part of send buffer became free again).
+ * Note that both recv and send callbacks are called with sort of read lock
+ * held on that stream. So it is not permitted to call stream_close()
+ * within the callback function. Doing that would cause a deadlock.
+ * While it is allowed to call stream send/recv functions within the
+ * callback, it is not recommended: callback function will be invoked
+ * within tle_udp_rx_bulk/tle_udp_tx_bulk context and some heavy processing
+ * within the callback functions might cause performance degradation
+ * or even loss of packets for further streams.
+ * b) recv/send event.
+ * Stream recv/send events behaviour is level-triggered (LT).
+ * receive event will be raised by either
+ * tle_udp_rx_burst() or tle_udp_stream_recv() as long as there are any
+ * remaining packets inside stream receive buffer.
+ * send event will be raised by either
+ * tle_udp_tx_burst() or tle_udp_stream_send() as long as there is any
+ * free space inside the stream send buffer.
+ * Note that callback and event are mutually exclusive on <stream, op> basis.
+ * It is not possible to open a stream with both recv event and callback
+ * specified.
+ * Though it is possible to open a stream with recv callback and send event,
+ * or vice versa.
+ * If the user doesn't need any notification mechanism for that stream,
+ * both event and callback could be set to zero.
+ */
+
+struct tle_event;
+struct tle_stream;
+
+/**
+ * Stream recv/send callback function and data.
+ */
+struct tle_stream_cb {
+ void (*func)(void *, struct tle_stream *);
+ void *data;
+};
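+
+/*
+ * Illustrative sketch (my_recv_notify, app_mark_readable and app_state
+ * are application-side placeholders): a recv callback. It runs under a
+ * read lock on the stream, so it must not close the stream and should
+ * stay lightweight.
+ *
+ *	static void
+ *	my_recv_notify(void *data, struct tle_stream *s)
+ *	{
+ *		app_mark_readable(data, s);
+ *	}
+ *
+ *	struct tle_stream_cb rcb = {
+ *		.func = my_recv_notify,
+ *		.data = app_state,
+ *	};
+ */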
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TLE_CTX_H_ */
diff --git a/lib/libtle_udp/tle_event.h b/lib/libtle_l4p/tle_event.h
index 9357def..b19954a 100644
--- a/lib/libtle_udp/tle_event.h
+++ b/lib/libtle_l4p/tle_event.h
@@ -17,6 +17,7 @@
#define _SEV_IMPL_H_
#include <rte_common.h>
+#include <rte_memory.h>
#include <rte_spinlock.h>
#include <rte_atomic.h>
#include <sys/queue.h>
@@ -210,6 +211,26 @@ tle_event_idle(struct tle_event *ev)
rte_spinlock_unlock(&q->lock);
}
+static inline void
+tle_evq_idle(struct tle_evq *evq, struct tle_event *ev[], uint32_t num)
+{
+ uint32_t i, n;
+
+ rte_spinlock_lock(&evq->lock);
+
+ n = 0;
+ for (i = 0; i != num; i++) {
+ if (ev[i]->state == TLE_SEV_UP) {
+ TAILQ_REMOVE(&evq->armed, ev[i], ql);
+ n++;
+ }
+ ev[i]->state = TLE_SEV_IDLE;
+ }
+
+ evq->nb_armed -= n;
+ rte_spinlock_unlock(&evq->lock);
+}
+
/*
* return up to *num* user data pointers associated with
diff --git a/lib/libtle_l4p/tle_tcp.h b/lib/libtle_l4p/tle_tcp.h
new file mode 100644
index 0000000..e6eb336
--- /dev/null
+++ b/lib/libtle_l4p/tle_tcp.h
@@ -0,0 +1,395 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TLE_TCP_H_
+#define _TLE_TCP_H_
+
+#include <tle_ctx.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * TCP stream creation parameters.
+ */
+struct tle_tcp_stream_addr {
+ struct sockaddr_storage local; /**< stream local address. */
+ struct sockaddr_storage remote; /**< stream remote address. */
+};
+
+#define TLE_TCP_DEFAULT_RETRIES 3
+
+struct tle_tcp_stream_cfg {
+ uint8_t nb_retries; /**< max number of retransmission attempts. */
+
+ /* _cb and _ev are mutually exclusive */
+ struct tle_event *err_ev; /**< error event to use. */
+ struct tle_stream_cb err_cb; /**< error callback to use. */
+
+ struct tle_event *recv_ev; /**< recv event to use. */
+ struct tle_stream_cb recv_cb; /**< recv callback to use. */
+
+ struct tle_event *send_ev; /**< send event to use. */
+ struct tle_stream_cb send_cb; /**< send callback to use. */
+};
+
+struct tle_tcp_stream_param {
+ struct tle_tcp_stream_addr addr;
+ struct tle_tcp_stream_cfg cfg;
+};
+
+/**
+ * create a new stream within given TCP context.
+ * @param ctx
+ * TCP context to create new stream within.
+ * @param prm
+ * Parameters used to create and initialise the new stream.
+ * @return
+ * Pointer to TCP stream structure that can be used in future TCP API calls,
+ * or NULL on error, with error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ *  - ENFILE - max limit of open streams reached for that context
+ */
+struct tle_stream *
+tle_tcp_stream_open(struct tle_ctx *ctx,
+ const struct tle_tcp_stream_param *prm);
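+
+/*
+ * Illustrative sketch (assumes "ctx" and the event "rxev" were created
+ * by the application; handle_error is a placeholder): open a stream
+ * bound to local IPv4 port 8080, e.g. to put into listen state later.
+ *
+ *	struct tle_tcp_stream_param sprm;
+ *	struct sockaddr_in *l4;
+ *
+ *	memset(&sprm, 0, sizeof(sprm));
+ *	l4 = (struct sockaddr_in *)&sprm.addr.local;
+ *	l4->sin_family = AF_INET;
+ *	l4->sin_addr.s_addr = INADDR_ANY;
+ *	l4->sin_port = rte_cpu_to_be_16(8080);
+ *	sprm.addr.remote.ss_family = AF_INET;
+ *	sprm.cfg.recv_ev = rxev;
+ *
+ *	struct tle_stream *s = tle_tcp_stream_open(ctx, &sprm);
+ *	if (s == NULL)
+ *		handle_error(rte_errno);
+ */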
+
+/**
+ * close an open stream.
+ * if the stream is in connected state, then:
+ * - connection termination would be performed.
+ *  - if the stream contains unsent data, then the actual close will be
+ *  postponed until either the remaining data is TX-ed or the timeout expires.
+ *  All packets that belong to that stream and remain in the device
+ *  TX queue will be kept for further transmission.
+ * @param s
+ * Pointer to the stream to close.
+ * @return
+ * zero on successful completion.
+ * - -EINVAL - invalid parameter passed to function
+ * - -EDEADLK - close was already invoked on that stream
+ */
+int tle_tcp_stream_close(struct tle_stream *s);
+
+/**
+ * close a group of open streams.
+ * if the stream is in connected state, then:
+ * - connection termination would be performed.
+ *  - if the stream contains unsent data, then the actual close will be
+ *  postponed until either the remaining data is TX-ed or the timeout expires.
+ *  All packets that belong to that stream and remain in the device
+ *  TX queue will be kept for further transmission.
+ * @param ts
+ * An array of pointers to streams that have to be closed.
+ * @param num
+ * Number of elements in the *ts* array.
+ * @return
+ * number of successfully closed streams.
+ * In case of error, error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ * - EDEADLK - close was already invoked on that stream
+ */
+uint32_t
+tle_tcp_stream_close_bulk(struct tle_stream *ts[], uint32_t num);
+
+/**
+ * get open stream local and remote addresses.
+ * @param s
+ * Pointer to the stream.
+ * @return
+ * zero on successful completion.
+ * - EINVAL - invalid parameter passed to function
+ */
+int
+tle_tcp_stream_get_addr(const struct tle_stream *s,
+ struct tle_tcp_stream_addr *addr);
+
+/**
+ * Client mode connect API.
+ */
+
+/**
+ * Attempt to establish connection with the destination TCP endpoint.
+ * The stream write event (or callback) will fire if the connection is
+ * established successfully.
+ * Note that a stream in listen state or a stream with an already established
+ * connection can't be the subject of a connect() call.
+ * In case of unsuccessful attempt, error event (or callback) will be
+ * activated.
+ * @param s
+ * Pointer to the stream.
+ * @param addr
+ * Address of the destination endpoint.
+ * @return
+ * zero on successful completion.
+ * - -EINVAL - invalid parameter passed to function
+ */
+int tle_tcp_stream_connect(struct tle_stream *s, const struct sockaddr *addr);
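+
+/*
+ * Illustrative sketch (handle_error is a placeholder): active open to
+ * 192.0.2.1:80 over an already opened stream "s"; completion is detected
+ * via the stream send event/callback (or the error one on failure).
+ *
+ *	struct sockaddr_in rem;
+ *	int rc;
+ *
+ *	memset(&rem, 0, sizeof(rem));
+ *	rem.sin_family = AF_INET;
+ *	rem.sin_port = rte_cpu_to_be_16(80);
+ *	inet_pton(AF_INET, "192.0.2.1", &rem.sin_addr);
+ *
+ *	rc = tle_tcp_stream_connect(s, (const struct sockaddr *)&rem);
+ *	if (rc != 0)
+ *		handle_error(rc);
+ */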
+
+/*
+ * Server mode connect API.
+ * Basic scheme for server mode API usage:
+ *
+ * <stream open happens here>
+ * tle_tcp_stream_listen(stream_to_listen);
+ * <wait for read event/callback on that stream>
 * n = tle_tcp_stream_synreqs(stream_to_listen, syn_reqs, RTE_DIM(syn_reqs));
+ * for (i = 0, k = 0; i != n; i++) {
+ * rc = <decide should connection from that endpoint be allowed>;
+ * if (rc == 0) {
+ * //proceed with connection establishment
+ *		accept_param[k].syn = syn_reqs[i];
+ *		<fill rest of accept_param fields for k-th connection>
+ *		k++;
+ * } else {
+ * //reject connection requests from that endpoint
+ * rej_reqs[i - k] = syn_reqs[i];
+ * }
+ * }
+ *
+ * //reject n - k connection requests
+ * tle_tcp_reject(stream_to_listen, rej_reqs, n - k);
+ *
+ * //accept k new connections
+ * rc = tle_tcp_stream_accept(stream_to_listen, accept_param,
+ *		new_con_streams, k);
+ * <handle errors>
+ */
+
+struct tle_syn_req {
+ struct rte_mbuf *pkt;
+	/**< mbuf with incoming connection request. */
+	void *opaque;    /**< tldk related opaque pointer. */
+};
+
+struct tle_tcp_accept_param {
+	struct tle_syn_req syn;     /**< mbuf with incoming SYN request. */
+	struct tle_tcp_stream_cfg cfg;  /**< stream configure options. */
+};
+
+
+/**
+ * Set stream into the listen state (passive opener), i.e. make stream ready
+ * to accept new connections.
+ * Stream read event (or callback) will be activated as new SYN requests
+ * arrive.
+ * Note that a stream with an already established (or establishing) connection
+ * can't be the subject of a listen() call.
+ * @param s
+ * Pointer to the stream.
+ * @return
+ * zero on successful completion.
+ * - -EINVAL - invalid parameter passed to function
+ */
+int tle_tcp_stream_listen(struct tle_stream *s);
+
+/**
+ * return up to *num* mbufs with SYN requests that were received
+ * for given TCP endpoint.
+ * Note that the stream has to be in listen state.
+ * For each returned mbuf:
+ * data_off set to the start of the packet
+ * l2_len, l3_len, l4_len are setup properly
+ * (so user can still extract L2/L3/L4 header info if needed)
+ * packet_type RTE_PTYPE_L2/L3/L4 bits are setup properly.
+ * L3/L4 checksum is verified.
+ * @param s
+ * TCP stream to receive packets from.
+ * @param rq
+ * An array of tle_syn_req structures that contains
+ * at least *num* elements in it.
+ * @param num
+ *   Number of elements in the *rq* array.
+ * @return
+ *   number of entries filled inside the *rq* array.
+ */
+uint16_t tle_tcp_stream_synreqs(struct tle_stream *s, struct tle_syn_req rq[],
+ uint32_t num);
+
+/**
+ * Accept connection requests for the given stream.
+ * Note that the stream has to be in listen state.
+ * For each new connection a new stream will be opened.
+ * @param s
+ * TCP listen stream.
+ * @param prm
+ * An array of *tle_tcp_accept_param* structures that
+ * contains at least *num* elements in it.
+ * @param rs
+ * An array of pointers to *tle_stream* structures that
+ * must be large enough to store up to *num* pointers in it.
+ * @param num
+ * Number of elements in the *prm* and *rs* arrays.
+ * @return
+ *   number of entries filled inside the *rs* array.
+ * In case of error, error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ *   - ENFILE - no more streams are available to open.
+ */
+int tle_tcp_stream_accept(struct tle_stream *s,
+ const struct tle_tcp_accept_param prm[], struct tle_stream *rs[],
+ uint32_t num);
+
+/**
+ * Reject connection requests for the given stream.
+ * Note that the stream has to be in listen state.
+ * All given connection requests will be discarded.
+ * @param s
+ * TCP listen stream.
+ * @param rq
+ * An array of tle_syn_req structures that contains
+ * at least *num* elements in it.
+ * @param num
+ *   Number of elements in the *rq* array.
+ */
+void tle_tcp_reject(struct tle_stream *s, const struct tle_syn_req rq[],
+ uint32_t num);
+
+/**
+ * return up to *num* mbufs that were received for the given TCP stream.
+ * Note that the stream has to be in connected state.
+ * Data ordering is preserved.
+ * For each returned mbuf:
+ * data_off set to the start of the packet's TCP data
+ * l2_len, l3_len, l4_len are setup properly
+ * (so user can still extract L2/L3 address info if needed)
+ * packet_type RTE_PTYPE_L2/L3/L4 bits are setup properly.
+ * L3/L4 checksum is verified.
+ * @param s
+ * TCP stream to receive packets from.
+ * @param pkt
+ * An array of pointers to *rte_mbuf* structures that
+ * must be large enough to store up to *num* pointers in it.
+ * @param num
+ * Number of elements in the *pkt* array.
+ * @return
+ *   number of entries filled inside the *pkt* array.
+ */
+uint16_t tle_tcp_stream_recv(struct tle_stream *s, struct rte_mbuf *pkt[],
+ uint16_t num);
+
+/**
+ * Consume and queue up to *num* packets that will be sent eventually
+ * by tle_tcp_tx_bulk().
+ * Note that the stream has to be in connected state.
+ * It is the responsibility of that function to determine over which TCP dev
+ * given packets have to be sent out and do the necessary preparations for
+ * that.
+ * Based on the *dst_addr* it does route lookup, fills L2/L3/L4 headers,
+ * and, if necessary, fragments packets.
+ * Depending on the underlying device information, it either does
+ * IP/TCP checksum calculations in SW or sets mbuf TX checksum
+ * offload fields properly.
+ * For each input mbuf the following conditions have to be met:
+ *  - data_off points to the start of the packet's TCP data.
+ * - there is enough header space to prepend L2/L3/L4 headers.
+ * @param s
+ * TCP stream to send packets over.
+ * @param pkt
+ *   The burst of output packets that need to be sent.
+ * @param num
+ * Number of elements in the *pkt* array.
+ * @return
+ * number of packets successfully queued in the stream send buffer.
+ * In case of error, error code can be set in rte_errno.
+ * Possible rte_errno errors include:
+ *   - EAGAIN - operation can't be performed right now
+ *   (most likely close() was already performed on that stream).
+ * - ENOTCONN - the stream is not connected.
+ */
+uint16_t tle_tcp_stream_send(struct tle_stream *s, struct rte_mbuf *pkt[],
+ uint16_t num);
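+
+/*
+ * Illustrative sketch (MAX_PKT_BURST is an application-defined size):
+ * echo received data back over a connected stream "s"; mbufs returned
+ * by recv already have data_off at the TCP payload, as send expects.
+ *
+ *	struct rte_mbuf *mb[MAX_PKT_BURST];
+ *	uint16_t i, n, k;
+ *
+ *	n = tle_tcp_stream_recv(s, mb, RTE_DIM(mb));
+ *	k = tle_tcp_stream_send(s, mb, n);
+ *	for (i = k; i != n; i++)
+ *		rte_pktmbuf_free(mb[i]); // drop what couldn't be queued
+ */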
+
+/**
+ * Back End (BE) API.
+ * BE API functions are not multi-thread safe.
+ * Supposed to be called by the L2/L3 processing layer.
+ */
+
+/**
+ * Take input mbufs and distribute them to open TCP streams.
+ * expects that for each input packet:
+ * - l2_len, l3_len, l4_len are setup correctly
+ * - (packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6)) != 0,
+ * - (packet_type & RTE_PTYPE_L4_TCP) != 0,
+ * During delivery L3/L4 checksums will be verified
+ * (either relies on HW offload or in SW).
+ * May cause some extra packets to be queued for TX.
+ * This function is not multi-thread safe.
+ * @param dev
+ * TCP device the packets were received from.
+ * @param pkt
+ * The burst of input packets that need to be processed.
+ * @param rp
+ * The array that will contain pointers to the unprocessed packets on return.
+ * Should contain at least *num* elements.
+ * @param rc
+ * The array that will contain error code for corresponding rp[] entry:
+ * - ENOENT - no open stream matching this packet.
+ * - ENOBUFS - receive buffer of the destination stream is full.
+ * Should contain at least *num* elements.
+ * @param num
+ * Number of elements in the *pkt* input array.
+ * @return
+ * number of packets delivered to the TCP streams.
+ */
+uint16_t tle_tcp_rx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[],
+ struct rte_mbuf *rp[], int32_t rc[], uint16_t num);
+
+/**
+ * Fill *pkt* with pointers to the packets that have to be transmitted
+ * over given TCP device.
+ * Output packets have to be ready to be passed straight to rte_eth_tx_burst()
+ * without any extra processing.
+ * TCP/IPv4 checksum either already calculated or appropriate mbuf fields set
+ * properly for HW offload.
+ * This function is not multi-thread safe.
+ * @param dev
+ * TCP device the output packets will be transmitted over.
+ * @param pkt
+ * An array of pointers to *rte_mbuf* structures that
+ * must be large enough to store up to *num* pointers in it.
+ * @param num
+ * Number of elements in the *pkt* array.
+ * @return
+ * number of entries filled inside the *pkt* array.
+ */
+uint16_t tle_tcp_tx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[],
+ uint16_t num);
+
+/**
+ * Perform internal processing for the given TCP context.
+ * Checks which timers have expired and performs the required actions
+ * (retransmission/connection abort, etc.)
+ * May cause some extra packets to be queued for TX.
+ * This function is not multi-thread safe.
+ * @param ctx
+ * TCP context to process.
+ * @param num
+ * Maximum number of streams to process.
+ * @return
+ * zero on successful completion.
+ * - EINVAL - invalid parameter passed to function
+ */
+int tle_tcp_process(struct tle_ctx *ctx, uint32_t num);
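+
+/*
+ * How the BE pieces fit together: a single-threaded I/O loop sketch.
+ * The rte_eth_* wiring, port_id/queue_id and MAX_PKT_BURST are
+ * assumptions for illustration; packet_type and l2/l3/l4 length fields
+ * are presumed to be filled in before rx_bulk (e.g. by the NIC or a
+ * classify step, omitted here):
+ *
+ * struct rte_mbuf *pkt[MAX_PKT_BURST], *rp[MAX_PKT_BURST];
+ * int32_t rc[MAX_PKT_BURST];
+ * uint16_t i, k, n;
+ *
+ * n = rte_eth_rx_burst(port_id, queue_id, pkt, RTE_DIM(pkt));
+ * k = tle_tcp_rx_bulk(dev, pkt, rp, rc, n);
+ * for (i = 0; i != n - k; i++)  // drop what no stream claimed
+ *         rte_pktmbuf_free(rp[i]);
+ *
+ * tle_tcp_process(ctx, num_streams);  // drive timers, retransmits
+ *
+ * n = tle_tcp_tx_bulk(dev, pkt, RTE_DIM(pkt));
+ * k = rte_eth_tx_burst(port_id, queue_id, pkt, n);
+ * for (i = k; i != n; i++)  // retry later or free
+ *         rte_pktmbuf_free(pkt[i]);
+ */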
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TLE_TCP_H_ */
diff --git a/lib/libtle_l4p/tle_udp.h b/lib/libtle_l4p/tle_udp.h
new file mode 100644
index 0000000..d3a8fe9
--- /dev/null
+++ b/lib/libtle_l4p/tle_udp.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TLE_UDP_H_
+#define _TLE_UDP_H_
+
+#include <tle_ctx.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * UDP stream creation parameters.
+ */
+struct tle_udp_stream_param {
+ struct sockaddr_storage local_addr; /**< stream local address. */
+ struct sockaddr_storage remote_addr; /**< stream remote address. */
+
+ /* _cb and _ev are mutually exclusive */
+ struct tle_event *recv_ev; /**< recv event to use. */
+ struct tle_stream_cb recv_cb; /**< recv callback to use. */
+
+ struct tle_event *send_ev; /**< send event to use. */
+ struct tle_stream_cb send_cb; /**< send callback to use. */
+};
+
+/**
+ * Create a new stream within the given UDP context.
+ * @param ctx
+ * UDP context to create new stream within.
+ * @param prm
+ * Parameters used to create and initialise the new stream.
+ * @return
+ * Pointer to UDP stream structure that can be used in future UDP API calls,
+ * or NULL on error, with error code set in rte_errno.
+ * Possible rte_errno errors include:
+ * - EINVAL - invalid parameter passed to function
+ * - ENFILE - max limit of open streams reached for that context
+ */
+struct tle_stream *
+tle_udp_stream_open(struct tle_ctx *ctx,
+ const struct tle_udp_stream_param *prm);
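+
+/*
+ * Open sketch, for illustration only; the port number and the *rx_ev*
+ * event are assumptions:
+ *
+ * struct tle_udp_stream_param prm;
+ * struct tle_stream *s;
+ *
+ * memset(&prm, 0, sizeof(prm));
+ * ((struct sockaddr_in *)&prm.local_addr)->sin_family = AF_INET;
+ * ((struct sockaddr_in *)&prm.local_addr)->sin_port =
+ *         rte_cpu_to_be_16(32768);
+ * ((struct sockaddr_in *)&prm.remote_addr)->sin_family = AF_INET;
+ * prm.recv_ev = rx_ev;  // LT event-based RX notification
+ *
+ * s = tle_udp_stream_open(ctx, &prm);
+ * if (s == NULL)
+ *         handle_error(rte_errno);  // hypothetical error handler
+ */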
+
+/**
+ * Close an open stream.
+ * All packets still remaining in the stream receive buffer will be freed.
+ * All packets still remaining in the stream transmit buffer will be kept
+ * for further transmission.
+ * @param s
+ * Pointer to the stream to close.
+ * @return
+ * zero on successful completion.
+ * - -EINVAL - invalid parameter passed to function
+ */
+int tle_udp_stream_close(struct tle_stream *s);
+
+/**
+ * Get open stream parameters.
+ * @param s
+ * Pointer to the stream.
+ * @return
+ * zero on successful completion.
+ * - -EINVAL - invalid parameter passed to function
+ */
+int
+tle_udp_stream_get_param(const struct tle_stream *s,
+ struct tle_udp_stream_param *prm);
+
+/**
+ * Take input mbufs and distribute them to open UDP streams.
+ * Expects that for each input packet:
+ * - l2_len, l3_len, l4_len are setup correctly
+ * - (packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6)) != 0,
+ * - (packet_type & RTE_PTYPE_L4_UDP) != 0,
+ * During delivery L3/L4 checksums will be verified
+ * (relying either on HW offload or on SW computation).
+ * This function is not multi-thread safe.
+ * @param dev
+ * UDP device the packets were received from.
+ * @param pkt
+ * The burst of input packets that need to be processed.
+ * @param rp
+ * The array that will contain pointers to the unprocessed packets on return.
+ * Should contain at least *num* elements.
+ * @param rc
+ * The array that will contain error code for corresponding rp[] entry:
+ * - ENOENT - no open stream matching this packet.
+ * - ENOBUFS - receive buffer of the destination stream is full.
+ * Should contain at least *num* elements.
+ * @param num
+ * Number of elements in the *pkt* input array.
+ * @return
+ * number of packets delivered to the UDP streams.
+ */
+uint16_t tle_udp_rx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[],
+ struct rte_mbuf *rp[], int32_t rc[], uint16_t num);
+
+/**
+ * Fill *pkt* with pointers to the packets that have to be transmitted
+ * over given UDP device.
+ * Output packets have to be ready to be passed straight to rte_eth_tx_burst()
+ * without any extra processing.
+ * UDP/IPv4 checksum either already calculated or appropriate mbuf fields set
+ * properly for HW offload.
+ * This function is not multi-thread safe.
+ * @param dev
+ * UDP device the output packets will be transmitted over.
+ * @param pkt
+ * An array of pointers to *rte_mbuf* structures that
+ * must be large enough to store up to *num* pointers in it.
+ * @param num
+ * Number of elements in the *pkt* array.
+ * @return
+ * number of entries filled inside the *pkt* array.
+ */
+uint16_t tle_udp_tx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[],
+ uint16_t num);
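+
+/*
+ * BE-side sketch, mirroring the TCP BE API; the port/queue wiring and
+ * MAX_PKT_BURST are assumptions for illustration. As with TCP,
+ * l2/l3/l4 lengths and packet_type must be set before rx_bulk:
+ *
+ * n = rte_eth_rx_burst(port_id, queue_id, pkt, RTE_DIM(pkt));
+ * k = tle_udp_rx_bulk(dev, pkt, rp, rc, n);
+ * for (i = 0; i != n - k; i++)  // rc[i] tells why rp[i] was refused
+ *         rte_pktmbuf_free(rp[i]);
+ *
+ * n = tle_udp_tx_bulk(dev, pkt, RTE_DIM(pkt));
+ * k = rte_eth_tx_burst(port_id, queue_id, pkt, n);
+ * for (i = k; i != n; i++)  // free what the NIC did not take
+ *         rte_pktmbuf_free(pkt[i]);
+ */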
+
+/**
+ * Return up to *num* mbufs that were received for the given UDP stream.
+ * For each returned mbuf:
+ * data_off set to the start of the packet's UDP data
+ * l2_len, l3_len, l4_len are setup properly
+ * (so user can still extract L2/L3 address info if needed)
+ * packet_type RTE_PTYPE_L2/L3/L4 bits are setup properly.
+ * L3/L4 checksum is verified.
+ * Packets with invalid L3/L4 checksum will be silently dropped.
+ * @param s
+ * UDP stream to receive packets from.
+ * @param pkt
+ * An array of pointers to *rte_mbuf* structures that
+ * must be large enough to store up to *num* pointers in it.
+ * @param num
+ * Number of elements in the *pkt* array.
+ * @return
+ * number of entries filled inside the *pkt* array.
+ */
+uint16_t tle_udp_stream_recv(struct tle_stream *s, struct rte_mbuf *pkt[],
+ uint16_t num);
+
+/**
+ * Consume and queue up to *num* packets that will be sent eventually
+ * by tle_udp_tx_bulk().
+ * If *dst_addr* is NULL, then default remote address associated with that
+ * stream (if any) will be used.
+ * The main purpose of that function is to determine over which UDP dev
+ * given packets have to be sent out and do necessary preparations for that.
+ * Based on the *dst_addr* it does route lookup, fills L2/L3/L4 headers,
+ * and, if necessary, fragments packets.
+ * Depending on the underlying device information, it either does
+ * IP/UDP checksum calculations in SW or sets mbuf TX checksum
+ * offload fields properly.
+ * For each input mbuf the following conditions have to be met:
+ * - data_off point to the start of packet's UDP data.
+ * - there is enough header space to prepend L2/L3/L4 headers.
+ * @param s
+ * UDP stream to send packets over.
+ * @param pkt
+ * The burst of output packets that need to be sent.
+ * @param num
+ * Number of elements in the *pkt* array.
+ * @param dst_addr
+ * Destination address to send packets to.
+ * @return
+ * number of packets successfully queued in the stream send buffer.
+ */
+uint16_t tle_udp_stream_send(struct tle_stream *s, struct rte_mbuf *pkt[],
+ uint16_t num, const struct sockaddr *dst_addr);
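+
+/*
+ * Send sketch with an explicit destination, for illustration only;
+ * the address/port values and the mb[]/n inputs are assumptions:
+ *
+ * struct sockaddr_in dst;
+ *
+ * memset(&dst, 0, sizeof(dst));
+ * dst.sin_family = AF_INET;
+ * dst.sin_port = rte_cpu_to_be_16(9);
+ * dst.sin_addr.s_addr = rte_cpu_to_be_32(IPv4(192, 168, 1, 1));
+ * k = tle_udp_stream_send(s, mb, n, (const struct sockaddr *)&dst);
+ * // with dst_addr == NULL the stream's default remote address is used
+ */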
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TLE_UDP_H_ */
diff --git a/lib/libtle_udp/udp_rxtx.c b/lib/libtle_l4p/udp_rxtx.c
index a5b48c8..01d3520 100644
--- a/lib/libtle_udp/udp_rxtx.c
+++ b/lib/libtle_l4p/udp_rxtx.c
@@ -20,18 +20,18 @@
#include <rte_ip_frag.h>
#include <rte_udp.h>
-#include "udp_impl.h"
+#include "udp_stream.h"
#include "misc.h"
static inline struct tle_udp_stream *
-rx_stream_obtain(struct tle_udp_dev *dev, uint32_t type, uint32_t port)
+rx_stream_obtain(struct tle_dev *dev, uint32_t type, uint32_t port)
{
struct tle_udp_stream *s;
- if (type >= TLE_UDP_VNUM || dev->dp[type] == NULL)
+ if (type >= TLE_VNUM || dev->dp[type] == NULL)
return NULL;
- s = dev->dp[type]->streams[port];
+ s = (struct tle_udp_stream *)dev->dp[type]->streams[port];
if (s == NULL)
return NULL;
@@ -49,38 +49,38 @@ get_pkt_type(const struct rte_mbuf *m)
v = m->packet_type &
(RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_MASK);
if (v == (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP))
- return TLE_UDP_V4;
+ return TLE_V4;
else if (v == (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP))
- return TLE_UDP_V6;
+ return TLE_V6;
else
- return TLE_UDP_VNUM;
+ return TLE_VNUM;
}
-static inline union udp_ports
-pkt_info(const struct tle_udp_dev *dev, struct rte_mbuf *m,
- union udp_ports *ports, union ipv4_addrs *addr4,
+static inline union l4_ports
+pkt_info(const struct tle_dev *dev, struct rte_mbuf *m,
+ union l4_ports *ports, union ipv4_addrs *addr4,
union ipv6_addrs **addr6)
{
uint32_t len;
- union udp_ports ret, *up;
+ union l4_ports ret, *up;
union ipv4_addrs *pa4;
ret.src = get_pkt_type(m);
len = m->l2_len;
- if (ret.src == TLE_UDP_V4) {
+ if (ret.src == TLE_V4) {
pa4 = rte_pktmbuf_mtod_offset(m, union ipv4_addrs *,
len + offsetof(struct ipv4_hdr, src_addr));
addr4->raw = pa4->raw;
- m->ol_flags |= dev->rx.ol_flags[TLE_UDP_V4];
- } else if (ret.src == TLE_UDP_V6) {
+ m->ol_flags |= dev->rx.ol_flags[TLE_V4];
+ } else if (ret.src == TLE_V6) {
*addr6 = rte_pktmbuf_mtod_offset(m, union ipv6_addrs *,
len + offsetof(struct ipv6_hdr, src_addr));
- m->ol_flags |= dev->rx.ol_flags[TLE_UDP_V6];
+ m->ol_flags |= dev->rx.ol_flags[TLE_V6];
}
len += m->l3_len;
- up = rte_pktmbuf_mtod_offset(m, union udp_ports *,
+ up = rte_pktmbuf_mtod_offset(m, union l4_ports *,
len + offsetof(struct udp_hdr, src_port));
ports->raw = up->raw;
ret.dst = ports->dst;
@@ -101,7 +101,7 @@ rx_stream(struct tle_udp_stream *s, void *mb[], struct rte_mbuf *rp[],
/* if RX queue was empty invoke user RX notification callback. */
if (s->rx.cb.func != NULL && r != 0 && rte_ring_count(s->rx.q) == r)
- s->rx.cb.func(s->rx.cb.data, s);
+ s->rx.cb.func(s->rx.cb.data, &s->s);
for (i = r, k = 0; i != num; i++, k++) {
rc[k] = ENOBUFS;
@@ -113,7 +113,7 @@ rx_stream(struct tle_udp_stream *s, void *mb[], struct rte_mbuf *rp[],
static inline uint16_t
rx_stream6(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
- union ipv6_addrs *addr[], union udp_ports port[],
+ union ipv6_addrs *addr[], union l4_ports port[],
struct rte_mbuf *rp[], int32_t rc[], uint16_t num)
{
uint32_t i, k, n;
@@ -124,9 +124,9 @@ rx_stream6(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
for (i = 0; i != num; i++) {
- if ((port[i].raw & s->pmsk.raw) != s->port.raw ||
- ymm_mask_cmp(&addr[i]->raw, &s->ipv6.addr.raw,
- &s->ipv6.mask.raw) != 0) {
+ if ((port[i].raw & s->s.pmsk.raw) != s->s.port.raw ||
+ ymm_mask_cmp(&addr[i]->raw, &s->s.ipv6.addr.raw,
+ &s->s.ipv6.mask.raw) != 0) {
rc[k] = ENOENT;
rp[k] = pkt[i];
k++;
@@ -141,7 +141,7 @@ rx_stream6(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
static inline uint16_t
rx_stream4(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
- union ipv4_addrs addr[], union udp_ports port[],
+ union ipv4_addrs addr[], union l4_ports port[],
struct rte_mbuf *rp[], int32_t rc[], uint16_t num)
{
uint32_t i, k, n;
@@ -152,9 +152,9 @@ rx_stream4(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
for (i = 0; i != num; i++) {
- if ((addr[i].raw & s->ipv4.mask.raw) != s->ipv4.addr.raw ||
- (port[i].raw & s->pmsk.raw) !=
- s->port.raw) {
+ if ((addr[i].raw & s->s.ipv4.mask.raw) != s->s.ipv4.addr.raw ||
+ (port[i].raw & s->s.pmsk.raw) !=
+ s->s.port.raw) {
rc[k] = ENOENT;
rp[k] = pkt[i];
k++;
@@ -168,12 +168,12 @@ rx_stream4(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
}
uint16_t
-tle_udp_rx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[],
+tle_udp_rx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[],
struct rte_mbuf *rp[], int32_t rc[], uint16_t num)
{
struct tle_udp_stream *s;
uint32_t i, j, k, n, p, t;
- union udp_ports tp[num], port[num];
+ union l4_ports tp[num], port[num];
union ipv4_addrs a4[num];
union ipv6_addrs *pa6[num];
@@ -191,7 +191,7 @@ tle_udp_rx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[],
s = rx_stream_obtain(dev, t, p);
if (s != NULL) {
- if (t == TLE_UDP_V4)
+ if (t == TLE_V4)
n = rx_stream4(s, pkt + i, a4 + i,
port + i, rp + k, rc + k, j - i);
else
@@ -217,7 +217,7 @@ tle_udp_rx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[],
}
static inline void
-stream_drb_release(struct tle_udp_stream *s, struct tle_drb * drb[],
+stream_drb_release(struct tle_udp_stream *s, struct tle_drb *drb[],
uint32_t nb_drb)
{
uint32_t n;
@@ -233,7 +233,7 @@ stream_drb_release(struct tle_udp_stream *s, struct tle_drb * drb[],
/* if stream send buffer was full invoke TX callback */
else if (s->tx.cb.func != NULL && n == 0)
- s->tx.cb.func(s->tx.cb.data, s);
+ s->tx.cb.func(s->tx.cb.data, &s->s);
}
@@ -241,7 +241,7 @@ stream_drb_release(struct tle_udp_stream *s, struct tle_drb * drb[],
}
uint16_t
-tle_udp_tx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[], uint16_t num)
+tle_udp_tx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[], uint16_t num)
{
uint32_t i, j, k, n;
struct tle_drb *drb[num];
@@ -260,7 +260,7 @@ tle_udp_tx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[], uint16_t num)
for (i = 0; i != k; i = j) {
s = drb[i]->udata;
- for (j = i + 1; j != k && s == drb[i]->udata; j++)
+ for (j = i + 1; j != k && s == drb[j]->udata; j++)
;
stream_drb_release(s, drb + i, j - i);
}
@@ -268,73 +268,6 @@ tle_udp_tx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[], uint16_t num)
return n;
}
-static int
-check_pkt_csum(const struct rte_mbuf *m, uint32_t type)
-{
- const struct ipv4_hdr *l3h4;
- const struct ipv6_hdr *l3h6;
- const struct udp_hdr *l4h;
- int32_t ret;
- uint16_t csum;
-
- ret = 0;
- l3h4 = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, m->l2_len);
- l3h6 = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, m->l2_len);
-
- if ((m->ol_flags & PKT_RX_IP_CKSUM_BAD) != 0) {
- csum = _ipv4x_cksum(l3h4, m->l3_len);
- ret = (csum != UINT16_MAX);
- }
-
- if (ret == 0 && (m->ol_flags & PKT_RX_L4_CKSUM_BAD) != 0) {
-
- /*
- * for IPv4 it is allowed to have zero UDP cksum,
- * for IPv6 valid UDP cksum is mandatory.
- */
- if (type == TLE_UDP_V4) {
- l4h = (const struct udp_hdr *)((uintptr_t)l3h4 +
- m->l3_len);
- csum = (l4h->dgram_cksum == 0) ? UINT16_MAX :
- _ipv4_udptcp_mbuf_cksum(m,
- m->l2_len + m->l3_len, l3h4);
- } else
- csum = _ipv6_udptcp_mbuf_cksum(m,
- m->l2_len + m->l3_len, l3h6);
-
- ret = (csum != UINT16_MAX);
- }
-
- return ret;
-}
-
-/* exclude NULLs from the final list of packets. */
-static inline uint32_t
-compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero)
-{
- uint32_t i, j, k, l;
-
- for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) {
-
- /* found a hole. */
- if (pkt[j] == NULL) {
-
- /* find how big is it. */
- for (i = j; i-- != 0 && pkt[i] == NULL; )
- ;
- /* fill the hole. */
- for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++)
- pkt[l] = pkt[k];
-
- nb_pkt -= j - i;
- nb_zero -= j - i;
- j = i + 1;
- }
- }
-
- return nb_pkt;
-}
-
/*
* helper function, do the necessary pre-processing for the received packets
 * before handing them to the stream_recv caller.
@@ -356,7 +289,8 @@ recv_pkt_process(struct rte_mbuf *m[], uint32_t num, uint32_t type)
f = flg[i] & (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD);
/* drop packets with invalid cksum(s). */
- if (f != 0 && check_pkt_csum(m[i], type) != 0) {
+ if (f != 0 && check_pkt_csum(m[i], m[i]->ol_flags, type,
+ IPPROTO_UDP) != 0) {
rte_pktmbuf_free(m[i]);
m[i] = NULL;
k++;
@@ -370,11 +304,12 @@ recv_pkt_process(struct rte_mbuf *m[], uint32_t num, uint32_t type)
}
uint16_t
-tle_udp_stream_recv(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
- uint16_t num)
+tle_udp_stream_recv(struct tle_stream *us, struct rte_mbuf *pkt[], uint16_t num)
{
uint32_t k, n;
+ struct tle_udp_stream *s;
+ s = UDP_STREAM(us);
n = rte_ring_mc_dequeue_burst(s->rx.q, (void **)pkt, num);
if (n == 0)
return 0;
@@ -389,58 +324,14 @@ tle_udp_stream_recv(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
rwl_release(&s->rx.use);
}
- k = recv_pkt_process(pkt, n, s->type);
+ k = recv_pkt_process(pkt, n, s->s.type);
return compress_pkt_list(pkt, n, k);
}
-static int32_t
-udp_get_dest(struct tle_udp_stream *s, const void *dst_addr,
- struct tle_udp_dest *dst)
-{
- int32_t rc;
- const struct in_addr *d4;
- const struct in6_addr *d6;
- struct tle_udp_ctx *ctx;
- struct tle_udp_dev *dev;
-
- ctx = s->ctx;
-
- /* it is here just to keep gcc happy. */
- d4 = NULL;
-
- if (s->type == TLE_UDP_V4) {
- d4 = dst_addr;
- rc = ctx->prm.lookup4(ctx->prm.lookup4_data, d4, dst);
- } else if (s->type == TLE_UDP_V6) {
- d6 = dst_addr;
- rc = ctx->prm.lookup6(ctx->prm.lookup6_data, d6, dst);
- } else
- rc = -ENOENT;
-
- if (rc < 0 || dst->dev == NULL || dst->dev->ctx != ctx)
- return -ENOENT;
-
- dev = dst->dev;
- if (s->type == TLE_UDP_V4) {
- struct ipv4_hdr *l3h;
- l3h = (struct ipv4_hdr *)(dst->hdr + dst->l2_len);
- l3h->src_addr = dev->prm.local_addr4.s_addr;
- l3h->dst_addr = d4->s_addr;
- } else {
- struct ipv6_hdr *l3h;
- l3h = (struct ipv6_hdr *)(dst->hdr + dst->l2_len);
- rte_memcpy(l3h->src_addr, &dev->prm.local_addr6,
- sizeof(l3h->src_addr));
- rte_memcpy(l3h->dst_addr, d6, sizeof(l3h->dst_addr));
- }
-
- return dev - ctx->dev;
-}
-
static inline int
udp_fill_mbuf(struct rte_mbuf *m,
uint32_t type, uint64_t ol_flags, uint32_t pid,
- union udph udph, const struct tle_udp_dest *dst)
+ union udph udph, const struct tle_dest *dst)
{
uint32_t len, plen;
char *l2h;
@@ -471,7 +362,7 @@ udp_fill_mbuf(struct rte_mbuf *m,
/* update proto specific fields. */
- if (type == TLE_UDP_V4) {
+ if (type == TLE_V4) {
struct ipv4_hdr *l3h;
l3h = (struct ipv4_hdr *)(l2h + dst->l2_len);
l3h->packet_id = rte_cpu_to_be_16(pid);
@@ -511,7 +402,7 @@ frag_fixup(const struct rte_mbuf *ms, struct rte_mbuf *mf, uint32_t type)
mf->ol_flags = ms->ol_flags;
mf->tx_offload = ms->tx_offload;
- if (type == TLE_UDP_V4 && (ms->ol_flags & PKT_TX_IP_CKSUM) == 0) {
+ if (type == TLE_V4 && (ms->ol_flags & PKT_TX_IP_CKSUM) == 0) {
l3h = rte_pktmbuf_mtod(mf, struct ipv4_hdr *);
l3h->hdr_checksum = _ipv4x_cksum(l3h, mf->l3_len);
}
@@ -522,7 +413,7 @@ frag_fixup(const struct rte_mbuf *ms, struct rte_mbuf *mf, uint32_t type)
*/
static inline int
fragment(struct rte_mbuf *pkt, struct rte_mbuf *frag[], uint32_t num,
- uint32_t type, const struct tle_udp_dest *dst)
+ uint32_t type, const struct tle_dest *dst)
{
int32_t frag_num, i;
uint16_t mtu;
@@ -533,7 +424,7 @@ fragment(struct rte_mbuf *pkt, struct rte_mbuf *frag[], uint32_t num,
mtu = dst->mtu - dst->l2_len;
/* fragment packet */
- if (type == TLE_UDP_V4)
+ if (type == TLE_V4)
frag_num = rte_ipv4_fragment_packet(pkt, frag, num, mtu,
dst->head_mp, dst->head_mp);
else
@@ -572,7 +463,7 @@ stream_drb_alloc(struct tle_udp_stream *s, struct tle_drb *drbs[],
/* enqueue up to num packets to the destination device queue. */
static inline uint16_t
-queue_pkt_out(struct tle_udp_stream *s, struct tle_udp_dev *dev,
+queue_pkt_out(struct tle_udp_stream *s, struct tle_dev *dev,
const void *pkt[], uint16_t nb_pkt,
struct tle_drb *drbs[], uint32_t *nb_drb)
{
@@ -613,7 +504,7 @@ queue_pkt_out(struct tle_udp_stream *s, struct tle_udp_dev *dev,
}
uint16_t
-tle_udp_stream_send(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
+tle_udp_stream_send(struct tle_stream *us, struct rte_mbuf *pkt[],
uint16_t num, const struct sockaddr *dst_addr)
{
int32_t di, frg, rc;
@@ -622,16 +513,18 @@ tle_udp_stream_send(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
uint32_t mtu, pid, type;
const struct sockaddr_in *d4;
const struct sockaddr_in6 *d6;
+ struct tle_udp_stream *s;
const void *da;
union udph udph;
- struct tle_udp_dest dst;
+ struct tle_dest dst;
struct tle_drb *drb[num];
- type = s->type;
+ s = UDP_STREAM(us);
+ type = s->s.type;
/* start filling UDP header. */
udph.raw = 0;
- udph.ports.src = s->port.dst;
+ udph.ports.src = s->s.port.dst;
/* figure out what destination addr/port to use. */
if (dst_addr != NULL) {
@@ -639,7 +532,7 @@ tle_udp_stream_send(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
rte_errno = EINVAL;
return 0;
}
- if (type == TLE_UDP_V4) {
+ if (type == TLE_V4) {
d4 = (const struct sockaddr_in *)dst_addr;
da = &d4->sin_addr;
udph.ports.dst = d4->sin_port;
@@ -649,14 +542,14 @@ tle_udp_stream_send(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
udph.ports.dst = d6->sin6_port;
}
} else {
- udph.ports.dst = s->port.src;
- if (type == TLE_UDP_V4)
- da = &s->ipv4.addr.src;
+ udph.ports.dst = s->s.port.src;
+ if (type == TLE_V4)
+ da = &s->s.ipv4.addr.src;
else
- da = &s->ipv6.addr.src;
+ da = &s->s.ipv6.addr.src;
}
- di = udp_get_dest(s, da, &dst);
+ di = stream_get_dest(&s->s, da, &dst);
if (di < 0) {
rte_errno = -di;
return 0;
diff --git a/lib/libtle_l4p/udp_stream.c b/lib/libtle_l4p/udp_stream.c
new file mode 100644
index 0000000..9f379d9
--- /dev/null
+++ b/lib/libtle_l4p/udp_stream.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <rte_malloc.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
+
+#include "udp_stream.h"
+#include "misc.h"
+
+static void
+unuse_stream(struct tle_udp_stream *s)
+{
+ s->s.type = TLE_VNUM;
+ rte_atomic32_set(&s->rx.use, INT32_MIN);
+ rte_atomic32_set(&s->tx.use, INT32_MIN);
+}
+
+static void
+fini_stream(struct tle_udp_stream *s)
+{
+ if (s != NULL) {
+ rte_free(s->rx.q);
+ rte_free(s->tx.drb.r);
+ }
+}
+
+static void
+udp_fini_streams(struct tle_ctx *ctx)
+{
+ uint32_t i;
+ struct tle_udp_stream *s;
+
+ s = ctx->streams.buf;
+ if (s != NULL) {
+ for (i = 0; i != ctx->prm.max_streams; i++)
+ fini_stream(s + i);
+ }
+
+ rte_free(s);
+ ctx->streams.buf = NULL;
+ STAILQ_INIT(&ctx->streams.free);
+}
+
+static int
+init_stream(struct tle_ctx *ctx, struct tle_udp_stream *s)
+{
+ size_t bsz, rsz, sz;
+ uint32_t i, k, n, nb;
+ struct tle_drb *drb;
+ char name[RTE_RING_NAMESIZE];
+
+ /* init RX part. */
+
+ n = RTE_MAX(ctx->prm.max_stream_rbufs, 1U);
+ n = rte_align32pow2(n);
+ sz = sizeof(*s->rx.q) + n * sizeof(s->rx.q->ring[0]);
+
+ s->rx.q = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ ctx->prm.socket_id);
+ if (s->rx.q == NULL) {
+ UDP_LOG(ERR, "%s(%p): allocation of %zu bytes on socket %d "
+ "failed with error code: %d\n",
+ __func__, s, sz, ctx->prm.socket_id, rte_errno);
+ return -ENOMEM;
+ }
+
+ snprintf(name, sizeof(name), "%p@%zu", s, sz);
+ rte_ring_init(s->rx.q, name, n, RING_F_SP_ENQ);
+
+ /* init TX part. */
+
+ nb = drb_nb_elem(ctx);
+ k = calc_stream_drb_num(ctx, nb);
+ n = rte_align32pow2(k);
+
+ /* size of the drbs ring */
+ rsz = sizeof(*s->tx.drb.r) + n * sizeof(s->tx.drb.r->ring[0]);
+ rsz = RTE_ALIGN_CEIL(rsz, RTE_CACHE_LINE_SIZE);
+
+ /* size of the drb. */
+ bsz = tle_drb_calc_size(nb);
+
+ /* total stream drbs size. */
+ sz = rsz + bsz * k;
+
+ s->tx.drb.r = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ ctx->prm.socket_id);
+ if (s->tx.drb.r == NULL) {
+ UDP_LOG(ERR, "%s(%p): allocation of %zu bytes on socket %d "
+ "failed with error code: %d\n",
+ __func__, s, sz, ctx->prm.socket_id, rte_errno);
+ return -ENOMEM;
+ }
+
+ snprintf(name, sizeof(name), "%p@%zu", s, sz);
+ rte_ring_init(s->tx.drb.r, name, n, 0);
+
+ for (i = 0; i != k; i++) {
+ drb = (struct tle_drb *)((uintptr_t)s->tx.drb.r +
+ rsz + bsz * i);
+ drb->udata = s;
+ drb->size = nb;
+ rte_ring_enqueue(s->tx.drb.r, drb);
+ }
+
+ s->tx.drb.nb_elem = nb;
+ s->tx.drb.nb_max = k;
+
+ /* mark stream as available to use. */
+
+ s->s.ctx = ctx;
+ unuse_stream(s);
+ STAILQ_INSERT_TAIL(&ctx->streams.free, &s->s, link);
+
+ return 0;
+}
+
+static void
+udp_free_drbs(struct tle_stream *s, struct tle_drb *drb[], uint32_t nb_drb)
+{
+ struct tle_udp_stream *us;
+
+ us = (struct tle_udp_stream *)s;
+ rte_ring_enqueue_burst(us->tx.drb.r, (void **)drb, nb_drb);
+}
+
+static int
+udp_init_streams(struct tle_ctx *ctx)
+{
+ size_t sz;
+ uint32_t i;
+ int32_t rc;
+ struct tle_udp_stream *s;
+
+ sz = sizeof(*s) * ctx->prm.max_streams;
+ s = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ ctx->prm.socket_id);
+ if (s == NULL) {
+ UDP_LOG(ERR, "allocation of %zu bytes on socket %d "
+ "for %u udp_streams failed\n",
+ sz, ctx->prm.socket_id, ctx->prm.max_streams);
+ return -ENOMEM;
+ }
+
+ ctx->streams.buf = s;
+ STAILQ_INIT(&ctx->streams.free);
+
+ for (i = 0; i != ctx->prm.max_streams; i++) {
+ rc = init_stream(ctx, s + i);
+ if (rc != 0) {
+ UDP_LOG(ERR, "initalisation of %u-th stream failed", i);
+ udp_fini_streams(ctx);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+static void __attribute__((constructor))
+udp_stream_setup(void)
+{
+ static const struct stream_ops udp_ops = {
+ .init_streams = udp_init_streams,
+ .fini_streams = udp_fini_streams,
+ .free_drbs = udp_free_drbs,
+ };
+
+ tle_stream_ops[TLE_PROTO_UDP] = udp_ops;
+}
+
+static inline void
+stream_down(struct tle_udp_stream *s)
+{
+ rwl_down(&s->rx.use);
+ rwl_down(&s->tx.use);
+}
+
+static inline void
+stream_up(struct tle_udp_stream *s)
+{
+ rwl_up(&s->rx.use);
+ rwl_up(&s->tx.use);
+}
+
+static int
+check_stream_prm(const struct tle_ctx *ctx,
+ const struct tle_udp_stream_param *prm)
+{
+ if ((prm->local_addr.ss_family != AF_INET &&
+ prm->local_addr.ss_family != AF_INET6) ||
+ prm->local_addr.ss_family != prm->remote_addr.ss_family)
+ return -EINVAL;
+
+ /* callback and event notifications mechanisms are mutually exclusive */
+ if ((prm->recv_ev != NULL && prm->recv_cb.func != NULL) ||
+ (prm->send_ev != NULL && prm->send_cb.func != NULL))
+ return -EINVAL;
+
+ /* check does context support desired address family. */
+ if ((prm->local_addr.ss_family == AF_INET &&
+ ctx->prm.lookup4 == NULL) ||
+ (prm->local_addr.ss_family == AF_INET6 &&
+ ctx->prm.lookup6 == NULL))
+ return -EINVAL;
+
+ return 0;
+}
+
+struct tle_stream *
+tle_udp_stream_open(struct tle_ctx *ctx,
+ const struct tle_udp_stream_param *prm)
+{
+ struct tle_udp_stream *s;
+ int32_t rc;
+
+ if (ctx == NULL || prm == NULL || check_stream_prm(ctx, prm) != 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ s = (struct tle_udp_stream *)get_stream(ctx);
+ if (s == NULL) {
+ rte_errno = ENFILE;
+ return NULL;
+
+ /* some TX still pending for that stream. */
+ } else if (UDP_STREAM_TX_PENDING(s)) {
+ put_stream(ctx, &s->s, 0);
+ rte_errno = EAGAIN;
+ return NULL;
+ }
+
+ /* copy input parameters. */
+ s->prm = *prm;
+
+ /* setup L4 ports and L3 addresses fields. */
+ rc = stream_fill_ctx(ctx, &s->s,
+ (const struct sockaddr *)&prm->local_addr,
+ (const struct sockaddr *)&prm->remote_addr);
+
+ if (rc != 0) {
+ put_stream(ctx, &s->s, 1);
+ s = NULL;
+ rte_errno = rc;
+ } else {
+ /* setup stream notification mechanism */
+ s->rx.ev = prm->recv_ev;
+ s->rx.cb = prm->recv_cb;
+ s->tx.ev = prm->send_ev;
+ s->tx.cb = prm->send_cb;
+
+ /* mark stream as available for RX/TX */
+ if (s->tx.ev != NULL)
+ tle_event_raise(s->tx.ev);
+ stream_up(s);
+ }
+
+ return &s->s;
+}
+
+int
+tle_udp_stream_close(struct tle_stream *us)
+{
+ int32_t rc;
+ struct tle_ctx *ctx;
+ struct tle_udp_stream *s;
+
+ static const struct tle_stream_cb zcb;
+
+ s = UDP_STREAM(us);
+ if (us == NULL || s->s.type >= TLE_VNUM)
+ return -EINVAL;
+
+ ctx = s->s.ctx;
+
+ /* mark stream as unavailable for RX/TX. */
+ stream_down(s);
+
+ /* reset stream events if any. */
+ if (s->rx.ev != NULL) {
+ tle_event_idle(s->rx.ev);
+ s->rx.ev = NULL;
+ }
+ if (s->tx.ev != NULL) {
+ tle_event_idle(s->tx.ev);
+ s->tx.ev = NULL;
+ }
+
+ s->rx.cb = zcb;
+ s->tx.cb = zcb;
+
+ /* free stream's destination port */
+ rc = stream_clear_ctx(ctx, &s->s);
+
+ /* empty stream's RX queue */
+ empty_mbuf_ring(s->rx.q);
+
+ /*
+ * mark the stream as free again.
+ * if there still are pkts queued for TX,
+ * then put this stream to the tail of free list.
+ */
+ put_stream(ctx, &s->s, UDP_STREAM_TX_FINISHED(s));
+ return rc;
+}
+
+int
+tle_udp_stream_get_param(const struct tle_stream *us,
+ struct tle_udp_stream_param *prm)
+{
+ struct sockaddr_in *lin4;
+ struct sockaddr_in6 *lin6;
+ const struct tle_udp_stream *s;
+
+ s = UDP_STREAM(us);
+ if (prm == NULL || us == NULL || s->s.type >= TLE_VNUM)
+ return -EINVAL;
+
+ prm[0] = s->prm;
+ if (prm->local_addr.ss_family == AF_INET) {
+ lin4 = (struct sockaddr_in *)&prm->local_addr;
+ lin4->sin_port = s->s.port.dst;
+ } else if (s->prm.local_addr.ss_family == AF_INET6) {
+ lin6 = (struct sockaddr_in6 *)&prm->local_addr;
+ lin6->sin6_port = s->s.port.dst;
+ }
+
+ return 0;
+}
diff --git a/lib/libtle_l4p/udp_stream.h b/lib/libtle_l4p/udp_stream.h
new file mode 100644
index 0000000..a950e56
--- /dev/null
+++ b/lib/libtle_l4p/udp_stream.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _UDP_STREAM_H_
+#define _UDP_STREAM_H_
+
+#include <rte_vect.h>
+#include <tle_dring.h>
+#include <tle_udp.h>
+#include <tle_event.h>
+
+#include "osdep.h"
+#include "ctx.h"
+#include "stream.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+union udph {
+ uint64_t raw;
+ struct {
+ union l4_ports ports;
+ uint16_t len;
+ uint16_t cksum;
+ };
+};
+
+struct tle_udp_stream {
+
+ struct tle_stream s;
+
+ struct {
+ struct rte_ring *q;
+ struct tle_event *ev;
+ struct tle_stream_cb cb;
+ rte_atomic32_t use;
+ } rx __rte_cache_aligned;
+
+ struct {
+ rte_atomic32_t use;
+ struct {
+ uint32_t nb_elem; /* number of objects per drb. */
+ uint32_t nb_max; /* number of drbs per stream. */
+ struct rte_ring *r;
+ } drb;
+ struct tle_event *ev;
+ struct tle_stream_cb cb;
+ } tx __rte_cache_aligned;
+
+ struct tle_udp_stream_param prm;
+} __rte_cache_aligned;
+
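+/*
+ * container_of-style cast: given a pointer to the embedded generic
+ * tle_stream, recover the enclosing tle_udp_stream.
+ */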
+#define UDP_STREAM(p) \
+((struct tle_udp_stream *)((uintptr_t)(p) - offsetof(struct tle_udp_stream, s)))
+
+#define UDP_STREAM_TX_PENDING(s) \
+ ((s)->tx.drb.nb_max != rte_ring_count((s)->tx.drb.r))
+
+#define UDP_STREAM_TX_FINISHED(s) \
+ ((s)->tx.drb.nb_max == rte_ring_count((s)->tx.drb.r))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UDP_STREAM_H_ */
diff --git a/lib/libtle_timer/Makefile b/lib/libtle_timer/Makefile
new file mode 100644
index 0000000..c17d219
--- /dev/null
+++ b/lib/libtle_timer/Makefile
@@ -0,0 +1,38 @@
+# Copyright (c) 2016 Intel Corporation.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overwritten by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = libtle_timer.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := tle_timer_version.map
+
+LIBABIVER := 1
+
+#source files
+SRCS-y += timer.c
+
+SYMLINK-y-include += tle_timer.h
+
+include $(TLDK_ROOT)/mk/tle.lib.mk
diff --git a/lib/libtle_timer/timer.c b/lib/libtle_timer/timer.c
new file mode 100644
index 0000000..8b89fd6
--- /dev/null
+++ b/lib/libtle_timer/timer.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string.h>
+#include <sys/queue.h>
+#include <rte_cycles.h>
+#include <rte_errno.h>
+#include <tle_timer.h>
+
+#define TW_SLOTS_PER_RING 512
+#define TW_RING_SHIFT 9
+#define TW_RING_MASK (TW_SLOTS_PER_RING - 1)
+#define MAX_TIMER_BURST 0x20
+
+enum {
+ TW_RING_FAST,
+ TW_RING_SLOW,
+ TW_N_RINGS,
+};
+
+struct tle_timer_list;
+
+struct tle_timer_elmt {
+ void *obj; /**< object for which timer is created */
+
+ struct tle_timer_list *list; /**< current list the element belongs to */
+
+ /** Slow ring only, saved when timer added to ring */
+ uint16_t fast_index;
+
+ LIST_ENTRY(tle_timer_elmt) link;
+};
+
+struct tle_timer_list {
+ uint32_t num;
+ LIST_HEAD(, tle_timer_elmt) head;
+};
+
+struct tle_timer_wheel {
+ uint64_t next_run_time; /**< next time the wheel should run */
+
+ uint64_t last_run_time; /**< last time the wheel ran */
+
+ uint32_t current_tick; /**< current tick */
+
+ uint32_t current_index[TW_N_RINGS]; /**< current wheel indices */
+
+ struct tle_timer_list free; /**< free timers to be used */
+
+ struct tle_timer_list expired; /**< expired timers to be pulled */
+
+ struct tle_timer_wheel_args prm; /**< timer wheel configuration params */
+
+ /** wheel arrays */
+ struct tle_timer_list w[TW_N_RINGS][TW_SLOTS_PER_RING];
+};
+
+/** helper functions to manipulate the linked lists */
+static inline uint32_t
+get_timers(struct tle_timer_list *list, struct tle_timer_elmt *re[],
+ uint32_t num)
+{
+ struct tle_timer_elmt *e;
+ uint32_t i, n;
+
+ n = RTE_MIN(list->num, num);
+ for (i = 0; i != n; i++) {
+ e = LIST_FIRST(&list->head);
+ LIST_REMOVE(e, link);
+ e->list = NULL;
+ re[i] = e;
+ }
+
+ list->num -= n;
+ return n;
+}
+
+static inline struct tle_timer_elmt *
+get_timer(struct tle_timer_list *list)
+{
+ struct tle_timer_elmt *e;
+
+ e = LIST_FIRST(&list->head);
+ LIST_REMOVE(e, link);
+ e->list = NULL;
+ list->num--;
+ return e;
+}
+
+static inline void
+put_timers(struct tle_timer_list *list, struct tle_timer_elmt *te[],
+ uint32_t num)
+{
+ uint32_t i;
+
+ for (i = 0; i != num; i++) {
+ te[i]->list = list;
+ LIST_INSERT_HEAD(&list->head, te[i], link);
+ }
+ list->num += num;
+}
+
+static inline void
+put_timer(struct tle_timer_list *list, struct tle_timer_elmt *e)
+{
+ e->list = list;
+ LIST_INSERT_HEAD(&list->head, e, link);
+ list->num++;
+}
+
+static inline void
+rem_timer(struct tle_timer_list *list, struct tle_timer_elmt *e)
+{
+ LIST_REMOVE(e, link);
+ e->list = NULL;
+ list->num--;
+}
+
+/** create the tle timer wheel */
+struct tle_timer_wheel *
+tle_timer_create(struct tle_timer_wheel_args *prm, uint64_t now)
+{
+ uint32_t i, j;
+ size_t sz;
+ struct tle_timer_wheel *tw;
+ struct tle_timer_elmt *e;
+ struct tle_timer_elmt *timers;
+
+ if (prm == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* at least one timer has to be created */
+ if (prm->max_timer == 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* do not allow zero tick size */
+ if (prm->tick_size == 0) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ sz = sizeof(*tw) + prm->max_timer * sizeof(struct tle_timer_elmt);
+
+ /* allocate memory */
+ tw = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ prm->socket_id);
+
+ if (tw == NULL) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ tw->last_run_time = now;
+ tw->prm = *prm;
+ timers = (struct tle_timer_elmt *)(tw + 1);
+
+ /* initialize the lists */
+ LIST_INIT(&tw->free.head);
+ LIST_INIT(&tw->expired.head);
+
+ for (i = 0; i < prm->max_timer; i++) {
+ e = timers + i;
+ put_timer(&tw->free, e);
+ }
+
+ for (i = 0; i < TW_N_RINGS; i++)
+ for (j = 0; j < TW_SLOTS_PER_RING; j++)
+ LIST_INIT(&tw->w[i][j].head);
+
+ return tw;
+}
+
+/** free the tle timer wheel */
+void
+tle_timer_free(struct tle_timer_wheel *tw)
+{
+ rte_free(tw);
+}
+
+/** start a timer */
+void *
+tle_timer_start(struct tle_timer_wheel *tw, void *obj, uint64_t interval)
+{
+ uint16_t slow_ring_index, fast_ring_index;
+ struct tle_timer_list *ts;
+ struct tle_timer_elmt *e;
+ uint32_t carry;
+ uint32_t nb_tick;
+
+ rte_errno = 0;
+ if (!interval) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ if (tw->free.num == 0) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
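+ /*
+ * Split the requested interval (in ticks) between the two rings:
+ * the low TW_RING_SHIFT bits pick a fast-ring slot relative to the
+ * current position, and any wrap past the fast ring carries into
+ * the slow-ring index.
+ */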
+ nb_tick = interval / tw->prm.tick_size;
+
+ fast_ring_index = nb_tick & TW_RING_MASK;
+ fast_ring_index += tw->current_index[TW_RING_FAST];
+ carry = fast_ring_index >= TW_SLOTS_PER_RING ? 1 : 0;
+ fast_ring_index %= TW_SLOTS_PER_RING;
+ slow_ring_index = (nb_tick >> TW_RING_SHIFT) + carry;
+
+ /* Timer duration exceeds ~7 hrs? Oops */
+ if (slow_ring_index >= TW_SLOTS_PER_RING) {
+ rte_errno = ERANGE;
+ return NULL;
+ }
+
+ /* Timer expires more than 51.2 seconds from now? */
+ if (slow_ring_index) {
+ slow_ring_index += tw->current_index[TW_RING_SLOW];
+ slow_ring_index %= TW_SLOTS_PER_RING;
+ ts = &tw->w[TW_RING_SLOW][slow_ring_index];
+
+ e = get_timer(&tw->free);
+ e->obj = obj;
+ e->fast_index = fast_ring_index;
+ put_timer(ts, e);
+
+ /* Return the user timer-cancellation handle */
+ return (void *)e;
+ }
+
+ /* Timer expires less than 51.2 seconds from now */
+ ts = &tw->w[TW_RING_FAST][fast_ring_index];
+
+ e = get_timer(&tw->free);
+ e->obj = obj;
+ put_timer(ts, e);
+
+ /* Give the user a handle to cancel the timer */
+ return (void *)e;
+}
+
+/** stop a timer */
+void tle_timer_stop(struct tle_timer_wheel *tw, void *timer)
+{
+ struct tle_timer_elmt *e;
+ struct tle_timer_list *ts;
+
+ /* Cancel the timer */
+ e = (struct tle_timer_elmt *)timer;
+ ts = e->list;
+ rem_timer(ts, e);
+ put_timer(&tw->free, e);
+}
+
+/** run the timer wheel. Call every tick_size units
+ * (e.g. the equivalent of 100 ms).
+ */
+void tle_timer_expire(struct tle_timer_wheel *tw, uint64_t now)
+{
+ uint32_t nb_tick, i, n;
+ uint32_t fast_wheel_index, slow_wheel_index, demoted_index;
+ struct tle_timer_list *ts, *ts2;
+ struct tle_timer_elmt *re[MAX_TIMER_BURST], *e;
+
+ /* Shouldn't happen */
+ if (unlikely(now < tw->next_run_time))
+ return;
+
+ /* Number of tick_size cycles which have occurred */
+ nb_tick = (now - tw->last_run_time) / tw->prm.tick_size;
+ if (nb_tick == 0)
+ return;
+
+ /* Remember when we ran, compute next runtime */
+ tw->next_run_time = (now + tw->prm.tick_size);
+ tw->last_run_time = now;
+
+ for (i = 0; i < nb_tick; i++) {
+ fast_wheel_index = tw->current_index[TW_RING_FAST];
+
+ /* If we've been around the fast ring once,
+ * process one slot in the slow ring before we handle
+ * the fast ring.
+ */
+ if (unlikely(fast_wheel_index == TW_SLOTS_PER_RING)) {
+ fast_wheel_index = tw->current_index[TW_RING_FAST] = 0;
+
+ tw->current_index[TW_RING_SLOW]++;
+ tw->current_index[TW_RING_SLOW] %= TW_SLOTS_PER_RING;
+ slow_wheel_index = tw->current_index[TW_RING_SLOW];
+
+ ts = &tw->w[TW_RING_SLOW][slow_wheel_index];
+
+ /* Deal slow-ring elements into the fast ring. */
+ while (ts->num != 0) {
+ e = get_timer(ts);
+ demoted_index = e->fast_index;
+ ts2 = &tw->w[TW_RING_FAST][demoted_index];
+ put_timer(ts2, e);
+ }
+ LIST_INIT(&ts->head);
+ }
+
+ /* Handle the fast ring */
+ ts = &tw->w[TW_RING_FAST][fast_wheel_index];
+
+ /* Clear the fast-ring slot and move its timers to the expired list */
+ n = get_timers(ts, re, RTE_DIM(re));
+ while (n != 0) {
+ put_timers(&tw->expired, re, n);
+ n = get_timers(ts, re, RTE_DIM(re));
+ }
+ LIST_INIT(&ts->head);
+
+ tw->current_index[TW_RING_FAST]++;
+ tw->current_tick++;
+ }
+}
+
+/** bulk retrieve of expired timers */
+int
+tle_timer_get_expired_bulk(struct tle_timer_wheel *tw, void *rt[], uint32_t num)
+{
+ uint32_t i, n;
+ struct tle_timer_elmt *e[MAX_TIMER_BURST];
+
+ n = get_timers(&tw->expired, e, num);
+
+ for (i = 0; i != n; i++)
+ rt[i] = e[i]->obj;
+
+ put_timers(&tw->free, e, n);
+
+ return n;
+}
diff --git a/lib/libtle_timer/tle_timer.h b/lib/libtle_timer/tle_timer.h
new file mode 100644
index 0000000..c40516e
--- /dev/null
+++ b/lib/libtle_timer/tle_timer.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __tle_timer_h__
+#define __tle_timer_h__
+
+#include <stdint.h>
+#include <rte_config.h>
+#include <rte_debug.h>
+#include <rte_malloc.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @file
+ * @brief timer definitions
+ *
+ * Design parameters:
+ * granularity: configurable in terms of units (i.e. cycles or ms).
+ * e.g. with 100ms tick
+ * required max period: 2.5 hours => 150 minutes => 90,000 ticks
+ * Rounding up to 256k ticks yields a two-level 512 slot-per-level
+ * wheel, resulting in a 7-hour max period.
+ */
+
+struct tle_timer_wheel_args {
+ uint32_t tick_size; /**< tick size in units */
+
+ int32_t socket_id; /**< socket ID to allocate memory for. */
+
+ uint32_t max_timer; /**< maximum number of timers */
+};
+
+struct tle_timer_wheel;
+
+/** initialize a timer wheel */
+struct tle_timer_wheel *
+tle_timer_create(struct tle_timer_wheel_args *prm, uint64_t now);
+
+/** free a timer wheel */
+void
+tle_timer_free(struct tle_timer_wheel *tw);
+
+/** start a timer */
+void *
+tle_timer_start(struct tle_timer_wheel *tw, void *obj, uint64_t interval);
+
+/** stop a timer */
+void
+tle_timer_stop(struct tle_timer_wheel *tw, void *timer);
+
+/** run the timer wheel. Call every tick_size units
+ * (e.g. the equivalent of 100 ms).
+ */
+void
+tle_timer_expire(struct tle_timer_wheel *tw, uint64_t now);
+
+/** bulk retrieve of expired timers */
+int
+tle_timer_get_expired_bulk(struct tle_timer_wheel *tw, void *timers[],
+ uint32_t num);
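+
+/*
+ * Usage sketch, for illustration only; now_units() and on_timeout()
+ * are hypothetical, and the tick/limit values are assumptions:
+ *
+ * struct tle_timer_wheel_args prm = {
+ *         .tick_size = 100,           // in the caller's time units
+ *         .socket_id = SOCKET_ID_ANY,
+ *         .max_timer = 0x1000,
+ * };
+ * struct tle_timer_wheel *tw = tle_timer_create(&prm, now_units());
+ * void *h = tle_timer_start(tw, obj, 500);  // ~5 ticks from now
+ *
+ * // periodically, at least once per tick_size units:
+ * void *exp[32];
+ * tle_timer_expire(tw, now_units());
+ * n = tle_timer_get_expired_bulk(tw, exp, RTE_DIM(exp));
+ * for (i = 0; i != n; i++)
+ *         on_timeout(exp[i]);
+ * // a pending timer can be cancelled with tle_timer_stop(tw, h)
+ */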
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __tle_timer_h__ */
diff --git a/lib/libtle_udp/tle_udp_impl.h b/lib/libtle_udp/tle_udp_impl.h
deleted file mode 100644
index c55d605..0000000
--- a/lib/libtle_udp/tle_udp_impl.h
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * Copyright (c) 2016 Intel Corporation.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _TLE_UDP_IMPL_H_
-#define _TLE_UDP_IMPL_H_
-
-#include <stdint.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-#include <rte_common.h>
-#include <rte_mbuf.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * <udp_ctx> - each such ctx represents an 'independent copy of the stack'.
- * It owns set of <udp_stream>s and <udp_dev>s entities and provides
- * (de)multiplexing input/output packets from/into UDP devices into/from
- * UDP streams.
- * <udp_dev> is an abstraction for the underlying device, that is able
- * to RX/TX packets and may provide some HW offload capabilities.
- * It is a user responsibility to add to the <udp_ctx> all <udp_dev>s,
- * that context has to manage, before starting to do stream operations
- * (open/send/recv,close) over that context.
- * Right now adding/deleting <udp_dev>s to the context with open
- * streams is not supported.
- * <udp_stream> represents an UDP endpoint <addr, port> and is an analogy to
- * socket entity.
- * As with a socket, there are ability to do recv/send over it.
- * <udp_stream> belongs to particular <udp_ctx> but is visible globally across
- * the process, i.e. any thread within the process can do recv/send over it
- * without any further synchronisation.
- * While 'upper' layer API is thread safe, lower layer API (rx_bulk/tx_bulk)
- * is not thread safe and is not supposed to be run on multiple threads
- * in parallel.
- * So single thread can drive multiple <udp_ctx>s and do IO for them,
- * but multiple threads can't drive same <udp_ctx> without some
- * explicit synchronization.
- */
-
-struct tle_udp_ctx;
-struct tle_udp_dev;
-
-/**
- * Blocked UDP ports info.
- */
-struct tle_bl_port {
- uint32_t nb_port; /**< number of blocked ports. */
- const uint16_t *port; /**< list of blocked ports. */
-};
-
-/**
- * UDP device parameters.
- */
-struct tle_udp_dev_param {
- uint32_t rx_offload; /**< DEV_RX_OFFLOAD_* supported. */
- uint32_t tx_offload; /**< DEV_TX_OFFLOAD_* supported. */
- struct in_addr local_addr4; /**< local IPv4 address assigned. */
- struct in6_addr local_addr6; /**< local IPv6 address assigned. */
- struct tle_bl_port bl4; /**< blocked ports for IPv4 address. */
- struct tle_bl_port bl6; /**< blocked ports for IPv4 address. */
-};
-
-#define TLE_UDP_MAX_HDR 0x60
-
-struct tle_udp_dest {
- struct rte_mempool *head_mp; /**< MP for fragment feaders. */
- struct tle_udp_dev *dev; /**< device to send packets through. */
- uint16_t mtu; /**< MTU for given destination. */
- uint8_t l2_len; /**< L2 header lenght. */
- uint8_t l3_len; /**< L3 header lenght. */
- uint8_t hdr[TLE_UDP_MAX_HDR]; /**< L2/L3 headers. */
-};
-
-/**
- * UDP context creation parameters.
- */
-struct tle_udp_ctx_param {
- int32_t socket_id; /**< socket ID to allocate memory for. */
- uint32_t max_streams; /**< max number of streams in context. */
- uint32_t max_stream_rbufs; /**< max recv mbufs per stream. */
- uint32_t max_stream_sbufs; /**< max send mbufs per stream. */
- uint32_t send_bulk_size; /**< expected # of packets per send call. */
-
- int (*lookup4)(void *opaque, const struct in_addr *addr,
- struct tle_udp_dest *res);
- /**< will be called by send() to get IPv4 packet destination info. */
- void *lookup4_data;
- /**< opaque data pointer for lookup4() callback. */
-
- int (*lookup6)(void *opaque, const struct in6_addr *addr,
- struct tle_udp_dest *res);
- /**< will be called by send() to get IPv6 packet destination info. */
- void *lookup6_data;
- /**< opaque data pointer for lookup6() callback. */
-};
-
-/**
- * create UDP context.
- * @param ctx_prm
- * Parameters used to create and initialise the UDP context.
- * @return
- * Pointer to UDP context structure that can be used in future UDP
- * operations, or NULL on error, with error code set in rte_errno.
- * Possible rte_errno errors include:
- * - EINVAL - invalid parameter passed to function
- * - ENOMEM - out of memory
- */
-struct tle_udp_ctx *
-tle_udp_create(const struct tle_udp_ctx_param *ctx_prm);
-
-/**
- * Destroy given UDP context.
- *
- * @param ctx
- * UDP context to destroy
- */
-void tle_udp_destroy(struct tle_udp_ctx *ctx);
-
-/**
- * Add new device into the given UDP context.
- * This function is not multi-thread safe.
- *
- * @param ctx
- * UDP context to add new device into.
- * @param dev_prm
- * Parameters used to create and initialise new device inside the
- * UDP context.
- * @return
- * Pointer to UDP device structure that can be used in future UDP
- * operations, or NULL on error, with error code set in rte_errno.
- * Possible rte_errno errors include:
- * - EINVAL - invalid parameter passed to function
- * - ENODEV - max possible value of open devices is reached
- * - ENOMEM - out of memory
- */
-struct tle_udp_dev *
-tle_udp_add_dev(struct tle_udp_ctx *ctx,
- const struct tle_udp_dev_param *dev_prm);
-
-/**
- * Remove and destroy previously added device from the given UDP context.
- * This function is not multi-thread safe.
- *
- * @param dev
- * UDP device to remove and destroy.
- * @return
- * zero on successful completion.
- * - -EINVAL - invalid parameter passed to function
- */
-int tle_udp_del_dev(struct tle_udp_dev *dev);
-
-/**
- * Flags to the UDP context that destinations info might be changed,
- * so if it has any destinations data cached, then
- * it has to be invalidated.
- * @param ctx
- * UDP context to invalidate.
- */
-void tle_udp_ctx_invalidate(struct tle_udp_ctx *ctx);
-
-struct tle_udp_stream;
-
-/**
- * Stream asynchronous notification mechanisms:
- * a) recv/send callback.
- * Stream recv/send notification callbacks behaviour is edge-triggered (ET).
- * recv callback will be invoked if stream receive buffer was empty and
- * new packet(s) have arrived.
- * send callback will be invoked when stream send buffer was full,
- * and some packets belonging to that stream were sent
- * (part of send buffer became free again).
- * Note that both recv and send callbacks are called with sort of read lock
- * held on that stream. So it is not permitted to call stream_close()
- * within the callback function. Doing that would cause a deadlock.
- * While it is allowed to call stream send/recv functions within the
- * callback, it is not recommended: callback function will be invoked
- * within tle_udp_rx_bulk/tle_udp_tx_bulk context and some heavy processing
- * within the callback functions might cause performance degradation
- * or even loss of packets for further streams.
- * b) recv/send event.
- * Stream recv/send events behavour is level-triggered (LT).
- * receive event will be raised by either
- * tle_udp_rx_burst() or tle_udp_stream_recv() as long as there are any
- * remaining packets inside stream receive buffer.
- * send event will be raised by either
- * tle_udp_tx_burst() or tle_udp_stream_send() as long as there are any
- * free space inside stream send buffer.
- * Note that callback and event are mutually exclusive on <stream, op> basis.
- * It is not possible to open a stream with both recv event and callback
- * specified.
- * Though it is possible to open a stream with recv callback and send event,
- * or visa-versa.
- * If the user doesn't need any notification mechanism for that stream,
- * both event and callback could be set to zero.
- */
-
-/**
- * Stream recv/send callback function and data.
- */
-struct tle_udp_stream_cb {
- void (*func)(void *, struct tle_udp_stream *);
- void *data;
-};
-
-struct tle_event;
-
-/**
- * UDP stream creation parameters.
- */
-struct tle_udp_stream_param {
- struct sockaddr_storage local_addr; /**< stream local address. */
- struct sockaddr_storage remote_addr; /**< stream remote address. */
-
- /* _cb and _ev are mutually exclusive */
- struct tle_event *recv_ev; /**< recv event to use. */
- struct tle_udp_stream_cb recv_cb; /**< recv callback to use. */
-
- struct tle_event *send_ev; /**< send event to use. */
- struct tle_udp_stream_cb send_cb; /**< send callback to use. */
-};
-
-/**
- * create a new stream within given UDP context.
- * @param ctx
- * UDP context to create new stream within.
- * @param prm
- * Parameters used to create and initialise the new stream.
- * @return
- * Pointer to UDP stream structure that can be used in future UDP API calls,
- * or NULL on error, with error code set in rte_errno.
- * Possible rte_errno errors include:
- * - EINVAL - invalid parameter passed to function
- * - ENOFILE - max limit of open streams reached for that context
- */
-struct tle_udp_stream *
-tle_udp_stream_open(struct tle_udp_ctx *ctx,
- const struct tle_udp_stream_param *prm);
-
-/**
- * close an open stream.
- * All packets still remaining in stream receive buffer will be freed.
- * All packets still remaining in stream transmit buffer will be kept
- * for father transmission.
- * @param s
- * Pointer to the stream to close.
- * @return
- * zero on successful completion.
- * - -EINVAL - invalid parameter passed to function
- */
-int tle_udp_stream_close(struct tle_udp_stream *s);
-
-/**
- * get open stream parameters.
- * @param s
- * Pointer to the stream.
- * @return
- * zero on successful completion.
- * - -EINVAL - invalid parameter passed to function
- */
-int
-tle_udp_stream_get_param(const struct tle_udp_stream *s,
- struct tle_udp_stream_param *prm);
-
-/**
- * Take input mbufs and distribute them to open UDP streams.
- * expects that for each input packet:
- * - l2_len, l3_len, l4_len are setup correctly
- * - (packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6)) != 0,
- * - (packet_type & RTE_PTYPE_L4_UDP) != 0,
- * During delivery L3/L4 checksums will be verified
- * (either relies on HW offload or in SW).
- * This function is not multi-thread safe.
- * @param dev
- * UDP device the packets were received from.
- * @param pkt
- * The burst of input packets that need to be processed.
- * @param rp
- * The array that will contain pointers of unprocessed packets at return.
- * Should contain at least *num* elements.
- * @param rc
- * The array that will contain error code for corresponding rp[] entry:
- * - ENOENT - no open stream matching this packet.
- * - ENOBUFS - receive buffer of the destination stream is full.
- * Should contain at least *num* elements.
- * @param num
- * Number of elements in the *pkt* input array.
- * @return
- * number of packets delivered to the UDP streams.
- */
-uint16_t tle_udp_rx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[],
- struct rte_mbuf *rp[], int32_t rc[], uint16_t num);
-
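A hedged sketch of the back-end RX loop this call is designed for; udp_dev, port_id, queue_id and MAX_PKT_BURST are placeholders for values set up elsewhere:

    struct rte_mbuf *pkt[MAX_PKT_BURST], *rp[MAX_PKT_BURST];
    int32_t rc[MAX_PKT_BURST];
    uint16_t i, n, k;

    /* pull a burst from the NIC and demultiplex it into streams */
    n = rte_eth_rx_burst(port_id, queue_id, pkt, RTE_DIM(pkt));
    k = tle_udp_rx_bulk(udp_dev, pkt, rp, rc, n);

    /* rp[] now holds the n - k rejected packets, rc[] the reasons */
    for (i = 0; i != n - k; i++)
        rte_pktmbuf_free(rp[i]);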
-/**
- * Fill *pkt* with pointers to the packets that have to be transmitted
- * over given UDP device.
- * Output packets have to be ready to be passed straight to rte_eth_tx_burst()
- * without any extra processing.
- * The UDP/IPv4 checksum is either already calculated, or the appropriate
- * mbuf fields are set properly for HW offload.
- * This function is not multi-thread safe.
- * @param dev
- * UDP device the output packets will be transmitted over.
- * @param pkt
- * An array of pointers to *rte_mbuf* structures that
- * must be large enough to store up to *num* pointers in it.
- * @param num
- * Number of elements in the *pkt* array.
- * @return
- * number of entries filled inside the *pkt* array.
- */
-uint16_t tle_udp_tx_bulk(struct tle_udp_dev *dev, struct rte_mbuf *pkt[],
- uint16_t num);
-
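And the matching back-end TX sketch, using the same placeholder names:

    struct rte_mbuf *pkt[MAX_PKT_BURST];
    uint16_t i, n, k;

    /* drain packets queued for this device and hand them to the NIC */
    n = tle_udp_tx_bulk(udp_dev, pkt, RTE_DIM(pkt));
    k = rte_eth_tx_burst(port_id, queue_id, pkt, n);

    /* free whatever the NIC TX queue did not accept */
    for (i = k; i != n; i++)
        rte_pktmbuf_free(pkt[i]);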
-/**
- * return up to *num* mbufs that were received for the given UDP stream.
- * For each returned mbuf:
- * data_off set to the start of the packet's UDP data
- * l2_len, l3_len, l4_len are set up properly
- * (so user can still extract L2/L3 address info if needed)
- * packet_type RTE_PTYPE_L2/L3/L4 bits are set properly.
- * L3/L4 checksum is verified.
- * Packets with invalid L3/L4 checksum will be silently dropped.
- * @param s
- * UDP stream to receive packets from.
- * @param pkt
- * An array of pointers to *rte_mbuf* structures that
- * must be large enough to store up to *num* pointers in it.
- * @param num
- * Number of elements in the *pkt* array.
- * @return
- * number of entries filled inside the *pkt* array.
- */
-uint16_t tle_udp_stream_recv(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
- uint16_t num);
-
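A front-end sketch that drains a stream after its recv event fires; process_payload() is a hypothetical application hook:

    struct rte_mbuf *m[MAX_PKT_BURST];
    uint16_t i, n;

    do {
        n = tle_udp_stream_recv(s, m, RTE_DIM(m));
        for (i = 0; i != n; i++) {
            /* data_off already points at the UDP payload */
            process_payload(rte_pktmbuf_mtod(m[i], const void *),
                rte_pktmbuf_data_len(m[i]));
            rte_pktmbuf_free(m[i]);
        }
    } while (n == RTE_DIM(m));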
-/**
- * Consume and queue up to *num* packets that will eventually be sent
- * by tle_udp_tx_bulk().
- * If *dst_addr* is NULL, then the default remote address associated with
- * that stream (if any) will be used.
- * The main purpose of this function is to determine over which UDP dev the
- * given packets have to be sent out and to do the necessary preparations.
- * Based on the *dst_addr* it does route lookup, fills L2/L3/L4 headers,
- * and, if necessary, fragments packets.
- * Depending on the underlying device information, it either does
- * IP/UDP checksum calculations in SW or sets mbuf TX checksum
- * offload fields properly.
- * For each input mbuf the following conditions have to be met:
- * - data_off points to the start of the packet's UDP data.
- * - there is enough header space to prepend L2/L3/L4 headers.
- * @param s
- * UDP stream to send packets over.
- * @param pkt
- * The burst of output packets that need to be sent.
- * @param num
- * Number of elements in the *pkt* array.
- * @param dst_addr
- * Destination address to send packets to.
- * @return
- * number of packets successfully queued in the stream send buffer.
- */
-uint16_t tle_udp_stream_send(struct tle_udp_stream *s, struct rte_mbuf *pkt[],
- uint16_t num, const struct sockaddr *dst_addr);
-
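The corresponding send-side sketch; a NULL dst_addr means the stream's default remote address, and any mbufs that were not queued remain the caller's to free or retry:

    uint16_t i, n;

    n = tle_udp_stream_send(s, m, nb, NULL);
    for (i = n; i != nb; i++)
        rte_pktmbuf_free(m[i]);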
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _TLE_UDP_IMPL_H_ */
diff --git a/lib/libtle_udp/udp_ctl.c b/lib/libtle_udp/udp_ctl.c
deleted file mode 100644
index faedcad..0000000
--- a/lib/libtle_udp/udp_ctl.c
+++ /dev/null
@@ -1,794 +0,0 @@
-/*
- * Copyright (c) 2016 Intel Corporation.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <string.h>
-#include <rte_malloc.h>
-#include <rte_errno.h>
-#include <rte_ethdev.h>
-#include <rte_ip.h>
-#include <rte_udp.h>
-
-#include "udp_impl.h"
-#include "misc.h"
-
-#define LPORT_START 0x8000
-#define LPORT_END MAX_PORT_NUM
-
-#define LPORT_START_BLK PORT_BLK(LPORT_START)
-#define LPORT_END_BLK PORT_BLK(LPORT_END)
-
-static const struct in6_addr tle_udp6_any = IN6ADDR_ANY_INIT;
-static const struct in6_addr tle_udp6_none = {
- {
- .__u6_addr32 = {
- UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX
- },
- },
-};
-
-static int
-check_dev_prm(const struct tle_udp_dev_param *dev_prm)
-{
- /* no valid IPv4/IPv6 addresses provided. */
- if (dev_prm->local_addr4.s_addr == INADDR_ANY &&
- memcmp(&dev_prm->local_addr6, &tle_udp6_any,
- sizeof(tle_udp6_any)) == 0)
- return -EINVAL;
-
- /* all the ports are blocked. */
- if (dev_prm->bl4.nb_port > UINT16_MAX ||
- (dev_prm->bl4.nb_port != 0 && dev_prm->bl4.port == NULL))
- return -EINVAL;
-
- if (dev_prm->bl6.nb_port > UINT16_MAX ||
- (dev_prm->bl6.nb_port != 0 && dev_prm->bl6.port == NULL))
- return -EINVAL;
-
- return 0;
-}
-
-static void
-unuse_stream(struct tle_udp_stream *s)
-{
- s->type = TLE_UDP_VNUM;
- rte_atomic32_set(&s->rx.use, INT32_MIN);
- rte_atomic32_set(&s->tx.use, INT32_MIN);
-}
-
-/* calculate number of drbs per stream. */
-static uint32_t
-calc_stream_drb_num(const struct tle_udp_ctx *ctx, uint32_t obj_num)
-{
- uint32_t num;
-
- num = (ctx->prm.max_stream_sbufs + obj_num - 1) / obj_num;
- num = num + num / 2;
- num = RTE_MAX(num, RTE_DIM(ctx->dev) + 1);
- return num;
-}
-
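With illustrative numbers: max_stream_sbufs = 2048 and obj_num = 64 give ceil(2048/64) = 32 drbs to cover the send buffer, the second line adds 50% headroom (48), and the final clamp guarantees at least RTE_DIM(ctx->dev) + 1 blocks, so each device can hold one drb while the stream still keeps a spare.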
-static uint32_t
-drb_nb_elem(const struct tle_udp_ctx *ctx)
-{
- return (ctx->prm.send_bulk_size != 0) ?
- ctx->prm.send_bulk_size : MAX_PKT_BURST;
-}
-
-static int
-init_stream(struct tle_udp_ctx *ctx, struct tle_udp_stream *s)
-{
- size_t bsz, rsz, sz;
- uint32_t i, k, n, nb;
- struct tle_drb *drb;
- char name[RTE_RING_NAMESIZE];
-
- /* init RX part. */
-
- n = RTE_MAX(ctx->prm.max_stream_rbufs, 1U);
- n = rte_align32pow2(n);
- sz = sizeof(*s->rx.q) + n * sizeof(s->rx.q->ring[0]);
-
- s->rx.q = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
- ctx->prm.socket_id);
- if (s->rx.q == NULL) {
- UDP_LOG(ERR, "%s(%p): allocation of %zu bytes on socket %d "
- "failed with error code: %d\n",
- __func__, s, sz, ctx->prm.socket_id, rte_errno);
- return ENOMEM;
- }
-
- snprintf(name, sizeof(name), "%p@%zu", s, sz);
- rte_ring_init(s->rx.q, name, n, RING_F_SP_ENQ);
-
- /* init TX part. */
-
- nb = drb_nb_elem(ctx);
- k = calc_stream_drb_num(ctx, nb);
- n = rte_align32pow2(k);
-
- /* size of the drbs ring */
- rsz = sizeof(*s->tx.drb.r) + n * sizeof(s->tx.drb.r->ring[0]);
- rsz = RTE_ALIGN_CEIL(rsz, RTE_CACHE_LINE_SIZE);
-
- /* size of the drb. */
- bsz = tle_drb_calc_size(nb);
-
- /* total stream drbs size. */
- sz = rsz + bsz * k;
-
- s->tx.drb.r = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
- ctx->prm.socket_id);
- if (s->tx.drb.r == NULL) {
- UDP_LOG(ERR, "%s(%p): allocation of %zu bytes on socket %d "
- "failed with error code: %d\n",
- __func__, s, sz, ctx->prm.socket_id, rte_errno);
- return ENOMEM;
- }
-
- snprintf(name, sizeof(name), "%p@%zu", s, sz);
- rte_ring_init(s->tx.drb.r, name, n, 0);
-
- for (i = 0; i != k; i++) {
- drb = (struct tle_drb *)((uintptr_t)s->tx.drb.r +
- rsz + bsz * i);
- drb->udata = s;
- drb->size = nb;
- rte_ring_enqueue(s->tx.drb.r, drb);
- }
-
- s->tx.drb.nb_elem = nb;
- s->tx.drb.nb_max = k;
-
- /* mark stream as available for use. */
-
- s->ctx = ctx;
- unuse_stream(s);
- STAILQ_INSERT_TAIL(&ctx->streams.free, s, link);
-
- return 0;
-}
-
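Worth noting about the layout above: the drbs ring header and all k drbs come from a single cache-aligned allocation, with drb i placed at offset rsz + bsz * i, and the ring is sized up to the next power of two (e.g. 48 drbs get a 64-slot ring) since rte_ring requires power-of-two sizes.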
-static void
-fini_stream(struct tle_udp_stream *s)
-{
- rte_free(s->rx.q);
- rte_free(s->tx.drb.r);
-}
-
-struct tle_udp_ctx *
-tle_udp_create(const struct tle_udp_ctx_param *ctx_prm)
-{
- struct tle_udp_ctx *ctx;
- size_t sz;
- uint32_t i;
-
- if (ctx_prm == NULL) {
- rte_errno = EINVAL;
- return NULL;
- }
-
- sz = sizeof(*ctx);
- ctx = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
- ctx_prm->socket_id);
- if (ctx == NULL) {
- UDP_LOG(ERR, "allocation of %zu bytes for new udp_ctx "
- "on socket %d failed\n",
- sz, ctx_prm->socket_id);
- return NULL;
- }
-
- ctx->prm = *ctx_prm;
-
- sz = sizeof(*ctx->streams.buf) * ctx_prm->max_streams;
- ctx->streams.buf = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
- ctx_prm->socket_id);
- if (ctx->streams.buf == NULL) {
- UDP_LOG(ERR, "allocation of %zu bytes on socket %d "
- "for %u udp_streams failed\n",
- sz, ctx_prm->socket_id, ctx_prm->max_streams);
- tle_udp_destroy(ctx);
- return NULL;
- }
-
- STAILQ_INIT(&ctx->streams.free);
- for (i = 0; i != ctx_prm->max_streams &&
- init_stream(ctx, &ctx->streams.buf[i]) == 0;
- i++)
- ;
-
- if (i != ctx_prm->max_streams) {
- UDP_LOG(ERR, "initalisation of %u-th stream failed", i);
- tle_udp_destroy(ctx);
- return NULL;
- }
-
- for (i = 0; i != RTE_DIM(ctx->use); i++)
- udp_pbm_init(ctx->use + i, LPORT_START_BLK);
-
- ctx->streams.nb_free = ctx->prm.max_streams;
- return ctx;
-}
-
-void
-tle_udp_destroy(struct tle_udp_ctx *ctx)
-{
- uint32_t i;
-
- if (ctx == NULL) {
- rte_errno = EINVAL;
- return;
- }
-
- for (i = 0; i != RTE_DIM(ctx->dev); i++)
- tle_udp_del_dev(ctx->dev + i);
-
- if (ctx->streams.buf != 0) {
- for (i = 0; i != ctx->prm.max_streams; i++)
- fini_stream(&ctx->streams.buf[i]);
- rte_free(ctx->streams.buf);
- }
-
- rte_free(ctx);
-}
-
-void
-tle_udp_ctx_invalidate(struct tle_udp_ctx *ctx)
-{
- RTE_SET_USED(ctx);
-}
-
-static void
-fill_pbm(struct udp_pbm *pbm, const struct tle_bl_port *blp)
-{
- uint32_t i;
-
- for (i = 0; i != blp->nb_port; i++)
- udp_pbm_set(pbm, blp->port[i]);
-}
-
-static int
-init_dev_proto(struct tle_udp_dev *dev, uint32_t idx, int32_t socket_id,
- const struct tle_bl_port *blp)
-{
- size_t sz;
-
- sz = sizeof(*dev->dp[idx]);
- dev->dp[idx] = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
- socket_id);
-
- if (dev->dp[idx] == NULL) {
- UDP_LOG(ERR, "allocation of %zu bytes on "
- "socket %d for %u-th device failed\n",
- sz, socket_id, idx);
- return ENOMEM;
- }
-
- udp_pbm_init(&dev->dp[idx]->use, LPORT_START_BLK);
- fill_pbm(&dev->dp[idx]->use, blp);
-
- return 0;
-}
-
-static struct tle_udp_dev *
-find_free_dev(struct tle_udp_ctx *ctx)
-{
- uint32_t i;
-
- if (ctx->nb_dev < RTE_DIM(ctx->dev)) {
- for (i = 0; i != RTE_DIM(ctx->dev); i++) {
- if (ctx->dev[i].ctx != ctx)
- return ctx->dev + i;
- }
- }
-
- rte_errno = ENODEV;
- return NULL;
-}
-
-struct tle_udp_dev *
-tle_udp_add_dev(struct tle_udp_ctx *ctx,
- const struct tle_udp_dev_param *dev_prm)
-{
- int32_t rc;
- struct tle_udp_dev *dev;
-
- if (ctx == NULL || dev_prm == NULL || check_dev_prm(dev_prm) != 0) {
- rte_errno = EINVAL;
- return NULL;
- }
-
- dev = find_free_dev(ctx);
- if (dev == NULL)
- return NULL;
- rc = 0;
-
- /* device can handle IPv4 traffic */
- if (dev_prm->local_addr4.s_addr != INADDR_ANY) {
- rc = init_dev_proto(dev, TLE_UDP_V4, ctx->prm.socket_id,
- &dev_prm->bl4);
- if (rc == 0)
- fill_pbm(&ctx->use[TLE_UDP_V4], &dev_prm->bl4);
- }
-
- /* device can handle IPv6 traffic */
- if (rc == 0 && memcmp(&dev_prm->local_addr6, &tle_udp6_any,
- sizeof(tle_udp6_any)) != 0) {
- rc = init_dev_proto(dev, TLE_UDP_V6, ctx->prm.socket_id,
- &dev_prm->bl6);
- if (rc == 0)
- fill_pbm(&ctx->use[TLE_UDP_V6], &dev_prm->bl6);
- }
-
- if (rc != 0) {
- /* cleanup and return an error. */
- rte_free(dev->dp[TLE_UDP_V4]);
- rte_free(dev->dp[TLE_UDP_V6]);
- rte_errno = rc;
- return NULL;
- }
-
- /* setup RX data. */
- if (dev_prm->local_addr4.s_addr != INADDR_ANY &&
- (dev_prm->rx_offload & DEV_RX_OFFLOAD_IPV4_CKSUM) == 0)
- dev->rx.ol_flags[TLE_UDP_V4] |= PKT_RX_IP_CKSUM_BAD;
- if ((dev_prm->rx_offload & DEV_RX_OFFLOAD_UDP_CKSUM) == 0) {
- dev->rx.ol_flags[TLE_UDP_V4] |= PKT_RX_L4_CKSUM_BAD;
- dev->rx.ol_flags[TLE_UDP_V6] |= PKT_RX_L4_CKSUM_BAD;
- }
-
- /* setup TX data. */
- tle_dring_reset(&dev->tx.dr);
-
- if ((dev_prm->tx_offload & DEV_TX_OFFLOAD_UDP_CKSUM) != 0) {
- dev->tx.ol_flags[TLE_UDP_V4] |= PKT_TX_IPV4 | PKT_TX_UDP_CKSUM;
- dev->tx.ol_flags[TLE_UDP_V6] |= PKT_TX_IPV6 | PKT_TX_UDP_CKSUM;
- }
- if ((dev_prm->tx_offload & DEV_TX_OFFLOAD_IPV4_CKSUM) != 0)
- dev->tx.ol_flags[TLE_UDP_V4] |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
-
- dev->prm = *dev_prm;
- dev->ctx = ctx;
- ctx->nb_dev++;
-
- return dev;
-}
-
-static void
-empty_dring(struct tle_dring *dr)
-{
- uint32_t i, k, n;
- struct tle_udp_stream *s;
- struct rte_mbuf *pkt[MAX_PKT_BURST];
- struct tle_drb *drb[MAX_PKT_BURST];
-
- do {
- k = RTE_DIM(drb);
- n = tle_dring_sc_dequeue(dr, (const void **)(uintptr_t)pkt,
- RTE_DIM(pkt), drb, &k);
-
- /* free mbufs */
- for (i = 0; i != n; i++)
- rte_pktmbuf_free(pkt[i]);
- /* free drbs */
- for (i = 0; i != k; i++) {
- s = drb[i]->udata;
- rte_ring_enqueue(s->tx.drb.r, drb[i]);
- }
- } while (n != 0);
-}
-
-int
-tle_udp_del_dev(struct tle_udp_dev *dev)
-{
- uint32_t p;
- struct tle_udp_ctx *ctx;
-
- if (dev == NULL || dev->ctx == NULL)
- return -EINVAL;
-
- ctx = dev->ctx;
- p = dev - ctx->dev;
-
- if (p >= RTE_DIM(ctx->dev) ||
- (dev->dp[TLE_UDP_V4] == NULL &&
- dev->dp[TLE_UDP_V6] == NULL))
- return -EINVAL;
-
- /* empty TX queues. */
- empty_dring(&dev->tx.dr);
-
- rte_free(dev->dp[TLE_UDP_V4]);
- rte_free(dev->dp[TLE_UDP_V6]);
- memset(dev, 0, sizeof(*dev));
- ctx->nb_dev--;
- return 0;
-}
-
-static inline void
-stream_down(struct tle_udp_stream *s)
-{
- rwl_down(&s->rx.use);
- rwl_down(&s->tx.use);
-}
-
-static inline void
-stream_up(struct tle_udp_stream *s)
-{
- rwl_up(&s->rx.use);
- rwl_up(&s->tx.use);
-}
-
-static struct tle_udp_dev *
-find_ipv4_dev(struct tle_udp_ctx *ctx, const struct in_addr *addr)
-{
- uint32_t i;
-
- for (i = 0; i != RTE_DIM(ctx->dev); i++) {
- if (ctx->dev[i].prm.local_addr4.s_addr == addr->s_addr &&
- ctx->dev[i].dp[TLE_UDP_V4] != NULL)
- return ctx->dev + i;
- }
-
- return NULL;
-}
-
-static struct tle_udp_dev *
-find_ipv6_dev(struct tle_udp_ctx *ctx, const struct in6_addr *addr)
-{
- uint32_t i;
-
- for (i = 0; i != RTE_DIM(ctx->dev); i++) {
- if (memcmp(&ctx->dev[i].prm.local_addr6, addr,
- sizeof(*addr)) == 0 &&
- ctx->dev[i].dp[TLE_UDP_V6] != NULL)
- return ctx->dev + i;
- }
-
- return NULL;
-}
-
-static int
-stream_fill_dev(struct tle_udp_ctx *ctx, struct tle_udp_stream *s)
-{
- struct tle_udp_dev *dev;
- struct udp_pbm *pbm;
- struct sockaddr_in *lin4;
- struct sockaddr_in6 *lin6;
- uint32_t i, p, sp, t;
-
- if (s->prm.local_addr.ss_family == AF_INET) {
- lin4 = (struct sockaddr_in *)&s->prm.local_addr;
- t = TLE_UDP_V4;
- p = lin4->sin_port;
- } else if (s->prm.local_addr.ss_family == AF_INET6) {
- lin6 = (struct sockaddr_in6 *)&s->prm.local_addr;
- t = TLE_UDP_V6;
- p = lin6->sin6_port;
- } else
- return EINVAL;
-
- p = ntohs(p);
-
- /* if local address is not wildcard, find device it belongs to. */
- if (t == TLE_UDP_V4 && lin4->sin_addr.s_addr != INADDR_ANY) {
- dev = find_ipv4_dev(ctx, &lin4->sin_addr);
- if (dev == NULL)
- return ENODEV;
- } else if (t == TLE_UDP_V6 && memcmp(&tle_udp6_any, &lin6->sin6_addr,
- sizeof(tle_udp6_any)) != 0) {
- dev = find_ipv6_dev(ctx, &lin6->sin6_addr);
- if (dev == NULL)
- return ENODEV;
- } else
- dev = NULL;
-
- if (dev != NULL)
- pbm = &dev->dp[t]->use;
- else
- pbm = &ctx->use[t];
-
- /* try to acquire local port number. */
- if (p == 0) {
- p = udp_pbm_find_range(pbm, pbm->blk, LPORT_END_BLK);
- if (p == 0 && pbm->blk > LPORT_START_BLK)
- p = udp_pbm_find_range(pbm, LPORT_START_BLK, pbm->blk);
- } else if (udp_pbm_check(pbm, p) != 0)
- return EEXIST;
-
- if (p == 0)
- return ENFILE;
-
- /* fill stream's dst port and type */
- sp = htons(p);
- s->type = t;
- s->port.dst = sp;
-
- /* mark port as in-use */
- udp_pbm_set(&ctx->use[t], p);
- if (dev != NULL) {
- udp_pbm_set(pbm, p);
- dev->dp[t]->streams[sp] = s;
- } else {
- for (i = 0; i != RTE_DIM(ctx->dev); i++) {
- if (ctx->dev[i].dp[t] != NULL) {
- udp_pbm_set(&ctx->dev[i].dp[t]->use, p);
- ctx->dev[i].dp[t]->streams[sp] = s;
- }
- }
- }
-
- return 0;
-}
-
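Note the wrap-around in the ephemeral-port search above: it first scans from the block of the most recently allocated port (pbm->blk) up to LPORT_END_BLK, then retries from LPORT_START_BLK, so streams opened with port 0 walk forward through the 0x8000..0xFFFF range instead of reusing the lowest free port every time.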
-static int
-stream_clear_dev(struct tle_udp_ctx *ctx, struct tle_udp_stream *s)
-{
- struct tle_udp_dev *dev;
- uint32_t i, p, sp, t;
-
- t = s->type;
- sp = s->port.dst;
- p = ntohs(sp);
-
- /* if local address is not wildcard, find device it belongs to. */
- if (t == TLE_UDP_V4 && s->ipv4.addr.dst != INADDR_ANY) {
- dev = find_ipv4_dev(ctx, (struct in_addr *)&s->ipv4.addr.dst);
- if (dev == NULL)
- return ENODEV;
- } else if (t == TLE_UDP_V6 && memcmp(&tle_udp6_any, &s->ipv6.addr.dst,
- sizeof(tle_udp6_any)) != 0) {
- dev = find_ipv6_dev(ctx, (struct in6_addr *)&s->ipv6.addr.dst);
- if (dev == NULL)
- return ENODEV;
- } else
- dev = NULL;
-
- udp_pbm_clear(&ctx->use[t], p);
- if (dev != NULL) {
- udp_pbm_clear(&dev->dp[t]->use, p);
- dev->dp[t]->streams[sp] = NULL;
- } else {
- for (i = 0; i != RTE_DIM(ctx->dev); i++) {
- if (ctx->dev[i].dp[t] != NULL) {
- udp_pbm_clear(&ctx->dev[i].dp[t]->use, p);
- ctx->dev[i].dp[t]->streams[sp] = NULL;
- }
- }
- }
-
- return 0;
-}
-
-static struct tle_udp_stream *
-get_stream(struct tle_udp_ctx *ctx)
-{
- struct tle_udp_stream *s;
-
- s = NULL;
- if (ctx->streams.nb_free == 0)
- return s;
-
- rte_spinlock_lock(&ctx->streams.lock);
- if (ctx->streams.nb_free != 0) {
- s = STAILQ_FIRST(&ctx->streams.free);
- STAILQ_REMOVE_HEAD(&ctx->streams.free, link);
- ctx->streams.nb_free--;
- }
- rte_spinlock_unlock(&ctx->streams.lock);
- return s;
-}
-
-static void
-put_stream(struct tle_udp_ctx *ctx, struct tle_udp_stream *s, int32_t head)
-{
- s->type = TLE_UDP_VNUM;
- rte_spinlock_lock(&ctx->streams.lock);
- if (head != 0)
- STAILQ_INSERT_HEAD(&ctx->streams.free, s, link);
- else
- STAILQ_INSERT_TAIL(&ctx->streams.free, s, link);
- ctx->streams.nb_free++;
- rte_spinlock_unlock(&ctx->streams.lock);
-}
-
-static void
-fill_ipv4_am(const struct sockaddr_in *in, uint32_t *addr, uint32_t *mask)
-{
- *addr = in->sin_addr.s_addr;
- *mask = (*addr == INADDR_ANY) ? INADDR_ANY : INADDR_NONE;
-}
-
-static void
-fill_ipv6_am(const struct sockaddr_in6 *in, rte_xmm_t *addr, rte_xmm_t *mask)
-{
- const struct in6_addr *pm;
-
- memcpy(addr, &in->sin6_addr, sizeof(*addr));
- if (memcmp(&tle_udp6_any, addr, sizeof(*addr)) == 0)
- pm = &tle_udp6_any;
- else
- pm = &tle_udp6_none;
-
- memcpy(mask, pm, sizeof(*mask));
-}
-
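These helpers produce address/mask pairs where a wildcard address yields an all-zero mask and a specific address an all-ones mask; a sketch of the kind of test they are built for (the actual lookup lives in the RX demux code, not here):

    static inline int
    match_ipv4(uint32_t pkt_addr, uint32_t addr, uint32_t mask)
    {
        /* mask == 0 matches anything; mask == UINT32_MAX demands equality */
        return (pkt_addr & mask) == addr;
    }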
-static int
-check_stream_prm(const struct tle_udp_ctx *ctx,
- const struct tle_udp_stream_param *prm)
-{
- if ((prm->local_addr.ss_family != AF_INET &&
- prm->local_addr.ss_family != AF_INET6) ||
- prm->local_addr.ss_family != prm->remote_addr.ss_family)
- return -EINVAL;
-
- /* callback and event notifications mechanisms are mutually exclusive */
- if ((prm->recv_ev != NULL && prm->recv_cb.func != NULL) ||
- (prm->send_ev != NULL && prm->send_cb.func != NULL))
- return -EINVAL;
-
- /* check does context support desired address family. */
- if ((prm->local_addr.ss_family == AF_INET &&
- ctx->prm.lookup4 == NULL) ||
- (prm->local_addr.ss_family == AF_INET6 &&
- ctx->prm.lookup6 == NULL))
- return -EINVAL;
-
- return 0;
-}
-
-struct tle_udp_stream *
-tle_udp_stream_open(struct tle_udp_ctx *ctx,
- const struct tle_udp_stream_param *prm)
-{
- struct tle_udp_stream *s;
- const struct sockaddr_in *rin;
- int32_t rc;
-
- if (ctx == NULL || prm == NULL || check_stream_prm(ctx, prm) != 0) {
- rte_errno = EINVAL;
- return NULL;
- }
-
- s = get_stream(ctx);
- if (s == NULL) {
- rte_errno = ENFILE;
- return NULL;
-
- /* some TX still pending for that stream. */
- } else if (UDP_STREAM_TX_PENDING(s)) {
- put_stream(ctx, s, 0);
- rte_errno = EAGAIN;
- return NULL;
- }
-
- /* copy input parameters. */
- s->prm = *prm;
-
- /* setup ports and port mask fields (except dst port). */
- rin = (const struct sockaddr_in *)&prm->remote_addr;
- s->port.src = rin->sin_port;
- s->pmsk.src = (s->port.src == 0) ? 0 : UINT16_MAX;
- s->pmsk.dst = UINT16_MAX;
-
- /* setup src and dst addresses. */
- if (prm->local_addr.ss_family == AF_INET) {
- fill_ipv4_am((const struct sockaddr_in *)&prm->local_addr,
- &s->ipv4.addr.dst, &s->ipv4.mask.dst);
- fill_ipv4_am((const struct sockaddr_in *)&prm->remote_addr,
- &s->ipv4.addr.src, &s->ipv4.mask.src);
- } else if (prm->local_addr.ss_family == AF_INET6) {
- fill_ipv6_am((const struct sockaddr_in6 *)&prm->local_addr,
- &s->ipv6.addr.dst, &s->ipv6.mask.dst);
- fill_ipv6_am((const struct sockaddr_in6 *)&prm->remote_addr,
- &s->ipv6.addr.src, &s->ipv6.mask.src);
- }
-
- rte_spinlock_lock(&ctx->dev_lock);
- rc = stream_fill_dev(ctx, s);
- rte_spinlock_unlock(&ctx->dev_lock);
-
- if (rc != 0) {
- put_stream(ctx, s, 1);
- s = NULL;
- rte_errno = rc;
- } else {
- /* setup stream notification mechanism */
- s->rx.ev = prm->recv_ev;
- s->rx.cb = prm->recv_cb;
- s->tx.ev = prm->send_ev;
- s->tx.cb = prm->send_cb;
-
- /* mark stream as available for RX/TX */
- if (s->tx.ev != NULL)
- tle_event_raise(s->tx.ev);
- stream_up(s);
- }
-
- return s;
-}
-
-int
-tle_udp_stream_close(struct tle_udp_stream *s)
-{
- uint32_t i, n;
- int32_t rc;
- struct tle_udp_ctx *ctx;
- struct rte_mbuf *m[MAX_PKT_BURST];
-
- static const struct tle_udp_stream_cb zcb;
-
- if (s == NULL || s->type >= TLE_UDP_VNUM)
- return -EINVAL;
-
- ctx = s->ctx;
-
- /* mark stream as unavailable for RX/TX. */
- stream_down(s);
-
- /* reset stream events if any. */
- if (s->rx.ev != NULL) {
- tle_event_idle(s->rx.ev);
- s->rx.ev = NULL;
- }
- if (s->tx.ev != NULL) {
- tle_event_idle(s->tx.ev);
- s->tx.ev = NULL;
- }
-
- s->rx.cb = zcb;
- s->tx.cb = zcb;
-
- /* free stream's destination port */
- rte_spinlock_lock(&ctx->dev_lock);
- rc = stream_clear_dev(ctx, s);
- rte_spinlock_unlock(&ctx->dev_lock);
-
- /* empty stream's RX queue */
- do {
- n = rte_ring_dequeue_burst(s->rx.q, (void **)m, RTE_DIM(m));
- for (i = 0; i != n; i++)
- rte_pktmbuf_free(m[i]);
- } while (n != 0);
-
- /*
- * mark the stream as free again.
- * if there are still packets queued for TX,
- * then put this stream at the tail of the free list.
- */
- put_stream(ctx, s, UDP_STREAM_TX_FINISHED(s));
- return rc;
-}
-
-int
-tle_udp_stream_get_param(const struct tle_udp_stream *s,
- struct tle_udp_stream_param *prm)
-{
- struct sockaddr_in *lin4;
- struct sockaddr_in6 *lin6;
-
- if (prm == NULL || s == NULL || s->type >= TLE_UDP_VNUM)
- return -EINVAL;
-
- prm[0] = s->prm;
- if (prm->local_addr.ss_family == AF_INET) {
- lin4 = (struct sockaddr_in *)&prm->local_addr;
- lin4->sin_port = s->port.dst;
- } else if (s->prm.local_addr.ss_family == AF_INET6) {
- lin6 = (struct sockaddr_in6 *)&prm->local_addr;
- lin6->sin6_port = s->port.dst;
- }
-
- return 0;
-}
diff --git a/lib/libtle_udp/udp_impl.h b/lib/libtle_udp/udp_impl.h
deleted file mode 100644
index af35197..0000000
--- a/lib/libtle_udp/udp_impl.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2016 Intel Corporation.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef _UDP_IMPL_H_
-#define _UDP_IMPL_H_
-
-#include <rte_spinlock.h>
-#include <rte_vect.h>
-#include <tle_dring.h>
-#include <tle_udp_impl.h>
-#include <tle_event.h>
-
-#include "port_bitmap.h"
-#include "osdep.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-enum {
- TLE_UDP_V4,
- TLE_UDP_V6,
- TLE_UDP_VNUM
-};
-
-union udp_ports {
- uint32_t raw;
- struct {
- uint16_t src;
- uint16_t dst;
- };
-};
-
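The raw view lets both ports be checked in one 32-bit operation; a hedged sketch of the comparison this layout enables (the real demux code is elsewhere in the library):

    static inline int
    match_ports(union udp_ports pkt, union udp_ports port, union udp_ports pmsk)
    {
        /* single compare covers src and dst, with wildcard bits masked off */
        return (pkt.raw & pmsk.raw) == port.raw;
    }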
-union udph {
- uint64_t raw;
- struct {
- union udp_ports ports;
- uint16_t len;
- uint16_t cksum;
- };
-};
-
-union ipv4_addrs {
- uint64_t raw;
- struct {
- uint32_t src;
- uint32_t dst;
- };
-};
-
-union ipv6_addrs {
- _ymm_t raw;
- struct {
- rte_xmm_t src;
- rte_xmm_t dst;
- };
-};
-
-union ip_addrs {
- union ipv4_addrs v4;
- union ipv6_addrs v6;
-};
-
-
-struct tle_udp_stream {
-
- STAILQ_ENTRY(tle_udp_stream) link;
- struct tle_udp_ctx *ctx;
-
- uint8_t type; /* TLE_UDP_V4 or TLE_UDP_V6 */
-
- struct {
- struct rte_ring *q;
- struct tle_event *ev;
- struct tle_udp_stream_cb cb;
- rte_atomic32_t use;
- } rx;
-
- union udp_ports port;
- union udp_ports pmsk;
-
- union {
- struct {
- union ipv4_addrs addr;
- union ipv4_addrs mask;
- } ipv4;
- struct {
- union ipv6_addrs addr;
- union ipv6_addrs mask;
- } ipv6;
- };
-
- struct {
- rte_atomic32_t use;
- struct {
- uint32_t nb_elem; /* number of objects per drb. */
- uint32_t nb_max; /* number of drbs per stream. */
- struct rte_ring *r;
- } drb;
- struct tle_event *ev;
- struct tle_udp_stream_cb cb;
- } tx __rte_cache_aligned;
-
- struct tle_udp_stream_param prm;
-} __rte_cache_aligned;
-
-#define UDP_STREAM_TX_PENDING(s) \
- ((s)->tx.drb.nb_max != rte_ring_count((s)->tx.drb.r))
-
-#define UDP_STREAM_TX_FINISHED(s) \
- ((s)->tx.drb.nb_max == rte_ring_count((s)->tx.drb.r))
-
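Both macros lean on the drb accounting set up in init_stream(): every drb lent to a device dring is returned to tx.drb.r once the back-end has transmitted its packets, so the ring count equals nb_max exactly when nothing is in flight. tle_udp_stream_open() uses the first form to fail with EAGAIN, and tle_udp_stream_close() uses the second to decide whether the stream goes to the head or the tail of the free list.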
-struct tle_udp_dport {
- struct udp_pbm use; /* ports in use. */
- struct tle_udp_stream *streams[MAX_PORT_NUM]; /* port to stream. */
-};
-
-struct tle_udp_dev {
- struct tle_udp_ctx *ctx;
- struct {
- uint64_t ol_flags[TLE_UDP_VNUM];
- } rx;
- struct {
- /* used by FE. */
- uint64_t ol_flags[TLE_UDP_VNUM];
- rte_atomic32_t packet_id[TLE_UDP_VNUM];
-
- /* used by FE & BE. */
- struct tle_dring dr;
- } tx;
- struct tle_udp_dev_param prm; /* copy of device parameters. */
- struct tle_udp_dport *dp[TLE_UDP_VNUM]; /* device udp ports */
-};
-
-struct tle_udp_ctx {
- struct tle_udp_ctx_param prm;
- struct {
- rte_spinlock_t lock;
- uint32_t nb_free; /* number of free streams. */
- STAILQ_HEAD(, tle_udp_stream) free;
- struct tle_udp_stream *buf; /* array of streams */
- } streams;
-
- rte_spinlock_t dev_lock;
- uint32_t nb_dev;
- struct udp_pbm use[TLE_UDP_VNUM]; /* all ports in use. */
- struct tle_udp_dev dev[RTE_MAX_ETHPORTS];
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _UDP_IMPL_H_ */