Diffstat (limited to 'app/nginx/src/tldk')
 app/nginx/src/tldk/be.c         | 1240
 app/nginx/src/tldk/be.h         |   56
 app/nginx/src/tldk/debug.h      |   75
 app/nginx/src/tldk/module.c     |  497
 app/nginx/src/tldk/ngx_tldk.h   |  182
 app/nginx/src/tldk/parse.c      |  456
 app/nginx/src/tldk/tldk_event.c |  276
 app/nginx/src/tldk/tldk_sock.c  |  549
 app/nginx/src/tldk/tldk_sock.h  |  108
 9 files changed, 3439 insertions(+), 0 deletions(-)
diff --git a/app/nginx/src/tldk/be.c b/app/nginx/src/tldk/be.c
new file mode 100644
index 0000000..ba4039a
--- /dev/null
+++ b/app/nginx/src/tldk/be.c
@@ -0,0 +1,1240 @@
+/*
+ * Copyright (c) 2017 Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <netinet/ip6.h>
+
+#include <rte_version.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
+#include <rte_lpm6.h>
+#include <rte_lpm.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
+#include <tle_tcp.h>
+
+#include <ngx_config.h>
+#include <ngx_core.h>
+
+#include "be.h"
+
+#define RX_RING_SIZE 0x400
+#define TX_RING_SIZE 0x800
+#define MAX_RULES 0x100
+#define MAX_TBL8 0x800
+
+#define MPOOL_CACHE_SIZE 0x100
+#define MPOOL_NB_BUF 0x20000
+
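+/* mbufs from the frag pool are used by TLDK for header mbufs
+ * (tle_dest.head_mp), so headroom plus TLE_DST_MAX_HDR bytes
+ * of room is sufficient */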
+#define FRAG_MBUF_BUF_SIZE (RTE_PKTMBUF_HEADROOM + TLE_DST_MAX_HDR)
+
+#define RX_CSUM_OFFLOAD (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_TCP_CKSUM)
+
+#define TCP_MAX_PROCESS 0x20
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = {
+ .hw_vlan_strip = 1,
+ },
+};
+
+struct ptype2cb {
+ uint32_t mask;
+ const char *name;
+ rte_rx_callback_fn fn;
+};
+
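+/*
+ * bit flags summarising which packet types the HW can recognise;
+ * get_ptypes() collects them per device and setup_rx_cb() uses the
+ * result to select an RX callback.
+ */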
+enum {
+ ETHER_PTYPE = 0x1,
+ IPV4_PTYPE = 0x2,
+ IPV4_EXT_PTYPE = 0x4,
+ IPV6_PTYPE = 0x8,
+ IPV6_EXT_PTYPE = 0x10,
+ TCP_PTYPE = 0x20,
+ UDP_PTYPE = 0x40,
+};
+
+int
+be_lcore_lpm_init(struct tldk_ctx *tcx, uint32_t sid,
+ const struct tldk_ctx_conf *cf)
+{
+ ngx_uint_t worker = cf->worker;
+ uint32_t lcore = cf->lcore;
+ char str[RTE_LPM_NAMESIZE];
+
+ const struct rte_lpm_config lpm4_cfg = {
+ .max_rules = MAX_RULES,
+ .number_tbl8s = MAX_TBL8,
+ };
+
+ const struct rte_lpm6_config lpm6_cfg = {
+ .max_rules = MAX_RULES,
+ .number_tbl8s = MAX_TBL8,
+ };
+
+	snprintf(str, sizeof(str), "LPM4%lu-%u", worker, lcore);
+ tcx->lpm4 = rte_lpm_create(str, sid, &lpm4_cfg);
+ RTE_LOG(NOTICE, USER1, "%s(worker=%lu, lcore=%u): lpm4=%p;\n",
+ __func__, worker, lcore, tcx->lpm4);
+ if (tcx->lpm4 == NULL)
+ return -ENOMEM;
+
+	snprintf(str, sizeof(str), "LPM6%lu-%u", worker, lcore);
+ tcx->lpm6 = rte_lpm6_create(str, sid, &lpm6_cfg);
+ RTE_LOG(NOTICE, USER1, "%s(worker=%lu, lcore=%u): lpm6=%p;\n",
+ __func__, worker, lcore, tcx->lpm6);
+ if (tcx->lpm6 == NULL) {
+ rte_lpm_free(tcx->lpm4);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+int
+be_lpm4_dst_lookup(void *data, const struct in_addr *addr,
+ struct tle_dest *res)
+{
+ int32_t rc;
+ uint32_t idx;
+ struct tldk_ctx *tcx;
+ struct tle_dest *dst;
+
+ tcx = data;
+ rc = rte_lpm_lookup(tcx->lpm4, rte_be_to_cpu_32(addr->s_addr), &idx);
+ if (rc == 0) {
+ dst = &tcx->dst4[idx];
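+		/* copy the fixed tle_dest fields plus only the valid
+		 * l2/l3 bytes of the prebuilt header template */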
+ memcpy(res, dst, dst->l2_len + dst->l3_len +
+ offsetof(struct tle_dest, hdr));
+ }
+
+ return rc;
+}
+
+int
+be_lpm6_dst_lookup(void *data, const struct in6_addr *addr,
+ struct tle_dest *res)
+{
+ int32_t rc;
+ struct tldk_ctx *tcx;
+ struct tle_dest *dst;
+ uintptr_t p;
+#if RTE_VERSION_NUM(17, 5, 0, 0) <= RTE_VERSION
+ uint32_t idx;
+#else
+ uint8_t idx;
+#endif
+
+ tcx = data;
+ p = (uintptr_t)addr->s6_addr;
+ rc = rte_lpm6_lookup(tcx->lpm6, (uint8_t *)p, &idx);
+ if (rc == 0) {
+ dst = &tcx->dst6[idx];
+ memcpy(res, dst, dst->l2_len + dst->l3_len +
+ offsetof(struct tle_dest, hdr));
+ }
+
+ return rc;
+}
+
+/*
+ * Initialise DPDK port.
+ */
+static int
+port_init(const struct tldk_port_conf *pcf)
+{
+ int32_t rc;
+ struct rte_eth_conf port_conf;
+ struct rte_eth_dev_info dev_info;
+
+ rte_eth_dev_info_get(pcf->id, &dev_info);
+
+ if ((dev_info.rx_offload_capa & pcf->rx_offload) != pcf->rx_offload) {
+ RTE_LOG(ERR, USER1,
+ "port#%u supported/requested RX offloads don't match, "
+ "supported: %#x, requested: %#x;\n",
+ pcf->id, dev_info.rx_offload_capa, pcf->rx_offload);
+ return NGX_ERROR;
+ }
+ if ((dev_info.tx_offload_capa & pcf->tx_offload) != pcf->tx_offload) {
+ RTE_LOG(ERR, USER1,
+ "port#%u supported/requested TX offloads don't match, "
+ "supported: %#x, requested: %#x;\n",
+ pcf->id, dev_info.tx_offload_capa, pcf->tx_offload);
+ return NGX_ERROR;
+ }
+
+ port_conf = port_conf_default;
+
+ if ((pcf->rx_offload & RX_CSUM_OFFLOAD) != 0) {
+		RTE_LOG(NOTICE, USER1, "%s(%u): enabling RX csum offload;\n",
+ __func__, pcf->id);
+ port_conf.rxmode.hw_ip_checksum = 1;
+ }
+
+ port_conf.rxmode.max_rx_pkt_len = pcf->mtu + ETHER_CRC_LEN;
+ if (port_conf.rxmode.max_rx_pkt_len > ETHER_MAX_LEN)
+ port_conf.rxmode.jumbo_frame = 1;
+ port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
+ port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_TCP;
+
+ rc = rte_eth_dev_configure(pcf->id, pcf->nb_queues, pcf->nb_queues,
+ &port_conf);
+ RTE_LOG(NOTICE, USER1,
+ "%s: rte_eth_dev_configure(prt_id=%u, nb_rxq=%u, nb_txq=%u) "
+ "returns %d;\n", __func__, pcf->id, pcf->nb_queues,
+ pcf->nb_queues, rc);
+
+ if (rc != 0)
+ return NGX_ERROR;
+
+ return NGX_OK;
+}
+
+/*
+ * Check that lcore is enabled, not master, and not in use already.
+ */
+int
+be_check_lcore(uint32_t lid)
+{
+ if (rte_lcore_is_enabled(lid) == 0) {
+ RTE_LOG(ERR, USER1, "lcore %u is not enabled\n", lid);
+ return -EINVAL;
+ }
+
+ if (rte_get_master_lcore() != lid &&
+ rte_eal_get_lcore_state(lid) == RUNNING) {
+ RTE_LOG(ERR, USER1, "lcore %u already running %p\n",
+ lid, lcore_config[lid].f);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int
+be_mpool_init(struct tldk_ctx *tcx)
+{
+ int32_t rc;
+ uint32_t nmb, sid;
+ struct rte_mempool *mp;
+ char name[RTE_MEMPOOL_NAMESIZE];
+
+ ngx_uint_t worker = tcx->cf->worker;
+ uint32_t lcore = tcx->cf->lcore;
+
+ sid = rte_lcore_to_socket_id(tcx->cf->lcore);
+ nmb = (tcx->cf->nb_mbuf == 0) ? MPOOL_NB_BUF : tcx->cf->nb_mbuf;
+
+ snprintf(name, sizeof(name), "MP%lu-%u", worker, lcore);
+ mp = rte_pktmbuf_pool_create(name, nmb, MPOOL_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE, sid);
+ if (mp == NULL) {
+ rc = -rte_errno;
+ RTE_LOG(ERR, USER1, "%s:Mempool creation failed for "
+ "ctx:wrk(%lu)-ctx:lcore(%u) with error code: %d\n",
+ __func__, worker, lcore, rc);
+ return rc;
+ }
+
+ tcx->mpool = mp;
+
+ snprintf(name, sizeof(name), "frag_MP%lu-%u",
+ worker, lcore);
+	mp = rte_pktmbuf_pool_create(name, nmb,
+		MPOOL_CACHE_SIZE, 0, FRAG_MBUF_BUF_SIZE, sid);
+ if (mp == NULL) {
+ rc = -rte_errno;
+ RTE_LOG(ERR, USER1, "%s:Frag mempool creation failed for "
+ "ctx:wrk(%lu)-ctx:lcore(%u) with error code: %d\n",
+ __func__, worker, lcore, rc);
+ return rc;
+ }
+
+ tcx->frag_mpool = mp;
+
+ return 0;
+}
+
+int
+be_queue_init(struct tldk_ctx *tcx, const tldk_conf_t *cf)
+{
+ int32_t socket, rc;
+ uint16_t queue_id;
+ uint32_t port_id, i;
+ struct rte_eth_dev_info dev_info;
+ const struct tldk_ctx_conf *ctx;
+ const struct tldk_port_conf *pcf;
+
+ ctx = tcx->cf;
+ for (i = 0; i < ctx->nb_dev; i++) {
+ port_id = ctx->dev[i].port;
+ queue_id = ctx->dev[i].queue;
+ pcf = &cf->port[port_id];
+
+ rte_eth_dev_info_get(port_id, &dev_info);
+ dev_info.default_rxconf.rx_drop_en = 1;
+ dev_info.default_txconf.tx_free_thresh = TX_RING_SIZE / 2;
+
+ if (pcf->tx_offload != 0) {
+			RTE_LOG(NOTICE, USER1,
+ "%s(port=%u): enabling full featured TX;\n",
+ __func__, port_id);
+ dev_info.default_txconf.txq_flags = 0;
+ }
+
+ socket = rte_eth_dev_socket_id(port_id);
+
+ rc = rte_eth_rx_queue_setup(port_id, queue_id, RX_RING_SIZE,
+ socket, &dev_info.default_rxconf, tcx->mpool);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: rx queue=%u setup failed with error "
+ "code: %d\n", __func__, queue_id, rc);
+ return rc;
+ }
+
+ rc = rte_eth_tx_queue_setup(port_id, queue_id, TX_RING_SIZE,
+ socket, &dev_info.default_txconf);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: tx queue=%u setup failed with error "
+ "code: %d\n", __func__, queue_id, rc);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Setup all enabled ports.
+ */
+int
+be_port_init(tldk_conf_t *cf)
+{
+ int32_t rc;
+ uint32_t i;
+ struct tldk_port_conf *dpf;
+
+ for (i = 0; i != cf->nb_port; i++) {
+ dpf = &cf->port[i];
+ rc = port_init(dpf);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: port=%u init failed with error code: %d\n",
+ __func__, dpf->id, rc);
+ return NGX_ERROR;
+ }
+ rte_eth_macaddr_get(dpf->id, &dpf->mac);
+ rte_eth_promiscuous_enable(dpf->id);
+ }
+
+ return NGX_OK;
+}
+
+static int
+be_add_ipv4_route(struct tldk_ctx *tcx, const struct tldk_dest_conf *dcf,
+ uint8_t idx)
+{
+ int32_t rc;
+ uint32_t addr, depth;
+ char str[INET_ADDRSTRLEN];
+
+ depth = dcf->prfx;
+ addr = rte_be_to_cpu_32(dcf->ipv4.s_addr);
+
+ inet_ntop(AF_INET, &dcf->ipv4, str, sizeof(str));
+ rc = rte_lpm_add(tcx->lpm4, addr, depth, idx);
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u,dev_id=%u,dev=%p,"
+ "ipv4=%s/%u,mtu=%u,"
+ "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) "
+ "returns %d;\n",
+ __func__, tcx->cf->lcore, dcf->dev, tcx->dst4[idx].dev,
+ str, depth, tcx->dst4[idx].mtu,
+ dcf->mac.addr_bytes[0], dcf->mac.addr_bytes[1],
+ dcf->mac.addr_bytes[2], dcf->mac.addr_bytes[3],
+ dcf->mac.addr_bytes[4], dcf->mac.addr_bytes[5],
+ rc);
+
+ return rc;
+}
+
+static int
+be_add_ipv6_route(struct tldk_ctx *tcx, const struct tldk_dest_conf *dcf,
+ uint8_t idx)
+{
+ int32_t rc;
+ uint32_t depth;
+ char str[INET6_ADDRSTRLEN];
+
+ depth = dcf->prfx;
+
+ rc = rte_lpm6_add(tcx->lpm6, (uint8_t *)(uintptr_t)dcf->ipv6.s6_addr,
+ depth, idx);
+
+ inet_ntop(AF_INET6, &dcf->ipv6, str, sizeof(str));
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u,dev_id=%u,dev=%p,"
+ "ipv6=%s/%u,mtu=%u,"
+ "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) "
+ "returns %d;\n",
+ __func__, tcx->cf->lcore, dcf->dev, tcx->dst6[idx].dev,
+		str, depth, tcx->dst6[idx].mtu,
+ dcf->mac.addr_bytes[0], dcf->mac.addr_bytes[1],
+ dcf->mac.addr_bytes[2], dcf->mac.addr_bytes[3],
+ dcf->mac.addr_bytes[4], dcf->mac.addr_bytes[5],
+ rc);
+
+ return rc;
+}
+
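+/*
+ * Build the prebuilt L2/L3 header template in dst->hdr for one
+ * destination; the TLDK TX path copies this template into outgoing
+ * packets and fills in the per-packet fields afterwards.
+ */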
+static void
+fill_dst(struct tle_dest *dst, const struct tldk_dev *td,
+ const struct tldk_port_conf *pcf, const struct tldk_dest_conf *dest,
+ uint16_t l3_type, struct rte_mempool *mp)
+{
+ struct ether_hdr *eth;
+ struct ipv4_hdr *ip4h;
+ struct ipv6_hdr *ip6h;
+
+ dst->dev = td->dev;
+ dst->head_mp = mp;
+ dst->mtu = RTE_MIN(dest->mtu, pcf->mtu);
+ dst->l2_len = sizeof(*eth);
+
+ eth = (struct ether_hdr *)dst->hdr;
+
+ ether_addr_copy(&pcf->mac, &eth->s_addr);
+ ether_addr_copy(&dest->mac, &eth->d_addr);
+ eth->ether_type = rte_cpu_to_be_16(l3_type);
+
+ if (l3_type == ETHER_TYPE_IPv4) {
+ dst->l3_len = sizeof(*ip4h);
+ ip4h = (struct ipv4_hdr *)(eth + 1);
+ ip4h->version_ihl = 4 << 4 |
+ sizeof(*ip4h) / IPV4_IHL_MULTIPLIER;
+ ip4h->time_to_live = 64;
+ ip4h->next_proto_id = IPPROTO_TCP;
+ } else if (l3_type == ETHER_TYPE_IPv6) {
+ dst->l3_len = sizeof(*ip6h);
+ ip6h = (struct ipv6_hdr *)(eth + 1);
+		ip6h->vtc_flow = rte_cpu_to_be_32(6 << 28); /* version=6 */
+ ip6h->proto = IPPROTO_TCP;
+ ip6h->hop_limits = 64;
+ }
+}
+
+static int
+be_add_dest(const struct tldk_dest_conf *dcf, struct tldk_ctx *tcx,
+ uint32_t dev_idx, const struct tldk_port_conf *pcf, uint32_t family,
+ uint32_t dnum)
+{
+ struct tle_dest *dp;
+ uint32_t i, n, m;
+ uint16_t l3_type;
+ int32_t rc = 0;
+
+ if (family == AF_INET) {
+ n = tcx->dst4_num;
+ dp = tcx->dst4 + n;
+ m = RTE_DIM(tcx->dst4);
+ l3_type = ETHER_TYPE_IPv4;
+ } else {
+ n = tcx->dst6_num;
+ dp = tcx->dst6 + n;
+ m = RTE_DIM(tcx->dst6);
+ l3_type = ETHER_TYPE_IPv6;
+ }
+
+ if (n + dnum >= m) {
+		RTE_LOG(ERR, USER1, "%s(lcore=%u, family=%u, dnum=%u) exceeds "
+ "maximum allowed number of destinations(%u);\n",
+ __func__, tcx->cf->lcore, family, dnum, m);
+ return -ENOSPC;
+ }
+
+ for (i = 0; i != dnum && rc == 0; i++) {
+ fill_dst(dp + i, &tcx->dev[dev_idx], pcf, dcf,
+ l3_type, tcx->frag_mpool);
+ if (family == AF_INET)
+ rc = be_add_ipv4_route(tcx, dcf, n + i);
+ else
+ rc = be_add_ipv6_route(tcx, dcf, n + i);
+ }
+
+ if (family == AF_INET)
+ tcx->dst4_num = n + i;
+ else
+ tcx->dst6_num = n + i;
+
+ return rc;
+}
+
+int
+be_dst_init(struct tldk_ctx *tcx, const tldk_conf_t *cf)
+{
+ uint32_t i, f, d, l, port_id;
+ const struct tldk_ctx_conf *ctx_cf = tcx->cf;
+ const struct tldk_dest_conf *dcf;
+ const struct tldk_port_conf *pcf;
+ int32_t rc = 0;
+
+ for (i = 0; i < ctx_cf->nb_dest; i++) {
+ dcf = &ctx_cf->dest[i];
+ f = dcf->family;
+ d = dcf->dev;
+ for (l = 0; l != tcx->nb_dev; l++) {
+ if (tcx->dev[l].cf.id == d) {
+ /* fetch the port conf for the port
+ * associated with device
+ */
+ port_id = tcx->dev[l].cf.port;
+ pcf = &cf->port[port_id];
+ rc = be_add_dest(dcf, tcx, l, pcf, f, 1);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s(tcx=%u, family=%u) "
+ "could not add "
+ "destinations(%u)\n",
+ __func__, ctx_cf->lcore, f, i);
+ return -ENOSPC;
+ }
+ break;
+ }
+ }
+ }
+
+ return rc;
+}
+
+int
+be_add_dev(struct tldk_ctx *tcx, const tldk_conf_t *cf)
+{
+ int32_t rc = 0;
+ uint32_t i, port_id;
+ struct tle_dev_param dprm;
+ const struct tldk_port_conf *pcf;
+
+ memset(&dprm, 0, sizeof(dprm));
+
+ /* add the tle_dev on all applicable ports of the context */
+ for (i = 0; i != tcx->cf->nb_dev; i++) {
+
+ /* get the port id associated with the device */
+ port_id = tcx->cf->dev[i].port;
+
+ /* get the port config by port id */
+ pcf = &cf->port[port_id];
+
+ /* populate the tle_dev_param struct */
+ dprm.rx_offload = pcf->rx_offload;
+ dprm.tx_offload = pcf->tx_offload;
+ dprm.local_addr4.s_addr = pcf->ipv4;
+
+ memcpy(&dprm.local_addr6, &pcf->ipv6,
+ sizeof(pcf->ipv6));
+
+ /* add the tle_dev */
+ tcx->dev[i].dev = tle_add_dev(tcx->ctx, &dprm);
+
+ RTE_LOG(NOTICE, USER1, "%s(port=%u), dev: %p\n",
+ __func__, port_id,
+ tcx->dev[i].dev);
+
+ if (tcx->dev[i].dev == NULL)
+ rc = -rte_errno;
+
+ if (rc != 0)
+ return rc;
+
+ tcx->nb_dev++;
+ tcx->dev[i].cf = tcx->cf->dev[i];
+ }
+
+ return rc;
+}
+
+static uint32_t
+get_ptypes(const struct tldk_dev *td)
+{
+ uint32_t smask;
+ int32_t i, rc;
+ const uint32_t pmask = RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK |
+ RTE_PTYPE_L4_MASK;
+
+ smask = 0;
+ rc = rte_eth_dev_get_supported_ptypes(td->cf.port, pmask, NULL, 0);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s(port=%u) failed to get supported ptypes;\n",
+ __func__, td->cf.port);
+ return smask;
+ }
+
+ uint32_t ptype[rc];
+ rc = rte_eth_dev_get_supported_ptypes(td->cf.port, pmask, ptype, rc);
+
+ for (i = 0; i != rc; i++) {
+ switch (ptype[i]) {
+ case RTE_PTYPE_L2_ETHER:
+ smask |= ETHER_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV4:
+ case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN:
+ smask |= IPV4_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV4_EXT:
+ smask |= IPV4_EXT_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV6:
+ case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN:
+ smask |= IPV6_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV6_EXT:
+ smask |= IPV6_EXT_PTYPE;
+ break;
+ case RTE_PTYPE_L4_TCP:
+ smask |= TCP_PTYPE;
+ break;
+ case RTE_PTYPE_L4_UDP:
+ smask |= UDP_PTYPE;
+ break;
+ }
+ }
+
+ return smask;
+}
+
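+/*
+ * Pack header lengths into the DPDK mbuf tx_offload bit layout:
+ * l2_len:7 | l3_len:9 | l4_len:8 | tso_segsz:16 |
+ * outer_l3_len:9 | outer_l2_len:7 (hence the 7/16/24/40/49 shifts).
+ */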
+static inline uint64_t
+_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
+ uint64_t ol3, uint64_t ol2)
+{
+ return il2 | il3 << 7 | il4 << 16 | tso << 24 | ol3 << 40 | ol2 << 49;
+}
+
+static inline void
+fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4)
+{
+ m->tx_offload = _mbuf_tx_offload(l2, l3, l4, 0, 0, 0);
+}
+
+static inline int
+is_ipv4_frag(const struct ipv4_hdr *iph)
+{
+ const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG);
+
+ return ((mask & iph->fragment_offset) != 0);
+}
+
+static inline uint32_t
+get_tcp_header_size(struct rte_mbuf *m, uint32_t l2_len, uint32_t l3_len)
+{
+ const struct tcp_hdr *tcp;
+
+ tcp = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len);
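+	/* the upper nibble of data_off is the TCP header length
+	 * in 32-bit words */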
+ return (tcp->data_off >> 4) * 4;
+}
+
+static inline void
+adjust_ipv4_pktlen(struct rte_mbuf *m, uint32_t l2_len)
+{
+ uint32_t plen, trim;
+ const struct ipv4_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2_len);
+ plen = rte_be_to_cpu_16(iph->total_length) + l2_len;
+ if (plen < m->pkt_len) {
+ trim = m->pkt_len - plen;
+ rte_pktmbuf_trim(m, trim);
+ }
+}
+
+static inline void
+adjust_ipv6_pktlen(struct rte_mbuf *m, uint32_t l2_len)
+{
+ uint32_t plen, trim;
+ const struct ipv6_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, l2_len);
+ plen = rte_be_to_cpu_16(iph->payload_len) + sizeof(*iph) + l2_len;
+ if (plen < m->pkt_len) {
+ trim = m->pkt_len - plen;
+ rte_pktmbuf_trim(m, trim);
+ }
+}
+
+static inline void
+tcp_stat_update(struct tldk_ctx *lc, const struct rte_mbuf *m,
+ uint32_t l2_len, uint32_t l3_len)
+{
+ const struct tcp_hdr *th;
+
+ th = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len);
+ lc->tcp_stat.flags[th->tcp_flags]++;
+}
+
+static inline uint32_t
+get_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, uint32_t frag)
+{
+ const struct ipv4_hdr *iph;
+ int32_t dlen, len;
+
+ dlen = rte_pktmbuf_data_len(m);
+ dlen -= l2;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2);
+ len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER;
+
+ if (frag != 0 && is_ipv4_frag(iph)) {
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+ m->packet_type |= RTE_PTYPE_L4_FRAG;
+ }
+
+ if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto))
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+
+ return len;
+}
+
+static inline int
+ipv6x_hdr(uint32_t proto)
+{
+ return (proto == IPPROTO_HOPOPTS ||
+ proto == IPPROTO_ROUTING ||
+ proto == IPPROTO_FRAGMENT ||
+ proto == IPPROTO_AH ||
+ proto == IPPROTO_NONE ||
+ proto == IPPROTO_DSTOPTS);
+}
+
+static inline uint32_t
+get_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto,
+ uint32_t fproto)
+{
+ const struct ip6_ext *ipx;
+ int32_t dlen, len, ofs;
+
+ len = sizeof(struct ipv6_hdr);
+
+ dlen = rte_pktmbuf_data_len(m);
+ dlen -= l2;
+
+ ofs = l2 + len;
+ ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs);
+
+ while (ofs > 0 && len < dlen) {
+
+ switch (nproto) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ ofs = (ipx->ip6e_len + 1) << 3;
+ break;
+ case IPPROTO_AH:
+ ofs = (ipx->ip6e_len + 2) << 2;
+ break;
+ case IPPROTO_FRAGMENT:
+ /*
+ * tso_segsz is not used by RX, so use it as temporary
+ * buffer to store the fragment offset.
+ */
+ m->tso_segsz = ofs;
+ ofs = sizeof(struct ip6_frag);
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+ m->packet_type |= RTE_PTYPE_L4_FRAG;
+ break;
+ default:
+ ofs = 0;
+ }
+
+ if (ofs > 0) {
+ nproto = ipx->ip6e_nxt;
+ len += ofs;
+ ipx += ofs / sizeof(*ipx);
+ }
+ }
+
+ /* unrecognized or invalid packet. */
+ if ((ofs == 0 && nproto != fproto) || len > dlen)
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+
+ return len;
+}
+
+static inline uint32_t
+get_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto)
+{
+ const struct ipv6_hdr *iph;
+
+	iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, l2);
+
+ if (iph->proto == fproto)
+ return sizeof(struct ipv6_hdr);
+ else if (ipv6x_hdr(iph->proto) != 0)
+ return get_ipv6x_hdr_len(m, l2, iph->proto, fproto);
+
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return 0;
+}
+
+static inline void
+fill_eth_tcp_hdr_len(struct rte_mbuf *m)
+{
+ uint32_t dlen, l2_len, l3_len, l4_len;
+ uint16_t etp;
+ const struct ether_hdr *eth;
+
+ dlen = rte_pktmbuf_data_len(m);
+
+ /* check that first segment is at least 54B long. */
+ if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
+ sizeof(struct tcp_hdr)) {
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return;
+ }
+
+ l2_len = sizeof(*eth);
+
+ eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
+ etp = eth->ether_type;
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
+ l2_len += sizeof(struct vlan_hdr);
+
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
+ m->packet_type = RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_TCP, 1);
+ l4_len = get_tcp_header_size(m, l2_len, l3_len);
+ fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(m, l2_len);
+ } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
+ dlen >= l2_len + sizeof(struct ipv6_hdr) +
+ sizeof(struct tcp_hdr)) {
+ m->packet_type = RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(m, l2_len, l3_len);
+ fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(m, l2_len);
+ } else
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+}
+
+/*
+ * HW can recognize L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k)
+ */
+static uint16_t
+type0_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, __rte_unused void *user_param)
+{
+ uint32_t j, tp;
+ uint32_t l4_len, l3_len, l2_len;
+ const struct ether_hdr *eth;
+
+ l2_len = sizeof(*eth);
+
+ for (j = 0; j != nb_pkts; j++) {
+
+ BE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ /* non fragmented tcp packets. */
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L2_ETHER):
+ l4_len = get_tcp_header_size(pkt[j], l2_len,
+ sizeof(struct ipv4_hdr));
+ fill_pkt_hdr_len(pkt[j], l2_len,
+ sizeof(struct ipv4_hdr), l4_len);
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L2_ETHER):
+ l4_len = get_tcp_header_size(pkt[j], l2_len,
+ sizeof(struct ipv6_hdr));
+ fill_pkt_hdr_len(pkt[j], l2_len,
+ sizeof(struct ipv6_hdr), l4_len);
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv4_hdr_len(pkt[j], l2_len,
+ IPPROTO_TCP, 0);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ break;
+ default:
+ /* treat packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+ }
+
+ return nb_pkts;
+}
+
+/*
+ * HW can recognize L2/L3/L4 and fragments (i40e).
+ */
+static uint16_t
+type1_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, tp;
+ struct tldk_ctx *tcx;
+ uint32_t l4_len, l3_len, l2_len;
+ const struct ether_hdr *eth;
+
+ tcx = user_param;
+ l2_len = sizeof(*eth);
+
+ for (j = 0; j != nb_pkts; j++) {
+
+ BE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv4_hdr_len(pkt[j], l2_len,
+ IPPROTO_TCP, 0);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ tcp_stat_update(tcx, pkt[j], l2_len, l3_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ tcp_stat_update(tcx, pkt[j], l2_len, l3_len);
+ break;
+ default:
+ /* treat packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+
+ }
+
+ return nb_pkts;
+}
+
+static uint16_t
+typen_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, __rte_unused void *user_param)
+{
+ uint32_t j;
+
+ for (j = 0; j != nb_pkts; j++) {
+
+ BE_PKT_DUMP(pkt[j]);
+ fill_eth_tcp_hdr_len(pkt[j]);
+ }
+
+ return nb_pkts;
+}
+
+int
+setup_rx_cb(const struct tldk_dev *td, struct tldk_ctx *tcx)
+{
+ int32_t rc;
+ uint32_t i, n, smask;
+ void *cb;
+ const struct ptype2cb *ptype2cb;
+
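+	/* candidate RX callbacks, ordered from most to least HW ptype
+	 * support; the first entry whose mask is fully covered by the
+	 * port's supported ptypes wins. */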
+ static const struct ptype2cb tcp_ptype2cb[] = {
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE |
+ IPV6_PTYPE | IPV6_EXT_PTYPE | TCP_PTYPE,
+ .name = "HW l2/l3x/l4-tcp ptype",
+ .fn = type0_tcp_rx_callback,
+ },
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE |
+ TCP_PTYPE,
+ .name = "HW l2/l3/l4-tcp ptype",
+ .fn = type1_tcp_rx_callback,
+ },
+ {
+ .mask = 0,
+ .name = "tcp no HW ptype",
+ .fn = typen_tcp_rx_callback,
+ },
+ };
+
+ smask = get_ptypes(td);
+
+ ptype2cb = tcp_ptype2cb;
+ n = RTE_DIM(tcp_ptype2cb);
+
+ for (i = 0; i != n; i++) {
+ if ((smask & ptype2cb[i].mask) == ptype2cb[i].mask) {
+ cb = rte_eth_add_rx_callback(td->cf.port, td->cf.queue,
+ ptype2cb[i].fn, tcx);
+ rc = -rte_errno;
+ RTE_LOG(ERR, USER1,
+ "%s(port=%u), setup RX callback \"%s\" "
+ "returns %p;\n",
+ __func__, td->cf.port, ptype2cb[i].name, cb);
+ return ((cb == NULL) ? rc : 0);
+ }
+ }
+
+ /* no proper callback found. */
+ RTE_LOG(ERR, USER1,
+ "%s(port=%u) failed to find an appropriate callback;\n",
+ __func__, td->cf.port);
+ return -ENOENT;
+}
+
+int
+be_lcore_setup(struct tldk_ctx *tcx)
+{
+ uint32_t i;
+ int32_t rc;
+
+ RTE_LOG(NOTICE, USER1, "%s:(lcore=%u, ctx=%p) start\n",
+ __func__, tcx->cf->lcore, tcx->ctx);
+
+ rc = 0;
+ for (i = 0; i != tcx->nb_dev && rc == 0; i++) {
+ RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, q=%u)\n",
+ __func__, i, tcx->dev[i].cf.port, tcx->dev[i].cf.queue);
+
+ rc = setup_rx_cb(&tcx->dev[i], tcx);
+ if (rc < 0)
+ return rc;
+ }
+
+ return rc;
+}
+
+static inline void
+be_rx(struct tldk_dev *dev)
+{
+ uint32_t j, k, n;
+ struct rte_mbuf *pkt[MAX_PKT_BURST];
+ struct rte_mbuf *rp[MAX_PKT_BURST];
+ int32_t rc[MAX_PKT_BURST];
+
+ n = rte_eth_rx_burst(dev->cf.port,
+ dev->cf.queue, pkt, RTE_DIM(pkt));
+
+ if (n != 0) {
+ dev->rx_stat.in += n;
+ BE_TRACE("%s(%u): rte_eth_rx_burst(%u, %u) returns %u\n",
+ __func__, dev->cf.id, dev->cf.port,
+ dev->cf.queue, n);
+
+ k = tle_tcp_rx_bulk(dev->dev, pkt, rp, rc, n);
+
+ dev->rx_stat.up += k;
+ dev->rx_stat.drop += n - k;
+ BE_TRACE("%s: tle_tcp_rx_bulk(%p, %u) returns %u\n",
+ __func__, dev->dev, n, k);
+
+ for (j = 0; j != n - k; j++) {
+ BE_TRACE("%s:%d(port=%u) rp[%u]={%p, %d};\n",
+ __func__, __LINE__, dev->cf.port,
+ j, rp[j], rc[j]);
+ rte_pktmbuf_free(rp[j]);
+ }
+ }
+}
+
+static inline void
+be_tx(struct tldk_dev *dev)
+{
+ uint32_t j = 0, k, n;
+ struct rte_mbuf **mb;
+
+ n = dev->tx_buf.num;
+ k = RTE_DIM(dev->tx_buf.pkt) - n;
+ mb = dev->tx_buf.pkt;
+
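+	/* refill from TLDK only when at least half of tx_buf is free */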
+ if (k >= RTE_DIM(dev->tx_buf.pkt) / 2) {
+ j = tle_tcp_tx_bulk(dev->dev, mb + n, k);
+ n += j;
+ dev->tx_stat.down += j;
+ }
+
+ if (n == 0)
+ return;
+
+ BE_TRACE("%s: tle_tcp_tx_bulk(%p) returns %u,\n"
+ "total pkts to send: %u\n",
+ __func__, dev->dev, j, n);
+
+ for (j = 0; j != n; j++)
+ BE_PKT_DUMP(mb[j]);
+
+ k = rte_eth_tx_burst(dev->cf.port,
+ dev->cf.queue, mb, n);
+
+ dev->tx_stat.out += k;
+ dev->tx_stat.drop += n - k;
+ BE_TRACE("%s: rte_eth_tx_burst(%u, %u, %u) returns %u\n",
+ __func__, dev->cf.port,
+ dev->cf.queue, n, k);
+
+ dev->tx_buf.num = n - k;
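+	/* shift the unsent mbufs to the front of tx_buf for the next call */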
+ if (k != 0)
+ for (j = k; j != n; j++)
+ mb[j - k] = mb[j];
+}
+
+void
+be_lcore_tcp(struct tldk_ctx *tcx)
+{
+ uint32_t i;
+
+ if (tcx == NULL)
+ return;
+
+ for (i = 0; i != tcx->nb_dev; i++) {
+ be_rx(&tcx->dev[i]);
+ be_tx(&tcx->dev[i]);
+ }
+ tle_tcp_process(tcx->ctx, TCP_MAX_PROCESS);
+}
+
+void
+be_lcore_clear(struct tldk_ctx *tcx)
+{
+ uint32_t i, j;
+
+ if (tcx == NULL)
+ return;
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u, ctx: %p) finish\n",
+ __func__, tcx->cf->lcore, tcx->ctx);
+ for (i = 0; i != tcx->nb_dev; i++) {
+ RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, q=%u, lcore=%u, dev=%p) "
+ "rx_stats={"
+ "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "}, "
+ "tx_stats={"
+ "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "};\n",
+ __func__, i, tcx->dev[i].cf.port, tcx->dev[i].cf.queue,
+ tcx->cf->lcore,
+ tcx->dev[i].dev,
+ tcx->dev[i].rx_stat.in,
+ tcx->dev[i].rx_stat.up,
+ tcx->dev[i].rx_stat.drop,
+ tcx->dev[i].tx_stat.down,
+ tcx->dev[i].tx_stat.out,
+ tcx->dev[i].tx_stat.drop);
+ }
+
+ RTE_LOG(NOTICE, USER1, "tcp_stat={\n");
+ for (i = 0; i != RTE_DIM(tcx->tcp_stat.flags); i++) {
+ if (tcx->tcp_stat.flags[i] != 0)
+ RTE_LOG(NOTICE, USER1, "[flag=%#x]==%" PRIu64 ";\n",
+ i, tcx->tcp_stat.flags[i]);
+ }
+ RTE_LOG(NOTICE, USER1, "};\n");
+
+ for (i = 0; i != tcx->nb_dev; i++)
+ for (j = 0; j != tcx->dev[i].tx_buf.num; j++)
+ rte_pktmbuf_free(tcx->dev[i].tx_buf.pkt[j]);
+
+}
+
+void
+be_stop_port(uint32_t port)
+{
+ struct rte_eth_stats stats;
+
+	RTE_LOG(NOTICE, USER1, "%s: stopping port %u\n", __func__, port);
+
+ rte_eth_stats_get(port, &stats);
+ RTE_LOG(NOTICE, USER1, "port %u stats={\n"
+ "ipackets=%" PRIu64 ";"
+ "ibytes=%" PRIu64 ";"
+ "ierrors=%" PRIu64 ";"
+ "imissed=%" PRIu64 ";\n"
+ "opackets=%" PRIu64 ";"
+ "obytes=%" PRIu64 ";"
+ "oerrors=%" PRIu64 ";\n"
+ "}\n",
+ port,
+ stats.ipackets,
+ stats.ibytes,
+ stats.ierrors,
+ stats.imissed,
+ stats.opackets,
+ stats.obytes,
+ stats.oerrors);
+ rte_eth_dev_stop(port);
+}
+
+int
+be_lcore_main(void *arg)
+{
+ int32_t rc;
+ uint32_t lid, i;
+ struct tldk_ctx *tcx;
+ struct lcore_ctxs_list *lc_ctx;
+
+ lc_ctx = arg;
+ lid = rte_lcore_id();
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n", __func__, lid);
+
+ rc = 0;
+ while (force_quit == 0) {
+ for (i = 0; i < lc_ctx->nb_ctxs; i++) {
+ tcx = lc_ctx->ctxs[i];
+ be_lcore_tcp(tcx);
+ }
+ }
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n", __func__, lid);
+
+ return rc;
+}
diff --git a/app/nginx/src/tldk/be.h b/app/nginx/src/tldk/be.h
new file mode 100644
index 0000000..900dfa8
--- /dev/null
+++ b/app/nginx/src/tldk/be.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __BE_H__
+#define __BE_H__
+
+#include "ngx_tldk.h"
+
+extern volatile int force_quit;
+
+int be_lpm4_dst_lookup(void *data, const struct in_addr *addr,
+ struct tle_dest *res);
+int be_lpm6_dst_lookup(void *data, const struct in6_addr *addr,
+ struct tle_dest *res);
+int be_lcore_lpm_init(struct tldk_ctx *tcx, uint32_t sid,
+ const struct tldk_ctx_conf *cf);
+int be_port_init(tldk_conf_t *cf);
+int be_dst_init(struct tldk_ctx *tcx, const tldk_conf_t *cf);
+int be_add_dev(struct tldk_ctx *tcx, const tldk_conf_t *cf);
+int be_mpool_init(struct tldk_ctx *tcx);
+int be_queue_init(struct tldk_ctx *tcx, const tldk_conf_t *cf);
+int be_check_lcore(uint32_t lc);
+
+void be_lcore_tcp(struct tldk_ctx *tcx);
+
+int be_lcore_main(void *arg);
+int be_lcore_setup(struct tldk_ctx *tcx);
+void be_lcore_clear(struct tldk_ctx *tcx);
+
+void be_stop_port(uint32_t port);
+
+#endif /* __BE_H__ */
diff --git a/app/nginx/src/tldk/debug.h b/app/nginx/src/tldk/debug.h
new file mode 100644
index 0000000..2f19dcb
--- /dev/null
+++ b/app/nginx/src/tldk/debug.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2017 Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef _TLDK_DEBUG_H_
+#define _TLDK_DEBUG_H_
+
+#include <rte_cycles.h>
+
+#define FUNC_STAT(v, c) do { \
+ static uint64_t nb_call, nb_data; \
+ nb_call++; \
+ nb_data += (v); \
+ if ((nb_call & ((c) - 1)) == 0) { \
+ printf("%s#%d@%u: nb_call=%lu, avg(" #v ")=%#Lf\n", \
+ __func__, __LINE__, rte_lcore_id(), nb_call, \
+ (long double)nb_data / nb_call); \
+ nb_call = 0; \
+ nb_data = 0; \
+ } \
+} while (0)
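+
+/*
+ * Usage sketch (hypothetical call site):
+ *   FUNC_STAT(n_rx, 0x10000);
+ * prints the running average of n_rx every 0x10000 calls.
+ * Note: (c) must be a power of two, since the macro tests
+ * (nb_call & ((c) - 1)) == 0; the same holds for FUNC_TM_STAT below.
+ */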
+
+#define FUNC_TM_STAT(v, c) do { \
+ static uint64_t nb_call, nb_data; \
+ static uint64_t cts, pts, sts; \
+ cts = rte_rdtsc(); \
+ if (pts != 0) \
+ sts += cts - pts; \
+ pts = cts; \
+ nb_call++; \
+ nb_data += (v); \
+ if ((nb_call & ((c) - 1)) == 0) { \
+ printf("%s#%d@%u: nb_call=%lu, " \
+ "avg(" #v ")=%#Lf, " \
+ "avg(cycles)=%#Lf, " \
+ "avg(cycles/" #v ")=%#Lf\n", \
+ __func__, __LINE__, rte_lcore_id(), nb_call, \
+ (long double)nb_data / nb_call, \
+ (long double)sts / nb_call, \
+ (long double)sts / nb_data); \
+ nb_call = 0; \
+ nb_data = 0; \
+ sts = 0; \
+ } \
+} while (0)
+
+#define COND_FUNC_STAT(e, v, c) do { \
+ if (e) { \
+ FUNC_STAT(v, c); \
+ } \
+} while (0)
+
+#endif /* _TLDK_DEBUG_H_ */
diff --git a/app/nginx/src/tldk/module.c b/app/nginx/src/tldk/module.c
new file mode 100644
index 0000000..4ddea36
--- /dev/null
+++ b/app/nginx/src/tldk/module.c
@@ -0,0 +1,497 @@
+/*
+ * Copyright (c) 2017 Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <rte_ethdev.h>
+#include <rte_lpm6.h>
+#include <rte_lpm.h>
+
+#include <ngx_config.h>
+#include <ngx_core.h>
+
+#include "ngx_tldk.h"
+#include "be.h"
+#include "tldk_sock.h"
+
+extern ngx_module_t ngx_tldk_module;
+
+/* map from ngx_worker to corresponding TLDK ctx */
+struct tldk_ctx wrk2ctx[RTE_MAX_LCORE] = {};
+
+/* per be lcore tldk_ctx(s) */
+static struct lcore_ctxs_list lc_ctxs[RTE_MAX_LCORE];
+
+volatile int force_quit;
+
+static void *
+tldk_module_create_conf(ngx_cycle_t *cycle)
+{
+ tldk_conf_t *cf;
+
+ cf = ngx_pcalloc(cycle->pool, sizeof(*cf));
+ if (cf == NULL)
+ return NULL;
+ return cf;
+}
+
+static char *
+tldk_module_init_conf(ngx_cycle_t *cycle, void *conf)
+{
+ tldk_conf_t *cf;
+
+ cf = conf;
+ (void)cf;
+ return NGX_CONF_OK;
+}
+
+static void
+fini_context(struct tldk_ctx *tcx)
+{
+ tle_ctx_destroy(tcx->ctx);
+ rte_lpm_free(tcx->lpm4);
+ rte_lpm6_free(tcx->lpm6);
+ rte_mempool_free(tcx->mpool);
+ rte_mempool_free(tcx->frag_mpool);
+ memset(tcx, 0, sizeof(*tcx));
+}
+
+static int
+init_context(struct tldk_ctx *tcx, const struct tldk_ctx_conf *cf,
+ tldk_conf_t *cft)
+{
+	uint32_t lc, sid;
+	int32_t rc;
+ struct tle_ctx_param cprm;
+
+ lc = cf->lcore;
+ sid = rte_lcore_to_socket_id(lc);
+ rc = be_lcore_lpm_init(tcx, sid, cf);
+ if (rc != 0)
+ return rc;
+
+ memset(&cprm, 0, sizeof(cprm));
+ cprm.socket_id = sid;
+ cprm.proto = TLE_PROTO_TCP;
+ cprm.max_streams = cf->nb_stream;
+ cprm.max_stream_rbufs = cf->nb_rbuf;
+ cprm.max_stream_sbufs = cf->nb_sbuf;
+ if (cf->be_in_worker != 0)
+ cprm.flags |= TLE_CTX_FLAG_ST;
+ cprm.timewait = cf->tcp_timewait;
+ cprm.lookup4 = be_lpm4_dst_lookup;
+ cprm.lookup4_data = tcx;
+ cprm.lookup6 = be_lpm6_dst_lookup;
+ cprm.lookup6_data = tcx;
+ cprm.secret_key.u64[0] = rte_rand();
+ cprm.secret_key.u64[1] = rte_rand();
+
+ tcx->ctx = tle_ctx_create(&cprm);
+ RTE_LOG(NOTICE, USER1,
+ "%s: tle_ctx_create(lcore=%u) for worker=%lu returns %p;\n",
+ __func__, lc, cf->worker, tcx->ctx);
+ if (tcx->ctx == NULL) {
+ rte_lpm_free(tcx->lpm4);
+ rte_lpm6_free(tcx->lpm6);
+ return -ENOMEM;
+ }
+
+ tcx->cf = cf;
+
+ /* create mempool for the context */
+ rc = be_mpool_init(tcx);
+ if (rc != 0)
+ return rc;
+
+ /* initialize queues of the device given in the context */
+ rc = be_queue_init(tcx, cft);
+ if (rc != 0)
+ return rc;
+
+ /* create devices of the context */
+ rc = be_add_dev(tcx, cft);
+ if (rc != 0)
+ return rc;
+
+	/*
+	 * add routes for the configured destinations to the context
+	 * (the LPM tables themselves were created by be_lcore_lpm_init
+	 * above)
+	 */
+ rc = be_dst_init(tcx, cft);
+ if (rc != 0)
+ return rc;
+
+ return 0;
+}
+
+void
+tldk_module_fini(ngx_cycle_t *cycle)
+{
+ tldk_conf_t *cf;
+ uint32_t i, wrk;
+
+ cf = (tldk_conf_t *)ngx_get_conf(cycle->conf_ctx, ngx_tldk_module);
+
+ /* signal all launched slave lcores to stop */
+ force_quit = 1;
+
+ /* wait all slave lcores to be stopped */
+ for (i = 0; i != cf->nb_ctx; i++)
+ rte_eal_wait_lcore(cf->ctx[i].lcore);
+
+ /* finish all TLDK contexts */
+ for (i = 0; i != cf->nb_ctx; i++) {
+ wrk = cf->ctx[i].worker;
+		/* free up all tx pkt buffers of the tldk_dev entries
+ * of the tldk_ctx
+ */
+ be_lcore_clear(wrk2ctx + wrk);
+ fini_context(wrk2ctx + wrk);
+ }
+
+ /* stop all ports */
+ for (i = 0; i != cf->nb_port; i++)
+ be_stop_port(cf->port[i].id);
+}
+
+/* configuration sanity check */
+static int
+process_conf(tldk_conf_t *cf)
+{
+ uint32_t i, j, port_id, mask;
+ uint16_t queue_id;
+ const struct tldk_ctx_conf *ctx;
+ struct tldk_port_conf *pcf;
+
+ /*
+ * count the number of queues associated
+	 * with each port by iterating through all tldk_ctx entries.
+	 * if the same queue of a port is used in multiple tldk_ctx
+	 * entries, an error is returned.
+ */
+ for (i = 0; i < cf->nb_ctx; i++) {
+ ctx = &cf->ctx[i];
+ for (j = 0; j < ctx->nb_dev; j++) {
+ port_id = ctx->dev[j].port;
+ queue_id = ctx->dev[j].queue;
+ pcf = &cf->port[port_id];
+
+ if (queue_id >= MAX_PORT_QUEUE)
+ return -EINVAL;
+
+ mask = 1 << queue_id;
+
+ if (pcf->queue_map & mask)
+ /* tldk_port_conf already has the queue */
+ return -EEXIST;
+
+ pcf->queue_map |= mask;
+ if (pcf->nb_queues <= queue_id)
+ pcf->nb_queues = queue_id + 1;
+ }
+ }
+
+ return 0;
+}
+
+static ngx_int_t
+tldk_module_init(ngx_cycle_t *cycle)
+{
+ int32_t rc;
+ uint32_t i, j, wrk, num, lc, ctx_lim;
+ tldk_conf_t *cf;
+ struct tldk_ctx *tcx;
+
+ cf = (tldk_conf_t *)ngx_get_conf(cycle->conf_ctx, ngx_tldk_module);
+
+ rc = rte_eal_init(cf->eal_argc, cf->eal_argv);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: rte_eal_init failed with error code: %d\n",
+ __func__, rc);
+ return NGX_ERROR;
+ }
+
+ rc = process_conf(cf);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: process_conf failed with error code: %d\n",
+ __func__, rc);
+ return NGX_ERROR;
+ }
+
+ /* port initialization */
+ rc = be_port_init(cf);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: be_port_init failed with error code: %d\n",
+ __func__, rc);
+
+ return NGX_ERROR;
+ }
+
+ /* initialise TLDK contexts */
+ for (i = 0; i != cf->nb_ctx; i++) {
+ wrk = cf->ctx[i].worker;
+ rc = init_context(wrk2ctx + wrk, cf->ctx + i, cf);
+ if (rc != 0)
+ break;
+ }
+
+ if (i != cf->nb_ctx) {
+ for (j = 0; j != i; j++) {
+ wrk = cf->ctx[j].worker;
+ fini_context(wrk2ctx + wrk);
+ }
+ RTE_LOG(ERR, USER1,
+ "%s: init_context failed with error code: %d\n",
+ __func__, rc);
+ return NGX_ERROR;
+ }
+
+ /* start the ports */
+ for (i = 0; i != cf->nb_port; i++) {
+ RTE_LOG(NOTICE, USER1, "%s: starting port %u\n",
+ __func__, cf->port[i].id);
+
+ rc = rte_eth_dev_start(cf->port[i].id);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: rte_eth_dev_start(%u) returned "
+ "error code: %d\n",
+ __func__, cf->port[i].id, rc);
+ goto freectx;
+ }
+ }
+
+	/* accumulate all tldk_ctx(s) that belong to one be lcore */
+ for (i = 0; i != cf->nb_ctx; i++) {
+ tcx = &wrk2ctx[cf->ctx[i].worker];
+ /* setup rx callbacks */
+ rc = be_lcore_setup(tcx);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: be_lcore_setup failed with error "
+ "code: %d\n", __func__, rc);
+ goto freectx;
+ }
+
+ if (tcx->cf->be_in_worker)
+ continue;
+
+ lc = cf->ctx[i].lcore;
+ num = lc_ctxs[lc].nb_ctxs;
+ ctx_lim = RTE_DIM(lc_ctxs[lc].ctxs);
+
+ if (num < ctx_lim) {
+ lc_ctxs[lc].ctxs[num] = tcx;
+ lc_ctxs[lc].nb_ctxs++;
+ } else {
+ RTE_LOG(ERR, USER1,
+ "%s: cannot assign more than supported %u "
+ "ctx(s) for the given lcore %u\n",
+ __func__, ctx_lim, lc);
+ goto freectx;
+ }
+ }
+
+ /*
+	 * launch slave lcores with be_lcore_main to handle
+ * multiple tldk_ctx(s) of that lcore.
+ */
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (lc_ctxs[i].nb_ctxs != 0) {
+ if (be_check_lcore(i) != 0 ||
+ rte_eal_remote_launch(be_lcore_main,
+ &lc_ctxs[i], i) != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: failed to launch "
+ "be_lcore_main for core =%u\n",
+ __func__, i);
+ goto freectx;
+ }
+ }
+ }
+
+ return NGX_OK;
+
+freectx:
+ tldk_module_fini(cycle);
+ return NGX_ERROR;
+}
+
+static int
+tldk_open_listening(ngx_cycle_t *cycle, struct tldk_ctx *tcx)
+{
+ uint32_t i;
+ ngx_listening_t *ls;
+ char host[NI_MAXHOST];
+ char srv[NI_MAXSERV];
+
+ ls = cycle->listening.elts;
+ for (i = 0; i != cycle->listening.nelts; i++) {
+
+ if (ls[i].ignore != 0 || ls[i].listen == 0)
+ continue;
+
+ ngx_close_socket(ls[i].fd);
+ ls[i].fd = -1;
+
+ getnameinfo(ls[i].sockaddr, ls[i].socklen,
+ host, sizeof(host), srv, sizeof(srv),
+ NI_NUMERICHOST | NI_NUMERICSERV);
+
+ ls[i].fd = tldk_open_bind_listen(tcx,
+ ls[i].sockaddr->sa_family, ls[i].type,
+ ls[i].sockaddr, ls[i].socklen,
+ ls[i].backlog);
+
+ RTE_LOG(NOTICE, USER1, "%s(worker=%lu): "
+ "listen() for %s:%s returns %d, errno=%d;\n",
+ __func__, ngx_worker, host, srv, ls[i].fd, errno);
+
+ if (ls[i].fd == -1)
+ return NGX_ERROR;
+ }
+
+ return NGX_OK;
+}
+
+static void
+tldk_process_fini(ngx_cycle_t *cycle)
+{
+ struct tldk_ctx *tcx;
+ tcx = wrk2ctx + ngx_worker;
+
+ tldk_stbl_fini();
+ if (tcx->cf->be_in_worker != 0)
+ be_lcore_clear(tcx);
+}
+
+
+static ngx_int_t
+tldk_process_init(ngx_cycle_t *cycle)
+{
+ ngx_event_conf_t *ecf;
+ int32_t rc;
+
+ if (ngx_process != NGX_PROCESS_WORKER)
+ return NGX_OK;
+
+ rc = tldk_stbl_init(cycle, wrk2ctx + ngx_worker);
+ if (rc != 0)
+ return NGX_ERROR;
+
+ rc = tldk_open_listening(cycle, wrk2ctx + ngx_worker);
+ if (rc != 0)
+ return NGX_ERROR;
+
+	/* use tldk event module from now on */
+ ecf = ngx_event_get_conf(cycle->conf_ctx, ngx_event_core_module);
+ ecf->use = ngx_tldk_event_module.ctx_index;
+
+ return NGX_OK;
+}
+
+static char *
+tldk_conf_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
+{
+ char *rv;
+ ngx_conf_t save;
+
+ save = *cf;
+ cf->handler = tldk_block_parse;
+ cf->handler_conf = conf;
+ rv = ngx_conf_parse(cf, NULL);
+ *cf = save;
+
+ return rv;
+}
+
+static char *
+tldk_ctx_block(ngx_conf_t *cf, ngx_command_t *cmd, void *conf)
+{
+ char *rv;
+ tldk_conf_t *tcf;
+ struct tldk_ctx_conf *tcx;
+ ngx_conf_t save;
+
+ tcf = (tldk_conf_t *)((void **)conf)[0];
+ if (tcf->nb_ctx >= RTE_DIM(tcf->ctx))
+ return NGX_CONF_ERROR;
+
+ /* setup default non-zero values, if any */
+ tcx = tcf->ctx + tcf->nb_ctx;
+ tcx->tcp_timewait = TLE_TCP_TIMEWAIT_DEFAULT;
+
+ save = *cf;
+ cf->handler = tldk_ctx_parse;
+ cf->handler_conf = conf;
+ rv = ngx_conf_parse(cf, NULL);
+ *cf = save;
+
+ if (rv == NGX_CONF_OK)
+ tcf->nb_ctx++;
+
+ return rv;
+}
+
+/*
+ * define NGX TLDK module.
+ */
+
+static ngx_command_t tldk_commands[] = {
+
+ {
+ .name = ngx_string("tldk_main"),
+ .type = NGX_MAIN_CONF|NGX_CONF_BLOCK|NGX_CONF_NOARGS,
+ .set = tldk_conf_block,
+ },
+ {
+ .name = ngx_string("tldk_ctx"),
+ .type = NGX_MAIN_CONF|NGX_CONF_BLOCK|NGX_CONF_NOARGS,
+ .set = tldk_ctx_block,
+ },
+ ngx_null_command,
+};
+
+static ngx_core_module_t tldk_module_ctx = {
+ ngx_string("tldk"),
+ tldk_module_create_conf,
+ tldk_module_init_conf
+};
+
+ngx_module_t ngx_tldk_module = {
+ NGX_MODULE_V1,
+ &tldk_module_ctx, /* module context */
+ tldk_commands, /* module directives */
+ NGX_CORE_MODULE, /* module type */
+ NULL, /* init master */
+ tldk_module_init, /* init module */
+ tldk_process_init, /* init process */
+ NULL, /* init thread */
+ NULL, /* exit thread */
+ tldk_process_fini, /* exit process */
+ tldk_module_fini, /* exit master */
+ NGX_MODULE_V1_PADDING
+};
diff --git a/app/nginx/src/tldk/ngx_tldk.h b/app/nginx/src/tldk/ngx_tldk.h
new file mode 100644
index 0000000..01ac556
--- /dev/null
+++ b/app/nginx/src/tldk/ngx_tldk.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2017 Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __NGX_TLDK_H__
+#define __NGX_TLDK_H__
+
+#include <stdint.h>
+#include <stddef.h>
+#include <inttypes.h>
+
+#include <ngx_config.h>
+#include <ngx_core.h>
+
+#include <rte_config.h>
+#include <rte_eal.h>
+#include <rte_common.h>
+#include <rte_ether.h>
+
+#include <tle_ctx.h>
+#include <tle_event.h>
+
+#define MAX_PKT_BURST 0x20
+
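+/* queue_map is a uint32_t bitmask, so at most 32 queues per port */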
+#define MAX_PORT_QUEUE \
+ (sizeof(((struct tldk_port_conf *)NULL)->queue_map) * CHAR_BIT)
+
+#define MAX_CTX_PER_LCORE 32
+
+struct tldk_port_conf {
+ uint32_t id;
+ uint32_t nb_queues;
+ uint32_t queue_map;
+ uint32_t mtu;
+ uint32_t rx_offload;
+ uint32_t tx_offload;
+ uint32_t ipv4;
+ struct in6_addr ipv6;
+ struct ether_addr mac;
+};
+
+struct tldk_dev_conf {
+ uint32_t id;
+ uint32_t port;
+ uint32_t queue;
+};
+
+struct tldk_dest_conf {
+ uint32_t dev;
+ uint32_t mtu;
+ uint32_t prfx;
+ uint16_t family;
+ union {
+ struct in_addr ipv4;
+ struct in6_addr ipv6;
+ };
+ struct ether_addr mac;
+};
+
+#define TLDK_MAX_DEST 0x10
+
+struct tldk_ctx_conf {
+ ngx_uint_t worker;
+ uint32_t lcore;
+ uint32_t nb_mbuf;
+ uint32_t nb_stream;
+ uint32_t nb_rbuf;
+ uint32_t nb_sbuf;
+ uint32_t nb_dev;
+ uint32_t nb_dest;
+ uint32_t be_in_worker;
+ uint32_t tcp_timewait; /* TCP TIME_WAIT value in milliseconds */
+ struct tldk_dev_conf dev[RTE_MAX_ETHPORTS];
+ struct tldk_dest_conf dest[TLDK_MAX_DEST];
+};
+
+typedef struct tldk_conf tldk_conf_t;
+
+struct tldk_conf {
+ uint32_t eal_argc;
+ char *eal_argv[NGX_CONF_MAX_ARGS];
+ char eal_cmd[PATH_MAX];
+ uint32_t nb_port;
+ struct tldk_port_conf port[RTE_MAX_ETHPORTS];
+ uint32_t nb_ctx;
+ struct tldk_ctx_conf ctx[RTE_MAX_LCORE];
+};
+
+extern char *tldk_block_parse(ngx_conf_t *, ngx_command_t *, void *);
+extern char *tldk_ctx_parse(ngx_conf_t *, ngx_command_t *, void *);
+
+struct pkt_buf {
+ uint32_t num;
+ struct rte_mbuf *pkt[2 * MAX_PKT_BURST];
+};
+
+struct tldk_dev {
+ struct tle_dev *dev;
+ struct tldk_dev_conf cf;
+ struct {
+ uint64_t in;
+ uint64_t up;
+ uint64_t drop;
+ } rx_stat;
+ struct {
+ uint64_t down;
+ uint64_t out;
+ uint64_t drop;
+ } tx_stat;
+ struct pkt_buf tx_buf;
+};
+
+#define LCORE_MAX_DST (UINT8_MAX + 1)
+
+struct tldk_ctx {
+ const struct tldk_ctx_conf *cf;
+ struct rte_lpm *lpm4;
+ struct rte_lpm6 *lpm6;
+ struct tle_ctx *ctx;
+ struct rte_mempool *mpool;
+ struct rte_mempool *frag_mpool;
+ uint32_t nb_dev;
+ struct tldk_dev dev[RTE_MAX_ETHPORTS];
+ uint32_t dst4_num;
+ uint32_t dst6_num;
+ struct tle_dest dst4[LCORE_MAX_DST];
+ struct tle_dest dst6[LCORE_MAX_DST];
+ struct {
+ uint64_t flags[UINT8_MAX + 1];
+ } tcp_stat;
+} __rte_cache_aligned;
+
+extern struct tldk_ctx wrk2ctx[RTE_MAX_LCORE];
+
+struct lcore_ctxs_list {
+ uint32_t nb_ctxs;
+	struct tldk_ctx *ctxs[MAX_CTX_PER_LCORE];
+};
+
+/* helper macros */
+#define DUMMY_MACRO do {} while (0)
+
+#ifdef BE_DEBUG
+#define BE_TRACE(fmt, arg...) printf(fmt, ##arg)
+#define BE_PKT_DUMP(p) rte_pktmbuf_dump(stdout, (p), 74)
+#else
+#define BE_TRACE(fmt, arg...) DUMMY_MACRO
+#define BE_PKT_DUMP(p) DUMMY_MACRO
+#endif
+
+#ifdef FE_DEBUG
+#define FE_TRACE(fmt, arg...) printf(fmt, ##arg)
+#define FE_PKT_DUMP(p) rte_pktmbuf_dump(stdout, (p), 74)
+#else
+#define FE_TRACE(fmt, arg...) DUMMY_MACRO
+#define FE_PKT_DUMP(p) DUMMY_MACRO
+#endif
+
+
+#endif /* __NGX_TLDK_H__ */
diff --git a/app/nginx/src/tldk/parse.c b/app/nginx/src/tldk/parse.c
new file mode 100644
index 0000000..5d71d9e
--- /dev/null
+++ b/app/nginx/src/tldk/parse.c
@@ -0,0 +1,456 @@
+/*
+ * Copyright (c) 2017 Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <ngx_config.h>
+#include <ngx_core.h>
+
+#include <ngx_tldk.h>
+
+union parse_val {
+ uint64_t u64;
+ struct {
+ uint16_t family;
+ union {
+ struct in_addr addr4;
+ struct in6_addr addr6;
+ };
+ } in;
+ struct ether_addr mac;
+ rte_cpuset_t cpuset;
+};
+
+struct key_handler {
+ const char *name;
+ int (*func)(const char *, void *);
+};
+
+static int
+parse_uint_val(const char *val, void *prm)
+{
+ union parse_val *rv;
+ unsigned long v;
+ char *end;
+
+ rv = prm;
+ errno = 0;
+ v = strtoul(val, &end, 0);
+ if (errno != 0 || end[0] != 0 || v > UINT32_MAX)
+ return -EINVAL;
+
+ rv->u64 = v;
+ return 0;
+}
+
+static int
+parse_ipv4_val(const char *val, void *prm)
+{
+ union parse_val *rv;
+
+ rv = prm;
+ if (inet_pton(AF_INET, val, &rv->in.addr4) != 1)
+ return -EINVAL;
+ rv->in.family = AF_INET;
+ return 0;
+}
+
+static int
+parse_ipv6_val(const char *val, void *prm)
+{
+ union parse_val *rv;
+
+ rv = prm;
+ if (inet_pton(AF_INET6, val, &rv->in.addr6) != 1)
+ return -EINVAL;
+ rv->in.family = AF_INET6;
+ return 0;
+}
+
+static int
+parse_ip_val(const char *val, void *prm)
+{
+ if (parse_ipv6_val(val, prm) != 0 &&
+ parse_ipv4_val(val, prm) != 0)
+ return -EINVAL;
+ return 0;
+}
+
+#define PARSE_UINT8x16(s, v, l) \
+do { \
+ char *end; \
+ unsigned long t; \
+ errno = 0; \
+ t = strtoul((s), &end, 16); \
+ if (errno != 0 || end[0] != (l) || t > UINT8_MAX) \
+ return -EINVAL; \
+ (s) = end + 1; \
+ (v) = t; \
+} while (0)
+
+static int
+parse_mac_val(const char *val, void *prm)
+{
+ union parse_val *rv;
+ const char *s;
+
+ rv = prm;
+ s = val;
+
+ PARSE_UINT8x16(s, rv->mac.addr_bytes[0], ':');
+ PARSE_UINT8x16(s, rv->mac.addr_bytes[1], ':');
+ PARSE_UINT8x16(s, rv->mac.addr_bytes[2], ':');
+ PARSE_UINT8x16(s, rv->mac.addr_bytes[3], ':');
+ PARSE_UINT8x16(s, rv->mac.addr_bytes[4], ':');
+ PARSE_UINT8x16(s, rv->mac.addr_bytes[5], 0);
+ return 0;
+}
+
+static char *
+tldk_port_parse(ngx_conf_t *cf, struct tldk_port_conf *prt)
+{
+ uint32_t i, j;
+ ngx_str_t *v;
+
+ static const struct key_handler kh[] = {
+ {
+ .name = "port",
+ .func = parse_uint_val,
+ },
+ {
+ .name = "mtu",
+ .func = parse_uint_val,
+ },
+ {
+ .name = "rx_offload",
+ .func = parse_uint_val,
+ },
+ {
+ .name = "tx_offload",
+ .func = parse_uint_val,
+ },
+ {
+ .name = "ipv4",
+ .func = parse_ipv4_val,
+ },
+ {
+ .name = "ipv6",
+ .func = parse_ipv6_val,
+ },
+ };
+
+ union parse_val pvl[RTE_DIM(kh)];
+
+ memset(pvl, 0, sizeof(pvl));
+ pvl[1].u64 = ETHER_MAX_LEN - ETHER_CRC_LEN;
+
+ if (cf->args->nelts % 2 != 0)
+ return NGX_CONF_ERROR;
+
+ v = cf->args->elts;
+ for (i = 0; i != cf->args->nelts; i += 2) {
+
+ for (j = 0; j != RTE_DIM(kh); j++) {
+ if (ngx_strcmp(v[i].data, kh[j].name) == 0) {
+ if (kh[j].func((const char *)v[i + 1].data,
+ pvl + j) < 0)
+ return NGX_CONF_ERROR;
+ else
+ break;
+ }
+ }
+
+		/* unknown key */
+ if (j == RTE_DIM(kh))
+ return NGX_CONF_ERROR;
+ }
+
+ memset(prt, 0, sizeof(*prt));
+
+ prt->id = pvl[0].u64;
+ prt->mtu = pvl[1].u64;
+ prt->rx_offload = pvl[2].u64;
+ prt->tx_offload = pvl[3].u64;
+ prt->ipv4 = pvl[4].in.addr4.s_addr;
+ prt->ipv6 = pvl[5].in.addr6;
+
+ return NGX_CONF_OK;
+}
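+
+/*
+ * Accepts an even-length key/value list where the directive name itself
+ * ("port") serves as the first key. A hypothetical nginx.conf entry
+ * (all values are placeholders):
+ *
+ *	port 0 mtu 1514 rx_offload 0 tx_offload 0 ipv4 192.0.2.1;
+ */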
+
+static char *
+tldk_dev_parse(ngx_conf_t *cf, struct tldk_dev_conf *dev,
+ tldk_conf_t *tcf)
+{
+ uint32_t i, j;
+ ngx_str_t *v;
+
+ static const struct key_handler kh[] = {
+ {
+ .name = "dev",
+ .func = parse_uint_val,
+ },
+ {
+ .name = "port",
+ .func = parse_uint_val,
+ },
+ {
+ .name = "queue",
+ .func = parse_uint_val,
+ },
+ };
+
+ union parse_val pvl[RTE_DIM(kh)];
+
+ memset(pvl, 0, sizeof(pvl));
+
+ if (cf->args->nelts % 2 != 0)
+ return NGX_CONF_ERROR;
+
+ v = cf->args->elts;
+ for (i = 0; i != cf->args->nelts; i += 2) {
+
+ for (j = 0; j != RTE_DIM(kh); j++) {
+ if (ngx_strcmp(v[i].data, kh[j].name) == 0) {
+ if (kh[j].func((const char *)v[i + 1].data,
+ pvl + j) < 0)
+ return NGX_CONF_ERROR;
+ else
+ break;
+ }
+ }
+
+		/* unknown key */
+ if (j == RTE_DIM(kh))
+ return NGX_CONF_ERROR;
+ }
+
+ memset(dev, 0, sizeof(*dev));
+
+ dev->id = pvl[0].u64;
+ dev->port = pvl[1].u64;
+ dev->queue = pvl[2].u64;
+
+ return NGX_CONF_OK;
+}
+
+static char *
+tldk_dest_parse(ngx_conf_t *cf, struct tldk_dest_conf *dst)
+{
+ uint32_t i, j;
+ ngx_str_t *v;
+
+ static const struct key_handler kh[] = {
+ {
+ .name = "dev",
+ .func = parse_uint_val,
+ },
+ {
+ .name = "mtu",
+ .func = parse_uint_val,
+ },
+ {
+ .name = "masklen",
+ .func = parse_uint_val,
+ },
+ {
+ .name = "addr",
+ .func = parse_ip_val,
+ },
+ {
+ .name = "mac",
+ .func = parse_mac_val,
+ },
+ };
+
+ union parse_val pvl[RTE_DIM(kh)];
+
+ memset(pvl, 0, sizeof(pvl));
+ pvl[1].u64 = ETHER_MAX_LEN - ETHER_CRC_LEN;
+
+ if (cf->args->nelts % 2 != 1 || cf->args->nelts == 1)
+ return NGX_CONF_ERROR;
+
+ v = cf->args->elts;
+ for (i = 1; i != cf->args->nelts; i += 2) {
+
+ for (j = 0; j != RTE_DIM(kh); j++) {
+ if (ngx_strcmp(v[i].data, kh[j].name) == 0) {
+ if (kh[j].func((const char *)v[i + 1].data,
+ pvl + j) < 0)
+ return NGX_CONF_ERROR;
+ else
+ break;
+ }
+ }
+
+		/* unknown key */
+ if (j == RTE_DIM(kh))
+ return NGX_CONF_ERROR;
+ }
+
+ memset(dst, 0, sizeof(*dst));
+
+ dst->dev = pvl[0].u64;
+ dst->mtu = pvl[1].u64;
+ dst->prfx = pvl[2].u64;
+
+ dst->family = pvl[3].in.family;
+ if (pvl[3].in.family == AF_INET)
+ dst->ipv4 = pvl[3].in.addr4;
+ else
+ dst->ipv6 = pvl[3].in.addr6;
+
+ memcpy(&dst->mac, &pvl[4].mac, sizeof(dst->mac));
+
+ return NGX_CONF_OK;
+}
+
+char *
+tldk_block_parse(ngx_conf_t *cf, ngx_command_t *dummy, void *conf)
+{
+ uint32_t i, len, n;
+ tldk_conf_t *tcf;
+ ngx_str_t *v;
+ char *rv, *s;
+ struct tldk_port_conf prt;
+
+ tcf = (tldk_conf_t *)((void **)conf)[0];
+ v = cf->args->elts;
+
+ if (ngx_strcmp(v[0].data, "eal_cmd") == 0) {
+
+ if (cf->args->nelts == 1 ||
+ cf->args->nelts > RTE_DIM(tcf->eal_argv))
+ return NGX_CONF_ERROR;
+
+ s = tcf->eal_cmd;
+ len = sizeof(tcf->eal_cmd);
+ for (i = 0; i != cf->args->nelts; i++) {
+ n = snprintf(s, len, "%s", v[i].data) + 1;
+ if (n > len)
+ return NGX_CONF_ERROR;
+ tcf->eal_argv[i] = s;
+ s += n;
+ len -= n;
+ }
+
+ tcf->eal_argc = i;
+ return NGX_CONF_OK;
+
+ } else if (ngx_strcmp(v[0].data, "port") == 0) {
+
+ rv = tldk_port_parse(cf, &prt);
+ if (rv == NGX_CONF_OK) {
+
+ /* too many ports */
+ if (tcf->nb_port >= RTE_DIM(tcf->port))
+ return NGX_CONF_ERROR;
+
+ /* copy stuff */
+ tcf->port[tcf->nb_port++] = prt;
+ }
+ return rv;
+ }
+
+ return NGX_CONF_ERROR;
+}
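+
+/*
+ * Sketch of the two directives handled above (hypothetical values):
+ *
+ *	eal_cmd -l 1,2 -n 4;
+ *	port 0 mtu 1514 ipv4 192.0.2.1;
+ *
+ * Note that for "eal_cmd" the directive name itself lands in eal_argv[0],
+ * matching the conventional argv[0] program-name slot.
+ */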
+
+char *
+tldk_ctx_parse(ngx_conf_t *cf, ngx_command_t *dummy, void *conf)
+{
+ char *rv;
+ ngx_str_t *v;
+ tldk_conf_t *tcf;
+ struct tldk_ctx_conf *tcx;
+ union parse_val pvl;
+
+ tcf = (tldk_conf_t *)((void **)conf)[0];
+ tcx = tcf->ctx + tcf->nb_ctx;
+ v = cf->args->elts;
+
+ if (ngx_strcmp(v[0].data, "worker") == 0) {
+ if (cf->args->nelts != 2 ||
+ parse_uint_val((const char *)v[1].data,
+ &pvl) < 0)
+ return NGX_CONF_ERROR;
+ tcx->worker = pvl.u64;
+ } else if (ngx_strcmp(v[0].data, "lcore") == 0) {
+ if (cf->args->nelts != 2 ||
+ parse_uint_val((const char *)v[1].data,
+ &pvl) < 0)
+ return NGX_CONF_ERROR;
+ tcx->lcore = pvl.u64;
+ } else if (ngx_strcmp(v[0].data, "mbufs") == 0) {
+ if (cf->args->nelts != 2 ||
+ parse_uint_val((const char *)v[1].data,
+ &pvl) < 0)
+ return NGX_CONF_ERROR;
+ tcx->nb_mbuf = pvl.u64;
+ } else if (ngx_strcmp(v[0].data, "streams") == 0) {
+ if (cf->args->nelts != 2 ||
+ parse_uint_val((const char *)v[1].data,
+ &pvl) < 0)
+ return NGX_CONF_ERROR;
+ tcx->nb_stream = pvl.u64;
+ } else if (ngx_strcmp(v[0].data, "rbufs") == 0) {
+ if (cf->args->nelts != 2 ||
+ parse_uint_val((const char *)v[1].data,
+ &pvl) < 0)
+ return NGX_CONF_ERROR;
+ tcx->nb_rbuf = pvl.u64;
+ } else if (ngx_strcmp(v[0].data, "sbufs") == 0) {
+ if (cf->args->nelts != 2 ||
+ parse_uint_val((const char *)v[1].data,
+ &pvl) < 0)
+ return NGX_CONF_ERROR;
+ tcx->nb_sbuf = pvl.u64;
+ } else if (ngx_strcmp(v[0].data, "be_in_worker") == 0) {
+ if (cf->args->nelts != 1)
+ return NGX_CONF_ERROR;
+ tcx->be_in_worker = 1;
+ } else if (ngx_strcmp(v[0].data, "tcp_timewait") == 0) {
+ if (cf->args->nelts != 2 ||
+ parse_uint_val((const char *)v[1].data,
+ &pvl) < 0)
+ return NGX_CONF_ERROR;
+ tcx->tcp_timewait = pvl.u64;
+ } else if (ngx_strcmp(v[0].data, "dev") == 0) {
+ if (tcx->nb_dev >= RTE_DIM(tcx->dev))
+ return NGX_CONF_ERROR;
+ rv = tldk_dev_parse(cf, tcx->dev + tcx->nb_dev, tcf);
+ if (rv != NGX_CONF_OK)
+ return rv;
+ tcx->nb_dev++;
+ return rv;
+ } else if (ngx_strcmp(v[0].data, "dest") == 0) {
+ if (tcx->nb_dest >= RTE_DIM(tcx->dest))
+ return NGX_CONF_ERROR;
+ rv = tldk_dest_parse(cf, tcx->dest + tcx->nb_dest);
+ if (rv != NGX_CONF_OK)
+ return rv;
+ tcx->nb_dest++;
+ }
+
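+	/* directives not matched above fall through and are silently accepted */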
+ return NGX_CONF_OK;
+}
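+
+/*
+ * Taken together, a context block this parser accepts could look like the
+ * following (hypothetical values). Note the asymmetry: "dev" reuses its
+ * own name as the first key, while "dest" skips it:
+ *
+ *	worker 0;
+ *	lcore 1;
+ *	mbufs 65536;
+ *	streams 1024;
+ *	dev 0 port 0 queue 0;
+ *	dest dev 0 masklen 24 addr 192.0.2.0 mac aa:bb:cc:dd:ee:ff;
+ */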
diff --git a/app/nginx/src/tldk/tldk_event.c b/app/nginx/src/tldk/tldk_event.c
new file mode 100644
index 0000000..4630326
--- /dev/null
+++ b/app/nginx/src/tldk/tldk_event.c
@@ -0,0 +1,276 @@
+/*
+ * Copyright (c) 2017 Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <ngx_tldk.h>
+#include <tldk_sock.h>
+
+#include <rte_cycles.h>
+
+#include "be.h"
+#include "debug.h"
+
+#define EVENT_BULK 32
+
+enum {
+ EV_ACCEPT,
+ EV_RECV,
+ EV_SEND,
+ EV_ERR,
+ EV_NUM
+};
+
+struct tldk_event_stat {
+ uint64_t nb_get[EV_NUM];
+ uint64_t nb_post[EV_NUM];
+};
+
+static struct tldk_event_stat event_stat;
+
+extern ngx_event_module_t tldk_event_module;
+
+/*
+ * TLDK event module implementation
+ */
+
+static ngx_int_t
+tldk_add_event(ngx_event_t *ev, ngx_int_t event, ngx_uint_t flags)
+{
+ struct tldk_sock *ts;
+ ngx_connection_t *c;
+
+ c = ev->data;
+
+ FE_TRACE("%s(ev=%p,event=%#lx,flags=%#lx): fd=%d;\n",
+ __func__, ev, event, flags, c->fd);
+
+ ts = sd_to_sock(c->fd);
+ if (ts == NULL)
+ return NGX_OK;
+
+ if (event == NGX_READ_EVENT) {
+ tle_event_active(ts->rxev, TLE_SEV_DOWN);
+ tle_event_active(ts->erev, TLE_SEV_DOWN);
+ ts->rev = ev;
+ } else if (event == NGX_WRITE_EVENT) {
+ tle_event_active(ts->txev, TLE_SEV_DOWN);
+ tle_event_active(ts->erev, TLE_SEV_DOWN);
+ ts->wev = ev;
+ }
+
+ ev->active = 1;
+ return NGX_OK;
+}
+
+static ngx_int_t
+tldk_del_event(ngx_event_t *ev, ngx_int_t event, ngx_uint_t flags)
+{
+ struct tldk_sock *ts;
+ ngx_connection_t *c;
+
+ c = ev->data;
+
+ FE_TRACE("%s(ev=%p,event=%#lx,flags=%#lx): fd=%d;\n",
+ __func__, ev, event, flags, c->fd);
+
+ ev->active = 0;
+ if ((flags & NGX_CLOSE_EVENT) != 0)
+ return NGX_OK;
+
+ ts = sd_to_sock(c->fd);
+ if (ts == NULL)
+ return NGX_OK;
+
+ if (event == NGX_READ_EVENT) {
+ tle_event_down(ts->rxev);
+ tle_event_down(ts->erev);
+ ts->rev = NULL;
+ } else if (event == NGX_WRITE_EVENT) {
+ tle_event_down(ts->txev);
+ tle_event_down(ts->erev);
+ ts->wev = NULL;
+ }
+
+ return NGX_OK;
+}
+
+static inline void
+post_event(ngx_event_t *ev, ngx_queue_t *q, ngx_uint_t flags, uint32_t type)
+{
+ if (ev != NULL && ev->active == 1) {
+ ev->ready = 1;
+ event_stat.nb_post[type]++;
+		if ((flags & NGX_POST_EVENTS) != 0)
+			ngx_post_event(ev, q);
+		else
+			ev->handler(ev);
+ }
+}
+
+static ngx_int_t
+tldk_process_events(ngx_cycle_t *cycle, ngx_msec_t timer, ngx_uint_t flags)
+{
+ uint32_t i, n, ne, nr, ns, nt;
+ uint64_t tme, tms, tmw;
+ struct tldk_sock *te[EVENT_BULK];
+ struct tldk_sock *tr[EVENT_BULK];
+ struct tldk_sock *ts[EVENT_BULK];
+ struct tldk_sock *tt[EVENT_BULK];
+ struct tldk_ctx *tcx;
+
+ FE_TRACE("%s(cycle=%p,timer=%lu,flags=%#lx);\n",
+ __func__, cycle, timer, flags);
+
+ tcx = wrk2ctx + ngx_worker;
+
+ tms = rte_get_tsc_cycles();
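+	/*
+	 * convert the poll timeout from milliseconds into TSC cycles,
+	 * rounding the cycles-per-millisecond ratio up
+	 */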
+ tme = (timer == NGX_TIMER_INFINITE) ? timer :
+ timer * (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S;
+ tmw = 0;
+ n = 0;
+
+ do {
+ if (tcx->cf->be_in_worker != 0)
+ be_lcore_tcp(tcx);
+
+ ns = tle_evq_get(stbl.syneq, (const void **)(uintptr_t)ts,
+ RTE_DIM(ts));
+ nr = tle_evq_get(stbl.rxeq, (const void **)(uintptr_t)tr,
+ RTE_DIM(tr));
+ nt = tle_evq_get(stbl.txeq, (const void **)(uintptr_t)tt,
+ RTE_DIM(tt));
+ ne = tle_evq_get(stbl.ereq, (const void **)(uintptr_t)te,
+ RTE_DIM(te));
+ n = ne + nr + ns + nt;
+
+ if (n != 0) {
+ event_stat.nb_get[EV_ACCEPT] += ns;
+ event_stat.nb_get[EV_RECV] += nr;
+ event_stat.nb_get[EV_SEND] += nt;
+ event_stat.nb_get[EV_ERR] += ne;
+ break;
+ }
+
+		if (tcx->cf->be_in_worker == 0)
+			rte_delay_us(1);
+
+ tmw += rte_get_tsc_cycles() - tms;
+
+ } while (tmw < tme && ngx_quit == 0 && ngx_terminate == 0);
+
+ if ((flags & NGX_UPDATE_TIME) != 0 || ngx_event_timer_alarm)
+ ngx_time_update();
+
+ if (n == 0)
+ return NGX_OK;
+
+ for (i = 0; i != ns; i++)
+ post_event(ts[i]->rev, &ngx_posted_accept_events, flags,
+ EV_ACCEPT);
+
+ for (i = 0; i != nr; i++)
+ post_event(tr[i]->rev, &ngx_posted_events, flags, EV_RECV);
+
+ for (i = 0; i != nt; i++)
+ post_event(tt[i]->wev, &ngx_posted_events, flags, EV_SEND);
+
+ for (i = 0; i != ne; i++) {
+ te[i]->posterr++;
+ post_event(te[i]->rev, &ngx_posted_events, flags, EV_ERR);
+ post_event(te[i]->wev, &ngx_posted_events, flags, EV_ERR);
+ }
+
+ return NGX_OK;
+}
+
+static ngx_int_t
+tldk_init_events(ngx_cycle_t *cycle, ngx_msec_t timer)
+{
+ FE_TRACE("%s(cycle=%p,timer=%lu);\n",
+ __func__, cycle, timer);
+
+ /* overwrite event actions for worker process */
+ ngx_event_actions = tldk_event_module.actions;
+ ngx_event_flags = NGX_USE_LEVEL_EVENT;
+
+ ngx_io = ngx_os_io;
+ return NGX_OK;
+}
+
+void
+tldk_dump_event_stats(void)
+{
+ static const char * const name[EV_NUM] = {
+ "ACCEPT",
+ "RECV",
+ "SEND",
+ "ERR",
+ };
+
+ uint32_t i;
+
+ RTE_LOG(NOTICE, USER1, "%s(worker=%lu)={\n", __func__, ngx_worker);
+ for (i = 0; i != RTE_DIM(name); i++)
+ RTE_LOG(NOTICE, USER1,
+ "%s[GET, POST]={%" PRIu64 ", %" PRIu64 "};\n",
+ name[i], event_stat.nb_get[i], event_stat.nb_post[i]);
+ RTE_LOG(NOTICE, USER1, "};\n");
+}
+
+static void
+tldk_done_events(ngx_cycle_t *cycle)
+{
+}
+
+static ngx_str_t tldk_name = ngx_string("tldk");
+
+ngx_event_module_t tldk_event_module = {
+ .name = &tldk_name,
+ .actions = {
+ .add = tldk_add_event,
+ .del = tldk_del_event,
+ .enable = tldk_add_event,
+ .disable = tldk_del_event,
+ .process_events = tldk_process_events,
+ .init = tldk_init_events,
+ .done = tldk_done_events,
+ },
+};
+
+ngx_module_t ngx_tldk_event_module = {
+ NGX_MODULE_V1,
+ &tldk_event_module, /* module context */
+ NULL, /* module directives */
+ NGX_EVENT_MODULE, /* module type */
+ NULL, /* init master */
+ NULL, /* init module */
+ NULL, /* init process */
+ NULL, /* init thread */
+ NULL, /* exit thread */
+ NULL, /* exit process */
+ NULL, /* exit master */
+ NGX_MODULE_V1_PADDING
+};
diff --git a/app/nginx/src/tldk/tldk_sock.c b/app/nginx/src/tldk/tldk_sock.c
new file mode 100644
index 0000000..7831fc3
--- /dev/null
+++ b/app/nginx/src/tldk/tldk_sock.c
@@ -0,0 +1,549 @@
+/*
+ * Copyright (c) 2017 Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <ngx_tldk.h>
+#include <tldk_sock.h>
+
+#include <rte_malloc.h>
+#include <rte_errno.h>
+
+struct tldk_sock_stat {
+ uint64_t nb_accept;
+ uint64_t nb_close;
+ uint64_t nb_readv;
+ uint64_t nb_recv;
+ uint64_t nb_setopts;
+ uint64_t nb_shutdown;
+ uint64_t nb_writev;
+};
+
+static struct tldk_sock_stat sock_stat;
+
+/* One socket/file table per worker */
+struct tldk_stbl stbl = {
+ .snum = 0,
+};
+
+static int (*real_accept4)(int, struct sockaddr *, socklen_t *, int);
+static int (*real_close)(int);
+static ssize_t (*real_readv)(int, const struct iovec *, int);
+static ssize_t (*real_recv)(int, void *, size_t, int);
+static int (*real_setsockopt)(int, int, int, const void *, socklen_t);
+static int (*real_shutdown)(int, int);
+static ssize_t (*real_writev)(int, const struct iovec *, int);
+
+static inline uint32_t
+get_socks(struct tldk_sock_list *list, struct tldk_sock *rs[],
+ uint32_t num)
+{
+ struct tldk_sock *s;
+ uint32_t i, n;
+
+ n = RTE_MIN(list->num, num);
+ for (i = 0, s = LIST_FIRST(&list->head);
+ i != n;
+ i++, s = LIST_NEXT(s, link)) {
+ rs[i] = s;
+ }
+
+ /* we retrieved all free entries */
+ if (s == NULL)
+ LIST_INIT(&list->head);
+ else
+ LIST_FIRST(&list->head) = s;
+
+ list->num -= n;
+ return n;
+}
+
+static inline struct tldk_sock *
+get_sock(struct tldk_sock_list *list)
+{
+ struct tldk_sock *s;
+
+ if (get_socks(list, &s, 1) != 1)
+ return NULL;
+
+ return s;
+}
+
+static inline void
+put_socks(struct tldk_sock_list *list, struct tldk_sock *fs[], uint32_t num)
+{
+ uint32_t i;
+
+ for (i = 0; i != num; i++)
+ LIST_INSERT_HEAD(&list->head, fs[i], link);
+ list->num += num;
+}
+
+static inline void
+put_sock(struct tldk_sock_list *list, struct tldk_sock *s)
+{
+ put_socks(list, &s, 1);
+}
+
+static inline void
+rem_sock(struct tldk_sock_list *list, struct tldk_sock *s)
+{
+ LIST_REMOVE(s, link);
+ list->num--;
+}
+
+static void
+term_sock(struct tldk_sock *ts)
+{
+ tle_event_idle(ts->erev);
+ tle_event_idle(ts->rxev);
+ tle_event_idle(ts->txev);
+ tle_tcp_stream_close(ts->s);
+ ts->s = NULL;
+ ts->posterr = 0;
+}
+
+static int32_t
+close_sock(struct tldk_sock *ts)
+{
+ if (ts->s == NULL)
+ return EBADF;
+ term_sock(ts);
+ rem_sock(&stbl.use, ts);
+ put_sock(&stbl.free, ts);
+ return 0;
+}
+
+static void
+dump_sock_stats(void)
+{
+ RTE_LOG(NOTICE, USER1, "%s(worker=%lu)={\n"
+ "nb_accept=%" PRIu64 ";\n"
+ "nb_close=%" PRIu64 ";\n"
+ "nb_readv=%" PRIu64 ";\n"
+ "nb_recv=%" PRIu64 ";\n"
+ "nb_setopts=%" PRIu64 ";\n"
+ "nb_shutdown=%" PRIu64 ";\n"
+ "nb_writev=%" PRIu64 ";\n"
+ "};\n",
+ __func__,
+ ngx_worker,
+ sock_stat.nb_accept,
+ sock_stat.nb_close,
+ sock_stat.nb_readv,
+ sock_stat.nb_recv,
+ sock_stat.nb_setopts,
+ sock_stat.nb_shutdown,
+ sock_stat.nb_writev);
+}
+
+void
+tldk_stbl_fini(void)
+{
+ dump_sock_stats();
+ tldk_dump_event_stats();
+ rte_free(stbl.sd);
+ tle_evq_destroy(stbl.txeq);
+ tle_evq_destroy(stbl.rxeq);
+ tle_evq_destroy(stbl.ereq);
+ tle_evq_destroy(stbl.syneq);
+}
+
+#define INIT_FUNC(func) do { \
+ real_##func = dlsym(RTLD_NEXT, #func); \
+ RTE_ASSERT(real_##func); \
+} while (0)
+
+static void __attribute__((constructor))
+stub_init(void)
+{
+ INIT_FUNC(accept4);
+ INIT_FUNC(close);
+ INIT_FUNC(readv);
+ INIT_FUNC(recv);
+ INIT_FUNC(setsockopt);
+ INIT_FUNC(shutdown);
+ INIT_FUNC(writev);
+}
+
+#undef INIT_FUNC
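+
+/*
+ * The constructor above resolves the real libc entry points so that the
+ * like-named functions defined below can interpose on them: calls on TLDK
+ * descriptors are served by TLDK, anything else falls through to libc.
+ * A minimal standalone sketch of the same technique (assumes glibc, with
+ * _GNU_SOURCE defined for RTLD_NEXT):
+ *
+ *	#define _GNU_SOURCE
+ *	#include <dlfcn.h>
+ *
+ *	static int (*real_close)(int);
+ *
+ *	static void __attribute__((constructor))
+ *	sketch_init(void)
+ *	{
+ *		real_close = dlsym(RTLD_NEXT, "close");
+ *	}
+ *
+ *	int
+ *	close(int fd)
+ *	{
+ *		return real_close(fd);	// pass through to libc
+ *	}
+ */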
+
+int
+tldk_stbl_init(const ngx_cycle_t *cycle, const struct tldk_ctx *tc)
+{
+ uint32_t i, lc, sid, sn;
+ size_t sz;
+ struct tle_evq_param eprm;
+ struct rlimit rlim;
+
+ lc = tc->cf->lcore;
+ sn = tc->cf->nb_stream;
+ sid = rte_lcore_to_socket_id(lc);
+
+ if (sn < cycle->listening.nelts + cycle->connection_n)
+ return -EINVAL;
+
+ if (getrlimit(RLIMIT_NOFILE, &rlim) != 0)
+ return -errno;
+
+ stbl.nosd = rlim.rlim_max;
+
+ /* allocate event queues */
+
+ memset(&eprm, 0, sizeof(eprm));
+ eprm.socket_id = sid;
+ eprm.max_events = sn;
+
+ stbl.syneq = tle_evq_create(&eprm);
+ stbl.ereq = tle_evq_create(&eprm);
+ stbl.rxeq = tle_evq_create(&eprm);
+ stbl.txeq = tle_evq_create(&eprm);
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u, worker=%lu): "
+ "synevq=%p, erevq=%p, rxevq=%p, txevq=%p\n",
+ __func__, lc, ngx_worker,
+ stbl.syneq, stbl.ereq, stbl.rxeq, stbl.txeq);
+ if (stbl.syneq == NULL || stbl.ereq == NULL || stbl.rxeq == NULL ||
+ stbl.txeq == NULL)
+ return -ENOMEM;
+
+ LIST_INIT(&stbl.lstn.head);
+ LIST_INIT(&stbl.free.head);
+ LIST_INIT(&stbl.use.head);
+
+ sz = sn * sizeof(*stbl.sd);
+ stbl.sd = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE,
+ rte_lcore_to_socket_id(lc));
+
+ if (stbl.sd == NULL) {
+ RTE_LOG(ERR, USER1, "%s(lcore=%u, worker=%lu): "
+ "failed to allocate %zu bytes\n",
+ __func__, lc, ngx_worker, sz);
+ return -ENOMEM;
+ }
+
+ stbl.snum = sn;
+
+ /* init listen socks */
+ for (i = 0; i != cycle->listening.nelts; i++) {
+ stbl.sd[i].rxev = tle_event_alloc(stbl.syneq, stbl.sd + i);
+ stbl.sd[i].txev = tle_event_alloc(stbl.txeq, stbl.sd + i);
+ stbl.sd[i].erev = tle_event_alloc(stbl.ereq, stbl.sd + i);
+ put_sock(&stbl.lstn, stbl.sd + i);
+ }
+
+ /* init worker connection socks */
+ for (; i != sn; i++) {
+ stbl.sd[i].rxev = tle_event_alloc(stbl.rxeq, stbl.sd + i);
+ stbl.sd[i].txev = tle_event_alloc(stbl.txeq, stbl.sd + i);
+ stbl.sd[i].erev = tle_event_alloc(stbl.ereq, stbl.sd + i);
+ put_sock(&stbl.free, stbl.sd + i);
+ }
+
+ return 0;
+}
+
+int
+tldk_open_bind_listen(struct tldk_ctx *tcx, int domain, int type,
+ const struct sockaddr *addr, socklen_t addrlen, int backlog)
+{
+ int32_t rc;
+ struct tldk_sock *ts;
+ struct tle_tcp_stream_param sprm;
+
+ ts = get_sock(&stbl.lstn);
+ if (ts == NULL) {
+ errno = ENOBUFS;
+ return -1;
+ }
+
+ tle_event_active(ts->erev, TLE_SEV_DOWN);
+ tle_event_active(ts->rxev, TLE_SEV_DOWN);
+ tle_event_active(ts->txev, TLE_SEV_DOWN);
+
+ /* setup stream parameters */
+
+ memset(&sprm, 0, sizeof(sprm));
+
+ sprm.cfg.err_ev = ts->erev;
+ sprm.cfg.recv_ev = ts->rxev;
+ sprm.cfg.send_ev = ts->txev;
+
+ memcpy(&sprm.addr.local, addr, addrlen);
+ sprm.addr.remote.ss_family = sprm.addr.local.ss_family;
+
+ ts->s = tle_tcp_stream_open(tcx->ctx, &sprm);
+ if (ts->s != NULL)
+ rc = tle_tcp_stream_listen(ts->s);
+ else
+ rc = -rte_errno;
+
+ if (rc != 0) {
+ term_sock(ts);
+ put_sock(&stbl.lstn, ts);
+ errno = -rc;
+ return -1;
+ }
+
+ return SOCK_TO_SD(ts);
+}
+
+/*
+ * socket API
+ */
+
+int
+close(int sd)
+{
+ int32_t rc;
+ struct tldk_sock *ts;
+
+ FE_TRACE("worker#%lu: %s(%d);\n",
+ ngx_worker, __func__, sd);
+
+ ts = sd_to_sock(sd);
+ if (ts == NULL)
+ return real_close(sd);
+
+ sock_stat.nb_close++;
+
+ rc = close_sock(ts);
+ if (rc != 0) {
+		errno = -rc;
+ return -1;
+ }
+ return 0;
+}
+
+int
+shutdown(int sd, int how)
+{
+ struct tldk_sock *ts;
+
+ FE_TRACE("worker#%lu: %s(%d, %#x);\n",
+ ngx_worker, __func__, sd, how);
+
+ ts = sd_to_sock(sd);
+ if (ts == NULL)
+ return real_shutdown(sd, how);
+
+ sock_stat.nb_shutdown++;
+
+ errno = ENOTSUP;
+ return -1;
+}
+
+
+int
+accept4(int sd, struct sockaddr *addr, socklen_t *addrlen, int flags)
+{
+ uint32_t n, slen;
+ struct tle_stream *s;
+ struct tldk_sock *cs, *ts;
+ struct tle_tcp_stream_cfg prm;
+ struct tle_tcp_stream_addr sa;
+
+ FE_TRACE("worker#%lu: %s(%d, %p, %p, %#x);\n",
+ ngx_worker, __func__, sd, addr, addrlen, flags);
+
+ ts = sd_to_sock(sd);
+ if (ts == NULL)
+ return real_accept4(sd, addr, addrlen, flags);
+ else if (ts->s == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ sock_stat.nb_accept++;
+
+ n = ts->acpt.num;
+ if (n == 0) {
+ n = tle_tcp_stream_accept(ts->s, ts->acpt.buf,
+ RTE_DIM(ts->acpt.buf));
+ if (n == 0) {
+ errno = EAGAIN;
+ return -1;
+ }
+ }
+
+ s = ts->acpt.buf[n - 1];
+ ts->acpt.num = n - 1;
+
+ tle_event_raise(ts->rxev);
+
+ cs = get_sock(&stbl.free);
+ if (cs == NULL) {
+ tle_tcp_stream_close(s);
+ errno = ENOBUFS;
+ return -1;
+ }
+
+ cs->s = s;
+ put_sock(&stbl.use, cs);
+
+ tle_event_active(cs->erev, TLE_SEV_DOWN);
+ tle_event_active(cs->rxev, TLE_SEV_DOWN);
+ tle_event_active(cs->txev, TLE_SEV_DOWN);
+
+ memset(&prm, 0, sizeof(prm));
+ prm.recv_ev = cs->rxev;
+ prm.send_ev = cs->txev;
+ prm.err_ev = cs->erev;
+ tle_tcp_stream_update_cfg(&s, &prm, 1);
+
+ if (tle_tcp_stream_get_addr(s, &sa) == 0) {
+
+ if (sa.remote.ss_family == AF_INET)
+ slen = sizeof(struct sockaddr_in);
+ else if (sa.remote.ss_family == AF_INET6)
+ slen = sizeof(struct sockaddr_in6);
+ else
+ slen = 0;
+
+ slen = RTE_MIN(slen, *addrlen);
+ memcpy(addr, &sa.remote, slen);
+ *addrlen = slen;
+ }
+
+ return SOCK_TO_SD(cs);
+}
+
+ssize_t
+recv(int sd, void *buf, size_t len, int flags)
+{
+ ssize_t sz;
+ struct tldk_sock *ts;
+ struct iovec iv;
+
+ FE_TRACE("worker#%lu: %s(%d, %p, %zu, %#x);\n",
+ ngx_worker, __func__, sd, buf, len, flags);
+
+ ts = sd_to_sock(sd);
+ if (ts == NULL)
+ return real_recv(sd, buf, len, flags);
+ else if (ts->s == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ sock_stat.nb_recv++;
+
+ iv.iov_base = buf;
+ iv.iov_len = len;
+
+ sz = tle_tcp_stream_readv(ts->s, &iv, 1);
+ if (sz < 0)
+ errno = rte_errno;
+ else if (sz == 0 && ts->posterr == 0) {
+ errno = EAGAIN;
+ sz = -1;
+ }
+
+ FE_TRACE("worker#%lu: %s(%d, %p, %zu, %#x) returns %zd;\n",
+ ngx_worker, __func__, sd, buf, len, flags, sz);
+ return sz;
+}
+
+ssize_t
+readv(int sd, const struct iovec *iov, int iovcnt)
+{
+ ssize_t sz;
+ struct tldk_sock *ts;
+ struct tldk_ctx *tcx;
+
+ FE_TRACE("worker#%lu: %s(%d, %p, %d);\n",
+ ngx_worker, __func__, sd, iov, iovcnt);
+
+ tcx = wrk2ctx + ngx_worker;
+ ts = sd_to_sock(sd);
+ if (ts == NULL)
+ return real_readv(sd, iov, iovcnt);
+ else if (ts->s == NULL || tcx == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ sock_stat.nb_readv++;
+
+ sz = tle_tcp_stream_readv(ts->s, iov, iovcnt);
+ if (sz < 0)
+ errno = rte_errno;
+ else if (sz == 0 && ts->posterr == 0) {
+ errno = EAGAIN;
+ sz = -1;
+ }
+
+ FE_TRACE("worker#%lu: %s(%d, %p, %d) returns %zd;\n",
+ ngx_worker, __func__, sd, iov, iovcnt, sz);
+ return sz;
+}
+
+ssize_t
+writev(int sd, const struct iovec *iov, int iovcnt)
+{
+ ssize_t sz;
+ struct tldk_sock *ts;
+ struct tldk_ctx *tcx;
+
+ FE_TRACE("worker#%lu: %s(%d, %p, %d);\n",
+ ngx_worker, __func__, sd, iov, iovcnt);
+
+ tcx = wrk2ctx + ngx_worker;
+ ts = sd_to_sock(sd);
+ if (ts == NULL)
+ return real_writev(sd, iov, iovcnt);
+ else if (ts->s == NULL || tcx == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ sock_stat.nb_writev++;
+
+ sz = tle_tcp_stream_writev(ts->s, tcx->mpool, iov, iovcnt);
+ if (sz < 0)
+ errno = rte_errno;
+
+ FE_TRACE("worker#%lu: %s(%d, %p, %d) returns %zd;\n",
+ ngx_worker, __func__, sd, iov, iovcnt, sz);
+ return sz;
+}
+
+int
+setsockopt(int sd, int level, int optname, const void *optval, socklen_t optlen)
+{
+ struct tldk_sock *ts;
+
+ FE_TRACE("worker#%lu: %s(%d, %#x, %#x, %p, %d);\n",
+ ngx_worker, __func__, sd, level, optname, optval, optlen);
+
+ ts = sd_to_sock(sd);
+ if (ts == NULL)
+ return real_setsockopt(sd, level, optname, optval, optlen);
+ else if (ts->s == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/app/nginx/src/tldk/tldk_sock.h b/app/nginx/src/tldk/tldk_sock.h
new file mode 100644
index 0000000..b9140b5
--- /dev/null
+++ b/app/nginx/src/tldk/tldk_sock.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2017 Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef __TLDK_SOCK_H__
+#define __TLDK_SOCK_H__
+
+#include <stdint.h>
+#include <stddef.h>
+#include <inttypes.h>
+
+#include <ngx_config.h>
+#include <ngx_core.h>
+#include <ngx_event.h>
+
+#include <tle_tcp.h>
+
+extern ngx_module_t ngx_tldk_event_module;
+
+extern int tldk_stbl_init(const ngx_cycle_t *, const struct tldk_ctx *);
+extern void tldk_stbl_fini(void);
+
+extern int
+tldk_open_bind_listen(struct tldk_ctx *tcx, int domain, int type,
+ const struct sockaddr *addr, socklen_t addrlen, int backlog);
+
+extern void
+tldk_dump_event_stats(void);
+
+#define TLDK_ACCEPT_BULK 0x10
+
+struct tldk_sock {
+ LIST_ENTRY(tldk_sock) link;
+ struct tle_stream *s;
+ struct tle_event *erev;
+ struct tle_event *rxev;
+ struct tle_event *txev;
+ ngx_event_t *rev;
+ ngx_event_t *wev;
+ uint16_t posterr;
+ struct {
+ uint32_t num;
+ struct tle_stream *buf[TLDK_ACCEPT_BULK];
+ } acpt;
+};
+
+struct tldk_sock_list {
+ uint32_t num;
+ LIST_HEAD(, tldk_sock) head;
+};
+
+struct tldk_stbl {
+ struct tle_evq *syneq;
+ struct tle_evq *ereq;
+ struct tle_evq *rxeq;
+ struct tle_evq *txeq;
+ struct tldk_sock_list free;
+ struct tldk_sock_list lstn;
+ struct tldk_sock_list use;
+ int32_t nosd;
+ uint32_t snum;
+ struct tldk_sock *sd;
+};
+
+
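+/*
+ * TLDK socket descriptors are numbered starting at stbl.nosd (the
+ * RLIMIT_NOFILE hard limit captured in tldk_stbl_init), so they can never
+ * collide with kernel file descriptors: kernel fds occupy [0, nosd),
+ * TLDK sds occupy [nosd, nosd + snum).
+ */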
+#define SOCK_TO_SD(s) (stbl.nosd + ((s) - stbl.sd))
+#define SD_TO_SOCK(d) (stbl.sd + ((d) - stbl.nosd))
+
+/* One socket/file table per worker */
+extern struct tldk_stbl stbl;
+
+static inline struct tldk_sock *
+sd_to_sock(int32_t sd)
+{
+ uint32_t n;
+
+ n = sd - stbl.nosd;
+ if (n >= stbl.snum)
+ return NULL;
+
+ return stbl.sd + n;
+}
+
+#endif /* __TLDK_SOCK_H__ */