Diffstat (limited to 'app/nginx/src/tldk/be.c')
-rw-r--r--  app/nginx/src/tldk/be.c  1240
1 files changed, 1240 insertions, 0 deletions
diff --git a/app/nginx/src/tldk/be.c b/app/nginx/src/tldk/be.c
new file mode 100644
index 0000000..ba4039a
--- /dev/null
+++ b/app/nginx/src/tldk/be.c
@@ -0,0 +1,1240 @@
+/*
+ * Copyright (c) 2017 Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <netinet/ip6.h>
+
+#include <rte_version.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
+#include <rte_lpm6.h>
+#include <rte_lpm.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
+#include <tle_tcp.h>
+
+#include <ngx_config.h>
+#include <ngx_core.h>
+
+#include "be.h"
+
+#define RX_RING_SIZE 0x400
+#define TX_RING_SIZE 0x800
+#define MAX_RULES 0x100
+#define MAX_TBL8 0x800
+
+#define MPOOL_CACHE_SIZE 0x100
+#define MPOOL_NB_BUF 0x20000
+
+#define FRAG_MBUF_BUF_SIZE (RTE_PKTMBUF_HEADROOM + TLE_DST_MAX_HDR)
+
+#define RX_CSUM_OFFLOAD (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_TCP_CKSUM)
+
+#define TCP_MAX_PROCESS 0x20
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = {
+ .hw_vlan_strip = 1,
+ },
+};
+
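+/*
+ * Associates a required set of HW ptype capabilities with the RX callback
+ * that finishes header parsing in SW (see setup_rx_cb()).
+ */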
+struct ptype2cb {
+ uint32_t mask;
+ const char *name;
+ rte_rx_callback_fn fn;
+};
+
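+/* bit flags for the packet types the HW can classify on RX (see get_ptypes()). */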
+enum {
+ ETHER_PTYPE = 0x1,
+ IPV4_PTYPE = 0x2,
+ IPV4_EXT_PTYPE = 0x4,
+ IPV6_PTYPE = 0x8,
+ IPV6_EXT_PTYPE = 0x10,
+ TCP_PTYPE = 0x20,
+ UDP_PTYPE = 0x40,
+};
+
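+/*
+ * Create the per-context LPM tables used for IPv4/IPv6 destination lookup.
+ * Each worker/lcore context gets its own pair of tables on the given socket.
+ */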
+int
+be_lcore_lpm_init(struct tldk_ctx *tcx, uint32_t sid,
+ const struct tldk_ctx_conf *cf)
+{
+ ngx_uint_t worker = cf->worker;
+ uint32_t lcore = cf->lcore;
+ char str[RTE_LPM_NAMESIZE];
+
+ const struct rte_lpm_config lpm4_cfg = {
+ .max_rules = MAX_RULES,
+ .number_tbl8s = MAX_TBL8,
+ };
+
+ const struct rte_lpm6_config lpm6_cfg = {
+ .max_rules = MAX_RULES,
+ .number_tbl8s = MAX_TBL8,
+ };
+
+	snprintf(str, sizeof(str), "LPM4%lu-%u", worker, lcore);
+ tcx->lpm4 = rte_lpm_create(str, sid, &lpm4_cfg);
+ RTE_LOG(NOTICE, USER1, "%s(worker=%lu, lcore=%u): lpm4=%p;\n",
+ __func__, worker, lcore, tcx->lpm4);
+ if (tcx->lpm4 == NULL)
+ return -ENOMEM;
+
+	snprintf(str, sizeof(str), "LPM6%lu-%u", worker, lcore);
+ tcx->lpm6 = rte_lpm6_create(str, sid, &lpm6_cfg);
+ RTE_LOG(NOTICE, USER1, "%s(worker=%lu, lcore=%u): lpm6=%p;\n",
+ __func__, worker, lcore, tcx->lpm6);
+ if (tcx->lpm6 == NULL) {
+ rte_lpm_free(tcx->lpm4);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
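+/*
+ * Destination lookup callbacks, presumably registered with the TLDK context
+ * elsewhere (e.g. via the tle_ctx_param lookup4/lookup6 hooks) so the stack
+ * can map a destination address to a prebuilt tle_dest entry.
+ */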
+int
+be_lpm4_dst_lookup(void *data, const struct in_addr *addr,
+ struct tle_dest *res)
+{
+ int32_t rc;
+ uint32_t idx;
+ struct tldk_ctx *tcx;
+ struct tle_dest *dst;
+
+ tcx = data;
+ rc = rte_lpm_lookup(tcx->lpm4, rte_be_to_cpu_32(addr->s_addr), &idx);
+ if (rc == 0) {
+ dst = &tcx->dst4[idx];
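+		/* copy only the fixed fields plus the prebuilt L2/L3 header template. */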
+ memcpy(res, dst, dst->l2_len + dst->l3_len +
+ offsetof(struct tle_dest, hdr));
+ }
+
+ return rc;
+}
+
+int
+be_lpm6_dst_lookup(void *data, const struct in6_addr *addr,
+ struct tle_dest *res)
+{
+ int32_t rc;
+ struct tldk_ctx *tcx;
+ struct tle_dest *dst;
+ uintptr_t p;
+#if RTE_VERSION_NUM(17, 5, 0, 0) <= RTE_VERSION
+ uint32_t idx;
+#else
+ uint8_t idx;
+#endif
+
+ tcx = data;
+ p = (uintptr_t)addr->s6_addr;
+ rc = rte_lpm6_lookup(tcx->lpm6, (uint8_t *)p, &idx);
+ if (rc == 0) {
+ dst = &tcx->dst6[idx];
+ memcpy(res, dst, dst->l2_len + dst->l3_len +
+ offsetof(struct tle_dest, hdr));
+ }
+
+ return rc;
+}
+
+/*
+ * Initialise DPDK port.
+ */
+static int
+port_init(const struct tldk_port_conf *pcf)
+{
+ int32_t rc;
+ struct rte_eth_conf port_conf;
+ struct rte_eth_dev_info dev_info;
+
+ rte_eth_dev_info_get(pcf->id, &dev_info);
+
+ if ((dev_info.rx_offload_capa & pcf->rx_offload) != pcf->rx_offload) {
+ RTE_LOG(ERR, USER1,
+ "port#%u supported/requested RX offloads don't match, "
+ "supported: %#x, requested: %#x;\n",
+ pcf->id, dev_info.rx_offload_capa, pcf->rx_offload);
+ return NGX_ERROR;
+ }
+ if ((dev_info.tx_offload_capa & pcf->tx_offload) != pcf->tx_offload) {
+ RTE_LOG(ERR, USER1,
+ "port#%u supported/requested TX offloads don't match, "
+ "supported: %#x, requested: %#x;\n",
+ pcf->id, dev_info.tx_offload_capa, pcf->tx_offload);
+ return NGX_ERROR;
+ }
+
+ port_conf = port_conf_default;
+
+ if ((pcf->rx_offload & RX_CSUM_OFFLOAD) != 0) {
+ RTE_LOG(ERR, USER1, "%s(%u): enabling RX csum offload;\n",
+ __func__, pcf->id);
+ port_conf.rxmode.hw_ip_checksum = 1;
+ }
+
+ port_conf.rxmode.max_rx_pkt_len = pcf->mtu + ETHER_CRC_LEN;
+ if (port_conf.rxmode.max_rx_pkt_len > ETHER_MAX_LEN)
+ port_conf.rxmode.jumbo_frame = 1;
+ port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
+ port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_TCP;
+
+ rc = rte_eth_dev_configure(pcf->id, pcf->nb_queues, pcf->nb_queues,
+ &port_conf);
+ RTE_LOG(NOTICE, USER1,
+ "%s: rte_eth_dev_configure(prt_id=%u, nb_rxq=%u, nb_txq=%u) "
+ "returns %d;\n", __func__, pcf->id, pcf->nb_queues,
+ pcf->nb_queues, rc);
+
+ if (rc != 0)
+ return NGX_ERROR;
+
+ return NGX_OK;
+}
+
+/*
+ * Check that lcore is enabled, not master, and not in use already.
+ */
+int
+be_check_lcore(uint32_t lid)
+{
+ if (rte_lcore_is_enabled(lid) == 0) {
+ RTE_LOG(ERR, USER1, "lcore %u is not enabled\n", lid);
+ return -EINVAL;
+ }
+
+ if (rte_get_master_lcore() != lid &&
+ rte_eal_get_lcore_state(lid) == RUNNING) {
+ RTE_LOG(ERR, USER1, "lcore %u already running %p\n",
+ lid, lcore_config[lid].f);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int
+be_mpool_init(struct tldk_ctx *tcx)
+{
+ int32_t rc;
+ uint32_t nmb, sid;
+ struct rte_mempool *mp;
+ char name[RTE_MEMPOOL_NAMESIZE];
+
+ ngx_uint_t worker = tcx->cf->worker;
+ uint32_t lcore = tcx->cf->lcore;
+
+ sid = rte_lcore_to_socket_id(tcx->cf->lcore);
+ nmb = (tcx->cf->nb_mbuf == 0) ? MPOOL_NB_BUF : tcx->cf->nb_mbuf;
+
+ snprintf(name, sizeof(name), "MP%lu-%u", worker, lcore);
+ mp = rte_pktmbuf_pool_create(name, nmb, MPOOL_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE, sid);
+ if (mp == NULL) {
+ rc = -rte_errno;
+ RTE_LOG(ERR, USER1, "%s:Mempool creation failed for "
+ "ctx:wrk(%lu)-ctx:lcore(%u) with error code: %d\n",
+ __func__, worker, lcore, rc);
+ return rc;
+ }
+
+ tcx->mpool = mp;
+
+ snprintf(name, sizeof(name), "frag_MP%lu-%u",
+ worker, lcore);
+ mp = rte_pktmbuf_pool_create(name, nmb,
+		MPOOL_CACHE_SIZE, 0, FRAG_MBUF_BUF_SIZE, sid);
+ if (mp == NULL) {
+ rc = -rte_errno;
+ RTE_LOG(ERR, USER1, "%s:Frag mempool creation failed for "
+ "ctx:wrk(%lu)-ctx:lcore(%u) with error code: %d\n",
+ __func__, worker, lcore, rc);
+ return rc;
+ }
+
+ tcx->frag_mpool = mp;
+
+ return 0;
+}
+
+int
+be_queue_init(struct tldk_ctx *tcx, const tldk_conf_t *cf)
+{
+ int32_t socket, rc;
+ uint16_t queue_id;
+ uint32_t port_id, i;
+ struct rte_eth_dev_info dev_info;
+ const struct tldk_ctx_conf *ctx;
+ const struct tldk_port_conf *pcf;
+
+ ctx = tcx->cf;
+ for (i = 0; i < ctx->nb_dev; i++) {
+ port_id = ctx->dev[i].port;
+ queue_id = ctx->dev[i].queue;
+ pcf = &cf->port[port_id];
+
+ rte_eth_dev_info_get(port_id, &dev_info);
+ dev_info.default_rxconf.rx_drop_en = 1;
+ dev_info.default_txconf.tx_free_thresh = TX_RING_SIZE / 2;
+
+ if (pcf->tx_offload != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s(port=%u): enabling full featured TX;\n",
+ __func__, port_id);
+ dev_info.default_txconf.txq_flags = 0;
+ }
+
+ socket = rte_eth_dev_socket_id(port_id);
+
+ rc = rte_eth_rx_queue_setup(port_id, queue_id, RX_RING_SIZE,
+ socket, &dev_info.default_rxconf, tcx->mpool);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: rx queue=%u setup failed with error "
+ "code: %d\n", __func__, queue_id, rc);
+ return rc;
+ }
+
+ rc = rte_eth_tx_queue_setup(port_id, queue_id, TX_RING_SIZE,
+ socket, &dev_info.default_txconf);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: tx queue=%u setup failed with error "
+ "code: %d\n", __func__, queue_id, rc);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Setup all enabled ports.
+ */
+int
+be_port_init(tldk_conf_t *cf)
+{
+ int32_t rc;
+ uint32_t i;
+ struct tldk_port_conf *dpf;
+
+ for (i = 0; i != cf->nb_port; i++) {
+ dpf = &cf->port[i];
+ rc = port_init(dpf);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s: port=%u init failed with error code: %d\n",
+ __func__, dpf->id, rc);
+ return NGX_ERROR;
+ }
+ rte_eth_macaddr_get(dpf->id, &dpf->mac);
+ rte_eth_promiscuous_enable(dpf->id);
+ }
+
+ return NGX_OK;
+}
+
+static int
+be_add_ipv4_route(struct tldk_ctx *tcx, const struct tldk_dest_conf *dcf,
+ uint8_t idx)
+{
+ int32_t rc;
+ uint32_t addr, depth;
+ char str[INET_ADDRSTRLEN];
+
+ depth = dcf->prfx;
+ addr = rte_be_to_cpu_32(dcf->ipv4.s_addr);
+
+ inet_ntop(AF_INET, &dcf->ipv4, str, sizeof(str));
+ rc = rte_lpm_add(tcx->lpm4, addr, depth, idx);
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u,dev_id=%u,dev=%p,"
+ "ipv4=%s/%u,mtu=%u,"
+ "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) "
+ "returns %d;\n",
+ __func__, tcx->cf->lcore, dcf->dev, tcx->dst4[idx].dev,
+ str, depth, tcx->dst4[idx].mtu,
+ dcf->mac.addr_bytes[0], dcf->mac.addr_bytes[1],
+ dcf->mac.addr_bytes[2], dcf->mac.addr_bytes[3],
+ dcf->mac.addr_bytes[4], dcf->mac.addr_bytes[5],
+ rc);
+
+ return rc;
+}
+
+static int
+be_add_ipv6_route(struct tldk_ctx *tcx, const struct tldk_dest_conf *dcf,
+ uint8_t idx)
+{
+ int32_t rc;
+ uint32_t depth;
+ char str[INET6_ADDRSTRLEN];
+
+ depth = dcf->prfx;
+
+ rc = rte_lpm6_add(tcx->lpm6, (uint8_t *)(uintptr_t)dcf->ipv6.s6_addr,
+ depth, idx);
+
+ inet_ntop(AF_INET6, &dcf->ipv6, str, sizeof(str));
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u,dev_id=%u,dev=%p,"
+ "ipv6=%s/%u,mtu=%u,"
+ "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) "
+ "returns %d;\n",
+ __func__, tcx->cf->lcore, dcf->dev, tcx->dst6[idx].dev,
+		str, depth, tcx->dst6[idx].mtu,
+ dcf->mac.addr_bytes[0], dcf->mac.addr_bytes[1],
+ dcf->mac.addr_bytes[2], dcf->mac.addr_bytes[3],
+ dcf->mac.addr_bytes[4], dcf->mac.addr_bytes[5],
+ rc);
+
+ return rc;
+}
+
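+/*
+ * Fill one tle_dest entry for the given device: MTU, header mempool and a
+ * prebuilt Ethernet + IPv4/IPv6 header template used on transmit.
+ */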
+static void
+fill_dst(struct tle_dest *dst, const struct tldk_dev *td,
+ const struct tldk_port_conf *pcf, const struct tldk_dest_conf *dest,
+ uint16_t l3_type, struct rte_mempool *mp)
+{
+ struct ether_hdr *eth;
+ struct ipv4_hdr *ip4h;
+ struct ipv6_hdr *ip6h;
+
+ dst->dev = td->dev;
+ dst->head_mp = mp;
+ dst->mtu = RTE_MIN(dest->mtu, pcf->mtu);
+ dst->l2_len = sizeof(*eth);
+
+ eth = (struct ether_hdr *)dst->hdr;
+
+ ether_addr_copy(&pcf->mac, &eth->s_addr);
+ ether_addr_copy(&dest->mac, &eth->d_addr);
+ eth->ether_type = rte_cpu_to_be_16(l3_type);
+
+ if (l3_type == ETHER_TYPE_IPv4) {
+ dst->l3_len = sizeof(*ip4h);
+ ip4h = (struct ipv4_hdr *)(eth + 1);
+ ip4h->version_ihl = 4 << 4 |
+ sizeof(*ip4h) / IPV4_IHL_MULTIPLIER;
+ ip4h->time_to_live = 64;
+ ip4h->next_proto_id = IPPROTO_TCP;
+ } else if (l3_type == ETHER_TYPE_IPv6) {
+ dst->l3_len = sizeof(*ip6h);
+ ip6h = (struct ipv6_hdr *)(eth + 1);
+ ip6h->vtc_flow = 6 << 4;
+ ip6h->proto = IPPROTO_TCP;
+ ip6h->hop_limits = 64;
+ }
+}
+
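+/*
+ * Reserve dnum consecutive tle_dest entries for the device, fill them and
+ * install an LPM route (IPv4 or IPv6, depending on family) pointing at each.
+ */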
+static int
+be_add_dest(const struct tldk_dest_conf *dcf, struct tldk_ctx *tcx,
+ uint32_t dev_idx, const struct tldk_port_conf *pcf, uint32_t family,
+ uint32_t dnum)
+{
+ struct tle_dest *dp;
+ uint32_t i, n, m;
+ uint16_t l3_type;
+ int32_t rc = 0;
+
+ if (family == AF_INET) {
+ n = tcx->dst4_num;
+ dp = tcx->dst4 + n;
+ m = RTE_DIM(tcx->dst4);
+ l3_type = ETHER_TYPE_IPv4;
+ } else {
+ n = tcx->dst6_num;
+ dp = tcx->dst6 + n;
+ m = RTE_DIM(tcx->dst6);
+ l3_type = ETHER_TYPE_IPv6;
+ }
+
+ if (n + dnum >= m) {
+		RTE_LOG(ERR, USER1, "%s(lcore=%u, family=%u, dnum=%u) exceeds "
+ "maximum allowed number of destinations(%u);\n",
+ __func__, tcx->cf->lcore, family, dnum, m);
+ return -ENOSPC;
+ }
+
+ for (i = 0; i != dnum && rc == 0; i++) {
+ fill_dst(dp + i, &tcx->dev[dev_idx], pcf, dcf,
+ l3_type, tcx->frag_mpool);
+ if (family == AF_INET)
+ rc = be_add_ipv4_route(tcx, dcf, n + i);
+ else
+ rc = be_add_ipv6_route(tcx, dcf, n + i);
+ }
+
+ if (family == AF_INET)
+ tcx->dst4_num = n + i;
+ else
+ tcx->dst6_num = n + i;
+
+ return rc;
+}
+
+int
+be_dst_init(struct tldk_ctx *tcx, const tldk_conf_t *cf)
+{
+ uint32_t i, f, d, l, port_id;
+ const struct tldk_ctx_conf *ctx_cf = tcx->cf;
+ const struct tldk_dest_conf *dcf;
+ const struct tldk_port_conf *pcf;
+ int32_t rc = 0;
+
+ for (i = 0; i < ctx_cf->nb_dest; i++) {
+ dcf = &ctx_cf->dest[i];
+ f = dcf->family;
+ d = dcf->dev;
+ for (l = 0; l != tcx->nb_dev; l++) {
+ if (tcx->dev[l].cf.id == d) {
+ /* fetch the port conf for the port
+ * associated with device
+ */
+ port_id = tcx->dev[l].cf.port;
+ pcf = &cf->port[port_id];
+ rc = be_add_dest(dcf, tcx, l, pcf, f, 1);
+ if (rc != 0) {
+ RTE_LOG(ERR, USER1,
+ "%s(tcx=%u, family=%u) "
+ "could not add "
+ "destinations(%u)\n",
+ __func__, ctx_cf->lcore, f, i);
+ return -ENOSPC;
+ }
+ break;
+ }
+ }
+ }
+
+ return rc;
+}
+
+int
+be_add_dev(struct tldk_ctx *tcx, const tldk_conf_t *cf)
+{
+ int32_t rc = 0;
+ uint32_t i, port_id;
+ struct tle_dev_param dprm;
+ const struct tldk_port_conf *pcf;
+
+ memset(&dprm, 0, sizeof(dprm));
+
+ /* add the tle_dev on all applicable ports of the context */
+ for (i = 0; i != tcx->cf->nb_dev; i++) {
+
+ /* get the port id associated with the device */
+ port_id = tcx->cf->dev[i].port;
+
+ /* get the port config by port id */
+ pcf = &cf->port[port_id];
+
+ /* populate the tle_dev_param struct */
+ dprm.rx_offload = pcf->rx_offload;
+ dprm.tx_offload = pcf->tx_offload;
+ dprm.local_addr4.s_addr = pcf->ipv4;
+
+ memcpy(&dprm.local_addr6, &pcf->ipv6,
+ sizeof(pcf->ipv6));
+
+ /* add the tle_dev */
+ tcx->dev[i].dev = tle_add_dev(tcx->ctx, &dprm);
+
+ RTE_LOG(NOTICE, USER1, "%s(port=%u), dev: %p\n",
+ __func__, port_id,
+ tcx->dev[i].dev);
+
+ if (tcx->dev[i].dev == NULL)
+ rc = -rte_errno;
+
+ if (rc != 0)
+ return rc;
+
+ tcx->nb_dev++;
+ tcx->dev[i].cf = tcx->cf->dev[i];
+ }
+
+ return rc;
+}
+
+static uint32_t
+get_ptypes(const struct tldk_dev *td)
+{
+ uint32_t smask;
+ int32_t i, rc;
+ const uint32_t pmask = RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK |
+ RTE_PTYPE_L4_MASK;
+
+ smask = 0;
+ rc = rte_eth_dev_get_supported_ptypes(td->cf.port, pmask, NULL, 0);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s(port=%u) failed to get supported ptypes;\n",
+ __func__, td->cf.port);
+ return smask;
+ }
+
+ uint32_t ptype[rc];
+ rc = rte_eth_dev_get_supported_ptypes(td->cf.port, pmask, ptype, rc);
+
+ for (i = 0; i != rc; i++) {
+ switch (ptype[i]) {
+ case RTE_PTYPE_L2_ETHER:
+ smask |= ETHER_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV4:
+ case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN:
+ smask |= IPV4_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV4_EXT:
+ smask |= IPV4_EXT_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV6:
+ case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN:
+ smask |= IPV6_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV6_EXT:
+ smask |= IPV6_EXT_PTYPE;
+ break;
+ case RTE_PTYPE_L4_TCP:
+ smask |= TCP_PTYPE;
+ break;
+ case RTE_PTYPE_L4_UDP:
+ smask |= UDP_PTYPE;
+ break;
+ }
+ }
+
+ return smask;
+}
+
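+/*
+ * Pack header lengths into the mbuf tx_offload bit-fields:
+ * l2_len:7 | l3_len:9 | l4_len:8 | tso_segsz:16 | outer_l3_len:9 | outer_l2_len:7,
+ * which is what the shift amounts (7, 16, 24, 40, 49) below correspond to.
+ */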
+static inline uint64_t
+_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
+ uint64_t ol3, uint64_t ol2)
+{
+ return il2 | il3 << 7 | il4 << 16 | tso << 24 | ol3 << 40 | ol2 << 49;
+}
+
+static inline void
+fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4)
+{
+ m->tx_offload = _mbuf_tx_offload(l2, l3, l4, 0, 0, 0);
+}
+
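+/*
+ * A packet is a fragment if anything other than the DF bit is set in
+ * fragment_offset, i.e. MF is set or the fragment offset is non-zero.
+ */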
+static inline int
+is_ipv4_frag(const struct ipv4_hdr *iph)
+{
+ const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG);
+
+ return ((mask & iph->fragment_offset) != 0);
+}
+
+static inline uint32_t
+get_tcp_header_size(struct rte_mbuf *m, uint32_t l2_len, uint32_t l3_len)
+{
+ const struct tcp_hdr *tcp;
+
+ tcp = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len);
+ return (tcp->data_off >> 4) * 4;
+}
+
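+/*
+ * Trim the mbuf down to the length reported by the IP header
+ * (drops e.g. Ethernet minimum-frame padding appended by the sender).
+ */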
+static inline void
+adjust_ipv4_pktlen(struct rte_mbuf *m, uint32_t l2_len)
+{
+ uint32_t plen, trim;
+ const struct ipv4_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2_len);
+ plen = rte_be_to_cpu_16(iph->total_length) + l2_len;
+ if (plen < m->pkt_len) {
+ trim = m->pkt_len - plen;
+ rte_pktmbuf_trim(m, trim);
+ }
+}
+
+static inline void
+adjust_ipv6_pktlen(struct rte_mbuf *m, uint32_t l2_len)
+{
+ uint32_t plen, trim;
+ const struct ipv6_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, l2_len);
+ plen = rte_be_to_cpu_16(iph->payload_len) + sizeof(*iph) + l2_len;
+ if (plen < m->pkt_len) {
+ trim = m->pkt_len - plen;
+ rte_pktmbuf_trim(m, trim);
+ }
+}
+
+static inline void
+tcp_stat_update(struct tldk_ctx *lc, const struct rte_mbuf *m,
+ uint32_t l2_len, uint32_t l3_len)
+{
+ const struct tcp_hdr *th;
+
+ th = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len);
+ lc->tcp_stat.flags[th->tcp_flags]++;
+}
+
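+/*
+ * Return the IPv4 header length (from IHL); optionally mark fragments as
+ * RTE_PTYPE_L4_FRAG, and invalidate the packet type if the header does not
+ * fit into the first segment or the L4 protocol does not match.
+ */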
+static inline uint32_t
+get_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, uint32_t frag)
+{
+ const struct ipv4_hdr *iph;
+ int32_t dlen, len;
+
+ dlen = rte_pktmbuf_data_len(m);
+ dlen -= l2;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2);
+ len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER;
+
+ if (frag != 0 && is_ipv4_frag(iph)) {
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+ m->packet_type |= RTE_PTYPE_L4_FRAG;
+ }
+
+ if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto))
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+
+ return len;
+}
+
+static inline int
+ipv6x_hdr(uint32_t proto)
+{
+ return (proto == IPPROTO_HOPOPTS ||
+ proto == IPPROTO_ROUTING ||
+ proto == IPPROTO_FRAGMENT ||
+ proto == IPPROTO_AH ||
+ proto == IPPROTO_NONE ||
+ proto == IPPROTO_DSTOPTS);
+}
+
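+/*
+ * Walk the IPv6 extension header chain starting from nproto and return the
+ * total L3 header length; fragments are flagged and unrecognized or invalid
+ * packets get RTE_PTYPE_UNKNOWN.
+ */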
+static inline uint32_t
+get_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto,
+ uint32_t fproto)
+{
+ const struct ip6_ext *ipx;
+ int32_t dlen, len, ofs;
+
+ len = sizeof(struct ipv6_hdr);
+
+ dlen = rte_pktmbuf_data_len(m);
+ dlen -= l2;
+
+ ofs = l2 + len;
+ ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs);
+
+ while (ofs > 0 && len < dlen) {
+
+ switch (nproto) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ ofs = (ipx->ip6e_len + 1) << 3;
+ break;
+ case IPPROTO_AH:
+ ofs = (ipx->ip6e_len + 2) << 2;
+ break;
+ case IPPROTO_FRAGMENT:
+ /*
+ * tso_segsz is not used by RX, so use it as temporary
+ * buffer to store the fragment offset.
+ */
+ m->tso_segsz = ofs;
+ ofs = sizeof(struct ip6_frag);
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+ m->packet_type |= RTE_PTYPE_L4_FRAG;
+ break;
+ default:
+ ofs = 0;
+ }
+
+ if (ofs > 0) {
+ nproto = ipx->ip6e_nxt;
+ len += ofs;
+ ipx += ofs / sizeof(*ipx);
+ }
+ }
+
+ /* unrecognized or invalid packet. */
+ if ((ofs == 0 && nproto != fproto) || len > dlen)
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+
+ return len;
+}
+
+static inline uint32_t
+get_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto)
+{
+ const struct ipv6_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+
+ if (iph->proto == fproto)
+ return sizeof(struct ipv6_hdr);
+ else if (ipv6x_hdr(iph->proto) != 0)
+ return get_ipv6x_hdr_len(m, l2, iph->proto, fproto);
+
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return 0;
+}
+
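+/*
+ * SW fallback parser for TCP packets: classify the packet and fill the
+ * L2/L3/L4 header lengths when HW ptype information is not available.
+ */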
+static inline void
+fill_eth_tcp_hdr_len(struct rte_mbuf *m)
+{
+ uint32_t dlen, l2_len, l3_len, l4_len;
+ uint16_t etp;
+ const struct ether_hdr *eth;
+
+ dlen = rte_pktmbuf_data_len(m);
+
+ /* check that first segment is at least 54B long. */
+ if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
+ sizeof(struct tcp_hdr)) {
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return;
+ }
+
+ l2_len = sizeof(*eth);
+
+ eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
+ etp = eth->ether_type;
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN))
+ l2_len += sizeof(struct vlan_hdr);
+
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
+ m->packet_type = RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_TCP, 1);
+ l4_len = get_tcp_header_size(m, l2_len, l3_len);
+ fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(m, l2_len);
+ } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
+ dlen >= l2_len + sizeof(struct ipv6_hdr) +
+ sizeof(struct tcp_hdr)) {
+ m->packet_type = RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(m, l2_len, l3_len);
+ fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(m, l2_len);
+ } else
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+}
+
+/*
+ * HW can recognize L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k)
+ */
+static uint16_t
+type0_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, __rte_unused void *user_param)
+{
+ uint32_t j, tp;
+ uint32_t l4_len, l3_len, l2_len;
+ const struct ether_hdr *eth;
+
+ l2_len = sizeof(*eth);
+
+ for (j = 0; j != nb_pkts; j++) {
+
+ BE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ /* non fragmented tcp packets. */
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L2_ETHER):
+ l4_len = get_tcp_header_size(pkt[j], l2_len,
+ sizeof(struct ipv4_hdr));
+ fill_pkt_hdr_len(pkt[j], l2_len,
+ sizeof(struct ipv4_hdr), l4_len);
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L2_ETHER):
+ l4_len = get_tcp_header_size(pkt[j], l2_len,
+ sizeof(struct ipv6_hdr));
+ fill_pkt_hdr_len(pkt[j], l2_len,
+ sizeof(struct ipv6_hdr), l4_len);
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv4_hdr_len(pkt[j], l2_len,
+ IPPROTO_TCP, 0);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ break;
+ default:
+ /* treat packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+ }
+
+ return nb_pkts;
+}
+
+/*
+ * HW can recognize L2/L3/L4 and fragments (i40e).
+ */
+static uint16_t
+type1_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, tp;
+ struct tldk_ctx *tcx;
+ uint32_t l4_len, l3_len, l2_len;
+ const struct ether_hdr *eth;
+
+ tcx = user_param;
+ l2_len = sizeof(*eth);
+
+ for (j = 0; j != nb_pkts; j++) {
+
+ BE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv4_hdr_len(pkt[j], l2_len,
+ IPPROTO_TCP, 0);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv4_pktlen(pkt[j], l2_len);
+ tcp_stat_update(tcx, pkt[j], l2_len, l3_len);
+ break;
+ case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP);
+ l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
+ fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
+ adjust_ipv6_pktlen(pkt[j], l2_len);
+ tcp_stat_update(tcx, pkt[j], l2_len, l3_len);
+ break;
+ default:
+ /* treat packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+
+ }
+
+ return nb_pkts;
+}
+
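+/*
+ * HW provides no usable ptype information: parse everything in SW.
+ */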
+static uint16_t
+typen_tcp_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, __rte_unused void *user_param)
+{
+ uint32_t j;
+
+ for (j = 0; j != nb_pkts; j++) {
+
+ BE_PKT_DUMP(pkt[j]);
+ fill_eth_tcp_hdr_len(pkt[j]);
+ }
+
+ return nb_pkts;
+}
+
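+/*
+ * Pick the first RX callback whose required ptype mask is fully supported
+ * by the port and attach it to the device RX queue.
+ */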
+int
+setup_rx_cb(const struct tldk_dev *td, struct tldk_ctx *tcx)
+{
+ int32_t rc;
+ uint32_t i, n, smask;
+ void *cb;
+ const struct ptype2cb *ptype2cb;
+
+ static const struct ptype2cb tcp_ptype2cb[] = {
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE |
+ IPV6_PTYPE | IPV6_EXT_PTYPE | TCP_PTYPE,
+ .name = "HW l2/l3x/l4-tcp ptype",
+ .fn = type0_tcp_rx_callback,
+ },
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE |
+ TCP_PTYPE,
+ .name = "HW l2/l3/l4-tcp ptype",
+ .fn = type1_tcp_rx_callback,
+ },
+ {
+ .mask = 0,
+ .name = "tcp no HW ptype",
+ .fn = typen_tcp_rx_callback,
+ },
+ };
+
+ smask = get_ptypes(td);
+
+ ptype2cb = tcp_ptype2cb;
+ n = RTE_DIM(tcp_ptype2cb);
+
+ for (i = 0; i != n; i++) {
+ if ((smask & ptype2cb[i].mask) == ptype2cb[i].mask) {
+ cb = rte_eth_add_rx_callback(td->cf.port, td->cf.queue,
+ ptype2cb[i].fn, tcx);
+ rc = -rte_errno;
+ RTE_LOG(ERR, USER1,
+ "%s(port=%u), setup RX callback \"%s\" "
+ "returns %p;\n",
+ __func__, td->cf.port, ptype2cb[i].name, cb);
+ return ((cb == NULL) ? rc : 0);
+ }
+ }
+
+ /* no proper callback found. */
+ RTE_LOG(ERR, USER1,
+ "%s(port=%u) failed to find an appropriate callback;\n",
+ __func__, td->cf.port);
+ return -ENOENT;
+}
+
+int
+be_lcore_setup(struct tldk_ctx *tcx)
+{
+ uint32_t i;
+ int32_t rc;
+
+ RTE_LOG(NOTICE, USER1, "%s:(lcore=%u, ctx=%p) start\n",
+ __func__, tcx->cf->lcore, tcx->ctx);
+
+ rc = 0;
+ for (i = 0; i != tcx->nb_dev && rc == 0; i++) {
+ RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, q=%u)\n",
+ __func__, i, tcx->dev[i].cf.port, tcx->dev[i].cf.queue);
+
+ rc = setup_rx_cb(&tcx->dev[i], tcx);
+ if (rc < 0)
+ return rc;
+ }
+
+ return rc;
+}
+
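+/*
+ * Receive a burst from the device queue and hand it to the TCP layer;
+ * mbufs rejected by tle_tcp_rx_bulk() are returned in rp[]/rc[] and freed.
+ */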
+static inline void
+be_rx(struct tldk_dev *dev)
+{
+ uint32_t j, k, n;
+ struct rte_mbuf *pkt[MAX_PKT_BURST];
+ struct rte_mbuf *rp[MAX_PKT_BURST];
+ int32_t rc[MAX_PKT_BURST];
+
+ n = rte_eth_rx_burst(dev->cf.port,
+ dev->cf.queue, pkt, RTE_DIM(pkt));
+
+ if (n != 0) {
+ dev->rx_stat.in += n;
+ BE_TRACE("%s(%u): rte_eth_rx_burst(%u, %u) returns %u\n",
+ __func__, dev->cf.id, dev->cf.port,
+ dev->cf.queue, n);
+
+ k = tle_tcp_rx_bulk(dev->dev, pkt, rp, rc, n);
+
+ dev->rx_stat.up += k;
+ dev->rx_stat.drop += n - k;
+ BE_TRACE("%s: tle_tcp_rx_bulk(%p, %u) returns %u\n",
+ __func__, dev->dev, n, k);
+
+ for (j = 0; j != n - k; j++) {
+ BE_TRACE("%s:%d(port=%u) rp[%u]={%p, %d};\n",
+ __func__, __LINE__, dev->cf.port,
+ j, rp[j], rc[j]);
+ rte_pktmbuf_free(rp[j]);
+ }
+ }
+}
+
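+/*
+ * Drain the per-device TX buffer: when at least half of it is free, refill
+ * it from tle_tcp_tx_bulk(), then transmit and shift any unsent mbufs to
+ * the front of the buffer for the next call.
+ */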
+static inline void
+be_tx(struct tldk_dev *dev)
+{
+ uint32_t j = 0, k, n;
+ struct rte_mbuf **mb;
+
+ n = dev->tx_buf.num;
+ k = RTE_DIM(dev->tx_buf.pkt) - n;
+ mb = dev->tx_buf.pkt;
+
+ if (k >= RTE_DIM(dev->tx_buf.pkt) / 2) {
+ j = tle_tcp_tx_bulk(dev->dev, mb + n, k);
+ n += j;
+ dev->tx_stat.down += j;
+ }
+
+ if (n == 0)
+ return;
+
+ BE_TRACE("%s: tle_tcp_tx_bulk(%p) returns %u,\n"
+ "total pkts to send: %u\n",
+ __func__, dev->dev, j, n);
+
+ for (j = 0; j != n; j++)
+ BE_PKT_DUMP(mb[j]);
+
+ k = rte_eth_tx_burst(dev->cf.port,
+ dev->cf.queue, mb, n);
+
+ dev->tx_stat.out += k;
+ dev->tx_stat.drop += n - k;
+ BE_TRACE("%s: rte_eth_tx_burst(%u, %u, %u) returns %u\n",
+ __func__, dev->cf.port,
+ dev->cf.queue, n, k);
+
+ dev->tx_buf.num = n - k;
+ if (k != 0)
+ for (j = k; j != n; j++)
+ mb[j - k] = mb[j];
+}
+
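+/*
+ * One backend iteration: RX and TX on each device of the context, then let
+ * the stack run its internal processing (presumably timers, retransmits and
+ * the like) for up to TCP_MAX_PROCESS streams.
+ */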
+void
+be_lcore_tcp(struct tldk_ctx *tcx)
+{
+ uint32_t i;
+
+ if (tcx == NULL)
+ return;
+
+ for (i = 0; i != tcx->nb_dev; i++) {
+ be_rx(&tcx->dev[i]);
+ be_tx(&tcx->dev[i]);
+ }
+ tle_tcp_process(tcx->ctx, TCP_MAX_PROCESS);
+}
+
+void
+be_lcore_clear(struct tldk_ctx *tcx)
+{
+ uint32_t i, j;
+
+ if (tcx == NULL)
+ return;
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u, ctx: %p) finish\n",
+ __func__, tcx->cf->lcore, tcx->ctx);
+ for (i = 0; i != tcx->nb_dev; i++) {
+ RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, q=%u, lcore=%u, dev=%p) "
+ "rx_stats={"
+ "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "}, "
+ "tx_stats={"
+			"down=%" PRIu64 ",out=%" PRIu64 ",drop=%" PRIu64 "};\n",
+ __func__, i, tcx->dev[i].cf.port, tcx->dev[i].cf.queue,
+ tcx->cf->lcore,
+ tcx->dev[i].dev,
+ tcx->dev[i].rx_stat.in,
+ tcx->dev[i].rx_stat.up,
+ tcx->dev[i].rx_stat.drop,
+ tcx->dev[i].tx_stat.down,
+ tcx->dev[i].tx_stat.out,
+ tcx->dev[i].tx_stat.drop);
+ }
+
+ RTE_LOG(NOTICE, USER1, "tcp_stat={\n");
+ for (i = 0; i != RTE_DIM(tcx->tcp_stat.flags); i++) {
+ if (tcx->tcp_stat.flags[i] != 0)
+ RTE_LOG(NOTICE, USER1, "[flag=%#x]==%" PRIu64 ";\n",
+ i, tcx->tcp_stat.flags[i]);
+ }
+ RTE_LOG(NOTICE, USER1, "};\n");
+
+ for (i = 0; i != tcx->nb_dev; i++)
+ for (j = 0; j != tcx->dev[i].tx_buf.num; j++)
+ rte_pktmbuf_free(tcx->dev[i].tx_buf.pkt[j]);
+
+}
+
+void
+be_stop_port(uint32_t port)
+{
+ struct rte_eth_stats stats;
+
+	RTE_LOG(NOTICE, USER1, "%s: stopping port %u\n", __func__, port);
+
+ rte_eth_stats_get(port, &stats);
+ RTE_LOG(NOTICE, USER1, "port %u stats={\n"
+ "ipackets=%" PRIu64 ";"
+ "ibytes=%" PRIu64 ";"
+ "ierrors=%" PRIu64 ";"
+ "imissed=%" PRIu64 ";\n"
+ "opackets=%" PRIu64 ";"
+ "obytes=%" PRIu64 ";"
+ "oerrors=%" PRIu64 ";\n"
+ "}\n",
+ port,
+ stats.ipackets,
+ stats.ibytes,
+ stats.ierrors,
+ stats.imissed,
+ stats.opackets,
+ stats.obytes,
+ stats.oerrors);
+ rte_eth_dev_stop(port);
+}
+
+int
+be_lcore_main(void *arg)
+{
+ int32_t rc;
+ uint32_t lid, i;
+ struct tldk_ctx *tcx;
+ struct lcore_ctxs_list *lc_ctx;
+
+ lc_ctx = arg;
+ lid = rte_lcore_id();
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n", __func__, lid);
+
+ rc = 0;
+ while (force_quit == 0) {
+ for (i = 0; i < lc_ctx->nb_ctxs; i++) {
+ tcx = lc_ctx->ctxs[i];
+ be_lcore_tcp(tcx);
+ }
+ }
+
+ RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n", __func__, lid);
+
+ return rc;
+}