aboutsummaryrefslogtreecommitdiffstats
path: root/app/nginx/src/tldk/be.c
diff options
context:
space:
mode:
Diffstat (limited to 'app/nginx/src/tldk/be.c')
-rw-r--r--app/nginx/src/tldk/be.c1249
1 files changed, 0 insertions, 1249 deletions
diff --git a/app/nginx/src/tldk/be.c b/app/nginx/src/tldk/be.c
deleted file mode 100644
index 1309aa4..0000000
--- a/app/nginx/src/tldk/be.c
+++ /dev/null
@@ -1,1249 +0,0 @@
-/*
- * Copyright (c) 2017 Intel Corporation.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <assert.h>
-#include <netinet/ip6.h>
-
-#include <ngx_config.h>
-#include <ngx_core.h>
-
-#include "be.h"
-#include <rte_version.h>
-#include <rte_cycles.h>
-#include <rte_ethdev.h>
-#include <rte_errno.h>
-#include <rte_lpm6.h>
-#include <rte_lpm.h>
-#include <rte_ip.h>
-#include <rte_tcp.h>
-
-#include <tle_tcp.h>
-
-#if RTE_VERSION_NUM(17, 11, 0, 0) <= RTE_VERSION
-typedef uint16_t dpdk_port_t;
-#else
-typedef uint8_t dpdk_port_t;
-#endif
-
-#define RX_RING_SIZE 0x400
-#define TX_RING_SIZE 0x800
-#define MAX_RULES 0x100
-#define MAX_TBL8 0x800
-
-#define MPOOL_CACHE_SIZE 0x100
-#define MPOOL_NB_BUF 0x20000
-
-#define FRAG_MBUF_BUF_SIZE (RTE_PKTMBUF_HEADROOM + TLE_DST_MAX_HDR)
-
-#define RX_CSUM_OFFLOAD (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_TCP_CKSUM)
-
-#define TCP_MAX_PROCESS 0x20
-
-static const struct rte_eth_conf port_conf_default = {
- .rxmode = {
- .offloads = DEV_RX_OFFLOAD_VLAN_STRIP,
- },
-};
-
-struct ptype2cb {
- uint32_t mask;
- const char *name;
- rte_rx_callback_fn fn;
-};
-
-enum {
- ETHER_PTYPE = 0x1,
- IPV4_PTYPE = 0x2,
- IPV4_EXT_PTYPE = 0x4,
- IPV6_PTYPE = 0x8,
- IPV6_EXT_PTYPE = 0x10,
- TCP_PTYPE = 0x20,
- UDP_PTYPE = 0x40,
-};
-
-int
-be_lcore_lpm_init(struct tldk_ctx *tcx, uint32_t sid,
- const struct tldk_ctx_conf *cf)
-{
- ngx_uint_t worker = cf->worker;
- uint32_t lcore = cf->lcore;
- char str[RTE_LPM_NAMESIZE];
-
- const struct rte_lpm_config lpm4_cfg = {
- .max_rules = MAX_RULES,
- .number_tbl8s = MAX_TBL8,
- };
-
- const struct rte_lpm6_config lpm6_cfg = {
- .max_rules = MAX_RULES,
- .number_tbl8s = MAX_TBL8,
- };
-
- snprintf(str, sizeof(str), "LPM4%lu-%u\n", worker, lcore);
- tcx->lpm4 = rte_lpm_create(str, sid, &lpm4_cfg);
- RTE_LOG(NOTICE, USER1, "%s(worker=%lu, lcore=%u): lpm4=%p;\n",
- __func__, worker, lcore, tcx->lpm4);
- if (tcx->lpm4 == NULL)
- return -ENOMEM;
-
- snprintf(str, sizeof(str), "LPM6%lu-%u\n", worker, lcore);
- tcx->lpm6 = rte_lpm6_create(str, sid, &lpm6_cfg);
- RTE_LOG(NOTICE, USER1, "%s(worker=%lu, lcore=%u): lpm6=%p;\n",
- __func__, worker, lcore, tcx->lpm6);
- if (tcx->lpm6 == NULL) {
- rte_lpm_free(tcx->lpm4);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-int
-be_lpm4_dst_lookup(void *data, __rte_unused uint64_t sdata,
- const struct in_addr *addr, struct tle_dest *res)
-{
- int32_t rc;
- uint32_t idx;
- struct tldk_ctx *tcx;
- struct tle_dest *dst;
-
- tcx = data;
- rc = rte_lpm_lookup(tcx->lpm4, rte_be_to_cpu_32(addr->s_addr), &idx);
- if (rc == 0) {
- dst = &tcx->dst4[idx];
- memcpy(res, dst, dst->l2_len + dst->l3_len +
- offsetof(struct tle_dest, hdr));
- }
-
- return rc;
-}
-
-int
-be_lpm6_dst_lookup(void *data, __rte_unused uint64_t sdata,
- const struct in6_addr *addr, struct tle_dest *res)
-{
- int32_t rc;
- struct tldk_ctx *tcx;
- struct tle_dest *dst;
- uintptr_t p;
-#if RTE_VERSION_NUM(17, 5, 0, 0) <= RTE_VERSION
- uint32_t idx;
-#else
- uint8_t idx;
-#endif
-
- tcx = data;
- p = (uintptr_t)addr->s6_addr;
- rc = rte_lpm6_lookup(tcx->lpm6, (uint8_t *)p, &idx);
- if (rc == 0) {
- dst = &tcx->dst6[idx];
- memcpy(res, dst, dst->l2_len + dst->l3_len +
- offsetof(struct tle_dest, hdr));
- }
-
- return rc;
-}
-
-/*
- * Initialise DPDK port.
- */
-static int
-port_init(const struct tldk_port_conf *pcf)
-{
- int32_t rc;
- struct rte_eth_conf port_conf;
- struct rte_eth_dev_info dev_info;
-
- rte_eth_dev_info_get(pcf->id, &dev_info);
-
- if ((dev_info.rx_offload_capa & pcf->rx_offload) != pcf->rx_offload) {
- RTE_LOG(ERR, USER1,
- "port#%u supported/requested RX offloads don't match, "
- "supported: %#" PRIx64 ", requested: %#" PRIx64 ";\n",
- pcf->id, (uint64_t)dev_info.rx_offload_capa,
- pcf->rx_offload);
- return NGX_ERROR;
- }
- if ((dev_info.tx_offload_capa & pcf->tx_offload) != pcf->tx_offload) {
- RTE_LOG(ERR, USER1,
- "port#%u supported/requested TX offloads don't match, "
- "supported: %#" PRIx64 ", requested: %#" PRIx64 ";\n",
- pcf->id, (uint64_t)dev_info.tx_offload_capa,
- pcf->tx_offload);
- return NGX_ERROR;
- }
-
- port_conf = port_conf_default;
-
- if ((pcf->rx_offload & RX_CSUM_OFFLOAD) != 0) {
- RTE_LOG(ERR, USER1, "%s(%u): enabling RX csum offload;\n",
- __func__, pcf->id);
- port_conf.rxmode.offloads |= pcf->rx_offload & RX_CSUM_OFFLOAD;
- }
-
- port_conf.rxmode.max_rx_pkt_len = pcf->mtu + RTE_ETHER_CRC_LEN;
- if (port_conf.rxmode.max_rx_pkt_len > RTE_ETHER_MAX_LEN)
- port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
- port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
- port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_TCP;
- port_conf.rx_adv_conf.rss_conf.rss_hf &=
- dev_info.flow_type_rss_offloads;
-
- port_conf.txmode.offloads = pcf->tx_offload;
-
- rc = rte_eth_dev_configure(pcf->id, pcf->nb_queues, pcf->nb_queues,
- &port_conf);
- RTE_LOG(NOTICE, USER1,
- "%s: rte_eth_dev_configure(prt_id=%u, nb_rxq=%u, nb_txq=%u) "
- "returns %d;\n", __func__, pcf->id, pcf->nb_queues,
- pcf->nb_queues, rc);
-
- if (rc != 0)
- return NGX_ERROR;
-
- return NGX_OK;
-}
-
-/*
- * Check that lcore is enabled, not master, and not in use already.
- */
-int
-be_check_lcore(uint32_t lid)
-{
- if (rte_lcore_is_enabled(lid) == 0) {
- RTE_LOG(ERR, USER1, "lcore %u is not enabled\n", lid);
- return -EINVAL;
- }
-
- if (rte_get_master_lcore() != lid &&
- rte_eal_get_lcore_state(lid) == RUNNING) {
- RTE_LOG(ERR, USER1, "lcore %u already in use\n", lid);
- return -EINVAL;
- }
-
- return 0;
-}
-
-int
-be_mpool_init(struct tldk_ctx *tcx)
-{
- int32_t rc;
- uint32_t nmb, sid;
- struct rte_mempool *mp;
- char name[RTE_MEMPOOL_NAMESIZE];
-
- ngx_uint_t worker = tcx->cf->worker;
- uint32_t lcore = tcx->cf->lcore;
-
- sid = rte_lcore_to_socket_id(tcx->cf->lcore);
- nmb = (tcx->cf->nb_mbuf == 0) ? MPOOL_NB_BUF : tcx->cf->nb_mbuf;
-
- snprintf(name, sizeof(name), "MP%lu-%u", worker, lcore);
- mp = rte_pktmbuf_pool_create(name, nmb, MPOOL_CACHE_SIZE, 0,
- RTE_MBUF_DEFAULT_BUF_SIZE, sid);
- if (mp == NULL) {
- rc = -rte_errno;
- RTE_LOG(ERR, USER1, "%s:Mempool creation failed for "
- "ctx:wrk(%lu)-ctx:lcore(%u) with error code: %d\n",
- __func__, worker, lcore, rc);
- return rc;
- }
-
- tcx->mpool = mp;
-
- snprintf(name, sizeof(name), "frag_MP%lu-%u",
- worker, lcore);
- mp = rte_pktmbuf_pool_create(name, nmb,
- MPOOL_CACHE_SIZE, 0, FRAG_MBUF_BUF_SIZE, sid - 1);
- if (mp == NULL) {
- rc = -rte_errno;
- RTE_LOG(ERR, USER1, "%s:Frag mempool creation failed for "
- "ctx:wrk(%lu)-ctx:lcore(%u) with error code: %d\n",
- __func__, worker, lcore, rc);
- return rc;
- }
-
- tcx->frag_mpool = mp;
-
- return 0;
-}
-
-int
-be_queue_init(struct tldk_ctx *tcx, const tldk_conf_t *cf)
-{
- int32_t socket, rc;
- uint16_t queue_id;
- uint32_t port_id, i, nb_rxd, nb_txd;
- struct rte_eth_dev_info dev_info;
- const struct tldk_ctx_conf *ctx;
-
- ctx = tcx->cf;
- for (i = 0; i < ctx->nb_dev; i++) {
- port_id = ctx->dev[i].port;
- queue_id = ctx->dev[i].queue;
-
- rte_eth_dev_info_get(port_id, &dev_info);
-
- dev_info.default_rxconf.rx_drop_en = 1;
-
- nb_rxd = RTE_MIN(RX_RING_SIZE, dev_info.rx_desc_lim.nb_max);
- nb_txd = RTE_MIN(TX_RING_SIZE, dev_info.tx_desc_lim.nb_max);
- dev_info.default_txconf.tx_free_thresh = nb_txd / 2;
-
- socket = rte_eth_dev_socket_id(port_id);
-
- rc = rte_eth_rx_queue_setup(port_id, queue_id, nb_rxd,
- socket, &dev_info.default_rxconf, tcx->mpool);
- if (rc < 0) {
- RTE_LOG(ERR, USER1,
- "%s: rx queue=%u setup failed with error "
- "code: %d\n", __func__, queue_id, rc);
- return rc;
- }
-
- rc = rte_eth_tx_queue_setup(port_id, queue_id, nb_txd,
- socket, &dev_info.default_txconf);
- if (rc < 0) {
- RTE_LOG(ERR, USER1,
- "%s: tx queue=%u setup failed with error "
- "code: %d\n", __func__, queue_id, rc);
- return rc;
- }
- }
-
- return 0;
-}
-
-/*
- * Setup all enabled ports.
- */
-int
-be_port_init(tldk_conf_t *cf)
-{
- int32_t rc;
- uint32_t i;
- struct tldk_port_conf *dpf;
-
- for (i = 0; i != cf->nb_port; i++) {
- dpf = &cf->port[i];
- rc = port_init(dpf);
- if (rc != 0) {
- RTE_LOG(ERR, USER1,
- "%s: port=%u init failed with error code: %d\n",
- __func__, dpf->id, rc);
- return NGX_ERROR;
- }
- rte_eth_macaddr_get(dpf->id, &dpf->mac);
- rte_eth_promiscuous_enable(dpf->id);
- }
-
- return NGX_OK;
-}
-
-static int
-be_add_ipv4_route(struct tldk_ctx *tcx, const struct tldk_dest_conf *dcf,
- uint8_t idx)
-{
- int32_t rc;
- uint32_t addr, depth;
- char str[INET_ADDRSTRLEN];
-
- depth = dcf->prfx;
- addr = rte_be_to_cpu_32(dcf->ipv4.s_addr);
-
- inet_ntop(AF_INET, &dcf->ipv4, str, sizeof(str));
- rc = rte_lpm_add(tcx->lpm4, addr, depth, idx);
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u,dev_id=%u,dev=%p,"
- "ipv4=%s/%u,mtu=%u,"
- "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) "
- "returns %d;\n",
- __func__, tcx->cf->lcore, dcf->dev, tcx->dst4[idx].dev,
- str, depth, tcx->dst4[idx].mtu,
- dcf->mac.addr_bytes[0], dcf->mac.addr_bytes[1],
- dcf->mac.addr_bytes[2], dcf->mac.addr_bytes[3],
- dcf->mac.addr_bytes[4], dcf->mac.addr_bytes[5],
- rc);
-
- return rc;
-}
-
-static int
-be_add_ipv6_route(struct tldk_ctx *tcx, const struct tldk_dest_conf *dcf,
- uint8_t idx)
-{
- int32_t rc;
- uint32_t depth;
- char str[INET6_ADDRSTRLEN];
-
- depth = dcf->prfx;
-
- rc = rte_lpm6_add(tcx->lpm6, (uint8_t *)(uintptr_t)dcf->ipv6.s6_addr,
- depth, idx);
-
- inet_ntop(AF_INET6, &dcf->ipv6, str, sizeof(str));
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u,dev_id=%u,dev=%p,"
- "ipv6=%s/%u,mtu=%u,"
- "mac=%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx) "
- "returns %d;\n",
- __func__, tcx->cf->lcore, dcf->dev, tcx->dst6[idx].dev,
- str, depth, tcx->dst4[idx].mtu,
- dcf->mac.addr_bytes[0], dcf->mac.addr_bytes[1],
- dcf->mac.addr_bytes[2], dcf->mac.addr_bytes[3],
- dcf->mac.addr_bytes[4], dcf->mac.addr_bytes[5],
- rc);
-
- return rc;
-}
-
-static void
-fill_dst(struct tle_dest *dst, const struct tldk_dev *td,
- const struct tldk_port_conf *pcf, const struct tldk_dest_conf *dest,
- uint16_t l3_type, struct rte_mempool *mp)
-{
- struct rte_ether_hdr *eth;
- struct rte_ipv4_hdr *ip4h;
- struct rte_ipv6_hdr *ip6h;
-
- dst->dev = td->dev;
- dst->head_mp = mp;
- dst->mtu = RTE_MIN(dest->mtu, pcf->mtu);
- dst->l2_len = sizeof(*eth);
-
- eth = (struct rte_ether_hdr *)dst->hdr;
-
- rte_ether_addr_copy(&pcf->mac, &eth->s_addr);
- rte_ether_addr_copy(&dest->mac, &eth->d_addr);
- eth->ether_type = rte_cpu_to_be_16(l3_type);
-
- if (l3_type == RTE_ETHER_TYPE_IPV4) {
- dst->l3_len = sizeof(*ip4h);
- ip4h = (struct rte_ipv4_hdr *)(eth + 1);
- ip4h->version_ihl = 4 << 4 |
- sizeof(*ip4h) / RTE_IPV4_IHL_MULTIPLIER;
- ip4h->time_to_live = 64;
- ip4h->next_proto_id = IPPROTO_TCP;
- } else if (l3_type == RTE_ETHER_TYPE_IPV6) {
- dst->l3_len = sizeof(*ip6h);
- ip6h = (struct rte_ipv6_hdr *)(eth + 1);
- ip6h->vtc_flow = 6 << 4;
- ip6h->proto = IPPROTO_TCP;
- ip6h->hop_limits = 64;
- }
-}
-
-static int
-be_add_dest(const struct tldk_dest_conf *dcf, struct tldk_ctx *tcx,
- uint32_t dev_idx, const struct tldk_port_conf *pcf, uint32_t family,
- uint32_t dnum)
-{
- struct tle_dest *dp;
- uint32_t i, n, m;
- uint16_t l3_type;
- int32_t rc = 0;
-
- if (family == AF_INET) {
- n = tcx->dst4_num;
- dp = tcx->dst4 + n;
- m = RTE_DIM(tcx->dst4);
- l3_type = RTE_ETHER_TYPE_IPV4;
- } else {
- n = tcx->dst6_num;
- dp = tcx->dst6 + n;
- m = RTE_DIM(tcx->dst6);
- l3_type = RTE_ETHER_TYPE_IPV6;
- }
-
- if (n + dnum >= m) {
- RTE_LOG(ERR, USER1, "%s(lcore=%u, family=%hu, dnum=%u) exceeds "
- "maximum allowed number of destinations(%u);\n",
- __func__, tcx->cf->lcore, family, dnum, m);
- return -ENOSPC;
- }
-
- for (i = 0; i != dnum && rc == 0; i++) {
- fill_dst(dp + i, &tcx->dev[dev_idx], pcf, dcf,
- l3_type, tcx->frag_mpool);
- if (family == AF_INET)
- rc = be_add_ipv4_route(tcx, dcf, n + i);
- else
- rc = be_add_ipv6_route(tcx, dcf, n + i);
- }
-
- if (family == AF_INET)
- tcx->dst4_num = n + i;
- else
- tcx->dst6_num = n + i;
-
- return rc;
-}
-
-int
-be_dst_init(struct tldk_ctx *tcx, const tldk_conf_t *cf)
-{
- uint32_t i, f, d, l, port_id;
- const struct tldk_ctx_conf *ctx_cf = tcx->cf;
- const struct tldk_dest_conf *dcf;
- const struct tldk_port_conf *pcf;
- int32_t rc = 0;
-
- for (i = 0; i < ctx_cf->nb_dest; i++) {
- dcf = &ctx_cf->dest[i];
- f = dcf->family;
- d = dcf->dev;
- for (l = 0; l != tcx->nb_dev; l++) {
- if (tcx->dev[l].cf.id == d) {
- /* fetch the port conf for the port
- * associated with device
- */
- port_id = tcx->dev[l].cf.port;
- pcf = &cf->port[port_id];
- rc = be_add_dest(dcf, tcx, l, pcf, f, 1);
- if (rc != 0) {
- RTE_LOG(ERR, USER1,
- "%s(tcx=%u, family=%u) "
- "could not add "
- "destinations(%u)\n",
- __func__, ctx_cf->lcore, f, i);
- return -ENOSPC;
- }
- break;
- }
- }
- }
-
- return rc;
-}
-
-int
-be_add_dev(struct tldk_ctx *tcx, const tldk_conf_t *cf)
-{
- int32_t rc = 0;
- uint32_t i, port_id;
- struct tle_dev_param dprm;
- const struct tldk_port_conf *pcf;
-
- memset(&dprm, 0, sizeof(dprm));
-
- /* add the tle_dev on all applicable ports of the context */
- for (i = 0; i != tcx->cf->nb_dev; i++) {
-
- /* get the port id associated with the device */
- port_id = tcx->cf->dev[i].port;
-
- /* get the port config by port id */
- pcf = &cf->port[port_id];
-
- /* populate the tle_dev_param struct */
- dprm.rx_offload = pcf->rx_offload;
- dprm.tx_offload = pcf->tx_offload;
- dprm.local_addr4.s_addr = pcf->ipv4;
-
- memcpy(&dprm.local_addr6, &pcf->ipv6,
- sizeof(pcf->ipv6));
-
- /* add the tle_dev */
- tcx->dev[i].dev = tle_add_dev(tcx->ctx, &dprm);
-
- RTE_LOG(NOTICE, USER1, "%s(port=%u), dev: %p\n",
- __func__, port_id,
- tcx->dev[i].dev);
-
- if (tcx->dev[i].dev == NULL)
- rc = -rte_errno;
-
- if (rc != 0)
- return rc;
-
- tcx->nb_dev++;
- tcx->dev[i].cf = tcx->cf->dev[i];
- }
-
- return rc;
-}
-
-static uint32_t
-get_ptypes(const struct tldk_dev *td)
-{
- uint32_t smask;
- int32_t i, rc;
- const uint32_t pmask = RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK |
- RTE_PTYPE_L4_MASK;
-
- smask = 0;
- rc = rte_eth_dev_get_supported_ptypes(td->cf.port, pmask, NULL, 0);
- if (rc < 0) {
- RTE_LOG(ERR, USER1,
- "%s(port=%u) failed to get supported ptypes;\n",
- __func__, td->cf.port);
- return smask;
- }
-
- uint32_t ptype[rc];
- rc = rte_eth_dev_get_supported_ptypes(td->cf.port, pmask, ptype, rc);
-
- for (i = 0; i != rc; i++) {
- switch (ptype[i]) {
- case RTE_PTYPE_L2_ETHER:
- smask |= ETHER_PTYPE;
- break;
- case RTE_PTYPE_L3_IPV4:
- case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN:
- smask |= IPV4_PTYPE;
- break;
- case RTE_PTYPE_L3_IPV4_EXT:
- smask |= IPV4_EXT_PTYPE;
- break;
- case RTE_PTYPE_L3_IPV6:
- case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN:
- smask |= IPV6_PTYPE;
- break;
- case RTE_PTYPE_L3_IPV6_EXT:
- smask |= IPV6_EXT_PTYPE;
- break;
- case RTE_PTYPE_L4_TCP:
- smask |= TCP_PTYPE;
- break;
- case RTE_PTYPE_L4_UDP:
- smask |= UDP_PTYPE;
- break;
- }
- }
-
- return smask;
-}
-
-static inline uint64_t
-_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso,
- uint64_t ol3, uint64_t ol2)
-{
- return il2 | il3 << 7 | il4 << 16 | tso << 24 | ol3 << 40 | ol2 << 49;
-}
-
-static inline void
-fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4)
-{
- m->tx_offload = _mbuf_tx_offload(l2, l3, l4, 0, 0, 0);
-}
-
-static inline int
-is_ipv4_frag(const struct rte_ipv4_hdr *iph)
-{
- const uint16_t mask = rte_cpu_to_be_16(~RTE_IPV4_HDR_DF_FLAG);
-
- return ((mask & iph->fragment_offset) != 0);
-}
-
-static inline uint32_t
-get_tcp_header_size(struct rte_mbuf *m, uint32_t l2_len, uint32_t l3_len)
-{
- const struct rte_tcp_hdr *tcp;
-
- tcp = rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *, l2_len + l3_len);
- return (tcp->data_off >> 4) * 4;
-}
-
-static inline void
-adjust_ipv4_pktlen(struct rte_mbuf *m, uint32_t l2_len)
-{
- uint32_t plen, trim;
- const struct rte_ipv4_hdr *iph;
-
- iph = rte_pktmbuf_mtod_offset(m, const struct rte_ipv4_hdr *, l2_len);
- plen = rte_be_to_cpu_16(iph->total_length) + l2_len;
- if (plen < m->pkt_len) {
- trim = m->pkt_len - plen;
- rte_pktmbuf_trim(m, trim);
- }
-}
-
-static inline void
-adjust_ipv6_pktlen(struct rte_mbuf *m, uint32_t l2_len)
-{
- uint32_t plen, trim;
- const struct rte_ipv6_hdr *iph;
-
- iph = rte_pktmbuf_mtod_offset(m, const struct rte_ipv6_hdr *, l2_len);
- plen = rte_be_to_cpu_16(iph->payload_len) + sizeof(*iph) + l2_len;
- if (plen < m->pkt_len) {
- trim = m->pkt_len - plen;
- rte_pktmbuf_trim(m, trim);
- }
-}
-
-static inline void
-tcp_stat_update(struct tldk_ctx *lc, const struct rte_mbuf *m,
- uint32_t l2_len, uint32_t l3_len)
-{
- const struct rte_tcp_hdr *th;
-
- th = rte_pktmbuf_mtod_offset(m, struct rte_tcp_hdr *, l2_len + l3_len);
- lc->tcp_stat.flags[th->tcp_flags]++;
-}
-
-static inline uint32_t
-get_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, uint32_t frag)
-{
- const struct rte_ipv4_hdr *iph;
- int32_t dlen, len;
-
- dlen = rte_pktmbuf_data_len(m);
- dlen -= l2;
-
- iph = rte_pktmbuf_mtod_offset(m, const struct rte_ipv4_hdr *, l2);
- len = (iph->version_ihl & RTE_IPV4_HDR_IHL_MASK) *
- RTE_IPV4_IHL_MULTIPLIER;
-
- if (frag != 0 && is_ipv4_frag(iph)) {
- m->packet_type &= ~RTE_PTYPE_L4_MASK;
- m->packet_type |= RTE_PTYPE_L4_FRAG;
- }
-
- if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto))
- m->packet_type = RTE_PTYPE_UNKNOWN;
-
- return len;
-}
-
-static inline int
-ipv6x_hdr(uint32_t proto)
-{
- return (proto == IPPROTO_HOPOPTS ||
- proto == IPPROTO_ROUTING ||
- proto == IPPROTO_FRAGMENT ||
- proto == IPPROTO_AH ||
- proto == IPPROTO_NONE ||
- proto == IPPROTO_DSTOPTS);
-}
-
-static inline uint32_t
-get_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto,
- uint32_t fproto)
-{
- const struct ip6_ext *ipx;
- int32_t dlen, len, ofs;
-
- len = sizeof(struct rte_ipv6_hdr);
-
- dlen = rte_pktmbuf_data_len(m);
- dlen -= l2;
-
- ofs = l2 + len;
- ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs);
-
- while (ofs > 0 && len < dlen) {
-
- switch (nproto) {
- case IPPROTO_HOPOPTS:
- case IPPROTO_ROUTING:
- case IPPROTO_DSTOPTS:
- ofs = (ipx->ip6e_len + 1) << 3;
- break;
- case IPPROTO_AH:
- ofs = (ipx->ip6e_len + 2) << 2;
- break;
- case IPPROTO_FRAGMENT:
- /*
- * tso_segsz is not used by RX, so use it as temporary
- * buffer to store the fragment offset.
- */
- m->tso_segsz = ofs;
- ofs = sizeof(struct ip6_frag);
- m->packet_type &= ~RTE_PTYPE_L4_MASK;
- m->packet_type |= RTE_PTYPE_L4_FRAG;
- break;
- default:
- ofs = 0;
- }
-
- if (ofs > 0) {
- nproto = ipx->ip6e_nxt;
- len += ofs;
- ipx += ofs / sizeof(*ipx);
- }
- }
-
- /* unrecognized or invalid packet. */
- if ((ofs == 0 && nproto != fproto) || len > dlen)
- m->packet_type = RTE_PTYPE_UNKNOWN;
-
- return len;
-}
-
-static inline uint32_t
-get_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto)
-{
- const struct rte_ipv6_hdr *iph;
-
- iph = rte_pktmbuf_mtod_offset(m, const struct rte_ipv6_hdr *,
- sizeof(struct rte_ether_hdr));
-
- if (iph->proto == fproto)
- return sizeof(struct rte_ipv6_hdr);
- else if (ipv6x_hdr(iph->proto) != 0)
- return get_ipv6x_hdr_len(m, l2, iph->proto, fproto);
-
- m->packet_type = RTE_PTYPE_UNKNOWN;
- return 0;
-}
-
-static inline void
-fill_eth_tcp_hdr_len(struct rte_mbuf *m)
-{
- uint32_t dlen, l2_len, l3_len, l4_len;
- uint16_t etp;
- const struct rte_ether_hdr *eth;
-
- dlen = rte_pktmbuf_data_len(m);
-
- /* check that first segment is at least 54B long. */
- if (dlen < sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr) +
- sizeof(struct rte_tcp_hdr)) {
- m->packet_type = RTE_PTYPE_UNKNOWN;
- return;
- }
-
- l2_len = sizeof(*eth);
-
- eth = rte_pktmbuf_mtod(m, const struct rte_ether_hdr *);
- etp = eth->ether_type;
- if (etp == rte_be_to_cpu_16(RTE_ETHER_TYPE_VLAN))
- l2_len += sizeof(struct rte_vlan_hdr);
-
- if (etp == rte_be_to_cpu_16(RTE_ETHER_TYPE_IPV4)) {
- m->packet_type = RTE_PTYPE_L4_TCP |
- RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_L2_ETHER;
- l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_TCP, 1);
- l4_len = get_tcp_header_size(m, l2_len, l3_len);
- fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
- adjust_ipv4_pktlen(m, l2_len);
- } else if (etp == rte_be_to_cpu_16(RTE_ETHER_TYPE_IPV6) &&
- dlen >= l2_len + sizeof(struct rte_ipv6_hdr) +
- sizeof(struct rte_tcp_hdr)) {
- m->packet_type = RTE_PTYPE_L4_TCP |
- RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_L2_ETHER;
- l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP);
- l4_len = get_tcp_header_size(m, l2_len, l3_len);
- fill_pkt_hdr_len(m, l2_len, l3_len, l4_len);
- adjust_ipv6_pktlen(m, l2_len);
- } else
- m->packet_type = RTE_PTYPE_UNKNOWN;
-}
-
-/*
- * HW can recognize L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k)
- */
-static uint16_t
-type0_tcp_rx_callback(__rte_unused dpdk_port_t port,
- __rte_unused uint16_t queue,
- struct rte_mbuf *pkt[], uint16_t nb_pkts,
- __rte_unused uint16_t max_pkts, __rte_unused void *user_param)
-{
- uint32_t j, tp;
- uint32_t l4_len, l3_len, l2_len;
- const struct rte_ether_hdr *eth;
-
- l2_len = sizeof(*eth);
-
- for (j = 0; j != nb_pkts; j++) {
-
- BE_PKT_DUMP(pkt[j]);
-
- tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
- RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
-
- switch (tp) {
- /* non fragmented tcp packets. */
- case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4 |
- RTE_PTYPE_L2_ETHER):
- l4_len = get_tcp_header_size(pkt[j], l2_len,
- sizeof(struct rte_ipv4_hdr));
- fill_pkt_hdr_len(pkt[j], l2_len,
- sizeof(struct rte_ipv4_hdr), l4_len);
- adjust_ipv4_pktlen(pkt[j], l2_len);
- break;
- case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6 |
- RTE_PTYPE_L2_ETHER):
- l4_len = get_tcp_header_size(pkt[j], l2_len,
- sizeof(struct rte_ipv6_hdr));
- fill_pkt_hdr_len(pkt[j], l2_len,
- sizeof(struct rte_ipv6_hdr), l4_len);
- adjust_ipv6_pktlen(pkt[j], l2_len);
- break;
- case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT |
- RTE_PTYPE_L2_ETHER):
- l3_len = get_ipv4_hdr_len(pkt[j], l2_len,
- IPPROTO_TCP, 0);
- l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
- fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
- adjust_ipv4_pktlen(pkt[j], l2_len);
- break;
- case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT |
- RTE_PTYPE_L2_ETHER):
- l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP);
- l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
- fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
- adjust_ipv6_pktlen(pkt[j], l2_len);
- break;
- default:
- /* treat packet types as invalid. */
- pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
- break;
- }
- }
-
- return nb_pkts;
-}
-
-/*
- * HW can recognize L2/L3/L4 and fragments (i40e).
- */
-static uint16_t
-type1_tcp_rx_callback(__rte_unused dpdk_port_t port,
- __rte_unused uint16_t queue,
- struct rte_mbuf *pkt[], uint16_t nb_pkts,
- __rte_unused uint16_t max_pkts, void *user_param)
-{
- uint32_t j, tp;
- struct tldk_ctx *tcx;
- uint32_t l4_len, l3_len, l2_len;
- const struct rte_ether_hdr *eth;
-
- tcx = user_param;
- l2_len = sizeof(*eth);
-
- for (j = 0; j != nb_pkts; j++) {
-
- BE_PKT_DUMP(pkt[j]);
-
- tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
- RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
-
- switch (tp) {
- case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_L2_ETHER):
- l3_len = get_ipv4_hdr_len(pkt[j], l2_len,
- IPPROTO_TCP, 0);
- l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
- fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
- adjust_ipv4_pktlen(pkt[j], l2_len);
- tcp_stat_update(tcx, pkt[j], l2_len, l3_len);
- break;
- case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_L2_ETHER):
- l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_TCP);
- l4_len = get_tcp_header_size(pkt[j], l2_len, l3_len);
- fill_pkt_hdr_len(pkt[j], l2_len, l3_len, l4_len);
- adjust_ipv6_pktlen(pkt[j], l2_len);
- tcp_stat_update(tcx, pkt[j], l2_len, l3_len);
- break;
- default:
- /* treat packet types as invalid. */
- pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
- break;
- }
-
- }
-
- return nb_pkts;
-}
-
-static uint16_t
-typen_tcp_rx_callback(__rte_unused dpdk_port_t port,
- __rte_unused uint16_t queue,
- struct rte_mbuf *pkt[], uint16_t nb_pkts,
- __rte_unused uint16_t max_pkts, __rte_unused void *user_param)
-{
- uint32_t j;
-
- for (j = 0; j != nb_pkts; j++) {
-
- BE_PKT_DUMP(pkt[j]);
- fill_eth_tcp_hdr_len(pkt[j]);
- }
-
- return nb_pkts;
-}
-
-int
-setup_rx_cb(const struct tldk_dev *td, struct tldk_ctx *tcx)
-{
- int32_t rc;
- uint32_t i, n, smask;
- const void *cb;
- const struct ptype2cb *ptype2cb;
-
- static const struct ptype2cb tcp_ptype2cb[] = {
- {
- .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE |
- IPV6_PTYPE | IPV6_EXT_PTYPE | TCP_PTYPE,
- .name = "HW l2/l3x/l4-tcp ptype",
- .fn = type0_tcp_rx_callback,
- },
- {
- .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE |
- TCP_PTYPE,
- .name = "HW l2/l3/l4-tcp ptype",
- .fn = type1_tcp_rx_callback,
- },
- {
- .mask = 0,
- .name = "tcp no HW ptype",
- .fn = typen_tcp_rx_callback,
- },
- };
-
- smask = get_ptypes(td);
-
- ptype2cb = tcp_ptype2cb;
- n = RTE_DIM(tcp_ptype2cb);
-
- for (i = 0; i != n; i++) {
- if ((smask & ptype2cb[i].mask) == ptype2cb[i].mask) {
- cb = rte_eth_add_rx_callback(td->cf.port, td->cf.queue,
- ptype2cb[i].fn, tcx);
- rc = -rte_errno;
- RTE_LOG(ERR, USER1,
- "%s(port=%u), setup RX callback \"%s\" "
- "returns %p;\n",
- __func__, td->cf.port, ptype2cb[i].name, cb);
- return ((cb == NULL) ? rc : 0);
- }
- }
-
- /* no proper callback found. */
- RTE_LOG(ERR, USER1,
- "%s(port=%u) failed to find an appropriate callback;\n",
- __func__, td->cf.port);
- return -ENOENT;
-}
-
-int
-be_lcore_setup(struct tldk_ctx *tcx)
-{
- uint32_t i;
- int32_t rc;
-
- RTE_LOG(NOTICE, USER1, "%s:(lcore=%u, ctx=%p) start\n",
- __func__, tcx->cf->lcore, tcx->ctx);
-
- rc = 0;
- for (i = 0; i != tcx->nb_dev && rc == 0; i++) {
- RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, q=%u)\n",
- __func__, i, tcx->dev[i].cf.port, tcx->dev[i].cf.queue);
-
- rc = setup_rx_cb(&tcx->dev[i], tcx);
- if (rc < 0)
- return rc;
- }
-
- return rc;
-}
-
-static inline void
-be_rx(struct tldk_dev *dev)
-{
- uint32_t j, k, n;
- struct rte_mbuf *pkt[MAX_PKT_BURST];
- struct rte_mbuf *rp[MAX_PKT_BURST];
- int32_t rc[MAX_PKT_BURST];
-
- n = rte_eth_rx_burst(dev->cf.port,
- dev->cf.queue, pkt, RTE_DIM(pkt));
-
- if (n != 0) {
- dev->rx_stat.in += n;
- BE_TRACE("%s(%u): rte_eth_rx_burst(%u, %u) returns %u\n",
- __func__, dev->cf.id, dev->cf.port,
- dev->cf.queue, n);
-
- k = tle_tcp_rx_bulk(dev->dev, pkt, rp, rc, n);
-
- dev->rx_stat.up += k;
- dev->rx_stat.drop += n - k;
- BE_TRACE("%s: tle_tcp_rx_bulk(%p, %u) returns %u\n",
- __func__, dev->dev, n, k);
-
- for (j = 0; j != n - k; j++) {
- BE_TRACE("%s:%d(port=%u) rp[%u]={%p, %d};\n",
- __func__, __LINE__, dev->cf.port,
- j, rp[j], rc[j]);
- rte_pktmbuf_free(rp[j]);
- }
- }
-}
-
-static inline void
-be_tx(struct tldk_dev *dev)
-{
- uint32_t j = 0, k, n;
- struct rte_mbuf **mb;
-
- n = dev->tx_buf.num;
- k = RTE_DIM(dev->tx_buf.pkt) - n;
- mb = dev->tx_buf.pkt;
-
- if (k >= RTE_DIM(dev->tx_buf.pkt) / 2) {
- j = tle_tcp_tx_bulk(dev->dev, mb + n, k);
- n += j;
- dev->tx_stat.down += j;
- }
-
- if (n == 0)
- return;
-
- BE_TRACE("%s: tle_tcp_tx_bulk(%p) returns %u,\n"
- "total pkts to send: %u\n",
- __func__, dev->dev, j, n);
-
- for (j = 0; j != n; j++)
- BE_PKT_DUMP(mb[j]);
-
- k = rte_eth_tx_burst(dev->cf.port,
- dev->cf.queue, mb, n);
-
- dev->tx_stat.out += k;
- dev->tx_stat.drop += n - k;
- BE_TRACE("%s: rte_eth_tx_burst(%u, %u, %u) returns %u\n",
- __func__, dev->cf.port,
- dev->cf.queue, n, k);
-
- dev->tx_buf.num = n - k;
- if (k != 0)
- for (j = k; j != n; j++)
- mb[j - k] = mb[j];
-}
-
-void
-be_lcore_tcp(struct tldk_ctx *tcx)
-{
- uint32_t i;
-
- if (tcx == NULL)
- return;
-
- for (i = 0; i != tcx->nb_dev; i++) {
- be_rx(&tcx->dev[i]);
- be_tx(&tcx->dev[i]);
- }
- tle_tcp_process(tcx->ctx, TCP_MAX_PROCESS);
-}
-
-void
-be_lcore_clear(struct tldk_ctx *tcx)
-{
- uint32_t i, j;
-
- if (tcx == NULL)
- return;
-
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u, ctx: %p) finish\n",
- __func__, tcx->cf->lcore, tcx->ctx);
- for (i = 0; i != tcx->nb_dev; i++) {
- RTE_LOG(NOTICE, USER1, "%s:%u(port=%u, q=%u, lcore=%u, dev=%p) "
- "rx_stats={"
- "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "}, "
- "tx_stats={"
- "in=%" PRIu64 ",up=%" PRIu64 ",drop=%" PRIu64 "};\n",
- __func__, i, tcx->dev[i].cf.port, tcx->dev[i].cf.queue,
- tcx->cf->lcore,
- tcx->dev[i].dev,
- tcx->dev[i].rx_stat.in,
- tcx->dev[i].rx_stat.up,
- tcx->dev[i].rx_stat.drop,
- tcx->dev[i].tx_stat.down,
- tcx->dev[i].tx_stat.out,
- tcx->dev[i].tx_stat.drop);
- }
-
- RTE_LOG(NOTICE, USER1, "tcp_stat={\n");
- for (i = 0; i != RTE_DIM(tcx->tcp_stat.flags); i++) {
- if (tcx->tcp_stat.flags[i] != 0)
- RTE_LOG(NOTICE, USER1, "[flag=%#x]==%" PRIu64 ";\n",
- i, tcx->tcp_stat.flags[i]);
- }
- RTE_LOG(NOTICE, USER1, "};\n");
-
- for (i = 0; i != tcx->nb_dev; i++)
- for (j = 0; j != tcx->dev[i].tx_buf.num; j++)
- rte_pktmbuf_free(tcx->dev[i].tx_buf.pkt[j]);
-
-}
-
-void
-be_stop_port(uint32_t port)
-{
- struct rte_eth_stats stats;
-
- RTE_LOG(NOTICE, USER1, "%s: stoping port %u\n", __func__, port);
-
- rte_eth_stats_get(port, &stats);
- RTE_LOG(NOTICE, USER1, "port %u stats={\n"
- "ipackets=%" PRIu64 ";"
- "ibytes=%" PRIu64 ";"
- "ierrors=%" PRIu64 ";"
- "imissed=%" PRIu64 ";\n"
- "opackets=%" PRIu64 ";"
- "obytes=%" PRIu64 ";"
- "oerrors=%" PRIu64 ";\n"
- "}\n",
- port,
- stats.ipackets,
- stats.ibytes,
- stats.ierrors,
- stats.imissed,
- stats.opackets,
- stats.obytes,
- stats.oerrors);
- rte_eth_dev_stop(port);
-}
-
-int
-be_lcore_main(void *arg)
-{
- int32_t rc;
- uint32_t lid, i;
- struct tldk_ctx *tcx;
- struct lcore_ctxs_list *lc_ctx;
-
- lc_ctx = arg;
- lid = rte_lcore_id();
-
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u) start\n", __func__, lid);
-
- rc = 0;
- while (force_quit == 0) {
- for (i = 0; i < lc_ctx->nb_ctxs; i++) {
- tcx = lc_ctx->ctxs[i];
- be_lcore_tcp(tcx);
- }
- }
-
- RTE_LOG(NOTICE, USER1, "%s(lcore=%u) finish\n", __func__, lid);
-
- return rc;
-}