Diffstat (limited to 'examples/udpfwd/pkt.c')
-rw-r--r--  examples/udpfwd/pkt.c  579
1 file changed, 579 insertions, 0 deletions
diff --git a/examples/udpfwd/pkt.c b/examples/udpfwd/pkt.c
new file mode 100644
index 0000000..b0d4452
--- /dev/null
+++ b/examples/udpfwd/pkt.c
@@ -0,0 +1,579 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "netbe.h"
+#include <netinet/ip6.h>
+
+static inline void
+fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4)
+{
+ m->l2_len = l2;
+ m->l3_len = l3;
+ m->l4_len = l4;
+}
+
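+/*
+ * An IPv4 packet is a fragment iff its MF flag or fragment offset is
+ * non-zero; clearing only the DF bit leaves exactly the MF flag and
+ * the offset bits (the remaining bit is reserved and must be zero),
+ * so one 16-bit test covers both cases without byte-swapping the
+ * header field at run time.
+ */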
+static inline int
+is_ipv4_frag(const struct ipv4_hdr *iph)
+{
+ const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG);
+
+ return ((mask & iph->fragment_offset) != 0);
+}
+
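+/*
+ * Parse the IPv4 header and fill in the mbuf header lengths.
+ * A proto value above IPPROTO_MAX (e.g. UINT32_MAX) disables the
+ * next_proto_id check; a non-zero frag enables fragment detection.
+ */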
+static inline void
+fill_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto,
+ uint32_t frag)
+{
+ const struct ipv4_hdr *iph;
+ int32_t dlen, len;
+
+ dlen = rte_pktmbuf_data_len(m);
+ dlen -= l2 + sizeof(struct udp_hdr);
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2);
+ len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER;
+
+ if (frag != 0 && is_ipv4_frag(iph)) {
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+ m->packet_type |= RTE_PTYPE_L4_FRAG;
+ }
+
+ if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto))
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ else
+ fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr));
+}
+
+static inline int
+ipv6x_hdr(uint32_t proto)
+{
+ return (proto == IPPROTO_HOPOPTS ||
+ proto == IPPROTO_ROUTING ||
+ proto == IPPROTO_FRAGMENT ||
+ proto == IPPROTO_AH ||
+ proto == IPPROTO_NONE ||
+ proto == IPPROTO_DSTOPTS);
+}
+
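+/*
+ * Walk the IPv6 extension header chain to compute the total L3 length.
+ * Per RFC 2460 the ip6e_len field of hop-by-hop, routing and
+ * destination options headers counts 8-octet units beyond the first 8
+ * octets (so ip6e_len == 0 means an 8-byte header), hence
+ * (ip6e_len + 1) << 3; per RFC 4302 AH counts 4-octet units minus two,
+ * hence (ip6e_len + 2) << 2.
+ */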
+static inline void
+fill_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto,
+ uint32_t fproto)
+{
+ const struct ip6_ext *ipx;
+ int32_t dlen, len, ofs;
+
+ len = sizeof(struct ipv6_hdr);
+
+ dlen = rte_pktmbuf_data_len(m);
+ dlen -= l2 + sizeof(struct udp_hdr);
+
+ ofs = l2 + len;
+ ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs);
+
+ while (ofs > 0 && len < dlen) {
+
+ switch (nproto) {
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ ofs = (ipx->ip6e_len + 1) << 3;
+ break;
+ case IPPROTO_AH:
+ ofs = (ipx->ip6e_len + 2) << 2;
+ break;
+ case IPPROTO_FRAGMENT:
+			/*
+			 * tso_segsz is not used on the RX path, so use it
+			 * as a temporary buffer to store the fragment
+			 * header offset.
+			 */
+			m->tso_segsz = l2 + len;
+ ofs = sizeof(struct ip6_frag);
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+ m->packet_type |= RTE_PTYPE_L4_FRAG;
+ break;
+ default:
+ ofs = 0;
+ }
+
+ if (ofs > 0) {
+ nproto = ipx->ip6e_nxt;
+ len += ofs;
+ ipx += ofs / sizeof(*ipx);
+ }
+ }
+
+	/* unrecognised or invalid packet. */
+ if ((ofs == 0 && nproto != fproto) || len > dlen)
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ else
+ fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr));
+}
+
+static inline void
+fill_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto)
+{
+ const struct ipv6_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+
+ if (iph->proto == fproto)
+ fill_pkt_hdr_len(m, l2, sizeof(struct ipv6_hdr),
+ sizeof(struct udp_hdr));
+ else if (ipv6x_hdr(iph->proto) != 0)
+ fill_ipv6x_hdr_len(m, l2, iph->proto, fproto);
+}
+
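+/*
+ * Software packet-type parse for NICs that report no ptype info:
+ * classify the L2/L3/L4 headers (optionally skipping one VLAN tag)
+ * and fill in the mbuf header lengths.
+ */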
+static inline void
+fill_eth_hdr_len(struct rte_mbuf *m)
+{
+ uint32_t dlen, l2;
+ uint16_t etp;
+ const struct ether_hdr *eth;
+
+ dlen = rte_pktmbuf_data_len(m);
+
+ /* check that first segment is at least 42B long. */
+ if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) +
+ sizeof(struct udp_hdr)) {
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+ return;
+ }
+
+ l2 = sizeof(*eth);
+
+	eth = rte_pktmbuf_mtod(m, const struct ether_hdr *);
+	etp = eth->ether_type;
+	if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN)) {
+		/* skip the VLAN tag and use the encapsulated ethertype. */
+		etp = rte_pktmbuf_mtod_offset(m, const struct vlan_hdr *,
+			l2)->eth_proto;
+		l2 += sizeof(struct vlan_hdr);
+	}
+
+ if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) {
+ m->packet_type = RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ fill_ipv4_hdr_len(m, l2, IPPROTO_UDP, 1);
+ } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) &&
+ dlen >= l2 + sizeof(struct ipv6_hdr) +
+ sizeof(struct udp_hdr)) {
+ m->packet_type = RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER;
+ fill_ipv6_hdr_len(m, l2, IPPROTO_UDP);
+ } else
+ m->packet_type = RTE_PTYPE_UNKNOWN;
+}
+
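+/*
+ * Post-process a packet returned by the reassembly library: it is a
+ * complete UDP datagram now, so re-mark its L4 type and drop the TX
+ * offload flag the library set.
+ */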
+static inline void
+fix_reassembled(struct rte_mbuf *m)
+{
+ /* update packet type. */
+ m->packet_type &= ~RTE_PTYPE_L4_MASK;
+ m->packet_type |= RTE_PTYPE_L4_UDP;
+
+	/* the reassembly code sets PKT_TX_IP_CKSUM; clear it. */
+ m->ol_flags &= ~PKT_TX_IP_CKSUM;
+
+	/* for IPv6, reassembly strips the fragment header; adjust l3_len. */
+ if (RTE_ETH_IS_IPV6_HDR(m->packet_type))
+ m->l3_len = m->l3_len - sizeof(struct ipv6_extension_fragment);
+}
+
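+/*
+ * Feed one fragment to the DPDK ip_frag reassembly table.
+ * Returns the reassembled packet once the last fragment has arrived;
+ * returns NULL when the fragment was swallowed by the table (or
+ * dropped), in which case the caller must not touch the mbuf again.
+ */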
+static struct rte_mbuf *
+reassemble(struct rte_mbuf *m, struct rte_ip_frag_tbl *tbl,
+ struct rte_ip_frag_death_row *dr, uint64_t tms)
+{
+ if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
+
+ struct ipv4_hdr *iph;
+
+ iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
+
+ /* process this fragment. */
+ m = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, iph);
+
+ } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
+
+ struct ipv6_hdr *iph;
+ struct ipv6_extension_fragment *fhdr;
+
+ iph = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len);
+
+		/*
+		 * the fragment header offset was stored in tso_segsz
+		 * temporarily, to avoid another scan of the IPv6 headers.
+		 */
+ fhdr = rte_pktmbuf_mtod_offset(m,
+ struct ipv6_extension_fragment *, m->tso_segsz);
+ m->tso_segsz = 0;
+
+ /* process this fragment. */
+ m = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, iph, fhdr);
+
+ } else {
+ rte_pktmbuf_free(m);
+ m = NULL;
+ }
+
+ /* got reassembled packet. */
+ if (m != NULL)
+ fix_reassembled(m);
+
+ return m;
+}
+
+/* exclude NULLs from the final list of packets. */
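+/*
+ * For example, {A, NULL, NULL, B, C} with nb_pkt == 5 and nb_zero == 2
+ * is compacted in place to {A, B, C} and 3 is returned.
+ */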
+static inline uint32_t
+compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero)
+{
+ uint32_t i, j, k, l;
+
+ for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) {
+
+ /* found a hole. */
+ if (pkt[j] == NULL) {
+
+			/* find how big it is. */
+ for (i = j; i-- != 0 && pkt[i] == NULL; )
+ ;
+ /* fill the hole. */
+ for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++)
+ pkt[l] = pkt[k];
+
+ nb_pkt -= j - i;
+ nb_zero -= j - i;
+ }
+ }
+
+ return nb_pkt;
+}
+
+/*
+ * HW can recognise L2/L3 with/without extensions/L4 (ixgbe/igb/fm10k).
+ */
+static uint16_t
+type0_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, tp, x;
+ uint64_t cts;
+ struct netbe_lcore *lc;
+
+ lc = user_param;
+ cts = 0;
+
+ x = 0;
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ /* non fragmented udp packets. */
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4 |
+ RTE_PTYPE_L2_ETHER):
+ fill_pkt_hdr_len(pkt[j], sizeof(struct ether_hdr),
+ sizeof(struct ipv4_hdr),
+ sizeof(struct udp_hdr));
+ break;
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6 |
+ RTE_PTYPE_L2_ETHER):
+ fill_pkt_hdr_len(pkt[j], sizeof(struct ether_hdr),
+ sizeof(struct ipv6_hdr),
+ sizeof(struct udp_hdr));
+ break;
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr),
+ UINT32_MAX, 0);
+ break;
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr),
+ IPPROTO_UDP);
+ break;
+ /* possibly fragmented udp packets. */
+ case (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER):
+ case (RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER):
+ fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr),
+ IPPROTO_UDP, 1);
+ break;
+ case (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER):
+ case (RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER):
+ fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr),
+ IPPROTO_UDP);
+ break;
+ default:
+			/* treat all other packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+
+		/*
+		 * if it is a fragment, try to reassemble it;
+		 * if for some reason that can't be done, then
+		 * set the pkt[] entry to NULL.
+		 */
+ if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) ==
+ RTE_PTYPE_L4_FRAG) {
+ cts = (cts == 0) ? rte_rdtsc() : cts;
+ pkt[j] = reassemble(pkt[j], lc->ftbl, &lc->death_row,
+ cts);
+ x += (pkt[j] == NULL);
+ }
+ }
+
+	/* reassembly was invoked; clean up its death row. */
+ if (cts != 0)
+ rte_ip_frag_free_death_row(&lc->death_row, 0);
+
+ if (x == 0)
+ return nb_pkts;
+
+ NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
+ "%u non-reassembled fragments;\n",
+ __func__, port, queue, nb_pkts, x);
+
+ return compress_pkt_list(pkt, nb_pkts, x);
+}
+
+/*
+ * HW can recognise L2/L3/L4 and fragments (i40e).
+ */
+static uint16_t
+type1_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, tp, x;
+ uint64_t cts;
+ struct netbe_lcore *lc;
+
+ lc = user_param;
+ cts = 0;
+
+ x = 0;
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+
+ tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK |
+ RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK);
+
+ switch (tp) {
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr),
+ UINT32_MAX, 0);
+ break;
+ case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr),
+ IPPROTO_UDP);
+ break;
+ case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr),
+ IPPROTO_UDP, 0);
+ break;
+ case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L2_ETHER):
+ fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr),
+ IPPROTO_UDP);
+ break;
+ default:
+			/* treat all other packet types as invalid. */
+ pkt[j]->packet_type = RTE_PTYPE_UNKNOWN;
+ break;
+ }
+
+		/*
+		 * if it is a fragment, try to reassemble it;
+		 * if for some reason that can't be done, then
+		 * set the pkt[] entry to NULL.
+		 */
+ if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) ==
+ RTE_PTYPE_L4_FRAG) {
+ cts = (cts == 0) ? rte_rdtsc() : cts;
+ pkt[j] = reassemble(pkt[j], lc->ftbl, &lc->death_row,
+ cts);
+ x += (pkt[j] == NULL);
+ }
+ }
+
+	/* reassembly was invoked; clean up its death row. */
+ if (cts != 0)
+ rte_ip_frag_free_death_row(&lc->death_row, 0);
+
+ if (x == 0)
+ return nb_pkts;
+
+ NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
+ "%u non-reassembled fragments;\n",
+ __func__, port, queue, nb_pkts, x);
+
+ return compress_pkt_list(pkt, nb_pkts, x);
+}
+
+/*
+ * generic, assumes HW doesn't recognise any packet type.
+ */
+static uint16_t
+typen_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue,
+ struct rte_mbuf *pkt[], uint16_t nb_pkts,
+ __rte_unused uint16_t max_pkts, void *user_param)
+{
+ uint32_t j, x;
+ uint64_t cts;
+ struct netbe_lcore *lc;
+
+ lc = user_param;
+ cts = 0;
+
+ x = 0;
+ for (j = 0; j != nb_pkts; j++) {
+
+ NETBE_PKT_DUMP(pkt[j]);
+ fill_eth_hdr_len(pkt[j]);
+
+		/*
+		 * if it is a fragment, try to reassemble it;
+		 * if for some reason that can't be done, then
+		 * set the pkt[] entry to NULL.
+		 */
+ if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) ==
+ RTE_PTYPE_L4_FRAG) {
+ cts = (cts == 0) ? rte_rdtsc() : cts;
+ pkt[j] = reassemble(pkt[j], lc->ftbl, &lc->death_row,
+ cts);
+ x += (pkt[j] == NULL);
+ }
+ }
+
+	/* reassembly was invoked; clean up its death row. */
+ if (cts != 0)
+ rte_ip_frag_free_death_row(&lc->death_row, 0);
+
+ if (x == 0)
+ return nb_pkts;
+
+ NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): "
+ "%u non-reassembled fragments;\n",
+ __func__, port, queue, nb_pkts, x);
+
+ return compress_pkt_list(pkt, nb_pkts, x);
+}
+
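+/*
+ * Query which packet types the port can recognise in HW and install
+ * the most specific RX callback whose required ptype set is fully
+ * supported; ptype2cb[] below is ordered from most to least demanding,
+ * ending with the pure software parser.
+ */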
+int
+setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc)
+{
+ int32_t i, rc;
+ uint32_t smask;
+ void *cb;
+
+ const uint32_t pmask = RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK |
+ RTE_PTYPE_L4_MASK;
+
+ enum {
+ ETHER_PTYPE = 0x1,
+ IPV4_PTYPE = 0x2,
+ IPV4_EXT_PTYPE = 0x4,
+ IPV6_PTYPE = 0x8,
+ IPV6_EXT_PTYPE = 0x10,
+ UDP_PTYPE = 0x20,
+ };
+
+ static const struct {
+ uint32_t mask;
+ const char *name;
+ rte_rx_callback_fn fn;
+ } ptype2cb[] = {
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE |
+ IPV6_PTYPE | IPV6_EXT_PTYPE | UDP_PTYPE,
+ .name = "HW l2/l3x/l4 ptype",
+ .fn = type0_rx_callback,
+ },
+ {
+ .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE |
+ UDP_PTYPE,
+ .name = "HW l2/l3/l4 ptype",
+ .fn = type1_rx_callback,
+ },
+ {
+ .mask = 0,
+ .name = "no HW ptype",
+ .fn = typen_rx_callback,
+ },
+ };
+
+ rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, NULL, 0);
+ if (rc < 0) {
+ RTE_LOG(ERR, USER1,
+ "%s(port=%u) failed to get supported ptypes;\n",
+ __func__, uprt->id);
+ return rc;
+ }
+
+ uint32_t ptype[rc];
+ rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, ptype, rc);
+
+ smask = 0;
+ for (i = 0; i != rc; i++) {
+ switch (ptype[i]) {
+ case RTE_PTYPE_L2_ETHER:
+ smask |= ETHER_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV4:
+ case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN:
+ smask |= IPV4_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV4_EXT:
+ smask |= IPV4_EXT_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV6:
+ case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN:
+ smask |= IPV6_PTYPE;
+ break;
+ case RTE_PTYPE_L3_IPV6_EXT:
+ smask |= IPV6_EXT_PTYPE;
+ break;
+ case RTE_PTYPE_L4_UDP:
+ smask |= UDP_PTYPE;
+ break;
+ }
+ }
+
+ for (i = 0; i != RTE_DIM(ptype2cb); i++) {
+ if ((smask & ptype2cb[i].mask) == ptype2cb[i].mask) {
+ cb = rte_eth_add_rx_callback(uprt->id, 0,
+ ptype2cb[i].fn, lc);
+ rc = -rte_errno;
+ RTE_LOG(ERR, USER1,
+ "%s(port=%u), setup RX callback \"%s\" "
+ "returns %p;\n",
+ __func__, uprt->id, ptype2cb[i].name, cb);
+ return ((cb == NULL) ? rc : 0);
+ }
+ }
+
+ /* no proper callback found. */
+ RTE_LOG(ERR, USER1,
+ "%s(port=%u) failed to find an appropriate callback;\n",
+ __func__, uprt->id);
+ return -ENOENT;
+}