diff options
Diffstat (limited to 'examples/udpfwd/pkt.c')
-rw-r--r-- | examples/udpfwd/pkt.c | 579 |
1 file changed, 579 insertions, 0 deletions
diff --git a/examples/udpfwd/pkt.c b/examples/udpfwd/pkt.c new file mode 100644 index 0000000..b0d4452 --- /dev/null +++ b/examples/udpfwd/pkt.c @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "netbe.h" +#include <netinet/ip6.h> + +static inline void +fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4) +{ + m->l2_len = l2; + m->l3_len = l3; + m->l4_len = l4; +} + +static inline int +is_ipv4_frag(const struct ipv4_hdr *iph) +{ + const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG); + + return ((mask & iph->fragment_offset) != 0); +} + +static inline void +fill_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, + uint32_t frag) +{ + const struct ipv4_hdr *iph; + int32_t dlen, len; + + dlen = rte_pktmbuf_data_len(m); + dlen -= l2 + sizeof(struct udp_hdr); + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2); + len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER; + + if (frag != 0 && is_ipv4_frag(iph)) { + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_FRAG; + } + + if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto)) + m->packet_type = RTE_PTYPE_UNKNOWN; + else + fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr)); +} + +static inline int +ipv6x_hdr(uint32_t proto) +{ + return (proto == IPPROTO_HOPOPTS || + proto == IPPROTO_ROUTING || + proto == IPPROTO_FRAGMENT || + proto 
== IPPROTO_AH || + proto == IPPROTO_NONE || + proto == IPPROTO_DSTOPTS); +} + +static inline void +fill_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t nproto, + uint32_t fproto) +{ + const struct ip6_ext *ipx; + int32_t dlen, len, ofs; + + len = sizeof(struct ipv6_hdr); + + dlen = rte_pktmbuf_data_len(m); + dlen -= l2 + sizeof(struct udp_hdr); + + ofs = l2 + len; + ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs); + + while (ofs > 0 && len < dlen) { + + switch (nproto) { + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + ofs = (ipx->ip6e_len + 1) << 3; + break; + case IPPROTO_AH: + ofs = (ipx->ip6e_len + 2) << 2; + break; + case IPPROTO_FRAGMENT: + /* + * tso_segsz is not used by RX, so suse it as temporary + * buffer to store the fragment offset. + */ + m->tso_segsz = ofs; + ofs = sizeof(struct ip6_frag); + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_FRAG; + break; + default: + ofs = 0; + } + + if (ofs > 0) { + nproto = ipx->ip6e_nxt; + len += ofs; + ipx += ofs / sizeof(*ipx); + } + } + + /* undercognised or invalid packet. */ + if ((ofs == 0 && nproto != fproto) || len > dlen) + m->packet_type = RTE_PTYPE_UNKNOWN; + else + fill_pkt_hdr_len(m, l2, len, sizeof(struct udp_hdr)); +} + +static inline void +fill_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto) +{ + const struct ipv6_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + if (iph->proto == fproto) + fill_pkt_hdr_len(m, l2, sizeof(struct ipv6_hdr), + sizeof(struct udp_hdr)); + else if (ipv6x_hdr(iph->proto) != 0) + fill_ipv6x_hdr_len(m, l2, iph->proto, fproto); +} + +static inline void +fill_eth_hdr_len(struct rte_mbuf *m) +{ + uint32_t dlen, l2; + uint16_t etp; + const struct ether_hdr *eth; + + dlen = rte_pktmbuf_data_len(m); + + /* check that first segment is at least 42B long. 
*/ + if (dlen < sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr) + + sizeof(struct udp_hdr)) { + m->packet_type = RTE_PTYPE_UNKNOWN; + return; + } + + l2 = sizeof(*eth); + + eth = rte_pktmbuf_mtod(m, const struct ether_hdr *); + etp = eth->ether_type; + if (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN)) + l2 += sizeof(struct vlan_hdr); + + if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) { + m->packet_type = RTE_PTYPE_L4_UDP | + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + fill_ipv4_hdr_len(m, l2, IPPROTO_UDP, 1); + } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) && + dlen >= l2 + sizeof(struct ipv6_hdr) + + sizeof(struct udp_hdr)) { + m->packet_type = RTE_PTYPE_L4_UDP | + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + fill_ipv6_hdr_len(m, l2, IPPROTO_UDP); + } else + m->packet_type = RTE_PTYPE_UNKNOWN; +} + +static inline void +fix_reassembled(struct rte_mbuf *m) +{ + /* update packet type. */ + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_UDP; + + /* fix reassemble setting TX flags. */ + m->ol_flags &= ~PKT_TX_IP_CKSUM; + + /* fix l3_len after reassemble. */ + if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) + m->l3_len = m->l3_len - sizeof(struct ipv6_extension_fragment); +} + +static struct rte_mbuf * +reassemble(struct rte_mbuf *m, struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, uint64_t tms) +{ + if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) { + + struct ipv4_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); + + /* process this fragment. */ + m = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, iph); + + } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) { + + struct ipv6_hdr *iph; + struct ipv6_extension_fragment *fhdr; + + iph = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len); + + /* + * we store fragment header offset in tso_segsz before + * temporary, just to avoid another scan of ipv6 header. 
+ */ + fhdr = rte_pktmbuf_mtod_offset(m, + struct ipv6_extension_fragment *, m->tso_segsz); + m->tso_segsz = 0; + + /* process this fragment. */ + m = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, iph, fhdr); + + } else { + rte_pktmbuf_free(m); + m = NULL; + } + + /* got reassembled packet. */ + if (m != NULL) + fix_reassembled(m); + + return m; +} + +/* exclude NULLs from the final list of packets. */ +static inline uint32_t +compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero) +{ + uint32_t i, j, k, l; + + for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) { + + /* found a hole. */ + if (pkt[j] == NULL) { + + /* find how big is it. */ + for (i = j; i-- != 0 && pkt[i] == NULL; ) + ; + /* fill the hole. */ + for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++) + pkt[l] = pkt[k]; + + nb_pkt -= j - i; + nb_zero -= j - i; + } + } + + return nb_pkt; +} + +/* + * HW can recognise L2/L3 with/without extentions/L4 (ixgbe/igb/fm10k) + */ +static uint16_t +type0_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, tp, x; + uint64_t cts; + struct netbe_lcore *lc; + + lc = user_param; + cts = 0; + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); + + switch (tp) { + /* non fragmented udp packets. 
*/ + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4 | + RTE_PTYPE_L2_ETHER): + fill_pkt_hdr_len(pkt[j], sizeof(struct ether_hdr), + sizeof(struct ipv4_hdr), + sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6 | + RTE_PTYPE_L2_ETHER): + fill_pkt_hdr_len(pkt[j], sizeof(struct ether_hdr), + sizeof(struct ipv6_hdr), + sizeof(struct udp_hdr)); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT | + RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr), + UINT32_MAX, 0); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT | + RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr), + IPPROTO_UDP); + break; + /* possibly fragmented udp packets. */ + case (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr), + IPPROTO_UDP, 1); + break; + case (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr), + IPPROTO_UDP); + break; + default: + /* treat packet types as invalid. */ + pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; + break; + } + + /* + * if it is a fragment, try to reassemble it, + * if by some reason it can't be done, then + * set pkt[] entry to NULL. + */ + if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == + RTE_PTYPE_L4_FRAG) { + cts = (cts == 0) ? rte_rdtsc() : cts; + pkt[j] = reassemble(pkt[j], lc->ftbl, &lc->death_row, + cts); + x += (pkt[j] == NULL); + } + } + + /* reassemble was invoked, cleanup its death-row. */ + if (cts != 0) + rte_ip_frag_free_death_row(&lc->death_row, 0); + + if (x == 0) + return nb_pkts; + + NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " + "%u non-reassembled fragments;\n", + __func__, port, queue, nb_pkts, x); + + return compress_pkt_list(pkt, nb_pkts, x); +} + +/* + * HW can recognise L2/L3/L4 and fragments (i40e). 
+ */ +static uint16_t +type1_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, tp, x; + uint64_t cts; + struct netbe_lcore *lc; + + lc = user_param; + cts = 0; + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); + + switch (tp) { + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr), + UINT32_MAX, 0); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr), + IPPROTO_UDP); + break; + case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv4_hdr_len(pkt[j], sizeof(struct ether_hdr), + IPPROTO_UDP, 0); + break; + case (RTE_PTYPE_L4_FRAG | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER): + fill_ipv6_hdr_len(pkt[j], sizeof(struct ether_hdr), + IPPROTO_UDP); + break; + default: + /* treat packet types as invalid. */ + pkt[j]->packet_type = RTE_PTYPE_UNKNOWN; + break; + } + + /* + * if it is a fragment, try to reassemble it, + * if by some reason it can't be done, then + * set pkt[] entry to NULL. + */ + if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == + RTE_PTYPE_L4_FRAG) { + cts = (cts == 0) ? rte_rdtsc() : cts; + pkt[j] = reassemble(pkt[j], lc->ftbl, &lc->death_row, + cts); + x += (pkt[j] == NULL); + } + } + + /* reassemble was invoked, cleanup its death-row. */ + if (cts != 0) + rte_ip_frag_free_death_row(&lc->death_row, 0); + + if (x == 0) + return nb_pkts; + + NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " + "%u non-reassembled fragments;\n", + __func__, port, queue, nb_pkts, x); + + return compress_pkt_list(pkt, nb_pkts, x); +} + +/* + * generic, assumes HW doesn't recognise any packet type. 
+ */ +static uint16_t +typen_rx_callback(__rte_unused uint8_t port, __rte_unused uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, void *user_param) +{ + uint32_t j, x; + uint64_t cts; + struct netbe_lcore *lc; + + lc = user_param; + cts = 0; + + x = 0; + for (j = 0; j != nb_pkts; j++) { + + NETBE_PKT_DUMP(pkt[j]); + fill_eth_hdr_len(pkt[j]); + + /* + * if it is a fragment, try to reassemble it, + * if by some reason it can't be done, then + * set pkt[] entry to NULL. + */ + if ((pkt[j]->packet_type & RTE_PTYPE_L4_MASK) == + RTE_PTYPE_L4_FRAG) { + cts = (cts == 0) ? rte_rdtsc() : cts; + pkt[j] = reassemble(pkt[j], lc->ftbl, &lc->death_row, + cts); + x += (pkt[j] == NULL); + } + } + + /* reassemble was invoked, cleanup its death-row. */ + if (cts != 0) + rte_ip_frag_free_death_row(&lc->death_row, 0); + + if (x == 0) + return nb_pkts; + + NETBE_TRACE("%s(port=%u, queue=%u, nb_pkts=%u): " + "%u non-reassembled fragments;\n", + __func__, port, queue, nb_pkts, x); + + return compress_pkt_list(pkt, nb_pkts, x); +} + +int +setup_rx_cb(const struct netbe_port *uprt, struct netbe_lcore *lc) +{ + int32_t i, rc; + uint32_t smask; + void *cb; + + const uint32_t pmask = RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK | + RTE_PTYPE_L4_MASK; + + enum { + ETHER_PTYPE = 0x1, + IPV4_PTYPE = 0x2, + IPV4_EXT_PTYPE = 0x4, + IPV6_PTYPE = 0x8, + IPV6_EXT_PTYPE = 0x10, + UDP_PTYPE = 0x20, + }; + + static const struct { + uint32_t mask; + const char *name; + rte_rx_callback_fn fn; + } ptype2cb[] = { + { + .mask = ETHER_PTYPE | IPV4_PTYPE | IPV4_EXT_PTYPE | + IPV6_PTYPE | IPV6_EXT_PTYPE | UDP_PTYPE, + .name = "HW l2/l3x/l4 ptype", + .fn = type0_rx_callback, + }, + { + .mask = ETHER_PTYPE | IPV4_PTYPE | IPV6_PTYPE | + UDP_PTYPE, + .name = "HW l2/l3/l4 ptype", + .fn = type1_rx_callback, + }, + { + .mask = 0, + .name = "no HW ptype", + .fn = typen_rx_callback, + }, + }; + + rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, NULL, 0); + if (rc < 0) { + 
RTE_LOG(ERR, USER1, + "%s(port=%u) failed to get supported ptypes;\n", + __func__, uprt->id); + return rc; + } + + uint32_t ptype[rc]; + rc = rte_eth_dev_get_supported_ptypes(uprt->id, pmask, ptype, rc); + + smask = 0; + for (i = 0; i != rc; i++) { + switch (ptype[i]) { + case RTE_PTYPE_L2_ETHER: + smask |= ETHER_PTYPE; + break; + case RTE_PTYPE_L3_IPV4: + case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN: + smask |= IPV4_PTYPE; + break; + case RTE_PTYPE_L3_IPV4_EXT: + smask |= IPV4_EXT_PTYPE; + break; + case RTE_PTYPE_L3_IPV6: + case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN: + smask |= IPV6_PTYPE; + break; + case RTE_PTYPE_L3_IPV6_EXT: + smask |= IPV6_EXT_PTYPE; + break; + case RTE_PTYPE_L4_UDP: + smask |= UDP_PTYPE; + break; + } + } + + for (i = 0; i != RTE_DIM(ptype2cb); i++) { + if ((smask & ptype2cb[i].mask) == ptype2cb[i].mask) { + cb = rte_eth_add_rx_callback(uprt->id, 0, + ptype2cb[i].fn, lc); + rc = -rte_errno; + RTE_LOG(ERR, USER1, + "%s(port=%u), setup RX callback \"%s\" " + "returns %p;\n", + __func__, uprt->id, ptype2cb[i].name, cb); + return ((cb == NULL) ? rc : 0); + } + } + + /* no proper callback found. */ + RTE_LOG(ERR, USER1, + "%s(port=%u) failed to find an appropriate callback;\n", + __func__, uprt->id); + return -ENOENT; +} |