diff options
Diffstat (limited to 'lib/librte_gro')
-rw-r--r-- | lib/librte_gro/Makefile | 33 | ||||
-rw-r--r-- | lib/librte_gro/gro_tcp4.c | 357 | ||||
-rw-r--r-- | lib/librte_gro/gro_tcp4.h | 283 | ||||
-rw-r--r-- | lib/librte_gro/gro_vxlan_tcp4.c | 494 | ||||
-rw-r--r-- | lib/librte_gro/gro_vxlan_tcp4.h | 156 | ||||
-rw-r--r-- | lib/librte_gro/meson.build | 6 | ||||
-rw-r--r-- | lib/librte_gro/rte_gro.c | 229 | ||||
-rw-r--r-- | lib/librte_gro/rte_gro.h | 129 |
8 files changed, 1141 insertions, 546 deletions
diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile index eb423ccb..bec248f9 100644 --- a/lib/librte_gro/Makefile +++ b/lib/librte_gro/Makefile @@ -1,33 +1,5 @@ -# BSD LICENSE -# -# Copyright(c) 2017 Intel Corporation. All rights reserved. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in -# the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Intel Corporation nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation include $(RTE_SDK)/mk/rte.vars.mk @@ -45,6 +17,7 @@ LIBABIVER := 1 # source files SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c +SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_vxlan_tcp4.c # install this header file SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c index 61a04232..2c0f35c6 100644 --- a/lib/librte_gro/gro_tcp4.c +++ b/lib/librte_gro/gro_tcp4.c @@ -1,41 +1,11 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2017 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation */ #include <rte_malloc.h> #include <rte_mbuf.h> #include <rte_cycles.h> #include <rte_ethdev.h> -#include <rte_ip.h> -#include <rte_tcp.h> #include "gro_tcp4.h" @@ -72,20 +42,20 @@ gro_tcp4_tbl_create(uint16_t socket_id, } tbl->max_item_num = entries_num; - size = sizeof(struct gro_tcp4_key) * entries_num; - tbl->keys = rte_zmalloc_socket(__func__, + size = sizeof(struct gro_tcp4_flow) * entries_num; + tbl->flows = rte_zmalloc_socket(__func__, size, RTE_CACHE_LINE_SIZE, socket_id); - if (tbl->keys == NULL) { + if (tbl->flows == NULL) { rte_free(tbl->items); rte_free(tbl); return NULL; } - /* INVALID_ARRAY_INDEX indicates empty key */ + /* INVALID_ARRAY_INDEX indicates an empty flow */ for (i = 0; i < entries_num; i++) - tbl->keys[i].start_index = INVALID_ARRAY_INDEX; - tbl->max_key_num = entries_num; + tbl->flows[i].start_index = INVALID_ARRAY_INDEX; + tbl->max_flow_num = entries_num; return tbl; } @@ -97,111 +67,11 @@ gro_tcp4_tbl_destroy(void *tbl) if (tcp_tbl) { rte_free(tcp_tbl->items); - rte_free(tcp_tbl->keys); + rte_free(tcp_tbl->flows); } rte_free(tcp_tbl); } -/* - * merge two TCP/IPv4 packets without updating checksums. - * If cmp is larger than 0, append the new packet to the - * original packet. Otherwise, pre-pend the new packet to - * the original packet. 
- */ -static inline int -merge_two_tcp4_packets(struct gro_tcp4_item *item_src, - struct rte_mbuf *pkt, - uint16_t ip_id, - uint32_t sent_seq, - int cmp) -{ - struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; - uint16_t tcp_datalen; - - if (cmp > 0) { - pkt_head = item_src->firstseg; - pkt_tail = pkt; - } else { - pkt_head = pkt; - pkt_tail = item_src->firstseg; - } - - /* check if the packet length will be beyond the max value */ - tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len - - pkt_tail->l3_len - pkt_tail->l4_len; - if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen > - TCP4_MAX_L3_LENGTH) - return 0; - - /* remove packet header for the tail packet */ - rte_pktmbuf_adj(pkt_tail, - pkt_tail->l2_len + - pkt_tail->l3_len + - pkt_tail->l4_len); - - /* chain two packets together */ - if (cmp > 0) { - item_src->lastseg->next = pkt; - item_src->lastseg = rte_pktmbuf_lastseg(pkt); - /* update IP ID to the larger value */ - item_src->ip_id = ip_id; - } else { - lastseg = rte_pktmbuf_lastseg(pkt); - lastseg->next = item_src->firstseg; - item_src->firstseg = pkt; - /* update sent_seq to the smaller value */ - item_src->sent_seq = sent_seq; - } - item_src->nb_merged++; - - /* update mbuf metadata for the merged packet */ - pkt_head->nb_segs += pkt_tail->nb_segs; - pkt_head->pkt_len += pkt_tail->pkt_len; - - return 1; -} - -static inline int -check_seq_option(struct gro_tcp4_item *item, - struct tcp_hdr *tcp_hdr, - uint16_t tcp_hl, - uint16_t tcp_dl, - uint16_t ip_id, - uint32_t sent_seq) -{ - struct rte_mbuf *pkt0 = item->firstseg; - struct ipv4_hdr *ipv4_hdr0; - struct tcp_hdr *tcp_hdr0; - uint16_t tcp_hl0, tcp_dl0; - uint16_t len; - - ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) + - pkt0->l2_len); - tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len); - tcp_hl0 = pkt0->l4_len; - - /* check if TCP option fields equal. If not, return 0. 
*/ - len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr); - if ((tcp_hl != tcp_hl0) || - ((len > 0) && (memcmp(tcp_hdr + 1, - tcp_hdr0 + 1, - len) != 0))) - return 0; - - /* check if the two packets are neighbors */ - tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0; - if ((sent_seq == (item->sent_seq + tcp_dl0)) && - (ip_id == (item->ip_id + 1))) - /* append the new packet */ - return 1; - else if (((sent_seq + tcp_dl) == item->sent_seq) && - ((ip_id + item->nb_merged) == item->ip_id)) - /* pre-pend the new packet */ - return -1; - else - return 0; -} - static inline uint32_t find_an_empty_item(struct gro_tcp4_tbl *tbl) { @@ -215,13 +85,13 @@ find_an_empty_item(struct gro_tcp4_tbl *tbl) } static inline uint32_t -find_an_empty_key(struct gro_tcp4_tbl *tbl) +find_an_empty_flow(struct gro_tcp4_tbl *tbl) { uint32_t i; - uint32_t max_key_num = tbl->max_key_num; + uint32_t max_flow_num = tbl->max_flow_num; - for (i = 0; i < max_key_num; i++) - if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX) + for (i = 0; i < max_flow_num; i++) + if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX) return i; return INVALID_ARRAY_INDEX; } @@ -229,10 +99,11 @@ find_an_empty_key(struct gro_tcp4_tbl *tbl) static inline uint32_t insert_new_item(struct gro_tcp4_tbl *tbl, struct rte_mbuf *pkt, - uint16_t ip_id, - uint32_t sent_seq, + uint64_t start_time, uint32_t prev_idx, - uint64_t start_time) + uint32_t sent_seq, + uint16_t ip_id, + uint8_t is_atomic) { uint32_t item_idx; @@ -247,9 +118,10 @@ insert_new_item(struct gro_tcp4_tbl *tbl, tbl->items[item_idx].sent_seq = sent_seq; tbl->items[item_idx].ip_id = ip_id; tbl->items[item_idx].nb_merged = 1; + tbl->items[item_idx].is_atomic = is_atomic; tbl->item_num++; - /* if the previous packet exists, chain the new one with it */ + /* if the previous packet exists, chain them together. 
*/ if (prev_idx != INVALID_ARRAY_INDEX) { tbl->items[item_idx].next_pkt_idx = tbl->items[prev_idx].next_pkt_idx; @@ -265,7 +137,7 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx, { uint32_t next_idx = tbl->items[item_idx].next_pkt_idx; - /* set NULL to firstseg to indicate it's an empty item */ + /* NULL indicates an empty item */ tbl->items[item_idx].firstseg = NULL; tbl->item_num--; if (prev_item_idx != INVALID_ARRAY_INDEX) @@ -275,52 +147,35 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx, } static inline uint32_t -insert_new_key(struct gro_tcp4_tbl *tbl, - struct tcp4_key *key_src, +insert_new_flow(struct gro_tcp4_tbl *tbl, + struct tcp4_flow_key *src, uint32_t item_idx) { - struct tcp4_key *key_dst; - uint32_t key_idx; + struct tcp4_flow_key *dst; + uint32_t flow_idx; - key_idx = find_an_empty_key(tbl); - if (key_idx == INVALID_ARRAY_INDEX) + flow_idx = find_an_empty_flow(tbl); + if (unlikely(flow_idx == INVALID_ARRAY_INDEX)) return INVALID_ARRAY_INDEX; - key_dst = &(tbl->keys[key_idx].key); - - ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr)); - ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr)); - key_dst->ip_src_addr = key_src->ip_src_addr; - key_dst->ip_dst_addr = key_src->ip_dst_addr; - key_dst->recv_ack = key_src->recv_ack; - key_dst->src_port = key_src->src_port; - key_dst->dst_port = key_src->dst_port; - - /* non-INVALID_ARRAY_INDEX value indicates this key is valid */ - tbl->keys[key_idx].start_index = item_idx; - tbl->key_num++; - - return key_idx; -} + dst = &(tbl->flows[flow_idx].key); -static inline int -is_same_key(struct tcp4_key k1, struct tcp4_key k2) -{ - if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0) - return 0; + ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr)); + ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr)); + dst->ip_src_addr = src->ip_src_addr; + dst->ip_dst_addr = src->ip_dst_addr; + dst->recv_ack = src->recv_ack; + dst->src_port = src->src_port; + dst->dst_port = 
src->dst_port; - if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0) - return 0; + tbl->flows[flow_idx].start_index = item_idx; + tbl->flow_num++; - return ((k1.ip_src_addr == k2.ip_src_addr) && - (k1.ip_dst_addr == k2.ip_dst_addr) && - (k1.recv_ack == k2.recv_ack) && - (k1.src_port == k2.src_port) && - (k1.dst_port == k2.dst_port)); + return flow_idx; } /* - * update packet length for the flushed packet. + * update the packet length for the flushed packet. */ static inline void update_header(struct gro_tcp4_item *item) @@ -343,30 +198,41 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, struct ipv4_hdr *ipv4_hdr; struct tcp_hdr *tcp_hdr; uint32_t sent_seq; - uint16_t tcp_dl, ip_id; + uint16_t tcp_dl, ip_id, hdr_len, frag_off; + uint8_t is_atomic; - struct tcp4_key key; + struct tcp4_flow_key key; uint32_t cur_idx, prev_idx, item_idx; - uint32_t i, max_key_num; + uint32_t i, max_flow_num, remaining_flow_num; int cmp; + uint8_t find; eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len); tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len); + hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len; /* - * if FIN, SYN, RST, PSH, URG, ECE or - * CWR is set, return immediately. + * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE + * or CWR set. */ if (tcp_hdr->tcp_flags != TCP_ACK_FLAG) return -1; - /* if payload length is 0, return immediately */ - tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len - - pkt->l4_len; - if (tcp_dl == 0) + /* + * Don't process the packet whose payload length is less than or + * equal to 0. + */ + tcp_dl = pkt->pkt_len - hdr_len; + if (tcp_dl <= 0) return -1; - ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id); + /* + * Save IPv4 ID for the packet whose DF bit is 0. For the packet + * whose DF bit is 1, IPv4 ID is ignored. 
+ */ + frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset); + is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG; + ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id); sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq); ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr)); @@ -377,25 +243,35 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, key.dst_port = tcp_hdr->dst_port; key.recv_ack = tcp_hdr->recv_ack; - /* search for a key */ - max_key_num = tbl->max_key_num; - for (i = 0; i < max_key_num; i++) { - if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) && - is_same_key(tbl->keys[i].key, key)) - break; + /* Search for a matched flow. */ + max_flow_num = tbl->max_flow_num; + remaining_flow_num = tbl->flow_num; + find = 0; + for (i = 0; i < max_flow_num && remaining_flow_num; i++) { + if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) { + if (is_same_tcp4_flow(tbl->flows[i].key, key)) { + find = 1; + break; + } + remaining_flow_num--; + } } - /* can't find a key, so insert a new key and a new item. */ - if (i == tbl->max_key_num) { - item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq, - INVALID_ARRAY_INDEX, start_time); + /* + * Fail to find a matched flow. Insert a new flow and store the + * packet into the flow. + */ + if (find == 0) { + item_idx = insert_new_item(tbl, pkt, start_time, + INVALID_ARRAY_INDEX, sent_seq, ip_id, + is_atomic); if (item_idx == INVALID_ARRAY_INDEX) return -1; - if (insert_new_key(tbl, &key, item_idx) == + if (insert_new_flow(tbl, &key, item_idx) == INVALID_ARRAY_INDEX) { /* - * fail to insert a new key, so - * delete the inserted item + * Fail to insert a new flow, so delete the + * stored packet. 
*/ delete_item(tbl, item_idx, INVALID_ARRAY_INDEX); return -1; @@ -403,24 +279,27 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, return 0; } - /* traverse all packets in the item group to find one to merge */ - cur_idx = tbl->keys[i].start_index; + /* + * Check all packets in the flow and try to find a neighbor for + * the input packet. + */ + cur_idx = tbl->flows[i].start_index; prev_idx = cur_idx; do { cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr, - pkt->l4_len, tcp_dl, ip_id, sent_seq); + sent_seq, ip_id, pkt->l4_len, tcp_dl, 0, + is_atomic); if (cmp) { if (merge_two_tcp4_packets(&(tbl->items[cur_idx]), - pkt, ip_id, - sent_seq, cmp)) + pkt, cmp, sent_seq, ip_id, 0)) return 1; /* - * fail to merge two packets since the packet - * length will be greater than the max value. - * So insert the packet into the item group. + * Fail to merge the two packets, as the packet + * length is greater than the max value. Store + * the packet into the flow. */ - if (insert_new_item(tbl, pkt, ip_id, sent_seq, - prev_idx, start_time) == + if (insert_new_item(tbl, pkt, start_time, prev_idx, + sent_seq, ip_id, is_atomic) == INVALID_ARRAY_INDEX) return -1; return 0; @@ -429,12 +308,9 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, cur_idx = tbl->items[cur_idx].next_pkt_idx; } while (cur_idx != INVALID_ARRAY_INDEX); - /* - * can't find a packet in the item group to merge, - * so insert the packet into the item group. - */ - if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx, - start_time) == INVALID_ARRAY_INDEX) + /* Fail to find a neighbor, so store the packet into the flow. 
*/ + if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq, + ip_id, is_atomic) == INVALID_ARRAY_INDEX) return -1; return 0; @@ -448,44 +324,33 @@ gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, { uint16_t k = 0; uint32_t i, j; - uint32_t max_key_num = tbl->max_key_num; + uint32_t max_flow_num = tbl->max_flow_num; - for (i = 0; i < max_key_num; i++) { - /* all keys have been checked, return immediately */ - if (tbl->key_num == 0) + for (i = 0; i < max_flow_num; i++) { + if (unlikely(tbl->flow_num == 0)) return k; - j = tbl->keys[i].start_index; + j = tbl->flows[i].start_index; while (j != INVALID_ARRAY_INDEX) { if (tbl->items[j].start_time <= flush_timestamp) { out[k++] = tbl->items[j].firstseg; if (tbl->items[j].nb_merged > 1) update_header(&(tbl->items[j])); /* - * delete the item and get - * the next packet index + * Delete the packet and get the next + * packet in the flow. */ - j = delete_item(tbl, j, - INVALID_ARRAY_INDEX); + j = delete_item(tbl, j, INVALID_ARRAY_INDEX); + tbl->flows[i].start_index = j; + if (j == INVALID_ARRAY_INDEX) + tbl->flow_num--; - /* - * delete the key as all of - * packets are flushed - */ - if (j == INVALID_ARRAY_INDEX) { - tbl->keys[i].start_index = - INVALID_ARRAY_INDEX; - tbl->key_num--; - } else - /* update start_index of the key */ - tbl->keys[i].start_index = j; - - if (k == nb_out) + if (unlikely(k == nb_out)) return k; } else /* - * left packets of this key won't be - * timeout, so go to check other keys. + * The left packets in this flow won't be + * timeout. Go to check other flows. */ break; } diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h index 0a817162..6bb30cdb 100644 --- a/lib/librte_gro/gro_tcp4.h +++ b/lib/librte_gro/gro_tcp4.h @@ -1,49 +1,24 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2017 Intel Corporation. All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation */ #ifndef _GRO_TCP4_H_ #define _GRO_TCP4_H_ +#include <rte_ip.h> +#include <rte_tcp.h> + #define INVALID_ARRAY_INDEX 0xffffffffUL #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL) /* - * the max L3 length of a TCP/IPv4 packet. The L3 length - * is the sum of ipv4 header, tcp header and L4 payload. 
+ * The max length of a IPv4 packet, which includes the length of the L3 + * header, the L4 header and the data payload. */ -#define TCP4_MAX_L3_LENGTH UINT16_MAX +#define MAX_IPV4_PKT_LENGTH UINT16_MAX -/* criteria of mergeing packets */ -struct tcp4_key { +/* Header fields representing a TCP/IPv4 flow */ +struct tcp4_flow_key { struct ether_addr eth_saddr; struct ether_addr eth_daddr; uint32_t ip_src_addr; @@ -54,44 +29,43 @@ struct tcp4_key { uint16_t dst_port; }; -struct gro_tcp4_key { - struct tcp4_key key; +struct gro_tcp4_flow { + struct tcp4_flow_key key; /* - * the index of the first packet in the item group. - * If the value is INVALID_ARRAY_INDEX, it means - * the key is empty. + * The index of the first packet in the flow. + * INVALID_ARRAY_INDEX indicates an empty flow. */ uint32_t start_index; }; struct gro_tcp4_item { /* - * first segment of the packet. If the value + * The first MBUF segment of the packet. If the value * is NULL, it means the item is empty. */ struct rte_mbuf *firstseg; - /* last segment of the packet */ + /* The last MBUF segment of the packet */ struct rte_mbuf *lastseg; /* - * the time when the first packet is inserted - * into the table. If a packet in the table is - * merged with an incoming packet, this value - * won't be updated. We set this value only - * when the first packet is inserted into the - * table. + * The time when the first packet is inserted into the table. + * This value won't be updated, even if the packet is merged + * with other packets. */ uint64_t start_time; /* - * we use next_pkt_idx to chain the packets that - * have same key value but can't be merged together. + * next_pkt_idx is used to chain the packets that + * are in the same flow but can't be merged together + * (e.g. caused by packet reordering). 
*/ uint32_t next_pkt_idx; - /* the sequence number of the packet */ + /* TCP sequence number of the packet */ uint32_t sent_seq; - /* the IP ID of the packet */ + /* IPv4 ID of the packet */ uint16_t ip_id; /* the number of merged packets */ uint16_t nb_merged; + /* Indicate if IPv4 ID can be ignored */ + uint8_t is_atomic; }; /* @@ -100,31 +74,31 @@ struct gro_tcp4_item { struct gro_tcp4_tbl { /* item array */ struct gro_tcp4_item *items; - /* key array */ - struct gro_tcp4_key *keys; + /* flow array */ + struct gro_tcp4_flow *flows; /* current item number */ uint32_t item_num; - /* current key num */ - uint32_t key_num; + /* current flow num */ + uint32_t flow_num; /* item array size */ uint32_t max_item_num; - /* key array size */ - uint32_t max_key_num; + /* flow array size */ + uint32_t max_flow_num; }; /** * This function creates a TCP/IPv4 reassembly table. * * @param socket_id - * socket index for allocating TCP/IPv4 reassemble table + * Socket index for allocating the TCP/IPv4 reassemble table * @param max_flow_num - * the maximum number of flows in the TCP/IPv4 GRO table + * The maximum number of flows in the TCP/IPv4 GRO table * @param max_item_per_flow - * the maximum packet number per flow. + * The maximum number of packets per flow * * @return - * if create successfully, return a pointer which points to the - * created TCP/IPv4 GRO table. Otherwise, return NULL. + * - Return the table pointer on success. + * - Return NULL on failure. */ void *gro_tcp4_tbl_create(uint16_t socket_id, uint16_t max_flow_num, @@ -134,62 +108,56 @@ void *gro_tcp4_tbl_create(uint16_t socket_id, * This function destroys a TCP/IPv4 reassembly table. * * @param tbl - * a pointer points to the TCP/IPv4 reassembly table. + * Pointer pointing to the TCP/IPv4 reassembly table. */ void gro_tcp4_tbl_destroy(void *tbl); /** - * This function searches for a packet in the TCP/IPv4 reassembly table - * to merge with the inputted one. 
To merge two packets is to chain them - * together and update packet headers. Packets, whose SYN, FIN, RST, PSH - * CWR, ECE or URG bit is set, are returned immediately. Packets which - * only have packet headers (i.e. without data) are also returned - * immediately. Otherwise, the packet is either merged, or inserted into - * the table. Besides, if there is no available space to insert the - * packet, this function returns immediately too. + * This function merges a TCP/IPv4 packet. It doesn't process the packet, + * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or doesn't have + * payload. * - * This function assumes the inputted packet is with correct IPv4 and - * TCP checksums. And if two packets are merged, it won't re-calculate - * IPv4 and TCP checksums. Besides, if the inputted packet is IP - * fragmented, it assumes the packet is complete (with TCP header). + * This function doesn't check if the packet has correct checksums and + * doesn't re-calculate checksums for the merged packet. Additionally, + * it assumes the packets are complete (i.e., MF==0 && frag_off==0), + * when IP fragmentation is possible (i.e., DF==0). It returns the + * packet, if the packet has invalid parameters (e.g. SYN bit is set) + * or there is no available space in the table. * * @param pkt - * packet to reassemble. + * Packet to reassemble * @param tbl - * a pointer that points to a TCP/IPv4 reassembly table. + * Pointer pointing to the TCP/IPv4 reassembly table * @start_time - * the start time that the packet is inserted into the table + * The time when the packet is inserted into the table * * @return - * if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE - * or URG bit is set, or there is no available space in the table to - * insert a new item or a new key, return a negative value. If the - * packet is merged successfully, return an positive value. If the - * packet is inserted into the table, return 0. + * - Return a positive value if the packet is merged. 
+ * - Return zero if the packet isn't merged but stored in the table. + * - Return a negative value for invalid parameters or no available + * space in the table. */ int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt, struct gro_tcp4_tbl *tbl, uint64_t start_time); /** - * This function flushes timeout packets in a TCP/IPv4 reassembly table - * to applications, and without updating checksums for merged packets. - * The max number of flushed timeout packets is the element number of - * the array which is used to keep flushed packets. + * This function flushes timeout packets in a TCP/IPv4 reassembly table, + * and without updating checksums. * * @param tbl - * a pointer that points to a TCP GRO table. + * TCP/IPv4 reassembly table pointer * @param flush_timestamp - * this function flushes packets which are inserted into the table - * before or at the flush_timestamp. + * Flush packets which are inserted into the table before or at the + * flush_timestamp. * @param out - * pointer array which is used to keep flushed packets. + * Pointer array used to keep flushed packets * @param nb_out - * the element number of out. It's also the max number of timeout + * The element number in 'out'. It also determines the maximum number of * packets that can be flushed finally. * * @return - * the number of packets that are returned. + * The number of flushed packets */ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, uint64_t flush_timestamp, @@ -201,10 +169,133 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, * reassembly table. * * @param tbl - * pointer points to a TCP/IPv4 reassembly table. + * TCP/IPv4 reassembly table pointer * * @return - * the number of packets in the table + * The number of packets in the table */ uint32_t gro_tcp4_tbl_pkt_count(void *tbl); + +/* + * Check if two TCP/IPv4 packets belong to the same flow. 
+ */ +static inline int +is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2) +{ + return (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) && + is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) && + (k1.ip_src_addr == k2.ip_src_addr) && + (k1.ip_dst_addr == k2.ip_dst_addr) && + (k1.recv_ack == k2.recv_ack) && + (k1.src_port == k2.src_port) && + (k1.dst_port == k2.dst_port)); +} + +/* + * Merge two TCP/IPv4 packets without updating checksums. + * If cmp is larger than 0, append the new packet to the + * original packet. Otherwise, pre-pend the new packet to + * the original packet. + */ +static inline int +merge_two_tcp4_packets(struct gro_tcp4_item *item, + struct rte_mbuf *pkt, + int cmp, + uint32_t sent_seq, + uint16_t ip_id, + uint16_t l2_offset) +{ + struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; + uint16_t hdr_len, l2_len; + + if (cmp > 0) { + pkt_head = item->firstseg; + pkt_tail = pkt; + } else { + pkt_head = pkt; + pkt_tail = item->firstseg; + } + + /* check if the IPv4 packet length is greater than the max value */ + hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len + + pkt_head->l4_len; + l2_len = l2_offset > 0 ? 
pkt_head->outer_l2_len : pkt_head->l2_len; + if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len - + hdr_len > MAX_IPV4_PKT_LENGTH)) + return 0; + + /* remove the packet header for the tail packet */ + rte_pktmbuf_adj(pkt_tail, hdr_len); + + /* chain two packets together */ + if (cmp > 0) { + item->lastseg->next = pkt; + item->lastseg = rte_pktmbuf_lastseg(pkt); + /* update IP ID to the larger value */ + item->ip_id = ip_id; + } else { + lastseg = rte_pktmbuf_lastseg(pkt); + lastseg->next = item->firstseg; + item->firstseg = pkt; + /* update sent_seq to the smaller value */ + item->sent_seq = sent_seq; + item->ip_id = ip_id; + } + item->nb_merged++; + + /* update MBUF metadata for the merged packet */ + pkt_head->nb_segs += pkt_tail->nb_segs; + pkt_head->pkt_len += pkt_tail->pkt_len; + + return 1; +} + +/* + * Check if two TCP/IPv4 packets are neighbors. + */ +static inline int +check_seq_option(struct gro_tcp4_item *item, + struct tcp_hdr *tcph, + uint32_t sent_seq, + uint16_t ip_id, + uint16_t tcp_hl, + uint16_t tcp_dl, + uint16_t l2_offset, + uint8_t is_atomic) +{ + struct rte_mbuf *pkt_orig = item->firstseg; + struct ipv4_hdr *iph_orig; + struct tcp_hdr *tcph_orig; + uint16_t len, tcp_hl_orig; + + iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) + + l2_offset + pkt_orig->l2_len); + tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len); + tcp_hl_orig = pkt_orig->l4_len; + + /* Check if TCP option fields equal */ + len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct tcp_hdr); + if ((tcp_hl != tcp_hl_orig) || ((len > 0) && + (memcmp(tcph + 1, tcph_orig + 1, + len) != 0))) + return 0; + + /* Don't merge packets whose DF bits are different */ + if (unlikely(item->is_atomic ^ is_atomic)) + return 0; + + /* check if the two packets are neighbors */ + len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len - + pkt_orig->l3_len - tcp_hl_orig; + if ((sent_seq == item->sent_seq + len) && (is_atomic || + (ip_id == item->ip_id + 1))) + 
/* append the new packet */ + return 1; + else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic || + (ip_id + item->nb_merged == item->ip_id))) + /* pre-pend the new packet */ + return -1; + + return 0; +} #endif diff --git a/lib/librte_gro/gro_vxlan_tcp4.c b/lib/librte_gro/gro_vxlan_tcp4.c new file mode 100644 index 00000000..ca86f010 --- /dev/null +++ b/lib/librte_gro/gro_vxlan_tcp4.c @@ -0,0 +1,494 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> +#include <rte_udp.h> + +#include "gro_vxlan_tcp4.h" + +void * +gro_vxlan_tcp4_tbl_create(uint16_t socket_id, + uint16_t max_flow_num, + uint16_t max_item_per_flow) +{ + struct gro_vxlan_tcp4_tbl *tbl; + size_t size; + uint32_t entries_num, i; + + entries_num = max_flow_num * max_item_per_flow; + entries_num = RTE_MIN(entries_num, GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM); + + if (entries_num == 0) + return NULL; + + tbl = rte_zmalloc_socket(__func__, + sizeof(struct gro_vxlan_tcp4_tbl), + RTE_CACHE_LINE_SIZE, + socket_id); + if (tbl == NULL) + return NULL; + + size = sizeof(struct gro_vxlan_tcp4_item) * entries_num; + tbl->items = rte_zmalloc_socket(__func__, + size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (tbl->items == NULL) { + rte_free(tbl); + return NULL; + } + tbl->max_item_num = entries_num; + + size = sizeof(struct gro_vxlan_tcp4_flow) * entries_num; + tbl->flows = rte_zmalloc_socket(__func__, + size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (tbl->flows == NULL) { + rte_free(tbl->items); + rte_free(tbl); + return NULL; + } + + for (i = 0; i < entries_num; i++) + tbl->flows[i].start_index = INVALID_ARRAY_INDEX; + tbl->max_flow_num = entries_num; + + return tbl; +} + +void +gro_vxlan_tcp4_tbl_destroy(void *tbl) +{ + struct gro_vxlan_tcp4_tbl *vxlan_tbl = tbl; + + if (vxlan_tbl) { + rte_free(vxlan_tbl->items); + rte_free(vxlan_tbl->flows); + } + rte_free(vxlan_tbl); +} + 
+/*
+ * Look for a free slot in the item array. A slot is free when its
+ * inner firstseg pointer is NULL. Returns the slot index, or
+ * INVALID_ARRAY_INDEX when no slot is free.
+ */
+static inline uint32_t
+find_an_empty_item(struct gro_vxlan_tcp4_tbl *tbl)
+{
+	const uint32_t nb_slots = tbl->max_item_num;
+	uint32_t idx = 0;
+
+	while (idx < nb_slots) {
+		if (tbl->items[idx].inner_item.firstseg == NULL)
+			return idx;
+		idx++;
+	}
+
+	return INVALID_ARRAY_INDEX;
+}
+
+/*
+ * Look for a free slot in the flow array. A slot is free when its
+ * start_index is INVALID_ARRAY_INDEX. Returns the slot index, or
+ * INVALID_ARRAY_INDEX when no slot is free.
+ */
+static inline uint32_t
+find_an_empty_flow(struct gro_vxlan_tcp4_tbl *tbl)
+{
+	const uint32_t nb_slots = tbl->max_flow_num;
+	uint32_t idx = 0;
+
+	while (idx < nb_slots) {
+		if (tbl->flows[idx].start_index == INVALID_ARRAY_INDEX)
+			return idx;
+		idx++;
+	}
+
+	return INVALID_ARRAY_INDEX;
+}
+
+/*
+ * Store a packet in a free item slot and, when prev_idx is valid, link
+ * the new item into the chain right after prev_idx. Returns the index
+ * of the new item, or INVALID_ARRAY_INDEX when the item array is full.
+ */
+static inline uint32_t
+insert_new_item(struct gro_vxlan_tcp4_tbl *tbl,
+		struct rte_mbuf *pkt,
+		uint64_t start_time,
+		uint32_t prev_idx,
+		uint32_t sent_seq,
+		uint16_t outer_ip_id,
+		uint16_t ip_id,
+		uint8_t outer_is_atomic,
+		uint8_t is_atomic)
+{
+	struct gro_vxlan_tcp4_item *slot;
+	const uint32_t item_idx = find_an_empty_item(tbl);
+
+	if (unlikely(item_idx == INVALID_ARRAY_INDEX))
+		return INVALID_ARRAY_INDEX;
+
+	slot = &tbl->items[item_idx];
+
+	/* Outer-header state kept by the VxLAN item itself. */
+	slot->outer_ip_id = outer_ip_id;
+	slot->outer_is_atomic = outer_is_atomic;
+
+	/* State of the embedded TCP/IPv4 item. */
+	slot->inner_item.firstseg = pkt;
+	slot->inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
+	slot->inner_item.start_time = start_time;
+	slot->inner_item.sent_seq = sent_seq;
+	slot->inner_item.ip_id = ip_id;
+	slot->inner_item.nb_merged = 1;
+	slot->inner_item.is_atomic = is_atomic;
+	tbl->item_num++;
+
+	if (prev_idx == INVALID_ARRAY_INDEX) {
+		/* No predecessor: the new item has no successor either. */
+		slot->inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
+	} else {
+		/* Splice the new item in right after its predecessor. */
+		struct gro_vxlan_tcp4_item *prev = &tbl->items[prev_idx];
+
+		slot->inner_item.next_pkt_idx = prev->inner_item.next_pkt_idx;
+		prev->inner_item.next_pkt_idx = item_idx;
+	}
+
+	return item_idx;
+}
+
+/*
+ * Free the item at item_idx and, when prev_item_idx is valid, make the
+ * previous item point past it. Returns the index of the item that
+ * followed the deleted one.
+ */
+static inline uint32_t
+delete_item(struct gro_vxlan_tcp4_tbl *tbl,
+		uint32_t item_idx,
+		uint32_t prev_item_idx)
+{
+	struct gro_vxlan_tcp4_item *victim = &tbl->items[item_idx];
+	const uint32_t next_idx = victim->inner_item.next_pkt_idx;
+
+	/* A NULL firstseg marks the slot as free. */
+	victim->inner_item.firstseg = NULL;
+	tbl->item_num--;
+
+	if (prev_item_idx != INVALID_ARRAY_INDEX)
+		tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
+
+	return next_idx;
+}
+
+/*
+ * Claim a free flow slot, copy the flow key into it and record item_idx
+ * as the first packet of the flow. Returns the index of the new flow,
+ * or INVALID_ARRAY_INDEX when the flow array is full.
+ */
+static inline uint32_t
+insert_new_flow(struct gro_vxlan_tcp4_tbl *tbl,
+		struct vxlan_tcp4_flow_key *src,
+		uint32_t item_idx)
+{
+	struct gro_vxlan_tcp4_flow *flow;
+	struct vxlan_tcp4_flow_key *dst;
+	const uint32_t flow_idx = find_an_empty_flow(tbl);
+
+	if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
+		return INVALID_ARRAY_INDEX;
+
+	flow = &tbl->flows[flow_idx];
+	dst = &flow->key;
+
+	/* Outer Ethernet, IPv4 and UDP fields. */
+	ether_addr_copy(&src->outer_eth_saddr, &dst->outer_eth_saddr);
+	ether_addr_copy(&src->outer_eth_daddr, &dst->outer_eth_daddr);
+	dst->outer_ip_src_addr = src->outer_ip_src_addr;
+	dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
+	dst->outer_src_port = src->outer_src_port;
+	dst->outer_dst_port = src->outer_dst_port;
+
+	/* VxLAN header fields. */
+	dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
+	dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
+
+	/* Inner Ethernet, IPv4 and TCP fields. */
+	ether_addr_copy(&src->inner_key.eth_saddr, &dst->inner_key.eth_saddr);
+	ether_addr_copy(&src->inner_key.eth_daddr, &dst->inner_key.eth_daddr);
+	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
+	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
+	dst->inner_key.recv_ack = src->inner_key.recv_ack;
+	dst->inner_key.src_port = src->inner_key.src_port;
+	dst->inner_key.dst_port = src->inner_key.dst_port;
+
+	flow->start_index = item_idx;
+	tbl->flow_num++;
+
+	return flow_idx;
+}
+
+static inline int
+is_same_vxlan_tcp4_flow(struct vxlan_tcp4_flow_key k1, + struct vxlan_tcp4_flow_key k2) +{ + return (is_same_ether_addr(&k1.outer_eth_saddr, &k2.outer_eth_saddr) && + is_same_ether_addr(&k1.outer_eth_daddr, + &k2.outer_eth_daddr) && + (k1.outer_ip_src_addr == k2.outer_ip_src_addr) && + (k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) && + (k1.outer_src_port == k2.outer_src_port) && + (k1.outer_dst_port == k2.outer_dst_port) && + (k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) && + (k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) && + is_same_tcp4_flow(k1.inner_key, k2.inner_key)); +} + +static inline int +check_vxlan_seq_option(struct gro_vxlan_tcp4_item *item, + struct tcp_hdr *tcp_hdr, + uint32_t sent_seq, + uint16_t outer_ip_id, + uint16_t ip_id, + uint16_t tcp_hl, + uint16_t tcp_dl, + uint8_t outer_is_atomic, + uint8_t is_atomic) +{ + struct rte_mbuf *pkt = item->inner_item.firstseg; + int cmp; + uint16_t l2_offset; + + /* Don't merge packets whose outer DF bits are different. */ + if (unlikely(item->outer_is_atomic ^ outer_is_atomic)) + return 0; + + l2_offset = pkt->outer_l2_len + pkt->outer_l3_len; + cmp = check_seq_option(&item->inner_item, tcp_hdr, sent_seq, ip_id, + tcp_hl, tcp_dl, l2_offset, is_atomic); + if ((cmp > 0) && (outer_is_atomic || + (outer_ip_id == item->outer_ip_id + 1))) + /* Append the new packet. */ + return 1; + else if ((cmp < 0) && (outer_is_atomic || + (outer_ip_id + item->inner_item.nb_merged == + item->outer_ip_id))) + /* Prepend the new packet. */ + return -1; + + return 0; +} + +static inline int +merge_two_vxlan_tcp4_packets(struct gro_vxlan_tcp4_item *item, + struct rte_mbuf *pkt, + int cmp, + uint32_t sent_seq, + uint16_t outer_ip_id, + uint16_t ip_id) +{ + if (merge_two_tcp4_packets(&item->inner_item, pkt, cmp, sent_seq, + ip_id, pkt->outer_l2_len + + pkt->outer_l3_len)) { + /* Update the outer IPv4 ID to the large value. */ + item->outer_ip_id = cmp > 0 ? 
outer_ip_id : item->outer_ip_id; + return 1; + } + + return 0; +} + +static inline void +update_vxlan_header(struct gro_vxlan_tcp4_item *item) +{ + struct ipv4_hdr *ipv4_hdr; + struct udp_hdr *udp_hdr; + struct rte_mbuf *pkt = item->inner_item.firstseg; + uint16_t len; + + /* Update the outer IPv4 header. */ + len = pkt->pkt_len - pkt->outer_l2_len; + ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + + pkt->outer_l2_len); + ipv4_hdr->total_length = rte_cpu_to_be_16(len); + + /* Update the outer UDP header. */ + len -= pkt->outer_l3_len; + udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len); + udp_hdr->dgram_len = rte_cpu_to_be_16(len); + + /* Update the inner IPv4 header. */ + len -= pkt->l2_len; + ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len); + ipv4_hdr->total_length = rte_cpu_to_be_16(len); +} + +int32_t +gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt, + struct gro_vxlan_tcp4_tbl *tbl, + uint64_t start_time) +{ + struct ether_hdr *outer_eth_hdr, *eth_hdr; + struct ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr; + struct tcp_hdr *tcp_hdr; + struct udp_hdr *udp_hdr; + struct vxlan_hdr *vxlan_hdr; + uint32_t sent_seq; + uint16_t tcp_dl, frag_off, outer_ip_id, ip_id; + uint8_t outer_is_atomic, is_atomic; + + struct vxlan_tcp4_flow_key key; + uint32_t cur_idx, prev_idx, item_idx; + uint32_t i, max_flow_num, remaining_flow_num; + int cmp; + uint16_t hdr_len; + uint8_t find; + + outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + outer_ipv4_hdr = (struct ipv4_hdr *)((char *)outer_eth_hdr + + pkt->outer_l2_len); + udp_hdr = (struct udp_hdr *)((char *)outer_ipv4_hdr + + pkt->outer_l3_len); + vxlan_hdr = (struct vxlan_hdr *)((char *)udp_hdr + + sizeof(struct udp_hdr)); + eth_hdr = (struct ether_hdr *)((char *)vxlan_hdr + + sizeof(struct vxlan_hdr)); + ipv4_hdr = (struct ipv4_hdr *)((char *)udp_hdr + pkt->l2_len); + tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len); + + /* + * Don't process the packet which has FIN, SYN, 
RST, PSH, URG, + * ECE or CWR set. + */ + if (tcp_hdr->tcp_flags != TCP_ACK_FLAG) + return -1; + + hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len + + pkt->l3_len + pkt->l4_len; + /* + * Don't process the packet whose payload length is less than or + * equal to 0. + */ + tcp_dl = pkt->pkt_len - hdr_len; + if (tcp_dl <= 0) + return -1; + + /* + * Save IPv4 ID for the packet whose DF bit is 0. For the packet + * whose DF bit is 1, IPv4 ID is ignored. + */ + frag_off = rte_be_to_cpu_16(outer_ipv4_hdr->fragment_offset); + outer_is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG; + outer_ip_id = outer_is_atomic ? 0 : + rte_be_to_cpu_16(outer_ipv4_hdr->packet_id); + frag_off = rte_be_to_cpu_16(ipv4_hdr->fragment_offset); + is_atomic = (frag_off & IPV4_HDR_DF_FLAG) == IPV4_HDR_DF_FLAG; + ip_id = is_atomic ? 0 : rte_be_to_cpu_16(ipv4_hdr->packet_id); + + sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq); + + ether_addr_copy(&(eth_hdr->s_addr), &(key.inner_key.eth_saddr)); + ether_addr_copy(&(eth_hdr->d_addr), &(key.inner_key.eth_daddr)); + key.inner_key.ip_src_addr = ipv4_hdr->src_addr; + key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr; + key.inner_key.recv_ack = tcp_hdr->recv_ack; + key.inner_key.src_port = tcp_hdr->src_port; + key.inner_key.dst_port = tcp_hdr->dst_port; + + key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags; + key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni; + ether_addr_copy(&(outer_eth_hdr->s_addr), &(key.outer_eth_saddr)); + ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr)); + key.outer_ip_src_addr = outer_ipv4_hdr->src_addr; + key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr; + key.outer_src_port = udp_hdr->src_port; + key.outer_dst_port = udp_hdr->dst_port; + + /* Search for a matched flow. 
*/ + max_flow_num = tbl->max_flow_num; + remaining_flow_num = tbl->flow_num; + find = 0; + for (i = 0; i < max_flow_num && remaining_flow_num; i++) { + if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) { + if (is_same_vxlan_tcp4_flow(tbl->flows[i].key, key)) { + find = 1; + break; + } + remaining_flow_num--; + } + } + + /* + * Can't find a matched flow. Insert a new flow and store the + * packet into the flow. + */ + if (find == 0) { + item_idx = insert_new_item(tbl, pkt, start_time, + INVALID_ARRAY_INDEX, sent_seq, outer_ip_id, + ip_id, outer_is_atomic, is_atomic); + if (item_idx == INVALID_ARRAY_INDEX) + return -1; + if (insert_new_flow(tbl, &key, item_idx) == + INVALID_ARRAY_INDEX) { + /* + * Fail to insert a new flow, so + * delete the inserted packet. + */ + delete_item(tbl, item_idx, INVALID_ARRAY_INDEX); + return -1; + } + return 0; + } + + /* Check all packets in the flow and try to find a neighbor. */ + cur_idx = tbl->flows[i].start_index; + prev_idx = cur_idx; + do { + cmp = check_vxlan_seq_option(&(tbl->items[cur_idx]), tcp_hdr, + sent_seq, outer_ip_id, ip_id, pkt->l4_len, + tcp_dl, outer_is_atomic, is_atomic); + if (cmp) { + if (merge_two_vxlan_tcp4_packets(&(tbl->items[cur_idx]), + pkt, cmp, sent_seq, + outer_ip_id, ip_id)) + return 1; + /* + * Can't merge two packets, as the packet + * length will be greater than the max value. + * Insert the packet into the flow. + */ + if (insert_new_item(tbl, pkt, start_time, prev_idx, + sent_seq, outer_ip_id, + ip_id, outer_is_atomic, + is_atomic) == + INVALID_ARRAY_INDEX) + return -1; + return 0; + } + prev_idx = cur_idx; + cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx; + } while (cur_idx != INVALID_ARRAY_INDEX); + + /* Can't find neighbor. Insert the packet into the flow. 
*/ + if (insert_new_item(tbl, pkt, start_time, prev_idx, sent_seq, + outer_ip_id, ip_id, outer_is_atomic, + is_atomic) == INVALID_ARRAY_INDEX) + return -1; + + return 0; +} + +uint16_t +gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl, + uint64_t flush_timestamp, + struct rte_mbuf **out, + uint16_t nb_out) +{ + uint16_t k = 0; + uint32_t i, j; + uint32_t max_flow_num = tbl->max_flow_num; + + for (i = 0; i < max_flow_num; i++) { + if (unlikely(tbl->flow_num == 0)) + return k; + + j = tbl->flows[i].start_index; + while (j != INVALID_ARRAY_INDEX) { + if (tbl->items[j].inner_item.start_time <= + flush_timestamp) { + out[k++] = tbl->items[j].inner_item.firstseg; + if (tbl->items[j].inner_item.nb_merged > 1) + update_vxlan_header(&(tbl->items[j])); + /* + * Delete the item and get the next packet + * index. + */ + j = delete_item(tbl, j, INVALID_ARRAY_INDEX); + tbl->flows[i].start_index = j; + if (j == INVALID_ARRAY_INDEX) + tbl->flow_num--; + + if (unlikely(k == nb_out)) + return k; + } else + /* + * The left packets in the flow won't be + * timeout. Go to check other flows. 
+ */ + break; + } + } + return k; +} + +uint32_t +gro_vxlan_tcp4_tbl_pkt_count(void *tbl) +{ + struct gro_vxlan_tcp4_tbl *gro_tbl = tbl; + + if (gro_tbl) + return gro_tbl->item_num; + + return 0; +} diff --git a/lib/librte_gro/gro_vxlan_tcp4.h b/lib/librte_gro/gro_vxlan_tcp4.h new file mode 100644 index 00000000..0cafb921 --- /dev/null +++ b/lib/librte_gro/gro_vxlan_tcp4.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ + +#ifndef _GRO_VXLAN_TCP4_H_ +#define _GRO_VXLAN_TCP4_H_ + +#include "gro_tcp4.h" + +#define GRO_VXLAN_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL) + +/* Header fields representing a VxLAN flow */ +struct vxlan_tcp4_flow_key { + struct tcp4_flow_key inner_key; + struct vxlan_hdr vxlan_hdr; + + struct ether_addr outer_eth_saddr; + struct ether_addr outer_eth_daddr; + + uint32_t outer_ip_src_addr; + uint32_t outer_ip_dst_addr; + + /* Outer UDP ports */ + uint16_t outer_src_port; + uint16_t outer_dst_port; + +}; + +struct gro_vxlan_tcp4_flow { + struct vxlan_tcp4_flow_key key; + /* + * The index of the first packet in the flow. INVALID_ARRAY_INDEX + * indicates an empty flow. 
+ */ + uint32_t start_index; +}; + +struct gro_vxlan_tcp4_item { + struct gro_tcp4_item inner_item; + /* IPv4 ID in the outer IPv4 header */ + uint16_t outer_ip_id; + /* Indicate if outer IPv4 ID can be ignored */ + uint8_t outer_is_atomic; +}; + +/* + * VxLAN (with an outer IPv4 header and an inner TCP/IPv4 packet) + * reassembly table structure + */ +struct gro_vxlan_tcp4_tbl { + /* item array */ + struct gro_vxlan_tcp4_item *items; + /* flow array */ + struct gro_vxlan_tcp4_flow *flows; + /* current item number */ + uint32_t item_num; + /* current flow number */ + uint32_t flow_num; + /* the maximum item number */ + uint32_t max_item_num; + /* the maximum flow number */ + uint32_t max_flow_num; +}; + +/** + * This function creates a VxLAN reassembly table for VxLAN packets + * which have an outer IPv4 header and an inner TCP/IPv4 packet. + * + * @param socket_id + * Socket index for allocating the table + * @param max_flow_num + * The maximum number of flows in the table + * @param max_item_per_flow + * The maximum number of packets per flow + * + * @return + * - Return the table pointer on success. + * - Return NULL on failure. + */ +void *gro_vxlan_tcp4_tbl_create(uint16_t socket_id, + uint16_t max_flow_num, + uint16_t max_item_per_flow); + +/** + * This function destroys a VxLAN reassembly table. + * + * @param tbl + * Pointer pointing to the VxLAN reassembly table + */ +void gro_vxlan_tcp4_tbl_destroy(void *tbl); + +/** + * This function merges a VxLAN packet which has an outer IPv4 header and + * an inner TCP/IPv4 packet. It doesn't process the packet, whose TCP + * header has SYN, FIN, RST, PSH, CWR, ECE or URG bit set, or which + * doesn't have payload. + * + * This function doesn't check if the packet has correct checksums and + * doesn't re-calculate checksums for the merged packet. Additionally, + * it assumes the packets are complete (i.e., MF==0 && frag_off==0), when + * IP fragmentation is possible (i.e., DF==0). 
It returns the packet, if + * the packet has invalid parameters (e.g. SYN bit is set) or there is no + * available space in the table. + * + * @param pkt + * Packet to reassemble + * @param tbl + * Pointer pointing to the VxLAN reassembly table + * @param start_time + * The time when the packet is inserted into the table + * + * @return + * - Return a positive value if the packet is merged. + * - Return zero if the packet isn't merged but stored in the table. + * - Return a negative value for invalid parameters or no available + * space in the table. + */ +int32_t gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt, + struct gro_vxlan_tcp4_tbl *tbl, + uint64_t start_time); + +/** + * This function flushes timeout packets in the VxLAN reassembly table, + * without updating checksums. + * + * @param tbl + * Pointer pointing to a VxLAN GRO table + * @param flush_timestamp + * This function flushes packets which are inserted into the table + * before or at the flush_timestamp. + * @param out + * Pointer array used to keep flushed packets + * @param nb_out + * The element number in 'out'. It also determines the maximum number of + * packets that can be flushed finally. + * + * @return + * The number of flushed packets + */ +uint16_t gro_vxlan_tcp4_tbl_timeout_flush(struct gro_vxlan_tcp4_tbl *tbl, + uint64_t flush_timestamp, + struct rte_mbuf **out, + uint16_t nb_out); + +/** + * This function returns the number of the packets in a VxLAN + * reassembly table. 
+ * + * @param tbl + * Pointer pointing to the VxLAN reassembly table + * + * @return + * The number of packets in the table + */ +uint32_t gro_vxlan_tcp4_tbl_pkt_count(void *tbl); +#endif diff --git a/lib/librte_gro/meson.build b/lib/librte_gro/meson.build new file mode 100644 index 00000000..501668c8 --- /dev/null +++ b/lib/librte_gro/meson.build @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +sources = files('rte_gro.c', 'gro_tcp4.c', 'gro_vxlan_tcp4.c') +headers = files('rte_gro.h') +deps += ['ethdev'] diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c index 7853246a..6618f4d3 100644 --- a/lib/librte_gro/rte_gro.c +++ b/lib/librte_gro/rte_gro.c @@ -1,33 +1,5 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2017 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation */ #include <rte_malloc.h> @@ -37,6 +9,7 @@ #include "rte_gro.h" #include "gro_tcp4.h" +#include "gro_vxlan_tcp4.h" typedef void *(*gro_tbl_create_fn)(uint16_t socket_id, uint16_t max_flow_num, @@ -45,17 +18,33 @@ typedef void (*gro_tbl_destroy_fn)(void *tbl); typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl); static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = { - gro_tcp4_tbl_create, NULL}; + gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create, NULL}; static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = { - gro_tcp4_tbl_destroy, NULL}; + gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy, + NULL}; static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = { - gro_tcp4_tbl_pkt_count, NULL}; + gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count, + NULL}; + +#define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ + ((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP)) + +#define IS_IPV4_VXLAN_TCP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ + ((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \ + ((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \ + RTE_PTYPE_TUNNEL_VXLAN) && \ + ((ptype & RTE_PTYPE_INNER_L4_TCP) == \ + RTE_PTYPE_INNER_L4_TCP) && \ + (((ptype & RTE_PTYPE_INNER_L3_MASK) & \ + (RTE_PTYPE_INNER_L3_IPV4 | \ + RTE_PTYPE_INNER_L3_IPV4_EXT | \ + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0)) /* - * GRO context structure, 
which is used to merge packets. It keeps - * many reassembly tables of desired GRO types. Applications need to - * create GRO context objects before using rte_gro_reassemble to - * perform GRO. + * GRO context structure. It keeps the table structures, which are + * used to merge packets, for different GRO types. Before using + * rte_gro_reassemble(), applications need to create the GRO context + * first. */ struct gro_ctx { /* GRO types to perform */ @@ -113,8 +102,6 @@ rte_gro_ctx_destroy(void *ctx) uint64_t gro_type_flag; uint8_t i; - if (gro_ctx == NULL) - return; for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) { gro_type_flag = 1ULL << i; if ((gro_ctx->gro_types & gro_type_flag) == 0) @@ -131,62 +118,95 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts, uint16_t nb_pkts, const struct rte_gro_param *param) { - uint16_t i; - uint16_t nb_after_gro = nb_pkts; - uint32_t item_num; - /* allocate a reassembly table for TCP/IPv4 GRO */ struct gro_tcp4_tbl tcp_tbl; - struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM]; + struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} }; + /* Allocate a reassembly table for VXLAN GRO */ + struct gro_vxlan_tcp4_tbl vxlan_tbl; + struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; + struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM] = { + {{0}, 0, 0} }; + struct rte_mbuf *unprocess_pkts[nb_pkts]; - uint16_t unprocess_num = 0; + uint32_t item_num; int32_t ret; - uint64_t current_time; + uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts; + uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0; - if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0) + if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 | + RTE_GRO_TCP_IPV4)) == 0)) return nb_pkts; - /* get the actual number of packets */ + /* Get the maximum number of packets */ item_num = RTE_MIN(nb_pkts, (param->max_flow_num * - param->max_item_per_flow)); + param->max_item_per_flow)); 
item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM); - for (i = 0; i < item_num; i++) - tcp_keys[i].start_index = INVALID_ARRAY_INDEX; - - tcp_tbl.keys = tcp_keys; - tcp_tbl.items = tcp_items; - tcp_tbl.key_num = 0; - tcp_tbl.item_num = 0; - tcp_tbl.max_key_num = item_num; - tcp_tbl.max_item_num = item_num; + if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) { + for (i = 0; i < item_num; i++) + vxlan_flows[i].start_index = INVALID_ARRAY_INDEX; + + vxlan_tbl.flows = vxlan_flows; + vxlan_tbl.items = vxlan_items; + vxlan_tbl.flow_num = 0; + vxlan_tbl.item_num = 0; + vxlan_tbl.max_flow_num = item_num; + vxlan_tbl.max_item_num = item_num; + do_vxlan_gro = 1; + } - current_time = rte_rdtsc(); + if (param->gro_types & RTE_GRO_TCP_IPV4) { + for (i = 0; i < item_num; i++) + tcp_flows[i].start_index = INVALID_ARRAY_INDEX; + + tcp_tbl.flows = tcp_flows; + tcp_tbl.items = tcp_items; + tcp_tbl.flow_num = 0; + tcp_tbl.item_num = 0; + tcp_tbl.max_flow_num = item_num; + tcp_tbl.max_item_num = item_num; + do_tcp4_gro = 1; + } for (i = 0; i < nb_pkts; i++) { - if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 | - RTE_PTYPE_L4_TCP)) == - (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) { - ret = gro_tcp4_reassemble(pkts[i], - &tcp_tbl, - current_time); + /* + * The timestamp is ignored, since all packets + * will be flushed from the tables. 
+ */ + if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) && + do_vxlan_gro) { + ret = gro_vxlan_tcp4_reassemble(pkts[i], &vxlan_tbl, 0); + if (ret > 0) + /* Merge successfully */ + nb_after_gro--; + else if (ret < 0) + unprocess_pkts[unprocess_num++] = pkts[i]; + } else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) && + do_tcp4_gro) { + ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, 0); if (ret > 0) /* merge successfully */ nb_after_gro--; - else if (ret < 0) { - unprocess_pkts[unprocess_num++] = - pkts[i]; - } + else if (ret < 0) + unprocess_pkts[unprocess_num++] = pkts[i]; } else unprocess_pkts[unprocess_num++] = pkts[i]; } - /* re-arrange GROed packets */ if (nb_after_gro < nb_pkts) { - i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time, - pkts, nb_pkts); + i = 0; + /* Flush all packets from the tables */ + if (do_vxlan_gro) { + i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl, + 0, pkts, nb_pkts); + } + if (do_tcp4_gro) { + i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0, + &pkts[i], nb_pkts - i); + } + /* Copy unprocessed packets */ if (unprocess_num > 0) { memcpy(&pkts[i], unprocess_pkts, sizeof(struct rte_mbuf *) * @@ -202,31 +222,43 @@ rte_gro_reassemble(struct rte_mbuf **pkts, uint16_t nb_pkts, void *ctx) { - uint16_t i, unprocess_num = 0; struct rte_mbuf *unprocess_pkts[nb_pkts]; struct gro_ctx *gro_ctx = ctx; + void *tcp_tbl, *vxlan_tbl; uint64_t current_time; + uint16_t i, unprocess_num = 0; + uint8_t do_tcp4_gro, do_vxlan_gro; - if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0) + if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 | + RTE_GRO_TCP_IPV4)) == 0)) return nb_pkts; + tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX]; + vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX]; + + do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == + RTE_GRO_TCP_IPV4; + do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) == + RTE_GRO_IPV4_VXLAN_TCP_IPV4; + current_time = rte_rdtsc(); for (i = 0; i < nb_pkts; i++) { - if 
((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 | - RTE_PTYPE_L4_TCP)) == - (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) { - if (gro_tcp4_reassemble(pkts[i], - gro_ctx->tbls - [RTE_GRO_TCP_IPV4_INDEX], + if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) && + do_vxlan_gro) { + if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl, + current_time) < 0) + unprocess_pkts[unprocess_num++] = pkts[i]; + } else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) && + do_tcp4_gro) { + if (gro_tcp4_reassemble(pkts[i], tcp_tbl, current_time) < 0) unprocess_pkts[unprocess_num++] = pkts[i]; } else unprocess_pkts[unprocess_num++] = pkts[i]; } if (unprocess_num > 0) { - memcpy(pkts, unprocess_pkts, - sizeof(struct rte_mbuf *) * + memcpy(pkts, unprocess_pkts, sizeof(struct rte_mbuf *) * unprocess_num); } @@ -242,17 +274,27 @@ rte_gro_timeout_flush(void *ctx, { struct gro_ctx *gro_ctx = ctx; uint64_t flush_timestamp; + uint16_t num = 0; gro_types = gro_types & gro_ctx->gro_types; flush_timestamp = rte_rdtsc() - timeout_cycles; - if (gro_types & RTE_GRO_TCP_IPV4) { - return gro_tcp4_tbl_timeout_flush( + if (gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) { + num = gro_vxlan_tcp4_tbl_timeout_flush(gro_ctx->tbls[ + RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX], + flush_timestamp, out, max_nb_out); + max_nb_out -= num; + } + + /* If no available space in 'out', stop flushing. 
*/ + if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) { + num += gro_tcp4_tbl_timeout_flush( gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX], flush_timestamp, - out, max_nb_out); + &out[num], max_nb_out); } - return 0; + + return num; } uint64_t @@ -260,19 +302,20 @@ rte_gro_get_pkt_count(void *ctx) { struct gro_ctx *gro_ctx = ctx; gro_tbl_pkt_count_fn pkt_count_fn; + uint64_t gro_types = gro_ctx->gro_types, flag; uint64_t item_num = 0; - uint64_t gro_type_flag; uint8_t i; - for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) { - gro_type_flag = 1ULL << i; - if ((gro_ctx->gro_types & gro_type_flag) == 0) + for (i = 0; i < RTE_GRO_TYPE_MAX_NUM && gro_types; i++) { + flag = 1ULL << i; + if ((gro_types & flag) == 0) continue; + gro_types ^= flag; pkt_count_fn = tbl_pkt_count_fn[i]; - if (pkt_count_fn == NULL) - continue; - item_num += pkt_count_fn(gro_ctx->tbls[i]); + if (pkt_count_fn) + item_num += pkt_count_fn(gro_ctx->tbls[i]); } + return item_num; } diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h index d57e0c5f..8d781b5f 100644 --- a/lib/librte_gro/rte_gro.h +++ b/lib/librte_gro/rte_gro.h @@ -1,33 +1,5 @@ -/*- - * BSD LICENSE - * - * Copyright(c) 2017 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation */ #ifndef _RTE_GRO_H_ @@ -51,16 +23,19 @@ extern "C" { */ #define RTE_GRO_TYPE_MAX_NUM 64 /**< the max number of supported GRO types */ -#define RTE_GRO_TYPE_SUPPORT_NUM 1 +#define RTE_GRO_TYPE_SUPPORT_NUM 2 /**< the number of currently supported GRO types */ #define RTE_GRO_TCP_IPV4_INDEX 0 #define RTE_GRO_TCP_IPV4 (1ULL << RTE_GRO_TCP_IPV4_INDEX) /**< TCP/IPv4 GRO flag */ +#define RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX 1 +#define RTE_GRO_IPV4_VXLAN_TCP_IPV4 (1ULL << RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX) +/**< VxLAN GRO flag. */ /** - * A structure which is used to create GRO context objects or tell - * rte_gro_reassemble_burst() what reassembly rules are demanded. + * Structure used to create GRO context objects or used to pass + * application-determined parameters to rte_gro_reassemble_burst(). */ struct rte_gro_param { uint64_t gro_types; @@ -106,26 +81,23 @@ void rte_gro_ctx_destroy(void *ctx); /** * This is one of the main reassembly APIs, which merges numbers of - * packets at a time. It assumes that all inputted packets are with - * correct checksums. 
That is, applications should guarantee all - * inputted packets are correct. Besides, it doesn't re-calculate - * checksums for merged packets. If inputted packets are IP fragmented, - * this function assumes them are complete (i.e. with L4 header). After - * finishing processing, it returns all GROed packets to applications - * immediately. + * packets at a time. It doesn't check if input packets have correct + * checksums and doesn't re-calculate checksums for merged packets. + * It assumes the packets are complete (i.e., MF==0 && frag_off==0), + * when IP fragmentation is possible (i.e., DF==0). The GROed packets + * are returned as soon as the function finishes. * * @param pkts - * a pointer array which points to the packets to reassemble. Besides, - * it keeps mbuf addresses for the GROed packets. + * Pointer array pointing to the packets to reassemble. Besides, it + * keeps MBUF addresses for the GROed packets. * @param nb_pkts - * the number of packets to reassemble. + * The number of packets to reassemble * @param param - * applications use it to tell rte_gro_reassemble_burst() what rules - * are demanded. + * Application-determined parameters for reassembling packets. * * @return - * the number of packets after been GROed. If no packets are merged, - * the returned value is nb_pkts. + * The number of packets after being GROed. If no packets are merged, + * the return value is equal to nb_pkts. */ uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts, uint16_t nb_pkts, @@ -135,32 +107,28 @@ uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts, * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * Reassembly function, which tries to merge inputted packets with - * the packets in the reassembly tables of a given GRO context. This - * function assumes all inputted packets are with correct checksums. - * And it won't update checksums if two packets are merged. 
Besides, - * if inputted packets are IP fragmented, this function assumes they - * are complete packets (i.e. with L4 header). + * Reassembly function, which tries to merge input packets with the + * existing packets in the reassembly tables of a given GRO context. + * It doesn't check if input packets have correct checksums and doesn't + * re-calculate checksums for merged packets. Additionally, it assumes + * the packets are complete (i.e., MF==0 && frag_off==0), when IP + * fragmentation is possible (i.e., DF==0). * - * If the inputted packets don't have data or are with unsupported GRO - * types etc., they won't be processed and are returned to applications. - * Otherwise, the inputted packets are either merged or inserted into - * the table. If applications want get packets in the table, they need - * to call flush API. + * If the input packets have invalid parameters (e.g. no data payload, + * unsupported GRO types), they are returned to applications. Otherwise, + * they are either merged or inserted into the table. Applications need + * to flush packets from the tables by flush API, if they want to get the + * GROed packets. * * @param pkts - * packet to reassemble. Besides, after this function finishes, it - * keeps the unprocessed packets (e.g. without data or unsupported - * GRO types). + * Packets to reassemble. It's also used to store the unprocessed packets. * @param nb_pkts - * the number of packets to reassemble. + * The number of packets to reassemble. * @param ctx - * a pointer points to a GRO context object. + * GRO context object pointer. * * @return - * return the number of unprocessed packets (e.g. without data or - * unsupported GRO types). If all packets are processed (merged or - * inserted into the table), return 0. + * The number of unprocessed packets. 
*/ uint16_t rte_gro_reassemble(struct rte_mbuf **pkts, uint16_t nb_pkts, @@ -170,29 +138,28 @@ uint16_t rte_gro_reassemble(struct rte_mbuf **pkts, * @warning * @b EXPERIMENTAL: this API may change without prior notice * - * This function flushes the timeout packets from reassembly tables of - * desired GRO types. The max number of flushed timeout packets is the - * element number of the array which is used to keep the flushed packets. + * This function flushes the timeout packets from the reassembly tables + * of desired GRO types. The max number of flushed packets is the + * element number of 'out'. * - * Besides, this function won't re-calculate checksums for merged - * packets in the tables. That is, the returned packets may be with - * wrong checksums. + * Additionally, the flushed packets may have incorrect checksums, since + * this function doesn't re-calculate checksums for merged packets. * * @param ctx - * a pointer points to a GRO context object. + * GRO context object pointer. * @param timeout_cycles - * max TTL for packets in reassembly tables, measured in nanosecond. + * The max TTL for packets in reassembly tables, measured in nanoseconds. * @param gro_types - * this function only flushes packets which belong to the GRO types - * specified by gro_types. + * This function flushes packets whose GRO types are specified by + * gro_types. * @param out - * a pointer array that is used to keep flushed timeout packets. + * Pointer array used to keep flushed packets. * @param max_nb_out - * the element number of out. It's also the max number of timeout + * The element number of 'out'. It's also the max number of timeout * packets that can be flushed finally. * * @return - * the number of flushed packets. If no packets are flushed, return 0. + * The number of flushed packets. */ uint16_t rte_gro_timeout_flush(void *ctx, uint64_t timeout_cycles, @@ -208,10 +175,10 @@ uint16_t rte_gro_timeout_flush(void *ctx, * of a given GRO context. 
* * @param ctx - * pointer points to a GRO context object. + * GRO context object pointer. * * @return - * the number of packets in all reassembly tables. + * The number of packets in the tables. */ uint64_t rte_gro_get_pkt_count(void *ctx); |