aboutsummaryrefslogtreecommitdiffstats
path: root/lib/librte_gro/gro_tcp4.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/librte_gro/gro_tcp4.h')
-rw-r--r--lib/librte_gro/gro_tcp4.h283
1 files changed, 187 insertions, 96 deletions
diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h
index 0a817162..6bb30cdb 100644
--- a/lib/librte_gro/gro_tcp4.h
+++ b/lib/librte_gro/gro_tcp4.h
@@ -1,49 +1,24 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2017 Intel Corporation. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Intel Corporation
*/
#ifndef _GRO_TCP4_H_
#define _GRO_TCP4_H_
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
#define INVALID_ARRAY_INDEX 0xffffffffUL
#define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
/*
- * the max L3 length of a TCP/IPv4 packet. The L3 length
- * is the sum of ipv4 header, tcp header and L4 payload.
+ * The max length of an IPv4 packet, which includes the length of the L3
+ * header, the L4 header and the data payload.
*/
-#define TCP4_MAX_L3_LENGTH UINT16_MAX
+#define MAX_IPV4_PKT_LENGTH UINT16_MAX
-/* criteria of mergeing packets */
-struct tcp4_key {
+/* Header fields representing a TCP/IPv4 flow */
+struct tcp4_flow_key {
struct ether_addr eth_saddr;
struct ether_addr eth_daddr;
uint32_t ip_src_addr;
@@ -54,44 +29,43 @@ struct tcp4_key {
uint16_t dst_port;
};
-struct gro_tcp4_key {
- struct tcp4_key key;
+struct gro_tcp4_flow {
+ struct tcp4_flow_key key;
/*
- * the index of the first packet in the item group.
- * If the value is INVALID_ARRAY_INDEX, it means
- * the key is empty.
+ * The index of the first packet in the flow.
+ * INVALID_ARRAY_INDEX indicates an empty flow.
*/
uint32_t start_index;
};
struct gro_tcp4_item {
/*
- * first segment of the packet. If the value
+ * The first MBUF segment of the packet. If the value
* is NULL, it means the item is empty.
*/
struct rte_mbuf *firstseg;
- /* last segment of the packet */
+ /* The last MBUF segment of the packet */
struct rte_mbuf *lastseg;
/*
- * the time when the first packet is inserted
- * into the table. If a packet in the table is
- * merged with an incoming packet, this value
- * won't be updated. We set this value only
- * when the first packet is inserted into the
- * table.
+ * The time when the first packet is inserted into the table.
+ * This value won't be updated, even if the packet is merged
+ * with other packets.
*/
uint64_t start_time;
/*
- * we use next_pkt_idx to chain the packets that
- * have same key value but can't be merged together.
+ * next_pkt_idx is used to chain the packets that
+ * are in the same flow but can't be merged together
+ * (e.g. caused by packet reordering).
*/
uint32_t next_pkt_idx;
- /* the sequence number of the packet */
+ /* TCP sequence number of the packet */
uint32_t sent_seq;
- /* the IP ID of the packet */
+ /* IPv4 ID of the packet */
uint16_t ip_id;
/* the number of merged packets */
uint16_t nb_merged;
+ /* Indicate if IPv4 ID can be ignored */
+ uint8_t is_atomic;
};
/*
@@ -100,31 +74,31 @@ struct gro_tcp4_item {
struct gro_tcp4_tbl {
/* item array */
struct gro_tcp4_item *items;
- /* key array */
- struct gro_tcp4_key *keys;
+ /* flow array */
+ struct gro_tcp4_flow *flows;
/* current item number */
uint32_t item_num;
- /* current key num */
- uint32_t key_num;
+ /* current flow num */
+ uint32_t flow_num;
/* item array size */
uint32_t max_item_num;
- /* key array size */
- uint32_t max_key_num;
+ /* flow array size */
+ uint32_t max_flow_num;
};
/**
* This function creates a TCP/IPv4 reassembly table.
*
* @param socket_id
- * socket index for allocating TCP/IPv4 reassemble table
+ * Socket index for allocating the TCP/IPv4 reassembly table
* @param max_flow_num
- * the maximum number of flows in the TCP/IPv4 GRO table
+ * The maximum number of flows in the TCP/IPv4 GRO table
* @param max_item_per_flow
- * the maximum packet number per flow.
+ * The maximum number of packets per flow
*
* @return
- * if create successfully, return a pointer which points to the
- * created TCP/IPv4 GRO table. Otherwise, return NULL.
+ * - Return the table pointer on success.
+ * - Return NULL on failure.
*/
void *gro_tcp4_tbl_create(uint16_t socket_id,
uint16_t max_flow_num,
@@ -134,62 +108,56 @@ void *gro_tcp4_tbl_create(uint16_t socket_id,
* This function destroys a TCP/IPv4 reassembly table.
*
* @param tbl
- * a pointer points to the TCP/IPv4 reassembly table.
+ * Pointer pointing to the TCP/IPv4 reassembly table.
*/
void gro_tcp4_tbl_destroy(void *tbl);
/**
- * This function searches for a packet in the TCP/IPv4 reassembly table
- * to merge with the inputted one. To merge two packets is to chain them
- * together and update packet headers. Packets, whose SYN, FIN, RST, PSH
- * CWR, ECE or URG bit is set, are returned immediately. Packets which
- * only have packet headers (i.e. without data) are also returned
- * immediately. Otherwise, the packet is either merged, or inserted into
- * the table. Besides, if there is no available space to insert the
- * packet, this function returns immediately too.
+ * This function merges a TCP/IPv4 packet. It doesn't process packets
+ * that have the SYN, FIN, RST, PSH, CWR, ECE or URG bit set, or that
+ * carry no payload.
*
- * This function assumes the inputted packet is with correct IPv4 and
- * TCP checksums. And if two packets are merged, it won't re-calculate
- * IPv4 and TCP checksums. Besides, if the inputted packet is IP
- * fragmented, it assumes the packet is complete (with TCP header).
+ * This function doesn't check if the packet has correct checksums and
+ * doesn't re-calculate checksums for the merged packet. Additionally,
+ * it assumes the packets are complete (i.e., MF==0 && frag_off==0),
+ * when IP fragmentation is possible (i.e., DF==0). It returns the
+ * packet, if the packet has invalid parameters (e.g. SYN bit is set)
+ * or there is no available space in the table.
*
* @param pkt
- * packet to reassemble.
+ * Packet to reassemble
* @param tbl
- * a pointer that points to a TCP/IPv4 reassembly table.
+ * Pointer pointing to the TCP/IPv4 reassembly table
* @start_time
- * the start time that the packet is inserted into the table
+ * The time when the packet is inserted into the table
*
* @return
- * if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE
- * or URG bit is set, or there is no available space in the table to
- * insert a new item or a new key, return a negative value. If the
- * packet is merged successfully, return an positive value. If the
- * packet is inserted into the table, return 0.
+ * - Return a positive value if the packet is merged.
+ * - Return zero if the packet isn't merged but stored in the table.
+ * - Return a negative value for invalid parameters or no available
+ * space in the table.
*/
int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt,
struct gro_tcp4_tbl *tbl,
uint64_t start_time);
/**
- * This function flushes timeout packets in a TCP/IPv4 reassembly table
- * to applications, and without updating checksums for merged packets.
- * The max number of flushed timeout packets is the element number of
- * the array which is used to keep flushed packets.
+ * This function flushes timed-out packets from a TCP/IPv4 reassembly
+ * table without updating checksums.
*
* @param tbl
- * a pointer that points to a TCP GRO table.
+ * TCP/IPv4 reassembly table pointer
* @param flush_timestamp
- * this function flushes packets which are inserted into the table
- * before or at the flush_timestamp.
+ * Flush packets which are inserted into the table before or at the
+ * flush_timestamp.
* @param out
- * pointer array which is used to keep flushed packets.
+ * Pointer array used to keep flushed packets
* @param nb_out
- * the element number of out. It's also the max number of timeout
+ * The element number in 'out'. It also determines the maximum number of
* packets that can be flushed finally.
*
* @return
- * the number of packets that are returned.
+ * The number of flushed packets
*/
uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
uint64_t flush_timestamp,
@@ -201,10 +169,133 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl,
* reassembly table.
*
* @param tbl
- * pointer points to a TCP/IPv4 reassembly table.
+ * TCP/IPv4 reassembly table pointer
*
* @return
- * the number of packets in the table
+ * The number of packets in the table
*/
uint32_t gro_tcp4_tbl_pkt_count(void *tbl);
+
+/*
+ * Compare two TCP/IPv4 flow keys field by field.
+ *
+ * Returns 1 when both Ethernet addresses, both IPv4 addresses, the
+ * recv_ack value and both TCP ports are equal in k1 and k2;
+ * returns 0 as soon as any of them differs.
+ */
+static inline int
+is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2)
+{
+	/* Ethernet source and destination addresses */
+	if (!is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr))
+		return 0;
+	if (!is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr))
+		return 0;
+
+	/* IPv4 source and destination addresses */
+	if (k1.ip_src_addr != k2.ip_src_addr)
+		return 0;
+	if (k1.ip_dst_addr != k2.ip_dst_addr)
+		return 0;
+
+	/* TCP fields kept in the key */
+	if (k1.recv_ack != k2.recv_ack)
+		return 0;
+	if (k1.src_port != k2.src_port)
+		return 0;
+	if (k1.dst_port != k2.dst_port)
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Chain an incoming TCP/IPv4 packet onto the packet held by a table
+ * item. Checksums are not updated.
+ *
+ * A positive cmp appends pkt behind the stored packet; any other value
+ * puts pkt in front of it, and the item then takes over pkt's sequence
+ * number. In both cases the item's IPv4 ID is set to ip_id.
+ *
+ * Returns 1 once the packets are chained. Returns 0, before anything
+ * has been modified, when the combined length would exceed
+ * MAX_IPV4_PKT_LENGTH.
+ */
+static inline int
+merge_two_tcp4_packets(struct gro_tcp4_item *item,
+		struct rte_mbuf *pkt,
+		int cmp,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint16_t l2_offset)
+{
+	const int append = (cmp > 0);
+	struct rte_mbuf *head = append ? item->firstseg : pkt;
+	struct rte_mbuf *tail = append ? pkt : item->firstseg;
+	struct rte_mbuf *pkt_last;
+	uint16_t hdr_len, l2_len;
+
+	/* header bytes that will be stripped from the trailing packet */
+	hdr_len = l2_offset + head->l2_len + head->l3_len +
+		head->l4_len;
+
+	/* L2 bytes excluded from the length check */
+	if (l2_offset > 0)
+		l2_len = head->outer_l2_len;
+	else
+		l2_len = head->l2_len;
+
+	/* refuse to build a packet longer than the IPv4 maximum */
+	if (unlikely(head->pkt_len - l2_len + tail->pkt_len -
+				hdr_len > MAX_IPV4_PKT_LENGTH))
+		return 0;
+
+	/* only the leading packet keeps its headers */
+	rte_pktmbuf_adj(tail, hdr_len);
+
+	/* link the two mbuf chains */
+	if (append) {
+		item->lastseg->next = pkt;
+		item->lastseg = rte_pktmbuf_lastseg(pkt);
+	} else {
+		pkt_last = rte_pktmbuf_lastseg(pkt);
+		pkt_last->next = item->firstseg;
+		item->firstseg = pkt;
+		/* the item now starts at the new, smaller sequence number */
+		item->sent_seq = sent_seq;
+	}
+	/* both directions record the incoming packet's IPv4 ID */
+	item->ip_id = ip_id;
+	item->nb_merged++;
+
+	/* the leading mbuf carries the totals for the merged packet */
+	head->nb_segs += tail->nb_segs;
+	head->pkt_len += tail->pkt_len;
+
+	return 1;
+}
+
+/*
+ * Check if an incoming TCP/IPv4 packet is a neighbor of the packet
+ * stored in a table item.
+ *
+ * The packets are neighbors only if their TCP headers have the same
+ * length and identical option bytes, their is_atomic flags agree, and
+ * the TCP sequence numbers (plus the IPv4 IDs, unless is_atomic is set)
+ * are contiguous.
+ *
+ * Returns 1 if the new packet should be appended to the stored one,
+ * -1 if it should be pre-pended, and 0 if the two can't be merged.
+ */
+static inline int
+check_seq_option(struct gro_tcp4_item *item,
+		struct tcp_hdr *tcph,
+		uint32_t sent_seq,
+		uint16_t ip_id,
+		uint16_t tcp_hl,
+		uint16_t tcp_dl,
+		uint16_t l2_offset,
+		uint8_t is_atomic)
+{
+	struct rte_mbuf *pkt_orig = item->firstseg;
+	struct ipv4_hdr *iph_orig;
+	struct tcp_hdr *tcph_orig;
+	uint16_t len, tcp_hl_orig;
+
+	/* locate the IPv4 and TCP headers of the stored packet */
+	iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) +
+			l2_offset + pkt_orig->l2_len);
+	tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len);
+	tcp_hl_orig = pkt_orig->l4_len;
+
+	/* Check if TCP option fields equal */
+	len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct tcp_hdr);
+	if ((tcp_hl != tcp_hl_orig) || ((len > 0) &&
+				(memcmp(tcph + 1, tcph_orig + 1,
+					len) != 0)))
+		return 0;
+
+	/* Don't merge packets whose DF bits are different */
+	if (unlikely(item->is_atomic ^ is_atomic))
+		return 0;
+
+	/* payload length of the stored packet */
+	len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len -
+		pkt_orig->l3_len - tcp_hl_orig;
+
+	/*
+	 * check if the two packets are neighbors.
+	 * The IPv4 ID is a 16-bit counter that wraps around, so the sums
+	 * are truncated to uint16_t; without the cast they are evaluated
+	 * as int and a wrap (e.g. 0xffff -> 0) would never match.
+	 */
+	if ((sent_seq == item->sent_seq + len) && (is_atomic ||
+				(ip_id == (uint16_t)(item->ip_id + 1))))
+		/* append the new packet */
+		return 1;
+	else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic ||
+				((uint16_t)(ip_id + item->nb_merged) ==
+					item->ip_id)))
+		/* pre-pend the new packet */
+		return -1;
+
+	return 0;
+}
#endif