diff options
Diffstat (limited to 'lib/librte_gro')
-rw-r--r-- | lib/librte_gro/Makefile | 51 | ||||
-rw-r--r-- | lib/librte_gro/gro_tcp4.c | 505 | ||||
-rw-r--r-- | lib/librte_gro/gro_tcp4.h | 210 | ||||
-rw-r--r-- | lib/librte_gro/rte_gro.c | 278 | ||||
-rw-r--r-- | lib/librte_gro/rte_gro.h | 222 | ||||
-rw-r--r-- | lib/librte_gro/rte_gro_version.map | 12 |
6 files changed, 1278 insertions, 0 deletions
diff --git a/lib/librte_gro/Makefile b/lib/librte_gro/Makefile new file mode 100644 index 00000000..747eeec9 --- /dev/null +++ b/lib/librte_gro/Makefile @@ -0,0 +1,51 @@ +# BSD LICENSE +# +# Copyright(c) 2017 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_gro.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +EXPORT_MAP := rte_gro_version.map + +LIBABIVER := 1 + +# source files +SRCS-$(CONFIG_RTE_LIBRTE_GRO) += rte_gro.c +SRCS-$(CONFIG_RTE_LIBRTE_GRO) += gro_tcp4.c + +# install only rte_gro.h; gro_tcp4.h is not installed +SYMLINK-$(CONFIG_RTE_LIBRTE_GRO)-include += rte_gro.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c new file mode 100644 index 00000000..61a04232 --- /dev/null +++ b/lib/librte_gro/gro_tcp4.c @@ -0,0 +1,505 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> +#include <rte_ip.h> +#include <rte_tcp.h> + +#include "gro_tcp4.h" + +void * +gro_tcp4_tbl_create(uint16_t socket_id, + uint16_t max_flow_num, + uint16_t max_item_per_flow) +{ + struct gro_tcp4_tbl *tbl; + size_t size; + uint32_t entries_num, i; + + entries_num = max_flow_num * max_item_per_flow; + entries_num = RTE_MIN(entries_num, GRO_TCP4_TBL_MAX_ITEM_NUM); + + if (entries_num == 0) + return NULL; + + tbl = rte_zmalloc_socket(__func__, + sizeof(struct gro_tcp4_tbl), + RTE_CACHE_LINE_SIZE, + socket_id); + if (tbl == NULL) + return NULL; + + size = sizeof(struct gro_tcp4_item) * entries_num; + tbl->items = rte_zmalloc_socket(__func__, + size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (tbl->items == NULL) { + rte_free(tbl); + return NULL; + } + tbl->max_item_num = entries_num; + + size = sizeof(struct gro_tcp4_key) * entries_num; + tbl->keys = rte_zmalloc_socket(__func__, + size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (tbl->keys == NULL) { + rte_free(tbl->items); + rte_free(tbl); + return NULL; + } + /* INVALID_ARRAY_INDEX indicates empty key */ + for (i = 0; i < entries_num; i++) + tbl->keys[i].start_index = INVALID_ARRAY_INDEX; + tbl->max_key_num = entries_num; + + return tbl; +} + +void +gro_tcp4_tbl_destroy(void *tbl) +{ + struct gro_tcp4_tbl *tcp_tbl = tbl; + + if (tcp_tbl) { + rte_free(tcp_tbl->items); + 
rte_free(tcp_tbl->keys); + } + rte_free(tcp_tbl); +} + +/* + * merge two TCP/IPv4 packets without updating checksums. + * If cmp is larger than 0, append the new packet to the + * original packet. Otherwise, pre-pend the new packet to + * the original packet. + */ +static inline int +merge_two_tcp4_packets(struct gro_tcp4_item *item_src, + struct rte_mbuf *pkt, + uint16_t ip_id, + uint32_t sent_seq, + int cmp) +{ + struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; + uint16_t tcp_datalen; + + if (cmp > 0) { + pkt_head = item_src->firstseg; + pkt_tail = pkt; + } else { + pkt_head = pkt; + pkt_tail = item_src->firstseg; + } + + /* check if the packet length will be beyond the max value */ + tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len - + pkt_tail->l3_len - pkt_tail->l4_len; + if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen > + TCP4_MAX_L3_LENGTH) + return 0; + + /* remove packet header for the tail packet */ + rte_pktmbuf_adj(pkt_tail, + pkt_tail->l2_len + + pkt_tail->l3_len + + pkt_tail->l4_len); + + /* chain two packets together */ + if (cmp > 0) { + item_src->lastseg->next = pkt; + item_src->lastseg = rte_pktmbuf_lastseg(pkt); + /* update IP ID to the larger value */ + item_src->ip_id = ip_id; + } else { + lastseg = rte_pktmbuf_lastseg(pkt); + lastseg->next = item_src->firstseg; + item_src->firstseg = pkt; + /* update sent_seq to the smaller value */ + item_src->sent_seq = sent_seq; + } + item_src->nb_merged++; + + /* update mbuf metadata for the merged packet */ + pkt_head->nb_segs += pkt_tail->nb_segs; + pkt_head->pkt_len += pkt_tail->pkt_len; + + return 1; +} + +static inline int +check_seq_option(struct gro_tcp4_item *item, + struct tcp_hdr *tcp_hdr, + uint16_t tcp_hl, + uint16_t tcp_dl, + uint16_t ip_id, + uint32_t sent_seq) +{ + struct rte_mbuf *pkt0 = item->firstseg; + struct ipv4_hdr *ipv4_hdr0; + struct tcp_hdr *tcp_hdr0; + uint16_t tcp_hl0, tcp_dl0; + uint16_t len; + + ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) + + 
pkt0->l2_len); + tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len); + tcp_hl0 = pkt0->l4_len; + + /* check if TCP option fields equal. If not, return 0. */ + len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr); + if ((tcp_hl != tcp_hl0) || + ((len > 0) && (memcmp(tcp_hdr + 1, + tcp_hdr0 + 1, + len) != 0))) + return 0; + + /* check if the two packets are neighbors */ + tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0; + if ((sent_seq == (item->sent_seq + tcp_dl0)) && + (ip_id == (item->ip_id + 1))) + /* append the new packet */ + return 1; + else if (((sent_seq + tcp_dl) == item->sent_seq) && + ((ip_id + item->nb_merged) == item->ip_id)) + /* pre-pend the new packet */ + return -1; + else + return 0; +} + +static inline uint32_t +find_an_empty_item(struct gro_tcp4_tbl *tbl) +{ + uint32_t i; + uint32_t max_item_num = tbl->max_item_num; + + for (i = 0; i < max_item_num; i++) + if (tbl->items[i].firstseg == NULL) + return i; + return INVALID_ARRAY_INDEX; +} + +static inline uint32_t +find_an_empty_key(struct gro_tcp4_tbl *tbl) +{ + uint32_t i; + uint32_t max_key_num = tbl->max_key_num; + + for (i = 0; i < max_key_num; i++) + if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX) + return i; + return INVALID_ARRAY_INDEX; +} + +static inline uint32_t +insert_new_item(struct gro_tcp4_tbl *tbl, + struct rte_mbuf *pkt, + uint16_t ip_id, + uint32_t sent_seq, + uint32_t prev_idx, + uint64_t start_time) +{ + uint32_t item_idx; + + item_idx = find_an_empty_item(tbl); + if (item_idx == INVALID_ARRAY_INDEX) + return INVALID_ARRAY_INDEX; + + tbl->items[item_idx].firstseg = pkt; + tbl->items[item_idx].lastseg = rte_pktmbuf_lastseg(pkt); + tbl->items[item_idx].start_time = start_time; + tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX; + tbl->items[item_idx].sent_seq = sent_seq; + tbl->items[item_idx].ip_id = ip_id; + tbl->items[item_idx].nb_merged = 1; + tbl->item_num++; + + /* if the previous packet exists, chain the new one with it */ + if 
(prev_idx != INVALID_ARRAY_INDEX) { + tbl->items[item_idx].next_pkt_idx = + tbl->items[prev_idx].next_pkt_idx; + tbl->items[prev_idx].next_pkt_idx = item_idx; + } + + return item_idx; +} + +static inline uint32_t +delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx, + uint32_t prev_item_idx) +{ + uint32_t next_idx = tbl->items[item_idx].next_pkt_idx; + + /* set NULL to firstseg to indicate it's an empty item */ + tbl->items[item_idx].firstseg = NULL; + tbl->item_num--; + if (prev_item_idx != INVALID_ARRAY_INDEX) + tbl->items[prev_item_idx].next_pkt_idx = next_idx; + + return next_idx; +} + +static inline uint32_t +insert_new_key(struct gro_tcp4_tbl *tbl, + struct tcp4_key *key_src, + uint32_t item_idx) +{ + struct tcp4_key *key_dst; + uint32_t key_idx; + + key_idx = find_an_empty_key(tbl); + if (key_idx == INVALID_ARRAY_INDEX) + return INVALID_ARRAY_INDEX; + + key_dst = &(tbl->keys[key_idx].key); + + ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr)); + ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr)); + key_dst->ip_src_addr = key_src->ip_src_addr; + key_dst->ip_dst_addr = key_src->ip_dst_addr; + key_dst->recv_ack = key_src->recv_ack; + key_dst->src_port = key_src->src_port; + key_dst->dst_port = key_src->dst_port; + + /* non-INVALID_ARRAY_INDEX value indicates this key is valid */ + tbl->keys[key_idx].start_index = item_idx; + tbl->key_num++; + + return key_idx; +} + +static inline int +is_same_key(struct tcp4_key k1, struct tcp4_key k2) +{ + if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0) + return 0; + + if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0) + return 0; + + return ((k1.ip_src_addr == k2.ip_src_addr) && + (k1.ip_dst_addr == k2.ip_dst_addr) && + (k1.recv_ack == k2.recv_ack) && + (k1.src_port == k2.src_port) && + (k1.dst_port == k2.dst_port)); +} + +/* + * update packet length for the flushed packet. 
+ */ +static inline void +update_header(struct gro_tcp4_item *item) +{ + struct ipv4_hdr *ipv4_hdr; + struct rte_mbuf *pkt = item->firstseg; + + ipv4_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) + + pkt->l2_len); + ipv4_hdr->total_length = rte_cpu_to_be_16(pkt->pkt_len - + pkt->l2_len); +} + +int32_t +gro_tcp4_reassemble(struct rte_mbuf *pkt, + struct gro_tcp4_tbl *tbl, + uint64_t start_time) +{ + struct ether_hdr *eth_hdr; + struct ipv4_hdr *ipv4_hdr; + struct tcp_hdr *tcp_hdr; + uint32_t sent_seq; + uint16_t tcp_dl, ip_id; + + struct tcp4_key key; + uint32_t cur_idx, prev_idx, item_idx; + uint32_t i, max_key_num; + int cmp; + + eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len); + tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len); + + /* + * if FIN, SYN, RST, PSH, URG, ECE or + * CWR is set, return immediately. + */ + if (tcp_hdr->tcp_flags != TCP_ACK_FLAG) + return -1; + /* if payload length is 0, return immediately */ + tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len - + pkt->l4_len; + if (tcp_dl == 0) + return -1; + + ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id); + sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq); + + ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr)); + ether_addr_copy(&(eth_hdr->d_addr), &(key.eth_daddr)); + key.ip_src_addr = ipv4_hdr->src_addr; + key.ip_dst_addr = ipv4_hdr->dst_addr; + key.src_port = tcp_hdr->src_port; + key.dst_port = tcp_hdr->dst_port; + key.recv_ack = tcp_hdr->recv_ack; + + /* search for a key */ + max_key_num = tbl->max_key_num; + for (i = 0; i < max_key_num; i++) { + if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) && + is_same_key(tbl->keys[i].key, key)) + break; + } + + /* can't find a key, so insert a new key and a new item. 
*/ + if (i == tbl->max_key_num) { + item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq, + INVALID_ARRAY_INDEX, start_time); + if (item_idx == INVALID_ARRAY_INDEX) + return -1; + if (insert_new_key(tbl, &key, item_idx) == + INVALID_ARRAY_INDEX) { + /* + * fail to insert a new key, so + * delete the inserted item + */ + delete_item(tbl, item_idx, INVALID_ARRAY_INDEX); + return -1; + } + return 0; + } + + /* traverse all packets in the item group to find one to merge */ + cur_idx = tbl->keys[i].start_index; + prev_idx = cur_idx; + do { + cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr, + pkt->l4_len, tcp_dl, ip_id, sent_seq); + if (cmp) { + if (merge_two_tcp4_packets(&(tbl->items[cur_idx]), + pkt, ip_id, + sent_seq, cmp)) + return 1; + /* + * fail to merge two packets since the packet + * length will be greater than the max value. + * So insert the packet into the item group. + */ + if (insert_new_item(tbl, pkt, ip_id, sent_seq, + prev_idx, start_time) == + INVALID_ARRAY_INDEX) + return -1; + return 0; + } + prev_idx = cur_idx; + cur_idx = tbl->items[cur_idx].next_pkt_idx; + } while (cur_idx != INVALID_ARRAY_INDEX); + + /* + * can't find a packet in the item group to merge, + * so insert the packet into the item group. 
+ */ + if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx, + start_time) == INVALID_ARRAY_INDEX) + return -1; + + return 0; +} + +uint16_t +gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, + uint64_t flush_timestamp, + struct rte_mbuf **out, + uint16_t nb_out) +{ + uint16_t k = 0; + uint32_t i, j; + uint32_t max_key_num = tbl->max_key_num; + + for (i = 0; i < max_key_num; i++) { + /* all keys have been checked, return immediately */ + if (tbl->key_num == 0) + return k; + + j = tbl->keys[i].start_index; + while (j != INVALID_ARRAY_INDEX) { + if (tbl->items[j].start_time <= flush_timestamp) { + out[k++] = tbl->items[j].firstseg; + if (tbl->items[j].nb_merged > 1) + update_header(&(tbl->items[j])); + /* + * delete the item and get + * the next packet index + */ + j = delete_item(tbl, j, + INVALID_ARRAY_INDEX); + + /* + * delete the key as all of + * packets are flushed + */ + if (j == INVALID_ARRAY_INDEX) { + tbl->keys[i].start_index = + INVALID_ARRAY_INDEX; + tbl->key_num--; + } else + /* update start_index of the key */ + tbl->keys[i].start_index = j; + + if (k == nb_out) + return k; + } else + /* + * left packets of this key won't be + * timeout, so go to check other keys. + */ + break; + } + } + return k; +} + +uint32_t +gro_tcp4_tbl_pkt_count(void *tbl) +{ + struct gro_tcp4_tbl *gro_tbl = tbl; + + if (gro_tbl) + return gro_tbl->item_num; + + return 0; +} diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h new file mode 100644 index 00000000..f41dcee3 --- /dev/null +++ b/lib/librte_gro/gro_tcp4.h @@ -0,0 +1,210 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _GRO_TCP4_H_ +#define _GRO_TCP4_H_ + +#define INVALID_ARRAY_INDEX 0xffffffffUL +#define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL) + +/* + * the max L3 length of a TCP/IPv4 packet. The L3 length + * is the sum of ipv4 header, tcp header and L4 payload. + */ +#define TCP4_MAX_L3_LENGTH UINT16_MAX + +/* criteria of merging packets */ +struct tcp4_key { + struct ether_addr eth_saddr; + struct ether_addr eth_daddr; + uint32_t ip_src_addr; + uint32_t ip_dst_addr; + + uint32_t recv_ack; + uint16_t src_port; + uint16_t dst_port; +}; + +struct gro_tcp4_key { + struct tcp4_key key; + /* + * the index of the first packet in the item group. + * If the value is INVALID_ARRAY_INDEX, it means + * the key is empty. 
+ */ + uint32_t start_index; +}; + +struct gro_tcp4_item { + /* + * first segment of the packet. If the value + * is NULL, it means the item is empty. + */ + struct rte_mbuf *firstseg; + /* last segment of the packet */ + struct rte_mbuf *lastseg; + /* + * the time when the first packet is inserted + * into the table. If a packet in the table is + * merged with an incoming packet, this value + * won't be updated. We set this value only + * when the first packet is inserted into the + * table. + */ + uint64_t start_time; + /* + * we use next_pkt_idx to chain the packets that + * have same key value but can't be merged together. + */ + uint32_t next_pkt_idx; + /* the sequence number of the packet */ + uint32_t sent_seq; + /* the IP ID of the packet */ + uint16_t ip_id; + /* the number of merged packets */ + uint16_t nb_merged; +}; + +/* + * TCP/IPv4 reassembly table structure. + */ +struct gro_tcp4_tbl { + /* item array */ + struct gro_tcp4_item *items; + /* key array */ + struct gro_tcp4_key *keys; + /* current item number */ + uint32_t item_num; + /* current key num */ + uint32_t key_num; + /* item array size */ + uint32_t max_item_num; + /* key array size */ + uint32_t max_key_num; +}; + +/** + * This function creates a TCP/IPv4 reassembly table. + * + * @param socket_id + * socket index for allocating TCP/IPv4 reassembly table + * @param max_flow_num + * the maximum number of flows in the TCP/IPv4 GRO table + * @param max_item_per_flow + * the maximum packet number per flow. + * + * @return + * if create successfully, return a pointer which points to the + * created TCP/IPv4 GRO table. Otherwise, return NULL. + */ +void *gro_tcp4_tbl_create(uint16_t socket_id, + uint16_t max_flow_num, + uint16_t max_item_per_flow); + +/** + * This function destroys a TCP/IPv4 reassembly table. + * + * @param tbl + * a pointer points to the TCP/IPv4 reassembly table. 
+ */ +void gro_tcp4_tbl_destroy(void *tbl); + +/** + * This function searches for a packet in the TCP/IPv4 reassembly table + * to merge with the inputted one. To merge two packets is to chain them + * together and update packet headers. Packets, whose SYN, FIN, RST, PSH, + * CWR, ECE or URG bit is set, are returned immediately. Packets which + * only have packet headers (i.e. without data) are also returned + * immediately. Otherwise, the packet is either merged, or inserted into + * the table. Besides, if there is no available space to insert the + * packet, this function returns immediately too. + * + * This function assumes the inputted packet is with correct IPv4 and + * TCP checksums. And if two packets are merged, it won't re-calculate + * IPv4 and TCP checksums. Besides, if the inputted packet is IP + * fragmented, it assumes the packet is complete (with TCP header). + * + * @param pkt + * packet to reassemble. + * @param tbl + * a pointer that points to a TCP/IPv4 reassembly table. + * @param start_time + * the start time that the packet is inserted into the table + * + * @return + * if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE + * or URG bit is set, or there is no available space in the table to + * insert a new item or a new key, return a negative value. If the + * packet is merged successfully, return a positive value. If the + * packet is inserted into the table, return 0. + */ +int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt, + struct gro_tcp4_tbl *tbl, + uint64_t start_time); + +/** + * This function flushes timeout packets in a TCP/IPv4 reassembly table + * to applications, and without updating checksums for merged packets. + * The max number of flushed timeout packets is the element number of + * the array which is used to keep flushed packets. + * + * @param tbl + * a pointer that points to a TCP GRO table. 
+ * @param flush_timestamp + * this function flushes packets which are inserted into the table + * before or at the flush_timestamp. + * @param out + * pointer array which is used to keep flushed packets. + * @param nb_out + * the element number of out. It's also the max number of timeout + * packets that can be flushed finally. + * + * @return + * the number of packets that are returned. + */ +uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, + uint64_t flush_timestamp, + struct rte_mbuf **out, + uint16_t nb_out); + +/** + * This function returns the number of the packets in a TCP/IPv4 + * reassembly table. + * + * @param tbl + * pointer points to a TCP/IPv4 reassembly table. + * + * @return + * the number of packets in the table + */ +uint32_t gro_tcp4_tbl_pkt_count(void *tbl); +#endif diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c new file mode 100644 index 00000000..7853246a --- /dev/null +++ b/lib/librte_gro/rte_gro.c @@ -0,0 +1,278 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> + +#include "rte_gro.h" +#include "gro_tcp4.h" + +typedef void *(*gro_tbl_create_fn)(uint16_t socket_id, + uint16_t max_flow_num, + uint16_t max_item_per_flow); +typedef void (*gro_tbl_destroy_fn)(void *tbl); +typedef uint32_t (*gro_tbl_pkt_count_fn)(void *tbl); + +static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = { + gro_tcp4_tbl_create, NULL}; +static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = { + gro_tcp4_tbl_destroy, NULL}; +static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = { + gro_tcp4_tbl_pkt_count, NULL}; + +/* + * GRO context structure, which is used to merge packets. It keeps + * many reassembly tables of desired GRO types. Applications need to + * create GRO context objects before using rte_gro_reassemble to + * perform GRO. 
+ */ +struct gro_ctx { + /* GRO types to perform */ + uint64_t gro_types; + /* reassembly tables */ + void *tbls[RTE_GRO_TYPE_MAX_NUM]; +}; + +void * +rte_gro_ctx_create(const struct rte_gro_param *param) +{ + struct gro_ctx *gro_ctx; + gro_tbl_create_fn create_tbl_fn; + uint64_t gro_type_flag = 0; + uint64_t gro_types = 0; + uint8_t i; + + gro_ctx = rte_zmalloc_socket(__func__, + sizeof(struct gro_ctx), + RTE_CACHE_LINE_SIZE, + param->socket_id); + if (gro_ctx == NULL) + return NULL; + + for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) { + gro_type_flag = 1ULL << i; + if ((param->gro_types & gro_type_flag) == 0) + continue; + + create_tbl_fn = tbl_create_fn[i]; + if (create_tbl_fn == NULL) + continue; + + gro_ctx->tbls[i] = create_tbl_fn(param->socket_id, + param->max_flow_num, + param->max_item_per_flow); + if (gro_ctx->tbls[i] == NULL) { + /* destroy all created tables */ + gro_ctx->gro_types = gro_types; + rte_gro_ctx_destroy(gro_ctx); + return NULL; + } + gro_types |= gro_type_flag; + } + gro_ctx->gro_types = param->gro_types; + + return gro_ctx; +} + +void +rte_gro_ctx_destroy(void *ctx) +{ + gro_tbl_destroy_fn destroy_tbl_fn; + struct gro_ctx *gro_ctx = ctx; + uint64_t gro_type_flag; + uint8_t i; + + if (gro_ctx == NULL) + return; + for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) { + gro_type_flag = 1ULL << i; + if ((gro_ctx->gro_types & gro_type_flag) == 0) + continue; + destroy_tbl_fn = tbl_destroy_fn[i]; + if (destroy_tbl_fn) + destroy_tbl_fn(gro_ctx->tbls[i]); + } + rte_free(gro_ctx); +} + +uint16_t +rte_gro_reassemble_burst(struct rte_mbuf **pkts, + uint16_t nb_pkts, + const struct rte_gro_param *param) +{ + uint16_t i; + uint16_t nb_after_gro = nb_pkts; + uint32_t item_num; + + /* allocate a reassembly table for TCP/IPv4 GRO */ + struct gro_tcp4_tbl tcp_tbl; + struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM]; + struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} }; + + struct rte_mbuf *unprocess_pkts[nb_pkts]; + uint16_t unprocess_num = 0; + 
int32_t ret; + uint64_t current_time; + + if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0) + return nb_pkts; + + /* get the actual number of packets */ + item_num = RTE_MIN(nb_pkts, (param->max_flow_num * + param->max_item_per_flow)); + item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM); + + for (i = 0; i < item_num; i++) + tcp_keys[i].start_index = INVALID_ARRAY_INDEX; + + tcp_tbl.keys = tcp_keys; + tcp_tbl.items = tcp_items; + tcp_tbl.key_num = 0; + tcp_tbl.item_num = 0; + tcp_tbl.max_key_num = item_num; + tcp_tbl.max_item_num = item_num; + + current_time = rte_rdtsc(); + + for (i = 0; i < nb_pkts; i++) { + if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 | + RTE_PTYPE_L4_TCP)) == + (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) { + ret = gro_tcp4_reassemble(pkts[i], + &tcp_tbl, + current_time); + if (ret > 0) + /* merge successfully */ + nb_after_gro--; + else if (ret < 0) { + unprocess_pkts[unprocess_num++] = + pkts[i]; + } + } else + unprocess_pkts[unprocess_num++] = pkts[i]; + } + + /* re-arrange GROed packets */ + if (nb_after_gro < nb_pkts) { + i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time, + pkts, nb_pkts); + if (unprocess_num > 0) { + memcpy(&pkts[i], unprocess_pkts, + sizeof(struct rte_mbuf *) * + unprocess_num); + } + } + + return nb_after_gro; +} + +uint16_t +rte_gro_reassemble(struct rte_mbuf **pkts, + uint16_t nb_pkts, + void *ctx) +{ + uint16_t i, unprocess_num = 0; + struct rte_mbuf *unprocess_pkts[nb_pkts]; + struct gro_ctx *gro_ctx = ctx; + uint64_t current_time; + + if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0) + return nb_pkts; + + current_time = rte_rdtsc(); + + for (i = 0; i < nb_pkts; i++) { + if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 | + RTE_PTYPE_L4_TCP)) == + (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) { + if (gro_tcp4_reassemble(pkts[i], + gro_ctx->tbls + [RTE_GRO_TCP_IPV4_INDEX], + current_time) < 0) + unprocess_pkts[unprocess_num++] = pkts[i]; + } else + unprocess_pkts[unprocess_num++] = pkts[i]; + } + if (unprocess_num > 0) { 
+ memcpy(pkts, unprocess_pkts, + sizeof(struct rte_mbuf *) * + unprocess_num); + } + + return unprocess_num; +} + +uint16_t +rte_gro_timeout_flush(void *ctx, + uint64_t timeout_cycles, + uint64_t gro_types, + struct rte_mbuf **out, + uint16_t max_nb_out) +{ + struct gro_ctx *gro_ctx = ctx; + uint64_t flush_timestamp; + + gro_types = gro_types & gro_ctx->gro_types; + flush_timestamp = rte_rdtsc() - timeout_cycles; + + if (gro_types & RTE_GRO_TCP_IPV4) { + return gro_tcp4_tbl_timeout_flush( + gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX], + flush_timestamp, + out, max_nb_out); + } + return 0; +} + +uint64_t +rte_gro_get_pkt_count(void *ctx) +{ + struct gro_ctx *gro_ctx = ctx; + gro_tbl_pkt_count_fn pkt_count_fn; + uint64_t item_num = 0; + uint64_t gro_type_flag; + uint8_t i; + + for (i = 0; i < RTE_GRO_TYPE_MAX_NUM; i++) { + gro_type_flag = 1ULL << i; + if ((gro_ctx->gro_types & gro_type_flag) == 0) + continue; + + pkt_count_fn = tbl_pkt_count_fn[i]; + if (pkt_count_fn == NULL) + continue; + item_num += pkt_count_fn(gro_ctx->tbls[i]); + } + return item_num; +} diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h new file mode 100644 index 00000000..d57e0c5f --- /dev/null +++ b/lib/librte_gro/rte_gro.h @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_GRO_H_ +#define _RTE_GRO_H_ + +/** + * @file + * Interface to GRO library + */ + +#include <stdint.h> +#include <rte_mbuf.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_GRO_MAX_BURST_ITEM_NUM 128U +/**< the max number of packets that rte_gro_reassemble_burst() + * can process in each invocation. + */ +#define RTE_GRO_TYPE_MAX_NUM 64 +/**< the max number of supported GRO types */ +#define RTE_GRO_TYPE_SUPPORT_NUM 1 +/**< the number of currently supported GRO types */ + +#define RTE_GRO_TCP_IPV4_INDEX 0 +#define RTE_GRO_TCP_IPV4 (1ULL << RTE_GRO_TCP_IPV4_INDEX) +/**< TCP/IPv4 GRO flag */ + +/** + * A structure which is used to create GRO context objects or tell + * rte_gro_reassemble_burst() what reassembly rules are demanded. 
+ */ +struct rte_gro_param { + uint64_t gro_types; + /**< desired GRO types */ + uint16_t max_flow_num; + /**< max flow number */ + uint16_t max_item_per_flow; + /**< max packet number per flow */ + uint16_t socket_id; + /**< socket index for allocating GRO related data structures, + * like reassembly tables. When using rte_gro_reassemble_burst(), + * applications don't need to set this value. + */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * This function creates a GRO context object, which is used to merge + * packets in rte_gro_reassemble(). + * + * @param param + * applications use it to pass needed parameters to create a GRO + * context object. + * + * @return + * if created successfully, return a pointer which points to the GRO + * context object. Otherwise, return NULL. + */ +void *rte_gro_ctx_create(const struct rte_gro_param *param); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * This function destroys a GRO context object. + * + * @param ctx + * a pointer to a GRO context object. + */ +void rte_gro_ctx_destroy(void *ctx); + +/** + * This is one of the main reassembly APIs, which merges a number of + * packets at a time. It assumes that all inputted packets have + * correct checksums. That is, applications should guarantee all + * inputted packets are correct. Besides, it doesn't re-calculate + * checksums for merged packets. If inputted packets are IP fragmented, + * this function assumes they are complete (i.e. with L4 header). After + * finishing processing, it returns all GROed packets to applications + * immediately. + * + * @param pkts + * a pointer array which points to the packets to reassemble. Besides, + * it keeps mbuf addresses for the GROed packets. + * @param nb_pkts + * the number of packets to reassemble. + * @param param + * applications use it to tell rte_gro_reassemble_burst() what rules + * are demanded. 
+ * + * @return + * the number of packets after being GROed. If no packets are merged, + * the returned value is nb_pkts. + */ +uint16_t rte_gro_reassemble_burst(struct rte_mbuf **pkts, + uint16_t nb_pkts, + const struct rte_gro_param *param); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Reassembly function, which tries to merge inputted packets with + * the packets in the reassembly tables of a given GRO context. This + * function assumes all inputted packets have correct checksums. + * And it won't update checksums if two packets are merged. Besides, + * if inputted packets are IP fragmented, this function assumes they + * are complete packets (i.e. with L4 header). + * + * If the inputted packets don't have data or have unsupported GRO + * types etc., they won't be processed and are returned to applications. + * Otherwise, the inputted packets are either merged or inserted into + * the table. If applications want to get packets in the table, they need + * to call the flush API. + * + * @param pkts + * packets to reassemble. Besides, after this function finishes, it + * keeps the unprocessed packets (e.g. without data or unsupported + * GRO types). + * @param nb_pkts + * the number of packets to reassemble. + * @param ctx + * a pointer to a GRO context object. + * + * @return + * return the number of unprocessed packets (e.g. without data or + * unsupported GRO types). If all packets are processed (merged or + * inserted into the table), return 0. + */ +uint16_t rte_gro_reassemble(struct rte_mbuf **pkts, + uint16_t nb_pkts, + void *ctx); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * This function flushes the timed-out packets from reassembly tables of + * desired GRO types. The max number of flushed timed-out packets is the + * number of elements of the array which is used to keep the flushed packets. 
+ * + * Besides, this function won't re-calculate checksums for merged + * packets in the tables. That is, the returned packets may have + * wrong checksums. + * + * @param ctx + * a pointer to a GRO context object. + * @param timeout_cycles + * max TTL for packets in reassembly tables, measured in the same units as rte_rdtsc() (cycles), not nanoseconds. + * @param gro_types + * this function only flushes packets which belong to the GRO types + * specified by gro_types. + * @param out + * a pointer array that is used to keep flushed timed-out packets. + * @param max_nb_out + * the number of elements in out. It's also the max number of timed-out + * packets that can be flushed finally. + * + * @return + * the number of flushed packets. If no packets are flushed, return 0. + */ +uint16_t rte_gro_timeout_flush(void *ctx, + uint64_t timeout_cycles, + uint64_t gro_types, + struct rte_mbuf **out, + uint16_t max_nb_out); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * This function returns the number of packets in all reassembly tables + * of a given GRO context. + * + * @param ctx + * a pointer to a GRO context object. + * + * @return + * the number of packets in all reassembly tables. + */ +uint64_t rte_gro_get_pkt_count(void *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_GRO_H_ */ diff --git a/lib/librte_gro/rte_gro_version.map b/lib/librte_gro/rte_gro_version.map new file mode 100644 index 00000000..bb40bb41 --- /dev/null +++ b/lib/librte_gro/rte_gro_version.map @@ -0,0 +1,12 @@ +DPDK_17.08 { + global: + + rte_gro_ctx_create; + rte_gro_ctx_destroy; + rte_gro_get_pkt_count; + rte_gro_reassemble; + rte_gro_reassemble_burst; + rte_gro_timeout_flush; + + local: *; +}; |