diff options
Diffstat (limited to 'lib/libtle_l4p/stream_table.h')
-rw-r--r-- | lib/libtle_l4p/stream_table.h | 490 |
1 files changed, 353 insertions, 137 deletions
diff --git a/lib/libtle_l4p/stream_table.h b/lib/libtle_l4p/stream_table.h index 033c306..ba8d165 100644 --- a/lib/libtle_l4p/stream_table.h +++ b/lib/libtle_l4p/stream_table.h @@ -16,199 +16,415 @@ #ifndef _STREAM_TABLE_H_ #define _STREAM_TABLE_H_ +#include <string.h> #include <rte_hash.h> -#include "tcp_misc.h" +#include "stream.h" +#include "misc.h" #ifdef __cplusplus extern "C" { #endif +#define HASH_SIZE_32K 32771 +#define HASH_SIZE_64K 65537 +#define HASH_SIZE_128K 131071 + +#define HASH_SIZE HASH_SIZE_64K + struct stbl_entry { void *data; }; -struct shtbl { - uint32_t nb_ent; /* max number of entries in the table. */ - rte_spinlock_t l; /* lock to protect the hash table */ - struct rte_hash *t; - struct stbl_entry *ent; +struct stbl { + rte_spinlock_t l; + uint32_t need_lock; + struct stbl_entry head[HASH_SIZE]; } __rte_cache_aligned; -struct stbl { - struct shtbl ht[TLE_VNUM]; -}; +static inline int +stbl_init(struct stbl *st, uint32_t lock) +{ + st->need_lock = lock; + return 0; +} -struct stbl4_key { - union l4_ports port; - union ipv4_addrs addr; -} __attribute__((__packed__)); +static inline int +stbl_fini(struct stbl *st) +{ + st->need_lock = 0; + return 0; +} -struct stbl6_key { - union l4_ports port; - union ipv6_addrs addr; -} __attribute__((__packed__)); +static inline uint8_t +compare_pkt(const struct tle_stream *s, const union pkt_info *pi) +{ + if (s->type != pi->tf.type) + return -1; -struct stbl_key { - union l4_ports port; - union { - union ipv4_addrs addr4; - union ipv6_addrs addr6; - }; -} __attribute__((__packed__)); + if (s->port.raw != pi->port.raw) + return -1; -extern void stbl_fini(struct stbl *st); + if (s->type == TLE_V4) { + if (s->ipv4.addr.raw != pi->addr4.raw) + return -1; + } else { + if (memcmp(&s->ipv6.addr, pi->addr6, sizeof(union ipv6_addrs))) + return -1; + } -extern int stbl_init(struct stbl *st, uint32_t num, int32_t socket); + return 0; +} -static inline void -stbl_pkt_fill_key(struct stbl_key *k, const union pkt_info *pi, uint32_t type) +static inline uint32_t +stbl_hash_stream(const struct tle_stream *s) { - static const struct stbl_key zero = { - .port.raw = 0, - }; - - k->port = pi->port; - if (type == TLE_V4) - k->addr4 = pi->addr4; - else if (type == TLE_V6) - k->addr6 = *pi->addr6; - else - *k = zero; + int i; + unsigned int hash; + + if (s->type == TLE_V4) { + hash = s->ipv4.addr.src ^ s->ipv4.addr.dst + ^ s->port.src ^ s->port.dst; + } else { + hash = s->port.src ^ s->port.dst; + for (i = 0; i < 4; i++) { + hash ^= s->ipv6.addr.src.u32[i]; + hash ^= s->ipv6.addr.dst.u32[i]; + } + } + + return hash % HASH_SIZE; } -static inline void -stbl_lock(struct stbl *st, uint32_t type) +static inline uint32_t +stbl_hash_pkt(const union pkt_info* pi) { - rte_spinlock_lock(&st->ht[type].l); + int i; + unsigned int hash; + + if (pi->tf.type == TLE_V4) { + hash = pi->addr4.src ^ pi->addr4.dst ^ pi->port.src ^ pi->port.dst; + } else { + hash = pi->port.src ^ pi->port.dst; + for (i = 0; i < 4; i++) { + hash ^= pi->addr6->src.u32[i]; + hash ^= pi->addr6->dst.u32[i]; + } + } + + return hash % HASH_SIZE; } -static inline void -stbl_unlock(struct stbl *st, uint32_t type) +static inline struct stbl_entry* +stbl_add_stream(struct stbl *st, struct tle_stream *s) { - rte_spinlock_unlock(&st->ht[type].l); + struct stbl_entry* entry; + + if (st->need_lock) + rte_spinlock_lock(&st->l); + entry = &st->head[stbl_hash_stream(s)]; + s->link.stqe_next = (struct tle_stream*)entry->data; + entry->data = s; + if (st->need_lock) + rte_spinlock_unlock(&st->l); + + return entry; } -static inline struct stbl_entry * -stbl_add_entry(struct stbl *st, const union pkt_info *pi) +static inline struct tle_stream * +stbl_find_stream(struct stbl *st, const union pkt_info *pi) { - int32_t rc; - uint32_t type; - struct shtbl *ht; - struct stbl_key k; - - type = pi->tf.type; - stbl_pkt_fill_key(&k, pi, type); - ht = st->ht + type; - - rc = rte_hash_add_key(ht->t, &k); - if ((uint32_t)rc >= ht->nb_ent) - return NULL; - return ht->ent + rc; + struct tle_stream* head; + + if (st->need_lock) + rte_spinlock_lock(&st->l); + head = (struct tle_stream*)st->head[stbl_hash_pkt(pi)].data; + while (head != NULL) { + if (compare_pkt(head, pi) == 0) + break; + + head = head->link.stqe_next; + } + if (st->need_lock) + rte_spinlock_unlock(&st->l); + return head; } -static inline struct stbl_entry * -stbl_add_stream(struct stbl *st, const union pkt_info *pi, const void *s) +static inline void +stbl_del_stream(struct stbl *st, struct stbl_entry *se, + struct tle_stream *s) { - struct stbl_entry *se; + struct tle_stream *prev, *current; - se = stbl_add_entry(st, pi); - if (se != NULL) - se->data = (void *)(uintptr_t)s; - return se; + if (st->need_lock) + rte_spinlock_lock(&st->l); + if (se == NULL) + se = &st->head[stbl_hash_stream(s)]; + prev = NULL; + current = (struct tle_stream*)se->data; + while (current != NULL) { + if (current != s) { + prev = current; + current = current->link.stqe_next; + continue; + } + + if (prev) + prev->link.stqe_next = current->link.stqe_next; + else + se->data = current->link.stqe_next; + break; + } + if (st->need_lock) + rte_spinlock_unlock(&st->l); + + s->link.stqe_next = NULL; } -static inline struct stbl_entry * -stbl_find_entry(struct stbl *st, const union pkt_info *pi) +struct bhash4_key { + uint16_t port; + uint32_t addr; +} __attribute__((__packed__)); + +struct bhash6_key { + uint16_t port; + rte_xmm_t addr; +} __attribute__((__packed__)); + +struct bhash_key { + uint16_t port; + union { + uint32_t addr4; + rte_xmm_t addr6; + }; +} __attribute__((__packed__)); + +void bhash_fini(struct tle_ctx *ctx); + +int bhash_init(struct tle_ctx *ctx); + +static inline int +bhash_sockaddr2key(const struct sockaddr *addr, struct bhash_key *key) { - int32_t rc; - uint32_t type; - struct shtbl *ht; - struct stbl_key k; - - type = pi->tf.type; - stbl_pkt_fill_key(&k, pi, type); - ht = st->ht + type; - - rc = rte_hash_lookup(ht->t, &k); - if ((uint32_t)rc >= ht->nb_ent) - return NULL; - return ht->ent + rc; + int t; + const struct sockaddr_in *lin4; + const struct sockaddr_in6 *lin6; + + if (addr->sa_family == AF_INET) { + lin4 = (const struct sockaddr_in *)addr; + key->port = lin4->sin_port; + key->addr4 = lin4->sin_addr.s_addr; + t = TLE_V4; + } else { + lin6 = (const struct sockaddr_in6 *)addr; + memcpy(&key->addr6, &lin6->sin6_addr, sizeof(key->addr6)); + key->port = lin6->sin6_port; + t = TLE_V6; + } + + return t; } -static inline void * -stbl_find_data(struct stbl *st, const union pkt_info *pi) +/* Return 0 on success; + * Return errno on failure. + */ +static inline int +bhash_add_entry(struct tle_ctx *ctx, const struct sockaddr *addr, + struct tle_stream *s) { - struct stbl_entry *ent; - - ent = stbl_find_entry(st, pi); - return (ent == NULL) ? NULL : ent->data; + int t; + int rc; + int is_first; + struct bhash_key key; + struct rte_hash *bhash; + struct tle_stream *old, *tmp; + + is_first = 0; + t = bhash_sockaddr2key(addr, &key); + + rte_spinlock_lock(&ctx->bhash_lock[t]); + bhash = ctx->bhash[t]; + rc = rte_hash_lookup_data(bhash, &key, (void **)&old); + if (rc == -ENOENT) { + is_first = 1; + s->link.stqe_next = NULL; /* just to avoid follow */ + rc = rte_hash_add_key_data(bhash, &key, s); + } else if (rc >= 0) { + if (t == TLE_V4 && old->type == TLE_V6) { + /* V6 stream may listen V4 address, assure V4 stream + * is ahead of V6 stream in the list + */ + s->link.stqe_next = old; + rte_hash_add_key_data(bhash, &key, s); + } else { + tmp = old->link.stqe_next; + old->link.stqe_next = s; + s->link.stqe_next = tmp; + } + } + rte_spinlock_unlock(&ctx->bhash_lock[t]); + + /* IPv6 socket with unspecified address could receive IPv4 packets. + * So the stream should also be recorded in IPv4 table. + * Only the first stream need be inserted into V4 list, otherwise + * the V6 list is already following V4 list. + */ + if (t == TLE_V6 && !s->option.ipv6only && is_first && + IN6_IS_ADDR_UNSPECIFIED(&key.addr6)) { + t = TLE_V4; + rte_spinlock_lock(&ctx->bhash_lock[t]); + bhash = ctx->bhash[t]; + rc = rte_hash_lookup_data(bhash, &key, (void **)&old); + if (rc == -ENOENT) + rc = rte_hash_add_key_data(bhash, &key, s); + else if (rc >= 0) { + while(old->link.stqe_next != NULL) + old = old->link.stqe_next; + old->link.stqe_next = s; + s->link.stqe_next = NULL; + } + rte_spinlock_unlock(&ctx->bhash_lock[t]); + } + + return (rc >= 0) ? 0 : (-rc); } -#include "tcp_stream.h" - static inline void -stbl_stream_fill_key(struct stbl_key *k, const struct tle_stream *s, - uint32_t type) +bhash_del_entry(struct tle_ctx *ctx, struct tle_stream *s, + const struct sockaddr *addr) { - static const struct stbl_key zero = { - .port.raw = 0, - }; + int t; + int rc; + struct bhash_key key; + struct tle_stream *f, *cur, *pre = NULL; + + t = bhash_sockaddr2key(addr, &key); + + rte_spinlock_lock(&ctx->bhash_lock[t]); + rc = rte_hash_lookup_data(ctx->bhash[t], &key, (void **)&f); + if (rc >= 0) { + cur = f; + pre = NULL; + while (cur != s) { + pre = cur; + cur = cur->link.stqe_next; + } + + if (pre == NULL) { + cur = cur->link.stqe_next; + if (cur == NULL) + rte_hash_del_key(ctx->bhash[t], &key); + else /* change data */ + rte_hash_add_key_data(ctx->bhash[t], &key, cur); + } else + pre->link.stqe_next = cur->link.stqe_next; + } + + rte_spinlock_unlock(&ctx->bhash_lock[t]); + + if (rc < 0) + return; + + s->link.stqe_next = NULL; + + /* IPv6 socket with unspecified address could receive IPv4 packets. + * So the stream should also be recorded in IPv4 table*/ + if (t == TLE_V6 && !s->option.ipv6only && pre == NULL && + IN6_IS_ADDR_UNSPECIFIED(&key.addr6)) { + t = TLE_V4; + rte_spinlock_lock(&ctx->bhash_lock[t]); + rc = rte_hash_lookup_data(ctx->bhash[t], &key, (void **)&f); + if (rc >= 0) { + cur = f; + pre = NULL; + while (cur != s) { + pre = cur; + cur = cur->link.stqe_next; + } + + if (pre == NULL) { + cur = cur->link.stqe_next; + if (cur == NULL) + rte_hash_del_key(ctx->bhash[t], &key); + else /* change data */ + rte_hash_add_key_data(ctx->bhash[t], &key, cur); + } else + pre->link.stqe_next = cur->link.stqe_next; + } + + rte_spinlock_unlock(&ctx->bhash_lock[t]); + } - k->port = s->port; - if (type == TLE_V4) - k->addr4 = s->ipv4.addr; - else if (type == TLE_V6) - k->addr6 = s->ipv6.addr; - else - *k = zero; } -static inline struct stbl_entry * -stbl_add_stream_lock(struct stbl *st, const struct tle_tcp_stream *s) +static inline void * +bhash_reuseport_get_stream(struct tle_stream *s) { - uint32_t type; - struct stbl_key k; - struct stbl_entry *se; - struct shtbl *ht; - int32_t rc; - - type = s->s.type; - stbl_stream_fill_key(&k, &s->s, type); - ht = st->ht + type; + int n = 0; + struct tle_stream *e, *all[32]; + + e = s; + while(e && n < 32) { + all[n++] = e; + e = e->link.stqe_next; + } + + /* for each connection, this function will be called twice + * 1st time for the first handshake: SYN + * 2nd time for the third handshake: ACK + */ + return all[(s->reuseport_seed++) % n]; +} - stbl_lock(st, type); - rc = rte_hash_add_key(ht->t, &k); - stbl_unlock(st, type); +static inline void * +bhash_lookup4(struct rte_hash *t, uint32_t addr, uint16_t port, uint8_t reuse) +{ + int rc; + void *s = NULL; + struct bhash_key key = { + .port = port, + .addr4 = addr, + }; - if ((uint32_t)rc >= ht->nb_ent) - return NULL; + rc = rte_hash_lookup_data(t, &key, &s); + if (rc == -ENOENT) { + key.addr4 = INADDR_ANY; + rc = rte_hash_lookup_data(t, &key, &s); + } - se = ht->ent + rc; - if (se != NULL) - se->data = (void *)(uintptr_t)s; + if (rc >= 0) { + if (reuse) + return bhash_reuseport_get_stream(s); + else + return s; + } - return se; + return NULL; } -static inline void -stbl_del_stream(struct stbl *st, struct stbl_entry *se, - const struct tle_tcp_stream *s, uint32_t lock) +static inline void * +bhash_lookup6(struct rte_hash *t, rte_xmm_t addr, uint16_t port, uint8_t reuse) { - uint32_t type; - struct stbl_key k; + int rc; + void *s = NULL; + struct bhash_key key = { + .port = port, + .addr6 = addr, + }; - if (se == NULL) - return; + rc = rte_hash_lookup_data(t, &key, &s); + if (rc == -ENOENT) { + memcpy(&key.addr6, &tle_ipv6_any, sizeof(key.addr6)); + rc = rte_hash_lookup_data(t, &key, &s); + } - se->data = NULL; + if (rc >= 0) { + if (reuse) + return bhash_reuseport_get_stream(s); + else + return s; + } - type = s->s.type; - stbl_stream_fill_key(&k, &s->s, type); - if (lock != 0) - stbl_lock(st, type); - rte_hash_del_key(st->ht[type].t, &k); - if (lock != 0) - stbl_unlock(st, type); + return NULL; } #ifdef __cplusplus |