diff options
-rw-r--r-- | README | 4 | ||||
-rw-r--r-- | examples/l4fwd/README | 5 | ||||
-rw-r--r-- | examples/l4fwd/lcore.h | 7 | ||||
-rw-r--r-- | examples/l4fwd/main.c | 3 | ||||
-rw-r--r-- | examples/l4fwd/parse.c | 51 | ||||
-rw-r--r-- | examples/l4fwd/port.h | 2 | ||||
-rw-r--r-- | examples/l4fwd/tcp.h | 6 | ||||
-rw-r--r-- | lib/libtle_l4p/ctx.c | 10 | ||||
-rw-r--r-- | lib/libtle_l4p/halfsiphash.h | 100 | ||||
-rw-r--r-- | lib/libtle_l4p/syncookie.h | 76 | ||||
-rw-r--r-- | lib/libtle_l4p/tcp_ctl.h | 13 | ||||
-rw-r--r-- | lib/libtle_l4p/tcp_rxtx.c | 162 | ||||
-rw-r--r-- | lib/libtle_l4p/tcp_tx_seg.h | 114 | ||||
-rw-r--r-- | lib/libtle_l4p/tle_ctx.h | 11 | ||||
-rw-r--r-- | test/gtest/Makefile | 2 |
15 files changed, 487 insertions, 79 deletions
@@ -33,6 +33,10 @@ to make the resulting host stack easily usable by existing non-vpp aware software. + The library uses siphash logic from the below source + https://github.com/veorq/SipHash + + 2. INSTALLATION GUIDE 1) Obtain latest DPDK and build it. diff --git a/examples/l4fwd/README b/examples/l4fwd/README index 658fe3a..a232537 100644 --- a/examples/l4fwd/README +++ b/examples/l4fwd/README @@ -130,6 +130,11 @@ -L | --listen /* open TCP streams in server mode (listen). */ \ -a | --enable-arp /* enable arp responses (request not supported) */ \ -v | --verbose /* different level of verbose mode */ \ + -H | --hash <string> /* hash algorithm i.e. siphash or jhash to be */ \ + /* used to generate the sequence number. */ \ + -K | --seckey <string> /* 16 character long secret key used by */ \ + /* hash algorithms to generate the */ \ + /* sequence number. */ \ <port0_params> <port1_params> ... <portN_params> Note that: options -U and -T cannot be used together. diff --git a/examples/l4fwd/lcore.h b/examples/l4fwd/lcore.h index d88e434..11cc239 100644 --- a/examples/l4fwd/lcore.h +++ b/examples/l4fwd/lcore.h @@ -16,6 +16,8 @@ #ifndef LCORE_H_ #define LCORE_H_ +#include <rte_random.h> + #include "dpdk_legacy.h" /* @@ -64,6 +66,11 @@ create_context(struct netbe_lcore *lc, const struct tle_ctx_param *ctx_prm) cprm.lookup4_data = lc; cprm.lookup6 = lpm6_dst_lookup; cprm.lookup6_data = lc; + if (cprm.secret_key.u64[0] == 0 && + cprm.secret_key.u64[1] == 0) { + cprm.secret_key.u64[0] = rte_rand(); + cprm.secret_key.u64[1] = rte_rand(); + } frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S * FRAG_TTL; diff --git a/examples/l4fwd/main.c b/examples/l4fwd/main.c index 37bd03e..7613a95 100644 --- a/examples/l4fwd/main.c +++ b/examples/l4fwd/main.c @@ -68,9 +68,8 @@ static char proto_name[3][10] = {"udp", "tcp", ""}; static const struct rte_eth_conf port_conf_default = { .rxmode = { - .max_rx_pkt_len = ETHER_MAX_VLAN_FRAME_LEN, .hw_vlan_strip = 1, - .jumbo_frame = 1, + .jumbo_frame = 0, }, }; diff --git a/examples/l4fwd/parse.c b/examples/l4fwd/parse.c index 6593221..4850312 100644 --- a/examples/l4fwd/parse.c +++ b/examples/l4fwd/parse.c @@ -13,6 +13,9 @@ * limitations under the License. */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> #include "netbe.h" #include "parse.h" @@ -61,6 +64,12 @@ static const struct { #define OPT_SHORT_LISTEN 'L' #define OPT_LONG_LISTEN "listen" +#define OPT_SHORT_HASH 'H' +#define OPT_LONG_HASH "hash" + +#define OPT_SHORT_SEC_KEY 'K' +#define OPT_LONG_SEC_KEY "seckey" + #define OPT_SHORT_VERBOSE 'v' #define OPT_LONG_VERBOSE "verbose" @@ -75,6 +84,8 @@ static const struct option long_opt[] = { {OPT_LONG_STREAMS, 1, 0, OPT_SHORT_STREAMS}, {OPT_LONG_UDP, 0, 0, OPT_SHORT_UDP}, {OPT_LONG_TCP, 0, 0, OPT_SHORT_TCP}, + {OPT_LONG_HASH, 1, 0, OPT_SHORT_HASH}, + {OPT_LONG_SEC_KEY, 1, 0, OPT_SHORT_SEC_KEY}, {OPT_LONG_LISTEN, 0, 0, OPT_SHORT_LISTEN}, {OPT_LONG_VERBOSE, 1, 0, OPT_SHORT_VERBOSE}, {NULL, 0, 0, 0} @@ -298,7 +309,7 @@ parse_netbe_arg(struct netbe_port *prt, const char *arg, rte_cpuset_t *pcpu) union parse_val val[RTE_DIM(hndl)]; memset(val, 0, sizeof(val)); - val[2].u64 = ETHER_MAX_VLAN_FRAME_LEN - ETHER_CRC_LEN; + val[2].u64 = ETHER_MAX_LEN - ETHER_CRC_LEN; rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man), keys_opt, RTE_DIM(keys_opt), hndl, val); @@ -709,6 +720,17 @@ netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp) return rc; } +static uint32_t +parse_hash_alg(const char *val) +{ + if (strcmp(val, "jhash") == 0) + return TLE_JHASH; + else if (strcmp(val, "siphash") == 0) + return TLE_SIPHASH; + else + return TLE_HASH_NUM; +} + int parse_app_options(int argc, char **argv, struct netbe_cfg *cfg, struct tle_ctx_param *ctx_prm, @@ -722,8 +744,8 @@ parse_app_options(int argc, char **argv, struct netbe_cfg *cfg, optind = 0; optarg = NULL; - while ((opt = getopt_long(argc, argv, "aB:LPR:S:TUb:f:s:v:", long_opt, - &opt_idx)) != EOF) { + while ((opt = getopt_long(argc, argv, "aB:LPR:S:TUb:f:s:v:H:K:", + long_opt, &opt_idx)) != EOF) { if (opt == OPT_SHORT_ARP) { cfg->arp = 1; } else if (opt == OPT_SHORT_SBULK) { @@ -778,7 +800,28 @@ parse_app_options(int argc, char **argv, struct netbe_cfg *cfg, } else if (opt == OPT_SHORT_LISTEN) { listen = 1; cfg->server = 1; - } else { + } else if (opt == OPT_SHORT_HASH) { + ctx_prm->hash_alg = parse_hash_alg(optarg); + if (ctx_prm->hash_alg >= TLE_HASH_NUM) { + rte_exit(EXIT_FAILURE, + "%s: invalid hash algorithm %s " + "for option: \'%c\'\n", + __func__, optarg, opt); + } + } else if (opt == OPT_SHORT_SEC_KEY) { + n = strlen(optarg); + if (n != sizeof(ctx_prm->secret_key)) { + rte_exit(EXIT_FAILURE, + "%s: invalid length %s " + "for option \'%c\' " + "must be 16 characters long\n", + __func__, optarg, opt); + } + memcpy(&ctx_prm->secret_key, optarg, + sizeof(ctx_prm->secret_key)); + } + + else { rte_exit(EXIT_FAILURE, "%s: unknown option: \'%c\'\n", __func__, opt); diff --git a/examples/l4fwd/port.h b/examples/l4fwd/port.h index bc13dca..04f3ec2 100644 --- a/examples/l4fwd/port.h +++ b/examples/l4fwd/port.h @@ -181,6 +181,8 @@ port_init(struct netbe_port *uprt, uint32_t proto) port_conf.rxmode.hw_ip_checksum = 1; } port_conf.rxmode.max_rx_pkt_len = uprt->mtu + ETHER_CRC_LEN; + if (port_conf.rxmode.max_rx_pkt_len > ETHER_MAX_LEN) + port_conf.rxmode.jumbo_frame = 1; rc = update_rss_conf(uprt, &dev_info, &port_conf, proto); if (rc != 0) diff --git a/examples/l4fwd/tcp.h b/examples/l4fwd/tcp.h index f6ca3a5..e4aadb5 100644 --- a/examples/l4fwd/tcp.h +++ b/examples/l4fwd/tcp.h @@ -279,9 +279,9 @@ netfe_fwd_tcp(uint32_t lcore, struct netfe_stream *fes) __func__, lcore, proto_name[fes->proto], fed->s, n, k); - fed->stat.txp += k; - fed->stat.drops += n - k; - fes->stat.fwp += k; + fed->stat.txp += k; + fed->stat.drops += n - k; + fes->stat.fwp += k; } else { NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n", diff --git a/lib/libtle_l4p/ctx.c b/lib/libtle_l4p/ctx.c index 7ebef9d..6eb33eb 100644 --- a/lib/libtle_l4p/ctx.c +++ b/lib/libtle_l4p/ctx.c @@ -21,6 +21,7 @@ #include "stream.h" #include "misc.h" +#include <halfsiphash.h> #define LPORT_START 0x8000 #define LPORT_END MAX_PORT_NUM @@ -66,6 +67,8 @@ check_ctx_prm(const struct tle_ctx_param *prm) { if (prm->proto >= TLE_PROTO_NUM) return -EINVAL; + if (prm->hash_alg >= TLE_HASH_NUM) + return -EINVAL; return 0; } @@ -108,6 +111,13 @@ tle_ctx_create(const struct tle_ctx_param *ctx_prm) tle_pbm_init(ctx->use + i, LPORT_START_BLK); ctx->streams.nb_free = ctx->prm.max_streams; + + /* Initialization of siphash state is done here to speed up the + * fastpath processing. + */ + if (ctx->prm.hash_alg == TLE_SIPHASH) + siphash_initialization(&ctx->prm.secret_key, + &ctx->prm.secret_key); return ctx; } diff --git a/lib/libtle_l4p/halfsiphash.h b/lib/libtle_l4p/halfsiphash.h new file mode 100644 index 0000000..e8e21e4 --- /dev/null +++ b/lib/libtle_l4p/halfsiphash.h @@ -0,0 +1,100 @@ +/* + * SipHash reference C implementation + + * Copyright (c) 2016 Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com> + + * To the extent possible under law, the author(s) have dedicated all copyright + * and related and neighboring rights to this software to the public domain + * worldwide. This software is distributed without any warranty. + + * You should have received a copy of the CC0 Public Domain Dedication along + * with this software. If not, see + * <http://creativecommons.org/publicdomain/zero/1.0/>. + */ + +#ifndef _SIPHASH_ +#define _SIPHASH_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* The below siphash logic is taken from the source + * https://github.com/veorq/SipHash + */ + +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include <rte_debug.h> + +#define STATE_V2 0x6c796765 +#define STATE_V3 0x74656462 + +#define ROTL(x, b) (uint32_t)(((x) << (b)) | ((x) >> (32 - (b)))) + +/* + * Siphash hash functionality logically divided into different + * phases and the functions are named based on the same. + * SipHash-2-4 is used i.e: 2 compression rounds and 4 finalization rounds. + */ +static inline void +sipround(rte_xmm_t *v) +{ + v->u32[0] += v->u32[1]; + v->u32[1] = ROTL(v->u32[1], 5); + v->u32[1] ^= v->u32[0]; + v->u32[0] = ROTL(v->u32[0], 16); + v->u32[2] += v->u32[3]; + v->u32[3] = ROTL(v->u32[3], 8); + v->u32[3] ^= v->u32[2]; + v->u32[0] += v->u32[3]; + v->u32[3] = ROTL(v->u32[3], 7); + v->u32[3] ^= v->u32[0]; + v->u32[2] += v->u32[1]; + v->u32[1] = ROTL(v->u32[1], 13); + v->u32[1] ^= v->u32[2]; + v->u32[2] = ROTL(v->u32[2], 16); +} + +static inline void +siphash_initialization(rte_xmm_t *v, const rte_xmm_t *k) +{ + uint32_t k0 = k->u32[0]; + uint32_t k1 = k->u32[1]; + + v->u32[0] = k0; + v->u32[1] = k1; + v->u32[2] = STATE_V2 ^ k0; + v->u32[3] = STATE_V3 ^ k1; +} + +static inline void +siphash_compression(const uint32_t *in, size_t len, rte_xmm_t *v) +{ + uint32_t i; + + for (i = 0; i < len; i++) { + v->u32[3] ^= in[i]; + sipround(v); + sipround(v); + v->u32[0] ^= in[i]; + } +} + +static inline void +siphash_finalization(rte_xmm_t *v) +{ + v->u32[2] ^= 0xff; + sipround(v); + sipround(v); + sipround(v); + sipround(v); +} + +#ifdef __cplusplus +} +#endif + +#endif /* __SIPHASH__ */ diff --git a/lib/libtle_l4p/syncookie.h b/lib/libtle_l4p/syncookie.h index ad70b7d..da2e166 100644 --- a/lib/libtle_l4p/syncookie.h +++ b/lib/libtle_l4p/syncookie.h @@ -16,16 +16,16 @@ #ifndef _SYNCOOKIE_H_ #define _SYNCOOKIE_H_ -#include "tcp_misc.h" #include <rte_jhash.h> +#include "tcp_misc.h" +#include <tle_ctx.h> +#include <halfsiphash.h> + #ifdef __cplusplus extern "C" { #endif -#define SYNC_SEED0 0x736f6d65 -#define SYNC_SEED1 0x646f7261 - struct sync_in4 { uint32_t seq; union l4_ports port; @@ -64,35 +64,61 @@ static const rte_xmm_t mss6len = { /* allow around 2 minutes for 3-way handshake. */ #define SYNC_MAX_TMO 0x20000 - /* ??? use SipHash as FreeBSD does. ??? */ static inline uint32_t -sync_hash4(const union pkt_info *pi, uint32_t seq) +sync_hash4(const union pkt_info *pi, uint32_t seq, rte_xmm_t *secret_key, + uint32_t hash_alg) { - uint32_t v0, v1; struct sync_in4 in4; + rte_xmm_t state; + uint32_t v0, v1; in4.seq = seq; in4.port = pi->port; in4.addr = pi->addr4; - v0 = SYNC_SEED0; - v1 = SYNC_SEED1; - rte_jhash_32b_2hashes(&in4.seq, sizeof(in4) / sizeof(uint32_t), - &v0, &v1); - return v0 + v1; + if (hash_alg == TLE_JHASH) { + v0 = secret_key->u32[0]; + v1 = secret_key->u32[1]; + rte_jhash_32b_2hashes(&in4.seq, sizeof(in4) / sizeof(uint32_t), + &v0, &v1); + return v0 + v1; + } else { + state = *secret_key; + siphash_compression(&in4.seq, sizeof(in4) / sizeof(uint32_t), + &state); + siphash_finalization(&state); + return (state.u32[0] ^ state.u32[1] ^ + state.u32[2] ^ state.u32[3]); + } } static inline uint32_t -sync_hash6(const union pkt_info *pi, uint32_t seq) +sync_hash6(const union pkt_info *pi, uint32_t seq, rte_xmm_t *secret_key, + uint32_t hash_alg) { + uint32_t port_seq[2]; + rte_xmm_t state; uint32_t v0, v1; - v0 = SYNC_SEED0; - v1 = SYNC_SEED1; - rte_jhash_32b_2hashes(pi->addr6->raw.u32, - sizeof(*pi->addr6) / sizeof(uint32_t), &v0, &v1); - return rte_jhash_3words(v0, seq, pi->port.raw, v1); + if (hash_alg == TLE_JHASH) { + v0 = secret_key->u32[0]; + v1 = secret_key->u32[1]; + rte_jhash_32b_2hashes(pi->addr6->raw.u32, + sizeof(*pi->addr6) / sizeof(uint32_t), + &v0, &v1); + return rte_jhash_3words(v0, seq, pi->port.raw, v1); + } else { + state = *secret_key; + siphash_compression(pi->addr6->raw.u32, + sizeof(*pi->addr6) / sizeof(uint32_t), &state); + port_seq[0] = pi->port.raw; + port_seq[1] = seq; + siphash_compression(port_seq, RTE_DIM(port_seq), &state); + siphash_finalization(&state); + return (state.u32[0] ^ state.u32[1] ^ + state.u32[2] ^ state.u32[3]); + } } static inline uint32_t @@ -105,15 +131,16 @@ sync_mss2idx(uint16_t mss, const rte_xmm_t *msl) } static inline uint32_t -sync_gen_seq(const union pkt_info *pi, uint32_t seq, uint32_t ts, uint16_t mss) +sync_gen_seq(const union pkt_info *pi, uint32_t seq, uint32_t ts, uint16_t mss, + uint32_t hash_alg, rte_xmm_t *secret_key) { uint32_t h, mi; if (pi->tf.type == TLE_V4) { - h = sync_hash4(pi, seq); + h = sync_hash4(pi, seq, secret_key, hash_alg); mi = sync_mss2idx(mss, &mss4len); } else { - h = sync_hash6(pi, seq); + h = sync_hash6(pi, seq, secret_key, hash_alg); mi = sync_mss2idx(mss, &mss6len); } @@ -131,11 +158,14 @@ sync_gen_ts(uint32_t ts, uint32_t wscale) static inline int sync_check_ack(const union pkt_info *pi, uint32_t seq, uint32_t ack, - uint32_t ts) + uint32_t ts, uint32_t hash_alg, rte_xmm_t *secret_key) { uint32_t h, mi, pts; - h = (pi->tf.type == TLE_V4) ? sync_hash4(pi, seq) : sync_hash6(pi, seq); + if (pi->tf.type == TLE_V4) + h = sync_hash4(pi, seq, secret_key, hash_alg); + else + h = sync_hash6(pi, seq, secret_key, hash_alg); h = ack - h; pts = h & ~SYNC_MSS_MASK; diff --git a/lib/libtle_l4p/tcp_ctl.h b/lib/libtle_l4p/tcp_ctl.h index 95c2bbc..8ffb924 100644 --- a/lib/libtle_l4p/tcp_ctl.h +++ b/lib/libtle_l4p/tcp_ctl.h @@ -45,7 +45,14 @@ tcp_stream_up(struct tle_tcp_stream *s) static inline uint32_t calc_rx_wnd(const struct tle_tcp_stream *s, uint32_t scale) { - return s->rx.q->prod.mask << scale; + uint32_t wnd; + + /* peer doesn't support WSCALE option, wnd size is limited to 64K */ + if (scale == TCP_WSCALE_NONE) { + wnd = s->rx.q->prod.mask << TCP_WSCALE_DEFAULT; + return RTE_MIN(wnd, (uint32_t)UINT16_MAX); + } else + return s->rx.q->prod.mask << scale; } /* empty stream's receive queue */ @@ -89,13 +96,13 @@ tcp_stream_reset(struct tle_ctx *ctx, struct tle_tcp_stream *s) rte_atomic32_set(&s->tx.arm, 0); /* reset TCB */ - uop = s->tcb.uop & (TCP_OP_LISTEN | TCP_OP_CONNECT); + uop = s->tcb.uop & ~TCP_OP_CLOSE; memset(&s->tcb, 0, sizeof(s->tcb)); /* reset cached destination */ memset(&s->tx.dst, 0, sizeof(s->tx.dst)); - if (uop != 0) { + if (uop != TCP_OP_ACCEPT) { /* free stream's destination port */ stream_clear_ctx(ctx, &s->s); if (uop == TCP_OP_LISTEN) diff --git a/lib/libtle_l4p/tcp_rxtx.c b/lib/libtle_l4p/tcp_rxtx.c index 6085814..ceaa2bc 100644 --- a/lib/libtle_l4p/tcp_rxtx.c +++ b/lib/libtle_l4p/tcp_rxtx.c @@ -27,6 +27,7 @@ #include "tcp_ctl.h" #include "tcp_rxq.h" #include "tcp_txq.h" +#include "tcp_tx_seg.h" #define TCP_MAX_PKT_SEG 0x20 @@ -640,7 +641,9 @@ sync_ack(struct tle_tcp_stream *s, const union pkt_info *pi, get_syn_opts(&s->tcb.so, (uintptr_t)(th + 1), m->l4_len - sizeof(*th)); s->tcb.rcv.nxt = si->seq + 1; - seq = sync_gen_seq(pi, s->tcb.rcv.nxt, ts, s->tcb.so.mss); + seq = sync_gen_seq(pi, s->tcb.rcv.nxt, ts, s->tcb.so.mss, + s->s.ctx->prm.hash_alg, + &s->s.ctx->prm.secret_key); s->tcb.so.ts.ecr = s->tcb.so.ts.val; s->tcb.so.ts.val = sync_gen_ts(ts, s->tcb.so.wscale); s->tcb.so.wscale = (s->tcb.so.wscale == TCP_WSCALE_NONE) ? @@ -761,14 +764,17 @@ rx_check_seqack(struct tcb *tcb, uint32_t seq, uint32_t ack, uint32_t len, static inline int restore_syn_opt(struct syn_opts *so, const union pkt_info *pi, - const union seg_info *si, uint32_t ts, const struct rte_mbuf *mb) + const union seg_info *si, uint32_t ts, const struct rte_mbuf *mb, + uint32_t hash_alg, rte_xmm_t *secret_key) { int32_t rc; uint32_t len; const struct tcp_hdr *th; /* check that ACK, etc fields are what we expected. */ - rc = sync_check_ack(pi, si->seq, si->ack - 1, ts); + rc = sync_check_ack(pi, si->seq, si->ack - 1, ts, + hash_alg, + secret_key); if (rc < 0) return rc; @@ -917,12 +923,12 @@ rx_ack_listen(struct tle_tcp_stream *s, struct stbl *st, if (pi->tf.flags != TCP_FLAG_ACK || rx_check_stream(s, pi) != 0) return -EINVAL; - rc = restore_syn_opt(&so, pi, si, tms, mb); + ctx = s->s.ctx; + rc = restore_syn_opt(&so, pi, si, tms, mb, ctx->prm.hash_alg, + &ctx->prm.secret_key); if (rc < 0) return rc; - ctx = s->s.ctx; - /* allocate new stream */ ts = get_stream(ctx); cs = TCP_STREAM(ts); @@ -1532,7 +1538,7 @@ rx_synack(struct tle_tcp_stream *s, uint32_t ts, uint32_t state, s->tcb.so = so; s->tcb.snd.una = s->tcb.snd.nxt; - s->tcb.snd.mss = so.mss; + s->tcb.snd.mss = calc_smss(so.mss, &s->tx.dst); s->tcb.snd.wnd = si->wnd << so.wscale; s->tcb.snd.wu.wl1 = si->seq; s->tcb.snd.wu.wl2 = si->ack; @@ -1546,6 +1552,11 @@ rx_synack(struct tle_tcp_stream *s, uint32_t ts, uint32_t state, s->tcb.rcv.irs = si->seq; s->tcb.rcv.nxt = si->seq + 1; + /* if peer doesn't support WSCALE opt, recalculate RCV.WND */ + s->tcb.rcv.wscale = (so.wscale == TCP_WSCALE_NONE) ? + TCP_WSCALE_NONE : TCP_WSCALE_DEFAULT; + s->tcb.rcv.wnd = calc_rx_wnd(s, s->tcb.rcv.wscale); + /* calculate initial rto */ rto_estimate(&s->tcb, ts - s->tcb.snd.ts); @@ -2053,7 +2064,9 @@ tx_syn(struct tle_tcp_stream *s, const struct sockaddr *addr) s->tcb.so.mss = calc_smss(s->tx.dst.mtu, &s->tx.dst); /* note that rcv.nxt is 0 here for sync_gen_seq.*/ - seq = sync_gen_seq(&pi, s->tcb.rcv.nxt, tms, s->tcb.so.mss); + seq = sync_gen_seq(&pi, s->tcb.rcv.nxt, tms, s->tcb.so.mss, + s->s.ctx->prm.hash_alg, + &s->s.ctx->prm.secret_key); s->tcb.snd.iss = seq; s->tcb.snd.rcvr = seq; s->tcb.snd.una = seq; @@ -2142,13 +2155,42 @@ tle_tcp_stream_recv(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num) return n; } +static inline int32_t +tx_segments(struct tle_tcp_stream *s, uint64_t ol_flags, + struct rte_mbuf *segs[], uint32_t num) +{ + uint32_t i; + int32_t rc; + + for (i = 0; i != num; i++) { + /* Build L2/L3/L4 header */ + rc = tcp_fill_mbuf(segs[i], s, &s->tx.dst, ol_flags, s->s.port, + 0, TCP_FLAG_ACK, 0, 0); + if (rc != 0) { + free_segments(segs, num); + break; + } + } + + if (i == num) { + /* queue packets for further transmission. */ + rc = rte_ring_mp_enqueue_bulk(s->tx.q, (void **)segs, num); + if (rc != 0) + free_segments(segs, num); + } + + return rc; +} + uint16_t tle_tcp_stream_send(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num) { - uint32_t i, j, mss, n, state, type; + uint32_t i, j, k, mss, n, state, type; + int32_t rc; uint64_t ol_flags; struct tle_tcp_stream *s; struct tle_dev *dev; + struct rte_mbuf *segs[TCP_MAX_PKT_SEG]; s = TCP_STREAM(ts); @@ -2161,53 +2203,87 @@ tle_tcp_stream_send(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num) state = s->tcb.state; if (state != TCP_ST_ESTABLISHED && state != TCP_ST_CLOSE_WAIT) { rte_errno = ENOTCONN; - n = 0; - } else { - mss = s->tcb.snd.mss; - dev = s->tx.dst.dev; - type = s->s.type; - ol_flags = dev->tx.ol_flags[type]; + rwl_release(&s->tx.use); + return 0; + } - /* prepare and check for TX */ - for (i = 0; i != num; i++) { + mss = s->tcb.snd.mss; + dev = s->tx.dst.dev; + type = s->s.type; + ol_flags = dev->tx.ol_flags[type]; - /* !!! need to be modified !!! */ + k = 0; + rc = 0; + while (k != num) { + /* prepare and check for TX */ + for (i = k; i != num; i++) { if (pkt[i]->pkt_len > mss || - pkt[i]->nb_segs > TCP_MAX_PKT_SEG) { - rte_errno = EBADMSG; + pkt[i]->nb_segs > TCP_MAX_PKT_SEG) break; - } else if (tcp_fill_mbuf(pkt[i], s, &s->tx.dst, - ol_flags, s->s.port, 0, TCP_FLAG_ACK, - 0, 0) != 0) + rc = tcp_fill_mbuf(pkt[i], s, &s->tx.dst, ol_flags, + s->s.port, 0, TCP_FLAG_ACK, 0, 0); + if (rc != 0) break; } - /* queue packets for further transmision. */ - n = rte_ring_mp_enqueue_burst(s->tx.q, (void **)pkt, i); + if (i != k) { + /* queue packets for further transmission. */ + n = rte_ring_mp_enqueue_burst(s->tx.q, (void **)pkt + k, + (i - k)); + k += n; + + /* + * for unsent, but already modified packets: + * remove pkt l2/l3 headers, restore ol_flags + */ + if (i != k) { + ol_flags = ~dev->tx.ol_flags[type]; + for (j = k; j != i; j++) { + rte_pktmbuf_adj(pkt[j], pkt[j]->l2_len + + pkt[j]->l3_len + + pkt[j]->l4_len); + pkt[j]->ol_flags &= ol_flags; + } + break; + } + } - /* notify BE about more data to send */ - if (n != 0) - txs_enqueue(s->s.ctx, s); + if (rc != 0) { + rte_errno = -rc; + break; - /* - * for unsent, but already modified packets: - * remove pkt l2/l3 headers, restore ol_flags - */ - if (n != i) { - ol_flags = ~dev->tx.ol_flags[type]; - for (j = n; j != i; j++) { - rte_pktmbuf_adj(pkt[j], pkt[j]->l2_len + - pkt[j]->l3_len + pkt[j]->l4_len); - pkt[j]->ol_flags &= ol_flags; + /* segment large packet and enqueue for sending */ + } else if (i != num) { + /* segment the packet. */ + rc = tcp_segmentation(pkt[i], segs, RTE_DIM(segs), + &s->tx.dst, mss); + if (rc < 0) { + rte_errno = -rc; + break; } - /* if possible, rearm stream write event. */ - } else if (rte_ring_free_count(s->tx.q) != 0 && - s->tx.ev != NULL) - tle_event_raise(s->tx.ev); + + rc = tx_segments(s, dev->tx.ol_flags[type], segs, rc); + if (rc == 0) { + /* free the large mbuf */ + rte_pktmbuf_free(pkt[i]); + /* set the mbuf as consumed */ + k++; + } else + /* no space left in tx queue */ + break; + } } + /* notify BE about more data to send */ + if (k != 0) + txs_enqueue(s->s.ctx, s); + /* if possible, re-arm stream write event. */ + if (rte_ring_free_count(s->tx.q) != 0 && s->tx.ev != NULL) + tle_event_raise(s->tx.ev); + rwl_release(&s->tx.use); - return n; + + return k; } /* send data and FIN (if needed) */ diff --git a/lib/libtle_l4p/tcp_tx_seg.h b/lib/libtle_l4p/tcp_tx_seg.h new file mode 100644 index 0000000..3a80fdd --- /dev/null +++ b/lib/libtle_l4p/tcp_tx_seg.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2016 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _TCP_TX_SEG_H_ +#define _TCP_TX_SEG_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void +free_segments(struct rte_mbuf *mb[], uint32_t num) +{ + uint32_t i; + + for (i = 0; i != num; i++) + rte_pktmbuf_free(mb[i]); +} + +static inline int32_t +tcp_segmentation(struct rte_mbuf *mbin, struct rte_mbuf *mbout[], uint16_t num, + const struct tle_dest *dst, uint16_t mss) +{ + struct rte_mbuf *in_seg = NULL; + uint32_t nbseg, in_seg_data_pos; + uint32_t more_in_segs; + + in_seg = mbin; + in_seg_data_pos = 0; + nbseg = 0; + + /* Check that pkts_out is big enough to hold all fragments */ + if (mss * num < (uint16_t)mbin->pkt_len) + return -ENOSPC; + + more_in_segs = 1; + while (more_in_segs) { + struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL; + uint32_t more_out_segs; + + /* Allocate direct buffer */ + out_pkt = rte_pktmbuf_alloc(dst->head_mp); + if (out_pkt == NULL) { + free_segments(mbout, nbseg); + return -ENOMEM; + } + + out_seg_prev = out_pkt; + more_out_segs = 1; + while (more_out_segs && more_in_segs) { + struct rte_mbuf *out_seg = NULL; + uint32_t len; + + /* Allocate indirect buffer */ + out_seg = rte_pktmbuf_alloc(dst->head_mp); + if (out_seg == NULL) { + rte_pktmbuf_free(out_pkt); + free_segments(mbout, nbseg); + return -ENOMEM; + } + out_seg_prev->next = out_seg; + out_seg_prev = out_seg; + + /* Prepare indirect buffer */ + rte_pktmbuf_attach(out_seg, in_seg); + len = mss; + if (len > (in_seg->data_len - in_seg_data_pos)) + len = in_seg->data_len - in_seg_data_pos; + + out_seg->data_off = in_seg->data_off + in_seg_data_pos; + out_seg->data_len = (uint16_t)len; + out_pkt->pkt_len = (uint16_t)(len + out_pkt->pkt_len); + out_pkt->nb_segs += 1; + in_seg_data_pos += len; + + /* Current output packet (i.e. fragment) done ? */ + if (out_pkt->pkt_len >= mss) + more_out_segs = 0; + + /* Current input segment done ? */ + if (in_seg_data_pos == in_seg->data_len) { + in_seg = in_seg->next; + in_seg_data_pos = 0; + + if (in_seg == NULL) + more_in_segs = 0; + } + } + + /* Write the segment to the output list */ + mbout[nbseg] = out_pkt; + nbseg++; + } + + return nbseg; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _TCP_TX_SEG_H_ */ diff --git a/lib/libtle_l4p/tle_ctx.h b/lib/libtle_l4p/tle_ctx.h index a3516bf..144dbe7 100644 --- a/lib/libtle_l4p/tle_ctx.h +++ b/lib/libtle_l4p/tle_ctx.h @@ -97,6 +97,12 @@ enum { TLE_PROTO_NUM }; +enum { + TLE_JHASH, + TLE_SIPHASH, + TLE_HASH_NUM +}; + struct tle_ctx_param { int32_t socket_id; /**< socket ID to allocate memory for. */ uint32_t proto; /**< L4 proto to handle. */ @@ -116,6 +122,11 @@ struct tle_ctx_param { /**< will be called by send() to get IPv6 packet destination info. */ void *lookup6_data; /**< opaque data pointer for lookup6() callback. */ + + uint32_t hash_alg; + /**< hash algorithm to be used to generate sequence number. */ + rte_xmm_t secret_key; + /**< secret key to be used to calculate the hash. */ }; /** diff --git a/test/gtest/Makefile b/test/gtest/Makefile index 9b1341d..648c233 100644 --- a/test/gtest/Makefile +++ b/test/gtest/Makefile @@ -76,7 +76,7 @@ SRCS-y += test_tle_udp_destroy.cpp SRCS-y += test_tle_udp_event.cpp #SRCS-y += test_tle_udp_stream.cpp SRCS-y += test_tle_udp_stream_gen.cpp -#SRCS-y += test_tle_tcp_stream.cpp +SRCS-y += test_tle_tcp_stream.cpp #SRCS-y += test_tle_tcp_stream_gen.cpp SYMLINK-y-app += test_scapy_gen.py |