aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--README4
-rw-r--r--examples/l4fwd/README5
-rw-r--r--examples/l4fwd/lcore.h7
-rw-r--r--examples/l4fwd/main.c3
-rw-r--r--examples/l4fwd/parse.c51
-rw-r--r--examples/l4fwd/port.h2
-rw-r--r--examples/l4fwd/tcp.h6
-rw-r--r--lib/libtle_l4p/ctx.c10
-rw-r--r--lib/libtle_l4p/halfsiphash.h100
-rw-r--r--lib/libtle_l4p/syncookie.h76
-rw-r--r--lib/libtle_l4p/tcp_ctl.h13
-rw-r--r--lib/libtle_l4p/tcp_rxtx.c162
-rw-r--r--lib/libtle_l4p/tcp_tx_seg.h114
-rw-r--r--lib/libtle_l4p/tle_ctx.h11
-rw-r--r--test/gtest/Makefile2
15 files changed, 487 insertions, 79 deletions
diff --git a/README b/README
index f6ff9ed..d33b2df 100644
--- a/README
+++ b/README
@@ -33,6 +33,10 @@
to make the resulting host stack easily usable by existing non-vpp aware
software.
+ The library uses siphash logic from the below source
+ https://github.com/veorq/SipHash
+
+
2. INSTALLATION GUIDE
1) Obtain latest DPDK and build it.
diff --git a/examples/l4fwd/README b/examples/l4fwd/README
index 658fe3a..a232537 100644
--- a/examples/l4fwd/README
+++ b/examples/l4fwd/README
@@ -130,6 +130,11 @@
-L | --listen /* open TCP streams in server mode (listen). */ \
-a | --enable-arp /* enable arp responses (request not supported) */ \
-v | --verbose /* different level of verbose mode */ \
+ -H | --hash <string> /* hash algorithm i.e. siphash or jhash to be */ \
+ /* used to generate the sequence number. */ \
+ -K | --seckey <string> /* 16 character long secret key used by */ \
+ /* hash algorithms to generate the */ \
+ /* sequence number. */ \
<port0_params> <port1_params> ... <portN_params>
Note that: options -U and -T cannot be used together.
diff --git a/examples/l4fwd/lcore.h b/examples/l4fwd/lcore.h
index d88e434..11cc239 100644
--- a/examples/l4fwd/lcore.h
+++ b/examples/l4fwd/lcore.h
@@ -16,6 +16,8 @@
#ifndef LCORE_H_
#define LCORE_H_
+#include <rte_random.h>
+
#include "dpdk_legacy.h"
/*
@@ -64,6 +66,11 @@ create_context(struct netbe_lcore *lc, const struct tle_ctx_param *ctx_prm)
cprm.lookup4_data = lc;
cprm.lookup6 = lpm6_dst_lookup;
cprm.lookup6_data = lc;
+ if (cprm.secret_key.u64[0] == 0 &&
+ cprm.secret_key.u64[1] == 0) {
+ cprm.secret_key.u64[0] = rte_rand();
+ cprm.secret_key.u64[1] = rte_rand();
+ }
frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) /
MS_PER_S * FRAG_TTL;
diff --git a/examples/l4fwd/main.c b/examples/l4fwd/main.c
index 37bd03e..7613a95 100644
--- a/examples/l4fwd/main.c
+++ b/examples/l4fwd/main.c
@@ -68,9 +68,8 @@ static char proto_name[3][10] = {"udp", "tcp", ""};
static const struct rte_eth_conf port_conf_default = {
.rxmode = {
- .max_rx_pkt_len = ETHER_MAX_VLAN_FRAME_LEN,
.hw_vlan_strip = 1,
- .jumbo_frame = 1,
+ .jumbo_frame = 0,
},
};
diff --git a/examples/l4fwd/parse.c b/examples/l4fwd/parse.c
index 6593221..4850312 100644
--- a/examples/l4fwd/parse.c
+++ b/examples/l4fwd/parse.c
@@ -13,6 +13,9 @@
* limitations under the License.
*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
#include "netbe.h"
#include "parse.h"
@@ -61,6 +64,12 @@ static const struct {
#define OPT_SHORT_LISTEN 'L'
#define OPT_LONG_LISTEN "listen"
+#define OPT_SHORT_HASH 'H'
+#define OPT_LONG_HASH "hash"
+
+#define OPT_SHORT_SEC_KEY 'K'
+#define OPT_LONG_SEC_KEY "seckey"
+
#define OPT_SHORT_VERBOSE 'v'
#define OPT_LONG_VERBOSE "verbose"
@@ -75,6 +84,8 @@ static const struct option long_opt[] = {
{OPT_LONG_STREAMS, 1, 0, OPT_SHORT_STREAMS},
{OPT_LONG_UDP, 0, 0, OPT_SHORT_UDP},
{OPT_LONG_TCP, 0, 0, OPT_SHORT_TCP},
+ {OPT_LONG_HASH, 1, 0, OPT_SHORT_HASH},
+ {OPT_LONG_SEC_KEY, 1, 0, OPT_SHORT_SEC_KEY},
{OPT_LONG_LISTEN, 0, 0, OPT_SHORT_LISTEN},
{OPT_LONG_VERBOSE, 1, 0, OPT_SHORT_VERBOSE},
{NULL, 0, 0, 0}
@@ -298,7 +309,7 @@ parse_netbe_arg(struct netbe_port *prt, const char *arg, rte_cpuset_t *pcpu)
union parse_val val[RTE_DIM(hndl)];
memset(val, 0, sizeof(val));
- val[2].u64 = ETHER_MAX_VLAN_FRAME_LEN - ETHER_CRC_LEN;
+ val[2].u64 = ETHER_MAX_LEN - ETHER_CRC_LEN;
rc = parse_kvargs(arg, keys_man, RTE_DIM(keys_man),
keys_opt, RTE_DIM(keys_opt), hndl, val);
@@ -709,6 +720,17 @@ netfe_parse_cfg(const char *fname, struct netfe_lcore_prm *lp)
return rc;
}
+/*
+ * Translate a hash algorithm name into its TLE_* identifier.
+ * Recognised names are "jhash" and "siphash" (exact match);
+ * TLE_HASH_NUM is returned for any other string.
+ */
+static uint32_t
+parse_hash_alg(const char *val)
+{
+	if (strcmp(val, "siphash") == 0)
+		return TLE_SIPHASH;
+
+	if (strcmp(val, "jhash") == 0)
+		return TLE_JHASH;
+
+	/* unknown name: report it with the out-of-range marker */
+	return TLE_HASH_NUM;
+}
+
+
int
parse_app_options(int argc, char **argv, struct netbe_cfg *cfg,
struct tle_ctx_param *ctx_prm,
@@ -722,8 +744,8 @@ parse_app_options(int argc, char **argv, struct netbe_cfg *cfg,
optind = 0;
optarg = NULL;
- while ((opt = getopt_long(argc, argv, "aB:LPR:S:TUb:f:s:v:", long_opt,
- &opt_idx)) != EOF) {
+ while ((opt = getopt_long(argc, argv, "aB:LPR:S:TUb:f:s:v:H:K:",
+ long_opt, &opt_idx)) != EOF) {
if (opt == OPT_SHORT_ARP) {
cfg->arp = 1;
} else if (opt == OPT_SHORT_SBULK) {
@@ -778,7 +800,28 @@ parse_app_options(int argc, char **argv, struct netbe_cfg *cfg,
} else if (opt == OPT_SHORT_LISTEN) {
listen = 1;
cfg->server = 1;
- } else {
+ } else if (opt == OPT_SHORT_HASH) {
+ ctx_prm->hash_alg = parse_hash_alg(optarg);
+ if (ctx_prm->hash_alg >= TLE_HASH_NUM) {
+ rte_exit(EXIT_FAILURE,
+ "%s: invalid hash algorithm %s "
+ "for option: \'%c\'\n",
+ __func__, optarg, opt);
+ }
+ } else if (opt == OPT_SHORT_SEC_KEY) {
+ n = strlen(optarg);
+ if (n != sizeof(ctx_prm->secret_key)) {
+ rte_exit(EXIT_FAILURE,
+ "%s: invalid length %s "
+ "for option \'%c\' "
+ "must be 16 characters long\n",
+ __func__, optarg, opt);
+ }
+ memcpy(&ctx_prm->secret_key, optarg,
+ sizeof(ctx_prm->secret_key));
+ }
+
+ else {
rte_exit(EXIT_FAILURE,
"%s: unknown option: \'%c\'\n",
__func__, opt);
diff --git a/examples/l4fwd/port.h b/examples/l4fwd/port.h
index bc13dca..04f3ec2 100644
--- a/examples/l4fwd/port.h
+++ b/examples/l4fwd/port.h
@@ -181,6 +181,8 @@ port_init(struct netbe_port *uprt, uint32_t proto)
port_conf.rxmode.hw_ip_checksum = 1;
}
port_conf.rxmode.max_rx_pkt_len = uprt->mtu + ETHER_CRC_LEN;
+ if (port_conf.rxmode.max_rx_pkt_len > ETHER_MAX_LEN)
+ port_conf.rxmode.jumbo_frame = 1;
rc = update_rss_conf(uprt, &dev_info, &port_conf, proto);
if (rc != 0)
diff --git a/examples/l4fwd/tcp.h b/examples/l4fwd/tcp.h
index f6ca3a5..e4aadb5 100644
--- a/examples/l4fwd/tcp.h
+++ b/examples/l4fwd/tcp.h
@@ -279,9 +279,9 @@ netfe_fwd_tcp(uint32_t lcore, struct netfe_stream *fes)
__func__, lcore, proto_name[fes->proto],
fed->s, n, k);
- fed->stat.txp += k;
- fed->stat.drops += n - k;
- fes->stat.fwp += k;
+ fed->stat.txp += k;
+ fed->stat.drops += n - k;
+ fes->stat.fwp += k;
} else {
NETFE_TRACE("%s(%u, %p): no fwd stream for %u pkts;\n",
diff --git a/lib/libtle_l4p/ctx.c b/lib/libtle_l4p/ctx.c
index 7ebef9d..6eb33eb 100644
--- a/lib/libtle_l4p/ctx.c
+++ b/lib/libtle_l4p/ctx.c
@@ -21,6 +21,7 @@
#include "stream.h"
#include "misc.h"
+#include <halfsiphash.h>
#define LPORT_START 0x8000
#define LPORT_END MAX_PORT_NUM
@@ -66,6 +67,8 @@ check_ctx_prm(const struct tle_ctx_param *prm)
{
if (prm->proto >= TLE_PROTO_NUM)
return -EINVAL;
+ if (prm->hash_alg >= TLE_HASH_NUM)
+ return -EINVAL;
return 0;
}
@@ -108,6 +111,13 @@ tle_ctx_create(const struct tle_ctx_param *ctx_prm)
tle_pbm_init(ctx->use + i, LPORT_START_BLK);
ctx->streams.nb_free = ctx->prm.max_streams;
+
+ /* Initialization of siphash state is done here to speed up the
+ * fastpath processing.
+ */
+ if (ctx->prm.hash_alg == TLE_SIPHASH)
+ siphash_initialization(&ctx->prm.secret_key,
+ &ctx->prm.secret_key);
return ctx;
}
diff --git a/lib/libtle_l4p/halfsiphash.h b/lib/libtle_l4p/halfsiphash.h
new file mode 100644
index 0000000..e8e21e4
--- /dev/null
+++ b/lib/libtle_l4p/halfsiphash.h
@@ -0,0 +1,100 @@
+/*
+ * SipHash reference C implementation
+
+ * Copyright (c) 2016 Jean-Philippe Aumasson <jeanphilippe.aumasson@gmail.com>
+
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide. This software is distributed without any warranty.
+
+ * You should have received a copy of the CC0 Public Domain Dedication along
+ * with this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+#ifndef _SIPHASH_
+#define _SIPHASH_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The below siphash logic is taken from the source
+ * https://github.com/veorq/SipHash
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_debug.h>
+
+#define STATE_V2 0x6c796765
+#define STATE_V3 0x74656462
+
+#define ROTL(x, b) (uint32_t)(((x) << (b)) | ((x) >> (32 - (b))))
+
+/*
+ * Siphash hash functionality logically divided into different
+ * phases (initialization, compression, finalization) and the functions
+ * are named based on the same.
+ * This is the 32-bit-word variant (hence the file name halfsiphash.h):
+ * the state is the four 32-bit words of an rte_xmm_t.
+ * The 2-4 round schedule is used i.e: 2 rounds per compressed input word
+ * and 4 finalization rounds.
+ *
+ * sipround() performs one mixing round, in place, on v->u32[0..3] using
+ * 32-bit additions, left rotations (ROTL) and XORs. Every statement
+ * consumes results of the previous ones, so the order must be kept.
+ */
+static inline void
+sipround(rte_xmm_t *v)
+{
+ v->u32[0] += v->u32[1];
+ v->u32[1] = ROTL(v->u32[1], 5);
+ v->u32[1] ^= v->u32[0];
+ v->u32[0] = ROTL(v->u32[0], 16);
+ v->u32[2] += v->u32[3];
+ v->u32[3] = ROTL(v->u32[3], 8);
+ v->u32[3] ^= v->u32[2];
+ v->u32[0] += v->u32[3];
+ v->u32[3] = ROTL(v->u32[3], 7);
+ v->u32[3] ^= v->u32[0];
+ v->u32[2] += v->u32[1];
+ v->u32[1] = ROTL(v->u32[1], 13);
+ v->u32[1] ^= v->u32[2];
+ v->u32[2] = ROTL(v->u32[2], 16);
+}
+
+/*
+ * Initialization phase: derive the four-word hash state v from key k.
+ * Only k->u32[0] and k->u32[1] are read; they become v->u32[0..1] and,
+ * XORed with STATE_V2/STATE_V3, v->u32[2..3].
+ * v and k may point to the same object: both key words are loaded
+ * before any state word is written.
+ */
+static inline void
+siphash_initialization(rte_xmm_t *v, const rte_xmm_t *k)
+{
+	const uint32_t key_lo = k->u32[0];
+	const uint32_t key_hi = k->u32[1];
+
+	v->u32[3] = STATE_V3 ^ key_hi;
+	v->u32[2] = STATE_V2 ^ key_lo;
+	v->u32[1] = key_hi;
+	v->u32[0] = key_lo;
+}
+
+/*
+ * Compression phase: absorb len 32-bit words from in[] into state v.
+ * Each word is XORed into v->u32[3], mixed with two sipround() calls
+ * and then XORed into v->u32[0].
+ * Note that len is a count of 32-bit words, not of bytes.
+ */
+static inline void
+siphash_compression(const uint32_t *in, size_t len, rte_xmm_t *v)
+{
+	/* same width as len, so the index cannot wrap before reaching it */
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		v->u32[3] ^= in[i];
+		sipround(v);
+		sipround(v);
+		v->u32[0] ^= in[i];
+	}
+}
+
+/*
+ * Finalization phase: XOR 0xff into v->u32[2], then run four mixing
+ * rounds over the state. The state is updated in place; producing the
+ * final hash value from it is left to the caller.
+ */
+static inline void
+siphash_finalization(rte_xmm_t *v)
+{
+	uint32_t round;
+
+	v->u32[2] ^= 0xff;
+
+	for (round = 0; round != 4; round++)
+		sipround(v);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SIPHASH_ */
diff --git a/lib/libtle_l4p/syncookie.h b/lib/libtle_l4p/syncookie.h
index ad70b7d..da2e166 100644
--- a/lib/libtle_l4p/syncookie.h
+++ b/lib/libtle_l4p/syncookie.h
@@ -16,16 +16,16 @@
#ifndef _SYNCOOKIE_H_
#define _SYNCOOKIE_H_
-#include "tcp_misc.h"
#include <rte_jhash.h>
+#include "tcp_misc.h"
+#include <tle_ctx.h>
+#include <halfsiphash.h>
+
#ifdef __cplusplus
extern "C" {
#endif
-#define SYNC_SEED0 0x736f6d65
-#define SYNC_SEED1 0x646f7261
-
struct sync_in4 {
uint32_t seq;
union l4_ports port;
@@ -64,35 +64,61 @@ static const rte_xmm_t mss6len = {
/* allow around 2 minutes for 3-way handshake. */
#define SYNC_MAX_TMO 0x20000
-
/* ??? use SipHash as FreeBSD does. ??? */
static inline uint32_t
-sync_hash4(const union pkt_info *pi, uint32_t seq)
+sync_hash4(const union pkt_info *pi, uint32_t seq, rte_xmm_t *secret_key,
+ uint32_t hash_alg)
{
- uint32_t v0, v1;
struct sync_in4 in4;
+ rte_xmm_t state;
+ uint32_t v0, v1;
in4.seq = seq;
in4.port = pi->port;
in4.addr = pi->addr4;
- v0 = SYNC_SEED0;
- v1 = SYNC_SEED1;
- rte_jhash_32b_2hashes(&in4.seq, sizeof(in4) / sizeof(uint32_t),
- &v0, &v1);
- return v0 + v1;
+ if (hash_alg == TLE_JHASH) {
+ v0 = secret_key->u32[0];
+ v1 = secret_key->u32[1];
+ rte_jhash_32b_2hashes(&in4.seq, sizeof(in4) / sizeof(uint32_t),
+ &v0, &v1);
+ return v0 + v1;
+ } else {
+ state = *secret_key;
+ siphash_compression(&in4.seq, sizeof(in4) / sizeof(uint32_t),
+ &state);
+ siphash_finalization(&state);
+ return (state.u32[0] ^ state.u32[1] ^
+ state.u32[2] ^ state.u32[3]);
+ }
}
static inline uint32_t
-sync_hash6(const union pkt_info *pi, uint32_t seq)
+sync_hash6(const union pkt_info *pi, uint32_t seq, rte_xmm_t *secret_key,
+ uint32_t hash_alg)
{
+ uint32_t port_seq[2];
+ rte_xmm_t state;
uint32_t v0, v1;
- v0 = SYNC_SEED0;
- v1 = SYNC_SEED1;
- rte_jhash_32b_2hashes(pi->addr6->raw.u32,
- sizeof(*pi->addr6) / sizeof(uint32_t), &v0, &v1);
- return rte_jhash_3words(v0, seq, pi->port.raw, v1);
+ if (hash_alg == TLE_JHASH) {
+ v0 = secret_key->u32[0];
+ v1 = secret_key->u32[1];
+ rte_jhash_32b_2hashes(pi->addr6->raw.u32,
+ sizeof(*pi->addr6) / sizeof(uint32_t),
+ &v0, &v1);
+ return rte_jhash_3words(v0, seq, pi->port.raw, v1);
+ } else {
+ state = *secret_key;
+ siphash_compression(pi->addr6->raw.u32,
+ sizeof(*pi->addr6) / sizeof(uint32_t), &state);
+ port_seq[0] = pi->port.raw;
+ port_seq[1] = seq;
+ siphash_compression(port_seq, RTE_DIM(port_seq), &state);
+ siphash_finalization(&state);
+ return (state.u32[0] ^ state.u32[1] ^
+ state.u32[2] ^ state.u32[3]);
+ }
}
static inline uint32_t
@@ -105,15 +131,16 @@ sync_mss2idx(uint16_t mss, const rte_xmm_t *msl)
}
static inline uint32_t
-sync_gen_seq(const union pkt_info *pi, uint32_t seq, uint32_t ts, uint16_t mss)
+sync_gen_seq(const union pkt_info *pi, uint32_t seq, uint32_t ts, uint16_t mss,
+ uint32_t hash_alg, rte_xmm_t *secret_key)
{
uint32_t h, mi;
if (pi->tf.type == TLE_V4) {
- h = sync_hash4(pi, seq);
+ h = sync_hash4(pi, seq, secret_key, hash_alg);
mi = sync_mss2idx(mss, &mss4len);
} else {
- h = sync_hash6(pi, seq);
+ h = sync_hash6(pi, seq, secret_key, hash_alg);
mi = sync_mss2idx(mss, &mss6len);
}
@@ -131,11 +158,14 @@ sync_gen_ts(uint32_t ts, uint32_t wscale)
static inline int
sync_check_ack(const union pkt_info *pi, uint32_t seq, uint32_t ack,
- uint32_t ts)
+ uint32_t ts, uint32_t hash_alg, rte_xmm_t *secret_key)
{
uint32_t h, mi, pts;
- h = (pi->tf.type == TLE_V4) ? sync_hash4(pi, seq) : sync_hash6(pi, seq);
+ if (pi->tf.type == TLE_V4)
+ h = sync_hash4(pi, seq, secret_key, hash_alg);
+ else
+ h = sync_hash6(pi, seq, secret_key, hash_alg);
h = ack - h;
pts = h & ~SYNC_MSS_MASK;
diff --git a/lib/libtle_l4p/tcp_ctl.h b/lib/libtle_l4p/tcp_ctl.h
index 95c2bbc..8ffb924 100644
--- a/lib/libtle_l4p/tcp_ctl.h
+++ b/lib/libtle_l4p/tcp_ctl.h
@@ -45,7 +45,14 @@ tcp_stream_up(struct tle_tcp_stream *s)
static inline uint32_t
calc_rx_wnd(const struct tle_tcp_stream *s, uint32_t scale)
{
- return s->rx.q->prod.mask << scale;
+ uint32_t wnd;
+
+ /* peer doesn't support WSCALE option, wnd size is limited to 64K */
+ if (scale == TCP_WSCALE_NONE) {
+ wnd = s->rx.q->prod.mask << TCP_WSCALE_DEFAULT;
+ return RTE_MIN(wnd, (uint32_t)UINT16_MAX);
+ } else
+ return s->rx.q->prod.mask << scale;
}
/* empty stream's receive queue */
@@ -89,13 +96,13 @@ tcp_stream_reset(struct tle_ctx *ctx, struct tle_tcp_stream *s)
rte_atomic32_set(&s->tx.arm, 0);
/* reset TCB */
- uop = s->tcb.uop & (TCP_OP_LISTEN | TCP_OP_CONNECT);
+ uop = s->tcb.uop & ~TCP_OP_CLOSE;
memset(&s->tcb, 0, sizeof(s->tcb));
/* reset cached destination */
memset(&s->tx.dst, 0, sizeof(s->tx.dst));
- if (uop != 0) {
+ if (uop != TCP_OP_ACCEPT) {
/* free stream's destination port */
stream_clear_ctx(ctx, &s->s);
if (uop == TCP_OP_LISTEN)
diff --git a/lib/libtle_l4p/tcp_rxtx.c b/lib/libtle_l4p/tcp_rxtx.c
index 6085814..ceaa2bc 100644
--- a/lib/libtle_l4p/tcp_rxtx.c
+++ b/lib/libtle_l4p/tcp_rxtx.c
@@ -27,6 +27,7 @@
#include "tcp_ctl.h"
#include "tcp_rxq.h"
#include "tcp_txq.h"
+#include "tcp_tx_seg.h"
#define TCP_MAX_PKT_SEG 0x20
@@ -640,7 +641,9 @@ sync_ack(struct tle_tcp_stream *s, const union pkt_info *pi,
get_syn_opts(&s->tcb.so, (uintptr_t)(th + 1), m->l4_len - sizeof(*th));
s->tcb.rcv.nxt = si->seq + 1;
- seq = sync_gen_seq(pi, s->tcb.rcv.nxt, ts, s->tcb.so.mss);
+ seq = sync_gen_seq(pi, s->tcb.rcv.nxt, ts, s->tcb.so.mss,
+ s->s.ctx->prm.hash_alg,
+ &s->s.ctx->prm.secret_key);
s->tcb.so.ts.ecr = s->tcb.so.ts.val;
s->tcb.so.ts.val = sync_gen_ts(ts, s->tcb.so.wscale);
s->tcb.so.wscale = (s->tcb.so.wscale == TCP_WSCALE_NONE) ?
@@ -761,14 +764,17 @@ rx_check_seqack(struct tcb *tcb, uint32_t seq, uint32_t ack, uint32_t len,
static inline int
restore_syn_opt(struct syn_opts *so, const union pkt_info *pi,
- const union seg_info *si, uint32_t ts, const struct rte_mbuf *mb)
+ const union seg_info *si, uint32_t ts, const struct rte_mbuf *mb,
+ uint32_t hash_alg, rte_xmm_t *secret_key)
{
int32_t rc;
uint32_t len;
const struct tcp_hdr *th;
/* check that ACK, etc fields are what we expected. */
- rc = sync_check_ack(pi, si->seq, si->ack - 1, ts);
+ rc = sync_check_ack(pi, si->seq, si->ack - 1, ts,
+ hash_alg,
+ secret_key);
if (rc < 0)
return rc;
@@ -917,12 +923,12 @@ rx_ack_listen(struct tle_tcp_stream *s, struct stbl *st,
if (pi->tf.flags != TCP_FLAG_ACK || rx_check_stream(s, pi) != 0)
return -EINVAL;
- rc = restore_syn_opt(&so, pi, si, tms, mb);
+ ctx = s->s.ctx;
+ rc = restore_syn_opt(&so, pi, si, tms, mb, ctx->prm.hash_alg,
+ &ctx->prm.secret_key);
if (rc < 0)
return rc;
- ctx = s->s.ctx;
-
/* allocate new stream */
ts = get_stream(ctx);
cs = TCP_STREAM(ts);
@@ -1532,7 +1538,7 @@ rx_synack(struct tle_tcp_stream *s, uint32_t ts, uint32_t state,
s->tcb.so = so;
s->tcb.snd.una = s->tcb.snd.nxt;
- s->tcb.snd.mss = so.mss;
+ s->tcb.snd.mss = calc_smss(so.mss, &s->tx.dst);
s->tcb.snd.wnd = si->wnd << so.wscale;
s->tcb.snd.wu.wl1 = si->seq;
s->tcb.snd.wu.wl2 = si->ack;
@@ -1546,6 +1552,11 @@ rx_synack(struct tle_tcp_stream *s, uint32_t ts, uint32_t state,
s->tcb.rcv.irs = si->seq;
s->tcb.rcv.nxt = si->seq + 1;
+ /* if peer doesn't support WSCALE opt, recalculate RCV.WND */
+ s->tcb.rcv.wscale = (so.wscale == TCP_WSCALE_NONE) ?
+ TCP_WSCALE_NONE : TCP_WSCALE_DEFAULT;
+ s->tcb.rcv.wnd = calc_rx_wnd(s, s->tcb.rcv.wscale);
+
/* calculate initial rto */
rto_estimate(&s->tcb, ts - s->tcb.snd.ts);
@@ -2053,7 +2064,9 @@ tx_syn(struct tle_tcp_stream *s, const struct sockaddr *addr)
s->tcb.so.mss = calc_smss(s->tx.dst.mtu, &s->tx.dst);
/* note that rcv.nxt is 0 here for sync_gen_seq.*/
- seq = sync_gen_seq(&pi, s->tcb.rcv.nxt, tms, s->tcb.so.mss);
+ seq = sync_gen_seq(&pi, s->tcb.rcv.nxt, tms, s->tcb.so.mss,
+ s->s.ctx->prm.hash_alg,
+ &s->s.ctx->prm.secret_key);
s->tcb.snd.iss = seq;
s->tcb.snd.rcvr = seq;
s->tcb.snd.una = seq;
@@ -2142,13 +2155,42 @@ tle_tcp_stream_recv(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num)
return n;
}
+/*
+ * Fill the L2/L3/L4 headers of each of the num packets in segs[] and,
+ * if every header was built, enqueue all of them as one bulk onto the
+ * stream TX ring (s->tx.q).
+ * Whenever a non-zero code is returned, all num packets in segs[] have
+ * already been freed.
+ * Returns 0 on success, otherwise the non-zero value reported by
+ * tcp_fill_mbuf() or rte_ring_mp_enqueue_bulk().
+ */
+static inline int32_t
+tx_segments(struct tle_tcp_stream *s, uint64_t ol_flags,
+	struct rte_mbuf *segs[], uint32_t num)
+{
+	uint32_t idx;
+	int32_t rc;
+
+	/* Build L2/L3/L4 header for every segment, give up on first error */
+	for (idx = 0; idx != num; idx++) {
+		rc = tcp_fill_mbuf(segs[idx], s, &s->tx.dst, ol_flags,
+			s->s.port, 0, TCP_FLAG_ACK, 0, 0);
+		if (rc != 0) {
+			free_segments(segs, num);
+			return rc;
+		}
+	}
+
+	/* queue packets for further transmission. */
+	rc = rte_ring_mp_enqueue_bulk(s->tx.q, (void **)segs, num);
+	if (rc != 0)
+		free_segments(segs, num);
+
+	return rc;
+}
+
+
uint16_t
tle_tcp_stream_send(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num)
{
- uint32_t i, j, mss, n, state, type;
+ uint32_t i, j, k, mss, n, state, type;
+ int32_t rc;
uint64_t ol_flags;
struct tle_tcp_stream *s;
struct tle_dev *dev;
+ struct rte_mbuf *segs[TCP_MAX_PKT_SEG];
s = TCP_STREAM(ts);
@@ -2161,53 +2203,87 @@ tle_tcp_stream_send(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num)
state = s->tcb.state;
if (state != TCP_ST_ESTABLISHED && state != TCP_ST_CLOSE_WAIT) {
rte_errno = ENOTCONN;
- n = 0;
- } else {
- mss = s->tcb.snd.mss;
- dev = s->tx.dst.dev;
- type = s->s.type;
- ol_flags = dev->tx.ol_flags[type];
+ rwl_release(&s->tx.use);
+ return 0;
+ }
- /* prepare and check for TX */
- for (i = 0; i != num; i++) {
+ mss = s->tcb.snd.mss;
+ dev = s->tx.dst.dev;
+ type = s->s.type;
+ ol_flags = dev->tx.ol_flags[type];
- /* !!! need to be modified !!! */
+ k = 0;
+ rc = 0;
+ while (k != num) {
+ /* prepare and check for TX */
+ for (i = k; i != num; i++) {
if (pkt[i]->pkt_len > mss ||
- pkt[i]->nb_segs > TCP_MAX_PKT_SEG) {
- rte_errno = EBADMSG;
+ pkt[i]->nb_segs > TCP_MAX_PKT_SEG)
break;
- } else if (tcp_fill_mbuf(pkt[i], s, &s->tx.dst,
- ol_flags, s->s.port, 0, TCP_FLAG_ACK,
- 0, 0) != 0)
+ rc = tcp_fill_mbuf(pkt[i], s, &s->tx.dst, ol_flags,
+ s->s.port, 0, TCP_FLAG_ACK, 0, 0);
+ if (rc != 0)
break;
}
- /* queue packets for further transmision. */
- n = rte_ring_mp_enqueue_burst(s->tx.q, (void **)pkt, i);
+ if (i != k) {
+ /* queue packets for further transmission. */
+ n = rte_ring_mp_enqueue_burst(s->tx.q, (void **)pkt + k,
+ (i - k));
+ k += n;
+
+ /*
+ * for unsent, but already modified packets:
+ * remove pkt l2/l3 headers, restore ol_flags
+ */
+ if (i != k) {
+ ol_flags = ~dev->tx.ol_flags[type];
+ for (j = k; j != i; j++) {
+ rte_pktmbuf_adj(pkt[j], pkt[j]->l2_len +
+ pkt[j]->l3_len +
+ pkt[j]->l4_len);
+ pkt[j]->ol_flags &= ol_flags;
+ }
+ break;
+ }
+ }
- /* notify BE about more data to send */
- if (n != 0)
- txs_enqueue(s->s.ctx, s);
+ if (rc != 0) {
+ rte_errno = -rc;
+ break;
- /*
- * for unsent, but already modified packets:
- * remove pkt l2/l3 headers, restore ol_flags
- */
- if (n != i) {
- ol_flags = ~dev->tx.ol_flags[type];
- for (j = n; j != i; j++) {
- rte_pktmbuf_adj(pkt[j], pkt[j]->l2_len +
- pkt[j]->l3_len + pkt[j]->l4_len);
- pkt[j]->ol_flags &= ol_flags;
+ /* segment large packet and enqueue for sending */
+ } else if (i != num) {
+ /* segment the packet. */
+ rc = tcp_segmentation(pkt[i], segs, RTE_DIM(segs),
+ &s->tx.dst, mss);
+ if (rc < 0) {
+ rte_errno = -rc;
+ break;
}
- /* if possible, rearm stream write event. */
- } else if (rte_ring_free_count(s->tx.q) != 0 &&
- s->tx.ev != NULL)
- tle_event_raise(s->tx.ev);
+
+ rc = tx_segments(s, dev->tx.ol_flags[type], segs, rc);
+ if (rc == 0) {
+ /* free the large mbuf */
+ rte_pktmbuf_free(pkt[i]);
+ /* set the mbuf as consumed */
+ k++;
+ } else
+ /* no space left in tx queue */
+ break;
+ }
}
+ /* notify BE about more data to send */
+ if (k != 0)
+ txs_enqueue(s->s.ctx, s);
+ /* if possible, re-arm stream write event. */
+ if (rte_ring_free_count(s->tx.q) != 0 && s->tx.ev != NULL)
+ tle_event_raise(s->tx.ev);
+
rwl_release(&s->tx.use);
- return n;
+
+ return k;
}
/* send data and FIN (if needed) */
diff --git a/lib/libtle_l4p/tcp_tx_seg.h b/lib/libtle_l4p/tcp_tx_seg.h
new file mode 100644
index 0000000..3a80fdd
--- /dev/null
+++ b/lib/libtle_l4p/tcp_tx_seg.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016 Intel Corporation.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _TCP_TX_SEG_H_
+#define _TCP_TX_SEG_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Hand each of the first num mbufs stored in mb[] to rte_pktmbuf_free(). */
+static inline void
+free_segments(struct rte_mbuf *mb[], uint32_t num)
+{
+	struct rte_mbuf **cur = mb;
+	struct rte_mbuf **const end = mb + num;
+
+	while (cur != end)
+		rte_pktmbuf_free(*cur++);
+}
+
+/*
+ * Split packet mbin into a series of packets that each carry at most
+ * mss bytes of data.
+ * Every output packet is a chain made of a head mbuf allocated from
+ * dst->head_mp, followed by mbufs (also from dst->head_mp) that are
+ * attached with rte_pktmbuf_attach() to the input segments and have
+ * their data_off/data_len adjusted to the wanted slice.
+ *
+ * mbin  - input packet, may consist of several segments.
+ * mbout - array that receives the head mbufs of the produced packets.
+ * num   - number of entries available in mbout[].
+ * dst   - destination info, only dst->head_mp is used here.
+ * mss   - maximum amount of data per output packet.
+ *
+ * Returns the number of packets stored in mbout[] on success,
+ * -ENOSPC when mbout[] is too small, -ENOMEM when an mbuf allocation
+ * fails. On error all packets produced so far are freed.
+ * mbin itself is not freed by this function.
+ */
+static inline int32_t
+tcp_segmentation(struct rte_mbuf *mbin, struct rte_mbuf *mbout[], uint16_t num,
+	const struct tle_dest *dst, uint16_t mss)
+{
+	struct rte_mbuf *in_seg;
+	uint32_t nbseg, in_seg_data_pos;
+	uint32_t more_in_segs;
+
+	in_seg = mbin;
+	in_seg_data_pos = 0;
+	nbseg = 0;
+
+	/*
+	 * Check that mbout[] is big enough to hold all segments.
+	 * pkt_len is compared at full 32-bit width: truncating it to
+	 * 16 bits would let big packets slip through this check.
+	 */
+	if ((uint32_t)mss * num < mbin->pkt_len)
+		return -ENOSPC;
+
+	more_in_segs = 1;
+	while (more_in_segs) {
+		struct rte_mbuf *out_pkt, *out_seg_prev;
+		uint32_t more_out_segs;
+
+		/*
+		 * Never write past mbout[]: zero-length input segments
+		 * (or num == 0) could otherwise yield one packet too many.
+		 */
+		if (nbseg == num) {
+			free_segments(mbout, nbseg);
+			return -ENOSPC;
+		}
+
+		/* Allocate direct buffer */
+		out_pkt = rte_pktmbuf_alloc(dst->head_mp);
+		if (out_pkt == NULL) {
+			free_segments(mbout, nbseg);
+			return -ENOMEM;
+		}
+
+		out_seg_prev = out_pkt;
+		more_out_segs = 1;
+		while (more_out_segs && more_in_segs) {
+			struct rte_mbuf *out_seg;
+			uint32_t len;
+
+			/* Allocate indirect buffer */
+			out_seg = rte_pktmbuf_alloc(dst->head_mp);
+			if (out_seg == NULL) {
+				rte_pktmbuf_free(out_pkt);
+				free_segments(mbout, nbseg);
+				return -ENOMEM;
+			}
+			out_seg_prev->next = out_seg;
+			out_seg_prev = out_seg;
+
+			/* Prepare indirect buffer */
+			rte_pktmbuf_attach(out_seg, in_seg);
+
+			/*
+			 * Take only what still fits into the current output
+			 * packet, so that it never grows beyond mss.
+			 */
+			len = mss - out_pkt->pkt_len;
+			if (len > (in_seg->data_len - in_seg_data_pos))
+				len = in_seg->data_len - in_seg_data_pos;
+
+			out_seg->data_off = in_seg->data_off + in_seg_data_pos;
+			out_seg->data_len = (uint16_t)len;
+			out_pkt->pkt_len += len;
+			out_pkt->nb_segs += 1;
+			in_seg_data_pos += len;
+
+			/* Current output packet (i.e. fragment) done ? */
+			if (out_pkt->pkt_len >= mss)
+				more_out_segs = 0;
+
+			/* Current input segment done ? */
+			if (in_seg_data_pos == in_seg->data_len) {
+				in_seg = in_seg->next;
+				in_seg_data_pos = 0;
+
+				if (in_seg == NULL)
+					more_in_segs = 0;
+			}
+		}
+
+		/* Write the segment to the output list */
+		mbout[nbseg] = out_pkt;
+		nbseg++;
+	}
+
+	return nbseg;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TCP_TX_SEG_H_ */
diff --git a/lib/libtle_l4p/tle_ctx.h b/lib/libtle_l4p/tle_ctx.h
index a3516bf..144dbe7 100644
--- a/lib/libtle_l4p/tle_ctx.h
+++ b/lib/libtle_l4p/tle_ctx.h
@@ -97,6 +97,12 @@ enum {
TLE_PROTO_NUM
};
+enum { /* values accepted in tle_ctx_param.hash_alg */
+ TLE_JHASH, /**< sequence number hash computed with rte_jhash. */
+ TLE_SIPHASH, /**< sequence number hash computed with halfsiphash.h. */
+ TLE_HASH_NUM /**< number of algorithms, not a valid selection itself. */
+};
+
struct tle_ctx_param {
int32_t socket_id; /**< socket ID to allocate memory for. */
uint32_t proto; /**< L4 proto to handle. */
@@ -116,6 +122,11 @@ struct tle_ctx_param {
/**< will be called by send() to get IPv6 packet destination info. */
void *lookup6_data;
/**< opaque data pointer for lookup6() callback. */
+
+ uint32_t hash_alg;
+ /**< hash algorithm to be used to generate sequence number. */
+ rte_xmm_t secret_key;
+ /**< secret key to be used to calculate the hash. */
};
/**
diff --git a/test/gtest/Makefile b/test/gtest/Makefile
index 9b1341d..648c233 100644
--- a/test/gtest/Makefile
+++ b/test/gtest/Makefile
@@ -76,7 +76,7 @@ SRCS-y += test_tle_udp_destroy.cpp
SRCS-y += test_tle_udp_event.cpp
#SRCS-y += test_tle_udp_stream.cpp
SRCS-y += test_tle_udp_stream_gen.cpp
-#SRCS-y += test_tle_tcp_stream.cpp
+SRCS-y += test_tle_tcp_stream.cpp
#SRCS-y += test_tle_tcp_stream_gen.cpp
SYMLINK-y-app += test_scapy_gen.py