diff options
Diffstat (limited to 'lib')
64 files changed, 12149 insertions, 1258 deletions
diff --git a/lib/Makefile b/lib/Makefile index 6317af9..9bbe159 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -25,5 +25,6 @@ DIRS-y += libtle_misc DIRS-y += libtle_dring DIRS-y += libtle_timer DIRS-y += libtle_l4p +DIRS-y += libtle_glue include $(TLDK_ROOT)/mk/tle.subdir.mk diff --git a/lib/libtle_glue/Makefile b/lib/libtle_glue/Makefile new file mode 100644 index 0000000..13ceb82 --- /dev/null +++ b/lib/libtle_glue/Makefile @@ -0,0 +1,62 @@ +# Copyright (c) 2018 Ant Financial Services Group. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +ifeq ($(RTE_SDK),) +$(error "Please define RTE_SDK environment variable") +endif + +# Default target, can be overwritten by command line or environment +RTE_TARGET ?= x86_64-native-linuxapp-gcc + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = libtle_glue.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +EXPORT_MAP := tle_glue_version.map + +LIBABIVER := 1 + +# source files +SRCS-y += fd.c +SRCS-y += ctx.c +SRCS-y += arp.c +SRCS-y += icmp.c +SRCS-y += rxcb.c +SRCS-y += port.c +SRCS-y += sym.c +SRCS-y += init.c +SRCS-y += be.c +SRCS-y += epoll.c +SRCS-y += socket.c +SRCS-y += rxtx.c +SRCS-y += poll.c +SRCS-y += util.c +SRCS-y += tcp.c +SRCS-y += udp.c +SRCS-y += select.c + +ifeq ($(PACKETDRILL),y) +SRCS-y += packetdrill.c +endif + +# install this header file +SYMLINK-y-include += tle_glue.h + +# this lib dependencies +DEPDIRS-y += lib/libtle_l4p + +include $(TLDK_ROOT)/mk/tle.lib.mk diff --git a/lib/libtle_glue/arp.c b/lib/libtle_glue/arp.c new file mode 100644 index 0000000..9b13d9e --- /dev/null +++ b/lib/libtle_glue/arp.c @@ -0,0 +1,935 @@ +/* + * Copyright (c) 2019 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <sys/socket.h> +#include <netinet/in.h> +#include <netinet/icmp6.h> + +#include <rte_ethdev.h> +#include <rte_arp.h> +#include <rte_ip.h> +#include <rte_hash.h> +#include <rte_byteorder.h> + +#include "log.h" +#include "ctx.h" +#include "internal.h" +#include "tle_timer.h" +#include "util.h" +#include "ndp.h" +#include "gateway.h" + +#define IPV6_MULTI_MASK_LEN 13 + +const struct in6_addr ipv6_all_multi = {{{ + 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 +}}}; + +const struct in6_addr ipv6_multi_mask = {{{ + 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}}}; + +static inline void +set_multicast_mac_v6(struct ether_addr *addr, const struct in6_addr *ip6_addr) +{ + unaligned_uint16_t *ea_words = (unaligned_uint16_t *)addr; + + ea_words[0] = 0x3333; + ea_words[1] = ip6_addr->__in6_u.__u6_addr16[6]; + ea_words[2] = ip6_addr->__in6_u.__u6_addr16[7]; +} + +static inline void +set_multicast_ipv6(uint8_t ipv6[16]) +{ + rte_memcpy(ipv6, &ipv6_multi_mask, IPV6_MULTI_MASK_LEN); +} + +static inline void +set_broadcast_addr(struct ether_addr *addr) +{ + unaligned_uint16_t *ea_words = (unaligned_uint16_t *)addr; + + ea_words[0] = 0xFFFF; + ea_words[1] = 0xFFFF; + ea_words[2] = 0xFFFF; +} + +static inline bool +match_addr(struct glue_ctx *ctx, struct rte_mbuf *pkt, const struct in_addr *addr) +{ + struct ipv4_hdr *ip4h; + const struct in_addr *gw; + + ip4h = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *, pkt->l2_len); + if ((ip4h->version_ihl >> 4) != 4) + return false; + + gw = ipv4_gateway_lookup(ctx, (struct in_addr *)&ip4h->dst_addr); + if (gw->s_addr != addr->s_addr) + return false; + + return true; +} + +static inline bool +match_addr6(struct glue_ctx *ctx, struct rte_mbuf *pkt, + const struct in6_addr *addr) +{ + struct ipv6_hdr *ip6h; + const struct in6_addr *gw; + + ip6h = rte_pktmbuf_mtod_offset(pkt, struct ipv6_hdr *, pkt->l2_len); + if 
(((ip6h->vtc_flow & 0xffffff00) >> 4) != 6) + return false; + + gw = ipv6_gateway_lookup(ctx, (struct in6_addr *)&ip6h->dst_addr); + if (memcmp(gw, addr, sizeof(struct in6_addr)) != 0) + return false; + + return true; +} + +static inline void +send_pkts(struct glue_ctx *ctx, struct rte_mbuf **pkts, uint16_t nb, + const char *prefix) +{ + uint16_t i, sent; + + sent = rte_eth_tx_burst(ctx->port_id, ctx->queue_id, pkts, nb); + for (i = sent; i < nb; i++) + rte_pktmbuf_free(pkts[i]); + + RTE_SET_USED(prefix); + TRACE("%s, send %u/%u pkts", prefix, sent, nb); +} + +static void +flush_arp_wait(int af, struct glue_ctx *ctx, const void *addr, + struct ether_addr *e_addr) +{ + struct rte_mbuf *pkt, *pre, *pkts[MAX_PKTS_BURST]; + struct ether_hdr *eth; + uint32_t nb_pkts; + + pre = NULL; + nb_pkts = 0; + for (pkt = ctx->arp_wait; pkt; pkt = pkt->next_pkt) { + if ((af == AF_INET && + !match_addr(ctx, pkt, (const struct in_addr *)addr)) || + (af == AF_INET6 && + !match_addr6(ctx, pkt, (const struct in6_addr *)addr))) { + pre = pkt; + continue; + } + + if (pre == NULL) + ctx->arp_wait = pkt->next_pkt; + else + pre->next_pkt = pkt->next_pkt; + eth = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + ether_addr_copy(e_addr, ð->d_addr); + pkts[nb_pkts++] = pkt; + if (nb_pkts == MAX_PKTS_BURST) { + send_pkts(ctx, pkts, nb_pkts, "ARP learned"); + nb_pkts = 0; + } + } + if (nb_pkts) + send_pkts(ctx, pkts, nb_pkts, "ARP learned"); +} + +static inline void +ipv4_dst_set(struct glue_ctx *ctx, struct tle_dest *dst, + const struct in_addr *addr, struct ether_addr *e_addr) +{ + struct ether_hdr *eth; + struct ipv4_hdr *ip4h; + + if (is_ipv4_loopback_addr(addr->s_addr, ctx)) + dst->mtu = MTU_LOOPBACK; + else + dst->mtu = MTU_NORMAL; + dst->l2_len = sizeof(*eth); + dst->head_mp = get_mempool_by_socket(0); /* fix me */ + + eth = (struct ether_hdr *)dst->hdr; + ether_addr_copy(&ctx->mac, ð->s_addr); + if (e_addr == NULL) + set_broadcast_addr(ð->d_addr); + else + ether_addr_copy(e_addr, ð->d_addr); + 
eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); + + dst->l3_len = sizeof(*ip4h); + ip4h = (struct ipv4_hdr *)(eth + 1); + ip4h->dst_addr = addr->s_addr; + ip4h->version_ihl = 4 << 4 | sizeof(*ip4h) / IPV4_IHL_MULTIPLIER; + ip4h->time_to_live = 64; + ip4h->next_proto_id = IPPROTO_TCP; +} + +static inline void +ipv6_dst_set(struct glue_ctx *ctx, struct tle_dest *dst, + const struct in6_addr *addr, struct ether_addr *e_addr) +{ + struct ether_hdr *eth; + struct ipv6_hdr *ip6h; + + if (is_ipv6_loopback_addr(addr, ctx)) + dst->mtu = MTU_LOOPBACK; + else + dst->mtu = MTU_NORMAL; + dst->l2_len = sizeof(*eth); + dst->head_mp = get_mempool_by_socket(0); /* fix me */ + + eth = (struct ether_hdr *)dst->hdr; + ether_addr_copy(&ctx->mac, ð->s_addr); + if (e_addr == NULL) + set_broadcast_addr(ð->d_addr); + else + ether_addr_copy(e_addr, ð->d_addr); + eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6); + + dst->l3_len = sizeof(*ip6h); + ip6h = (struct ipv6_hdr *)(eth + 1); + rte_memcpy(ip6h->dst_addr, addr, sizeof(struct in6_addr)); + ip6h->vtc_flow = 6 << 4; + ip6h->hop_limits = 255; + ip6h->proto = IPPROTO_TCP; +} + +#define arp_timer(ctx, entry, interval) \ + tle_timer_start(ctx->arp_tmw, entry, interval) + +void +ipv4_dst_add(struct glue_ctx *ctx, const struct in_addr *addr, + struct ether_addr *e_addr) +{ + struct arp_entry *entry; + struct tle_dest *dst; + struct ether_hdr *eth; + uint64_t idx; + bool check_wait; + int rc; + + rc = rte_hash_lookup_data(ctx->arp_hash, addr, (void**)&idx); + if (rc >= 0) { + entry = &ctx->arp4[idx]; + dst = &entry->dst; + eth = (struct ether_hdr *)dst->hdr; + check_wait = is_broadcast_ether_addr(ð->d_addr); + + /* update arp entry, reset timer */ + ether_addr_copy(e_addr, ð->d_addr); + print_arp(AF_INET, addr, ð->d_addr, "UPDATE"); + if(entry->timer != NULL) + tle_timer_stop(ctx->arp_tmw, entry->timer); + entry->timer = arp_timer(ctx, entry, ARP_ENTRY_EXPIRE); + entry->inuse = 0; + entry->req_time = 0; + + if(check_wait) + 
flush_arp_wait(AF_INET, ctx, addr, e_addr); + + return; + } + + idx = ctx->arp4_num; + entry = &ctx->arp4[idx]; + dst = &entry->dst; + + ipv4_dst_set(ctx, dst, addr, e_addr); + if (e_addr == NULL) { + entry->timer = arp_timer(ctx, entry, ARP_REQUEST_EXPIRE); + entry->req_time = 1; + } else { + entry->timer = arp_timer(ctx, entry, ARP_ENTRY_EXPIRE); + entry->inuse = 0; + } + + rc = rte_hash_add_key_data(ctx->arp_hash, addr, (void *)idx); + if (rc < 0) + rte_panic("Failed to add ARP entry"); + + ctx->arp4_num++; + eth = (struct ether_hdr *)dst->hdr; + print_arp(AF_INET, addr, ð->d_addr, "ADD"); +} + +void +ipv6_dst_add(struct glue_ctx *ctx, const struct in6_addr *addr, + struct ether_addr *e_addr) +{ + struct arp_entry* entry; + struct tle_dest *dst; + struct ether_hdr *eth; + uint64_t idx; + bool check_wait; + int rc; + + rc = rte_hash_lookup_data(ctx->arp6_hash, addr, (void**)&idx); + if (rc >= 0) { + entry = &ctx->arp6[idx]; + dst = &entry->dst; + eth = (struct ether_hdr *)dst->hdr; + check_wait = is_broadcast_ether_addr(ð->d_addr); + + /* update arp entry, reset timer */ + ether_addr_copy(e_addr, ð->d_addr); + print_arp(AF_INET6, addr, ð->d_addr, "UPDATE"); + if(entry->timer != NULL) + tle_timer_stop(ctx->arp_tmw, entry->timer); + entry->timer = arp_timer(ctx, entry, ARP_ENTRY_EXPIRE); + entry->inuse = 0; + entry->req_time = 0; + + if(check_wait) + flush_arp_wait(AF_INET6, ctx, addr, e_addr); + + return; + } + + idx = ctx->arp6_num; + entry = &ctx->arp6[idx]; + dst = &entry->dst; + + ipv6_dst_set(ctx, dst, addr, e_addr); + if (e_addr == NULL) { + entry->timer = arp_timer(ctx, entry, ARP_REQUEST_EXPIRE); + entry->req_time = 1; + } else { + entry->timer = arp_timer(ctx, entry, ARP_ENTRY_EXPIRE); + entry->inuse = 0; + } + + rc = rte_hash_add_key_data(ctx->arp6_hash, addr, (void *)idx); + if (rc < 0) + rte_panic("Failed to add ARP6 entry"); + + eth = (struct ether_hdr *)dst->hdr; + print_arp(AF_INET6, addr, ð->d_addr, "ADD"); + ctx->arp6_num++; +} + +static inline 
int +arp_ip_exist(const struct rte_hash *h, const void *ip) +{ + return rte_hash_lookup(h, ip) >= 0; +} + +struct rte_mbuf * +ndp_recv(struct glue_ctx *ctx, struct rte_mbuf *m, + uint32_t l2len, uint32_t l3len) +{ + struct ether_hdr *eth_h; + struct ipv6_hdr *ipv6_h; + struct nd_neighbor_solicit *ns_h; + struct nd_opt_hdr *opth; + + eth_h = rte_pktmbuf_mtod(m, struct ether_hdr *); + ipv6_h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, l2len); + ns_h = rte_pktmbuf_mtod_offset(m, struct nd_neighbor_solicit *, + l2len + l3len); + + if (ipv6_h->payload_len < sizeof(struct nd_neighbor_solicit)) + goto drop; + + /* We only learn mac when: + * 1. Normal NS for my ip, whose TargetAddr is me + * 2. Normal NA to my ip, whose DstIpv6 is me + * 3. Unsolicited NA, and we already have an entry for that IP + */ + + /* NS message */ + if (ns_h->nd_ns_hdr.icmp6_type == ND_NEIGHBOR_SOLICIT) { + /* not support Duplicate Address Detect NS yet */ + if (IN6_IS_ADDR_UNSPECIFIED(ipv6_h->src_addr)) + goto drop; + + if (memcmp(&ns_h->nd_ns_target, &ctx->ipv6, sizeof(ctx->ipv6))) + goto drop; + + /* NS message, target is my ipv6 addr */ + opth = (struct nd_opt_hdr*)(ns_h + 1); + ipv6_dst_add(ctx, (struct in6_addr *)ipv6_h->src_addr, + (struct ether_addr *)(opth + 1)); + + /* response NA message */ + ether_addr_copy(&ctx->mac, ð_h->s_addr); + ether_addr_copy((struct ether_addr*)(opth + 1), + ð_h->d_addr); + + rte_memcpy(ipv6_h->dst_addr, ipv6_h->src_addr, + sizeof(struct in6_addr)); + rte_memcpy(ipv6_h->src_addr, &ctx->ipv6, + sizeof(struct in6_addr)); + + ns_h->nd_ns_hdr.icmp6_type = ND_NEIGHBOR_ADVERT; + ns_h->nd_ns_hdr.icmp6_dataun.icmp6_un_data8[0] = 0x60; + ns_h->nd_ns_hdr.icmp6_cksum = 0; + + opth->nd_opt_type = ND_OPT_TARGET_LINKLAYER_ADDR; + ether_addr_copy(&ctx->mac, (struct ether_addr*)(opth + 1)); + + ns_h->nd_ns_hdr.icmp6_cksum = rte_ipv6_udptcp_cksum(ipv6_h, ns_h); + + if (m->pkt_len < ETHER_MIN_LEN) + rte_pktmbuf_append(m, ETHER_MIN_LEN - m->pkt_len); + + send_pkts(ctx, &m, 1, 
"NDP NA reply"); + return NULL; + } + + /* NA message */ + if (memcmp(ipv6_h->dst_addr, &ctx->ipv6, sizeof(ctx->ipv6)) == 0 || + (memcmp(ipv6_h->dst_addr, &ipv6_all_multi, sizeof(ctx->ipv6)) == 0 && + arp_ip_exist(ctx->arp6_hash, &ns_h->nd_ns_target))) { + opth = (struct nd_opt_hdr *)(ns_h + 1); + ipv6_dst_add(ctx, &ns_h->nd_ns_target, + (struct ether_addr *)(opth + 1)); + } + +drop: + rte_pktmbuf_free(m); + return NULL; +} + +struct rte_mbuf * +arp_recv(struct glue_ctx *ctx, struct rte_mbuf *m, uint32_t l2len) +{ + struct ether_hdr *eth; + struct arp_hdr *ahdr; + struct arp_ipv4 *adata; + uint32_t tip; + + eth = rte_pktmbuf_mtod(m, struct ether_hdr *); + ahdr = rte_pktmbuf_mtod_offset(m, struct arp_hdr *, l2len); + + if (ahdr->arp_hrd != rte_be_to_cpu_16(ARP_HRD_ETHER) || + ahdr->arp_pro != rte_be_to_cpu_16(ETHER_TYPE_IPv4)) + goto drop; + + adata = &ahdr->arp_data; + tip = adata->arp_tip; + + /* We only learn mac when: + * 1. tip is me, or + * 2. this is a RARP, and we already have an entry for that IP + */ + if (tip == ctx->ipv4 || + (tip == INADDR_ANY && arp_ip_exist(ctx->arp_hash, &adata->arp_sip))) + ipv4_dst_add(ctx, (struct in_addr *)&adata->arp_sip, + &adata->arp_sha); + + /* We only do ARP reply when: + * 1. tip is me. 
+ */ + if (ahdr->arp_op == rte_be_to_cpu_16(ARP_OP_REQUEST) && + tip == ctx->ipv4) { + eth->d_addr = eth->s_addr; + eth->s_addr = ctx->mac; + ahdr->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY); + + adata->arp_tip = adata->arp_sip; + adata->arp_sip = tip; + + adata->arp_tha = adata->arp_sha; + adata->arp_sha = ctx->mac; + if (m->pkt_len < ETHER_MIN_LEN) + rte_pktmbuf_append(m, ETHER_MIN_LEN - m->pkt_len); + send_pkts(ctx, &m, 1, "ARP reply"); + return NULL; + } +drop: + rte_pktmbuf_free(m); + return NULL; +} + +static void +arp6_send_request(struct glue_ctx *ctx, const struct in6_addr *addr) +{ + struct rte_mempool *mp = get_mempool_by_socket(0); /* fix me */ + struct ether_hdr *eth; + struct ipv6_hdr *ip6h; + struct nd_neighbor_solicit *nsh; + struct nd_opt_hdr *opth; + struct ether_addr *sll_addr; + struct rte_mbuf *m; +#ifdef ENABLE_TRACE + char str_ip[64]; +#endif + + m = rte_pktmbuf_alloc(mp); + if (m == NULL) + rte_panic("Failed to alloc mbuf for ndp ns request"); + + eth = (struct ether_hdr *)rte_pktmbuf_append(m, sizeof(*eth)); + ether_addr_copy(&ctx->mac, ð->s_addr); + set_multicast_mac_v6(ð->d_addr, addr); + eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6); + + ip6h = (struct ipv6_hdr*)rte_pktmbuf_append(m, sizeof(struct ipv6_hdr)); + ip6h->vtc_flow = 6 << 4; + ip6h->payload_len = sizeof(struct nd_neighbor_solicit) + + sizeof(struct nd_opt_hdr) + + sizeof(struct ether_addr); + ip6h->proto = IPPROTO_ICMPV6; + ip6h->hop_limits = 255; + rte_memcpy(ip6h->src_addr, &ctx->ipv6, sizeof(struct in6_addr)); + rte_memcpy(ip6h->dst_addr, addr, sizeof(struct in6_addr)); + set_multicast_ipv6(ip6h->dst_addr); + + nsh = (struct nd_neighbor_solicit *)rte_pktmbuf_append(m, sizeof(*nsh)); + nsh->nd_ns_hdr.icmp6_type = ND_NEIGHBOR_SOLICIT; + nsh->nd_ns_hdr.icmp6_code = 0; + nsh->nd_ns_hdr.icmp6_cksum = 0; + nsh->nd_ns_hdr.icmp6_dataun.icmp6_un_data32[0] = 0; + rte_memcpy(&nsh->nd_ns_target, addr, sizeof(struct in6_addr)); + + opth = (struct nd_opt_hdr *)rte_pktmbuf_append(m, 
sizeof(*opth)); + opth->nd_opt_type = ND_OPT_SOURCE_LINKLAYER_ADDR; + opth->nd_opt_len = 1; + + sll_addr = (struct ether_addr *)rte_pktmbuf_append(m, sizeof(*sll_addr)); + ether_addr_copy(&ctx->mac, sll_addr); + + nsh->nd_ns_hdr.icmp6_cksum = rte_ipv6_udptcp_cksum(ip6h, nsh); + + send_pkts(ctx, &m, 1, "ARP6 request"); +} + +static void +arp_send_request(struct glue_ctx *ctx, const struct in_addr *addr) +{ + struct rte_mempool *mp = get_mempool_by_socket(0); /* fix me */ + struct ether_hdr *eth; + struct arp_hdr *ahdr; + struct arp_ipv4 *adata; + struct rte_mbuf *m; + uint16_t pad_len, i; + char *pad; + + m = rte_pktmbuf_alloc(mp); + if (m == NULL) + rte_panic("Failed to alloc mbuf for arp request"); + + eth = (struct ether_hdr *)rte_pktmbuf_append(m, sizeof(*eth)); + ether_addr_copy(&ctx->mac, ð->s_addr); + set_broadcast_addr(ð->d_addr); + eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_ARP); + + ahdr = (struct arp_hdr *)rte_pktmbuf_append(m, sizeof(*ahdr)); + ahdr->arp_hrd = rte_be_to_cpu_16(ARP_HRD_ETHER); + ahdr->arp_pro = rte_be_to_cpu_16(ETHER_TYPE_IPv4); + ahdr->arp_hln = sizeof(struct ether_addr); + ahdr->arp_pln = sizeof(*addr); + ahdr->arp_op = rte_be_to_cpu_16(ARP_OP_REQUEST); + adata = &ahdr->arp_data; + ether_addr_copy(&ctx->mac, &adata->arp_sha); + adata->arp_sip = ctx->ipv4; + set_broadcast_addr(&adata->arp_tha); + adata->arp_tip = addr->s_addr; + + pad_len = ETHER_MIN_LEN - sizeof(*eth) - sizeof(*ahdr); + pad = rte_pktmbuf_append(m, pad_len); + for (i = 0; i < pad_len; ++i) + pad[i] = 0; + + send_pkts(ctx, &m, 1, "ARP request"); +} + +#define addr2ipv4(addr) (&((const struct sockaddr_in *)addr)->sin_addr) +#define addr2ipv6(addr) (&((const struct sockaddr_in6 *)addr)->sin6_addr) +void +mac_check(struct glue_ctx *ctx, const struct sockaddr *addr) +{ + int rc; + const struct in_addr *addr4 = NULL; + const struct in6_addr *addr6 = NULL; + + if(addr->sa_family == AF_INET) { + addr4 = ipv4_gateway_lookup(ctx, addr2ipv4(addr)); + rc = 
rte_hash_lookup(ctx->arp_hash, addr4); + } else { + addr6 = ipv6_gateway_lookup(ctx, addr2ipv6(addr)); + rc = rte_hash_lookup(ctx->arp6_hash, addr6); + } + if (rc >= 0) + return; + + if(addr->sa_family == AF_INET) + arp_send_request(ctx, addr4); + else + arp6_send_request(ctx, addr6); +} + +static int +arp_inherit(struct glue_ctx *ctx, const struct in_addr *addr) +{ + struct glue_ctx *next; + struct tle_dest *dst; + struct ether_hdr *eth; + uint64_t idx; + uint16_t i; + int rc; + + for (i = 0; i < nb_ctx; i++) { + next = &ctx_array[i++]; + if (next == NULL || next == ctx) + continue; + + rc = rte_hash_lookup_data(next->arp_hash, addr, (void **)&idx); + if (rc < 0) + continue; + + dst = &next->arp4[idx].dst; + eth = (struct ether_hdr *)dst->hdr; + ipv4_dst_add(ctx, addr, ð->d_addr); + return 0; + } + + return -1; +} + +static int +arp6_inherit(struct glue_ctx *ctx, const struct in6_addr *addr) +{ + struct glue_ctx *next; + struct ether_hdr *eth; + struct tle_dest *dst; + uint64_t idx; + uint16_t i; + int rc; + + for (i = 0; i < nb_ctx; i++) { + next = &ctx_array[i++]; + if (next == NULL || next == ctx) + continue; + + rc = rte_hash_lookup_data(next->arp6_hash, addr, (void **)&idx); + if (rc < 0) + continue; + + dst = &next->arp6[idx].dst; + eth = (struct ether_hdr *)dst->hdr; + ipv6_dst_add(ctx, addr, ð->d_addr); + return 0; + } + + return -1; +} + +#define len_dest(dst) \ + (offsetof(struct tle_dest, hdr) + dst->l2_len + dst->l3_len) + +int +arp_ipv6_dst_lookup(void *data, const struct in6_addr *addr, + struct tle_dest *res, int proto) +{ + int32_t rc; + uint64_t idx; + struct tle_dest *dst; + struct ipv6_hdr *ip6h; + struct glue_ctx *ctx = data; + + if (is_ipv6_loopback_addr(addr, ctx)) { + dst = &ctx->lb_dst_v6; + rte_memcpy(res, dst, len_dest(dst)); + if (proto == IPPROTO_TCP) + res->dev = ctx->lb_tcp_dev; + else + res->dev = ctx->lb_udp_dev; + rc = 0; + goto set_proto; + } + + rc = rte_hash_lookup_data(ctx->arp6_hash, addr, (void **)&idx); + if (rc >= 0) { + if 
(!ctx->arp6[idx].inuse) + ctx->arp6[idx].inuse = 1; + dst = &ctx->arp6[idx].dst; + rte_memcpy(res, dst, len_dest(dst)); + } else { + memset(res, 0, sizeof(*res)); + ipv6_dst_set(ctx, res, addr, NULL); + rc = 0; + } + + if (proto == IPPROTO_TCP) + res->dev = ctx->tcp_dev; + else + res->dev = ctx->udp_dev; + +set_proto: + ip6h = (struct ipv6_hdr *)&res->hdr[res->l2_len]; + ip6h->proto = proto; + return rc; +} + +int +arp_ipv4_dst_lookup(void *data, const struct in_addr *addr, + struct tle_dest *res, int proto) +{ + int32_t rc; + uint64_t idx; + struct tle_dest *dst; + struct ipv4_hdr *ip4h; + struct glue_ctx *ctx = data; + + if (is_ipv4_loopback_addr(addr->s_addr, ctx)) { + dst = &ctx->lb_dst; + rte_memcpy(res, dst, len_dest(dst)); + if (proto == IPPROTO_TCP) + res->dev = ctx->lb_tcp_dev; + else + res->dev = ctx->lb_udp_dev; + rc = 0; + goto set_proto; + } + + rc = rte_hash_lookup_data(ctx->arp_hash, addr, (void **)&idx); + if (rc >= 0) { + if (!ctx->arp4[idx].inuse) + ctx->arp4[idx].inuse = 1; + dst = &ctx->arp4[idx].dst; + rte_memcpy(res, dst, len_dest(dst)); + } else { + memset(res, 0, sizeof(*res)); + ipv4_dst_set(ctx, res, addr, NULL); + rc = 0; + } + + if (proto == IPPROTO_TCP) + res->dev = ctx->tcp_dev; + else + res->dev = ctx->udp_dev; + +set_proto: + ip4h = (struct ipv4_hdr *)&res->hdr[res->l2_len]; + ip4h->next_proto_id = proto; + return rc; +} + +int +mac_fill(struct glue_ctx *ctx, struct rte_mbuf *m) +{ + int32_t rc; + uint64_t idx; + uint8_t ipver; + struct arp_entry* entry; + struct ether_addr *dst, *dst1; + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + const struct in_addr *addr4 = NULL; + const struct in6_addr *addr6 = NULL; + + dst = rte_pktmbuf_mtod(m, struct ether_addr *); + if (!is_broadcast_ether_addr(dst)) + return 0; + + ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); + ipv6_hdr = (struct ipv6_hdr*)ipv4_hdr; + ipver = ipv4_hdr->version_ihl >> 4; + +retry: + if (ipver == 4) { + addr4 = (struct in_addr 
*)&ipv4_hdr->dst_addr; + addr4 = ipv4_gateway_lookup(ctx, addr4); + rc = rte_hash_lookup_data(ctx->arp_hash, addr4, (void **)&idx); + if (rc >= 0) + entry = &ctx->arp4[idx]; + } else { + addr6 = (struct in6_addr *)ipv6_hdr->dst_addr; + addr6 = ipv6_gateway_lookup(ctx, addr6); + rc = rte_hash_lookup_data(ctx->arp6_hash, addr6, (void **)&idx); + if (rc >= 0) + entry = &ctx->arp6[idx]; + } + + if (rc >= 0) { + dst1 = (struct ether_addr *)entry->dst.hdr; + if (!is_broadcast_ether_addr(dst1)) { + ether_addr_copy(dst1 , dst); + return 0; + } + + if (ipver == 4) + arp_send_request(ctx, addr4); + else + arp6_send_request(ctx, addr6); + entry->req_time++; + if (entry->timer != NULL) + tle_timer_stop(ctx->arp_tmw, entry->timer); + entry->timer = arp_timer(ctx, entry, ARP_REQUEST_EXPIRE); + } else { + if (ipver == 4) { + if (arp_inherit(ctx, addr4) == 0) + goto retry; + ipv4_dst_add(ctx, addr4, NULL); + arp_send_request(ctx, addr4); + } else { + if (arp6_inherit(ctx, addr6) == 0) + goto retry; + ipv6_dst_add(ctx, addr6, NULL); + arp6_send_request(ctx, addr6); + } + } + + return -1; +} + +static inline const struct in_addr * +get_addr_from_entry(struct arp_entry *e) +{ + const struct ipv4_hdr *ipv4; + const struct in_addr *addr; + + ipv4 = (struct ipv4_hdr *)(e->dst.hdr + e->dst.l2_len); + addr = (const struct in_addr *)&ipv4->dst_addr; + return addr; +} + +static inline const struct in6_addr * +get_addr6_from_entry(struct arp_entry *e) +{ + const struct ipv6_hdr *ipv6; + const struct in6_addr *addr; + + ipv6 = (struct ipv6_hdr *)(e->dst.hdr + e->dst.l2_len); + addr = (const struct in6_addr *)ipv6->dst_addr; + return addr; +} + +static void +drop_arp_wait(int af, struct glue_ctx *ctx, const void *addr) +{ + struct rte_mbuf *pkt, *pre; + + for (pre = NULL, pkt = ctx->arp_wait; pkt; pkt = pkt->next_pkt) { + if ((af == AF_INET && + !match_addr(ctx, pkt, (const struct in_addr *)addr)) || + (af == AF_INET6 && + !match_addr6(ctx, pkt, (const struct in6_addr *)addr))) { + pre = pkt; 
+ continue; + } + + if (pre == NULL) + ctx->arp_wait = pkt->next_pkt; + else + pre->next_pkt = pkt->next_pkt; + + rte_pktmbuf_free(pkt); + } +} + +static void +arp_entry_del(struct glue_ctx *ctx, int af, struct arp_entry *e) +{ + const void *addr; + struct arp_entry *t; + uint32_t idx, last_idx; + const struct rte_hash *h; + + if (af == AF_INET) { + addr = get_addr_from_entry(e); + t = ctx->arp4; + h = ctx->arp_hash; + last_idx = ctx->arp4_num - 1; + } else { + addr = get_addr6_from_entry(e); + t = ctx->arp6; + h = ctx->arp6_hash; + last_idx = ctx->arp6_num - 1; + } + + idx = e - t; + if (idx > last_idx) /* entry has been moved */ + return; + + print_arp(af, addr, (struct ether_addr *)e->dst.hdr, "DELETE"); + + if (e->req_time > ARP_MAX_REQ_TIMES) + drop_arp_wait(af, ctx, addr); + + rte_hash_del_key(h, addr); + + if (idx < last_idx) { + /* replace current entry with last entry */ + rte_memcpy(e, t + last_idx, sizeof(*e)); + rte_hash_add_key_data(h, addr, (void *)(uintptr_t)idx); + tle_timer_stop(ctx->arp_tmw, t[last_idx].timer); + if (e->req_time > 0) + e->timer = arp_timer(ctx, e, ARP_REQUEST_EXPIRE); + else { + e->timer = arp_timer(ctx, e, ARP_ENTRY_EXPIRE); + e->inuse = 0; + } + } + + /* we always delete the last entry to keep it contiguous */ + t[last_idx].timer = NULL; + t[last_idx].inuse = 0; + t[last_idx].req_time = 0; + if (af == AF_INET) + ctx->arp4_num--; + else + ctx->arp6_num--; +} + +void +mac_timeout(struct glue_ctx *ctx) +{ +#define ARP_PROCESS_MAX 32 + struct arp_entry *entry[ARP_PROCESS_MAX], *e; + struct tle_timer_wheel *tw; + const struct in_addr *addr4; + const struct in6_addr *addr6; + uint32_t i, cnt; + uint8_t *l3h; + + tw = ctx->arp_tmw; + tle_timer_expire(tw, rte_get_tsc_cycles() >> ctx->cycles_ms_shift); + cnt = tle_timer_get_expired_bulk(tw, (void**)entry, ARP_PROCESS_MAX); + if (cnt == 0) + return; + + for(i = 0; i < cnt; i++) { + e = entry[i]; + e->timer = NULL; + l3h = e->dst.hdr + e->dst.l2_len; + if (e->inuse || + (e->req_time > 0 && 
e->req_time <= ARP_MAX_REQ_TIMES)) { + if (((struct ipv4_hdr *)l3h)->version_ihl >> 4 == 4) { + addr4 = get_addr_from_entry(e); + arp_send_request(ctx, addr4); + } else { + addr6 = get_addr6_from_entry(e); + arp6_send_request(ctx, addr6); + } + + e->timer = arp_timer(ctx, e, ARP_REQUEST_EXPIRE); + e->inuse = 0; + e->req_time++; + } else { + if (((struct ipv4_hdr *)l3h)->version_ihl >> 4 == 4) + arp_entry_del(ctx, AF_INET, e); + else + arp_entry_del(ctx, AF_INET6, e); + } + } +} diff --git a/lib/libtle_glue/be.c b/lib/libtle_glue/be.c new file mode 100644 index 0000000..7e2227e --- /dev/null +++ b/lib/libtle_glue/be.c @@ -0,0 +1,256 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <rte_ethdev.h> +#include <rte_ip.h> + +#include <tle_tcp.h> +#include <tle_udp.h> + +#include "config.h" +#include "log.h" +#include "util.h" +#include "internal.h" + +static inline void +rte_pktmbuf_copy_seg(struct rte_mbuf *dst, struct rte_mbuf* src) +{ + size_t offset = offsetof(struct rte_mbuf, data_off); + rte_memcpy((char*)dst + offset, (char*)src + offset, + sizeof(struct rte_mbuf) - offset); + rte_mbuf_refcnt_set(dst, 1); + dst->ol_flags &= ~IND_ATTACHED_MBUF; + rte_memcpy(rte_pktmbuf_mtod(dst, void*), rte_pktmbuf_mtod(src, void*), + src->data_len); +} + +static inline struct rte_mbuf* +rte_pktmbuf_copy(struct rte_mbuf *md, struct rte_mempool* mp) +{ + struct rte_mbuf *mc, *mi, **prev; + uint32_t pktlen; + uint16_t nseg; + + if (unlikely ((mc = rte_pktmbuf_alloc(mp)) == NULL)) + return NULL; + + mi = mc; + prev = &mi->next; + pktlen = md->pkt_len; + nseg = 0; + + do { + nseg++; + rte_pktmbuf_copy_seg(mi, md); + *prev = mi; + prev = &mi->next; + } while ((md = md->next) != NULL && + (mi = rte_pktmbuf_alloc(mp)) != NULL); + + *prev = NULL; + mc->nb_segs = nseg; + mc->pkt_len = pktlen; + + /* Allocation of new indirect segment failed */ + if (unlikely(mi == NULL)) { + rte_pktmbuf_free(mc); + return NULL; + } + + __rte_mbuf_sanity_check(mc, 1); + return mc; +} + +static inline int +process_rx_pkts(struct glue_ctx *ctx, struct rte_mbuf *pkts[], + uint32_t n, uint8_t from_loopback) +{ + uint32_t i, j, k, jt, ju, jd; + struct rte_mbuf *tcp[MAX_PKTS_BURST]; + struct rte_mbuf *udp[MAX_PKTS_BURST]; + struct rte_mbuf *drop[MAX_PKTS_BURST]; + int32_t rc[MAX_PKTS_BURST]; + struct tle_dev *tcp_dev, *udp_dev; + struct rte_mempool *mp; + struct rte_mbuf *tmp; + uint64_t ts; + + if (n == 0) + return 0; + + if (unlikely(from_loopback)) { + tcp_dev = ctx->lb_tcp_dev; + udp_dev = ctx->lb_udp_dev; + mp = pkts[0]->pool; + for (i = 0; i < n; i++) { + tmp = rte_pktmbuf_copy(pkts[i], mp); + if (tmp != NULL) { + rte_pktmbuf_free(pkts[i]); + pkts[i] = tmp; + 
pkts[i]->ol_flags |= PKT_RX_IP_CKSUM_GOOD; + pkts[i]->ol_flags |= PKT_RX_L4_CKSUM_GOOD; + } else { + k = i; + for (; i < n; i++) { + rte_pktmbuf_free(pkts[i]); + } + n = k; + } + } + } else { + tcp_dev = ctx->tcp_dev; + udp_dev = ctx->udp_dev; + } + + ts = rte_get_tsc_cycles() >> (ctx->cycles_ms_shift - 10); + + for (j = 0, jt = 0, ju = 0, jd = 0; j < n; j++) { + pkts[j]->timestamp = ts; + switch (pkts[j]->packet_type & RTE_PTYPE_L4_MASK) { + case RTE_PTYPE_L4_TCP: + tcp[jt++] = pkts[j]; + break; + case RTE_PTYPE_L4_UDP: + udp[ju++] = pkts[j]; + break; + case RTE_PTYPE_L4_ICMP: + /* TODO */ + case RTE_PTYPE_L4_FRAG: + /* TODO */ + default: + drop[jd++] = pkts[j]; + } + } + + if (jt > 0) { + k = tle_tcp_rx_bulk(tcp_dev, tcp, drop + jd, rc, jt); + jd += jt - k; + + TRACE("(port=%u, queue=%u), %u/%u (TCP) pkts are received", + port_id, queue_id, k, n); + } + + if (ju > 0) { + k = tle_udp_rx_bulk(udp_dev, udp, drop + jd, rc, ju); + jd += ju - k; + + TRACE("(port=%u, queue=%u), %u/%u (UDP) pkts are received", + port_id, queue_id, k, n); + } + + for (j = 0; j < jd; j++) + rte_pktmbuf_free(drop[j]); + + return jt + ju - jd; +} + +static inline int +be_rx(struct glue_ctx *ctx) +{ + int ret; + uint32_t n; + struct rte_mbuf *pkts[MAX_PKTS_BURST]; + uint16_t port_id = ctx->port_id; + uint16_t queue_id = ctx->queue_id; + + n = rte_eth_rx_burst(port_id, queue_id, pkts, RTE_DIM(pkts)); + ret = process_rx_pkts(ctx, pkts, n, 0); + + return ret; +} + +int +be_tx(struct glue_ctx *ctx) +{ + uint32_t n, j, k, s, ret; + const uint16_t max_pkts = MAX_PKTS_BURST; + struct rte_mbuf *pkts[max_pkts]; + struct rte_mbuf *_pkts[max_pkts]; + uint16_t port_id = ctx->port_id; + uint16_t queue_id = ctx->queue_id; + + ret = 0; + tle_tcp_process(ctx->tcp_ctx, TCP_MAX_PROCESS); + + n = tle_tcp_tx_bulk(ctx->lb_tcp_dev, pkts, max_pkts); + n += tle_udp_tx_bulk(ctx->lb_udp_dev, pkts + n, max_pkts - n); + if (n > 0) { + ret += n; + rte_eth_tx_burst(ctx->lb_port_id, 0, pkts, n); + /* loopback device could 
receive after transmit immediately */ + n = rte_eth_rx_burst(ctx->lb_port_id, 0, pkts, RTE_DIM(pkts)); + process_rx_pkts(ctx, pkts, n, 1); + + /* wake up look-aside backend */ + wake_lookaside_backend(ctx); + } + + n = tle_tcp_tx_bulk(ctx->tcp_dev, pkts, max_pkts); + n += tle_udp_tx_bulk(ctx->udp_dev, pkts + n, max_pkts - n); + if (n == 0) + return 0; + + ret += n; + s = 0; + for (j = 0; j != n; j++) { + if (mac_fill(ctx, pkts[j]) == 0) { + PKT_DUMP(pkts[j]); + _pkts[s++] = pkts[j]; + continue; + } + + pkts[j]->next_pkt = ctx->arp_wait; + ctx->arp_wait = pkts[j]; + } + + /* For virtio-user/vhost-kernel test case, it's normal that vhost + * kthread cannot catch up with packets generation speed in stack. + * Shall we drop those packets immdiately or retry some times to + * keep those packets? We find dropping packets here is not a good + * idea, which leads to lots of retrans and inefficiency of vhost + * kthread. Even below code does not work well: + * + * for (k = 0, retry = 0; k < s && retry < 10000; retry++) + * k += rte_eth_tx_burst(port_id, queue_id, _pkts + k, s - k); + * + * So we choose to blockingly send out packes. + */ + k = 0; + while (k < s) + k += rte_eth_tx_burst(port_id, queue_id, _pkts + k, s - k); + + for (j = k; j != s; j++) + rte_pktmbuf_free(_pkts[j]); + + TRACE("(port=%u, queue=%u), %u/%u pkts are sent", + port_id, queue_id, k, s); + + return ret; +} + +int +be_process(struct glue_ctx *ctx) +{ + int ret; + + if (unlikely(stopped)) + return 0; + + ret = be_rx(ctx); + mac_timeout(ctx); + ret += be_tx(ctx); + + return ret; +} diff --git a/lib/libtle_glue/config.h b/lib/libtle_glue/config.h new file mode 100644 index 0000000..976495e --- /dev/null +++ b/lib/libtle_glue/config.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _TLE_GLUE_CONFIG_H_ +#define _TLE_GLUE_CONFIG_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_STREAMS_PER_CORE 64 * 1024 +#define MIN_STREAMS_PER_CORE 16 +#define DELTA_STREAMS 64 +#define FRAG_BUCKET 8 +#define FRAG_ENTRIES_PER_BUCKET 8 +#define MAX_ARP_ENTRY (1 << 10) + +/* RCV buffer & SND buffer + * This is not a reall rcv/snd buffer implementation. Below number means + * the slots to store mbufs of sent or received data. Each slot could + * contains a single mbuf with size of (1500B or 2048B) or a chained + * mbuf with size <= 64KB. + * + * TODO: add real snd/rcv buffer + */ +#define MAX_RECV_BUFS_PER_STREAM 256 +#define MAX_SEND_BUFS_PER_STREAM 256 + +#ifdef LOOK_ASIDE_BACKEND +#define MAX_NB_CTX 1 +#else +#define MAX_NB_CTX 16 +#endif + +#define MAX_MBUFS 0x80000 +/* should calculated by: + * MAX_NB_CTX * MAX_STREAMS_PER_CORE * (MAX_RECV_BUFS_PER_STREAM + MAX_SEND_BUFS_PER_STREAM)) + */ + +#define MBUF_DYNAMIC_SIZE 0x800 + +#define MBUF_PERCORE_CACHE 32 + +#define MAX_PKTS_BURST 0x20 + +#define TCP_MAX_PROCESS 32 + +#define ARP_ENTRY_EXPIRE 60000U +#define ARP_REQUEST_EXPIRE 1000U /* ms */ +#define ARP_MAX_REQ_TIMES 5 + +#define MTU_NORMAL 1500 +#define MTU_LOOPBACK 65535 + +#ifdef __cplusplus +} +#endif + +#endif /*_TLE_GLUE_CONFIG_H_ */ diff --git a/lib/libtle_glue/ctx.c b/lib/libtle_glue/ctx.c new file mode 100644 index 0000000..dc78f39 --- /dev/null +++ b/lib/libtle_glue/ctx.c @@ -0,0 +1,535 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <stdlib.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#include <rte_malloc.h> +#include <rte_random.h> +#include <rte_cycles.h> +#include <rte_ethdev.h> +#include <rte_hash.h> +#include <rte_spinlock.h> + +#include "config.h" +#include "ctx.h" +#include "log.h" +#include "util.h" +#include "internal.h" +#include "gateway.h" +#include "tle_timer.h" + +RTE_DEFINE_PER_LCORE(struct glue_ctx *, glue_ctx); + +int nb_ctx; +struct glue_ctx ctx_array[MAX_NB_CTX]; +struct glue_ctx *default_ctx = &ctx_array[0]; + +static int +ipv4_dst_lookup_tcp(void *data, const struct in_addr *addr, + struct tle_dest *res) +{ + addr = ipv4_gateway_lookup(data, addr); + return arp_ipv4_dst_lookup(data, addr, res, IPPROTO_TCP); +} + +static int +ipv4_dst_lookup_udp(void *data, const struct in_addr *addr, + struct tle_dest *res) +{ + addr = ipv4_gateway_lookup(data, addr); + return arp_ipv4_dst_lookup(data, addr, res, IPPROTO_UDP); +} + +static int +ipv6_dst_lookup_tcp(void *data, const struct in6_addr *addr, + struct tle_dest *res) +{ + addr = ipv6_gateway_lookup(data, addr); + return arp_ipv6_dst_lookup(data, addr, res, IPPROTO_TCP); +} + +static int +ipv6_dst_lookup_udp(void *data, const struct in6_addr *addr, + struct tle_dest *res) +{ + addr = ipv6_gateway_lookup(data, addr); + return arp_ipv6_dst_lookup(data, addr, res, IPPROTO_UDP); +} + +static struct tle_ctx * 
+proto_ctx_create(uint32_t socket_id, uint32_t proto, void *data) +{ + struct tle_ctx_param cprm; + + if (proto != TLE_PROTO_TCP && proto != TLE_PROTO_UDP) + rte_panic("Invalid proto [%u]\n", proto); + + cprm.socket_id = socket_id; + cprm.proto = proto; + cprm.max_streams = MAX_STREAMS_PER_CORE; + cprm.min_streams = MIN_STREAMS_PER_CORE; + cprm.delta_streams = DELTA_STREAMS; + cprm.max_stream_rbufs = MAX_RECV_BUFS_PER_STREAM; + cprm.max_stream_sbufs = MAX_SEND_BUFS_PER_STREAM; + if (proto == TLE_PROTO_TCP) { + cprm.lookup4 = ipv4_dst_lookup_tcp; + cprm.lookup6 = ipv6_dst_lookup_tcp; + } else { + cprm.lookup4 = ipv4_dst_lookup_udp; + cprm.lookup6 = ipv6_dst_lookup_udp; + } + cprm.lookup4_data = data; + cprm.lookup6_data = data; +#ifdef LOOK_ASIDE_BACKEND + cprm.flags = 0; +#else + cprm.flags = TLE_CTX_FLAG_ST; /* ctx will be used by single thread*/ +#endif + cprm.send_bulk_size = 0; /* 32 if 0 */ + cprm.hash_alg = TLE_SIPHASH; + cprm.secret_key.u64[0] = rte_rand(); + cprm.secret_key.u64[1] = rte_rand(); + cprm.icw = 0; /**< congestion window, default is 2*MSS if 0. 
*/ + cprm.timewait = 1; /* TLE_TCP_TIMEWAIT_DEFAULT */ + + return tle_ctx_create(&cprm); +} + +static int +evq_init(struct glue_ctx *ctx, uint32_t socket_id) +{ + struct tle_evq_param eprm = { + .socket_id = socket_id, + .max_events = 0, /* We don't pre-allocate any event */ + }; + + ctx->ereq = tle_evq_create(&eprm); + if (ctx->ereq == NULL) + rte_panic("Cannot create ereq"); + + ctx->rxeq = tle_evq_create(&eprm); + if (ctx->rxeq == NULL) + rte_panic("Cannot create rxeq"); + + ctx->txeq = tle_evq_create(&eprm); + if (ctx->txeq == NULL) + rte_panic("Cannot create txeq"); + + return 0; +} + +static void +tle_ctx_init(struct glue_ctx *ctx, uint32_t socket_id) +{ + struct tle_dev_param dprm; + struct rte_eth_dev_info dev_info; + uint16_t port_id = 0; /* currently only use one port */ + + ctx->tcp_ctx = proto_ctx_create(socket_id, TLE_PROTO_TCP, ctx); + if (ctx->tcp_ctx == NULL) + rte_panic("Cannot create tle_ctx for tcp"); + + ctx->udp_ctx = proto_ctx_create(socket_id, TLE_PROTO_UDP, ctx); + if (ctx->udp_ctx == NULL) + rte_panic("Cannot create tle_ctx for udp"); + + memset(&dprm, 0, sizeof(dprm)); + + /* offloading check and set */ + rte_eth_dev_info_get(port_id, &dev_info); + dprm.rx_offload = dev_info.rx_offload_capa & rx_offload; + dprm.tx_offload = dev_info.tx_offload_capa & tx_offload; + + dprm.local_addr4.s_addr = ctx->ipv4; + rte_memcpy(&dprm.local_addr6, &ctx->ipv6, sizeof(struct in6_addr)); + dprm.bl4.nb_port = 0; + dprm.bl4.port = NULL; + dprm.bl6.nb_port = 0; + dprm.bl6.port = NULL; + + ctx->tcp_dev = tle_add_dev(ctx->tcp_ctx, &dprm); + if (ctx->tcp_dev == NULL) + rte_panic("add tle_dev for tcp failed: %u", rte_errno); + + ctx->udp_dev = tle_add_dev(ctx->udp_ctx, &dprm); + if (ctx->udp_dev == NULL) + rte_panic("add tle_dev for udp failed: %u", rte_errno); + + if (ctx == default_ctx) { + dprm.rx_offload = rx_offload; + dprm.tx_offload = tx_offload; + dprm.local_addr4.s_addr = htonl(INADDR_LOOPBACK); + rte_memcpy(&dprm.local_addr6, &in6addr_loopback, + 
sizeof(struct in6_addr)); + + ctx->lb_tcp_dev = tle_add_dev(ctx->tcp_ctx, &dprm); + if (ctx->lb_tcp_dev == NULL) + rte_panic("failed to add loopback tcp dev: %u\n", + rte_errno); + + ctx->lb_udp_dev = tle_add_dev(ctx->udp_ctx, &dprm); + if (ctx->lb_udp_dev == NULL) + rte_panic("failed to add loopback udp dev: %u\n", + rte_errno); + } + + evq_init(ctx, socket_id); +} + +static uint32_t +get_ip(void) +{ + struct in_addr addr; + const char *ip_str = getenv(DPDK_IP); + + if (ip_str == NULL) { + ip_str = DPDK_IP_DEF; + GLUE_LOG(INFO, "will use the default IP %s", DPDK_IP_DEF); + } else + GLUE_LOG(INFO, "will use the IP %s", ip_str); + + if (inet_aton(ip_str, &addr) == 0) + rte_panic("Invalid addr from env DPDK_IP: %s", ip_str); + + return addr.s_addr; +} + +static uint8_t +get_ip_mask(void) +{ + const char *mask_str = getenv(DPDK_IP_MASK); + + if (mask_str == NULL) { + mask_str = DPDK_IP_MASK_DEF; + GLUE_LOG(INFO, "will use the default IP Mask %s", DPDK_IP_MASK_DEF); + } else + GLUE_LOG(INFO, "will use the IP Mask %s", mask_str); + + return (uint8_t)atoi(mask_str); +} + +static uint32_t +get_ip_gate(void) +{ + struct in_addr addr; + const char *ip_str = getenv(DPDK_IP_GATEWAY); + + if (ip_str == NULL) { + ip_str = DPDK_IP_GATEWAY_DEF; + GLUE_LOG(INFO, "will use the default IP gateway %s", + DPDK_IP_GATEWAY_DEF); + } else + GLUE_LOG(INFO, "will use the IP gateway %s", ip_str); + + if (inet_aton(ip_str, &addr) == 0) + rte_panic("Invalid addr from env DPDK_IP_GATEWAY: %s", ip_str); + + return addr.s_addr; +} + +static struct in6_addr* +get_ipv6(void) +{ + static struct in6_addr addr; + const char *ip_str = getenv(DPDK_IPV6); + + if (ip_str == NULL) { + ip_str = DPDK_IPV6_DEF; + GLUE_LOG(INFO, "will use the default IP(V6) %s", DPDK_IPV6_DEF); + } else + GLUE_LOG(INFO, "will use the IP(V6) %s", ip_str); + + if (inet_pton(AF_INET6, ip_str, &addr) == 0) + rte_panic("Invalid addr from env DPDK_IPV6: %s", ip_str); + + return &addr; +} + +static uint8_t +get_ipv6_mask(void) +{ + 
const char *mask_str = getenv(DPDK_IPV6_MASK); + + if (mask_str == NULL) { + mask_str = DPDK_IPV6_MASK_DEF; + GLUE_LOG(INFO, "will use the default IPV6 Mask %s", + DPDK_IPV6_MASK_DEF); + } else + GLUE_LOG(INFO, "will use the IPV6 Mask %s", mask_str); + + return (uint8_t)atoi(mask_str); +} + +static struct in6_addr* +get_ipv6_gate(void) +{ + static struct in6_addr addr; + const char *ip_str = getenv(DPDK_IPV6_GATEWAY); + + if (ip_str == NULL) { + ip_str = DPDK_IPV6_GATEWAY_DEF; + GLUE_LOG(INFO, "will use the default IP(V6) gateway %s", + DPDK_IPV6_GATEWAY_DEF); + } else + GLUE_LOG(INFO, "will use the IP(V6) gateway %s", ip_str); + + if (inet_pton(AF_INET6, ip_str, &addr) == 0) + rte_panic("Invalid addr from env DPDK_IPV6_GATEWAY: %s", ip_str); + + return &addr; +} + +static bool +lo4_enabled(void) +{ + const char *str = getenv("DPDK_LO4_ENABLED"); + if (str != NULL && strcmp(str, "0") == 0) + return false; + return true; +} + +static bool +lo6_enabled(void) +{ + const char *str = getenv("DPDK_LO6_ENABLED"); + if (str == NULL || strcmp(str, "1") != 0) + return false; + return true; +} + +static void +loopback_dst_init(struct glue_ctx *ctx) +{ + struct tle_dest *dst; + struct ether_hdr *eth; + struct ipv4_hdr *ip4h; + struct ipv6_hdr *ip6h; + + /* init ipv4 dst */ + dst = &ctx->lb_dst; + dst->mtu = 65535; + + dst->l2_len = sizeof(*eth); + dst->head_mp = get_mempool_by_socket(0); /* fix me */ + eth = (struct ether_hdr *)dst->hdr; + memset(eth, 0, 2 * sizeof(eth->d_addr)); + eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); + + dst->l3_len = sizeof(*ip4h); + ip4h = (struct ipv4_hdr *)(eth + 1); + ip4h->dst_addr = htonl(INADDR_LOOPBACK); + ip4h->version_ihl = 4 << 4 | sizeof(*ip4h) / IPV4_IHL_MULTIPLIER; + ip4h->time_to_live = 64; + ip4h->next_proto_id = IPPROTO_TCP; + + /* init ipv6 dst */ + dst = &ctx->lb_dst_v6; + dst->mtu = 65535; + + dst->l2_len = sizeof(*eth); + dst->head_mp = get_mempool_by_socket(0); /* fix me */ + eth = (struct ether_hdr *)dst->hdr; + 
memset(eth, 0, 2 * sizeof(eth->d_addr)); + eth->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6); + + dst->l3_len = sizeof(*ip6h); + ip6h = (struct ipv6_hdr *)(eth + 1); + rte_memcpy(ip6h->dst_addr, &in6addr_loopback, sizeof(struct in6_addr)); + ip6h->vtc_flow = 6 << 4; + ip6h->hop_limits = 255; + ip6h->proto = IPPROTO_TCP; +} + +static void +arp_hash_init(struct glue_ctx *ctx, unsigned socket_id) +{ + char str[RTE_HASH_NAMESIZE]; + struct rte_hash_parameters hprm; + + /* init ipv4 arp hash */ + snprintf(str, sizeof(str), "arp_hash_4@ctx%u", ctx->queue_id); + memset(&hprm, 0, sizeof(hprm)); + hprm.name = str; + hprm.entries = MAX_ARP_ENTRY * 2; + hprm.socket_id = socket_id; + hprm.key_len = sizeof(struct in_addr); + ctx->arp_hash = rte_hash_create(&hprm); + if (ctx->arp_hash == NULL) { + rte_panic("Failed to init hashtable for ARP"); + } + + /* init ipv6 arp hash */ + snprintf(str, sizeof(str), "arp_hash_6@ctx%u", ctx->queue_id); + memset(&hprm, 0, sizeof(hprm)); + hprm.name = str; + hprm.entries = MAX_ARP_ENTRY * 2; + hprm.socket_id = socket_id; + hprm.key_len = sizeof(struct in6_addr); + ctx->arp6_hash = rte_hash_create(&hprm); + if (ctx->arp6_hash == NULL) { + rte_panic("Failed to init hashtable for ARP6"); + } +} + +/* get current timestamp in ms, see tcp_get_tms() */ +static inline uint64_t +arp_get_tms(uint32_t mshift) +{ + uint64_t ts; + + ts = rte_get_tsc_cycles() >> mshift; + return ts; +} + +static void +arp_timer_init(struct glue_ctx *ctx, unsigned socket_id) +{ + struct tle_timer_wheel_args twprm; + + twprm.tick_size = 1000U; + twprm.max_timer = MAX_ARP_ENTRY + 8; + twprm.socket_id = socket_id; + ctx->arp_tmw = tle_timer_create(&twprm, + arp_get_tms(ctx->cycles_ms_shift)); + if (ctx->arp_tmw == NULL) + rte_panic("Failed to init timer wheel for ARP"); +} + +static void +glue_ctx_init(struct glue_ctx *ctx, uint32_t socket_id) +{ + uint64_t ms; + + ctx->arp4 = rte_zmalloc_socket(NULL, + sizeof(struct arp_entry) * MAX_ARP_ENTRY, + RTE_CACHE_LINE_SIZE, 
socket_id); + ctx->arp6 = rte_zmalloc_socket(NULL, + sizeof(struct arp_entry) * MAX_ARP_ENTRY, + RTE_CACHE_LINE_SIZE, socket_id); + if (!ctx->arp4 || !ctx->arp6) + rte_panic("Failed to allocate arp table"); + + ctx->port_id = 0; + ctx->queue_id = nb_ctx - 1; + ctx->ipv4 = get_ip(); + ctx->ipv4_ml = get_ip_mask(); + ctx->ipv4_gw.s_addr = get_ip_gate(); + ctx->lo4_enabled = lo4_enabled(); + rte_memcpy(&ctx->ipv6, get_ipv6(), sizeof(struct in6_addr)); + ctx->ipv6_ml = get_ipv6_mask(); + rte_memcpy(&ctx->ipv6_gw, get_ipv6_gate(), sizeof(struct in6_addr)); + ctx->lo6_enabled = lo6_enabled(); + + /* caclulate closest shift to convert from cycles to ms (approximate) */ + ms = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S; + ctx->cycles_ms_shift = sizeof(ms) * CHAR_BIT - __builtin_clzll(ms) - 1; + + arp_hash_init(ctx, socket_id); + arp_timer_init(ctx, socket_id); + ctx->arp_wait = NULL; + + ctx->frag_tbl = rte_ip_frag_table_create(FRAG_BUCKET, + FRAG_ENTRIES_PER_BUCKET, + FRAG_BUCKET * FRAG_ENTRIES_PER_BUCKET, + rte_get_tsc_hz(), + socket_id); + if (ctx->frag_tbl == NULL) + rte_panic("Failed to create ip defrag table"); + + PERCPU_MIB = &ctx->mib; +} + +static int ctx_seq; +static rte_spinlock_t ctx_lock = RTE_SPINLOCK_INITIALIZER; + +uint8_t +glue_ctx_alloc(void) +{ + uint32_t socket_id; + struct glue_ctx *ctx; + + /* fix me: we need a fine grainer lock */ + rte_spinlock_lock(&ctx_lock); + + GLUE_LOG(INFO, "allocate ctx: %d", ctx_seq); + if (ctx_seq == 0) + /* Called from constructor init() */ + ctx_seq = 1; + else if (ctx_seq == 1) { + /* Called from first epoll_create() or poll() */ + ctx_seq = 2; + ctx = default_ctx; + goto unlock; + } + + if (nb_ctx >= MAX_NB_CTX) + rte_panic("Exceed the max number of ctx"); + + ctx = &ctx_array[nb_ctx++]; + GLUE_LOG(INFO, "%u ctx allocated, and will init", nb_ctx); + + socket_id = get_socket_id(); + + glue_ctx_init(ctx, socket_id); + + /* reconfigure the "physical" port whenever # of ctx changes */ + port_reconfig(); + + if (ctx == 
default_ctx) { + loopback_dst_init(ctx); + + ctx->lb_port_id = create_loopback(socket_id); + GLUE_LOG(INFO, "loopback port_id: %u", ctx->lb_port_id); + } + + rte_eth_macaddr_get(ctx->port_id, &ctx->mac); + + tle_ctx_init(ctx, socket_id); + +unlock: + rte_spinlock_unlock(&ctx_lock); + return ctx - ctx_array; +} + +void +glue_ctx_free(struct glue_ctx *ctx __rte_unused) +{ + if (nb_ctx == 1 && ctx_seq == 2) { + GLUE_LOG(INFO, "free ctx"); + ctx_seq = 1; + return; + } + + rte_panic("close epoll fd on running is not supported\n"); +} + +struct glue_ctx * +glue_ctx_lookup(uint16_t port_id, uint16_t queue_id) +{ + int i; + + if (port_id == 1) /* loopback */ + return default_ctx; + + for (i = 0; i < nb_ctx; i++) { + if (ctx_array[i].port_id == port_id && + ctx_array[i].queue_id == queue_id) + return &ctx_array[i]; + } + + return NULL; +} diff --git a/lib/libtle_glue/ctx.h b/lib/libtle_glue/ctx.h new file mode 100644 index 0000000..e78b68f --- /dev/null +++ b/lib/libtle_glue/ctx.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _TLE_GLUE_SOCK_H_ +#define _TLE_GLUE_SOCK_H_ + +#include <stdbool.h> +#include <pthread.h> + +#include <rte_memzone.h> +#include <rte_mempool.h> +#include <rte_ether.h> +#include <rte_ip_frag.h> + +#include <tle_ctx.h> +#include <tle_event.h> +#include <tle_stats.h> + +#include <sys/queue.h> + +#include "config.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define DPDK_IP "DPDK_IP" +#define DPDK_IP_DEF "0.0.0.0" +#define DPDK_IP_MASK "DPDK_IP_MASK" +#define DPDK_IP_MASK_DEF "16" +#define DPDK_IP_GATEWAY "DPDK_IP_GATEWAY" +#define DPDK_IP_GATEWAY_DEF "0.0.0.0" +#define DPDK_IPV6 "DPDK_IPV6" +#define DPDK_IPV6_DEF "::" +#define DPDK_IPV6_MASK "DPDK_IPV6_MASK" +#define DPDK_IPV6_MASK_DEF "64" +#define DPDK_IPV6_GATEWAY "DPDK_IPV6_GATEWAY" +#define DPDK_IPV6_GATEWAY_DEF "::" + +struct arp_entry { + struct tle_dest dst; + uint8_t inuse; + uint8_t req_time; + void* timer; +}; + +struct glue_ctx { + struct tle_ctx *tcp_ctx; + struct tle_dev *tcp_dev; + struct tle_dev *lb_tcp_dev; + struct tle_ctx *udp_ctx; + struct tle_dev *udp_dev; + struct tle_dev *lb_udp_dev; + + struct tle_evq *ereq; + struct tle_evq *rxeq; + struct tle_evq *txeq; + + uint16_t port_id; + uint16_t queue_id; + uint16_t lb_port_id; + + struct { + uint8_t ipv4_ml; + uint8_t ipv6_ml; + }; + + struct ether_addr mac; + struct rte_mbuf *arp_wait; + struct tle_timer_wheel *arp_tmw; + uint32_t cycles_ms_shift; /* to convert from cycles to ms */ + + struct { + uint32_t ipv4; + struct in_addr ipv4_gw; + bool lo4_enabled; + + uint32_t arp4_num; + struct arp_entry *arp4; + struct rte_hash *arp_hash; + }; + + struct { + struct in6_addr ipv6; + struct in6_addr ipv6_gw; + bool lo6_enabled; + + uint32_t arp6_num; + struct arp_entry *arp6; + struct rte_hash *arp6_hash; + }; + + struct { + rte_spinlock_t frag_lock; + struct rte_ip_frag_tbl *frag_tbl; + struct rte_ip_frag_death_row frag_dr; + }; + + struct tle_dest lb_dst; + struct tle_dest lb_dst_v6; + + struct tle_mib mib; +} __rte_cache_aligned; + 
+extern int nb_ctx; +extern struct glue_ctx *default_ctx; +extern struct glue_ctx ctx_array[MAX_NB_CTX]; + +RTE_DECLARE_PER_LCORE(struct glue_ctx *, glue_ctx); + +static inline struct glue_ctx * +get_ctx(void) +{ + if (RTE_PER_LCORE(glue_ctx)) + return RTE_PER_LCORE(glue_ctx); + return default_ctx; +} + +static inline uint8_t +get_cid(void) +{ + return get_ctx() - ctx_array; +} + +uint8_t glue_ctx_alloc(void); + +struct glue_ctx * glue_ctx_lookup(uint16_t port_id, uint16_t queue_id); + +void glue_ctx_free(struct glue_ctx *ctx); + +#ifdef __cplusplus +} +#endif + +#endif /* _TLE_GLUE_SOCK_H_ */ diff --git a/lib/libtle_glue/epoll.c b/lib/libtle_glue/epoll.c new file mode 100644 index 0000000..1c8751b --- /dev/null +++ b/lib/libtle_glue/epoll.c @@ -0,0 +1,577 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <errno.h> + +#include <rte_common.h> +#include <rte_spinlock.h> +#include <rte_malloc.h> +#include <rte_ethdev.h> +#include <rte_atomic.h> +#include <rte_eal_interrupts.h> + +#include "fd.h" +#include "ctx.h" +#include "sym.h" +#include "log.h" +#include "util.h" +#include "sock.h" +#include "internal.h" +#include "tle_glue.h" +#include "../libtle_l4p/udp_stream.h" +#include "../libtle_l4p/tcp_stream.h" + +#define EPOLL_DATA_SPECIAL 0xFFFFFFFFFFFFFF01 + +/* We don't use rte_eth_dev_rx_intr_ctl_q as it has its + * own way to specify event.data + */ +static int +dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id, int efd, int op, int rx) +{ + int fd, ret; + uint32_t vec, efd_idx; + struct rte_eth_dev *dev; + struct rte_intr_handle *intr_handle; + static struct epoll_event ev = { + .events = EPOLLIN | EPOLLPRI | EPOLLET, + .data = { + .u64 = EPOLL_DATA_SPECIAL, + }, + }; + char buf[32]; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + if (queue_id >= dev->data->nb_rx_queues) + return -EINVAL; + + if (!dev->intr_handle) + return -ENOTSUP; + + intr_handle = dev->intr_handle; + if (!intr_handle->intr_vec) + return -EPERM; + + vec = intr_handle->intr_vec[queue_id]; + + efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ? 
+ (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec; + + fd = intr_handle->efds[efd_idx]; + + if (rx) { + /* almost all devices use eventfd, we shall read out */ + ret = read(fd, buf, sizeof(uint64_t)); + RTE_SET_USED(ret); + } + + return k_epoll_ctl(efd, op, fd, &ev); +} + +int +PRE(epoll_create)(int size) +{ + int epfd; + struct sock *so; + + if (!fd_table_initialized) + return k_epoll_create(size); + + epfd = get_unused_fd(); + if (epfd == -1) { + errno = EMFILE; + return -1; + } + + + so = fd2sock(epfd); + so->cid = glue_ctx_alloc(); + + so->shadow_efd = k_epoll_create(1); + if (so->shadow_efd < 0) + rte_panic("Failed to create shadow efd"); + + if (dev_rx_intr_ctl_q(CTX(so)->port_id, CTX(so)->queue_id, + so->shadow_efd, RTE_INTR_EVENT_ADD, 0) < 0) + rte_panic("Failed to epoll_ctl rxq interrupt fd"); + + so->epoll = 1; + + return epfd; +} + +int +PRE(epoll_create1)(int flags __rte_unused) +{ + return PRE(epoll_create)(1); +} + +int +PRE(epoll_ctl)(int epfd, int op, int fd, struct epoll_event *event) +{ + struct sock *so_ep; + struct sock *so; + + if (is_kernel_fd(epfd)) { + if (!is_kernel_fd(fd)) + rte_panic("kernel epoll (%d) on an userspace fd: %d", + epfd, fd); + + return k_epoll_ctl(epfd, op, fd, event); + } + + so_ep = fd2sock(epfd); + + if (is_kernel_fd(fd)) { + /* Use a shadow epoll fd for possible kernel I/O events. 
*/ + return k_epoll_ctl(so_ep->shadow_efd, op, fd, event); + } + + so = fd2sock(fd); + + if (unlikely(so->cid != so_ep->cid)) + rte_panic("Different ctx %d and %d for epoll fd and socket fd", + so_ep->cid, so->cid); + + GLUE_DEBUG("epoll_ctl: op = %x, fd = %d, event = %x", + op, fd, event->events); + switch (op) { + case EPOLL_CTL_ADD: + if (so->event.events) { + errno = EEXIST; + return -1; + } + +#ifdef LOOK_ASIDE_BACKEND + if (event->events & EPOLLIN) + tle_event_active(&so->rxev, TLE_SEV_DOWN); + if (event->events & EPOLLOUT) + tle_event_active(&so->txev, TLE_SEV_DOWN); +#endif + so->event = *event; + + break; + case EPOLL_CTL_MOD: + if (so->event.events == 0) { + errno = ENOENT; + return -1; + } + +#ifdef LOOK_ASIDE_BACKEND + if (event->events & EPOLLIN) + tle_event_active(&so->rxev, TLE_SEV_DOWN); + else + tle_event_idle(&so->rxev); + if (event->events & EPOLLOUT) + tle_event_active(&so->txev, TLE_SEV_DOWN); + else + tle_event_idle(&so->txev); +#endif + so->event = *event; + break; + case EPOLL_CTL_DEL: + if (so->event.events == 0) { + errno = ENOENT; + return -1; + } + +#ifdef LOOK_ASIDE_BACKEND + if (so->event.events & EPOLLIN) + tle_event_idle(&so->rxev); + if (so->event.events & EPOLLOUT) + tle_event_idle(&so->txev); +#endif + so->event.events = 0; + break; + default: + errno = EINVAL; + return -1; + } + + return 0; +} + +static inline int32_t +tle_evq_fetch(struct tle_evq *evq, const void *evd[], + uint32_t num, uint32_t event) +{ + uint32_t i, k; + uint32_t polled; + struct tle_event *ev; + struct tle_event *next; + + if (evq->nb_armed == 0) + return 0; + + rte_compiler_barrier(); + + rte_spinlock_lock(&evq->lock); + ev = TAILQ_FIRST(&evq->armed); + for (i = 0, k = 0; i != evq->nb_armed; i++) { + next = TAILQ_NEXT(ev, ql); + polled = ((const struct sock *)ev->data)->event.events; + /* Always report EPOLLHUP, see man epoll_ctl(2) */ + if (polled && ((polled | EPOLLHUP) & event)) { + evd[k++] = ev->data; + TAILQ_REMOVE(&evq->armed, ev, ql); + /* don't 
down erev; and assign NULL to data means this + * ev is already removed from the queue, refer to + * tle_event_idle_err(). + */ + if (event != EPOLLHUP) + ev->state = TLE_SEV_DOWN; + else + ev->data = NULL; + } + if (k == num) + break; + ev = next; + } + evq->nb_armed -= k; + rte_spinlock_unlock(&evq->lock); + return k; +} + +static int +evq_drain(struct tle_evq *q, uint32_t event, + struct epoll_event *events, int maxevents) +{ + uint32_t i, n; + struct sock *socks[maxevents]; + + n = tle_evq_fetch(q, (const void **)(uintptr_t)socks, maxevents, event); + for (i = 0; i < n; ++i) { + events[i].events = event; + events[i].data = socks[i]->event.data; + + /* when EPOLLHUP happens, also return EPOLLIN and EPOLLOUT + * if they are registered. So as to emulate behaviour of linux + * kernel. + * Some applications (e.g. redis) need these events to determine + * following works. + */ + if (event & EPOLLHUP) + events[i].events |= (socks[i]->event.events & + (EPOLLIN | EPOLLOUT)); + + /* if multiple events of single socket are triggered, + * return single event with multiple event types rather than + * multiple events. + * + * we drain evq in order of EPOLLOUT -> EPOLLIN -> EPOLLHUP, + * so only need to check event in evq that has not been drained. 
+ */ + switch (event) { + case EPOLLOUT: + if ((socks[i]->event.events & EPOLLIN) && + tle_event_state(&socks[i]->rxev) == TLE_SEV_UP) { + tle_event_down(&socks[i]->rxev); + events[i].events |= EPOLLIN; + } + /* fallthrough */ + case EPOLLIN: + if (tle_event_state(&socks[i]->erev) == TLE_SEV_UP) { + rte_spinlock_lock(&socks[i]->erev.head->lock); + if (socks[i]->erev.data != NULL && + tle_event_state(&socks[i]->erev) == TLE_SEV_UP) { + TAILQ_REMOVE(&socks[i]->erev.head->armed, + &socks[i]->erev, ql); + socks[i]->erev.head->nb_armed--; + socks[i]->erev.data = NULL; + } + rte_spinlock_unlock(&socks[i]->erev.head->lock); + events[i].events |= EPOLLHUP; + } + } + + GLUE_DEBUG("event for fd = %d, event = %x", + socks[i]->event.data.fd, event); + } + return n; +} + +#ifdef LOOK_ASIDE_BACKEND +rte_atomic32_t flag_sleep; + +int +epoll_kernel_wait(struct glue_ctx *ctx, int efd, + struct epoll_event *events, + int maxevents, int timeout, int *rx) +{ + struct epoll_event event; + uint16_t port_id = ctx->port_id; + uint16_t queue_id = ctx->queue_id; + + RTE_SET_USED(events); + RTE_SET_USED(maxevents); + RTE_SET_USED(rx); + + rte_eth_dev_rx_intr_enable(port_id, queue_id); + + /* TODO: timeout shall be limited by the latest tcp timer */ + + if (be_process(ctx) > 0) /* use this way to avoid concurrency */ { + /* Do nothing */ + } else + sleep_with_lock(efd, &event, 1, timeout); + + rte_eth_dev_rx_intr_disable(port_id, queue_id); + /* We don't have kernel events for report, so just return zero */ + return 0; +} +#else +int +epoll_kernel_wait(struct glue_ctx *ctx, int efd, + struct epoll_event *events, + int maxevents, int timeout, int *rx) +{ + int i, j, rc; + int flag_tmp = 0; + uint16_t port_id = ctx->port_id; + uint16_t queue_id = ctx->queue_id; +#define LEAST_EVENTS 8 + struct epoll_event s_events[LEAST_EVENTS]; + struct epoll_event *r_events; + int r_maxevents; + int fastpath = 0; + + *rx = 0; + + if (efd == -1) { + flag_tmp = 1; + efd = k_epoll_create(1); + if (efd < 0) + 
rte_panic("Failed to create tmp efd"); + } + + if (stopped) { + rc = k_epoll_pwait(efd, events, maxevents, timeout, NULL); + goto check; + } + + if (maxevents < LEAST_EVENTS) { + r_events = s_events; + r_maxevents = maxevents + 1; + } else { + r_events = events; + r_maxevents = maxevents; + } + + if (flag_tmp && + dev_rx_intr_ctl_q(port_id, queue_id, efd, RTE_INTR_EVENT_ADD, 0) < 0) + /* TODO: fall back to busy polling */ + rte_panic("Failed to enable rxq interrupt"); + + rte_eth_dev_rx_intr_enable(port_id, queue_id); + + /* TODO: timeout shall be limited by the latest tcp timer */ + + if (timeout != 0 && be_process(ctx) > 0) { + /* use this way to avoid concurrency */ + rc = 0; + fastpath = 1; + } else + rc = sleep_with_lock(efd, r_events, r_maxevents, timeout); + + rte_eth_dev_rx_intr_disable(port_id, queue_id); + + /* filter out rxq event */ + for (i = 0, j = 0; i < rc; ++i) { + if (r_events[i].data.u64 == EPOLL_DATA_SPECIAL) { + *rx = true; + if (i + 1 < rc) { + memcpy(&r_events[j], &r_events[i+1], + (rc-i-1) * sizeof(*events)); + } + rc -= 1; + break; + } else { + if (i != j) + r_events[j] = r_events[i]; + j++; + } + } + + if (rc > 0 && maxevents < LEAST_EVENTS) + memcpy(events, r_events, rc * sizeof(*events)); + + if (flag_tmp) + dev_rx_intr_ctl_q(port_id, queue_id, efd, + RTE_INTR_EVENT_DEL, *rx); + + if (fastpath) + *rx = true; +check: + if (flag_tmp) + close(efd); + + return rc; +} +#endif + +/* If only there are some packets to process, we don't sleep; we will poll + * for some number of iterations to check packets. + * + * TODO: change to wait for a period of time? + */ +#define IDLE_ITERATIONS 5 + +int +poll_common(struct glue_ctx *ctx, struct epoll_event *events, + int maxevents, int timeout, int shadow_efd) +{ + int rx; + int total = 0; + int idle = IDLE_ITERATIONS; + +again: + /* We will start with send, then recv, and last err queue, as we want + * to serve exiting connections firstly, then new connections, and + * lastly, the wrong connections. 
+ */ + + /* 0. send evq */ + total += evq_drain(ctx->txeq, EPOLLOUT, + events + total, maxevents-total); + if (total == maxevents) + return total; + + /* 1. recv evq */ + total += evq_drain(ctx->rxeq, EPOLLIN, + events + total, maxevents-total); + if (total == maxevents) + return total; + + /* 2. err evq */ + total += evq_drain(ctx->ereq, EPOLLHUP, + events + total, maxevents-total); + + if (total > 0) + return total; + + if (idle > 0) { + if (be_process(ctx) == 0) + idle--; + else + idle = IDLE_ITERATIONS; + goto again; + } + + if (timeout == 0) + return 0; + + /* Setup rxq interrupt mode, and check kernel I/O events */ + total = epoll_kernel_wait(ctx, shadow_efd, events, + maxevents, timeout, &rx); + + /* Kernel I/O events are available (total > 0) or + * timeout (total < 0) or something bad happens. + */ + if (total != 0) + return total; + + /* Check userspace I/O events */ + idle = IDLE_ITERATIONS; + be_process(ctx); + goto again; +} + +int +PRE(epoll_wait)(int epfd, struct epoll_event *events, + int maxevents, int timeout) +{ + struct sock *so; + + if (is_kernel_fd(epfd)) + return k_epoll_pwait(epfd, events, maxevents, timeout, NULL); + + so = fd2sock(epfd); + + /* thread <> context binding happens here */ + if (RTE_PER_LCORE(glue_ctx) == NULL) + RTE_PER_LCORE(glue_ctx) = CTX(so); + + return poll_common(CTX(so), events, maxevents, timeout, so->shadow_efd); +} + +int +PRE(epoll_pwait)(int epfd, struct epoll_event *events, + int maxevents, int timeout, const sigset_t *sigmask) +{ + if (sigmask != NULL) { + rte_panic("epoll_pwait with signal is not supported"); + } + + return epoll_wait(epfd, events, maxevents, timeout); +} + +int +fd_ready(int fd, int events) +{ + int ret = 0; + struct sock *so = fd2sock(fd); + + if (unlikely(!so->s)) { + if (tle_event_state(&so->erev) == TLE_SEV_UP) + /* socket has been shutdown */ + return events | EPOLLHUP; + else /* socket is not set up yet */ + return 0; + } + + if (unlikely(IS_TCP(so) && + TCP_STREAM(so->s)->tcb.state == 
TCP_ST_CLOSED)) { + return events | EPOLLHUP | EPOLLERR; + } + + if (tle_event_state(&so->erev) == TLE_SEV_UP) + ret |= EPOLLHUP; + + if (events & EPOLLIN) { + if (so->rx_left || + (IS_TCP(so) && rte_ring_count(TCP_STREAM(so->s)->rx.q) > 0) || + (IS_UDP(so) && rte_ring_count(UDP_STREAM(so->s)->rx.q) > 0)) + ret |= EPOLLIN; + } + + if (events & EPOLLOUT) { + if ((IS_TCP(so) && + TCP_STREAM(so->s)->tcb.state >= TCP_ST_ESTABLISHED && + rte_ring_free_count(TCP_STREAM(so->s)->tx.q) > 0) || + (IS_UDP(so) && + rte_ring_count(UDP_STREAM(so->s)->tx.drb.r) > 0)) + ret |= EPOLLOUT; + } + + return ret; +} + +void +v_get_stats_snmp(unsigned long mibs[]) +{ + int i, j, k; + + memcpy(mibs, &default_mib, sizeof(default_mib)); + + for (i = 0; i < nb_ctx; ++i) { + for (j = 0; j < TCP_MIB_MAX; ++j) + mibs[j] += ctx_array[i].mib.tcp.mibs[j]; + + for (k = 0; k < UDP_MIB_MAX; ++k) + mibs[j+k] += ctx_array[i].mib.udp.mibs[k]; + } +} diff --git a/lib/libtle_glue/fd.c b/lib/libtle_glue/fd.c new file mode 100644 index 0000000..cc855f9 --- /dev/null +++ b/lib/libtle_glue/fd.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <sys/time.h> +#include <sys/resource.h> +#include <string.h> + +#include "fd.h" +#include "log.h" +#include "util.h" +#include "config.h" + +bool fd_table_initialized; + +struct fd_table fd_table = { .fd_base = INT_MAX, }; + +static int +get_ulimit_nofile(void) +{ + struct rlimit rlim; + +#define GLUE_BASE_FD 1024 + if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) + return GLUE_BASE_FD; + + return rlim.rlim_cur; /* soft limit, rlim_max is the hard limit */ +} + +static void +fd_num_set(int *fd_base, int *fd_num) +{ + int limit = get_ulimit_nofile(); + + /* fix me: alignment of power of two */ + /* fix me: use dup2 to occupy these fds */ + *fd_num = limit / 2; + *fd_num = RTE_MIN(MAX_STREAMS_PER_CORE * 2 * MAX_NB_CTX, *fd_num); + + *fd_base = limit - *fd_num; + GLUE_LOG(INFO, "fd_base = %d, fd_num = %d", *fd_base, *fd_num); +} + +static void +add_fd(struct rte_mempool *mp __rte_unused, void *opaque __rte_unused, + void *obj, unsigned obj_idx) +{ + ((struct sock *)obj)->fd = obj_idx + fd_table.fd_base; + fd_table.socks[obj_idx] = obj; +} + +void +fd_init(void) +{ + int ret; + size_t sz; + uint32_t socket_id; + int fd_base, fd_num; + struct rte_mempool *mp = NULL; + char name[RTE_MEMPOOL_NAMESIZE]; + + socket_id = get_socket_id(); + + fd_num_set(&fd_base, &fd_num); + + sz = sizeof(fd_table.socks[0]) * fd_num; + fd_table.socks = rte_zmalloc_socket("fdtable", sz, + RTE_CACHE_LINE_SIZE, socket_id); + if (fd_table.socks == NULL) { + GLUE_LOG(ERR, "Failed to malloc fd table"); + goto err; + } + + snprintf(name, RTE_MEMPOOL_NAMESIZE, "mp_fd_%d_%d", fd_base, fd_num); + mp = rte_mempool_create_empty(name, fd_num - 1, sizeof(struct sock), + 32, 0, socket_id, MEMPOOL_F_DYNAMIC); + if (mp == NULL) { + GLUE_LOG(ERR, "Failed to create mp for fd table"); + goto err; + } + + GLUE_LOG(INFO, "sizeof(struct sock): %lu, elt_size of fd table = %u", + sizeof(struct sock), mp->elt_size); + + ret = rte_mempool_set_ops_byname(mp, "ring_mp_mc", NULL); + if (ret != 0) { + 
GLUE_LOG(ERR, "Failed to set mp ops: %d", ret); + goto err; + } + + rte_mempool_set_dynamic_size(mp, 1024); + rte_mempool_set_dynamic_cb(mp, add_fd); + + fd_table.mp = mp; + fd_table.fd_base = fd_base; + fd_table.fd_num = fd_num; + + /* should populate after fd_table is set */ + ret = rte_mempool_populate_default(mp); + if (ret < 0) { + GLUE_LOG(ERR, "Failed to populate mp: %d", ret); + goto err; + } + + fd_table_initialized = true; + + return; +err: + rte_mempool_free(mp); + rte_panic("Failed to init fd_table"); +} diff --git a/lib/libtle_glue/fd.h b/lib/libtle_glue/fd.h new file mode 100644 index 0000000..d0ac4fe --- /dev/null +++ b/lib/libtle_glue/fd.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _TLE_GLUE_FD_H_ +#define _TLE_GLUE_FD_H_ + +#include <stdbool.h> +#include <sys/epoll.h> +#include <fcntl.h> + +#include <rte_mempool.h> +#include <rte_malloc.h> + +#include <tle_event.h> +#include <tle_ctx.h> +#include <tle_tcp.h> + +#include "log.h" +#include "sock.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct fd_table { + int fd_base; /* The mininum fd, 64 aligned */ + int fd_num; /* The number of fds, 64 aligned */ + struct rte_mempool *mp; /* O(1) get and put */ + struct sock **socks; +}; + +extern bool fd_table_initialized; +extern struct fd_table fd_table; + +static inline struct sock * +fd2sock(int fd) +{ + return fd_table.socks[fd - fd_table.fd_base]; +} + +static inline int +sock2fd(struct sock *so) +{ + return so->fd; +} + +static inline int +get_unused_fd(void) +{ + struct sock *so; + + if (unlikely(rte_mempool_get(fd_table.mp, (void **)&so) < 0)) { + GLUE_LOG(ERR, "FDs have been exhausted"); + return -1; + } + + so->valid = 1; + return sock2fd(so); +} + +static inline void +tle_event_idle_err(struct tle_event *ev) +{ + struct tle_evq *q; + + if (ev->state == TLE_SEV_IDLE) + return; + + q = ev->head; + rte_compiler_barrier(); + + rte_spinlock_lock(&q->lock); + if (ev->state == TLE_SEV_UP && ev->data) { + TAILQ_REMOVE(&q->armed, ev, ql); + q->nb_armed--; + } + ev->state = TLE_SEV_IDLE; + rte_spinlock_unlock(&q->lock); +} + +static inline void +put_free_fd(int fd) +{ + struct sock *so = fd2sock(fd); + + rte_mempool_put(fd_table.mp, so); +} + +static inline bool +is_kernel_fd(int fd) +{ + return fd < fd_table.fd_base; +} + +void fd_init(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _TLE_GLUE_FD_H_ */ diff --git a/lib/libtle_glue/gateway.h b/lib/libtle_glue/gateway.h new file mode 100644 index 0000000..29de6b1 --- /dev/null +++ b/lib/libtle_glue/gateway.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2019 Ant Financial Services Group. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _TLE_GATEWAY_H_ +#define _TLE_GATEWAY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +static inline bool +is_ipv4_loopback_addr(in_addr_t addr, struct glue_ctx *ctx) +{ + if (addr == ctx->ipv4 || addr == htonl(INADDR_LOOPBACK)) + return true; + else + return false; +} + +static inline bool +is_ipv6_loopback_addr(const struct in6_addr *addr, struct glue_ctx *ctx) +{ + if (memcmp(addr, &ctx->ipv6, sizeof(struct in6_addr)) == 0 || + IN6_IS_ADDR_LOOPBACK(addr) || + (IN6_IS_ADDR_V4COMPAT(addr) && + addr->__in6_u.__u6_addr32[3] == htonl(INADDR_LOOPBACK)) || + (IN6_IS_ADDR_V4MAPPED(addr) && + addr->__in6_u.__u6_addr32[3] == htonl(INADDR_LOOPBACK))) + return true; + else + return false; +} + +static inline const struct in_addr * +ipv4_gateway_lookup(void *data, const struct in_addr *addr) +{ + uint8_t ls; + struct glue_ctx *ctx = data; + + if (is_ipv4_loopback_addr(addr->s_addr, ctx)) + return addr; + + ls = 32 - ctx->ipv4_ml; + if ((addr->s_addr << ls) == (ctx->ipv4 << ls)) + return addr; + + if (ctx->ipv4_gw.s_addr != 0) + return &ctx->ipv4_gw; + + return addr; +} + +static inline const struct in6_addr * +ipv6_gateway_lookup(void *data, const struct in6_addr *addr) +{ + uint8_t ls; + struct glue_ctx *ctx = data; + + if (is_ipv6_loopback_addr(addr, ctx)) + return addr; + + if (ctx->ipv6_ml <= 64) { + ls = 64 - ctx->ipv6_ml; + if ((*(const uint64_t*)addr << ls) == + (*(const uint64_t*)&ctx->ipv6 << ls)) 
+ return addr; + } else if (*(const uint64_t*)addr == *(const uint64_t*)&ctx->ipv6) { + ls = 128 - ctx->ipv6_ml; + if ((*((const uint64_t*)addr + 1) << ls) == + (*((const uint64_t*)&ctx->ipv6 + 1) << ls)) + return addr; + } + + if (!IN6_IS_ADDR_UNSPECIFIED(&ctx->ipv6_gw)) + return &ctx->ipv6_gw; + + return addr; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _TLE_GATEWAY_H_ */ diff --git a/lib/libtle_glue/icmp.c b/lib/libtle_glue/icmp.c new file mode 100644 index 0000000..aba1c4b --- /dev/null +++ b/lib/libtle_glue/icmp.c @@ -0,0 +1,297 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <time.h> +#include <netinet/icmp6.h> + +#include <rte_common.h> +#include <rte_byteorder.h> +#include <rte_ethdev.h> +#include <rte_icmp.h> +#include <rte_ip.h> + +#include "log.h" +#include "ctx.h" +#include "internal.h" + +#define ICMP_ECHOREPLY 0 /* Echo Reply */ +#define ICMP_ECHO 8 /* Echo Request */ +#define ICMP_TIMESTAMP 13 /* Timestamp Request */ +#define ICMP_TIMESTAMPREPLY 14 /* Timestamp Reply */ + +/* Codes for TIME_EXCEEDED. 
*/ +#define ICMP_EXC_TTL 0 /* TTL count exceeded */ +#define ICMP_EXC_FRAGTIME 1 /* Fragment Reass time exceeded */ + +/* Parameters used to convert the timespec values */ +#define SECONDS_PER_DAY 86400L +#define MSEC_PER_SEC 1000L +#define USEC_PER_MSEC 1000L +#define NSEC_PER_USEC 1000L +#define NSEC_PER_MSEC (NSEC_PER_USEC * USEC_PER_MSEC) + +#define IS_IPV4_BCAST(x) ((x) == (uint32_t)0xFFFFFFFF) + +struct icmp_pkt { + struct icmp_hdr icmp_h; + uint32_t times[3]; +}; + +/* Return remainder for ``dividend / divisor`` */ +static inline uint32_t +div_uint64_rem(uint64_t dividend, uint32_t divisor) +{ + return dividend % divisor; +} + +/* Return milliseconds since midnight (UTC) in network byte order. */ +static uint32_t +current_timestamp(void) +{ + struct timespec ts; + uint32_t msecs; + uint32_t secs; + + (void)clock_gettime(CLOCK_REALTIME, &ts); + + /* Get secs since midnight. */ + secs = div_uint64_rem(ts.tv_sec, SECONDS_PER_DAY); + /* Convert to msecs. */ + msecs = secs * MSEC_PER_SEC; + /* Convert nsec to msec. */ + msecs += (uint32_t)ts.tv_nsec / NSEC_PER_MSEC; + + /* Convert to network byte order. */ + return rte_cpu_to_be_32(msecs); +} + +/* + * Process the checksum of an ICMP packet. The checksum field must be set + * to 0 by the caller. + */ +static uint16_t +icmp_cksum(const struct icmp_hdr *icmp, uint32_t data_len) +{ + uint16_t cksum; + + cksum = rte_raw_cksum(icmp, sizeof(struct icmp_hdr) + data_len); + return (cksum == 0xffff) ? cksum : ~cksum; +} + +/** + * Receive and handle an ICMP packet. + * + * @param ctx + * The pointer to the glue context. + * @param pkt + * The pointer to the raw packet data. + * @param l2_len + * The the size of the l2 header. + * @return + * MUST return NULL now. 
:-) + */ +struct rte_mbuf * +icmp_recv(struct glue_ctx *ctx, struct rte_mbuf *pkt, + uint32_t l2_len, uint32_t l3_len) +{ + struct ether_addr eth_addr; + struct icmp_pkt *icmp_pkt; + struct ether_hdr *eth_h; + struct icmp_hdr *icmp_h; + struct ipv4_hdr *ip_h; + uint32_t ip_addr; + uint32_t cksum; + + eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + ip_h = (struct ipv4_hdr *) ((char *)eth_h + l2_len); + + icmp_h = (struct icmp_hdr *)((char *)ip_h + l3_len); + if (icmp_h->icmp_type != IP_ICMP_ECHO_REQUEST && + icmp_h->icmp_type != ICMP_TIMESTAMP) + goto drop_pkt; + + icmp_pkt = (struct icmp_pkt *)icmp_h; + + ether_addr_copy(ð_h->s_addr, ð_addr); + ether_addr_copy(ð_h->d_addr, ð_h->s_addr); + ether_addr_copy(ð_addr, ð_h->d_addr); + + /* + * Similar to Linux implementation, we silently drop the broadcast or + * multicast ICMP pakcets. + * + * RFC 1122: 3.2.2.6 An ICMP_ECHO to broadcast MAY be + * silently ignored. + * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently + * discarded if to broadcast/multicast. + */ + ip_addr = rte_be_to_cpu_32(ip_h->dst_addr); + if (IS_IPV4_MCAST(ip_addr) || IS_IPV4_BCAST(ip_addr)) + goto drop_pkt; + + ip_addr = ip_h->src_addr; + ip_h->src_addr = ip_h->dst_addr; + ip_h->dst_addr = ip_addr; + + if (icmp_h->icmp_type == IP_ICMP_ECHO_REQUEST && + icmp_h->icmp_code == 0) { + + /* Must clear checksum field before calling the helper. */ + ip_h->hdr_checksum = 0; + ip_h->hdr_checksum = rte_ipv4_cksum(ip_h); + + icmp_h->icmp_type = IP_ICMP_ECHO_REPLY; + icmp_h->icmp_code = 0; + + /* + * Fix me: the data part of an ICMP echo request/reply + * message is implementation specific, we don't know + * how to verify or calculate the checksum. + * + * Need to see BSD or LINUX implementation. 
+ */ + cksum = ~icmp_h->icmp_cksum & 0xffff; + cksum += ~rte_cpu_to_be_16(IP_ICMP_ECHO_REQUEST << 8) & 0xffff; + cksum += rte_cpu_to_be_16(IP_ICMP_ECHO_REPLY << 8); + cksum = (cksum & 0xffff) + (cksum >> 16); + cksum = (cksum & 0xffff) + (cksum >> 16); + icmp_h->icmp_cksum = ~cksum; + + } else if (icmp_h->icmp_type == ICMP_TIMESTAMP && + icmp_h->icmp_code == 0) { + + /* + * RFC 1122: 3.2.2.8 MAY implement ICMP timestamp requests. + * SHOULD be in the kernel for minimum random latency. + * MUST be accurate to a few minutes. + * MUST be updated at least at 15Hz. + */ + icmp_h->icmp_type = ICMP_TIMESTAMPREPLY; + icmp_h->icmp_code = 0; + icmp_pkt->times[1] = current_timestamp(); + icmp_pkt->times[2] = icmp_pkt->times[1]; + + icmp_h->icmp_cksum = 0; + /* the data part of an ICMP timestamp reply is 12 bytes. */ + icmp_h->icmp_cksum = icmp_cksum(icmp_h, 12); + } else + goto drop_pkt; + + if (pkt->pkt_len < ETHER_MIN_LEN) + rte_pktmbuf_append(pkt, ETHER_MIN_LEN - pkt->pkt_len); + + if (rte_eth_tx_burst(ctx->port_id, ctx->queue_id, &pkt, 1)) + GLUE_LOG(DEBUG, "Send ICMP echo reply OK"); + + return NULL; + +drop_pkt: + rte_pktmbuf_free(pkt); + return NULL; +} + +/** + * Receive and handle an ICMPv6 packet. + * + * @param ctx + * The pointer to the glue context. + * @param pkt + * The pointer to the raw packet data. + * @param l2_len + * The the size of the l2 header. + * @return + * MUST return NULL now. 
:-) + */ +struct rte_mbuf * +icmp6_recv(struct glue_ctx *ctx, struct rte_mbuf *pkt, + uint32_t l2_len, uint32_t l3_len) +{ + struct ether_addr eth_addr; + struct ether_hdr *eth_h; + struct icmp6_hdr *icmp6_h; + struct ipv6_hdr *ipv6_h; + struct in6_addr ipv6_addr; + uint32_t cksum; + + eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *); + ipv6_h = (struct ipv6_hdr *) ((char *)eth_h + l2_len); + + icmp6_h = (struct icmp6_hdr *)((char *)ipv6_h + l3_len); + + /* NDP pkt */ + if ((icmp6_h->icmp6_type == ND_NEIGHBOR_SOLICIT || + icmp6_h->icmp6_type == ND_NEIGHBOR_ADVERT) && + icmp6_h->icmp6_code == 0) + return ndp_recv(ctx, pkt, l2_len, l3_len); + + /* only support ECHO now, other types of pkts are dropped */ + if ((icmp6_h->icmp6_type != ICMP6_ECHO_REQUEST && + icmp6_h->icmp6_type != ICMP6_ECHO_REPLY) || + icmp6_h->icmp6_code != 0) + goto drop_pkt; + + ether_addr_copy(ð_h->s_addr, ð_addr); + ether_addr_copy(ð_h->d_addr, ð_h->s_addr); + ether_addr_copy(ð_addr, ð_h->d_addr); + + /* + * Now, we silently drop the anycast or multicast ICMP pakcets. + * But it does not conform to RFC 4443. Maybe fix it latter. + * + * RFC 4443: 4.2 An Echo Reply SHOULD be sent in response to an + * Echo Request message sent to an IPv6 multicast or anycast address. + * In this case, thesource address of the reply MUST be a unicast + * address belonging to the interface on which the Echo Request + * message was received. 
+ */ + switch (icmp6_h->icmp6_type) { + case ICMP6_ECHO_REQUEST: + if (memcmp(ipv6_h->dst_addr, &ctx->ipv6, + sizeof(struct in6_addr)) != 0) + goto drop_pkt; + + rte_memcpy(&ipv6_addr, ipv6_h->src_addr, + sizeof(struct in6_addr)); + rte_memcpy(ipv6_h->src_addr, ipv6_h->dst_addr, + sizeof(struct in6_addr)); + rte_memcpy(ipv6_h->dst_addr, &ipv6_addr, + sizeof(struct in6_addr)); + + icmp6_h->icmp6_type = ICMP6_ECHO_REPLY; + + cksum = ~icmp6_h->icmp6_cksum & 0xffff; + cksum += ~rte_cpu_to_be_16(ICMP6_ECHO_REQUEST << 8) & 0xffff; + cksum += rte_cpu_to_be_16(ICMP6_ECHO_REPLY << 8); + cksum = (cksum & 0xffff) + (cksum >> 16); + cksum = (cksum & 0xffff) + (cksum >> 16); + icmp6_h->icmp6_cksum = ~cksum; + + break; + default: + goto drop_pkt; + } + + if (pkt->pkt_len < ETHER_MIN_LEN) + rte_pktmbuf_append(pkt, ETHER_MIN_LEN - pkt->pkt_len); + + if (rte_eth_tx_burst(ctx->port_id, ctx->queue_id, &pkt, 1)) + GLUE_LOG(DEBUG, "Send ICMP echo reply OK"); + + return NULL; + +drop_pkt: + rte_pktmbuf_free(pkt); + return NULL; +} diff --git a/lib/libtle_glue/init.c b/lib/libtle_glue/init.c new file mode 100644 index 0000000..d845ef8 --- /dev/null +++ b/lib/libtle_glue/init.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <sched.h> +#include <pthread.h> +#include <stdlib.h> + +#include <rte_compat.h> +#include <rte_common.h> +#include <rte_debug.h> +#include <rte_eal.h> + +#include "util.h" +#include "fd.h" +#include "ctx.h" +#include "sym.h" +#include "log.h" +#include "internal.h" +#include "tle_glue.h" + +void +glue_init1(int argc, char **argv) +{ + GLUE_LOG(INFO, "init: DPDK and fd table..."); + + if (rte_eal_init(argc, argv) < 0) + rte_panic("Failed to init DPDK"); + + fd_init(); +} + +static void __attribute__((constructor(1000))) +glue_init(void) +{ + char *p; + int i, err, argc = 0; + char **argv = NULL, **argv_to_release = NULL; + char *vnic, *params, *no_huge; + cpu_set_t cpuset; + pthread_t tid = pthread_self(); + + symbol_init(); + +#define DPDK_PARAMS "DPDK_PARAMS" + params = getenv(DPDK_PARAMS); +#define DPDK_NO_HUGE "DPDK_NO_HUGE" + no_huge = getenv(DPDK_NO_HUGE); +#define DPDK_VNIC "DPDK_VNIC" + vnic = getenv(DPDK_VNIC); + + if (params == NULL && no_huge == NULL && vnic == NULL) + return; + + argv = grow_argv(argv, argc, 1); + argv[argc++] = xstrdup("userspace-stack"); + + /* Get the main thread affinity */ + CPU_ZERO(&cpuset); + err = pthread_getaffinity_np(tid, sizeof(cpu_set_t), &cpuset); + if (!err) { + for (i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, &cpuset)) { + argv = grow_argv(argv, argc, 2); + argv[argc++] = xstrdup("-l"); + argv[argc++] = xasprintf("%d", i); + i = CPU_SETSIZE; + } + } + } else { + argv = grow_argv(argv, argc, 2); + argv[argc++] = xstrdup("-l"); + argv[argc++] = xasprintf("0"); + } + + if (params) + p = strtok(params, " "); + else + p = NULL; + while (p != NULL) { + argv = grow_argv(argv, argc, 1); + argv[argc++] = xstrdup(p); + p = strtok(NULL, " "); + } + + if (no_huge) { + argv = grow_argv(argv, argc, 3); + argv[argc++] = xstrdup("-m"); + argv[argc++] = xstrdup("2048"); + argv[argc++] = xstrdup("--no-huge"); + } + + if (vnic) { + argv = grow_argv(argv, argc, 2); + 
argv[argc++] = xstrdup(vnic); + argv[argc++] = xstrdup("--no-pci"); + } + + argv = grow_argv(argv, argc, 1); + argv[argc++] = xstrdup("--"); + + argv_to_release = grow_argv(argv_to_release, 0, argc); + for (i = 0; i < argc; ++i) + argv_to_release[i] = argv[i]; + + glue_init1(argc, argv); + + /* Alloc and setup this default ctx for any sockets operations before + * thread/ctx binding which happens when epoll_wait. + */ + glue_ctx_alloc(); + + release_argv(argc, argv_to_release, argv); + + /* Set back the affinity */ + err = pthread_setaffinity_np(tid, sizeof(cpu_set_t), &cpuset); + if (err) + GLUE_LOG(ERR, "Failed to set back affinity"); +} + +static void __attribute__((destructor)) +glue_uninit(void) +{ + struct sock *so; + struct glue_ctx *ctx; + int i, max = fd_table.fd_base + fd_table.fd_num; + + /* TODO: lets optimize it */ + for (i = fd_table.fd_base; i < max; i++) { + so = fd2sock(i); + if (!so || !so->valid) + continue; + if (IS_TCP(so)) + tle_tcp_stream_kill(so->s); + } + + for (i = 0; i < nb_ctx; ++i) { + ctx = glue_ctx_lookup(0, i); + while (be_process(ctx)) { /* empty */ }; + } +} diff --git a/lib/libtle_glue/internal.h b/lib/libtle_glue/internal.h new file mode 100644 index 0000000..91fe784 --- /dev/null +++ b/lib/libtle_glue/internal.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _TLE_GLUE_INTERNAL_H_ +#define _TLE_GLUE_INTERNAL_H_ + +#include <rte_mbuf.h> +#include <rte_atomic.h> + +#include <tle_ctx.h> + +#include <sys/types.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <sys/epoll.h> + +#include "ctx.h" +#include "sym.h" +#include <rte_mempool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern int stopped; + +extern uint64_t rx_offload; +extern uint64_t tx_offload; + +void port_reconfig(void); + +uint16_t create_loopback(uint32_t socket_id); + +struct rte_mempool * get_mempool_by_socket(int32_t socket_id); + +int be_process(struct glue_ctx *ctx); + +int be_tx(struct glue_ctx *ctx); + +struct rte_mbuf * arp_recv(struct glue_ctx *ctx, + struct rte_mbuf *m, uint32_t l2len); + +struct rte_mbuf * ndp_recv(struct glue_ctx *ctx, + struct rte_mbuf *m, uint32_t l2len, uint32_t l3len); + + +void mac_check(struct glue_ctx *ctx, const struct sockaddr* addr); + +int arp_ipv4_dst_lookup(void *data, const struct in_addr *addr, + struct tle_dest *res, int proto); + +int arp_ipv6_dst_lookup(void *data, const struct in6_addr *addr, + struct tle_dest *res, int proto); + +int mac_fill(struct glue_ctx *ctx, struct rte_mbuf *m); + +void mac_timeout(struct glue_ctx *ctx); + +int setup_rx_cb(uint16_t port_id, uint16_t qid); + +int epoll_kernel_wait(struct glue_ctx *ctx, int efd, + struct epoll_event *events, + int maxevents, int timeout, int *rx); + +int poll_common(struct glue_ctx *ctx, struct epoll_event *events, + int maxevents, int timeout, int shadow_efd); + +int dev_rxq_wakeup(uint16_t port_id); + +struct rte_mbuf * icmp_recv(struct glue_ctx *ctx, struct rte_mbuf *pkt, + uint32_t l2len, uint32_t l3len); + +struct rte_mbuf * icmp6_recv(struct glue_ctx *ctx, struct rte_mbuf *pkt, + uint32_t l2len, uint32_t l3len); + +uint16_t typen_rx_callback(uint16_t port, uint16_t queue, + struct rte_mbuf *pkt[], uint16_t nb_pkts, + uint16_t max_pkts, void *user_param); + +void ipv4_dst_add(struct glue_ctx *ctx, const struct in_addr 
*addr, + struct ether_addr *e_addr); + +void ipv6_dst_add(struct glue_ctx *ctx, const struct in6_addr *addr, + struct ether_addr *e_addr); + +#ifdef LOOK_ASIDE_BACKEND +extern rte_atomic32_t flag_sleep; + +enum { + IOTHREAD_BUSY = 0, /* io thread is busy */ + IOTHREAD_SLEEP, /* io thread is sleeping */ + IOTHREAD_PREEMPT, /* io thread is preempted by another worker thread */ +}; + +static inline int +sleep_with_lock(int efd, struct epoll_event *events, int max, int to) +{ + int rc; + + rte_atomic32_set(&flag_sleep, IOTHREAD_SLEEP); + rc = k_epoll_pwait(efd, events, max, to, NULL); + while (rte_atomic32_cmpset((volatile uint32_t *)&flag_sleep, + IOTHREAD_SLEEP, IOTHREAD_BUSY) == 0); + + return rc; +} + +static inline void +be_tx_with_lock(struct glue_ctx *ctx) +{ + if (rte_atomic32_cmpset((volatile uint32_t *)&flag_sleep, + IOTHREAD_SLEEP, IOTHREAD_PREEMPT)) { + while (be_tx(ctx) > 0) {}; + rte_atomic32_set(&flag_sleep, IOTHREAD_SLEEP); + } +} + +static inline void +wake_lookaside_backend(struct glue_ctx *ctx) +{ + if (rte_atomic32_read(&flag_sleep) == IOTHREAD_PREEMPT) + dev_rxq_wakeup(ctx->port_id); +} + +static inline bool +io_thread_in_sleep(void) +{ + return rte_atomic32_read(&flag_sleep) == IOTHREAD_SLEEP; +} +#else +#define sleep_with_lock k_epoll_wait +#define be_tx_with_lock(ctx) do {} while(0) +#define wake_lookaside_backend(ctx) do {} while(0) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _TLE_GLUE_INTERNAL_H_ */ diff --git a/lib/libtle_glue/log.h b/lib/libtle_glue/log.h new file mode 100644 index 0000000..da31ea3 --- /dev/null +++ b/lib/libtle_glue/log.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _GLUE_LOG_H_ +#define _GLUE_LOG_H_ + +#include <arpa/inet.h> +#include <stdint.h> +#include <stdio.h> + +#include <rte_vect.h> +#include <rte_memcpy.h> +#include <rte_spinlock.h> +#include <rte_log.h> +#include <rte_errno.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * logging related macros. + */ + +#define GLUE_LOG(lvl, fmt, args...) RTE_LOG(lvl, USER1, fmt "\n", ##args) + +#define DUMMY_MACRO do {} while (0) + +#ifdef ENABLE_DEBUG +#define GLUE_DEBUG(fmt, arg...) fprintf(stderr, fmt "\n", ##arg) +#else +#define GLUE_DEBUG(fmt, arg...) DUMMY_MACRO +#endif + +#ifdef ENABLE_TRACE +#define TRACE(fmt, arg...) fprintf(stderr, fmt "\n", ##arg) +#define PKT_DUMP(p) rte_pktmbuf_dump(stderr, (p), 64) +#else +#define TRACE(fmt, arg...) DUMMY_MACRO +#define PKT_DUMP(p) DUMMY_MACRO +#endif + +#ifdef DEBUG_ARP +static inline void +print_arp(int af, const void *src, const struct ether_addr *mac, + const char *action) +{ + char str_ip[64]; + char str_mac[32]; + socklen_t sz; + + ether_format_addr(str_mac, sizeof(str_mac), mac); + sz = (af == AF_INET) ? sizeof(struct in_addr) : sizeof(struct in6_addr); + inet_ntop(af, src, str_ip, sz); + RTE_LOG(INFO, "%s ARP entry: %s\tmac=%s", action, str_ip, str_mac); +} +#else +#define print_arp(arg...) DUMMY_MACRO +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _GLUE_LOG_H_ */ diff --git a/lib/libtle_glue/ndp.h b/lib/libtle_glue/ndp.h new file mode 100644 index 0000000..a61ff5b --- /dev/null +++ b/lib/libtle_glue/ndp.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2019 Ant Financial Services Group. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _TLE_NDP_H_ +#define _TLE_NDP_H_ + +#define ND_OPT_SOURCE_LINKLAYER_ADDR 1 +#define ND_OPT_TARGET_LINKLAYER_ADDR 2 +#define ND_OPT_PREFIX_INFORMATION 3 +#define ND_OPT_REDIRECTED_HEADER 4 +#define ND_OPT_MTU 5 + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _TLE_NDP_H_ */ diff --git a/lib/libtle_glue/packetdrill.c b/lib/libtle_glue/packetdrill.c new file mode 100644 index 0000000..79d1d52 --- /dev/null +++ b/lib/libtle_glue/packetdrill.c @@ -0,0 +1,544 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <stdarg.h> +#include <stdlib.h> +#include <sys/time.h> +#include <arpa/inet.h> + +#include "packetdrill.h" +#include "tle_glue.h" +#include "internal.h" +#include "fd.h" + +#include <rte_arp.h> +#include <rte_common.h> +#include <rte_ethdev.h> +#include <rte_ip.h> +#include <rte_vhost.h> + +static int vhost_vid; +enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; +static const char *sockname = "/tmp/sock0"; + +static int +new_device(int vid) +{ + vhost_vid = vid; + + /* Disable notifications. */ + rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0); + rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0); + + return 0; +} + +static void +destroy_device(int vid) +{ + RTE_SET_USED(vid); +} + +static const struct vhost_device_ops device_ops = +{ + .new_device = new_device, + .destroy_device = destroy_device, +}; + +static void +vhost_init(void) +{ + unlink(sockname); + + if (rte_vhost_driver_register(sockname, 0) != 0) + rte_exit(EXIT_FAILURE, "failed to register vhost driver \n"); + + if (rte_vhost_driver_callback_register(sockname, &device_ops) != 0) + rte_exit(EXIT_FAILURE, "failed to register vhost driver callbacks.\n"); + + if (rte_vhost_driver_start(sockname) < 0) + rte_exit(EXIT_FAILURE, "failed to start vhost driver.\n"); + + rte_log_set_level(RTE_LOGTYPE_USER1, RTE_LOG_NOTICE); +} + +static uint64_t +now_usecs(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return ((uint64_t) tv.tv_sec * 1000000) + tv.tv_usec; +} + +static void +pd_free(void *userdata) +{ + RTE_SET_USED(userdata); +} + +static int +pd_socket(void *userdata, int domain, int type, int protocol) +{ + RTE_SET_USED(userdata); + return PRE(socket)(domain, type, protocol); +} + +static int +pd_bind(void *userdata, int sockfd, const struct sockaddr *addr, + socklen_t addrlen) +{ + RTE_SET_USED(userdata); + return PRE(bind)(sockfd, addr, addrlen); +} + +static int +pd_listen(void *userdata, int sockfd, int backlog) +{ + RTE_SET_USED(userdata); + return PRE(listen)(sockfd, 
backlog); +} + +static int +pd_accept(void *userdata, int sockfd, struct sockaddr *addr, + socklen_t *addrlen) +{ + RTE_SET_USED(userdata); + return PRE(accept)(sockfd, addr, addrlen); +} + +static int +pd_connect(void *userdata, int sockfd, const struct sockaddr *addr, + socklen_t addrlen) +{ + RTE_SET_USED(userdata); + return PRE(connect)(sockfd, addr, addrlen); +} + +static ssize_t +pd_read(void *userdata, int fd, void *buf, size_t count) +{ + RTE_SET_USED(userdata); + return PRE(read)(fd, buf, count); +} + +static ssize_t +pd_readv(void *userdata, int fd, const struct iovec *iov, int iovcnt) +{ + RTE_SET_USED(userdata); + return PRE(readv)(fd, iov, iovcnt); +} + +static ssize_t +pd_recv(void *userdata, int sockfd, void *buf, size_t len, int flags) +{ + RTE_SET_USED(userdata); + return PRE(recv)(sockfd, buf, len, flags); +} + +static ssize_t +pd_recvfrom(void *userdata, int sockfd, void *buf, size_t len, + int flags, struct sockaddr *src_addr, socklen_t *addrlen) +{ + RTE_SET_USED(userdata); + return PRE(recvfrom)(sockfd, buf, len, flags, src_addr, addrlen); +} + +static ssize_t +pd_recvmsg(void *userdata, int sockfd, struct msghdr *msg, int flags) +{ + RTE_SET_USED(userdata); + return PRE(recvmsg)(sockfd, msg, flags); +} + +static ssize_t +pd_write(void *userdata, int fd, const void *buf, size_t count) +{ + RTE_SET_USED(userdata); + return PRE(write)(fd, buf, count); +} + +static ssize_t +pd_writev(void *userdata, int fd, const struct iovec *iov, int iovcnt) +{ + RTE_SET_USED(userdata); + return PRE(writev)(fd, iov, iovcnt); +} + +static ssize_t +pd_send(void *userdata, int sockfd, const void *buf, size_t len, int flags) +{ + RTE_SET_USED(userdata); + return PRE(send)(sockfd, buf, len, flags); +} + +static ssize_t +pd_sendto(void *userdata, int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *dest_addr, socklen_t addrlen) +{ + RTE_SET_USED(userdata); + return PRE(sendto)(sockfd, buf, len, flags, dest_addr, addrlen); +} + +static ssize_t 
+pd_sendmsg(void *userdata, int sockfd, const struct msghdr *msg, int flags) +{ + RTE_SET_USED(userdata); + return PRE(sendmsg)(sockfd, msg, flags); +} + +static int +pd_fcntl(void *userdata, int fd, int cmd, ...) +{ + void *arg; + va_list ap; + + va_start(ap, cmd); + arg = va_arg(ap, void *); + va_end(ap); + + RTE_SET_USED(userdata); + return PRE(fcntl)(fd, cmd, arg); +} + +static int +pd_ioctl(void *userdata, int fd, unsigned long request, ...) +{ + void *arg; + va_list ap; + + va_start(ap, request); + arg = va_arg(ap, void *); + va_end(ap); + + RTE_SET_USED(userdata); + return PRE(ioctl)(fd, request, arg); +} + +static int +pd_close(void *userdata, int fd) +{ + RTE_SET_USED(userdata); + return PRE(close)(fd); +} + +static int +pd_shutdown(void *userdata, int sockfd, int how) +{ + RTE_SET_USED(userdata); + return PRE(shutdown)(sockfd, how); +} + +static int +pd_getsockopt(void *userdata, int sockfd, int level, int optname, + void *optval, socklen_t *optlen) +{ + RTE_SET_USED(userdata); + return PRE(getsockopt)(sockfd, level, optname, optval, optlen); +} + +static int +pd_setsockopt(void *userdata, int sockfd, int level, int optname, + const void *optval, socklen_t optlen) +{ + RTE_SET_USED(userdata); + return PRE(setsockopt)(sockfd, level, optname, optval, optlen); +} + +static int +pd_poll(void *userdata, struct pollfd *fds, nfds_t nfds, int timeout) +{ + RTE_SET_USED(userdata); + return PRE(poll)(fds, nfds, timeout); +} + +static struct rte_mbuf * +from_buf_to_mbuf(const void *buf, size_t count) +{ + struct rte_mempool *mp = get_mempool_by_socket(0); + uint16_t nb_mbufs = (count + RTE_MBUF_DEFAULT_DATAROOM - 1) / + RTE_MBUF_DEFAULT_DATAROOM; + struct rte_mbuf *mbufs[nb_mbufs + 1]; + uint16_t i, copy_len; + size_t done = 0; + char *dst; + + if (unlikely(rte_pktmbuf_alloc_bulk(mp, mbufs, nb_mbufs) < 0)) + rte_exit(EXIT_FAILURE, "allocate mbuf fails\n"); + + for (i = 0; i < nb_mbufs; ++i) { + copy_len = RTE_MIN((size_t)RTE_MBUF_DEFAULT_DATAROOM, + count - done); + 
dst = rte_pktmbuf_mtod(mbufs[i], char *); + rte_memcpy(dst, (const char *)buf + done, copy_len); + done += copy_len; + mbufs[i]->data_len = copy_len; + if (i > 0) + mbufs[i-1]->next = mbufs[i]; + } + + mbufs[0]->pkt_len = count; + mbufs[0]->nb_segs = nb_mbufs; + + return mbufs[0]; +} + +/* Send @count bytes of data starting from @buf to the TCP stack. + * Return 0 on success or -1 on error. + */ +static int +pd_netdev_send(void *userdata, const void *buf, size_t count) +{ + struct ether_hdr *hdr; + struct rte_mbuf *m; + + RTE_SET_USED(userdata); + + m = from_buf_to_mbuf(buf, count); + + // add l2 header + hdr = (struct ether_hdr *)rte_pktmbuf_prepend(m, sizeof(struct ether_hdr)); + hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4); + + if (rte_vhost_enqueue_burst(vhost_vid, VIRTIO_RXQ, &m, 1) == 1) + return 0; + + return -1; +} + +static inline struct rte_mbuf * +from_mbuf_to_buf(struct rte_mbuf *m, char *buf, size_t len, int ispeek, int needcpy) +{ + void *src; + uint32_t done = 0; + uint32_t left = len, orig_pkt_len; + uint16_t copy_len, seg_len; + struct rte_mbuf *m_next, *orig_pkt; + + if (len == 0) + return m; + + orig_pkt = m; + orig_pkt_len = m->pkt_len; + + do { + seg_len = rte_pktmbuf_data_len(m); + copy_len = RTE_MIN(seg_len, left); + src = rte_pktmbuf_mtod(m, void *); + if (needcpy) + rte_memcpy(buf + done, src, copy_len); + done += copy_len; + left -= copy_len; + if (copy_len < seg_len) { + if (!ispeek) { + rte_pktmbuf_adj(m, copy_len); + } + break; + } + m_next = m->next; + if (!ispeek) { + rte_pktmbuf_free_seg(m); + } + m = m_next; + } while (left && m); + + if (m && !ispeek) + m->pkt_len = orig_pkt_len - done; + + if(ispeek) + return orig_pkt; + else + return m; +} + +/* Sniff the next packet leaving the TCP stack. + * Put packet data in @buf. @count is passed in as the buffer size. + * The actual number of bytes received should be put in @count. + * Set @count to 0 if received nothing. + * Set @time_usecs to the receive timestamp. 
+ * Return 0 on success or -1 on error. */ +static int +pd_netdev_recv(void *userdata, void *buf, size_t *count, long long *time_usecs) +{ + struct rte_mbuf *m; + struct rte_mempool *mp = get_mempool_by_socket(0); + + RTE_SET_USED(userdata); + + while (rte_vhost_dequeue_burst(vhost_vid, VIRTIO_TXQ, mp, &m, 1) == 0); + + // remove l2 header + rte_pktmbuf_adj(m, sizeof(struct ether_hdr)); + + *count = m->pkt_len; + from_mbuf_to_buf(m, buf, *count, 0, 1); + + *time_usecs = now_usecs(); + return 0; +} + +static int +pd_usleep(void *userdata, useconds_t usec) +{ + RTE_SET_USED(userdata); + return usleep(usec); +} + +static int +pd_gettimeofday(void *userdata, struct timeval *tv, struct timezone *tz) +{ + RTE_SET_USED(userdata); + return gettimeofday(tv, tz); +} + +static int +pd_epoll_create(void *userdata, int size) +{ + RTE_SET_USED(userdata); + return PRE(epoll_create)(size); +} + +static int +pd_epoll_ctl(void *userdata, int epfd, int op, int fd, + struct epoll_event *event) +{ + RTE_SET_USED(userdata); + return PRE(epoll_ctl)(epfd, op, fd, event); +} + +static int +pd_epoll_wait(void *userdata, int epfd, struct epoll_event *events, + int maxevents, int timeout) +{ + RTE_SET_USED(userdata); + return PRE(epoll_wait)(epfd, events, maxevents, timeout); +} + +static int +pd_pipe(void *userdata, int pipefd[2]) +{ + RTE_SET_USED(userdata); + return pipe(pipefd); +} + +static int +pd_splice(void *userdata, int fd_in, loff_t *off_in, int fd_out, + loff_t *off_out, size_t len, unsigned int flags) +{ + RTE_SET_USED(userdata); + return PRE(splice)(fd_in, off_in, fd_out, off_out, len, flags); +} + +static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + +static void * +io(void *arg) +{ + int epfd; + struct in_addr ipv4; + struct ether_addr mac = { .addr_bytes = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff}, }; + struct epoll_event events[128]; + + RTE_SET_USED(arg); + + setenv(DPDK_IP, "192.168.0.2", 1); + setenv(DPDK_IP_MASK, "16", 1); + setenv(DPDK_IP_GATEWAY, "192.168.0.1", 1); + 
+ setenv(DPDK_IPV6, "fd3d:fa7b:d17d::0", 1); + setenv(DPDK_IPV6_MASK, "48", 1); + setenv(DPDK_IPV6_GATEWAY, "fd3d:fa7b:d17d:8888::0", 1); + + epfd = PRE(epoll_create)(0); + + inet_pton(AF_INET, "192.168.0.1", &ipv4); + + ipv4_dst_add(default_ctx, &ipv4, &mac); + + pthread_mutex_unlock(&lock); + + while (1) { + PRE(epoll_wait)(epfd, events, 128, 0); + } + + return NULL; +} + +void +packetdrill_interface_init(const char *flags, + struct packetdrill_interface *ifc) +{ + int argc = 0; + char *argv[16]; + pthread_t tid; + + RTE_SET_USED(flags); + + argv[argc++] = strdup("test"); + argv[argc++] = strdup("-l"); + argv[argc++] = strdup("0"); + argv[argc++] = strdup("--no-pci"); + argv[argc++] = strdup("--in-memory"); + argv[argc++] = strdup("--single-file-segments"); + argv[argc++] = strdup("--"); + + if (rte_eal_init(argc, argv) < 0) + rte_exit(EXIT_FAILURE, "Failed to init DPDK\n"); + + fd_init(); + + vhost_init(); + + if (rte_eal_hotplug_add("vdev", "virtio_user0", "path=/tmp/sock0") < 0) + rte_exit(EXIT_FAILURE, "hot plug virtio-user failed\n"); + + pthread_mutex_lock(&lock); + + pthread_create(&tid, NULL, io, NULL); + + pthread_mutex_lock(&lock); + + ifc->free = pd_free; + ifc->socket = pd_socket; + ifc->bind = pd_bind; + ifc->listen = pd_listen; + ifc->accept = pd_accept; + ifc->connect = pd_connect; + ifc->read = pd_read; + ifc->readv = pd_readv; + ifc->recv = pd_recv; + ifc->recvfrom = pd_recvfrom; + ifc->recvmsg = pd_recvmsg; + ifc->write = pd_write; + ifc->writev = pd_writev; + ifc->send = pd_send; + ifc->sendto = pd_sendto; + ifc->sendmsg = pd_sendmsg; + ifc->fcntl = pd_fcntl; + ifc->ioctl = pd_ioctl; + ifc->close = pd_close; + ifc->shutdown = pd_shutdown; + ifc->getsockopt = pd_getsockopt; + ifc->setsockopt = pd_setsockopt; + ifc->poll = pd_poll; + ifc->netdev_send = pd_netdev_send; + ifc->netdev_receive = pd_netdev_recv; + ifc->usleep = pd_usleep; + ifc->gettimeofday = pd_gettimeofday; + ifc->epoll_create = pd_epoll_create; + ifc->epoll_ctl = pd_epoll_ctl; + 
ifc->epoll_wait = pd_epoll_wait; + ifc->pipe = pd_pipe; + ifc->splice = pd_splice; +} diff --git a/lib/libtle_glue/packetdrill.h b/lib/libtle_glue/packetdrill.h new file mode 100644 index 0000000..6f84a87 --- /dev/null +++ b/lib/libtle_glue/packetdrill.h @@ -0,0 +1,111 @@ +/* + * Copyright 2015 Google Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ +/* + * Author: xiaoj@google.com (Xiao Jia) + * + * Interface for packetdrill. + * + * To be tested against as a shared object (*.so) file, implement this + * interface, export a function "packetdrill_interface_init", and + * initialize the interface struct passed in with your own functions. 
+ */ + +#ifndef __PACKETDRILL_H__ +#define __PACKETDRILL_H__ + +#include <poll.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/epoll.h> +#include <unistd.h> + +struct packetdrill_interface { + void *userdata; + void (*free)(void *userdata); + int (*socket)(void *userdata, int domain, int type, int protocol); + int (*bind)(void *userdata, int sockfd, const struct sockaddr *addr, + socklen_t addrlen); + int (*listen)(void *userdata, int sockfd, int backlog); + int (*accept)(void *userdata, int sockfd, struct sockaddr *addr, + socklen_t *addrlen); + int (*connect)(void *userdata, int sockfd, const struct sockaddr *addr, + socklen_t addrlen); + ssize_t (*read)(void *userdata, int fd, void *buf, size_t count); + ssize_t (*readv)(void *userdata, int fd, const struct iovec *iov, + int iovcnt); + ssize_t (*recv)(void *userdata, int sockfd, void *buf, size_t len, + int flags); + ssize_t (*recvfrom)(void *userdata, int sockfd, void *buf, size_t len, + int flags, struct sockaddr *src_addr, + socklen_t *addrlen); + ssize_t (*recvmsg)(void *userdata, int sockfd, struct msghdr *msg, + int flags); + ssize_t (*write)(void *userdata, int fd, const void *buf, size_t count); + ssize_t (*writev)(void *userdata, int fd, const struct iovec *iov, + int iovcnt); + ssize_t (*send)(void *userdata, int sockfd, const void *buf, size_t len, + int flags); + ssize_t (*sendto)(void *userdata, int sockfd, const void *buf, + size_t len, int flags, + const struct sockaddr *dest_addr, socklen_t addrlen); + ssize_t (*sendmsg)(void *userdata, int sockfd, const struct msghdr *msg, + int flags); + int (*fcntl)(void *userdata, int fd, int cmd, ...); + int (*ioctl)(void *userdata, int fd, unsigned long request, ...); + int (*close)(void *userdata, int fd); + int (*shutdown)(void *userdata, int sockfd, int how); + int (*getsockopt)(void *userdata, int sockfd, int level, int optname, + void *optval, socklen_t *optlen); + int (*setsockopt)(void 
*userdata, int sockfd, int level, int optname, + const void *optval, socklen_t optlen); + int (*poll)(void *userdata, struct pollfd *fds, nfds_t nfds, + int timeout); + /* Send @count bytes of data starting from @buf to the TCP stack. + * Return 0 on success or -1 on error. */ + int (*netdev_send)(void *userdata, const void *buf, size_t count); + /* Sniff the next packet leaving the TCP stack. + * Put packet data in @buf. @count is passed in as the buffer size. + * The actual number of bytes received should be put in @count. + * Set @count to 0 if received nothing. + * Set @time_usecs to the receive timestamp. + * Return 0 on success or -1 on error. */ + int (*netdev_receive)(void *userdata, void *buf, size_t *count, + long long *time_usecs); + int (*usleep)(void *userdata, useconds_t usec); + int (*gettimeofday)(void *userdata, struct timeval *tv, + struct timezone *tz); + int (*epoll_create)(void *userdata, int size); + int (*epoll_ctl)(void *userdata, int epfd, int op, int fd, + struct epoll_event *event); + int (*epoll_wait)(void *userdata, int epfd, struct epoll_event *events, + int maxevents, int timeout); + int (*pipe)(void *userdata, int pipefd[2]); + int (*splice)(void *userdata, int fd_in, loff_t *off_in, int fd_out, + loff_t *off_out, size_t len, unsigned int flags); +}; + +typedef void (*packetdrill_interface_init_t)(const char *flags, + struct packetdrill_interface *); + +void +packetdrill_interface_init(const char *flags, struct packetdrill_interface *ifc); + +#endif /* __PACKETDRILL_H__ */ diff --git a/lib/libtle_glue/poll.c b/lib/libtle_glue/poll.c new file mode 100644 index 0000000..ebc0110 --- /dev/null +++ b/lib/libtle_glue/poll.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <signal.h> +#include <poll.h> + +#include "fd.h" +#include "ctx.h" +#include "sym.h" +#include "log.h" +#include "util.h" +#include "internal.h" +#include "tle_glue.h" + +int +PRE(poll)(struct pollfd *fds, nfds_t nfds, int timeout) +{ + int efd; + int total = 0, j; + int tmp_ev; + uint32_t i; + uint32_t k_n = 0; + int k_fds[nfds]; + struct sock *so; + struct glue_ctx *ctx; + struct epoll_event k_ev; + struct epoll_event events[nfds]; + + for (i = 0; i < nfds; ++i) { + if (is_kernel_fd(fds[i].fd)) { + k_fds[k_n++] = i; + continue; + } + + so = fd2sock(fds[i].fd); + if (!so->valid) + continue; + + fds[i].revents = fd_ready(fds[i].fd, fds[i].events); + if (fds[i].revents) { + total++; + continue; + } + + /* We fill sock->event here as we need this when + * we filter events in poll_common(). But it was + * originally set by epoll_ctl(). Now we have to + * assume that there are no application which + * uses epoll and poll at the same time. 
+ */ + so->event.events = fds[i].events; + so->event.data.u32 = i; /* store idx */ + } + + if (k_n == nfds) + return k_poll(fds, nfds, timeout); + + if (total > 0) + return total; + + /* thread <> context binding happens here */ + if (RTE_PER_LCORE(glue_ctx) == NULL) { + ctx = &ctx_array[glue_ctx_alloc()]; + RTE_PER_LCORE(glue_ctx) = ctx; + } else + ctx = RTE_PER_LCORE(glue_ctx); + + total = poll_common(ctx, events, nfds, 0, -1); + + /* We assume kernel I/O events are not as important as user ones */ + if (total > 0) + goto format; + + efd = k_epoll_create(1); + if (efd < 0) + rte_panic("k_epoll_create failed %d", errno); + + for (i = 0; i < k_n; ++i) { + k_ev.events = fds[k_fds[i]].events; + k_ev.data.u32 = k_fds[i]; /* store idx */ + k_epoll_ctl(efd, EPOLL_CTL_ADD, fds[k_fds[i]].fd, &k_ev); + } + + total = poll_common(ctx, events, nfds, timeout, efd); + k_close(efd); +format: + for (j = 0; j < total; ++j) { + tmp_ev = events[j].events; + if (tmp_ev == POLLHUP) { + tmp_ev |= POLLERR | (fds[events[j].data.u32].events & + (POLLIN | POLLOUT)); + } + fds[events[j].data.u32].revents = tmp_ev; + } + + return total; +} + +int +PRE(ppoll)(struct pollfd *fds, nfds_t nfds, + const struct timespec *tmo_p, const sigset_t *sigmask) +{ + int timeout; + + if (sigmask != NULL) + rte_panic("ppoll with signal is not supported"); + + if (tmo_p == NULL) + timeout = -1; + else + timeout = tmo_p->tv_sec * 1000 + tmo_p->tv_nsec / 1000000; + + return poll(fds, nfds, timeout); +} + +extern int __poll_chk(struct pollfd *fds, nfds_t nfds, int timeout, + __SIZE_TYPE__ fdslen); +int +__poll_chk(struct pollfd *fds, nfds_t nfds, int timeout, + __SIZE_TYPE__ fdslen __rte_unused) +{ + return poll(fds, nfds, timeout); +} diff --git a/lib/libtle_glue/port.c b/lib/libtle_glue/port.c new file mode 100644 index 0000000..7a4cf2e --- /dev/null +++ b/lib/libtle_glue/port.c @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <sys/eventfd.h> +#include <unistd.h> + +#include <rte_ethdev.h> +#include <rte_eth_ring.h> + +#include "log.h" +#include "ctx.h" +#include "config.h" +#include "internal.h" + +int stopped; + +static struct rte_mempool *mpool[RTE_MAX_NUMA_NODES]; + +struct rte_mempool * +get_mempool_by_socket(int32_t socket_id) +{ + struct rte_mempool *mp; + char name[RTE_MEMPOOL_NAMESIZE]; + + if (socket_id == SOCKET_ID_ANY) + socket_id = 0; + + if (mpool[socket_id]) + return mpool[socket_id]; + + snprintf(name, sizeof(name), "MP%u", socket_id); + mp = rte_pktmbuf_dynamic_pool_create(name, MAX_MBUFS - 1, + MBUF_PERCORE_CACHE, 0, + RTE_MBUF_DEFAULT_BUF_SIZE, + socket_id, MBUF_DYNAMIC_SIZE); + + if (mp == NULL) + rte_panic("Failed to create mbuf mempool"); + + mpool[socket_id] = mp; + return mp; +} + +static void +update_rss_conf(uint16_t port_id) +{ + struct rte_eth_rss_conf rss_conf = { + .rss_key = NULL, + .rss_key_len = 0, + .rss_hf = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP, + }; + + if (rte_eth_dev_rss_hash_update(port_id, &rss_conf) < 0) + rte_panic("Failed to update rss hash"); +} + +static void +queue_init(uint16_t port_id, uint16_t nb_queues, + struct rte_eth_dev_info *dev_info, + struct rte_eth_conf *port_conf) +{ + uint16_t q; + int32_t socket_id, rc; + uint16_t nb_rxd = 1024, nb_txd = 1024; + struct rte_mempool *mp; + struct rte_eth_txconf txq_conf = dev_info->default_txconf; + struct rte_eth_rxconf 
rxq_conf = dev_info->default_rxconf; + + socket_id = rte_eth_dev_socket_id(port_id); + mp = get_mempool_by_socket(socket_id); + + dev_info->default_rxconf.rx_drop_en = 1; + + rc = rte_eth_dev_adjust_nb_rx_tx_desc(port_id, &nb_rxd, &nb_txd); + if (rc < 0) + rte_panic("Cannot adjust number of desc"); + + rxq_conf.offloads = port_conf->rxmode.offloads; + txq_conf.offloads = port_conf->txmode.offloads; + + /* faster free of tx entries */ + txq_conf.tx_free_thresh = nb_txd - 64; + + for (q = 0; q < nb_queues; q++) { + rc = rte_eth_rx_queue_setup(port_id, q, nb_rxd, + socket_id, &rxq_conf, mp); + if (rc < 0) + rte_panic("rx queue=%u setup failed: %d", q, rc); + + rc = setup_rx_cb(port_id, q); + if (rc < 0) + rte_panic("rx queue=%u rx setup failed: %d", q, rc); + } + + for (q = 0; q < nb_queues; q++) { + rc = rte_eth_tx_queue_setup(port_id, q, nb_txd, + socket_id, &txq_conf); + if (rc < 0) + rte_panic("tx queue=%u setup failed: %d", q, rc); + } +} + +uint64_t rx_offload = + DEV_RX_OFFLOAD_IPV4_CKSUM | + DEV_RX_OFFLOAD_UDP_CKSUM | + DEV_RX_OFFLOAD_TCP_CKSUM; +/* nice to have: + DEV_RX_OFFLOAD_CRC_STRIP | + DEV_RX_OFFLOAD_TCP_LRO | + DEV_RX_OFFLOAD_HEADER_SPLIT | + DEV_RX_OFFLOAD_SCATTER | + DEV_RX_OFFLOAD_TIMESTAMP +*/ + +uint64_t tx_offload = + DEV_TX_OFFLOAD_UDP_CKSUM | + DEV_TX_OFFLOAD_TCP_CKSUM | + DEV_TX_OFFLOAD_TCP_TSO | + DEV_TX_OFFLOAD_MULTI_SEGS; + +int +dev_rxq_wakeup(uint16_t port_id) +{ + int fd; + uint16_t qid; + uint32_t vec, efd_idx; + struct rte_eth_dev *dev; + struct rte_intr_handle *intr_handle; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + intr_handle = dev->intr_handle; + if (!intr_handle) + return -ENOTSUP; + if (!intr_handle->intr_vec) + return -EPERM; + + for (qid = 0; qid < dev->data->nb_rx_queues; qid++) { + vec = intr_handle->intr_vec[qid]; + efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ? 
+ (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec; + fd = intr_handle->efds[efd_idx]; + if (eventfd_write(fd, (eventfd_t) 1) < 0) + return -errno; + } + + return 0; +} + +void +port_reconfig(void) +{ + int32_t rc; + struct rte_eth_dev_info dev_info; + uint16_t port_id = 0; /* We use and only use port 0 */ + uint16_t nb_port; + uint16_t nb_queues = nb_ctx; + + struct rte_eth_conf port_conf = { + .intr_conf = { + .rxq = 1, + }, + }; + + /* 0. dev number check */ + nb_port = rte_eth_dev_count_avail(); + if (nb_port < 1 || nb_port >2) + rte_panic("One port is mandatory with an optional loopback device\n"); + + stopped = 1; + rte_wmb(); + /* wake up all rxqs */ + if (nb_ctx > 1) + dev_rxq_wakeup(port_id); + + usleep(1); /* fix me: this cannot gurantee correctness */ + + rte_eth_dev_stop(port_id); + + /* 1. offloading check and set*/ + rte_eth_dev_info_get(port_id, &dev_info); + rx_offload &= dev_info.rx_offload_capa; + port_conf.rxmode.offloads = rx_offload; + tx_offload &= dev_info.tx_offload_capa; + port_conf.txmode.offloads = tx_offload; + + GLUE_LOG(INFO, "configure queues = %d, offloads: rx = %"PRIx64", tx = %"PRIx64, + nb_queues, rx_offload, tx_offload); + + /* 2. dev configure */ + rc = rte_eth_dev_configure(port_id, nb_queues, nb_queues, &port_conf); + if (rc != 0) + rte_panic("Failed to configure device, %d", rc); + + /* 3. queue setup */ + queue_init(port_id, nb_queues, &dev_info, &port_conf); + + /* 4. rss conf */ + if (nb_queues > 1) + update_rss_conf(port_id); + + /* 5. 
dev start */ + if (rte_eth_dev_start(port_id) < 0) + rte_panic("Failed to start device"); + + stopped = 0; +} + +uint16_t +create_loopback(uint32_t socket_id) +{ + int ret; + struct rte_ring* lb_queue; + static uint16_t lb_port_id = 0xFFFF; + const char *ring_name = "loopback-ring"; + + if (lb_port_id != 0xFFFF) + return lb_port_id; + + lb_queue = rte_ring_create(ring_name, MAX_PKTS_BURST * 8, socket_id, + RING_F_SP_ENQ | RING_F_SC_DEQ); + if (!lb_queue) + rte_panic("Failed to create ring for loopback\n"); + ret = rte_eth_from_ring(lb_queue); + if (ret < 0) + rte_panic("Failed to create ethdev from ring\n"); + lb_port_id = ret; + + if (setup_rx_cb(lb_port_id, 0) < 0) + rte_panic("Failed to set up rx cb for loopback\n"); + + return lb_port_id; +} diff --git a/lib/libtle_glue/rxcb.c b/lib/libtle_glue/rxcb.c new file mode 100644 index 0000000..51f31c9 --- /dev/null +++ b/lib/libtle_glue/rxcb.c @@ -0,0 +1,834 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <rte_ethdev.h> +#include <rte_arp.h> +#include <rte_ip.h> +#include <rte_tcp.h> +#include <rte_udp.h> + +#include <netinet/in.h> +#include <netinet/ip6.h> + +#include "log.h" +#include "ctx.h" +#include "internal.h" + +struct ptype2cb { + uint32_t mask; + const char *name; + rte_rx_callback_fn fn; +}; + +enum { + ETHER_ARP_PTYPE = 0x1, + IPV4_PTYPE = 0x2, + IPV4_EXT_PTYPE = 0x4, + IPV6_PTYPE = 0x8, + IPV6_EXT_PTYPE = 0x10, + TCP_PTYPE = 0x20, + UDP_PTYPE = 0x40, + ICMP_PTYPE = 0x80, +}; + +static inline uint64_t +_mbuf_tx_offload(uint64_t il2, uint64_t il3, uint64_t il4, uint64_t tso, + uint64_t ol3, uint64_t ol2) +{ + return il2 | il3 << 7 | il4 << 16 | tso << 24 | ol3 << 40 | ol2 << 49; +} + +static inline int32_t +fill_pkt_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t l3, uint32_t l4) +{ + if (l2 + l3 + l4 > m->pkt_len) + return -1; + m->tx_offload = _mbuf_tx_offload(l2, l3, l4, 0, 0, 0); + return 0; +} + +static inline int +is_ipv4_frag(const struct ipv4_hdr *iph) +{ + const uint16_t mask = rte_cpu_to_be_16(~IPV4_HDR_DF_FLAG); + + return ((mask & iph->fragment_offset) != 0); +} + +static inline uint32_t +get_tcp_header_size(struct rte_mbuf *m, uint32_t l2_len, uint32_t l3_len) +{ + const struct tcp_hdr *tcp; + + tcp = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, l2_len + l3_len); + return (tcp->data_off >> 4) * 4; +} + +static inline int32_t +adjust_ipv4_pktlen(struct rte_mbuf *m, uint32_t l2_len) +{ + uint32_t plen, trim; + const struct ipv4_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2_len); + plen = rte_be_to_cpu_16(iph->total_length) + l2_len; + if (plen < m->pkt_len) { + trim = m->pkt_len - plen; + rte_pktmbuf_trim(m, trim); + } else if (plen > m->pkt_len) + return -1; + + return 0; +} + +static inline int32_t +adjust_ipv6_pktlen(struct rte_mbuf *m, uint32_t l2_len) +{ + uint32_t plen, trim; + const struct ipv6_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, l2_len); + plen = 
rte_be_to_cpu_16(iph->payload_len) + sizeof(*iph) + l2_len; + if (plen < m->pkt_len) { + trim = m->pkt_len - plen; + rte_pktmbuf_trim(m, trim); + } else if (plen > m->pkt_len) + return -1; + + return 0; +} + +static inline uint32_t +get_ipv4_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t proto, uint32_t frag) +{ + const struct ipv4_hdr *iph; + int32_t dlen, len; + + dlen = rte_pktmbuf_data_len(m); + dlen -= l2; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2); + len = (iph->version_ihl & IPV4_HDR_IHL_MASK) * IPV4_IHL_MULTIPLIER; + + if (frag != 0 && is_ipv4_frag(iph)) { + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_FRAG; + } + + if (len > dlen || (proto <= IPPROTO_MAX && iph->next_proto_id != proto)) + m->packet_type = RTE_PTYPE_UNKNOWN; + + return len; +} + +static inline uint32_t +get_ipv6x_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t *fproto) +{ + const struct ipv6_hdr *ip6h; + const struct ip6_ext *ipx; + uint32_t nproto; + int32_t dlen, len, ofs; + + ip6h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr*, l2); + nproto = ip6h->proto; + len = sizeof(struct ipv6_hdr); + + dlen = rte_pktmbuf_data_len(m); + dlen -= l2; + + ofs = l2 + len; + ipx = rte_pktmbuf_mtod_offset(m, const struct ip6_ext *, ofs); + + while (ofs > 0 && len < dlen) { + switch (nproto) { + case IPPROTO_HOPOPTS: + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + ofs = (ipx->ip6e_len + 1) << 3; + break; + case IPPROTO_AH: + ofs = (ipx->ip6e_len + 2) << 2; + break; + case IPPROTO_FRAGMENT: + /* + * tso_segsz is not used by RX, so use it as temporary + * buffer to store the fragment offset. 
+ */ + m->tso_segsz = l2 + len; + ofs = sizeof(struct ip6_frag); + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_FRAG; + break; + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_ICMPV6: + ofs = 0; + if (*fproto == 0) + *fproto = nproto; + break; + default: + ofs = 0; + } + + if (ofs > 0) { + nproto = ipx->ip6e_nxt; + len += ofs; + ipx += ofs / sizeof(*ipx); + } + } + + /* unrecognized or invalid packet. */ + if (*fproto == 0 || len > dlen) + m->packet_type = RTE_PTYPE_UNKNOWN; + + return len; +} + +static inline uint32_t +get_ipv6_hdr_len(struct rte_mbuf *m, uint32_t l2, uint32_t fproto) +{ + const struct ipv6_hdr *iph; + + iph = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, + sizeof(struct ether_hdr)); + + if (iph->proto == fproto) + return sizeof(struct ipv6_hdr); + else + return get_ipv6x_hdr_len(m, l2, &fproto); +} + +static inline struct rte_mbuf* +process_ipv4_frag(struct rte_mbuf *m, struct glue_ctx *ctx, + uint32_t l2_len, uint32_t l3_len) +{ + struct ipv4_hdr* iph; + + m->l2_len = l2_len; + m->l3_len = l3_len; + /* fixme: ip checksum should be checked here. + * After reassemble, the ip checksum would be invalid. 
+ */ + m = rte_ipv4_frag_reassemble_packet(ctx->frag_tbl, + &ctx->frag_dr, m, rte_rdtsc(), + rte_pktmbuf_mtod_offset(m, struct ipv4_hdr*, m->l2_len)); + rte_ip_frag_free_death_row(&ctx->frag_dr, 3); + if (m == NULL) + return NULL; + iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr*, m->l2_len); + switch (iph->next_proto_id) { + case IPPROTO_TCP: + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_TCP; + break; + case IPPROTO_UDP: + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_UDP; + break; + } + return m; +} + +static inline struct rte_mbuf* +process_ipv6_frag(struct rte_mbuf *m, struct glue_ctx *ctx, + uint32_t l2_len, uint32_t l3_len) +{ + struct ipv6_hdr* ip6h; + + m->l2_len = l2_len; + m->l3_len = l3_len; + m = rte_ipv6_frag_reassemble_packet(ctx->frag_tbl, + &ctx->frag_dr, m, rte_rdtsc(), + rte_pktmbuf_mtod_offset(m, struct ipv6_hdr*, l2_len), + rte_pktmbuf_mtod_offset(m, struct ipv6_extension_fragment*, + m->tso_segsz)); + rte_ip_frag_free_death_row(&ctx->frag_dr, 3); + if (m == NULL) + return NULL; + ip6h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr*, m->l2_len); + switch (ip6h->proto) { + case IPPROTO_TCP: + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_TCP; + break; + case IPPROTO_UDP: + m->packet_type &= ~RTE_PTYPE_L4_MASK; + m->packet_type |= RTE_PTYPE_L4_UDP; + break; + } + return m; +} + +static inline struct rte_mbuf * +fill_ptypes_and_hdr_len(struct glue_ctx *ctx, struct rte_mbuf *m) +{ + uint32_t dlen, l2_len, l3_len, l4_len, proto; + const struct ether_hdr *eth; + uint32_t ptypes; + uint16_t etp; + int32_t error = 0; + + dlen = rte_pktmbuf_data_len(m); + + /* L2 */ + l2_len = sizeof(*eth); + + eth = rte_pktmbuf_mtod(m, const struct ether_hdr *); + etp = eth->ether_type; + while (etp == rte_be_to_cpu_16(ETHER_TYPE_VLAN)) { + etp = rte_pktmbuf_mtod_offset(m, struct vlan_hdr*, l2_len)->eth_proto; + l2_len += sizeof(struct vlan_hdr); + } + + if (etp == 
rte_be_to_cpu_16(ETHER_TYPE_ARP)) + return arp_recv(ctx, m, l2_len); + + if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv4)) { + const struct ipv4_hdr *hdr; + + /* L3 */ + hdr = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, l2_len); + error = adjust_ipv4_pktlen(m, l2_len); + if (error) { + rte_pktmbuf_free(m); + return NULL; + } + l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_MAX + 1, 1); + + if ((m->packet_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_FRAG) { + m = process_ipv4_frag(m, ctx, l2_len, l3_len); + if (m == NULL) + return NULL; + hdr = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr*, + m->l2_len); + l3_len = get_ipv4_hdr_len(m, m->l2_len, + IPPROTO_MAX + 1, 0); + } + + /* L4 */ + switch (hdr->next_proto_id) { + case IPPROTO_ICMP: + return icmp_recv(ctx, m, l2_len, l3_len); + case IPPROTO_TCP: + ptypes = RTE_PTYPE_L4_TCP | + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + l4_len = get_tcp_header_size(m, l2_len, l3_len); + break; + case IPPROTO_UDP: + ptypes = RTE_PTYPE_L4_UDP | + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + l4_len = sizeof(struct udp_hdr); + break; + default: + GLUE_LOG(ERR, "drop ipv4 pkt of unknow L4: (%d)", + hdr->next_proto_id); + rte_pktmbuf_free(m); + return NULL; + } + + } else if (etp == rte_be_to_cpu_16(ETHER_TYPE_IPv6) && + dlen >= l2_len + sizeof(struct ipv6_hdr) + sizeof(struct udp_hdr)) { + /* L3 */ + error = adjust_ipv6_pktlen(m, l2_len); + if (error) { + rte_pktmbuf_free(m); + return NULL; + } + proto = 0; + l3_len = get_ipv6x_hdr_len(m, l2_len, &proto); + + if ((m->packet_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_FRAG) { + m = process_ipv6_frag(m, ctx, l2_len, l3_len); + if (m == NULL) + return NULL; + l3_len = get_ipv6x_hdr_len(m, m->l2_len, &proto); + } + + /* L4 */ + switch (proto) { + case IPPROTO_TCP: + ptypes = RTE_PTYPE_L4_TCP | + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + l4_len = get_tcp_header_size(m, l2_len, l3_len); + break; + case IPPROTO_UDP: + ptypes = RTE_PTYPE_L4_UDP | + 
RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L2_ETHER; + l4_len = sizeof(struct udp_hdr); + break; + case IPPROTO_ICMPV6: + return icmp6_recv(ctx, m, l2_len, l3_len); + default: + GLUE_DEBUG("drop ipv6 pkt of unknown L4: (%x)", proto); + rte_pktmbuf_free(m); + return NULL; + } + } else { + GLUE_DEBUG("Drop unknown L3 packet: %x", etp); + rte_pktmbuf_free(m); + return NULL; + } + + m->packet_type = ptypes; + error = fill_pkt_hdr_len(m, l2_len, l3_len, l4_len); + if (error) { + rte_pktmbuf_free(m); + return NULL; + } + + return m; +} + +/* exclude NULLs from the final list of packets. */ +static inline uint32_t +compress_pkt_list(struct rte_mbuf *pkt[], uint32_t nb_pkt, uint32_t nb_zero) +{ + uint32_t i, j, k, l; + + for (j = nb_pkt; nb_zero != 0 && j-- != 0; ) { + + /* found a hole. */ + if (pkt[j] == NULL) { + + /* find how big is it. */ + for (i = j; i-- != 0 && pkt[i] == NULL; ) + ; + /* fill the hole. */ + for (k = j + 1, l = i + 1; k != nb_pkt; k++, l++) + pkt[l] = pkt[k]; + + nb_pkt -= j - i; + nb_zero -= j - i; + j = i + 1; + } + } + + return nb_pkt; +} + +static inline struct rte_mbuf * +common_fill_hdr_len(struct rte_mbuf *m, uint32_t tp, struct glue_ctx *ctx) +{ + uint32_t l4_len, l3_len, l2_len = sizeof(struct ether_hdr); + int32_t error = 0; + + switch (tp) { + /* possibly fragmented packets. 
*/ + case (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER): + l3_len = get_ipv4_hdr_len(m, l2_len, IPPROTO_MAX + 1, 1); + if ((m->packet_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_FRAG) { + m = process_ipv4_frag(m, ctx, l2_len, l3_len); + if (m == NULL) + return NULL; + tp = m->packet_type & (RTE_PTYPE_L2_MASK | + RTE_PTYPE_L3_MASK | + RTE_PTYPE_L4_MASK); + } + break; + case (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER): + l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_MAX + 1); + if ((m->packet_type & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_FRAG) { + m = process_ipv6_frag(m, ctx, l2_len, l3_len); + if (m == NULL) + return NULL; + tp = m->packet_type & (RTE_PTYPE_L2_MASK | + RTE_PTYPE_L3_MASK | + RTE_PTYPE_L4_MASK); + } + break; + } + + switch (tp) { + /* non fragmented tcp packets. */ + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER): + l3_len = sizeof(struct ipv4_hdr); + l4_len = get_tcp_header_size(m, l2_len, l3_len); + error = adjust_ipv4_pktlen(m, l2_len); + break; + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER): + l3_len = sizeof(struct ipv6_hdr); + l4_len = get_tcp_header_size(m, l2_len, l3_len); + error = adjust_ipv6_pktlen(m, l2_len); + break; + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER): + l3_len = get_ipv4_hdr_len(m, l2_len, + IPPROTO_TCP, 0); + l4_len = get_tcp_header_size(m, l2_len, l3_len); + error = adjust_ipv4_pktlen(m, l2_len); + break; + case (RTE_PTYPE_L4_TCP | RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER): + l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_TCP); + l4_len = get_tcp_header_size(m, l2_len, l3_len); + error = adjust_ipv6_pktlen(m, l2_len); + break; + + /* non fragmented udp packets. 
*/ + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER): + l3_len = sizeof(struct ipv4_hdr); + l4_len = sizeof(struct udp_hdr); + error = adjust_ipv4_pktlen(m, l2_len); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER): + l3_len = sizeof(struct ipv6_hdr); + l4_len = sizeof(struct udp_hdr); + error = adjust_ipv6_pktlen(m, l2_len); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER): + l3_len = get_ipv4_hdr_len(m, l2_len, + IPPROTO_UDP, 0); + l4_len = sizeof(struct udp_hdr); + error = adjust_ipv4_pktlen(m, l2_len); + break; + case (RTE_PTYPE_L4_UDP | RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER): + l3_len = get_ipv6_hdr_len(m, l2_len, IPPROTO_UDP); + l4_len = sizeof(struct udp_hdr); + error = adjust_ipv6_pktlen(m, l2_len); + break; + default: + GLUE_LOG(ERR, "drop unknown pkt"); + rte_pktmbuf_free(m); + return NULL; + } + + if (error) { + rte_pktmbuf_free(m); + return NULL; + } + error = fill_pkt_hdr_len(m, l2_len, l3_len, l4_len); + if (error) { + rte_pktmbuf_free(m); + return NULL; + } + return m; +} + + +/* + * HW can recognize L2-arp/L3 with/without extensions/L4 (i40e) + */ +static uint16_t +type0_rx_callback(uint16_t port, + uint16_t queue, + struct rte_mbuf *pkt[], + uint16_t nb_pkts, + uint16_t max_pkts, + void *user_param) +{ + uint32_t j, tp, l2_len, l3_len; + struct glue_ctx *ctx; + uint16_t nb_zero = 0; + + RTE_SET_USED(port); + RTE_SET_USED(queue); + RTE_SET_USED(max_pkts); + + ctx = user_param; + + for (j = 0; j != nb_pkts; j++) { + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | + RTE_PTYPE_L3_MASK | RTE_PTYPE_L2_MASK); + + switch (tp) { + case (RTE_PTYPE_L2_ETHER_ARP): + arp_recv(ctx, pkt[j], sizeof(struct ether_hdr)); + pkt[j] = NULL; + nb_zero++; + break; + case (RTE_PTYPE_L4_ICMP | RTE_PTYPE_L3_IPV4 | + RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L4_ICMP | RTE_PTYPE_L3_IPV4_EXT | + RTE_PTYPE_L2_ETHER): + l2_len = sizeof(struct ether_hdr); + l3_len = get_ipv4_hdr_len(pkt[j], l2_len, 
IPPROTO_ICMP, 0); + icmp_recv(ctx, pkt[j], l2_len, l3_len); + pkt[j] = NULL; + nb_zero++; + break; + case (RTE_PTYPE_L4_ICMP | RTE_PTYPE_L3_IPV6 | + RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L4_ICMP | RTE_PTYPE_L3_IPV6_EXT | + RTE_PTYPE_L2_ETHER): + l2_len = sizeof(struct ether_hdr); + l3_len = get_ipv6_hdr_len(pkt[j], l2_len, IPPROTO_ICMPV6); + icmp6_recv(ctx, pkt[j], l2_len, l3_len); + pkt[j] = NULL; + nb_zero++; + break; + default: + if (common_fill_hdr_len(pkt[j], tp, ctx) == NULL) { + pkt[j] = NULL; + nb_zero++; + } + break; + } + } + + if (nb_zero == 0) + return nb_pkts; + + return compress_pkt_list(pkt, nb_pkts, nb_zero); +} + +/* + * HW can recognize L2/L3/L4 and fragments; but cannot recognize ARP + * nor ICMP (ixgbe). + */ +static uint16_t +type1_rx_callback(uint16_t port, + uint16_t queue, + struct rte_mbuf *pkt[], + uint16_t nb_pkts, + uint16_t max_pkts, + void *user_param) +{ + uint32_t j, tp, l2_len, l3_len; + struct glue_ctx *ctx; + uint16_t nb_zero = 0; + const struct ether_hdr *eth; + const struct ipv4_hdr *ip4; + const struct ipv6_hdr *ip6; + uint16_t etp; + + RTE_SET_USED(port); + RTE_SET_USED(queue); + RTE_SET_USED(max_pkts); + + ctx = user_param; + + for (j = 0; j != nb_pkts; j++) { + tp = pkt[j]->packet_type & (RTE_PTYPE_L4_MASK | RTE_PTYPE_L3_MASK | + RTE_PTYPE_L2_MASK); + + switch (tp) { + case RTE_PTYPE_L2_ETHER: + eth = rte_pktmbuf_mtod(pkt[j], const struct ether_hdr *); + etp = eth->ether_type; + if (etp == rte_be_to_cpu_16(ETHER_TYPE_ARP)) + arp_recv(ctx, pkt[j], sizeof(*eth)); + pkt[j] = NULL; + nb_zero++; + break; + case (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L2_ETHER): + ip4 = rte_pktmbuf_mtod_offset(pkt[j], + const struct ipv4_hdr *, + sizeof(*eth)); + if (ip4->next_proto_id == IPPROTO_ICMP) { + l2_len = sizeof(struct ether_hdr); + l3_len = get_ipv4_hdr_len(pkt[j], l2_len, + IPPROTO_ICMP, 0); + icmp_recv(ctx, pkt[j], l2_len, l3_len); + } else + rte_pktmbuf_free(pkt[j]); + + pkt[j] = NULL; + 
nb_zero++; + break; + case (RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L2_ETHER): + case (RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L2_ETHER): + ip6 = rte_pktmbuf_mtod_offset(pkt[j], + const struct ipv6_hdr *, + sizeof(*eth)); + if (ip6->proto == IPPROTO_ICMPV6) { + l2_len = sizeof(struct ether_hdr); + l3_len = get_ipv6_hdr_len(pkt[j], l2_len, + IPPROTO_ICMPV6); + icmp6_recv(ctx, pkt[j], l2_len, l3_len); + } else + rte_pktmbuf_free(pkt[j]); + + pkt[j] = NULL; + nb_zero++; + break; + default: + if (common_fill_hdr_len(pkt[j], tp, ctx) == NULL) { + pkt[j] = NULL; + nb_zero++; + } + break; + } + } + + if (nb_zero == 0) + return nb_pkts; + + return compress_pkt_list(pkt, nb_pkts, nb_zero); +} + +/* + * generic, assumes HW doesn't recognize any packet type. + */ +uint16_t +typen_rx_callback(uint16_t port, + uint16_t queue, + struct rte_mbuf *pkt[], + uint16_t nb_pkts, + uint16_t max_pkts, + void *user_param) +{ + uint32_t j; + uint16_t nb_zero; + struct glue_ctx *ctx; + + RTE_SET_USED(port); + RTE_SET_USED(queue); + RTE_SET_USED(max_pkts); + + ctx = user_param; + + nb_zero = 0; + for (j = 0; j != nb_pkts; j++) { + /* fix me: now we avoid checking ip checksum */ + pkt[j]->ol_flags &= (~PKT_RX_IP_CKSUM_BAD); + pkt[j]->packet_type = 0; + pkt[j] = fill_ptypes_and_hdr_len(ctx, pkt[j]); + nb_zero += (pkt[j] == NULL); + } + + if (nb_zero == 0) + return nb_pkts; + + return compress_pkt_list(pkt, nb_pkts, nb_zero); +} + +static uint32_t +get_ptypes(uint16_t port_id) +{ + uint32_t smask; + int32_t i, rc; + const uint32_t pmask = + RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK; + + smask = 0; + rc = rte_eth_dev_get_supported_ptypes(port_id, pmask, NULL, 0); + if (rc < 0) { + RTE_LOG(ERR, USER1, + "%s(port=%u) failed to get supported ptypes;\n", + __func__, port_id); + return smask; + } + + uint32_t ptype[rc]; + rc = rte_eth_dev_get_supported_ptypes(port_id, pmask, ptype, rc); + + for (i = 0; i != rc; i++) { + switch (ptype[i]) { + case RTE_PTYPE_L2_ETHER_ARP: + smask |= ETHER_ARP_PTYPE; + 
break; + case RTE_PTYPE_L3_IPV4: + case RTE_PTYPE_L3_IPV4_EXT_UNKNOWN: + smask |= IPV4_PTYPE; + break; + case RTE_PTYPE_L3_IPV4_EXT: + smask |= IPV4_EXT_PTYPE; + break; + case RTE_PTYPE_L3_IPV6: + case RTE_PTYPE_L3_IPV6_EXT_UNKNOWN: + smask |= IPV6_PTYPE; + break; + case RTE_PTYPE_L3_IPV6_EXT: + smask |= IPV6_EXT_PTYPE; + break; + case RTE_PTYPE_L4_TCP: + smask |= TCP_PTYPE; + break; + case RTE_PTYPE_L4_UDP: + smask |= UDP_PTYPE; + break; + case RTE_PTYPE_L4_ICMP: + smask |= ICMP_PTYPE; + break; + } + } + + return smask; +} + +/* In rx callbacks, we need to check and make sure below things are done, + * either by hw or by sw: + * 1. filter out arp packets, and handle arp packets properly + * - for arp request packet, reply arp if it's requesting myself. + * 2. fill l2, l3, l4 header length + * + * 3. GSO/GRO setup (TODO) + * + */ +int +setup_rx_cb(uint16_t port_id, uint16_t qid) +{ + int32_t rc; + uint32_t i, n, smask; + const void *cb; + struct glue_ctx *ctx; + const struct ptype2cb *ptype2cb; + + static const struct ptype2cb tcp_arp_ptype2cb[] = { + { /* i40e */ + .mask = ETHER_ARP_PTYPE | + ICMP_PTYPE | + IPV4_PTYPE | IPV4_EXT_PTYPE | + IPV6_PTYPE | IPV6_EXT_PTYPE | + TCP_PTYPE | UDP_PTYPE, + .name = "HW l2-arp/l3x/l4-tcp ptype", + .fn = type0_rx_callback, + }, + { /* ixgbe does not support ARP ptype */ + .mask = IPV4_PTYPE | IPV4_EXT_PTYPE | + IPV6_PTYPE | IPV6_EXT_PTYPE | + TCP_PTYPE | UDP_PTYPE, + .name = "HW l3x/l4-tcp ptype", + .fn = type1_rx_callback, + }, + { /* virtio */ + .mask = 0, + .name = "HW does not support any ptype", + .fn = typen_rx_callback, + }, + }; + + ctx = glue_ctx_lookup(port_id, qid); + if (ctx == NULL) { + GLUE_LOG(ERR, "no ctx fount by port(%d) and queue (%d)", + port_id, qid); + return -EINVAL; + } + + smask = get_ptypes(port_id); + + ptype2cb = tcp_arp_ptype2cb; + n = RTE_DIM(tcp_arp_ptype2cb); + + for (i = 0; i != n; i++) { + if ((smask & ptype2cb[i].mask) == ptype2cb[i].mask) { + cb = rte_eth_add_rx_callback(port_id, qid, + 
ptype2cb[i].fn, ctx); + rc = -rte_errno; + GLUE_LOG(ERR, "%s(port=%u), setup RX callback \"%s\";", + __func__, port_id, ptype2cb[i].name); + return ((cb == NULL) ? rc : 0); + } + } + + GLUE_LOG(ERR, "%s(port=%u) failed to find an appropriate callback", + __func__, port_id); + return -ENOENT; +} diff --git a/lib/libtle_glue/rxtx.c b/lib/libtle_glue/rxtx.c new file mode 100644 index 0000000..b80a3ac --- /dev/null +++ b/lib/libtle_glue/rxtx.c @@ -0,0 +1,573 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "sym.h"
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_atomic.h>
+
+#include <tle_tcp.h>
+
+#include <stddef.h>
+#include <fcntl.h>
+
+#include "tle_glue.h"
+#include "fd.h"
+#include "util.h"
+#include "internal.h"
+
+/* Process-wide counter used to hand out a pseudo lcore id to each
+ * application thread on its first rx/tx call. */
+rte_atomic32_t thr_cnt;
+
+/* Maximum UDP payload: the IPv4 total-length field is 16 bits wide,
+ * so a whole datagram is capped at (1 << 16) - 1 = 65535 bytes;
+ * subtract the IPv4 and UDP header sizes.
+ * Fix: was (2 << 16) - 1 (= 131071), which exceeds what an IPv4
+ * datagram can carry and let oversized UDP sends slip past the
+ * EMSGSIZE check in _send().
+ */
+#define MAX_UDP_PKT_LEN ((1 << 16) - 1 - sizeof(struct ipv4_hdr) - \
+			 sizeof(struct udp_hdr))
+
+/* Copy up to @len bytes from the mbuf (chain) @m into @buf.
+ *
+ * @ispeek:  when non-zero, do not consume any data; the original
+ *           head mbuf is returned untouched.
+ * @needcpy: when zero, data is consumed but not copied into @buf
+ *           (used to discard bytes, e.g. TCP with MSG_TRUNC).
+ *
+ * Returns the remaining (unconsumed) part of the chain, or NULL when
+ * everything was consumed; in peek mode always returns the original
+ * head mbuf.
+ */
+static inline struct rte_mbuf *
+from_mbuf_to_buf(struct rte_mbuf *m, char *buf,
+		 size_t len, int ispeek, int needcpy)
+{
+	void *src;
+	uint32_t done = 0;
+	uint32_t left = len, orig_pkt_len;
+	uint16_t copy_len, seg_len, segs;
+	struct rte_mbuf *m_next, *orig_pkt;
+
+	if (len == 0)
+		return m;
+
+	orig_pkt = m;
+	orig_pkt_len = m->pkt_len;
+	segs = m->nb_segs;
+
+	do {
+		seg_len = rte_pktmbuf_data_len(m);
+		copy_len = RTE_MIN(seg_len, left);
+		src = rte_pktmbuf_mtod(m, void *);
+		if (needcpy)
+			rte_memcpy(buf + done, src, copy_len);
+		done += copy_len;
+		left -= copy_len;
+		if (copy_len < seg_len) {
+			/* partially consumed segment: trim the bytes we
+			 * took so the rest can be read later */
+			if (!ispeek)
+				rte_pktmbuf_adj(m, copy_len);
+			break;
+		}
+		m_next = m->next;
+		if (!ispeek) {
+			rte_pktmbuf_free_seg(m);
+			segs--;
+		}
+		m = m_next;
+	} while (left && m);
+
+	/* fix up the new head of the partially consumed chain */
+	if (m && !ispeek) {
+		m->nb_segs = segs;
+		m->pkt_len = orig_pkt_len - done;
+	}
+
+	if (ispeek)
+		return orig_pkt;
+	else
+		return m;
+}
+
+/* The peer is considered closed when the last operation would block
+ * (EAGAIN) but the error event on the stream has been raised. */
+static inline bool
+is_peer_closed(struct sock *so)
+{
+	return errno == EAGAIN && tle_event_state(&so->erev) == TLE_SEV_UP;
+}
+
+/* Common receive path behind read/recv/recvfrom for glue sockets.
+ * Polls the backend in a loop for blocking sockets; honours MSG_PEEK,
+ * MSG_TRUNC and MSG_DONTWAIT. */
+static ssize_t
+_recv(int sockfd, void *buf, size_t len, struct sockaddr *src_addr, int flags)
+{
+	int rx;
+	ssize_t rc;
+	ssize_t recvlen;
+	size_t tmplen;
+	struct sock *so;
+	struct rte_mbuf *m;
+	struct epoll_event event;
+	int needcpy;
+
+	/* bind this thread to a pseudo lcore id on first use */
+	if (RTE_PER_LCORE(_lcore_id) == LCORE_ID_ANY) {
+		RTE_PER_LCORE(_lcore_id) = rte_atomic32_add_return(&thr_cnt, 1);
+	}
+
+	so = fd2sock(sockfd);
+
+	if (so->s == NULL) {
+		if (IS_UDP(so) && is_nonblock(so, flags))
+			errno = EAGAIN;
+		else
errno = ENOTCONN; + return -1; + } + + if (so->rx_left) { + m = so->rx_left; + so->rx_left = NULL; + if (src_addr) { + OPS(so)->getname(so, src_addr, 1); + /* fixme: cannot get addr for UDP in this way */ + } + } else { + rc = OPS(so)->recv(so->s, &m, 1, src_addr); + if (rc == 0) { + if (is_nonblock(so, flags)) { + /* socket closed, return 0 */ + if (is_peer_closed(so)) { + GLUE_DEBUG("peer closed: %d", sockfd); + return 0; + } + + /* According to linux stack, + * receive from shutdown tcp socket returns 0. + * And receive from shutdown udp socket generate + * EAGAIN. In special case, we return ESHUTDOWN + * to notify upper application. + */ + if (so->shutdown & RECV_SHUTDOWN) { + if (so->proto == PROTO_TCP) + return 0; + else { +#ifdef LOOK_ASIDE_BACKEND + errno = ESHUTDOWN; +#else + errno = EAGAIN; +#endif + return -1; + } + } + return -1; + } + + do { + /* in blocking mode, recv from shutdown socket + * return 0 immediately */ + if (so->shutdown & RECV_SHUTDOWN) + return 0; + + /* some error occured, return -1 */ + if (errno != EAGAIN) + return -1; + + /* socket closed, return 0 */ + if (is_peer_closed(so)) { + GLUE_DEBUG("peer closed: %d", sockfd); + return 0; + } + + epoll_kernel_wait(CTX(so), -1, &event, 1, 1, &rx); + + be_process(CTX(so)); + } while((rc = OPS(so)->recv(so->s, &m, 1, src_addr)) == 0); + } + } + + /* get one pkt */ + if (!so->option.timestamp) + so->s->timestamp = m->timestamp; + + needcpy = 1; + recvlen = RTE_MIN(m->pkt_len, len); + if (flags & MSG_TRUNC) { + if (IS_UDP(so)) + recvlen = m->pkt_len; + else + /* According to linux manual, data will be discarded + * if recv TCP stream with MSG_TRUNC flag */ + needcpy = 0; + } + + so->rx_left = from_mbuf_to_buf(m, buf, len, flags & MSG_PEEK, needcpy); + + if (((flags & MSG_PEEK) == 0) && IS_UDP(so) && so->rx_left) { + rte_pktmbuf_free(so->rx_left); + so->rx_left = NULL; + } + + /* UDP socket only receive one pkt at one time */ + if (IS_UDP(so) || (flags & MSG_PEEK)) { + return recvlen; + } + /* 
TCP socket: try best to fill buf */ + len -= recvlen; + buf = (char*)buf + recvlen; + while (len) { + if (OPS(so)->recv(so->s, &m, 1, src_addr) == 0) + break; + + tmplen = (m->pkt_len < len) ? m->pkt_len : len; + so->rx_left = from_mbuf_to_buf(m, buf, tmplen, 0, needcpy); + len -= tmplen; + recvlen += tmplen; + buf = (char*)buf + tmplen; + } + + if (so->rx_left) + tle_event_raise(&so->rxev); + + /* may send window increase ACK after receive*/ + if (recvlen > 0) + be_tx_with_lock(CTX(so)); + + return recvlen; +} + +ssize_t PRE(recv)(int sockfd, void *buf, size_t len, int flags) +{ + if (is_kernel_fd(sockfd)) + return k_read(sockfd, buf, len); + + return _recv(sockfd, buf, len, NULL, flags); +} + +ssize_t PRE(recvfrom)(int sockfd, void *buf, size_t len, int flags, + struct sockaddr *src_addr, socklen_t *addrlen) +{ + ssize_t rc; + if (is_kernel_fd(sockfd)) + return k_recv(sockfd, buf, len, flags); + + if (src_addr && !addrlen) { + errno = EINVAL; + return -1; + } + rc = _recv(sockfd, buf, len, src_addr, flags); + if (rc >= 0 && src_addr) { + if (src_addr->sa_family == AF_INET) { + *addrlen = sizeof(struct sockaddr_in); + } else { + *addrlen = sizeof(struct sockaddr_in6); + } + } + return rc; +} + +#define RECV_CONTINUE (-2) +static inline ssize_t +try_recvmsg(struct sock *so, struct msghdr *msg, int flags) +{ + ssize_t sz; + + if (so->s == NULL) { + if (IS_UDP(so) && is_nonblock(so, flags)) + errno = EAGAIN; + else + errno = ENOTCONN; + return -1; + } + + sz = OPS(so)->readv(so->s, msg, flags); + if (sz >= 0) { /* get data */ + /* may send window increase ACK after receive*/ + if (sz > 0) + be_tx_with_lock(CTX(so)); + return sz; + } + else if (errno != EAGAIN) /* error occurred */ + return -1; + else if (is_peer_closed(so)) { + GLUE_DEBUG("peer closed: %d", so->fd); + return 0; + } else if (is_nonblock(so, flags)) + return -1; + + return RECV_CONTINUE; +} + +ssize_t PRE(recvmsg)(int sockfd, struct msghdr *msg, int flags) +{ + ssize_t sz; + struct sock *so; + + if 
(is_kernel_fd(sockfd)) + return k_recvmsg(sockfd, msg, flags); + + so = fd2sock(sockfd); + + if (so->rx_left == NULL && OPS(so)->readv && + (flags & MSG_PEEK) == 0 && + ((flags & MSG_TRUNC) == 0 || so->proto == PROTO_UDP)) { + /* udp_readv supports MSG_TRUNC, tcp_readv not yet. + * so only udp socket implement with readv interface. + */ + sz = try_recvmsg(so, msg, flags); + if (sz != RECV_CONTINUE) + return sz; + } + + /* 1. rx_left != NULL; 2. get no data, fall back to blocking read */ + + if (so->rx_left != NULL && msg != NULL && msg->msg_control != NULL) { + if (so->option.timestamp) + tle_set_timestamp(msg, so->rx_left); + else + msg->msg_controllen = 0; + } + + sz = PRE(recvfrom)(sockfd, msg->msg_iov[0].iov_base, + msg->msg_iov[0].iov_len, flags, + (struct sockaddr *)msg->msg_name, + &msg->msg_namelen); + + return sz; +} + +ssize_t PRE(read)(int fd, void *buf, size_t count) +{ + if (is_kernel_fd(fd)) + return k_read(fd, buf, count); + + return _recv(fd, buf, count, NULL, 0); +} + +#define DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) + +ssize_t PRE(readv)(int fd, const struct iovec *iov, int iovcnt) +{ + ssize_t sz; + struct sock *so; + struct msghdr msg; + + if (is_kernel_fd(fd)) + return k_readv(fd, iov, iovcnt); + + if (RTE_PER_LCORE(_lcore_id) == LCORE_ID_ANY) { + RTE_PER_LCORE(_lcore_id) = rte_atomic32_add_return(&thr_cnt, 1); + } + + so = fd2sock(fd); + + if (so->rx_left == NULL && OPS(so)->readv) { + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = DECONST(struct iovec *, iov); + msg.msg_iovlen = iovcnt; + sz = try_recvmsg(so, &msg, 0); + if (sz != RECV_CONTINUE) + return sz; + } + + /* 1. rx_left != NULL; 2. get no data, fall back to blocking read */ + + /* fixme: when so->rx_left != NULL, also needs readv. 
+ * maybe need to modify readv interface args of ops */ + return _recv(fd, iov[0].iov_base, iov[0].iov_len, NULL, 0); +} + +static ssize_t +_send(int sockfd, const void *buf, size_t len, + const struct sockaddr *peer, int flags) +{ + struct sock *so = fd2sock(sockfd); + struct rte_mempool *mp = get_mempool_by_socket(0); /* fix me */ + uint16_t nb_mbufs = (len + RTE_MBUF_DEFAULT_DATAROOM - 1) + / RTE_MBUF_DEFAULT_DATAROOM; + uint16_t i, cnt, copy_len; + int rc; + struct rte_mbuf *mbufs[nb_mbufs + 1]; + size_t done = 0; + uint32_t left = 0; + char *dst; + int blocking = !is_nonblock(so, flags); + + if (RTE_PER_LCORE(_lcore_id) == LCORE_ID_ANY) { + RTE_PER_LCORE(_lcore_id) = rte_atomic32_add_return(&thr_cnt, 1); + } + + if (!blocking && len > def_sndbuf && so->proto == PROTO_TCP) { + len = def_sndbuf; + nb_mbufs = (len + RTE_MBUF_DEFAULT_DATAROOM - 1) + / RTE_MBUF_DEFAULT_DATAROOM; + } + + if (unlikely(len == 0)) { + if (so->proto == PROTO_TCP) + return 0; + else + nb_mbufs = 1; + } + + if (unlikely(len > MAX_UDP_PKT_LEN && IS_UDP(so))) { + errno = EMSGSIZE; + return -1; + } + + if (blocking) + be_process(get_ctx()); + + if (unlikely(rte_pktmbuf_alloc_bulk(mp, mbufs, nb_mbufs) < 0)) { + errno = ENOMEM; + return -1; + } + + for (i = 0; i < nb_mbufs; ++i) { + copy_len = RTE_MIN((size_t)RTE_MBUF_DEFAULT_DATAROOM, + len - done); + dst = rte_pktmbuf_mtod(mbufs[i], char *); + rte_memcpy(dst, (const char *)buf + done, copy_len); + done += copy_len; + mbufs[i]->data_len = copy_len; + mbufs[i]->pkt_len = copy_len; + } + + cnt = 0; +do_send: + rc = OPS(so)->send(so, mbufs + cnt, nb_mbufs - cnt, peer); + + cnt += rc; + + if (cnt > 0) + be_tx_with_lock(CTX(so)); + + if (cnt > 0 && blocking) + be_process(get_ctx()); + + if (blocking && + cnt < nb_mbufs && + (rc > 0 || errno == EAGAIN) && + tle_event_state(&so->erev) != TLE_SEV_UP) { + be_process(get_ctx()); + goto do_send; + } + + for (i = cnt; i < nb_mbufs; ++i) { + left += mbufs[i]->pkt_len; + rte_pktmbuf_free_seg(mbufs[i]); + } 
+ + if (cnt == 0) + return -1; + else + return len - left; +} + +ssize_t PRE(send)(int sockfd, const void *buf, size_t len, int flags) +{ + if (is_kernel_fd(sockfd)) + return k_write(sockfd, buf, len); + + /* MSG_NOSIGNAL means "Do not generate SIGPIPE". Ignore this flag */ + flags &= ~MSG_NOSIGNAL; + + return _send(sockfd, buf, len, NULL, flags); +} + +ssize_t PRE(sendto)(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *dest_addr, socklen_t addrlen) +{ + if (is_kernel_fd(sockfd)) + return k_sendto(sockfd, buf, len, flags, dest_addr, addrlen); + + /* MSG_NOSIGNAL means "Do not generate SIGPIPE". Ignore this flag */ + flags &= ~MSG_NOSIGNAL; + + return _send(sockfd, buf, len, dest_addr, flags); +} + +ssize_t PRE(sendmsg)(int sockfd, const struct msghdr *msg, int flags) +{ + ssize_t ret; + struct sock *so; + + if (is_kernel_fd(sockfd)) + return k_sendmsg(sockfd, msg, flags); + + /* MSG_NOSIGNAL means "Do not generate SIGPIPE". Ignore this flag */ + flags &= ~MSG_NOSIGNAL; + + so = fd2sock(sockfd); + if (OPS(so)->writev) { + ret = OPS(so)->writev(so, msg->msg_iov, msg->msg_iovlen, + msg->msg_name); + if (ret < 0) { + if (errno != EAGAIN || is_nonblock(so, flags)) + return -1; + } else { + /* TODO: blocking && ret < total length */ + be_tx_with_lock(CTX(so)); + return ret; + } + + /* fall through to blocking send */ + } + + return _send(sockfd, msg->msg_iov[0].iov_base, msg->msg_iov[0].iov_len, + (struct sockaddr *)msg->msg_name, flags); +} + +ssize_t PRE(write)(int fd, const void *buf, size_t count) +{ + if (is_kernel_fd(fd)) + return k_write(fd, buf, count); + + return _send(fd, buf, count, NULL, 0); +} + +ssize_t PRE(writev)(int fd, const struct iovec *iov, int iovcnt) +{ + ssize_t ret; + struct sock *so; + + if (is_kernel_fd(fd)) + return k_writev(fd, iov, iovcnt); + + if (RTE_PER_LCORE(_lcore_id) == LCORE_ID_ANY) { + RTE_PER_LCORE(_lcore_id) = rte_atomic32_add_return(&thr_cnt, 1); + } + + so = fd2sock(fd); + if (OPS(so)->writev) { + ret 
= OPS(so)->writev(so, iov, iovcnt, NULL); + if (ret < 0) { + if (errno != EAGAIN || is_nonblock(so, 0)) + return -1; + } else { + /* TODO: blocking && ret < total length */ + be_tx_with_lock(CTX(so)); + return ret; + } + + /* fall through to blocking send */ + } + + return _send(fd, iov[0].iov_base, iov[0].iov_len, NULL, 0); +} + +/* advanced functions */ +ssize_t PRE(splice)(int fd_in, loff_t *off_in, int fd_out, + loff_t *off_out, size_t len, unsigned int flags) +{ + if (is_kernel_fd(fd_in) && is_kernel_fd(fd_out)) + return k_splice(fd_in, off_in, fd_out, off_out, len, flags); + + rte_panic("splice is not supported yet"); + errno = EOPNOTSUPP; + return -1; +} + +ssize_t PRE(sendfile)(int out_fd, int in_fd, off_t *offset, size_t count) +{ + if (is_kernel_fd(out_fd) && is_kernel_fd(in_fd)) + return k_sendfile(out_fd, in_fd, offset, count); + + rte_panic("sendfile is not supported yet"); + errno = EOPNOTSUPP; + return -1; +} diff --git a/lib/libtle_glue/select.c b/lib/libtle_glue/select.c new file mode 100644 index 0000000..b3b8539 --- /dev/null +++ b/lib/libtle_glue/select.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <signal.h> +#include <sys/select.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#include "fd.h" +#include "ctx.h" +#include "sym.h" +#include "log.h" +#include "util.h" +#include "internal.h" +#include "tle_glue.h" + +#define FD_ZERO_N(s, n) do { memset((s)->fds_bits, 0, n/sizeof(long)); } while(0) + +static int +fdset_to_events_user(int nfds, fd_set *fdset, int *total, int event) +{ + int i, num = 0; + struct sock *so; + const struct tle_event *ev; + + for (i = fd_table.fd_base; i < nfds; ++i) { + if (!FD_ISSET(i, fdset)) + continue; + + so = fd2sock(i); /* fix me: check if fd is opened */ + + switch (event) { + case EPOLLIN: + ev = &so->rxev; + break; + case EPOLLOUT: + ev = &so->txev; + break; + case EPOLLERR: + ev = &so->erev; + break; + default: + rte_panic("non-sense value\n"); + } + /* Check event is ready */ + if (TLE_SEV_UP == tle_event_state(ev)) { + *total = *total + 1; + } else { + FD_CLR(i, fdset); + num++; + } + + /* We fill sock->event here as we need this when + * we filter events in poll_common(). But it was + * originally set by epoll_ctl(). Now we have to + * assume that there are no application which + * uses epoll/poll/select at the same time. 
+ */ + so->event.events |= event; + so->event.data.u32 = i; + } + + return num; +} + +static int +fdset_to_events_kernel(int nfds, fd_set *fdset, int efd, int event) +{ + int i, num = 0; + struct epoll_event k_ev; + + for (i = 0; i < nfds; ++i) { + if (!FD_ISSET(i, fdset)) + continue; + + k_ev.events = event; + k_ev.data.u32 = i; + k_epoll_ctl(efd, EPOLL_CTL_ADD, i, &k_ev); + num++; + } + + return num; +} + +int +PRE(select)(int nfds, fd_set *readfds, fd_set *writefds, + fd_set *exceptfds, struct timeval *timeout) +{ + int to; + struct glue_ctx *ctx; + int j, efd, total = 0, max = 0; + + /* thread <> context binding happens here */ + if (RTE_PER_LCORE(glue_ctx) == NULL) { + ctx = &ctx_array[glue_ctx_alloc()]; + RTE_PER_LCORE(glue_ctx) = ctx; + } else + ctx = RTE_PER_LCORE(glue_ctx); + + /* step 0, process some packets */ + be_process(ctx); + + /* step 1, check if any userspace events are ready */ + + if (readfds) + max += fdset_to_events_user(nfds, readfds, + &total, EPOLLIN); + if (writefds) + max += fdset_to_events_user(nfds, writefds, + &total, EPOLLOUT); + if (exceptfds) + max += fdset_to_events_user(nfds, writefds, + &total, EPOLLERR); + if (total > 0) { + /* userspace events go firstly */ + if (readfds) + FD_ZERO_N(readfds, fd_table.fd_base); + if (writefds) + FD_ZERO_N(writefds, fd_table.fd_base); + if (exceptfds) + FD_ZERO_N(exceptfds, fd_table.fd_base); + + return total; + } + + /* step 2, only wait for kernel events? 
*/ + if (max == 0) + return k_select(nfds, readfds, writefds, exceptfds, timeout); + + /* step 3, slow path: wait for I/O and kernel events */ + efd = k_epoll_create(1); + if (efd < 0) + rte_panic("k_epoll_create failed %d", errno); + + nfds = RTE_MIN(nfds, fd_table.fd_base); + if (readfds) + max += fdset_to_events_kernel(nfds, readfds, + efd, EPOLLIN); + if (writefds) + max += fdset_to_events_kernel(nfds, writefds, + efd, EPOLLOUT); + if (exceptfds) + max += fdset_to_events_kernel(nfds, exceptfds, + efd, EPOLLERR); + + struct epoll_event events[max]; + + if (timeout) + to = timeout->tv_sec * 1000 + timeout->tv_usec / 1000; + else + to = -1; + total = poll_common(ctx, events, max, to, efd); + + k_close(efd); + for (j = 0; j < total; ++j) { + if (events[j].events & EPOLLIN) + FD_SET(events[j].data.fd, readfds); + + if (events[j].events & EPOLLOUT) + FD_SET(events[j].data.fd, writefds); + + if ((events[j].events & (EPOLLHUP | EPOLLERR)) && exceptfds) + FD_SET(events[j].data.fd, exceptfds); + } + return total; +} + +int +PRE(pselect)(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, + const struct timespec *timeout, const sigset_t *sigmask) +{ + struct timeval tv, *tv_to; + + if (sigmask != NULL) + rte_panic("pselect with signal is not supported"); + + if (timeout) { + tv.tv_usec = timeout->tv_nsec / 1000; + tv.tv_sec = timeout->tv_sec; + tv_to = &tv; + } else + tv_to = NULL; + + return select(nfds, readfds, writefds, exceptfds, tv_to); +} diff --git a/lib/libtle_glue/sock.h b/lib/libtle_glue/sock.h new file mode 100644 index 0000000..fcd6362 --- /dev/null +++ b/lib/libtle_glue/sock.h @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef _SOCK_H_ +#define _SOCK_H_ + +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include <tle_event.h> +#include <tle_ctx.h> + +#include "ctx.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern unsigned int def_sndbuf; +extern unsigned int def_rcvbuf; + +#ifndef TCP_FASTOPEN +#define TCP_FASTOPEN 23 +#endif + +#ifndef TCP_USER_TIMEOUT +#define TCP_USER_TIMEOUT 18 +#endif + +#ifndef TCP_FASTOPEN_CONNECT +#define TCP_FASTOPEN_CONNECT 30 +#endif + +struct sock; + +struct proto { + int (*setsockopt)(struct sock *sk, int optname, const void *optval, + socklen_t optlen); + int (*getsockopt)(struct sock *sk, int optname, void *optval, + socklen_t *option); + int (*getname)(struct sock *sk, struct sockaddr *addr, int peer); + + int (*bind)(struct sock *sk, const struct sockaddr *addr); + int (*listen)(struct sock *sk, int backlog); + int (*connect)(struct sock *sk, const struct sockaddr *addr); + int (*accept)(struct sock *sk, struct sockaddr *addr, + socklen_t *addrlen, int flags); + + ssize_t (*recv)(struct tle_stream *s, struct rte_mbuf *pkt[], + uint16_t num, struct sockaddr *addr); + ssize_t (*send)(struct sock *sk, struct rte_mbuf *pkt[], + uint16_t num, const struct sockaddr *dst_addr); + + ssize_t (*readv)(struct tle_stream *s, struct msghdr *msg, int flags); + ssize_t (*writev)(struct sock *sk, const struct iovec *iov, + int iovcnt, const struct sockaddr *dst_addr); + + int (*shutdown)(struct sock *sk, int how); + int (*close)(struct tle_stream *s); + + void (*update_cfg)(struct sock *sk); + + 
char name[32]; +}; + +enum { + PROTO_TCP, + PROTO_UDP +}; + +#define RECV_SHUTDOWN 1 +#define SEND_SHUTDOWN 2 + +extern struct proto udp_prot; +extern struct proto tcp_prot; +extern struct proto *supported_proto_ops[]; + +struct sock { + int fd; + uint32_t cid:8, /* ctx id for indexing ctx_array */ + domain:8, /* for AF_INET, AF_INET6 */ + proto:8, /* PROTO_TCP, PROTO_UDP */ + valid:1, + epoll:1, + ubind:1, + ubindany:1, + nonblock:1, + tcp_connected:1, + shutdown:2; + struct tle_stream *s; + struct rte_mbuf *rx_left; + tle_stream_options_t option; + union { + struct epoll_event event; + int shadow_efd; + }; + struct tle_event txev; + struct tle_event rxev; + struct tle_event erev; +} __rte_cache_aligned; + +#define CTX(so) (&ctx_array[so->cid]) +#define OPS(so) (supported_proto_ops[so->proto]) +#define IS_TCP(so) (so->proto == PROTO_TCP) +#define IS_UDP(so) (so->proto == PROTO_UDP) + +static inline int +is_nonblock(struct sock *so, int flags) +{ + return (flags & MSG_DONTWAIT) || so->nonblock; +} + +static inline struct tle_ctx * +get_sock_ctx(struct sock *so) +{ + if (IS_TCP(so)) + return CTX(so)->tcp_ctx; + else + return CTX(so)->udp_ctx; +} + +static inline size_t +get_sockaddr_len(sa_family_t family) +{ + switch (family) { + case AF_INET: + return sizeof(struct sockaddr_in); + case AF_INET6: + return sizeof(struct sockaddr_in6); + case AF_UNSPEC: + return sizeof(sa_family_t); + default: + return 0; + } +} + +#ifdef __cplusplus +} +#endif + +#endif /*_SOCK_H_ */ diff --git a/lib/libtle_glue/socket.c b/lib/libtle_glue/socket.c new file mode 100644 index 0000000..31b28be --- /dev/null +++ b/lib/libtle_glue/socket.c @@ -0,0 +1,720 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "sym.h" + +#include <stdarg.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> + +#include "tle_glue.h" +#include "fd.h" +#include "log.h" +#include "util.h" +#include "internal.h" +#include "sock.h" + +struct proto *supported_proto_ops[] = { + [PROTO_TCP] = &tcp_prot, + [PROTO_UDP] = &udp_prot, +}; + +/* for setup, settings, and destroy */ +int PRE(socket)(int domain, int type, int protocol) +{ + int fd; + struct sock *so; + + if ((domain != AF_INET && domain != AF_INET6) || + (type != SOCK_STREAM && type != SOCK_DGRAM)) + return k_socket(domain, type, protocol); + + if (domain == AF_INET) { + if (default_ctx->ipv4 == 0 && !default_ctx->lo4_enabled) { + errno = EAFNOSUPPORT; + return -1; + } + } else { + if (IN6_IS_ADDR_UNSPECIFIED(&default_ctx->ipv6) && + !default_ctx->lo6_enabled) { + errno = EAFNOSUPPORT; + return -1; + } + } + + fd = get_unused_fd(); + if (fd < 0) { + errno = ENFILE; + return -1; + } + so = fd2sock(fd); + so->cid = get_cid(); + if (type == SOCK_STREAM) + so->proto = PROTO_TCP; + else /* type == SOCK_DGRAM */ + so->proto = PROTO_UDP; + + so->domain = domain; + so->option.raw = 0; + so->option.mulloop = 1; + so->option.multtl = 1; + if (type == SOCK_STREAM) { + so->option.tcpquickack = 1; + /* linux default value: 2 hours */ + so->option.keepidle = 2 * 60 * 60; + /* linux default value: 75seconds */ + so->option.keepintvl = 75; + /* linux default value: 9 */ + so->option.keepcnt = 9; + } + + 
sock_alloc_events(so); + + GLUE_DEBUG("socket fd = %d", fd); + printf("socket fd = %d", fd); + return fd; +} + +int PRE(bind)(int sockfd, const struct sockaddr *addr, socklen_t addrlen) +{ + struct sock *so; + + if (is_kernel_fd(sockfd)) + return k_bind(sockfd, addr, addrlen); + + so = fd2sock(sockfd); + if (so->s) { + /* The socket is already bound to an address */ + errno = EINVAL; + return -1; + } + + if (addrlen < get_sockaddr_len(addr->sa_family)) { + errno = EINVAL; + return -1; + } + + so->cid = get_cid(); /* allow ctx reset as stream is null */ + if (OPS(so)->bind) + return OPS(so)->bind(so, addr); + + errno = EOPNOTSUPP; + return -1; +} + +int PRE(listen)(int sockfd, int backlog) +{ + struct sock *so; + + if (is_kernel_fd(sockfd)) + return k_listen(sockfd, backlog); + + so = fd2sock(sockfd); + + if (OPS(so)->listen) + return OPS(so)->listen(so, backlog); + + errno = EOPNOTSUPP; + return -1; +} + +int PRE(accept)(int sockfd, struct sockaddr *addr, socklen_t *addrlen) +{ + struct sock *so; + + if (is_kernel_fd(sockfd)) + return k_accept(sockfd, addr, addrlen); + + so = fd2sock(sockfd); + if (OPS(so)->accept) + return OPS(so)->accept(so, addr, addrlen, 0); + + errno = EOPNOTSUPP; + return -1; +} + +int PRE(accept4)(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags) +{ + int fd; + struct sock *so; + + if (is_kernel_fd(sockfd)) + return k_accept4(sockfd, addr, addrlen, flags); + + fd = PRE(accept)(sockfd, addr, addrlen); + + /* inherit NONBLOCK flag */ + if (fd >= 0 && (flags & SOCK_NONBLOCK)) { + so = fd2sock(fd); + so->nonblock = 1; + } + + return fd; +} + +int PRE(connect)(int sockfd, const struct sockaddr *addr, socklen_t addrlen) +{ + struct sock *so; + + if (is_kernel_fd(sockfd)) + return k_connect(sockfd, addr, addrlen); + + if (addrlen < get_sockaddr_len(addr->sa_family)) { + errno = EINVAL; + return -1; + } + + so = fd2sock(sockfd); + so->cid = get_cid(); + + if (!(is_nonblock(so, 0))) + mac_check(CTX(so), addr); + + if 
(OPS(so)->connect) + return OPS(so)->connect(so, addr); + + errno = EOPNOTSUPP; + return -1; +} + +unsigned int def_sndbuf = 212992; +unsigned int def_rcvbuf = 212992; +static struct linger ling; + +int PRE(getsockopt)(int sockfd, int level, int optname, + void *optval, socklen_t *optlen) +{ + struct sock *so; + union { + int val; + uint64_t val64; + struct linger ling; + struct timeval tm; + } *p = optval; + + + if (is_kernel_fd(sockfd)) + return k_getsockopt(sockfd, level, optname, optval, optlen); + + if (!optval && !optlen) + return -1; + + so = fd2sock(sockfd); + + switch (level) { + case IPPROTO_IP: + switch (optname) { + case IP_OPTIONS: + *optlen = 0; + return 0; + case IP_MULTICAST_LOOP: + p->val = so->option.mulloop; + return 0; + case IP_MULTICAST_TTL: + p->val = so->option.multtl; + return 0; + } + break; + case IPPROTO_IPV6: + switch (optname) { + case IPV6_V6ONLY: + p->val = so->option.ipv6only; + return 0; + } + break; + case SOL_SOCKET: + /* man socket(7), see /usr/include/asm-generic/socket.h */ + switch (optname) { + case SO_REUSEADDR: + p->val = so->option.reuseaddr; + return 0; + case SO_REUSEPORT: + p->val = so->option.reuseport; + return 0; + case SO_ERROR: + if (TLE_SEV_DOWN == tle_event_state(&so->erev)) + p->val = 0; + else + p->val = ECONNREFUSED; + /* fixe me: ETIMEDOUT */ + return 0; + case SO_LINGER: + p->ling.l_onoff = 0; + return 0; + case SO_SNDBUF: + p->val = def_sndbuf; + return 0; + case SO_RCVBUF: + p->val = def_rcvbuf; + return 0; + case SO_ACCEPTCONN: + if (IS_TCP(so) + && TCP_STREAM(so->s)->tcb.state == TCP_ST_LISTEN) + p->val = 1; + else + p->val = 0; + return 0; + case SO_KEEPALIVE: + p->val = so->option.keepalive; + return 0; + case SO_TYPE: + if (IS_TCP(so)) + p->val = SOCK_STREAM; + else + p->val = SOCK_DGRAM; + return 0; + case SO_OOBINLINE: + p->val = so->option.oobinline; + return 0; + case SO_TIMESTAMP: + p->val = so->option.timestamp; + return 0; + case SO_PROTOCOL: + if (so->proto == PROTO_TCP) + p->val = 
IPPROTO_TCP; + else + p->val = IPPROTO_UDP; + return 0; + default: + break; + } + + break; + case SOL_TCP: + case SOL_UDP: + return OPS(so)->getsockopt(so, optname, optval, optlen); + } + + GLUE_LOG(WARNING, "getsockopt(%d) with level = %d, optname = %d", + sockfd, level, optname); + errno = EOPNOTSUPP; + return -1; +} + +int PRE(setsockopt)(int sockfd, int level, int optname, + const void *optval, socklen_t optlen) +{ + int val; + struct sock *so; + if (is_kernel_fd(sockfd)) + return k_setsockopt(sockfd, level, optname, optval, optlen); + if (!optval && !optlen) + return -1; + + val = 0; /* just to make compiler happy */ + switch (optlen) { + case sizeof(char): + val = *(const char *)optval; + break; + case sizeof(int): + val = *(const int *)optval; + break; + } + + so = fd2sock(sockfd); + + switch (level) { + case IPPROTO_IP: + switch (optname) { + case IP_RECVERR: + /* needed by netperf */ + return 0; + case IP_MULTICAST_LOOP: + if (val == 0) + so->option.mulloop = 0; + else + so->option.mulloop = 1; + if (so->s != NULL) + so->s->option.mulloop = so->option.mulloop; + return 0; + case IP_MULTICAST_TTL: + if (val > 255 || val < -1) { + errno = EINVAL; + return -1; + } + if(val == -1) { + val = 1; + } + so->option.multtl = val; + if (so->s != NULL) + so->s->option.multtl = so->option.multtl; + return 0; + case IP_ADD_MEMBERSHIP: + if (optlen < sizeof(struct ip_mreq)) { + errno = EINVAL; + return -1; + } + const struct ip_mreq* mreq = (const struct ip_mreq*)optval; + if (mreq->imr_multiaddr.s_addr == INADDR_ANY) { + errno = EINVAL; + return -1; + } + errno = EOPNOTSUPP; + return -1; + case IP_MTU_DISCOVER: + return 0; + case IP_TOS: + return 0; + case IP_RECVTOS: + return 0; + } + break; + case IPPROTO_IPV6: + switch (optname) { + case IPV6_V6ONLY: + if (val == 0) + so->option.ipv6only = 0; + else + so->option.ipv6only = 1; + if (so->s != NULL) + so->s->option.ipv6only = so->option.ipv6only; + return 0; + case IPV6_TCLASS: + return 0; + case IPV6_RECVTCLASS: + 
return 0; + } + break; + case SOL_SOCKET: + switch (optname) { + case SO_REUSEADDR: + if (val == 0) + so->option.reuseaddr = 0; + else + so->option.reuseaddr = 1; + if (so->s != NULL) + so->s->option.reuseaddr = so->option.reuseaddr; + return 0; + case SO_LINGER: + ling = *(const struct linger *)optval; + if (ling.l_onoff == 0) + return 0; + else { + GLUE_LOG(ERR, "app is enabling SO_LINGER which is not really supported"); + return 0; + } + break; + case SO_KEEPALIVE: + if (val == 0) + so->option.keepalive = 0; + else + so->option.keepalive = 1; + if (so->s != NULL) { + so->s->option.keepalive = so->option.keepalive; + if (so->proto == PROTO_TCP) + tle_tcp_stream_set_keepalive(so->s); + } + return 0; + case SO_REUSEPORT: + if (val == 0) + so->option.reuseport = 0; + else + so->option.reuseport = 1; + if (so->s != NULL) + so->s->option.reuseport = so->option.reuseport; + return 0; + case SO_SNDBUF: + def_sndbuf = val; + return 0; + case SO_RCVBUF: + def_rcvbuf = val; + return 0; + case SO_DONTROUTE: + /* needed by netperf */ + return 0; + case SO_BROADCAST: + /* needed by nc */ + /* todo: only supported for DGRAM */ + return 0; + case SO_TIMESTAMP: + so->option.timestamp = !!val; + if (so->s != NULL) + so->s->option.timestamp = so->option.timestamp; + return 0; + case SO_OOBINLINE: + if (val == 0) + so->option.oobinline = 0; + else + so->option.oobinline = 1; + if (so->s != NULL) + so->s->option.oobinline = so->option.oobinline; + return 0; + default: + break; + } + break; + case IPPROTO_TCP: + case IPPROTO_UDP: + return OPS(so)->setsockopt(so, optname, optval, optlen); + } + + GLUE_LOG(WARNING, "setsockopt(%d) with level = %d, optname = %d\n", + sockfd, level, optname); + errno = EOPNOTSUPP; + return -1; +} + +/* + * Refer to glibc/sysdeps/unix/sysv/linux/fcntl.c + */ +int PRE(fcntl)(int fd, int cmd, ...) 
+{ + int rc; + void *arg; + va_list ap; + struct sock *so; + + va_start(ap, cmd); + arg = va_arg(ap, void *); + va_end(ap); + + if (is_kernel_fd(fd)) + return k_fcntl(fd, cmd, arg); + + so = fd2sock(fd); + switch (cmd) { + case F_SETFL: + if ((unsigned long)arg & O_NONBLOCK) + so->nonblock = 1; + else + so->nonblock = 0; + rc = 0; + break; + case F_GETFL: + if (so->nonblock) + rc = O_NONBLOCK | O_RDWR; + else + rc = O_RDWR; + break; + case F_SETFD: + rc = 0; + break; + default: + rc = -1; + errno = EOPNOTSUPP; + GLUE_LOG(WARNING, "fcntl(%d) with cmd = %d", fd, cmd); + } + + return rc; +} + +/* + * Refer to musl/src/misc/ioctl.c + */ +int PRE(ioctl)(int fd, unsigned long int request, ...) +{ + int rc; + void *arg; + va_list ap; + uint16_t left; + struct sock *so; + struct rte_mbuf *m; + + va_start(ap, request); + arg = va_arg(ap, void *); + va_end(ap); + + if (is_kernel_fd(fd)) + return k_ioctl(fd, request, arg); + + so = fd2sock(fd); + + switch (request) { + case FIONREAD: /* SIOCINQ */ + if (so->s == NULL) + *(int *)arg = 0; + else if (IS_TCP(so)) { + left = tle_tcp_stream_inq(so->s); + if (so->rx_left) + left += rte_pktmbuf_pkt_len(so->rx_left); + *(int *)arg = left; + } else { + if (so->rx_left) + *(int *)arg = rte_pktmbuf_pkt_len(so->rx_left); + else { + if (tle_udp_stream_recv(so->s, &m , 1) == 0) + *(int *)arg = 0; + else { + *(int *)arg = rte_pktmbuf_pkt_len(m); + so->rx_left = m; + } + } + } + rc = 0; + break; + case FIONBIO: + if (*(int *)arg) + so->nonblock = 1; + else + so->nonblock = 0; + rc = 0; + break; + case SIOCGSTAMP: + if (so->s->timestamp == 0) { + errno = ENOENT; + rc = -1; + } else { + ((struct timeval*)arg)->tv_sec = so->s->timestamp >> 20; + ((struct timeval*)arg)->tv_usec = so->s->timestamp & 0xFFFFFUL; + rc = 0; + } + break; + default: + errno = EOPNOTSUPP; + rc = -1; + GLUE_LOG(WARNING, "ioctl(%d) with request = %ld", fd, request); + } + + return rc; +} + +int PRE(shutdown)(int sockfd, int how) +{ + struct sock *so; + + if 
(is_kernel_fd(sockfd)) + return k_shutdown(sockfd, how); + + so = fd2sock(sockfd); + switch (how) { + case SHUT_RD: + so->shutdown |= RECV_SHUTDOWN; + break; + case SHUT_WR: + so->shutdown |= SEND_SHUTDOWN; + break; + case SHUT_RDWR: + so->shutdown = RECV_SHUTDOWN | SEND_SHUTDOWN; + break; + } + if (OPS(so)->shutdown) + return OPS(so)->shutdown(so, how); + + errno = EOPNOTSUPP; + return -1; +} + +static inline int +getname(int sockfd, struct sockaddr *uaddr, socklen_t *addrlen, int peer) +{ + struct sock *so; + size_t socklen; + int rc; + + so = fd2sock(sockfd); + + /* This is ugly, but netperf ask for local addr (before any + * connect or bind) to check family. + * + * To formally fix this, we shall bind a local address in advance + */ + socklen = get_sockaddr_len(so->domain); + /* fixme: It is not conform to linux standard, fix it later. */ + if (*addrlen < socklen) { + errno = EINVAL; + return -1; + } + *addrlen = socklen; + + if (so->s == NULL) { + if (peer) { + errno = ENOTCONN; + return -1; + } else { + memset(uaddr, 0, socklen); + uaddr->sa_family = so->domain; + return 0; + } + } + + if (OPS(so)->getname) { + rc = OPS(so)->getname(so, uaddr, peer); + if (rc < 0) + return rc; + if (peer) { + if ((uaddr->sa_family == AF_INET && + ((struct sockaddr_in*)uaddr)->sin_addr.s_addr == 0) || + (uaddr->sa_family == AF_INET6 && + IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6*) + uaddr)->sin6_addr))) { + errno = ENOTCONN; + return -1; + } + } + if (uaddr->sa_family == AF_INET && so->domain == AF_INET6) + trans_4mapped6_addr(uaddr); + return rc; + } + + errno = EOPNOTSUPP; + return -1; +} + +int PRE(getsockname)(int sockfd, struct sockaddr *addr, socklen_t *addrlen) +{ + if (is_kernel_fd(sockfd)) + return k_getsockname(sockfd, addr, addrlen); + + return getname(sockfd, addr, addrlen, 0); +} + +int PRE(getpeername)(int sockfd, struct sockaddr *addr, socklen_t *addrlen) +{ + if (is_kernel_fd(sockfd)) + return k_getpeername(sockfd, addr, addrlen); + + return 
getname(sockfd, addr, addrlen, 1); +} + +int PRE(close)(int fd) +{ + struct sock *so; + + if (is_kernel_fd(fd)) + return k_close(fd); + + GLUE_DEBUG("close fd = %d", fd); + + so = fd2sock(fd); + if (unlikely(so->valid == 0)) { + errno = EBADF; + return -1; + } else if (unlikely(so->epoll)) { + k_close(so->shadow_efd); + glue_ctx_free(CTX(so)); + } else if (so->s) { + if (OPS(so)->close) + OPS(so)->close(so->s); + + if (IS_TCP(so)) + be_tx_with_lock(CTX(so)); + + if (so->rx_left) + rte_pktmbuf_free(so->rx_left); + } + + tle_event_idle_err(&so->erev); + tle_event_idle(&so->rxev); + tle_event_idle(&so->txev); + + memset(((int*)so) + 1, 0, sizeof(*so) - sizeof(int)); + put_free_fd(fd); + return 0; +} diff --git a/lib/libtle_glue/sym.c b/lib/libtle_glue/sym.c new file mode 100644 index 0000000..39b1707 --- /dev/null +++ b/lib/libtle_glue/sym.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#ifndef __USE_GNU +#define __USE_GNU +#endif +#include <dlfcn.h> + +#include <rte_debug.h> + +#include "sym.h" +#include "log.h" + +#ifdef PRELOAD +int (*k_epoll_create)(int size); +int (*k_epoll_create1)(int flags); +int (*k_epoll_create1)(int flags); +int (*k_epoll_ctl)(int epfd, int op, int fd, struct epoll_event *event); +int (*k_epoll_wait)(int epfd, struct epoll_event *events, int maxevents, int timeout); +int (*k_epoll_pwait)(int epfd, struct epoll_event *events, int maxevents, int timeout, const sigset_t *sigmask); +int (*k_poll)(struct pollfd *fds, nfds_t nfds, int timeout); +int (*k_select)(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); +int (*k_pselect)(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timespec *timeout, const sigset_t *sigmask); +int (*k_socket)(int domain, int type, int protocol); +int (*k_listen)(int sockfd, int backlog); +int (*k_bind)(int sockfd, const struct sockaddr *addr, socklen_t addrlen); +int (*k_accept)(int sockfd, struct sockaddr *addr, socklen_t *addrlen); +int (*k_accept4)(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags); +int (*k_connect)(int sockfd, const struct sockaddr *addr, socklen_t addrlen); +int (*k_getsockopt)(int sockfd, int level, int optname, void *optval, socklen_t *optlen); +int (*k_setsockopt)(int sockfd, int level, int optname, const void *optval, socklen_t optlen); +int (*k_fcntl)(int fd, int cmd, ... 
/* arg */ ); +int (*k_ioctl)(int d, int request, ...); +int (*k_shutdown)(int sockfd, int how); +int (*k_close)(int fd); +ssize_t (*k_recv)(int sockfd, void *buf, size_t len, int flags); +ssize_t (*k_recvfrom)(int sockfd, void *buf, size_t len, int flags, struct sockaddr *src_addr, socklen_t *addrlen); +ssize_t (*k_recvmsg)(int sockfd, struct msghdr *msg, int flags); +ssize_t (*k_read)(int fd, void *buf, size_t count); +ssize_t (*k_readv)(int fd, const struct iovec *iov, int iovcnt); +ssize_t (*k_send)(int sockfd, const void *buf, size_t len, int flags); +ssize_t (*k_sendto)(int sockfd, const void *buf, size_t len, int flags, const struct sockaddr *dest_addr, socklen_t addrlen); +ssize_t (*k_sendmsg)(int sockfd, const struct msghdr *msg, int flags); +ssize_t (*k_write)(int fd, const void *buf, size_t count); +ssize_t (*k_writev)(int fd, const struct iovec *iov, int iovcnt); +ssize_t (*k_splice)(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags); +ssize_t (*k_sendfile)(int out_fd, int in_fd, off_t *offset, size_t count); +int (*k_getsockname)(int sockfd, struct sockaddr *addr, socklen_t *addrlen); +int (*k_getpeername)(int sockfd, struct sockaddr *addr, socklen_t *addrlen); + +#define INIT_FUNC(func, handle) do { \ + k_##func = dlsym(handle, #func); \ + if ((error = dlerror()) != NULL) { \ + rte_panic(#func "is not init"); \ + } \ + RTE_ASSERT(k_##func); \ +} while (0) + +#endif + +void +symbol_init(void) +{ +#ifdef PRELOAD + void *handle; + char *error; + + TRACE("in %s", __func__); + + handle = dlopen("libc.so.6", RTLD_NOW); + error = dlerror(); + if (!handle) { + fprintf(stderr, "%s\n", error); + exit(EXIT_FAILURE); + } + + INIT_FUNC(epoll_create, handle); + INIT_FUNC(epoll_create1, handle); + INIT_FUNC(epoll_create1, handle); + INIT_FUNC(epoll_ctl, handle); + INIT_FUNC(epoll_wait, handle); + INIT_FUNC(epoll_pwait, handle); + INIT_FUNC(socket, handle); + INIT_FUNC(listen, handle); + INIT_FUNC(bind, handle); + INIT_FUNC(accept, 
handle); + INIT_FUNC(accept4, handle); + INIT_FUNC(connect, handle); + INIT_FUNC(getsockopt, handle); + INIT_FUNC(setsockopt, handle); + INIT_FUNC(fcntl, handle); + INIT_FUNC(ioctl, handle); + INIT_FUNC(shutdown, handle); + INIT_FUNC(close, handle); + INIT_FUNC(recv, handle); + INIT_FUNC(recvfrom, handle); + INIT_FUNC(recvmsg, handle); + INIT_FUNC(read, handle); + INIT_FUNC(readv, handle); + INIT_FUNC(send, handle); + INIT_FUNC(sendto, handle); + INIT_FUNC(sendmsg, handle); + INIT_FUNC(write, handle); + INIT_FUNC(writev, handle); + INIT_FUNC(splice, handle); + INIT_FUNC(sendfile, handle); + INIT_FUNC(poll, handle); + INIT_FUNC(getsockname, handle); + INIT_FUNC(getpeername, handle); + INIT_FUNC(select, handle); + INIT_FUNC(pselect, handle); + + dlclose(handle); +#endif +} diff --git a/lib/libtle_glue/sym.h b/lib/libtle_glue/sym.h new file mode 100644 index 0000000..b5a333d --- /dev/null +++ b/lib/libtle_glue/sym.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _TLE_KSYM_H_ +#define _TLE_KSYM_H_ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <sys/socket.h> + +#include <sys/epoll.h> +#include <unistd.h> +#include <sys/types.h> +#include <poll.h> +#include <sys/uio.h> +#include <sys/sendfile.h> +#include <sys/select.h> +#include <sys/time.h> + +#include "tle_glue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void symbol_init(void); + +#ifdef PRELOAD +int (*k_epoll_create)(int size); +int (*k_epoll_create1)(int flags); +int (*k_epoll_ctl)(int epfd, int op, int fd, struct epoll_event *event); +int (*k_epoll_wait)(int epfd, struct epoll_event *events, int maxevents, int timeout); +int (*k_epoll_pwait)(int epfd, struct epoll_event *events, int maxevents, int timeout, const sigset_t *sigmask); +int (*k_poll)(struct pollfd *fds, nfds_t nfds, int timeout); +int (*k_select)(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); +int (*k_pselect)(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timespec *timeout, const sigset_t *sigmask); + +int (*k_socket)(int domain, int type, int protocol); +int (*k_listen)(int sockfd, int backlog); +int (*k_bind)(int sockfd, const struct sockaddr *addr, socklen_t addrlen); +int (*k_accept)(int sockfd, struct sockaddr *addr, socklen_t *addrlen); +int (*k_accept4)(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags); +int (*k_connect)(int sockfd, const struct sockaddr *addr, socklen_t addrlen); +int (*k_getsockopt)(int sockfd, int level, int optname, void *optval, socklen_t *optlen); +int (*k_setsockopt)(int sockfd, int level, int optname, const void *optval, socklen_t optlen); +int (*k_fcntl)(int fd, int cmd, ... 
/* arg */ ); +int (*k_ioctl)(int d, int request, ...); +int (*k_shutdown)(int sockfd, int how); +int (*k_close)(int fd); +ssize_t (*k_recv)(int sockfd, void *buf, size_t len, int flags); +ssize_t (*k_recvfrom)(int sockfd, void *buf, size_t len, int flags, struct sockaddr *src_addr, socklen_t *addrlen); +ssize_t (*k_recvmsg)(int sockfd, struct msghdr *msg, int flags); +ssize_t (*k_read)(int fd, void *buf, size_t count); +ssize_t (*k_readv)(int fd, const struct iovec *iov, int iovcnt); +ssize_t (*k_send)(int sockfd, const void *buf, size_t len, int flags); +ssize_t (*k_sendto)(int sockfd, const void *buf, size_t len, int flags, const struct sockaddr *dest_addr, socklen_t addrlen); +ssize_t (*k_sendmsg)(int sockfd, const struct msghdr *msg, int flags); +ssize_t (*k_write)(int fd, const void *buf, size_t count); +ssize_t (*k_writev)(int fd, const struct iovec *iov, int iovcnt); +ssize_t (*k_splice)(int fd_in, loff_t *off_in, int fd_out, loff_t *off_out, size_t len, unsigned int flags); +ssize_t (*k_sendfile)(int out_fd, int in_fd, off_t *offset, size_t count); +int (*k_getsockname)(int sockfd, struct sockaddr *addr, socklen_t *addrlen); +int (*k_getpeername)(int sockfd, struct sockaddr *addr, socklen_t *addrlen); +#else +#define k_epoll_create epoll_create +#define k_epoll_create1 epoll_create1 +#define k_epoll_ctl epoll_ctl +#define k_epoll_wait epoll_wait +#define k_epoll_pwait epoll_pwait +#define k_poll poll +#define k_select select +#define k_pselect pselect +#define k_socket socket +#define k_listen listen +#define k_bind bind +#define k_accept accept +#define k_accept4 accept4 +#define k_connect connect +#define k_getsockopt getsockopt +#define k_setsockopt setsockopt +#define k_fcntl fcntl +#define k_ioctl ioctl +#define k_shutdown shutdown +#define k_close close +#define k_recv recv +#define k_recvfrom recvfrom +#define k_recvmsg recvmsg +#define k_read read +#define k_readv readv +#define k_send send +#define k_sendto sendto +#define k_sendmsg sendmsg 
+#define k_write write +#define k_writev writev +#define k_splice splice +#define k_sendfile sendfile +#define k_getsockname getsockname +#define k_getpeername getpeername +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _TLE_KSYM_H_ */ diff --git a/lib/libtle_glue/tcp.c b/lib/libtle_glue/tcp.c new file mode 100644 index 0000000..e5186c0 --- /dev/null +++ b/lib/libtle_glue/tcp.c @@ -0,0 +1,558 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <stdarg.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> + +#include <tle_tcp.h> + +#include "sym.h" +#include "fd.h" +#include "log.h" +#include "util.h" +#include "internal.h" +#include "sock.h" + +#define MAX_TCP_KEEPIDLE 32767 +#define MAX_TCP_KEEPINTVL 32767 +#define MAX_TCP_KEEPCNT 127 + +static inline void +foo_support(const char *msg) +{ + GLUE_LOG(WARNING, "%s, return ok without really supporting it", msg); +} + +static int +tcp_setsockopt(struct sock *sk, int optname, + const void *optval, socklen_t optlen) +{ + int val; + + val = 0; /* just to make compiler happy */ + if (optlen == sizeof(val)) + val = *(const int *)optval; + + /* man tcp(7) or see /usr/include/netinet/tcp.h */ + switch (optname) { + case TCP_NODELAY: /* antonym: TCP_CORK */ + if (val == 0) + sk->option.tcpnodelay = 0; + else + sk->option.tcpnodelay = 1; + if (sk->s != NULL) + sk->s->option.tcpnodelay = sk->option.tcpnodelay; + return 0; + case TCP_CORK: + if (val == 0) + sk->option.tcpcork = 0; + else + sk->option.tcpcork = 1; + if (sk->s != NULL) + sk->s->option.tcpcork = sk->option.tcpcork; + return 0; + case TCP_KEEPIDLE: + if (val <= 0 || val > MAX_TCP_KEEPIDLE) { + errno = EINVAL; + return -1; + } + sk->option.keepidle = val; + if (sk->s != NULL) { + sk->s->option.keepidle = sk->option.keepidle; + tle_tcp_stream_set_keepalive(sk->s); + } + return 0; + case TCP_KEEPINTVL: + if (val <= 0 || val > MAX_TCP_KEEPINTVL) { + errno = EINVAL; + return -1; + } + sk->option.keepintvl = val; + if (sk->s != NULL) { + sk->s->option.keepintvl = sk->option.keepintvl; + tle_tcp_stream_set_keepalive(sk->s); + } + return 0; + case TCP_KEEPCNT: + if (val <= 0 || val > MAX_TCP_KEEPCNT) { + errno = EINVAL; + return -1; + } + sk->option.keepcnt = val; + if (sk->s != NULL) + sk->s->option.keepcnt = sk->option.keepcnt; + return 0; + case TCP_USER_TIMEOUT: + 
foo_support("set TCP_USER_TIMEOUT"); + return 0; + case TCP_DEFER_ACCEPT: + if (val == 0) + return 0; + break; + case TCP_FASTOPEN: + case TCP_FASTOPEN_CONNECT: + if (val == 0) + return 0; + break; + case TCP_QUICKACK: + /* Based on below info, it's safe to just return 0: + * "This flag is not permanent, it only enables a + * switch to or from quickack mode. Subsequent + * operationof the TCP protocol will once again ..." + */ + if (val == 0) + sk->option.tcpquickack = 0; + else + sk->option.tcpquickack = 8; + if (sk->s != NULL) + sk->s->option.tcpquickack = sk->option.tcpquickack; + return 0; + case TCP_CONGESTION: + /* only support NewReno; but we return success for + * any kind of setting. + */ + foo_support("set TCP_CONGESTION"); + return 0; + default: + break; + } + + GLUE_LOG(WARNING, "setsockopt(%d) with level = SOL_TCP, optname = %d\n", + sock2fd(sk), optname); + errno = EOPNOTSUPP; + return -1; +} + +static int +tcp_getsockopt(struct sock *sk, int optname, + void *optval, socklen_t *optlen) +{ + int rc; + union { + int val; + uint64_t val64; + struct linger ling; + struct timeval tm; + } *p = optval; + + RTE_SET_USED(optlen); + + /* man tcp(7) or see /usr/include/netinet/tcp.h */ + switch (optname) { + case TCP_MAXSEG: + p->val = 64 * 1024; + return 0; + case TCP_FASTOPEN: + case TCP_FASTOPEN_CONNECT: + p->val = 0; + return 0; + case TCP_INFO: + /* needed by netperf */ + rc = tle_tcp_stream_get_info(sk->s, optval, optlen); + if (rc < 0) { + errno = -rc; + return -1; + } + return 0; + case TCP_CONGESTION: + strncpy(optval, "NewReno", *optlen); + ((char *)optval)[*optlen - 1] = '\0'; + return 0; + case TCP_CORK: + p->val = sk->option.tcpcork; + return 0; + case TCP_QUICKACK: + p->val = sk->option.tcpquickack != 0 ? 
1 : 0; + return 0; + case TCP_NODELAY: + p->val = sk->option.tcpnodelay; + return 0; + case TCP_KEEPIDLE: + p->val = sk->option.keepidle; + return 0; + case TCP_KEEPINTVL: + p->val = sk->option.keepintvl; + return 0; + case TCP_KEEPCNT: + p->val = sk->option.keepcnt; + return 0; + default: + break; + } + + GLUE_LOG(WARNING, "getsockopt(%d) with level = SOL_TCP, optname = %d", + sock2fd(sk), optname); + errno = EOPNOTSUPP; + return -1; +} + +static int +tcp_getname(struct sock *sk, struct sockaddr *addr, int peer) +{ + int rc; + int addrlen; + struct tle_tcp_stream_addr a; + + rc = tle_tcp_stream_get_addr(sk->s, &a); + if (rc) { + errno = -rc; + return -1; + } + + if (a.local.ss_family == AF_INET) + addrlen = sizeof(struct sockaddr_in); + else + addrlen = sizeof(struct sockaddr_in6); + + if (peer) + memcpy(addr, &a.remote, addrlen); + else + memcpy(addr, &a.local, addrlen); + + addr->sa_family = a.local.ss_family; + + return 0; +} + +static int +tcp_bind(struct sock *sk, const struct sockaddr *addr) +{ + sk->s = open_bind(sk, addr, NULL); + if (sk->s == NULL) + return -1; + return 0; +} + +static int +tcp_listen(struct sock *sk, int backlog) +{ + int32_t rc; + + if (backlog < 0) { + errno = EINVAL; + return -1; + } + + /* + * if socket is unbind, should call open_bind to assign an ramdon addres + * before listening + */ + if (sk->s == NULL) { + sk->s = open_bind(sk, NULL, NULL); + if (sk->s == NULL) + return -1; + } + + rc = tle_tcp_stream_listen(sk->s); + if (rc) { + errno = -rc; + return -1; + } + + return 0; +} + +static int +tcp_connect(struct sock *sk, const struct sockaddr *addr) +{ + int rc; + int rx; + int ret; + struct epoll_event event; + struct sockaddr_storage laddr; + struct sockaddr_storage raddr; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + struct sockaddr *local = NULL; + + /* TODO: For multi-thread case, we shall properly manage local + * L4 port so that packets coming back can be put into the same + * queue pair. 
+ */ + if (sk->s) { + struct tle_tcp_stream *ts = TCP_STREAM(sk->s); + /* case 1: bind happens before connect; + * case 2: connect after a previous connect, failed + * or succeeded. + */ + if (ts->tcb.err != 0) { + errno = ts->tcb.err; + return -1; + } + + int state = ts->tcb.state; + + if (state >= TCP_ST_ESTABLISHED && sk->tcp_connected == 0) { + sk->tcp_connected = 1; + return 0; /* connect succeeds */ + } + + if (state == TCP_ST_CLOSED) { + if (tcp_getname(sk, (struct sockaddr *)&laddr, 0) == 0) + local = (struct sockaddr *)&laddr; + tle_tcp_stream_close(sk->s); + sk->s = NULL; + goto do_connect; /* case 1 */ + } else if (state >= TCP_ST_SYN_SENT && + state < TCP_ST_ESTABLISHED) + errno = EALREADY; + else if (state >= TCP_ST_ESTABLISHED) + errno = EISCONN; + else + errno = EINVAL; + return -1; + } + +do_connect: + sk->s = open_bind(sk, local, addr); + if (sk->s == NULL) /* errno is set */ + return -1; + + if (sk->domain == AF_INET) { + addr4 = (struct sockaddr_in*)&raddr; + addr4->sin_family = AF_INET; + addr4->sin_port = sk->s->port.src; + addr4->sin_addr.s_addr = sk->s->ipv4.addr.src; + } else { + addr6 = (struct sockaddr_in6*)&raddr; + addr6->sin6_family = AF_INET6; + addr6->sin6_port = sk->s->port.src; + rte_memcpy(&addr6->sin6_addr, &sk->s->ipv6.addr.src, + sizeof(struct in6_addr)); + } + rc = tle_tcp_stream_connect(sk->s, (const struct sockaddr*)&raddr); + if (rc < 0) { + errno = -rc; + return -1; + } + + if (is_nonblock(sk, 0)) { + be_tx_with_lock(CTX(sk)); + errno = EINPROGRESS; /* It could not be ready so fast */ + return -1; + } + + do { + be_process(CTX(sk)); + + if (tle_event_state(&sk->txev) == TLE_SEV_UP) { + sk->tcp_connected = 1; + tle_event_down(&sk->txev); + ret = 0; + break; + } + + if (tle_event_state(&sk->erev) == TLE_SEV_UP) { + tle_event_down(&sk->erev); + errno = ECONNREFUSED; + ret = -1; + break; + } + + /* fix me: timeout? 
*/ + epoll_kernel_wait(CTX(sk), -1, &event, 1, 1, &rx); + } while (1); + + return ret; +} + +static void tcp_update_cfg(struct sock *sk); + +static int +tcp_accept(struct sock *sk, struct sockaddr *addr, + socklen_t *addrlen, int flags) +{ + int fd; + int rx; + struct sock *newsk; + struct tle_stream *rs; + struct epoll_event event; + struct tle_tcp_stream_addr a; + + if (sk->s == NULL) { + errno = EINVAL; + return -1; + } + + fd = get_unused_fd(); + if (fd < 0) { + errno = ENFILE; + return -1; + } + + newsk = fd2sock(fd); +again: + if (tle_tcp_stream_accept(sk->s, &rs, 1) == 0) { + if (rte_errno != EAGAIN) { + errno = rte_errno; + return -1; + } + + if (is_nonblock(sk, flags)) { + newsk->valid = 0; + put_free_fd(fd); + errno = EAGAIN; + return -1; + } + + epoll_kernel_wait(CTX(sk), -1, &event, 1, 1, &rx); + be_process(CTX(sk)); + goto again; + } + + newsk->s = rs; + newsk->cid = sk->cid; + newsk->domain = sk->domain; + newsk->proto = sk->proto; + newsk->option.raw = 0; + newsk->option.tcpquickack = 1; + newsk->option.mulloop = 1; + newsk->option.multtl = 1; + newsk->option.keepidle = 2 * 60 * 60; + newsk->option.keepintvl = 75; + newsk->option.keepcnt = 9; + newsk->s->option.raw = newsk->option.raw; + sock_alloc_events(newsk); + tcp_update_cfg(newsk); + + if (addr) { + /* We assume this function never fails */ + tle_tcp_stream_get_addr(rs, &a); + + *addrlen = sizeof(struct sockaddr_in); + memcpy(addr, &a.remote, *addrlen); + } + + GLUE_DEBUG("accept fd = %d", fd); + return fd; +} + +static ssize_t +tcp_send(struct sock *sk, struct rte_mbuf *pkt[], + uint16_t num, const struct sockaddr *dst_addr) +{ + uint16_t rc; + RTE_SET_USED(dst_addr); + + if (sk->s == NULL) { + errno = EPIPE; + return 0; + } + + rc = tle_tcp_stream_send(sk->s, pkt, num); + if (rc == 0) + errno = rte_errno; + return rc; +} + +static ssize_t +tcp_recv(struct tle_stream *s, struct rte_mbuf *pkt[], + uint16_t num, struct sockaddr *addr) +{ + uint16_t rc; + + RTE_SET_USED(addr); + + /* optimize me: 
merge multiple mbufs into one */ + rc = tle_tcp_stream_recv(s, pkt, num); + if (rc == 0) + errno = rte_errno; + + return rc; +} + +static ssize_t +tcp_readv(struct tle_stream *ts, struct msghdr *msg, int flags __rte_unused) +{ + ssize_t rc; + + rc = tle_tcp_stream_recvmsg(ts, msg); + if (rc < 0) + errno = rte_errno; + return rc; +} + +static ssize_t +tcp_writev(struct sock *sk, const struct iovec *iov, + int iovcnt, const struct sockaddr *dst_addr) +{ + ssize_t rc; + struct rte_mempool *mp = get_mempool_by_socket(0); /* fix me */ + + RTE_SET_USED(dst_addr); + + if (sk->s == NULL) { + errno = EPIPE; + return -1; + } + + rc = tle_tcp_stream_writev(sk->s, mp, iov, iovcnt); + if (rc < 0) + errno = rte_errno; + return rc; +} + +static int +tcp_shutdown(struct sock *sk, int how) +{ + int ret; + + /* Refer to linux/net/ipv4/tcp.c:tcp_shutdown() */ + if (how == SHUT_RD) + return 0; + + ret = tle_tcp_stream_shutdown(sk->s, how); + if (ret < 0) + errno = rte_errno; + else + be_tx_with_lock(CTX(sk)); /* Make sure fin is sent */ + return ret; + +} + +static void +tcp_update_cfg(struct sock *sk) +{ + struct tle_tcp_stream_cfg prm = {0}; + + prm.recv_ev = &sk->rxev; + prm.send_ev = &sk->txev; + prm.err_ev = &sk->erev; + tle_tcp_stream_update_cfg(&sk->s, &prm, 1); +} + +struct proto tcp_prot = { + .name = "TCP", + .setsockopt = tcp_setsockopt, + .getsockopt = tcp_getsockopt, + .getname = tcp_getname, + .bind = tcp_bind, + .listen = tcp_listen, + .connect = tcp_connect, + .accept = tcp_accept, + .recv = tcp_recv, + .send = tcp_send, + .readv = tcp_readv, + .writev = tcp_writev, + .shutdown = tcp_shutdown, + .close = tle_tcp_stream_close, + .update_cfg = tcp_update_cfg, +}; diff --git a/lib/libtle_glue/tle_glue.h b/lib/libtle_glue/tle_glue.h new file mode 100644 index 0000000..38357e4 --- /dev/null +++ b/lib/libtle_glue/tle_glue.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _TLE_GLUE_H_ +#define _TLE_GLUE_H_ + +#include <sys/types.h> +#include <sys/epoll.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/socket.h> + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <signal.h> +#include <poll.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef PRELOAD + +#define PRE(name) name + +#else + +#define PRE(name) tle_ ## name + +#endif + +void glue_init1(int argc, char **argv); + +/* epoll */ +int PRE(epoll_create)(int size); +int PRE(epoll_create1)(int flags); +int PRE(epoll_ctl)(int epfd, int op, int fd, struct epoll_event *event); +int PRE(epoll_wait)(int epfd, struct epoll_event *events, int maxevents, int timeout); +int PRE(epoll_pwait)(int epfd, struct epoll_event *events, + int maxevents, int timeout, const sigset_t *sigmask); + +/* for setup, settings, and destroy */ +int PRE(socket)(int domain, int type, int protocol); +int PRE(listen)(int sockfd, int backlog); +int PRE(bind)(int sockfd, const struct sockaddr *addr, socklen_t addrlen); +int PRE(accept)(int sockfd, struct sockaddr *addr, socklen_t *addrlen); +int PRE(accept4)(int sockfd, struct sockaddr *addr, socklen_t *addrlen, int flags); +int PRE(connect)(int sockfd, const struct sockaddr *addr, socklen_t addrlen); +int PRE(getsockopt)(int sockfd, int level, int optname, + void *optval, socklen_t *optlen); +int PRE(setsockopt)(int sockfd, int level, int optname, + const void *optval, 
socklen_t optlen); +int PRE(getsockname)(int sockfd, struct sockaddr *addr, socklen_t *addrlen); +int PRE(getpeername)(int sockfd, struct sockaddr *addr, socklen_t *addrlen); +int PRE(fcntl)(int fd, int cmd, ... /* arg */ ); +int PRE(ioctl)(int d, unsigned long int request, ...); +int PRE(shutdown)(int sockfd, int how); +int PRE(close)(int fd); + +/* for recv */ +ssize_t PRE(recv)(int sockfd, void *buf, size_t len, int flags); +ssize_t PRE(recvfrom)(int sockfd, void *buf, size_t len, int flags, + struct sockaddr *src_addr, socklen_t *addrlen); +ssize_t PRE(recvmsg)(int sockfd, struct msghdr *msg, int flags); +ssize_t PRE(read)(int fd, void *buf, size_t count); +ssize_t PRE(readv)(int fd, const struct iovec *iov, int iovcnt); + +/* for send */ +ssize_t PRE(send)(int sockfd, const void *buf, size_t len, int flags); +ssize_t PRE(sendto)(int sockfd, const void *buf, size_t len, int flags, + const struct sockaddr *dest_addr, socklen_t addrlen); +ssize_t PRE(sendmsg)(int sockfd, const struct msghdr *msg, int flags); +ssize_t PRE(write)(int fd, const void *buf, size_t count); +ssize_t PRE(writev)(int fd, const struct iovec *iov, int iovcnt); + +/* advanced functions */ +ssize_t PRE(splice)(int fd_in, loff_t *off_in, int fd_out, + loff_t *off_out, size_t len, unsigned int flags); +ssize_t PRE(sendfile)(int out_fd, int in_fd, off_t *offset, size_t count); + +/* poll */ +int PRE(poll)(struct pollfd *fds, nfds_t nfds, int timeout); +int PRE(ppoll)(struct pollfd *fds, nfds_t nfds, + const struct timespec *tmo_p, const sigset_t *sigmask); + +/* select */ +int PRE(select)(int nfds, fd_set *readfds, fd_set *writefds, + fd_set *exceptfds, struct timeval *timeout); +int PRE(pselect)(int nfds, fd_set *readfds, fd_set *writefds, + fd_set *exceptfds, const struct timespec *timeout, + const sigset_t *sigmask); + +/* non-posix APIs */ +int fd_ready(int fd, int events); +void v_get_stats_snmp(unsigned long mibs[]); + +#ifdef __cplusplus +} +#endif + +#endif /* _TLE_GLUE_H_ */ diff --git 
a/lib/libtle_glue/udp.c b/lib/libtle_glue/udp.c new file mode 100644 index 0000000..9f199bc --- /dev/null +++ b/lib/libtle_glue/udp.c @@ -0,0 +1,419 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdarg.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <netinet/in.h> + +#include <rte_ethdev.h> +#include <tle_udp.h> + +#include "sym.h" +#include "fd.h" +#include "log.h" +#include "util.h" +#include "internal.h" +#include "sock.h" + +static int +udp_setsockopt(__rte_unused struct sock *sk, __rte_unused int optname, + __rte_unused const void *optval, __rte_unused socklen_t optlen) +{ + return 0; +} + +static int +udp_getsockopt(__rte_unused struct sock *sk, __rte_unused int optname, + __rte_unused void *optval, __rte_unused socklen_t *optlen) +{ + return 0; +} + +static int +udp_getname(struct sock *sk, struct sockaddr *addr, int peer) +{ + struct tle_udp_stream_param p; + size_t addrlen; + int rc; + + rc = tle_udp_stream_get_param(sk->s, &p); + if (rc) { + errno = -rc; + return -1; + } + + addrlen = get_sockaddr_len(sk->domain); + if (peer) + memcpy(addr, &p.remote_addr, addrlen); + else + memcpy(addr, &p.local_addr, addrlen); + addr->sa_family = p.local_addr.ss_family; + return 0; +} + +static int +udp_bind(struct sock *sk, const struct sockaddr *addr) +{ + if (sk->ubind) { + errno = EINVAL; + return -1; + } 
+ + sk->s = open_bind(sk, addr, NULL); + if (sk->s != NULL) { + sk->ubind = 1; + if (is_any_addr(addr)) + sk->ubindany = 1; + return 0; + } + + return -1; +} + +static int +udp_connect(struct sock *sk, const struct sockaddr *addr) +{ + struct sockaddr_storage laddr; + + /* According to linux manual, connectionless sockets may dissolve the + * association by connecting to an address with the sa_family member of + * sockaddr set to AF_UNSPEC (supported on Linux since kernel 2.2). + */ + if (sk->ubind) { + if (udp_getname(sk, (struct sockaddr *)&laddr, 0)) + return -1; + if (addr->sa_family == AF_UNSPEC) { + addr = NULL; + if (sk->ubindany) + set_any_addr((struct sockaddr *)&laddr); + } + sk->s = open_bind(sk, (const struct sockaddr *)&laddr, addr); + } else { + if (addr->sa_family == AF_UNSPEC) { + tle_udp_stream_close(sk->s); + sk->s = NULL; + return 0; + } + sk->s = open_bind(sk, NULL, addr); + } + + if (sk->s) + return 0; + + return -1; +} + +static int +udp_addr_prepare(struct sock *sk, const struct sockaddr **p_dst_addr, + struct sockaddr_storage *addr) +{ + const struct sockaddr *dst_addr = *p_dst_addr; + + if (dst_addr != NULL && + dst_addr->sa_family == AF_INET6 && + IN6_IS_ADDR_V4MAPPED(&((const struct sockaddr_in6 *)dst_addr)->sin6_addr)) { + rte_memcpy(addr, dst_addr, sizeof(struct sockaddr_in6)); + dst_addr = (const struct sockaddr*)(addr); + *p_dst_addr = dst_addr; + retrans_4mapped6_addr((struct sockaddr_storage*)(addr)); + } + + if (sk->s == NULL) { + if (dst_addr == NULL) { + errno = EDESTADDRREQ; + return -1; + } + + sk->s = open_bind(sk, NULL, dst_addr); + if (sk->s == NULL) /* errno is set */ + return -1; + } else if (dst_addr != NULL) { + if (dst_addr->sa_family == AF_INET6 && sk->domain == AF_INET) { + errno = EINVAL; + return -1; + } + if (dst_addr->sa_family == AF_INET && sk->domain == AF_INET6) { + if (IN6_IS_ADDR_UNSPECIFIED(&sk->s->ipv6.addr.dst)) { + sk->s->type = TLE_V4; + sk->s->ipv4.addr.dst = 0; + } else { + errno = ENETUNREACH; + 
return -1; + } + } + } + + return 0; +} + +/* abstract client info from mbuf into s */ +static inline void +udp_pkt_addr(const struct rte_mbuf *m, struct sockaddr *addr, + __rte_unused uint16_t family) +{ + const struct ipv4_hdr *ip4h; + const struct ipv6_hdr *ip6h; + const struct udp_hdr *udph; + struct sockaddr_in *in4; + struct sockaddr_in6 *in6; + int off = -(m->l4_len + m->l3_len); + + udph = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, -m->l4_len); + ip4h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, off); + if ((ip4h->version_ihl>>4) == 4) { + addr->sa_family = AF_INET; + in4 = (struct sockaddr_in *)addr; + in4->sin_port = udph->src_port; + in4->sin_addr.s_addr = ip4h->src_addr; + } else { + addr->sa_family = AF_INET6; + ip6h = (const struct ipv6_hdr*)ip4h; + in6 = (struct sockaddr_in6 *)addr; + in6->sin6_port = udph->src_port; + rte_memcpy(&in6->sin6_addr, ip6h->src_addr, + sizeof(in6->sin6_addr)); + } +} + +static ssize_t +udp_send(struct sock *sk, struct rte_mbuf *pkt[], + uint16_t num, const struct sockaddr *dst_addr) +{ + uint16_t i; + struct sockaddr_storage addr; + + if (udp_addr_prepare(sk, &dst_addr, &addr) != 0) + return 0; + + /* chain them together as *one* message */ + for (i = 1; i < num; ++i) { + pkt[i-1]->next = pkt[i]; + pkt[0]->pkt_len += pkt[i]->pkt_len; + } + pkt[0]->nb_segs = num; + + if (tle_udp_stream_send(sk->s, &pkt[0], 1, dst_addr) == 0) { + errno = rte_errno; + return 0; + } + + return num; +} + +static ssize_t +udp_readv(struct tle_stream *s, struct msghdr *msg, int flags) +{ + int i; + ssize_t sz; + uint16_t rc; + uint32_t fin; + struct iovec iv; + struct rte_mbuf *m; + const struct iovec *iov = msg->msg_iov; + int iovcnt = msg->msg_iovlen; + + rc = tle_udp_stream_recv(s, &m, 1); + if (rc == 0) { + errno = rte_errno; + return -1; + } + + if (!s->option.timestamp) + s->timestamp = m->timestamp; + if (msg != NULL && msg->msg_control != NULL) { + if (s->option.timestamp) + tle_set_timestamp(msg, m); + else + msg->msg_controllen = 
0; + } + + if (msg != NULL && msg->msg_name != NULL) { + udp_pkt_addr(m, (struct sockaddr*)msg->msg_name, 0); + if (((struct sockaddr *)msg->msg_name)->sa_family == AF_INET) + msg->msg_namelen = sizeof(struct sockaddr_in); + else + msg->msg_namelen = sizeof(struct sockaddr_in6); + } + + for (i = 0, sz = 0; i != iovcnt; i++) { + iv = iov[i]; + sz += iv.iov_len; + fin = _mbus_to_iovec(&iv, &m, 1); + if (fin == 1) { + sz -= iv.iov_len; + break; + } + } + if (fin == 0) { + if (flags & MSG_TRUNC) + sz += m->pkt_len; + rte_pktmbuf_free_seg(m); + msg->msg_flags |= MSG_TRUNC; + } + return sz; +} + +static ssize_t +udp_writev(struct sock *sk, const struct iovec *iov, + int iovcnt, const struct sockaddr *dst_addr) +{ + struct rte_mempool *mp = get_mempool_by_socket(0); /* fix me */ + struct sockaddr_storage addr; + uint32_t slen, left_m, left_b, copy_len, left; + uint16_t i, rc, nb_mbufs; + char *dst, *src; + uint64_t ufo; + size_t total; + int j; + + if (udp_addr_prepare(sk, &dst_addr, &addr) != 0) + return -1; + + for (j = 0, total = 0; j < iovcnt; ++j) + total += iov[j].iov_len; + + ufo = tx_offload & DEV_TX_OFFLOAD_UDP_TSO; + if (ufo) + slen = RTE_MBUF_DEFAULT_DATAROOM; + else + slen = 1500 - 20; /* mtu - ip_hdr_len */ + + nb_mbufs = (total + 8 + slen - 1) / slen; + struct rte_mbuf *mbufs[nb_mbufs]; + if (unlikely(rte_pktmbuf_alloc_bulk(mp, mbufs, nb_mbufs) != 0)) { + errno = ENOMEM; + return -1; + } + + left_b = iov[0].iov_len; + for (i = 0, j = 0; i < nb_mbufs && j < iovcnt; ++i) { + /* first frag has udp hdr, its payload is 8 bytes less */ + if (i == 0) + slen -= 8; + else if (i == 1) + slen += 8; + left_m = slen; + while (left_m > 0 && j < iovcnt) { + copy_len = RTE_MIN(left_m, left_b); + dst = rte_pktmbuf_mtod_offset(mbufs[i], char *, + slen - left_m); + src = (char *)iov[j].iov_base + iov[j].iov_len - left_b; + rte_memcpy(dst, src, copy_len); + + left_m -= copy_len; + left_b -= copy_len; + if (left_b == 0) { + j++; + left_b = iov[j].iov_len; + } + } + 
mbufs[i]->data_len = slen; + mbufs[i]->pkt_len = slen; + } + + /* last seg */ + if (nb_mbufs == 1) { + mbufs[nb_mbufs - 1]->data_len = total; + mbufs[nb_mbufs - 1]->pkt_len = total; + } else { + mbufs[nb_mbufs - 1]->data_len = total - (nb_mbufs - 1) * slen + 8; + mbufs[nb_mbufs - 1]->pkt_len = total - (nb_mbufs - 1) * slen + 8; + } + + /* chain as *one* message */ + for (i = 1; i < nb_mbufs; ++i) + mbufs[i-1]->next = mbufs[i]; + mbufs[0]->nb_segs = nb_mbufs; + mbufs[0]->pkt_len = total; + nb_mbufs = 1; + + rc = tle_udp_stream_send(sk->s, mbufs, nb_mbufs, dst_addr); + for (i = rc, left = 0; i < nb_mbufs; ++i) { + left += mbufs[i]->pkt_len; + rte_pktmbuf_free(mbufs[i]); + } + + if (rc == 0) { + errno = rte_errno; + return -1; + } + + return total - left; +} + +static ssize_t +udp_recv(struct tle_stream *s, struct rte_mbuf *pkt[], uint16_t num, + struct sockaddr *addr) +{ + uint16_t rc; + + rc = tle_udp_stream_recv(s, pkt, num); + if (addr && num == 1 && rc == 1) + udp_pkt_addr(pkt[0], addr, 0); + + if (rc == 0) + errno = rte_errno; + return rc; +} + +static void +udp_update_cfg(struct sock *sk) +{ + struct tle_udp_stream_param prm; + memset(&prm, 0, sizeof(prm)); + + prm.recv_ev = &sk->rxev; + prm.send_ev = &sk->txev; + + tle_udp_stream_update_cfg(&sk->s, &prm, 1); +} + +static int +udp_shutdown(struct sock *sk, int how) +{ + int rc; + + if (sk->s == NULL) { + errno = ENOTCONN; + return -1; + } + + rc = tle_udp_stream_shutdown(sk->s, how); + if (rc < 0) { + errno = -rc; + return -1; + } + return 0; +} + +struct proto udp_prot = { + .name = "UDP", + .setsockopt = udp_setsockopt, + .getsockopt = udp_getsockopt, + .getname = udp_getname, + .bind = udp_bind, + .connect = udp_connect, + .recv = udp_recv, + .send = udp_send, + .readv = udp_readv, + .writev = udp_writev, + .shutdown = udp_shutdown, + .close = tle_udp_stream_close, + .update_cfg = udp_update_cfg, +}; diff --git a/lib/libtle_glue/util.c b/lib/libtle_glue/util.c new file mode 100644 index 0000000..69fc555 --- 
/dev/null +++ b/lib/libtle_glue/util.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <pthread.h> +#include <sched.h> +#include <unistd.h> + +#include "util.h" + +#define NUMA_NODE_PATH "/sys/devices/system/node" + +static unsigned +eal_cpu_socket_id(unsigned lcore_id) +{ + unsigned socket; + char path[PATH_MAX]; + + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { + snprintf(path, sizeof(path), "%s/node%u/cpu%u", NUMA_NODE_PATH, + socket, lcore_id); + if (access(path, F_OK) == 0) + return socket; + } + return 0; +} + +uint32_t +get_socket_id(void) +{ + int err; + uint32_t i; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + err = pthread_getaffinity_np(pthread_self(), + sizeof(cpuset), &cpuset); + if (err) + return 0; + + for (i = 0; i < CPU_SETSIZE; i++) + if (CPU_ISSET(i, &cpuset)) + break; + + return eal_cpu_socket_id(i); +} diff --git a/lib/libtle_glue/util.h b/lib/libtle_glue/util.h new file mode 100644 index 0000000..ac67d8b --- /dev/null +++ b/lib/libtle_glue/util.h @@ -0,0 +1,377 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _TLE_GLUE_UTIL_H_ +#define _TLE_GLUE_UTIL_H_ + +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +#include <tle_tcp.h> +#include <tle_udp.h> + +#include "../libtle_l4p/tcp_stream.h" + +#include "fd.h" +#include "ctx.h" +#include "sock.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void * +xstrdup(const void *old) +{ + void *new = strdup(old); + if (unlikely(new == NULL)) + rte_panic("Failed to strdup"); + return new; +} + +static inline void * +xmalloc(size_t size) +{ + void *p = malloc(size ? size : 1); + if (p == NULL) + rte_panic("Failed to malloc"); + return p; +} + +static inline char * +xvasprintf(const char *format, va_list args) +{ + va_list args2; + size_t needed; + char *s; + + va_copy(args2, args); + needed = vsnprintf(NULL, 0, format, args); + + s = xmalloc(needed + 1); + + vsnprintf(s, needed + 1, format, args2); + va_end(args2); + + return s; +} + +static inline char * +xasprintf(const char *format, ...) 
+{ + va_list args; + char *s; + + va_start(args, format); + s = xvasprintf(format, args); + va_end(args); + + return s; +} + +static inline char ** +grow_argv(char **argv, size_t cur_siz, size_t grow_by) +{ + char **p; + + p = realloc(argv, sizeof(char *) * (cur_siz + grow_by)); + if (unlikely(p == NULL)) + rte_panic("Failed to grow argv"); + return p; +} + +static inline void +release_argv(int argc, char **argv_to_release, char **argv) +{ + int i; + + for (i = 0; i < argc; ++i) + free(argv_to_release[i]); + + free(argv_to_release); + free(argv); +} + +static inline void +tle_event_attach(struct tle_event *ev, struct tle_evq *evq, const void *data) +{ + ev->head = evq; + ev->data = data; +} + +static inline void +sock_alloc_events(struct sock *so) +{ + tle_event_attach(&so->erev, CTX(so)->ereq, so); + tle_event_attach(&so->rxev, CTX(so)->rxeq, so); + tle_event_attach(&so->txev, CTX(so)->txeq, so); + tle_event_active(&so->erev, TLE_SEV_DOWN); +#ifndef LOOK_ASIDE_BACKEND + tle_event_active(&so->rxev, TLE_SEV_DOWN); + tle_event_active(&so->txev, TLE_SEV_DOWN); +#endif +} + +static inline void +sock_active_events(struct sock *so) +{ + tle_event_active(&so->erev, TLE_SEV_DOWN); + tle_event_active(&so->rxev, TLE_SEV_DOWN); + tle_event_active(&so->txev, TLE_SEV_DOWN); +} + +static inline const struct in6_addr* +select_local_addr_v6(const struct sockaddr *remote, struct glue_ctx *ctx) +{ + /* todo: implement route table to decide local address */ + + if (IN6_IS_ADDR_LOOPBACK(&((const struct sockaddr_in6 *)remote) + ->sin6_addr)) + return &in6addr_loopback; + else + return &ctx->ipv6; +} + +static inline in_addr_t +select_local_addr(const struct sockaddr *remote, struct glue_ctx *ctx) +{ + /* todo: implement route table to decide local address */ + in_addr_t remote_addr; + + remote_addr = ((const struct sockaddr_in*)remote)->sin_addr.s_addr; + if (remote_addr == htonl(INADDR_LOOPBACK)) + return htonl(INADDR_LOOPBACK); + else + return ctx->ipv4; +} + +static inline bool 
+is_any_addr(const struct sockaddr *addr) +{ + const struct sockaddr_in *addr4; + const struct sockaddr_in6 *addr6; + + if (addr->sa_family == AF_INET) { + addr4 = (const struct sockaddr_in *)addr; + if (addr4->sin_addr.s_addr == htonl(INADDR_ANY)) + return true; + else + return false; + } else if (addr->sa_family == AF_INET6) { + addr6 = (const struct sockaddr_in6 *)addr; + if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr)) + return true; + else + return false; + } else + return false; +} + +static inline void +set_any_addr(struct sockaddr *addr) +{ + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + + if (addr->sa_family == AF_INET) { + addr4 = (struct sockaddr_in *)addr; + addr4->sin_addr.s_addr = htonl(INADDR_ANY); + } else if (addr->sa_family == AF_INET6) { + addr6 = (struct sockaddr_in6 *)addr; + addr6->sin6_addr = in6addr_any; + } +} + +/* transform an IPv4 address(in struct sockaddr_in) to + * an IPv4 mapped IPv6 address(in struct sockaddr_in6) */ +static inline void +trans_4mapped6_addr(struct sockaddr *addr) +{ + struct sockaddr_in6 *addr6; + + if (addr->sa_family != AF_INET) + return; + + addr6 = (struct sockaddr_in6*)addr; + addr6->sin6_family = AF_INET6; + addr6->sin6_addr.s6_addr32[0] = 0; + addr6->sin6_addr.s6_addr32[1] = 0; + addr6->sin6_addr.s6_addr32[2] = 0xffff0000; + addr6->sin6_addr.s6_addr32[3] = ((struct sockaddr_in*)addr)->sin_addr.s_addr; +} + +/* transform an IPv4 mapped IPv6 address(in struct sockaddr_in6) to + * an IPv4 address(in struct sockaddr_in) */ +static inline void +retrans_4mapped6_addr(struct sockaddr_storage * addr) +{ + struct in6_addr* addr6; + if (addr->ss_family == AF_INET) + return; + + addr6 = &((struct sockaddr_in6*)addr)->sin6_addr; + if(IN6_IS_ADDR_V4MAPPED(addr6)) { + addr->ss_family = AF_INET; + ((struct sockaddr_in*)addr)->sin_addr.s_addr = addr6->__in6_u.__u6_addr32[3]; + } +} + +static inline struct tle_stream * +open_bind(struct sock *so, const struct sockaddr *local, + const struct sockaddr *remote) +{ + 
struct tle_stream *s; + struct sockaddr_storage *l, *r; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + struct tle_tcp_stream_param pt = {0}; + struct tle_udp_stream_param pu = {0}; + + if (IS_TCP(so)) { + pt.option = so->option.raw; + l = &pt.addr.local; + r = &pt.addr.remote; + pt.cfg.err_ev = &so->erev; + pt.cfg.recv_ev = &so->rxev; + pt.cfg.send_ev = &so->txev; + } else { + pu.option = so->option.raw; + l = &pu.local_addr; + r = &pu.remote_addr; + pu.recv_ev = &so->rxev; + pu.send_ev = &so->txev; + } + + if (remote) { + memcpy(r, remote, get_sockaddr_len(remote->sa_family)); + retrans_4mapped6_addr(r); + if(r->ss_family == AF_INET) { + addr4 = (struct sockaddr_in*)r; + if (addr4->sin_addr.s_addr == 0) + addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK); + } else { + addr6 = (struct sockaddr_in6*)r; + if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr)) + rte_memcpy(&addr6->sin6_addr, &in6addr_loopback, + sizeof(struct in6_addr)); + } + } + + if (local) { + memcpy(l, local, get_sockaddr_len(local->sa_family)); + retrans_4mapped6_addr(l); + } else { + if (remote) + l->ss_family = r->ss_family; + else + l->ss_family = so->domain; + } + + if (!remote) + r->ss_family = l->ss_family; + + /* Endpoints of stream have different socket families */ + if (r->ss_family != l->ss_family) { + if (l->ss_family == AF_INET) { + errno = EINVAL; + return NULL; + } else { + /* if local addr is unbound, convert into remote family */ + if (IN6_IS_ADDR_UNSPECIFIED(&((struct sockaddr_in6*)l)->sin6_addr)) { + l->ss_family = AF_INET; + ((struct sockaddr_in*)l)->sin_addr.s_addr = 0; + } else { + errno = ENETUNREACH; + return NULL; + } + } + } + + if (l->ss_family == AF_INET) { + addr4 = (struct sockaddr_in*)l; + if (addr4->sin_addr.s_addr == htonl(INADDR_ANY) && remote) { + addr4->sin_addr.s_addr = + select_local_addr((struct sockaddr*)r, CTX(so)); + if (addr4->sin_addr.s_addr == htonl(INADDR_ANY)) { + errno = EADDRNOTAVAIL; + return NULL; + } + } + else if (addr4->sin_addr.s_addr != 
CTX(so)->ipv4 && + addr4->sin_addr.s_addr != htonl(INADDR_LOOPBACK) && + addr4->sin_addr.s_addr != htonl(INADDR_ANY)) { + errno = EADDRNOTAVAIL; + return NULL; + } + } else { + addr6 = (struct sockaddr_in6 *)l; + if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr) && remote) { + memcpy(&addr6->sin6_addr, + select_local_addr_v6((struct sockaddr*)r, CTX(so)), + sizeof(struct in6_addr)); + if (IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr)) { + errno = EADDRNOTAVAIL; + return NULL; + } + } + else if (memcmp(&addr6->sin6_addr, &CTX(so)->ipv6, + sizeof(struct in6_addr)) != 0 && + (!IN6_IS_ADDR_LOOPBACK(&addr6->sin6_addr)) && + (!IN6_IS_ADDR_UNSPECIFIED(&addr6->sin6_addr))) { + errno = EADDRNOTAVAIL; + return NULL; + } + } + + if (IS_TCP(so)) + s = tle_tcp_stream_open(CTX(so)->tcp_ctx, &pt); + else { + if (so->s == NULL) + s = tle_udp_stream_open(CTX(so)->udp_ctx, &pu); + else + s = tle_udp_stream_set(so->s, CTX(so)->udp_ctx, &pu); + } + + if (s == NULL) + errno = rte_errno; + + return s; +} + +static inline struct tle_stream * +open_bind_listen(struct sock *so, const struct sockaddr *local) +{ + struct tle_stream *s = open_bind(so, local, NULL); + + if (s == NULL) + return NULL; + + if (tle_tcp_stream_listen(s) != 0) { + tle_tcp_stream_close(s); + return NULL; + } + + return s; +} + +uint32_t get_socket_id(void); + +#ifdef __cplusplus +} +#endif + +#endif /*_TLE_GLUE_UTIL_H_ */ diff --git a/lib/libtle_glue/zerocopy.h b/lib/libtle_glue/zerocopy.h new file mode 100644 index 0000000..a37f8f5 --- /dev/null +++ b/lib/libtle_glue/zerocopy.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _TLE_GLUE_ZEROCOPY_H_ +#define _TLE_GLUE_ZEROCOPY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * This API performs recv operation on specified socket, and it's + * optimized for zero copy, which means the caller does not need to + * prepare the buffer, instead, it will get a pointer on success. + * @param sockfd + * the file descriptor for the socket. + * @param buf + * after successfully receiving some payload, the pointer of the + * received buffer will be stored in *buf. + * @return + * the number of bytes received, or -1 if an error occurred, or 0 + * if a stream socket peer has performed an orderly shutdown. + * + */ +ssize_t recv_zc(int sockfd, void **buf); + +/** + * This API performs send operation on specified socket, and it's + * optimized for zero copy, which means the caller does not need to + * free the buffer, not even touch that buffer even after calling this + * API; the buffer will be freed after an ack from the socket peer. + * @param sockfd + * the file descriptor for the socket. + * @param buf + * The pointer to the payload buffer to be sent. + * @param len + * The length of the payload buffer to be sent. + * @return + * the number of bytes sent, or -1 if an error occurred. 
+ */ +ssize_t send_zc(int sockfd, const void *buf, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif /*_TLE_GLUE_ZEROCOPY_H_ */ diff --git a/lib/libtle_l4p/Makefile b/lib/libtle_l4p/Makefile index e1357d1..ee81d4a 100644 --- a/lib/libtle_l4p/Makefile +++ b/lib/libtle_l4p/Makefile @@ -45,6 +45,7 @@ SYMLINK-y-include += tle_ctx.h SYMLINK-y-include += tle_event.h SYMLINK-y-include += tle_tcp.h SYMLINK-y-include += tle_udp.h +SYMLINK-y-include += tle_stats.h # this lib dependencies DEPDIRS-y += lib/libtle_misc diff --git a/lib/libtle_l4p/ctx.c b/lib/libtle_l4p/ctx.c index b8067f0..d6bde48 100644 --- a/lib/libtle_l4p/ctx.c +++ b/lib/libtle_l4p/ctx.c @@ -21,9 +21,14 @@ #include <rte_ip.h> #include "stream.h" +#include "stream_table.h" #include "misc.h" #include <halfsiphash.h> +struct tle_mib default_mib; + +RTE_DEFINE_PER_LCORE(struct tle_mib *, mib) = &default_mib; + #define LPORT_START 0x8000 #define LPORT_END MAX_PORT_NUM @@ -103,6 +108,16 @@ tle_ctx_create(const struct tle_ctx_param *ctx_prm) ctx->prm = *ctx_prm; + rc = bhash_init(ctx); + if (rc != 0) { + UDP_LOG(ERR, "create bhash table (ctx=%p, proto=%u) failed " + "with error code: %d;\n", + ctx, ctx_prm->proto, rc); + tle_ctx_destroy(ctx); + rte_errno = -rc; + return NULL; + } + rc = tle_stream_ops[ctx_prm->proto].init_streams(ctx); if (rc != 0) { UDP_LOG(ERR, "init_streams(ctx=%p, proto=%u) failed " @@ -114,9 +129,10 @@ tle_ctx_create(const struct tle_ctx_param *ctx_prm) } for (i = 0; i != RTE_DIM(ctx->use); i++) - tle_pbm_init(ctx->use + i, LPORT_START_BLK); + tle_psm_init(ctx->use + i); - ctx->streams.nb_free = ctx->prm.max_streams; + ctx->streams.nb_free = ctx->prm.min_streams; + ctx->streams.nb_cur = ctx->prm.min_streams; /* Initialization of siphash state is done here to speed up the * fastpath processing. 
@@ -124,6 +140,11 @@ tle_ctx_create(const struct tle_ctx_param *ctx_prm) if (ctx->prm.hash_alg == TLE_SIPHASH) siphash_initialization(&ctx->prm.secret_key, &ctx->prm.secret_key); + + rte_spinlock_init(&ctx->dev_lock); + rte_spinlock_init(&ctx->bhash_lock[TLE_V4]); + rte_spinlock_init(&ctx->bhash_lock[TLE_V6]); + return ctx; } @@ -137,6 +158,8 @@ tle_ctx_destroy(struct tle_ctx *ctx) return; } + bhash_fini(ctx); + for (i = 0; i != RTE_DIM(ctx->dev); i++) tle_del_dev(ctx->dev + i); @@ -150,37 +173,6 @@ tle_ctx_invalidate(struct tle_ctx *ctx) RTE_SET_USED(ctx); } -static void -fill_pbm(struct tle_pbm *pbm, const struct tle_bl_port *blp) -{ - uint32_t i; - - for (i = 0; i != blp->nb_port; i++) - tle_pbm_set(pbm, blp->port[i]); -} - -static int -init_dev_proto(struct tle_dev *dev, uint32_t idx, int32_t socket_id, - const struct tle_bl_port *blp) -{ - size_t sz; - - sz = sizeof(*dev->dp[idx]); - dev->dp[idx] = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, - socket_id); - - if (dev->dp[idx] == NULL) { - UDP_LOG(ERR, "allocation of %zu bytes on " - "socket %d for %u-th device failed\n", - sz, socket_id, idx); - return ENOMEM; - } - - tle_pbm_init(&dev->dp[idx]->use, LPORT_START_BLK); - fill_pbm(&dev->dp[idx]->use, blp); - return 0; -} - static struct tle_dev * find_free_dev(struct tle_ctx *ctx) { @@ -214,27 +206,8 @@ tle_add_dev(struct tle_ctx *ctx, const struct tle_dev_param *dev_prm) return NULL; rc = 0; - /* device can handle IPv4 traffic */ - if (dev_prm->local_addr4.s_addr != INADDR_ANY) { - rc = init_dev_proto(dev, TLE_V4, ctx->prm.socket_id, - &dev_prm->bl4); - if (rc == 0) - fill_pbm(&ctx->use[TLE_V4], &dev_prm->bl4); - } - - /* device can handle IPv6 traffic */ - if (rc == 0 && memcmp(&dev_prm->local_addr6, &tle_ipv6_any, - sizeof(tle_ipv6_any)) != 0) { - rc = init_dev_proto(dev, TLE_V6, ctx->prm.socket_id, - &dev_prm->bl6); - if (rc == 0) - fill_pbm(&ctx->use[TLE_V6], &dev_prm->bl6); - } - if (rc != 0) { /* cleanup and return an error. 
*/ - rte_free(dev->dp[TLE_V4]); - rte_free(dev->dp[TLE_V6]); rte_errno = rc; return NULL; } @@ -246,16 +219,19 @@ tle_add_dev(struct tle_ctx *ctx, const struct tle_dev_param *dev_prm) if ((dev_prm->tx_offload & DEV_TX_OFFLOAD_UDP_CKSUM) != 0 && ctx->prm.proto == TLE_PROTO_UDP) { - dev->tx.ol_flags[TLE_V4] |= PKT_TX_IPV4 | PKT_TX_UDP_CKSUM; - dev->tx.ol_flags[TLE_V6] |= PKT_TX_IPV6 | PKT_TX_UDP_CKSUM; + dev->tx.ol_flags[TLE_V4] |= PKT_TX_UDP_CKSUM; + dev->tx.ol_flags[TLE_V6] |= PKT_TX_UDP_CKSUM; } else if ((dev_prm->tx_offload & DEV_TX_OFFLOAD_TCP_CKSUM) != 0 && ctx->prm.proto == TLE_PROTO_TCP) { - dev->tx.ol_flags[TLE_V4] |= PKT_TX_IPV4 | PKT_TX_TCP_CKSUM; - dev->tx.ol_flags[TLE_V6] |= PKT_TX_IPV6 | PKT_TX_TCP_CKSUM; + dev->tx.ol_flags[TLE_V4] |= PKT_TX_TCP_CKSUM; + dev->tx.ol_flags[TLE_V6] |= PKT_TX_TCP_CKSUM; } if ((dev_prm->tx_offload & DEV_TX_OFFLOAD_IPV4_CKSUM) != 0) - dev->tx.ol_flags[TLE_V4] |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM; + dev->tx.ol_flags[TLE_V4] |= PKT_TX_IP_CKSUM; + + dev->tx.ol_flags[TLE_V4] |= PKT_TX_IPV4; + dev->tx.ol_flags[TLE_V6] |= PKT_TX_IPV6; dev->prm = *dev_prm; dev->ctx = ctx; @@ -300,220 +276,97 @@ tle_del_dev(struct tle_dev *dev) ctx = dev->ctx; p = dev - ctx->dev; - if (p >= RTE_DIM(ctx->dev) || - (dev->dp[TLE_V4] == NULL && - dev->dp[TLE_V6] == NULL)) + if (p >= RTE_DIM(ctx->dev)) return -EINVAL; /* emtpy TX queues. 
*/ empty_dring(&dev->tx.dr, ctx->prm.proto); - rte_free(dev->dp[TLE_V4]); - rte_free(dev->dp[TLE_V6]); memset(dev, 0, sizeof(*dev)); ctx->nb_dev--; return 0; } -static struct tle_dev * -find_ipv4_dev(struct tle_ctx *ctx, const struct in_addr *addr) -{ - uint32_t i; - - for (i = 0; i != RTE_DIM(ctx->dev); i++) { - if (ctx->dev[i].prm.local_addr4.s_addr == addr->s_addr && - ctx->dev[i].dp[TLE_V4] != NULL) - return ctx->dev + i; - } - - return NULL; -} - -static struct tle_dev * -find_ipv6_dev(struct tle_ctx *ctx, const struct in6_addr *addr) +int +stream_fill_ctx(struct tle_ctx *ctx, struct tle_stream *s, + const struct sockaddr *laddr, const struct sockaddr *raddr) { - uint32_t i; + struct sockaddr_storage addr; + int32_t rc = 0; - for (i = 0; i != RTE_DIM(ctx->dev); i++) { - if (memcmp(&ctx->dev[i].prm.local_addr6, addr, - sizeof(*addr)) == 0 && - ctx->dev[i].dp[TLE_V6] != NULL) - return ctx->dev + i; + if (laddr->sa_family == AF_INET) { + s->type = TLE_V4; + } else if (laddr->sa_family == AF_INET6) { + s->type = TLE_V6; } - return NULL; -} - -static int -stream_fill_dev(struct tle_ctx *ctx, struct tle_stream *s, - const struct sockaddr *addr) -{ - struct tle_dev *dev; - struct tle_pbm *pbm; - const struct sockaddr_in *lin4; - const struct sockaddr_in6 *lin6; - uint32_t i, p, sp, t; - - if (addr->sa_family == AF_INET) { - lin4 = (const struct sockaddr_in *)addr; - t = TLE_V4; - p = lin4->sin_port; - } else if (addr->sa_family == AF_INET6) { - lin6 = (const struct sockaddr_in6 *)addr; - t = TLE_V6; - p = lin6->sin6_port; - } else - return EINVAL; - + uint16_t p = ((const struct sockaddr_in *)laddr)->sin_port; p = ntohs(p); - - /* if local address is not wildcard, find device it belongs to. 
*/ - if (t == TLE_V4 && lin4->sin_addr.s_addr != INADDR_ANY) { - dev = find_ipv4_dev(ctx, &lin4->sin_addr); - if (dev == NULL) - return ENODEV; - } else if (t == TLE_V6 && memcmp(&tle_ipv6_any, &lin6->sin6_addr, - sizeof(tle_ipv6_any)) != 0) { - dev = find_ipv6_dev(ctx, &lin6->sin6_addr); - if (dev == NULL) - return ENODEV; - } else - dev = NULL; - - if (dev != NULL) - pbm = &dev->dp[t]->use; - else - pbm = &ctx->use[t]; - + struct tle_psm *psm = &ctx->use[s->type]; /* try to acquire local port number. */ + rte_spinlock_lock(&ctx->dev_lock); if (p == 0) { - p = tle_pbm_find_range(pbm, pbm->blk, LPORT_END_BLK); - if (p == 0 && pbm->blk > LPORT_START_BLK) - p = tle_pbm_find_range(pbm, LPORT_START_BLK, pbm->blk); - } else if (tle_pbm_check(pbm, p) != 0) - return EEXIST; - - if (p == 0) - return ENFILE; - - /* fill socket's dst port and type */ - - sp = htons(p); - s->type = t; - s->port.dst = sp; - - /* mark port as in-use */ - - tle_pbm_set(&ctx->use[t], p); - if (dev != NULL) { - tle_pbm_set(pbm, p); - dev->dp[t]->streams[sp] = s; - } else { - for (i = 0; i != RTE_DIM(ctx->dev); i++) { - if (ctx->dev[i].dp[t] != NULL) { - tle_pbm_set(&ctx->dev[i].dp[t]->use, p); - ctx->dev[i].dp[t]->streams[sp] = s; - } + if (s->type == TLE_V6 && is_empty_addr(laddr) && !s->option.ipv6only) + p = tle_psm_alloc_dual_port(&ctx->use[TLE_V4], psm); + else + p = tle_psm_alloc_port(psm); + if (p == 0) { + rte_spinlock_unlock(&ctx->dev_lock); + return ENFILE; } + rte_memcpy(&addr, laddr, sizeof(struct sockaddr_storage)); + ((struct sockaddr_in *)&addr)->sin_port = htons(p); + laddr = (const struct sockaddr*)&addr; } - return 0; -} + if (tle_psm_set(psm, p, s->option.reuseport) != 0) { + rte_spinlock_unlock(&ctx->dev_lock); + return EADDRINUSE; + } -static int -stream_clear_dev(struct tle_ctx *ctx, const struct tle_stream *s) -{ - struct tle_dev *dev; - uint32_t i, p, sp, t; - - t = s->type; - sp = s->port.dst; - p = ntohs(sp); - - /* if local address is not wildcard, find device it belongs 
to. */ - if (t == TLE_V4 && s->ipv4.addr.dst != INADDR_ANY) { - dev = find_ipv4_dev(ctx, - (const struct in_addr *)&s->ipv4.addr.dst); - if (dev == NULL) - return ENODEV; - } else if (t == TLE_V6 && memcmp(&tle_ipv6_any, &s->ipv6.addr.dst, - sizeof(tle_ipv6_any)) != 0) { - dev = find_ipv6_dev(ctx, - (const struct in6_addr *)&s->ipv6.addr.dst); - if (dev == NULL) - return ENODEV; - } else - dev = NULL; - - tle_pbm_clear(&ctx->use[t], p); - if (dev != NULL) { - if (dev->dp[t]->streams[sp] == s) { - tle_pbm_clear(&dev->dp[t]->use, p); - dev->dp[t]->streams[sp] = NULL; - } - } else { - for (i = 0; i != RTE_DIM(ctx->dev); i++) { - if (ctx->dev[i].dp[t] != NULL && - ctx->dev[i].dp[t]->streams[sp] == s) { - tle_pbm_clear(&ctx->dev[i].dp[t]->use, p); - ctx->dev[i].dp[t]->streams[sp] = NULL; + if (is_empty_addr(laddr)) { + if (s->type == TLE_V6 && !s->option.ipv6only) { + rc = tle_psm_set(&ctx->use[TLE_V4], p, s->option.reuseport); + if (rc != 0) { + tle_psm_clear(psm, p); + rte_spinlock_unlock(&ctx->dev_lock); + return EADDRINUSE; } } } - return 0; -} - -static void -fill_ipv4_am(const struct sockaddr_in *in, uint32_t *addr, uint32_t *mask) -{ - *addr = in->sin_addr.s_addr; - *mask = (*addr == INADDR_ANY) ? 
INADDR_ANY : INADDR_NONE; -} + if (is_empty_addr(raddr)) + rc = bhash_add_entry(ctx, laddr, s); -static void -fill_ipv6_am(const struct sockaddr_in6 *in, rte_xmm_t *addr, rte_xmm_t *mask) -{ - const struct in6_addr *pm; - - memcpy(addr, &in->sin6_addr, sizeof(*addr)); - if (memcmp(&tle_ipv6_any, addr, sizeof(*addr)) == 0) - pm = &tle_ipv6_any; - else - pm = &tle_ipv6_none; - - memcpy(mask, pm, sizeof(*mask)); -} + if (rc) { + tle_psm_clear(psm, p); + } -int -stream_fill_ctx(struct tle_ctx *ctx, struct tle_stream *s, - const struct sockaddr *laddr, const struct sockaddr *raddr) -{ - const struct sockaddr_in *rin; - int32_t rc; + rte_spinlock_unlock(&ctx->dev_lock); + /* fill socket's dst (src actually) port */ + s->port.dst = htons(p); - /* setup ports and port mask fields (except dst port). */ - rin = (const struct sockaddr_in *)raddr; - s->port.src = rin->sin_port; - s->pmsk.src = (s->port.src == 0) ? 0 : UINT16_MAX; - s->pmsk.dst = UINT16_MAX; + if (rc) + return rc; - /* setup src and dst addresses. */ + /* setup src, dst addresses, and src port. */ if (laddr->sa_family == AF_INET) { fill_ipv4_am((const struct sockaddr_in *)laddr, &s->ipv4.addr.dst, &s->ipv4.mask.dst); fill_ipv4_am((const struct sockaddr_in *)raddr, &s->ipv4.addr.src, &s->ipv4.mask.src); + s->port.src = ((const struct sockaddr_in *)raddr)->sin_port; } else if (laddr->sa_family == AF_INET6) { fill_ipv6_am((const struct sockaddr_in6 *)laddr, &s->ipv6.addr.dst, &s->ipv6.mask.dst); fill_ipv6_am((const struct sockaddr_in6 *)raddr, &s->ipv6.addr.src, &s->ipv6.mask.src); + s->port.src = ((const struct sockaddr_in6 *)raddr)->sin6_port; } - rte_spinlock_lock(&ctx->dev_lock); - rc = stream_fill_dev(ctx, s, laddr); - rte_spinlock_unlock(&ctx->dev_lock); + /* setup port mask fields. */ + s->pmsk.src = (s->port.src == 0) ? 
0 : UINT16_MAX; + s->pmsk.dst = UINT16_MAX; return rc; } @@ -522,11 +375,41 @@ stream_fill_ctx(struct tle_ctx *ctx, struct tle_stream *s, int stream_clear_ctx(struct tle_ctx *ctx, struct tle_stream *s) { - int32_t rc; + bool is_any = false; + struct sockaddr_storage addr; + struct sockaddr_in *addr4; + struct sockaddr_in6 *addr6; + + if (s->type == TLE_V4) { + if (s->ipv4.addr.src == INADDR_ANY) { + is_any = true; + addr4 = (struct sockaddr_in *)&addr; + addr4->sin_addr.s_addr = s->ipv4.addr.dst; + addr4->sin_port = s->port.dst; + addr.ss_family = AF_INET; + bhash_del_entry(ctx, s, (struct sockaddr*)&addr); + } + } else { + if (IN6_IS_ADDR_UNSPECIFIED(&s->ipv6.addr.src)) { + is_any = true; + addr6 = (struct sockaddr_in6 *)&addr; + memcpy(&addr6->sin6_addr, &s->ipv6.addr.dst, + sizeof(tle_ipv6_any)); + addr6->sin6_port = s->port.dst; + addr.ss_family = AF_INET6; + bhash_del_entry(ctx, s, (struct sockaddr*)&addr); + } + } rte_spinlock_lock(&ctx->dev_lock); - rc = stream_clear_dev(ctx, s); + /* strange behaviour to match linux stack */ + if (is_any) { + if (s->type == TLE_V6 && !s->option.ipv6only) + tle_psm_clear(&ctx->use[TLE_V4], ntohs(s->port.dst)); + } + + tle_psm_clear(&ctx->use[s->type], ntohs(s->port.dst)); rte_spinlock_unlock(&ctx->dev_lock); - return rc; + return 0; } diff --git a/lib/libtle_l4p/ctx.h b/lib/libtle_l4p/ctx.h index f18060b..9483976 100644 --- a/lib/libtle_l4p/ctx.h +++ b/lib/libtle_l4p/ctx.h @@ -21,7 +21,7 @@ #include <tle_dring.h> #include <tle_ctx.h> -#include "port_bitmap.h" +#include "port_statmap.h" #include "osdep.h" #include "net_misc.h" @@ -29,11 +29,6 @@ extern "C" { #endif -struct tle_dport { - struct tle_pbm use; /* ports in use. */ - struct tle_stream *streams[MAX_PORT_NUM]; /* port to stream. */ -}; - struct tle_dev { struct tle_ctx *ctx; struct { @@ -45,7 +40,6 @@ struct tle_dev { struct tle_dring dr; } tx; struct tle_dev_param prm; /* copy of device parameters. 
*/ - struct tle_dport *dp[TLE_VNUM]; /* device L4 ports */ }; struct tle_ctx { @@ -54,18 +48,23 @@ struct tle_ctx { struct { rte_spinlock_t lock; uint32_t nb_free; /* number of free streams. */ + uint32_t nb_cur; /* number of allocated streams. */ STAILQ_HEAD(, tle_stream) free; void *buf; /* space allocated for streams */ } streams; - rte_spinlock_t dev_lock; + rte_spinlock_t bhash_lock[TLE_VNUM]; + struct rte_hash *bhash[TLE_VNUM]; /* bind and listen hash table */ + uint32_t nb_dev; - struct tle_pbm use[TLE_VNUM]; /* all ports in use. */ + rte_spinlock_t dev_lock; + struct tle_psm use[TLE_VNUM]; /* all ports in use. */ struct tle_dev dev[RTE_MAX_ETHPORTS]; }; struct stream_ops { int (*init_streams)(struct tle_ctx *); + uint32_t (*more_streams)(struct tle_ctx *); void (*fini_streams)(struct tle_ctx *); void (*free_drbs)(struct tle_stream *, struct tle_drb *[], uint32_t); }; @@ -77,6 +76,27 @@ int stream_fill_ctx(struct tle_ctx *ctx, struct tle_stream *s, int stream_clear_ctx(struct tle_ctx *ctx, struct tle_stream *s); +static inline void +fill_ipv4_am(const struct sockaddr_in *in, uint32_t *addr, uint32_t *mask) +{ + *addr = in->sin_addr.s_addr; + *mask = (*addr == INADDR_ANY) ? 
INADDR_ANY : INADDR_NONE; +} + +static inline void +fill_ipv6_am(const struct sockaddr_in6 *in, rte_xmm_t *addr, rte_xmm_t *mask) +{ + const struct in6_addr *pm; + + memcpy(addr, &in->sin6_addr, sizeof(*addr)); + if (IN6_IS_ADDR_UNSPECIFIED(addr)) + pm = &tle_ipv6_any; + else + pm = &tle_ipv6_none; + + memcpy(mask, pm, sizeof(*mask)); +} + #ifdef __cplusplus } #endif diff --git a/lib/libtle_l4p/misc.h b/lib/libtle_l4p/misc.h index 327296f..d39e5a1 100644 --- a/lib/libtle_l4p/misc.h +++ b/lib/libtle_l4p/misc.h @@ -16,12 +16,34 @@ #ifndef _MISC_H_ #define _MISC_H_ +#include <tle_stats.h> #include <tle_dpdk_wrapper.h> #ifdef __cplusplus extern "C" { #endif +union typflg { + uint16_t raw; + struct { + uint8_t type; /* TLE_V4/TLE_V6 */ + uint8_t flags; /* TCP header flags */ + }; +}; + +union pkt_info { + rte_xmm_t raw; + struct { + union typflg tf; + uint16_t csf; /* checksum flags */ + union l4_ports port; + union { + union ipv4_addrs addr4; + const union ipv6_addrs *addr6; + }; + }; +}; + static inline int xmm_cmp(const rte_xmm_t *da, const rte_xmm_t *sa) { @@ -286,43 +308,41 @@ _ipv4x_cksum(const void *iph, size_t len) return (cksum == 0xffff) ? cksum : ~cksum; } -/* - * helper function to check csum. - */ static inline int -check_pkt_csum(const struct rte_mbuf *m, uint64_t ol_flags, uint32_t type, - uint32_t proto) +check_pkt_csum(const struct rte_mbuf *m, uint32_t type, uint32_t proto) { const struct ipv4_hdr *l3h4; const struct ipv6_hdr *l3h6; const struct udp_hdr *l4h; - uint64_t fl3, fl4; - uint16_t csum; int32_t ret; - - fl4 = ol_flags & PKT_RX_L4_CKSUM_MASK; - fl3 = (type == TLE_V4) ? 
- (ol_flags & PKT_RX_IP_CKSUM_MASK) : PKT_RX_IP_CKSUM_GOOD; + uint16_t csum; + uint64_t ol_flags = m->ol_flags; /* case 0: both ip and l4 cksum is verified or data is valid */ - if ((fl3 | fl4) == (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD)) + if ((ol_flags & PKT_RX_IP_CKSUM_GOOD) && + (ol_flags & PKT_RX_L4_CKSUM_GOOD)) return 0; /* case 1: either ip or l4 cksum bad */ - if (fl3 == PKT_RX_IP_CKSUM_BAD || fl4 == PKT_RX_L4_CKSUM_BAD) + if ((ol_flags & PKT_RX_IP_CKSUM_MASK) == PKT_RX_IP_CKSUM_BAD) + return 1; + + if ((ol_flags & PKT_RX_L4_CKSUM_MASK) == PKT_RX_L4_CKSUM_BAD) return 1; /* case 2: either ip or l4 or both cksum is unknown */ + ret = 0; l3h4 = rte_pktmbuf_mtod_offset(m, const struct ipv4_hdr *, m->l2_len); l3h6 = rte_pktmbuf_mtod_offset(m, const struct ipv6_hdr *, m->l2_len); - ret = 0; - if (fl3 == PKT_RX_IP_CKSUM_UNKNOWN && l3h4->hdr_checksum != 0) { + if ((ol_flags & PKT_RX_IP_CKSUM_MASK) == PKT_RX_IP_CKSUM_UNKNOWN && + l3h4->hdr_checksum != 0) { csum = _ipv4x_cksum(l3h4, m->l3_len); ret = (csum != UINT16_MAX); } - if (ret == 0 && fl4 == PKT_RX_L4_CKSUM_UNKNOWN) { + if (ret == 0 && (ol_flags & PKT_RX_L4_CKSUM_MASK) == + PKT_RX_L4_CKSUM_UNKNOWN) { /* * for IPv4 it is allowed to have zero UDP cksum, @@ -376,8 +396,20 @@ rwl_acquire(rte_atomic32_t *p) static inline void rwl_down(rte_atomic32_t *p) { - while (rte_atomic32_cmpset((volatile uint32_t *)p, 0, INT32_MIN) == 0) + while (rte_atomic32_cmpset((volatile uint32_t *)p, 0, INT32_MIN) == 0) + rte_pause(); +} + +static inline int +rwl_try_down(rte_atomic32_t *p) +{ + while (rte_atomic32_cmpset((volatile uint32_t *)p, 0, INT32_MIN) == 0) { + /* Already down */ + if (rte_atomic32_read(p) == INT32_MIN) + return -1; rte_pause(); + } + return 0; } static inline void diff --git a/lib/libtle_l4p/net_misc.h b/lib/libtle_l4p/net_misc.h index 2d8dac2..c1d946b 100644 --- a/lib/libtle_l4p/net_misc.h +++ b/lib/libtle_l4p/net_misc.h @@ -16,6 +16,7 @@ #ifndef _NET_MISC_H_ #define _NET_MISC_H_ +#include <stdbool.h> 
#include <rte_ip.h> #include <rte_udp.h> #include "osdep.h" @@ -71,6 +72,26 @@ union ip_addrs { union ipv6_addrs v6; }; +static inline bool +is_empty_addr(const struct sockaddr *addr) +{ + bool any = false; + const struct sockaddr_in *in4; + const struct sockaddr_in6 *in6; + + if (addr->sa_family == AF_INET) { + in4 = (const struct sockaddr_in *)addr; + if (in4->sin_addr.s_addr == INADDR_ANY) + any = true; + } else if (addr->sa_family == AF_INET6) { + in6 = (const struct sockaddr_in6 *)addr; + if (IN6_IS_ADDR_UNSPECIFIED(&in6->sin6_addr)) + any = true; + } + + return any; +} + #ifdef __cplusplus } #endif diff --git a/lib/libtle_l4p/port_statmap.h b/lib/libtle_l4p/port_statmap.h new file mode 100644 index 0000000..8bbb0ba --- /dev/null +++ b/lib/libtle_l4p/port_statmap.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2019 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _PORT_STATMAP_H_ +#define _PORT_STATMAP_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_PORT_NUM (UINT16_MAX + 1) +#define ALLOC_PORT_START 0x8000 + +struct tle_psm { + uint32_t nb_used; /* Number of ports already in use. */ + uint32_t next_alloc; /* Next port to try allocate. */ + uint8_t stat[MAX_PORT_NUM]; /* Status of the port: + * 1) the most significant bit indicates + * if SO_REUSEPORT is allowed; + * 2) lowest 7 bits indicate # of streams + * using the port. 
+ */ +}; + +static inline void +tle_psm_init(struct tle_psm *psm) +{ + memset(psm, 0, sizeof(struct tle_psm)); + psm->next_alloc = ALLOC_PORT_START; +} + +static inline int +tle_psm_set(struct tle_psm *psm, uint16_t port, uint8_t reuseport) +{ + if (psm->stat[port] == 0) { + /* port has not been used */ + psm->stat[port]++; + if (reuseport) + psm->stat[port] |= 0x80; + } else { + /* port is used by some socket */ + if (reuseport && (psm->stat[port] & 0x80)) { + /* all sockets set reuseport */ + psm->stat[port]++; + } else + return -1; + } + + return 0; +} + +static inline void +tle_psm_clear(struct tle_psm *psm, uint16_t port) +{ + psm->stat[port]--; + if ((psm->stat[port] & 0x7f) == 0) + psm->stat[port] = 0; +} + + +static inline uint8_t +tle_psm_check(const struct tle_psm *psm, uint16_t port) +{ + return psm->stat[port]; +} + +static inline uint16_t +tle_psm_alloc_port(struct tle_psm *psm) +{ + uint32_t i = psm->next_alloc; + + for (; i < MAX_PORT_NUM; i++) { + if (psm->stat[i] == 0) { + psm->next_alloc = i + 1; + return (uint16_t)i; + } + } + + for (i = ALLOC_PORT_START; i < psm->next_alloc; i++) { + if (psm->stat[i] == 0) { + psm->next_alloc = i + 1; + return (uint16_t)i; + } + } + + return 0; +} + +static inline uint16_t +tle_psm_alloc_dual_port(struct tle_psm *psm4, struct tle_psm *psm6) +{ + uint32_t i = psm6->next_alloc; + + for (; i < MAX_PORT_NUM; i++) { + if (psm6->stat[i] == 0 && psm4->stat[i] == 0) { + psm6->next_alloc = i + 1; + return (uint16_t)i; + } + } + + for (i = ALLOC_PORT_START; i < psm6->next_alloc; i++) { + if (psm6->stat[i] == 0 && psm4->stat[i] == 0) { + psm6->next_alloc = i + 1; + return (uint16_t)i; + } + } + + return 0; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _PORT_STATMAP_H_ */ diff --git a/lib/libtle_l4p/stream.h b/lib/libtle_l4p/stream.h index 49a2809..9f2bbc1 100644 --- a/lib/libtle_l4p/stream.h +++ b/lib/libtle_l4p/stream.h @@ -31,7 +31,11 @@ struct tle_stream { STAILQ_ENTRY(tle_stream) link; struct tle_ctx *ctx; - uint8_t 
type; /* TLE_V4 or TLE_V6 */ + tle_stream_options_t option; + unsigned long timestamp; + uint16_t reuseport_seed; + uint8_t type; /* TLE_V4 or TLE_V6 */ + uint8_t padding; /* Stream address information. */ union l4_ports port; @@ -53,15 +57,25 @@ static inline uint32_t get_streams(struct tle_ctx *ctx, struct tle_stream *s[], uint32_t num) { struct tle_stream *p; - uint32_t i, n; + uint32_t i, n, inc; rte_spinlock_lock(&ctx->streams.lock); - n = RTE_MIN(ctx->streams.nb_free, num); - for (i = 0, p = STAILQ_FIRST(&ctx->streams.free); - i != n; - i++, p = STAILQ_NEXT(p, link)) + n = ctx->streams.nb_free; + if (n < num) { + inc = tle_stream_ops[ctx->prm.proto].more_streams(ctx); + ctx->streams.nb_free += inc; + ctx->streams.nb_cur += inc; + n = ctx->streams.nb_free; + } + n = RTE_MIN(n, num); + + for (i = 0, p = STAILQ_FIRST(&ctx->streams.free); i != n; ) { s[i] = p; + p = STAILQ_NEXT(p, link); + s[i]->link.stqe_next = NULL; + i++; + } if (p == NULL) /* we retrieved all free entries */ @@ -80,9 +94,6 @@ get_stream(struct tle_ctx *ctx) struct tle_stream *s; s = NULL; - if (ctx->streams.nb_free == 0) - return s; - get_streams(ctx, &s, 1); return s; } @@ -120,8 +131,8 @@ drb_nb_elem(const struct tle_ctx *ctx) } static inline int32_t -stream_get_dest(struct tle_stream *s, const void *dst_addr, - struct tle_dest *dst) +stream_get_dest(uint8_t type, struct tle_stream *s, const void *src_addr, + const void *dst_addr, struct tle_dest *dst) { int32_t rc; const struct in_addr *d4; @@ -133,12 +144,13 @@ stream_get_dest(struct tle_stream *s, const void *dst_addr, /* it is here just to keep gcc happy. */ d4 = NULL; + /* it is here just to keep gcc happy. 
*/ d6 = NULL; - if (s->type == TLE_V4) { + if (type == TLE_V4) { d4 = dst_addr; rc = ctx->prm.lookup4(ctx->prm.lookup4_data, d4, dst); - } else if (s->type == TLE_V6) { + } else if (type == TLE_V6) { d6 = dst_addr; rc = ctx->prm.lookup6(ctx->prm.lookup6_data, d6, dst); } else @@ -148,18 +160,25 @@ stream_get_dest(struct tle_stream *s, const void *dst_addr, return -ENOENT; dev = dst->dev; - dst->ol_flags = dev->tx.ol_flags[s->type]; + dst->ol_flags = dev->tx.ol_flags[type]; - if (s->type == TLE_V4) { + if (type == TLE_V4) { struct ipv4_hdr *l3h; l3h = (struct ipv4_hdr *)(dst->hdr + dst->l2_len); - l3h->src_addr = dev->prm.local_addr4.s_addr; + if (((const struct in_addr*)src_addr)->s_addr != INADDR_ANY) + l3h->src_addr = ((const struct in_addr*)src_addr)->s_addr; + else + l3h->src_addr = dev->prm.local_addr4.s_addr; l3h->dst_addr = d4->s_addr; } else { struct ipv6_hdr *l3h; l3h = (struct ipv6_hdr *)(dst->hdr + dst->l2_len); - rte_memcpy(l3h->src_addr, &dev->prm.local_addr6, - sizeof(l3h->src_addr)); + if (!IN6_IS_ADDR_UNSPECIFIED(src_addr)) + rte_memcpy(l3h->src_addr, src_addr, + sizeof(l3h->src_addr)); + else + rte_memcpy(l3h->src_addr, &dev->prm.local_addr6, + sizeof(l3h->src_addr)); rte_memcpy(l3h->dst_addr, d6, sizeof(l3h->dst_addr)); } diff --git a/lib/libtle_l4p/stream_table.c b/lib/libtle_l4p/stream_table.c index 5a89553..e029306 100644 --- a/lib/libtle_l4p/stream_table.c +++ b/lib/libtle_l4p/stream_table.c @@ -13,68 +13,47 @@ * limitations under the License. 
*/ #include <string.h> -#include <rte_malloc.h> #include <rte_errno.h> #include "stream_table.h" void -stbl_fini(struct stbl *st) +bhash_fini(struct tle_ctx *ctx) { uint32_t i; - for (i = 0; i != RTE_DIM(st->ht); i++) { - rte_hash_free(st->ht[i].t); - rte_free(st->ht[i].ent); - } - - memset(st, 0, sizeof(*st)); + for (i = 0; i != RTE_DIM(ctx->bhash); i++) + rte_hash_free(ctx->bhash[i]); } int -stbl_init(struct stbl *st, uint32_t num, int32_t socket) +bhash_init(struct tle_ctx *ctx) { - int32_t rc; - size_t i, sz; - struct rte_hash_parameters hprm; + int rc = 0; + struct rte_hash_parameters hprm = {0}; + bool ipv6 = ctx->prm.lookup6 != NULL; char buf[RTE_HASH_NAMESIZE]; - num = RTE_MAX(5 * num / 4, 0x10U); - - memset(&hprm, 0, sizeof(hprm)); hprm.name = buf; - hprm.entries = num; - hprm.socket_id = socket; - - rc = 0; - - snprintf(buf, sizeof(buf), "stbl4@%p", st); - hprm.key_len = sizeof(struct stbl4_key); - st->ht[TLE_V4].t = rte_hash_create(&hprm); - if (st->ht[TLE_V4].t == NULL) + hprm.entries = 4096; + hprm.extra_flag = RTE_HASH_EXTRA_FLAGS_EXT_TABLE; + hprm.socket_id = ctx->prm.socket_id; + + snprintf(buf, sizeof(buf), "bhash4@%p", ctx); + hprm.key_len = sizeof(struct bhash4_key); + ctx->bhash[TLE_V4] = rte_hash_create(&hprm); + if (ctx->bhash[TLE_V4] == NULL) rc = (rte_errno != 0) ? -rte_errno : -ENOMEM; - if (rc == 0) { - snprintf(buf, sizeof(buf), "stbl6@%p", st); - hprm.key_len = sizeof(struct stbl6_key); - st->ht[TLE_V6].t = rte_hash_create(&hprm); - if (st->ht[TLE_V6].t == NULL) + if (rc == 0 && ipv6) { + snprintf(buf, sizeof(buf), "bhash6@%p", ctx); + hprm.key_len = sizeof(struct bhash6_key); + ctx->bhash[TLE_V6] = rte_hash_create(&hprm); + if (ctx->bhash[TLE_V6] == NULL) { + rte_hash_free(ctx->bhash[TLE_V4]); rc = (rte_errno != 0) ? 
-rte_errno : -ENOMEM; + } } - for (i = 0; i != RTE_DIM(st->ht) && rc == 0; i++) { - - sz = sizeof(*st->ht[i].ent) * num; - st->ht[i].ent = rte_zmalloc_socket(NULL, sz, - RTE_CACHE_LINE_SIZE, socket); - if (st->ht[i].ent == NULL) - rc = -ENOMEM; - else - st->ht[i].nb_ent = num; - } - - if (rc != 0) - stbl_fini(st); - return rc; } diff --git a/lib/libtle_l4p/stream_table.h b/lib/libtle_l4p/stream_table.h index 033c306..ba8d165 100644 --- a/lib/libtle_l4p/stream_table.h +++ b/lib/libtle_l4p/stream_table.h @@ -16,199 +16,415 @@ #ifndef _STREAM_TABLE_H_ #define _STREAM_TABLE_H_ +#include <string.h> #include <rte_hash.h> -#include "tcp_misc.h" +#include "stream.h" +#include "misc.h" #ifdef __cplusplus extern "C" { #endif +#define HASH_SIZE_32K 32771 +#define HASH_SIZE_64K 65537 +#define HASH_SIZE_128K 131071 + +#define HASH_SIZE HASH_SIZE_64K + struct stbl_entry { void *data; }; -struct shtbl { - uint32_t nb_ent; /* max number of entries in the table. */ - rte_spinlock_t l; /* lock to protect the hash table */ - struct rte_hash *t; - struct stbl_entry *ent; +struct stbl { + rte_spinlock_t l; + uint32_t need_lock; + struct stbl_entry head[HASH_SIZE]; } __rte_cache_aligned; -struct stbl { - struct shtbl ht[TLE_VNUM]; -}; +static inline int +stbl_init(struct stbl *st, uint32_t lock) +{ + st->need_lock = lock; + return 0; +} -struct stbl4_key { - union l4_ports port; - union ipv4_addrs addr; -} __attribute__((__packed__)); +static inline int +stbl_fini(struct stbl *st) +{ + st->need_lock = 0; + return 0; +} -struct stbl6_key { - union l4_ports port; - union ipv6_addrs addr; -} __attribute__((__packed__)); +static inline uint8_t +compare_pkt(const struct tle_stream *s, const union pkt_info *pi) +{ + if (s->type != pi->tf.type) + return -1; -struct stbl_key { - union l4_ports port; - union { - union ipv4_addrs addr4; - union ipv6_addrs addr6; - }; -} __attribute__((__packed__)); + if (s->port.raw != pi->port.raw) + return -1; -extern void stbl_fini(struct stbl *st); + if 
(s->type == TLE_V4) { + if (s->ipv4.addr.raw != pi->addr4.raw) + return -1; + } else { + if (memcmp(&s->ipv6.addr, pi->addr6, sizeof(union ipv6_addrs))) + return -1; + } -extern int stbl_init(struct stbl *st, uint32_t num, int32_t socket); + return 0; +} -static inline void -stbl_pkt_fill_key(struct stbl_key *k, const union pkt_info *pi, uint32_t type) +static inline uint32_t +stbl_hash_stream(const struct tle_stream *s) { - static const struct stbl_key zero = { - .port.raw = 0, - }; - - k->port = pi->port; - if (type == TLE_V4) - k->addr4 = pi->addr4; - else if (type == TLE_V6) - k->addr6 = *pi->addr6; - else - *k = zero; + int i; + unsigned int hash; + + if (s->type == TLE_V4) { + hash = s->ipv4.addr.src ^ s->ipv4.addr.dst + ^ s->port.src ^ s->port.dst; + } else { + hash = s->port.src ^ s->port.dst; + for (i = 0; i < 4; i++) { + hash ^= s->ipv6.addr.src.u32[i]; + hash ^= s->ipv6.addr.dst.u32[i]; + } + } + + return hash % HASH_SIZE; } -static inline void -stbl_lock(struct stbl *st, uint32_t type) +static inline uint32_t +stbl_hash_pkt(const union pkt_info* pi) { - rte_spinlock_lock(&st->ht[type].l); + int i; + unsigned int hash; + + if (pi->tf.type == TLE_V4) { + hash = pi->addr4.src ^ pi->addr4.dst ^ pi->port.src ^ pi->port.dst; + } else { + hash = pi->port.src ^ pi->port.dst; + for (i = 0; i < 4; i++) { + hash ^= pi->addr6->src.u32[i]; + hash ^= pi->addr6->dst.u32[i]; + } + } + + return hash % HASH_SIZE; } -static inline void -stbl_unlock(struct stbl *st, uint32_t type) +static inline struct stbl_entry* +stbl_add_stream(struct stbl *st, struct tle_stream *s) { - rte_spinlock_unlock(&st->ht[type].l); + struct stbl_entry* entry; + + if (st->need_lock) + rte_spinlock_lock(&st->l); + entry = &st->head[stbl_hash_stream(s)]; + s->link.stqe_next = (struct tle_stream*)entry->data; + entry->data = s; + if (st->need_lock) + rte_spinlock_unlock(&st->l); + + return entry; } -static inline struct stbl_entry * -stbl_add_entry(struct stbl *st, const union pkt_info *pi) +static 
inline struct tle_stream * +stbl_find_stream(struct stbl *st, const union pkt_info *pi) { - int32_t rc; - uint32_t type; - struct shtbl *ht; - struct stbl_key k; - - type = pi->tf.type; - stbl_pkt_fill_key(&k, pi, type); - ht = st->ht + type; - - rc = rte_hash_add_key(ht->t, &k); - if ((uint32_t)rc >= ht->nb_ent) - return NULL; - return ht->ent + rc; + struct tle_stream* head; + + if (st->need_lock) + rte_spinlock_lock(&st->l); + head = (struct tle_stream*)st->head[stbl_hash_pkt(pi)].data; + while (head != NULL) { + if (compare_pkt(head, pi) == 0) + break; + + head = head->link.stqe_next; + } + if (st->need_lock) + rte_spinlock_unlock(&st->l); + return head; } -static inline struct stbl_entry * -stbl_add_stream(struct stbl *st, const union pkt_info *pi, const void *s) +static inline void +stbl_del_stream(struct stbl *st, struct stbl_entry *se, + struct tle_stream *s) { - struct stbl_entry *se; + struct tle_stream *prev, *current; - se = stbl_add_entry(st, pi); - if (se != NULL) - se->data = (void *)(uintptr_t)s; - return se; + if (st->need_lock) + rte_spinlock_lock(&st->l); + if (se == NULL) + se = &st->head[stbl_hash_stream(s)]; + prev = NULL; + current = (struct tle_stream*)se->data; + while (current != NULL) { + if (current != s) { + prev = current; + current = current->link.stqe_next; + continue; + } + + if (prev) + prev->link.stqe_next = current->link.stqe_next; + else + se->data = current->link.stqe_next; + break; + } + if (st->need_lock) + rte_spinlock_unlock(&st->l); + + s->link.stqe_next = NULL; } -static inline struct stbl_entry * -stbl_find_entry(struct stbl *st, const union pkt_info *pi) +struct bhash4_key { + uint16_t port; + uint32_t addr; +} __attribute__((__packed__)); + +struct bhash6_key { + uint16_t port; + rte_xmm_t addr; +} __attribute__((__packed__)); + +struct bhash_key { + uint16_t port; + union { + uint32_t addr4; + rte_xmm_t addr6; + }; +} __attribute__((__packed__)); + +void bhash_fini(struct tle_ctx *ctx); + +int bhash_init(struct 
tle_ctx *ctx); + +static inline int +bhash_sockaddr2key(const struct sockaddr *addr, struct bhash_key *key) { - int32_t rc; - uint32_t type; - struct shtbl *ht; - struct stbl_key k; - - type = pi->tf.type; - stbl_pkt_fill_key(&k, pi, type); - ht = st->ht + type; - - rc = rte_hash_lookup(ht->t, &k); - if ((uint32_t)rc >= ht->nb_ent) - return NULL; - return ht->ent + rc; + int t; + const struct sockaddr_in *lin4; + const struct sockaddr_in6 *lin6; + + if (addr->sa_family == AF_INET) { + lin4 = (const struct sockaddr_in *)addr; + key->port = lin4->sin_port; + key->addr4 = lin4->sin_addr.s_addr; + t = TLE_V4; + } else { + lin6 = (const struct sockaddr_in6 *)addr; + memcpy(&key->addr6, &lin6->sin6_addr, sizeof(key->addr6)); + key->port = lin6->sin6_port; + t = TLE_V6; + } + + return t; } -static inline void * -stbl_find_data(struct stbl *st, const union pkt_info *pi) +/* Return 0 on success; + * Return errno on failure. + */ +static inline int +bhash_add_entry(struct tle_ctx *ctx, const struct sockaddr *addr, + struct tle_stream *s) { - struct stbl_entry *ent; - - ent = stbl_find_entry(st, pi); - return (ent == NULL) ? 
NULL : ent->data; + int t; + int rc; + int is_first; + struct bhash_key key; + struct rte_hash *bhash; + struct tle_stream *old, *tmp; + + is_first = 0; + t = bhash_sockaddr2key(addr, &key); + + rte_spinlock_lock(&ctx->bhash_lock[t]); + bhash = ctx->bhash[t]; + rc = rte_hash_lookup_data(bhash, &key, (void **)&old); + if (rc == -ENOENT) { + is_first = 1; + s->link.stqe_next = NULL; /* just to avoid follow */ + rc = rte_hash_add_key_data(bhash, &key, s); + } else if (rc >= 0) { + if (t == TLE_V4 && old->type == TLE_V6) { + /* V6 stream may listen V4 address, assure V4 stream + * is ahead of V6 stream in the list + */ + s->link.stqe_next = old; + rte_hash_add_key_data(bhash, &key, s); + } else { + tmp = old->link.stqe_next; + old->link.stqe_next = s; + s->link.stqe_next = tmp; + } + } + rte_spinlock_unlock(&ctx->bhash_lock[t]); + + /* IPv6 socket with unspecified address could receive IPv4 packets. + * So the stream should also be recorded in IPv4 table. + * Only the first stream need be inserted into V4 list, otherwise + * the V6 list is already following V4 list. + */ + if (t == TLE_V6 && !s->option.ipv6only && is_first && + IN6_IS_ADDR_UNSPECIFIED(&key.addr6)) { + t = TLE_V4; + rte_spinlock_lock(&ctx->bhash_lock[t]); + bhash = ctx->bhash[t]; + rc = rte_hash_lookup_data(bhash, &key, (void **)&old); + if (rc == -ENOENT) + rc = rte_hash_add_key_data(bhash, &key, s); + else if (rc >= 0) { + while(old->link.stqe_next != NULL) + old = old->link.stqe_next; + old->link.stqe_next = s; + s->link.stqe_next = NULL; + } + rte_spinlock_unlock(&ctx->bhash_lock[t]); + } + + return (rc >= 0) ? 
0 : (-rc); } -#include "tcp_stream.h" - static inline void -stbl_stream_fill_key(struct stbl_key *k, const struct tle_stream *s, - uint32_t type) +bhash_del_entry(struct tle_ctx *ctx, struct tle_stream *s, + const struct sockaddr *addr) { - static const struct stbl_key zero = { - .port.raw = 0, - }; + int t; + int rc; + struct bhash_key key; + struct tle_stream *f, *cur, *pre = NULL; + + t = bhash_sockaddr2key(addr, &key); + + rte_spinlock_lock(&ctx->bhash_lock[t]); + rc = rte_hash_lookup_data(ctx->bhash[t], &key, (void **)&f); + if (rc >= 0) { + cur = f; + pre = NULL; + while (cur != s) { + pre = cur; + cur = cur->link.stqe_next; + } + + if (pre == NULL) { + cur = cur->link.stqe_next; + if (cur == NULL) + rte_hash_del_key(ctx->bhash[t], &key); + else /* change data */ + rte_hash_add_key_data(ctx->bhash[t], &key, cur); + } else + pre->link.stqe_next = cur->link.stqe_next; + } + + rte_spinlock_unlock(&ctx->bhash_lock[t]); + + if (rc < 0) + return; + + s->link.stqe_next = NULL; + + /* IPv6 socket with unspecified address could receive IPv4 packets. 
+ * So the stream should also be recorded in IPv4 table*/ + if (t == TLE_V6 && !s->option.ipv6only && pre == NULL && + IN6_IS_ADDR_UNSPECIFIED(&key.addr6)) { + t = TLE_V4; + rte_spinlock_lock(&ctx->bhash_lock[t]); + rc = rte_hash_lookup_data(ctx->bhash[t], &key, (void **)&f); + if (rc >= 0) { + cur = f; + pre = NULL; + while (cur != s) { + pre = cur; + cur = cur->link.stqe_next; + } + + if (pre == NULL) { + cur = cur->link.stqe_next; + if (cur == NULL) + rte_hash_del_key(ctx->bhash[t], &key); + else /* change data */ + rte_hash_add_key_data(ctx->bhash[t], &key, cur); + } else + pre->link.stqe_next = cur->link.stqe_next; + } + + rte_spinlock_unlock(&ctx->bhash_lock[t]); + } - k->port = s->port; - if (type == TLE_V4) - k->addr4 = s->ipv4.addr; - else if (type == TLE_V6) - k->addr6 = s->ipv6.addr; - else - *k = zero; } -static inline struct stbl_entry * -stbl_add_stream_lock(struct stbl *st, const struct tle_tcp_stream *s) +static inline void * +bhash_reuseport_get_stream(struct tle_stream *s) { - uint32_t type; - struct stbl_key k; - struct stbl_entry *se; - struct shtbl *ht; - int32_t rc; - - type = s->s.type; - stbl_stream_fill_key(&k, &s->s, type); - ht = st->ht + type; + int n = 0; + struct tle_stream *e, *all[32]; + + e = s; + while(e && n < 32) { + all[n++] = e; + e = e->link.stqe_next; + } + + /* for each connection, this function will be called twice + * 1st time for the first handshake: SYN + * 2nd time for the third handshake: ACK + */ + return all[(s->reuseport_seed++) % n]; +} - stbl_lock(st, type); - rc = rte_hash_add_key(ht->t, &k); - stbl_unlock(st, type); +static inline void * +bhash_lookup4(struct rte_hash *t, uint32_t addr, uint16_t port, uint8_t reuse) +{ + int rc; + void *s = NULL; + struct bhash_key key = { + .port = port, + .addr4 = addr, + }; - if ((uint32_t)rc >= ht->nb_ent) - return NULL; + rc = rte_hash_lookup_data(t, &key, &s); + if (rc == -ENOENT) { + key.addr4 = INADDR_ANY; + rc = rte_hash_lookup_data(t, &key, &s); + } - se = ht->ent + 
rc; - if (se != NULL) - se->data = (void *)(uintptr_t)s; + if (rc >= 0) { + if (reuse) + return bhash_reuseport_get_stream(s); + else + return s; + } - return se; + return NULL; } -static inline void -stbl_del_stream(struct stbl *st, struct stbl_entry *se, - const struct tle_tcp_stream *s, uint32_t lock) +static inline void * +bhash_lookup6(struct rte_hash *t, rte_xmm_t addr, uint16_t port, uint8_t reuse) { - uint32_t type; - struct stbl_key k; + int rc; + void *s = NULL; + struct bhash_key key = { + .port = port, + .addr6 = addr, + }; - if (se == NULL) - return; + rc = rte_hash_lookup_data(t, &key, &s); + if (rc == -ENOENT) { + memcpy(&key.addr6, &tle_ipv6_any, sizeof(key.addr6)); + rc = rte_hash_lookup_data(t, &key, &s); + } - se->data = NULL; + if (rc >= 0) { + if (reuse) + return bhash_reuseport_get_stream(s); + else + return s; + } - type = s->s.type; - stbl_stream_fill_key(&k, &s->s, type); - if (lock != 0) - stbl_lock(st, type); - rte_hash_del_key(st->ht[type].t, &k); - if (lock != 0) - stbl_unlock(st, type); + return NULL; } #ifdef __cplusplus diff --git a/lib/libtle_l4p/syncookie.h b/lib/libtle_l4p/syncookie.h index 61bfce4..bf01e78 100644 --- a/lib/libtle_l4p/syncookie.h +++ b/lib/libtle_l4p/syncookie.h @@ -182,9 +182,12 @@ sync_fill_tcb(struct tcb *tcb, const union seg_info *si, const union tsopt *to) { uint32_t ack, mss, seq, wscale; + tcb->err = 0; + seq = si->seq; tcb->rcv.nxt = seq; + tcb->rcv.cpy = seq; tcb->rcv.irs = seq - 1; tcb->snd.wu.wl1 = seq; @@ -202,6 +205,7 @@ sync_fill_tcb(struct tcb *tcb, const union seg_info *si, const union tsopt *to) tcb->so.mss = mss; tcb->snd.ts = to->ecr; + tcb->snd.cork_ts = 0; tcb->rcv.ts = to->val; tcb->so.ts.raw = to->raw; diff --git a/lib/libtle_l4p/tcp_ctl.h b/lib/libtle_l4p/tcp_ctl.h index bec1e76..3196470 100644 --- a/lib/libtle_l4p/tcp_ctl.h +++ b/lib/libtle_l4p/tcp_ctl.h @@ -22,6 +22,7 @@ #include "tcp_stream.h" #include "tcp_ofo.h" +#include "tcp_timer.h" #ifdef __cplusplus extern "C" { @@ -97,10 +98,10 
@@ calc_rx_wnd(const struct tle_tcp_stream *s, uint32_t scale) /* peer doesn't support WSCALE option, wnd size is limited to 64K */ if (scale == TCP_WSCALE_NONE) { - wnd = _rte_ring_get_mask(s->rx.q) << TCP_WSCALE_DEFAULT; + wnd = rte_ring_free_count(s->rx.q) << TCP_WSCALE_DEFAULT; return RTE_MIN(wnd, (uint32_t)UINT16_MAX); } else - return _rte_ring_get_mask(s->rx.q) << scale; + return rte_ring_free_count(s->rx.q) << scale; } /* empty stream's send queue */ @@ -144,31 +145,34 @@ static inline void tcp_stream_reset(struct tle_ctx *ctx, struct tle_tcp_stream *s) { struct stbl *st; - uint16_t uop; + uint16_t state; + uint8_t i; st = CTX_TCP_STLB(ctx); - /* reset TX armed */ - rte_atomic32_set(&s->tx.arm, 0); + for (i = 0; i < TIMER_NUM; i++) + timer_stop(s, i); /* reset TCB */ - uop = s->tcb.uop & ~TCP_OP_CLOSE; + state = s->tcb.state; memset(&s->tcb, 0, sizeof(s->tcb)); /* reset cached destination */ memset(&s->tx.dst, 0, sizeof(s->tx.dst)); - if (uop != TCP_OP_ACCEPT) { + /* state could be ESTABLISHED, CLOSED or LISTEN + * stream in CLOSED state has already been cleared by stream_term + * stream in ESTABLISHED state is accepted stream, and doesn't need clear + */ + if (state == TCP_ST_LISTEN) { /* free stream's destination port */ stream_clear_ctx(ctx, &s->s); - if (uop == TCP_OP_LISTEN) - empty_lq(s); + empty_lq(s); } if (s->ste != NULL) { /* remove entry from RX streams table */ - stbl_del_stream(st, s->ste, s, - (s->flags & TLE_CTX_FLAG_ST) == 0); + stbl_del_stream(st, s->ste, &s->s); s->ste = NULL; empty_rq(s); } @@ -184,6 +188,48 @@ tcp_stream_reset(struct tle_ctx *ctx, struct tle_tcp_stream *s) put_stream(ctx, &s->s, TCP_STREAM_TX_FINISHED(s)); } +static inline void +stream_term(struct tle_tcp_stream *s) +{ + struct sdr *dr; + + /* 1) recv a RST packet; 2) keepalive timeout */ + if (s->tcb.state == TCP_ST_ESTABLISHED) { + TCP_DEC_STATS_ATOMIC(TCP_MIB_CURRESTAB); + TCP_INC_STATS(TCP_MIB_ESTABRESETS); + } + + s->tcb.state = TCP_ST_CLOSED; + rte_smp_wmb(); + + /* 
close() was already invoked, schedule final cleanup */ + if ((s->tcb.uop & TCP_OP_CLOSE) != 0) { + if ((s->tcb.uop & TCP_OP_ACCEPT) == 0) { + /* free stream's destination port */ + stream_clear_ctx(s->s.ctx, &s->s); + if ((s->tcb.uop & TCP_OP_LISTEN) != 0) + empty_lq(s); + } + + if (s->ste != NULL) { + /* remove entry from RX streams table */ + stbl_del_stream(CTX_TCP_STLB(s->s.ctx), s->ste, &s->s); + s->ste = NULL; + empty_rq(s); + } + + dr = CTX_TCP_SDR(s->s.ctx); + rte_spinlock_lock(&dr->lock); + STAILQ_INSERT_TAIL(&dr->be, &s->s, link); + rte_spinlock_unlock(&dr->lock); + + /* notify user that stream need to be closed */ + } else if (s->err.ev != NULL) + tle_event_raise(s->err.ev); + else if (s->err.cb.func != NULL) + s->err.cb.func(s->err.cb.data, &s->s); +} + #ifdef __cplusplus } #endif diff --git a/lib/libtle_l4p/tcp_misc.h b/lib/libtle_l4p/tcp_misc.h index 0cef8b2..1f7974e 100644 --- a/lib/libtle_l4p/tcp_misc.h +++ b/lib/libtle_l4p/tcp_misc.h @@ -30,7 +30,7 @@ extern "C" { * of protocol related data. */ -#define TCP_WSCALE_DEFAULT 7 +#define TCP_WSCALE_DEFAULT 10 #define TCP_WSCALE_NONE 0 #define TCP_TX_HDR_MAX (sizeof(struct tcp_hdr) + TCP_TX_OPT_LEN_MAX) @@ -71,27 +71,6 @@ extern "C" { /* TCP flags mask. 
*/ #define TCP_FLAG_MASK UINT8_MAX -union typflg { - uint16_t raw; - struct { - uint8_t type; /* TLE_V4/TLE_V6 */ - uint8_t flags; /* TCP header flags */ - }; -}; - -union pkt_info { - rte_xmm_t raw; - struct { - union typflg tf; - uint16_t csf; /* checksum flags */ - union l4_ports port; - union { - union ipv4_addrs addr4; - const union ipv6_addrs *addr6; - }; - }; -}; - union seg_info { rte_xmm_t raw; struct { @@ -226,7 +205,7 @@ struct dack_info { }; /* get current timestamp in ms */ -static inline uint32_t +static inline uint64_t tcp_get_tms(uint32_t mshift) { uint64_t ts; @@ -344,7 +323,9 @@ fill_syn_opts(void *p, const struct syn_opts *so) opt = (struct tcpopt *)to; } - to[0] = TCP_OPT_KIND_EOL; + to[0] = TCP_OPT_KIND_NOP; + to[1] = TCP_OPT_KIND_NOP; + to[2] = TCP_OPT_KIND_NOP; } /* @@ -390,6 +371,8 @@ get_tms_opts(uintptr_t p, uint32_t len) else if (kind == TCP_OPT_KIND_NOP) i += sizeof(to->kl.kind); else { + if (to->kl.len == 0) + break; i += to->kl.len; if (i <= len && to->kl.raw == TCP_OPT_KL_TMS) { ts.val = rte_be_to_cpu_32(to->ts.val); @@ -449,7 +432,6 @@ get_pkt_info(const struct rte_mbuf *m, union pkt_info *pi, union seg_info *si) ((uintptr_t)tcph + offsetof(struct tcp_hdr, src_port)); pi->tf.flags = tcph->tcp_flags; pi->tf.type = type; - pi->csf = m->ol_flags & (PKT_RX_IP_CKSUM_MASK | PKT_RX_L4_CKSUM_MASK); pi->port.raw = prt->raw; get_seg_info(tcph, si); @@ -462,7 +444,7 @@ tcp_mbuf_seq_free(struct rte_mbuf *mb[], uint32_t num) len = 0; for (i = 0; i != num; i++) { - len += mb[i]->pkt_len; + len += PKT_L4_PLEN(mb[i]); rte_pktmbuf_free(mb[i]); } diff --git a/lib/libtle_l4p/tcp_ofo.c b/lib/libtle_l4p/tcp_ofo.c index 1565445..b31f2b5 100644 --- a/lib/libtle_l4p/tcp_ofo.c +++ b/lib/libtle_l4p/tcp_ofo.c @@ -12,7 +12,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include <rte_malloc.h> #include <rte_errno.h> #include "tcp_stream.h" @@ -28,12 +27,6 @@ #define OFO_OBJ_MAX (OFODB_OBJ_MAX * OFO_DB_MAX) void -tcp_ofo_free(struct ofo *ofo) -{ - rte_free(ofo); -} - -static void calc_ofo_elems(uint32_t nbufs, uint32_t *nobj, uint32_t *ndb) { uint32_t n, nd, no; @@ -51,35 +44,3 @@ calc_ofo_elems(uint32_t nbufs, uint32_t *nobj, uint32_t *ndb) *nobj = no; *ndb = nd; } - -struct ofo * -tcp_ofo_alloc(uint32_t nbufs, int32_t socket) -{ - uint32_t i, ndb, nobj; - size_t dsz, osz, sz; - struct ofo *ofo; - struct rte_mbuf **obj; - - calc_ofo_elems(nbufs, &nobj, &ndb); - osz = sizeof(*ofo) + sizeof(ofo->db[0]) * ndb; - dsz = sizeof(ofo->db[0].obj[0]) * nobj * ndb; - sz = osz + dsz; - - ofo = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, socket); - if (ofo == NULL) { - TCP_LOG(ERR, "%s: allocation of %zu bytes on socket %d " - "failed with error code: %d\n", - __func__, sz, socket, rte_errno); - return NULL; - } - - obj = (struct rte_mbuf **)&ofo->db[ndb]; - for (i = 0; i != ndb; i++) { - ofo->db[i].nb_max = nobj; - ofo->db[i].obj = obj + i * nobj; - } - - ofo->nb_max = ndb; - return ofo; -} - diff --git a/lib/libtle_l4p/tcp_ofo.h b/lib/libtle_l4p/tcp_ofo.h index 9d88266..0857f17 100644 --- a/lib/libtle_l4p/tcp_ofo.h +++ b/lib/libtle_l4p/tcp_ofo.h @@ -20,8 +20,6 @@ extern "C" { #endif -#include <stdbool.h> - struct ofodb { uint32_t nb_elem; uint32_t nb_max; @@ -103,7 +101,7 @@ _ofo_insert_mbuf(struct ofo* ofo, uint32_t pos, union seqlen* sl, db->obj[k + i] = mb[i]; } if (tcp_seq_lt(end, seq)) - rte_pktmbuf_trim(mb[i - 1], seq - end); + _rte_pktmbuf_trim(mb[i - 1], seq - end); db->nb_elem += i; db->sl.len += tcp_seq_min(seq, end) - sl->seq; @@ -157,7 +155,7 @@ _ofo_insert_right(struct ofo *ofo, uint32_t pos, union seqlen *sl, plen = mb[i]->pkt_len; if (n < plen) { /* adjust partially overlapped packet. 
*/ - rte_pktmbuf_adj(mb[i], n); + mb[i] = _rte_pktmbuf_adj(mb[i], n); break; } } @@ -258,7 +256,7 @@ static inline uint32_t _ofodb_enqueue(struct rte_ring *r, const struct ofodb *db, uint32_t *seq) { uint32_t i, n, num, begin, end; - struct rte_mbuf *pkt; + struct rte_mbuf* pkt; n = 0; num = db->nb_elem; @@ -289,11 +287,7 @@ _ofodb_enqueue(struct rte_ring *r, const struct ofodb *db, uint32_t *seq) return num - n; } -struct ofo * -tcp_ofo_alloc(uint32_t nbufs, int32_t socket); - -void -tcp_ofo_free(struct ofo *ofo); +void calc_ofo_elems(uint32_t nbufs, uint32_t *nobj, uint32_t *ndb); #ifdef __cplusplus } diff --git a/lib/libtle_l4p/tcp_rxq.h b/lib/libtle_l4p/tcp_rxq.h index 2351ee6..be092f9 100644 --- a/lib/libtle_l4p/tcp_rxq.h +++ b/lib/libtle_l4p/tcp_rxq.h @@ -17,6 +17,7 @@ #define _TCP_RXQ_H_ #include "tcp_ofo.h" +#include "tcp_ctl.h" #ifdef __cplusplus extern "C" { @@ -74,6 +75,7 @@ rx_ofo_reduce(struct tle_tcp_stream *s) s->tcb.rcv.nxt = seq; _ofo_remove(ofo, 0, i); + return n; } @@ -133,6 +135,8 @@ rx_data_enqueue(struct tle_tcp_stream *s, uint32_t seq, uint32_t len, } n = rte_ring_count(s->rx.q); + /* update receive window with left recv buffer*/ + s->tcb.rcv.wnd = calc_rx_wnd(s, s->tcb.rcv.wscale); if (r != n) { /* raise RX event */ if (s->rx.ev != NULL) diff --git a/lib/libtle_l4p/tcp_rxtx.c b/lib/libtle_l4p/tcp_rxtx.c index a519645..5d7e0d1 100644 --- a/lib/libtle_l4p/tcp_rxtx.c +++ b/lib/libtle_l4p/tcp_rxtx.c @@ -28,8 +28,30 @@ #include "tcp_rxq.h" #include "tcp_txq.h" #include "tcp_tx_seg.h" +#include "tcp_rxtx.h" -#define TCP_MAX_PKT_SEG 0x20 +/* Uncomment below line to debug cwnd */ +// #define DEBUG_CWND + +#ifdef DEBUG_CWND +#define CWND_INFO(msg, value) printf("CWND: %s: %d\n", msg, value) +#else +#define CWND_INFO(msg, value) do {} while (0) +#endif + +#define TCP_MAX_PKT_SEG 0x20 +#define DELAY_ACK_CHECK_INTERVAL 100 + +/* must larger than l2_len(14)+l3_len(20)+l4_len(20)+tms_option(12) */ +#define RESERVE_HEADER_LEN 128 + +/* If we encounter 
exhaustion of recv win, we set this thresh to + * update recv win to the remote. It's not set to 1 or some smaller + * value to avoid too-frequent update. + */ +#define RECV_WIN_NOTIFY_THRESH 64 + +static inline int stream_fill_dest(struct tle_tcp_stream *s); /* * checks if input TCP ports and IP addresses match given stream. @@ -54,11 +76,17 @@ rx_check_stream(const struct tle_tcp_stream *s, const union pkt_info *pi) static inline struct tle_tcp_stream * rx_obtain_listen_stream(const struct tle_dev *dev, const union pkt_info *pi, - uint32_t type) + uint32_t type, uint8_t reuse) { struct tle_tcp_stream *s; - s = (struct tle_tcp_stream *)dev->dp[type]->streams[pi->port.dst]; + if (type == TLE_V4) + s = bhash_lookup4(dev->ctx->bhash[type], + pi->addr4.dst, pi->port.dst, reuse); + else + s = bhash_lookup6(dev->ctx->bhash[type], + pi->addr6->dst, pi->port.dst, reuse); + if (s == NULL || tcp_stream_acquire(s) < 0) return NULL; @@ -77,10 +105,10 @@ rx_obtain_stream(const struct tle_dev *dev, struct stbl *st, { struct tle_tcp_stream *s; - s = stbl_find_data(st, pi); + s = TCP_STREAM(stbl_find_stream(st, pi)); if (s == NULL) { - if (pi->tf.flags == TCP_FLAG_ACK) - return rx_obtain_listen_stream(dev, pi, type); + if (pi->tf.flags & TCP_FLAG_ACK) + return rx_obtain_listen_stream(dev, pi, type, 1); return NULL; } @@ -150,131 +178,6 @@ pkt_info_bulk_syneq(const union pkt_info pi[], uint32_t num) return i; } -static inline void -stream_drb_free(struct tle_tcp_stream *s, struct tle_drb *drbs[], - uint32_t nb_drb) -{ - _rte_ring_enqueue_burst(s->tx.drb.r, (void **)drbs, nb_drb); -} - -static inline uint32_t -stream_drb_alloc(struct tle_tcp_stream *s, struct tle_drb *drbs[], - uint32_t nb_drb) -{ - return _rte_ring_dequeue_burst(s->tx.drb.r, (void **)drbs, nb_drb); -} - -static inline uint32_t -get_ip_pid(struct tle_dev *dev, uint32_t num, uint32_t type, uint32_t st) -{ - uint32_t pid; - rte_atomic32_t *pa; - - pa = &dev->tx.packet_id[type]; - - if (st == 0) { - pid = 
rte_atomic32_add_return(pa, num); - return pid - num; - } else { - pid = rte_atomic32_read(pa); - rte_atomic32_set(pa, pid + num); - return pid; - } -} - -static inline void -fill_tcph(struct tcp_hdr *l4h, const struct tcb *tcb, union l4_ports port, - uint32_t seq, uint8_t hlen, uint8_t flags) -{ - uint16_t wnd; - - l4h->src_port = port.dst; - l4h->dst_port = port.src; - - wnd = (flags & TCP_FLAG_SYN) ? - RTE_MIN(tcb->rcv.wnd, (uint32_t)UINT16_MAX) : - tcb->rcv.wnd >> tcb->rcv.wscale; - - /* ??? use sse shuffle to hton all remaining 16 bytes at once. ??? */ - l4h->sent_seq = rte_cpu_to_be_32(seq); - l4h->recv_ack = rte_cpu_to_be_32(tcb->rcv.nxt); - l4h->data_off = hlen / TCP_DATA_ALIGN << TCP_DATA_OFFSET; - l4h->tcp_flags = flags; - l4h->rx_win = rte_cpu_to_be_16(wnd); - l4h->cksum = 0; - l4h->tcp_urp = 0; - - if (flags & TCP_FLAG_SYN) - fill_syn_opts(l4h + 1, &tcb->so); - else if ((flags & TCP_FLAG_RST) == 0 && tcb->so.ts.raw != 0) - fill_tms_opts(l4h + 1, tcb->snd.ts, tcb->rcv.ts); -} - -static inline int -tcp_fill_mbuf(struct rte_mbuf *m, const struct tle_tcp_stream *s, - const struct tle_dest *dst, uint64_t ol_flags, - union l4_ports port, uint32_t seq, uint32_t flags, - uint32_t pid, uint32_t swcsm) -{ - uint32_t l4, len, plen; - struct tcp_hdr *l4h; - char *l2h; - - len = dst->l2_len + dst->l3_len; - plen = m->pkt_len; - - if (flags & TCP_FLAG_SYN) - l4 = sizeof(*l4h) + TCP_TX_OPT_LEN_MAX; - else if ((flags & TCP_FLAG_RST) == 0 && s->tcb.rcv.ts != 0) - l4 = sizeof(*l4h) + TCP_TX_OPT_LEN_TMS; - else - l4 = sizeof(*l4h); - - /* adjust mbuf to put L2/L3/L4 headers into it. */ - l2h = rte_pktmbuf_prepend(m, len + l4); - if (l2h == NULL) - return -EINVAL; - - /* copy L2/L3 header */ - rte_memcpy(l2h, dst->hdr, len); - - /* setup TCP header & options */ - l4h = (struct tcp_hdr *)(l2h + len); - fill_tcph(l4h, &s->tcb, port, seq, l4, flags); - - /* setup mbuf TX offload related fields. 
*/ - m->tx_offload = _mbuf_tx_offload(dst->l2_len, dst->l3_len, l4, 0, 0, 0); - m->ol_flags |= ol_flags; - - /* update proto specific fields. */ - - if (s->s.type == TLE_V4) { - struct ipv4_hdr *l3h; - l3h = (struct ipv4_hdr *)(l2h + dst->l2_len); - l3h->packet_id = rte_cpu_to_be_16(pid); - l3h->total_length = rte_cpu_to_be_16(plen + dst->l3_len + l4); - - if ((ol_flags & PKT_TX_TCP_CKSUM) != 0) - l4h->cksum = _ipv4x_phdr_cksum(l3h, m->l3_len, - ol_flags); - else if (swcsm != 0) - l4h->cksum = _ipv4_udptcp_mbuf_cksum(m, len, l3h); - - if ((ol_flags & PKT_TX_IP_CKSUM) == 0 && swcsm != 0) - l3h->hdr_checksum = _ipv4x_cksum(l3h, m->l3_len); - } else { - struct ipv6_hdr *l3h; - l3h = (struct ipv6_hdr *)(l2h + dst->l2_len); - l3h->payload_len = rte_cpu_to_be_16(plen + l4); - if ((ol_flags & PKT_TX_TCP_CKSUM) != 0) - l4h->cksum = rte_ipv6_phdr_cksum(l3h, ol_flags); - else if (swcsm != 0) - l4h->cksum = _ipv6_udptcp_mbuf_cksum(m, len, l3h); - } - - return 0; -} - /* * That function supposed to be used only for data packets. * Assumes that L2/L3/L4 headers and mbuf fields already setup properly. @@ -355,6 +258,9 @@ tx_data_pkts(struct tle_tcp_stream *s, struct rte_mbuf *const m[], uint32_t num) i = tle_dring_mp_enqueue(&dev->tx.dr, (const void * const*)m, num, drb, &nb); + if (i > 0) + timer_stop(s, TIMER_DACK); + /* free unused drbs. 
*/ if (nb != 0) stream_drb_free(s, drb + nbm - nb, nb); @@ -362,6 +268,113 @@ tx_data_pkts(struct tle_tcp_stream *s, struct rte_mbuf *const m[], uint32_t num) return i; } +/* + * case 0: pkt is not split yet, (indicate plen > sl->len) + * case 1: pkt is split, but left packet > sl->len + * case 2: pkt is split, but left packet <= sl->len + */ +static inline struct rte_mbuf * +get_indirect_mbuf(struct tle_tcp_stream *s, + struct rte_mbuf *m, uint32_t *p_plen, + union seqlen *sl, uint32_t type, + uint32_t mss) +{ + uint32_t hdr_len = PKT_L234_HLEN(m), plen, left; + struct rte_mbuf *f, *t; + uint16_t i, nb_segs, adj; + void *hdr; + + if (s->tcb.snd.nxt_pkt) { + f = s->tcb.snd.nxt_pkt; + plen = f->data_len - s->tcb.snd.nxt_offset; + if (f == m) /* 1st segment contains net headers */ + plen -= hdr_len; + } else { + f = m; + plen = f->data_len - hdr_len; + } + + TCP_LOG(DEBUG, "m(%p): pkt_len=%u, nb_segs=%u, sl->len = %u\n", + m, m->pkt_len, m->nb_segs, sl->len); + + nb_segs = 1; + if (sl->len < plen) { + /* Segment split needed: sometimes, cwnd will be reset to + * 1 or 2 mss. In this case, we send part of this seg, and + * record which segment we've sent, and the offset of sent + * data in tcb. 
+ */ + left = plen - sl->len; + plen = sl->len; + s->tcb.snd.nxt_pkt = f; + } else { + left = 0; + t = f->next; + while (t && plen + t->data_len <= sl->len) { + plen += t->data_len; + t = t->next; + nb_segs++; + } + s->tcb.snd.nxt_pkt = t; + } + + struct rte_mbuf *pkts[1 + nb_segs]; + if (rte_pktmbuf_alloc_bulk(s->tx.dst.head_mp, pkts, 1 + nb_segs) < 0) + return NULL; + + rte_pktmbuf_attach(pkts[1], f); + + /* remove bytes in the beginning */ + adj = s->tcb.snd.nxt_offset; + if (f == m) + adj += hdr_len; + if (adj) + rte_pktmbuf_adj(pkts[1], adj); + + /* remove bytes in the end */ + if (left > 0) { + rte_pktmbuf_trim(pkts[1], left); + s->tcb.snd.nxt_offset += plen; + } else + s->tcb.snd.nxt_offset = 0; + + /* attach chaining segment if we have */ + for (i = 1, t = f->next; i < nb_segs; ++i) { + rte_pktmbuf_attach(pkts[i+1], t); + pkts[i]->next = pkts[i+1]; + t = t->next; + } + + /* prepare l2/l3/l4 header */ + hdr = rte_pktmbuf_append(pkts[0], hdr_len); + rte_memcpy(hdr, rte_pktmbuf_mtod(m, void *), hdr_len); + pkts[0]->nb_segs = nb_segs + 1; + pkts[0]->pkt_len = plen + hdr_len; + pkts[0]->ol_flags = m->ol_flags; + pkts[0]->tx_offload = m->tx_offload; + if (type == TLE_V4) { + struct ipv4_hdr *l3h; + + l3h = rte_pktmbuf_mtod_offset(pkts[0], + struct ipv4_hdr *, m->l2_len); + l3h->total_length = + rte_cpu_to_be_16(plen + m->l3_len + m->l4_len); + } else { + struct ipv6_hdr *l3h; + + l3h = rte_pktmbuf_mtod_offset(pkts[0], + struct ipv6_hdr *, m->l2_len); + l3h->payload_len = + rte_cpu_to_be_16(plen + m->l4_len); + } + if (plen <= mss) + pkts[0]->ol_flags &= ~PKT_TX_TCP_SEG; + pkts[0]->next = pkts[1]; + + *p_plen = plen; + return pkts[0]; +} + static inline uint32_t tx_data_bulk(struct tle_tcp_stream *s, union seqlen *sl, struct rte_mbuf *mi[], uint32_t num) @@ -371,11 +384,13 @@ tx_data_bulk(struct tle_tcp_stream *s, union seqlen *sl, struct rte_mbuf *mi[], struct rte_mbuf *mb; struct rte_mbuf *mo[MAX_PKT_BURST + TCP_MAX_PKT_SEG]; + /* check stream has drb to send 
pkts */ + if (stream_drb_empty(s)) + return 0; + mss = s->tcb.snd.mss; type = s->s.type; - dev = s->tx.dst.dev; - pid = get_ip_pid(dev, num, type, (s->flags & TLE_CTX_FLAG_ST) != 0); k = 0; tn = 0; @@ -383,26 +398,64 @@ tx_data_bulk(struct tle_tcp_stream *s, union seqlen *sl, struct rte_mbuf *mi[], for (i = 0; i != num && sl->len != 0 && fail == 0; i++) { mb = mi[i]; - sz = RTE_MIN(sl->len, mss); plen = PKT_L4_PLEN(mb); /*fast path, no need to use indirect mbufs. */ - if (plen <= sz) { - + if (s->tcb.snd.nxt_pkt == NULL && plen <= sl->len) { + pid = get_ip_pid(dev, calc_seg_cnt(plen, s->tcb.snd.mss), + type, (s->flags & TLE_CTX_FLAG_ST) != 0); /* update pkt TCP header */ - tcp_update_mbuf(mb, type, &s->tcb, sl->seq, pid + i); + tcp_update_mbuf(mb, type, &s->tcb, sl->seq, pid); /* keep mbuf till ACK is received. */ rte_pktmbuf_refcnt_update(mb, 1); sl->len -= plen; sl->seq += plen; mo[k++] = mb; - /* remaining snd.wnd is less them MSS, send nothing */ - } else if (sz < mss) + if (sl->seq <= s->tcb.snd.rcvr) + TCP_INC_STATS(TCP_MIB_RETRANSSEGS); + /* remaining snd.wnd is less than MSS, send nothing */ + } else if (sl->len < mss) { + break; + /* some data to send already */ + } else if (k != 0 || tn != 0) { break; /* packet indirection needed */ - else - RTE_VERIFY(0); + } else { + struct rte_mbuf *out; + + out = get_indirect_mbuf(s, mb, &plen, sl, type, mss); + if (out == NULL) + return 0; + + pid = get_ip_pid(dev, calc_seg_cnt(plen, s->tcb.snd.mss), + type, (s->flags & TLE_CTX_FLAG_ST) != 0); + /* update pkt TCP header */ + tcp_update_mbuf(out, type, &s->tcb, sl->seq, pid); + + /* no need to bump refcnt !!! 
*/ + + sl->len -= plen; + sl->seq += plen; + + if (tx_data_pkts(s, &out, 1) == 0) { + /* should not happen, we have checked at least one + * drb is available to send this mbuf + */ + rte_pktmbuf_free(out); + return 0; + } + + if (sl->seq <= s->tcb.snd.rcvr) + TCP_INC_STATS(TCP_MIB_RETRANSSEGS); + + if (s->tcb.snd.nxt_pkt) + return 0; + else { + tn = 1; + continue; + } + } if (k >= MAX_PKT_BURST) { n = tx_data_pkts(s, mo, k); @@ -466,14 +519,17 @@ tx_nxt_data(struct tle_tcp_stream *s, uint32_t tms) tcp_txq_set_nxt_head(s, n); } while (n == num); - s->tcb.snd.nxt += sl.seq - (uint32_t)s->tcb.snd.nxt; + if (sl.seq != (uint32_t)s->tcb.snd.nxt) { + s->tcb.snd.nxt += sl.seq - (uint32_t)s->tcb.snd.nxt; + s->tcb.snd.ack = s->tcb.rcv.nxt; + } return tn; } static inline void free_una_data(struct tle_tcp_stream *s, uint32_t len) { - uint32_t i, num, plen; + uint32_t i, num, plen, una_data; struct rte_mbuf **mi; plen = 0; @@ -487,14 +543,18 @@ free_una_data(struct tle_tcp_stream *s, uint32_t len) /* free acked data */ for (i = 0; i != num && plen != len; i++) { - uint32_t next_pkt_len = PKT_L4_PLEN(mi[i]); - if (plen + next_pkt_len > len) { - /* keep SND.UNA at the start of the packet */ - len = plen; + una_data = PKT_L4_PLEN(mi[i]) - s->tcb.snd.una_offset; + + /* partial ack */ + if (plen + una_data > len) { + s->tcb.snd.una_offset += len - plen; + plen = len; break; - } else { - plen += next_pkt_len; } + + /* monolithic ack */ + s->tcb.snd.una_offset = 0; + plen += una_data; rte_pktmbuf_free(mi[i]); } @@ -503,6 +563,7 @@ free_una_data(struct tle_tcp_stream *s, uint32_t len) } while (plen < len); s->tcb.snd.una += len; + s->tcb.snd.waitlen -= len; /* * that could happen in case of retransmit, @@ -519,7 +580,7 @@ calc_smss(uint16_t mss, const struct tle_dest *dst) { uint16_t n; - n = dst->mtu - dst->l2_len - dst->l3_len - TCP_TX_HDR_DACK; + n = dst->mtu - dst->l3_len - sizeof(struct tcp_hdr); mss = RTE_MIN(n, mss); return mss; } @@ -537,71 +598,53 @@ initial_cwnd(uint32_t 
smss, uint32_t icw) return RTE_MIN(10 * smss, RTE_MAX(2 * smss, icw)); } -/* - * queue standalone packet to he particular output device - * It assumes that: - * - L2/L3/L4 headers should be already set. - * - packet fits into one segment. - */ -static inline int -send_pkt(struct tle_tcp_stream *s, struct tle_dev *dev, struct rte_mbuf *m) +void +tle_tcp_stream_kill(struct tle_stream *ts) { - uint32_t n, nb; - struct tle_drb *drb; - - if (stream_drb_alloc(s, &drb, 1) == 0) - return -ENOBUFS; - - /* enqueue pkt for TX. */ - nb = 1; - n = tle_dring_mp_enqueue(&dev->tx.dr, (const void * const*)&m, 1, - &drb, &nb); - - /* free unused drbs. */ - if (nb != 0) - stream_drb_free(s, &drb, 1); - - return (n == 1) ? 0 : -ENOBUFS; -} + struct tle_tcp_stream *s; -static inline int -send_ctrl_pkt(struct tle_tcp_stream *s, struct rte_mbuf *m, uint32_t seq, - uint32_t flags) -{ - const struct tle_dest *dst; - uint32_t pid, type; - int32_t rc; + s = TCP_STREAM(ts); + if (ts == NULL || s->s.type >= TLE_VNUM) + return; - dst = &s->tx.dst; - type = s->s.type; - pid = get_ip_pid(dst->dev, 1, type, (s->flags & TLE_CTX_FLAG_ST) != 0); + if (s->tcb.state > TCP_ST_LISTEN) + send_rst(s, s->tcb.snd.nxt); - rc = tcp_fill_mbuf(m, s, dst, 0, s->s.port, seq, flags, pid, 1); - if (rc == 0) - rc = send_pkt(s, dst->dev, m); + if (s->tcb.state == TCP_ST_ESTABLISHED) + TCP_DEC_STATS_ATOMIC(TCP_MIB_CURRESTAB); - return rc; + s->tcb.state = TCP_ST_CLOSED; + rte_smp_wmb(); + timer_stop(s, TIMER_RTO); } static inline int -send_rst(struct tle_tcp_stream *s, uint32_t seq) +send_ack(struct tle_tcp_stream *s, uint32_t tms, uint32_t flags) { struct rte_mbuf *m; + uint32_t seq; int32_t rc; m = rte_pktmbuf_alloc(s->tx.dst.head_mp); if (m == NULL) return -ENOMEM; - rc = send_ctrl_pkt(s, m, seq, TCP_FLAG_RST); - if (rc != 0) + seq = s->tcb.snd.nxt - ((flags & (TCP_FLAG_FIN | TCP_FLAG_SYN)) != 0); + s->tcb.snd.ts = tms; + + rc = send_ctrl_pkt(s, m, seq, flags); + if (rc != 0) { rte_pktmbuf_free(m); + return rc; + } 
- return rc; + timer_stop(s, TIMER_DACK); + s->tcb.snd.ack = s->tcb.rcv.nxt; + return 0; } static inline int -send_ack(struct tle_tcp_stream *s, uint32_t tms, uint32_t flags) +send_keepalive(struct tle_tcp_stream *s) { struct rte_mbuf *m; uint32_t seq; @@ -611,20 +654,16 @@ send_ack(struct tle_tcp_stream *s, uint32_t tms, uint32_t flags) if (m == NULL) return -ENOMEM; - seq = s->tcb.snd.nxt - ((flags & (TCP_FLAG_FIN | TCP_FLAG_SYN)) != 0); - s->tcb.snd.ts = tms; + seq = s->tcb.snd.una - 1; - rc = send_ctrl_pkt(s, m, seq, flags); + rc = send_ctrl_pkt(s, m, seq, TCP_FLAG_ACK); if (rc != 0) { rte_pktmbuf_free(m); return rc; } - - s->tcb.snd.ack = s->tcb.rcv.nxt; return 0; } - static int sync_ack(struct tle_tcp_stream *s, const union pkt_info *pi, const union seg_info *si, uint32_t ts, struct rte_mbuf *m) @@ -633,19 +672,23 @@ sync_ack(struct tle_tcp_stream *s, const union pkt_info *pi, int32_t rc; uint32_t pid, seq, type; struct tle_dev *dev; - const void *da; + const void *sa, *da; struct tle_dest dst; const struct tcp_hdr *th; - type = s->s.type; + type = pi->tf.type; /* get destination information. 
*/ - if (type == TLE_V4) + if (type == TLE_V4) { da = &pi->addr4.src; - else + sa = &pi->addr4.dst; + } + else { da = &pi->addr6->src; + sa = &pi->addr6->dst; + } - rc = stream_get_dest(&s->s, da, &dst); + rc = stream_get_dest(type, &s->s, sa, da, &dst); if (rc < 0) return rc; @@ -654,11 +697,16 @@ sync_ack(struct tle_tcp_stream *s, const union pkt_info *pi, get_syn_opts(&s->tcb.so, (uintptr_t)(th + 1), m->l4_len - sizeof(*th)); s->tcb.rcv.nxt = si->seq + 1; + s->tcb.rcv.cpy = si->seq + 1; seq = sync_gen_seq(pi, s->tcb.rcv.nxt, ts, s->tcb.so.mss, s->s.ctx->prm.hash_alg, &s->s.ctx->prm.secret_key); - s->tcb.so.ts.ecr = s->tcb.so.ts.val; - s->tcb.so.ts.val = sync_gen_ts(ts, s->tcb.so.wscale); + + if (s->tcb.so.ts.raw) { + s->tcb.so.ts.ecr = s->tcb.so.ts.val; + s->tcb.so.ts.val = sync_gen_ts(ts, s->tcb.so.wscale); + } + s->tcb.so.wscale = (s->tcb.so.wscale == TCP_WSCALE_NONE) ? TCP_WSCALE_NONE : TCP_WSCALE_DEFAULT; s->tcb.so.mss = calc_smss(dst.mtu, &dst); @@ -672,11 +720,13 @@ sync_ack(struct tle_tcp_stream *s, const union pkt_info *pi, dev = dst.dev; pid = get_ip_pid(dev, 1, type, (s->flags & TLE_CTX_FLAG_ST) != 0); - rc = tcp_fill_mbuf(m, s, &dst, 0, pi->port, seq, - TCP_FLAG_SYN | TCP_FLAG_ACK, pid, 1); + rc = tcp_fill_mbuf(m, s, &dst, TCP_OLFLAGS_CKSUM(dst.ol_flags), + pi->port, seq, TCP_FLAG_SYN | TCP_FLAG_ACK, pid, 1); if (rc == 0) rc = send_pkt(s, dev, m); + TCP_INC_STATS(TCP_MIB_PASSIVEOPENS); + return rc; } @@ -800,43 +850,24 @@ restore_syn_opt(union seg_info *si, union tsopt *to, return 0; } -static inline void -stream_term(struct tle_tcp_stream *s) -{ - struct sdr *dr; - - s->tcb.state = TCP_ST_CLOSED; - rte_smp_wmb(); - - timer_stop(s); - - /* close() was already invoked, schedule final cleanup */ - if ((s->tcb.uop & TCP_OP_CLOSE) != 0) { - - dr = CTX_TCP_SDR(s->s.ctx); - STAILQ_INSERT_TAIL(&dr->be, &s->s, link); - - /* notify user that stream need to be closed */ - } else if (s->err.ev != NULL) - tle_event_raise(s->err.ev); - else if (s->err.cb.func != 
NULL) - s->err.cb.func(s->err.cb.data, &s->s); -} - static inline int stream_fill_dest(struct tle_tcp_stream *s) { int32_t rc; uint32_t type; - const void *da; + const void *sa, *da; - type = s->s.type; - if (type == TLE_V4) + type = s->s.type; + if (type == TLE_V4) { + sa = &s->s.ipv4.addr.dst; da = &s->s.ipv4.addr.src; - else + } + else { + sa = &s->s.ipv6.addr.dst; da = &s->s.ipv6.addr.src; + } - rc = stream_get_dest(&s->s, da, &s->tx.dst); + rc = stream_get_dest(type, &s->s, sa, da, &s->tx.dst); return (rc < 0) ? rc : 0; } @@ -851,19 +882,17 @@ accept_prep_stream(struct tle_tcp_stream *ps, struct stbl *st, int32_t rc; uint32_t rtt; - /* some TX still pending for that stream. */ - if (TCP_STREAM_TX_PENDING(cs)) - return -EAGAIN; - /* setup L4 ports and L3 addresses fields. */ cs->s.port.raw = pi->port.raw; cs->s.pmsk.raw = UINT32_MAX; if (pi->tf.type == TLE_V4) { + cs->s.type = TLE_V4; cs->s.ipv4.addr = pi->addr4; cs->s.ipv4.mask.src = INADDR_NONE; cs->s.ipv4.mask.dst = INADDR_NONE; } else if (pi->tf.type == TLE_V6) { + cs->s.type = TLE_V6; cs->s.ipv6.addr = *pi->addr6; rte_memcpy(&cs->s.ipv6.mask.src, &tle_ipv6_none, sizeof(cs->s.ipv6.mask.src)); @@ -887,7 +916,7 @@ accept_prep_stream(struct tle_tcp_stream *ps, struct stbl *st, cs->tcb.snd.rto = TCP_RTO_DEFAULT; /* copy streams type & flags. */ - cs->s.type = ps->s.type; + cs->s.type = pi->tf.type; cs->flags = ps->flags; /* retrive and cache destination information. 
*/ @@ -897,16 +926,23 @@ accept_prep_stream(struct tle_tcp_stream *ps, struct stbl *st, /* update snd.mss with SMSS value */ cs->tcb.snd.mss = calc_smss(cs->tcb.snd.mss, &cs->tx.dst); + if (cs->tcb.so.ts.raw != 0) { + cs->tcb.snd.mss -= TCP_TX_OPT_LEN_TMS; + } /* setup congestion variables */ cs->tcb.snd.cwnd = initial_cwnd(cs->tcb.snd.mss, ps->tcb.snd.cwnd); + CWND_INFO("accept", cs->tcb.snd.cwnd); + cs->tcb.snd.ssthresh = cs->tcb.snd.wnd; cs->tcb.snd.rto_tw = ps->tcb.snd.rto_tw; + cs->tcb.snd.rto_fw = ps->tcb.snd.rto_fw; cs->tcb.state = TCP_ST_ESTABLISHED; + TCP_INC_STATS_ATOMIC(TCP_MIB_CURRESTAB); /* add stream to the table */ - cs->ste = stbl_add_stream(st, pi, cs); + cs->ste = stbl_add_stream(st, &cs->s); if (cs->ste == NULL) return -ENOBUFS; @@ -937,7 +973,7 @@ rx_ack_listen(struct tle_tcp_stream *s, struct stbl *st, *csp = NULL; - if (pi->tf.flags != TCP_FLAG_ACK || rx_check_stream(s, pi) != 0) + if ((pi->tf.flags & TCP_FLAG_ACK) == 0|| rx_check_stream(s, pi) != 0) return -EINVAL; ctx = s->s.ctx; @@ -964,7 +1000,8 @@ rx_ack_listen(struct tle_tcp_stream *s, struct stbl *st, /* cleanup on failure */ tcp_stream_down(cs); - stbl_del_stream(st, cs->ste, cs, 0); + TCP_DEC_STATS_ATOMIC(TCP_MIB_CURRESTAB); + stbl_del_stream(st, cs->ste, &cs->s); cs->ste = NULL; } @@ -982,6 +1019,10 @@ data_pkt_adjust(const struct tcb *tcb, struct rte_mbuf **mb, uint32_t hlen, len = *plen; rte_pktmbuf_adj(*mb, hlen); + /* header is removed, so we clear tx_offload here to make sure + * we can get correct payload length with PKT_L4_PLEN. 
+ */ + (*mb)->tx_offload = 0; if (len == 0) return -ENODATA; /* cut off the start of the packet */ @@ -1018,7 +1059,8 @@ rx_ackdata(struct tle_tcp_stream *s, uint32_t ack) tle_event_raise(s->tx.ev); else if (k == 0 && s->tx.cb.func != NULL) s->tx.cb.func(s->tx.cb.data, &s->s); - } + } else + txs_enqueue(s->s.ctx, s); } return n; @@ -1029,8 +1071,7 @@ stream_timewait(struct tle_tcp_stream *s, uint32_t rto) { if (rto != 0) { s->tcb.state = TCP_ST_TIME_WAIT; - s->tcb.snd.rto = rto; - timer_reset(s); + timer_reset(s, TIMER_RTO, rto); } else stream_term(s); } @@ -1041,20 +1082,30 @@ rx_fin_state(struct tle_tcp_stream *s, struct resp_info *rsp) uint32_t state; int32_t ackfin; + s->tcb.rcv.frs.on = 2; s->tcb.rcv.nxt += 1; ackfin = (s->tcb.snd.una == s->tcb.snd.fss); state = s->tcb.state; if (state == TCP_ST_ESTABLISHED) { + TCP_DEC_STATS_ATOMIC(TCP_MIB_CURRESTAB); s->tcb.state = TCP_ST_CLOSE_WAIT; /* raise err.ev & err.cb */ - if (s->err.ev != NULL) + /* raise error event only when recvbuf is empty, to inform + * that the stream will not receive data any more. + */ + if (rte_ring_count(s->rx.q) == 0 && s->err.ev != NULL) tle_event_raise(s->err.ev); else if (s->err.cb.func != NULL) s->err.cb.func(s->err.cb.data, &s->s); } else if (state == TCP_ST_FIN_WAIT_1 || state == TCP_ST_CLOSING) { rsp->flags |= TCP_FLAG_ACK; + + /* shutdown instead of close happens */ + if (rte_ring_count(s->rx.q) == 0 && s->err.ev != NULL) + tle_event_raise(s->err.ev); + if (ackfin != 0) stream_timewait(s, s->tcb.snd.rto_tw); else @@ -1089,8 +1140,10 @@ rx_fin(struct tle_tcp_stream *s, uint32_t state, ts = rx_tms_opt(&s->tcb, mb); ret = rx_check_seqack(&s->tcb, seq, si->ack, plen, ts); - if (ret != 0) + if (ret != 0) { + rsp->flags |= TCP_FLAG_ACK; return ret; + } if (state < TCP_ST_ESTABLISHED) return -EINVAL; @@ -1108,9 +1161,10 @@ rx_fin(struct tle_tcp_stream *s, uint32_t state, * fast-path: all data & FIN was already sent out * and now is acknowledged. 
*/ - if (s->tcb.snd.fss == s->tcb.snd.nxt && - si->ack == (uint32_t)s->tcb.snd.nxt) { + if (s->tcb.snd.fss >= s->tcb.snd.nxt && + si->ack == (uint32_t)s->tcb.snd.fss) { s->tcb.snd.una = s->tcb.snd.fss; + s->tcb.snd.nxt = s->tcb.snd.una; empty_tq(s); /* conventional ACK processiing */ } else @@ -1148,8 +1202,25 @@ rx_rst(struct tle_tcp_stream *s, uint32_t state, uint32_t flags, else rc = check_seqn(&s->tcb, si->seq, 0); - if (rc == 0) + if (rc == 0) { + /* receive rst, connection is closed abnormal + * and should return errno in later operations. + */ + switch (state) { + case TCP_ST_SYN_SENT: + TCP_INC_STATS(TCP_MIB_ATTEMPTFAILS); + s->tcb.err = ECONNREFUSED; + break; + case TCP_ST_CLOSE_WAIT: + s->tcb.err = EPIPE; + break; + case TCP_ST_CLOSED: + return rc; + default: + s->tcb.err = ECONNRESET; + } stream_term(s); + } return rc; } @@ -1222,6 +1293,7 @@ rto_cwnd_update(struct tcb *tcb) * no more than 1 full-sized segment. */ tcb->snd.cwnd = tcb->snd.mss; + CWND_INFO("update", tcb->snd.cwnd); } static inline void @@ -1330,13 +1402,17 @@ rx_data_ack(struct tle_tcp_stream *s, struct dack_info *tack, ret = rx_check_seqack(&s->tcb, si[j].seq, si[j].ack, plen, ts); - if (ret != 0) - break; - /* account for segment received */ ack_info_update(tack, &si[j], ret != 0, plen, ts); + if (ret != 0) + break; + rte_pktmbuf_adj(mb[j], hlen); + /* header is removed, so we clear tx_offload here to make sure + * we can get correct payload length with PKT_L4_PLEN. 
+ */ + mb[j]->tx_offload = 0; } n = j - i; @@ -1377,6 +1453,7 @@ start_fast_retransmit(struct tle_tcp_stream *s) tcp_txq_rst_nxt_head(s); tcb->snd.nxt = tcb->snd.una; tcb->snd.cwnd = tcb->snd.ssthresh + 3 * tcb->snd.mss; + CWND_INFO("start fast retrans", tcb->snd.cwnd); } static inline void @@ -1389,6 +1466,7 @@ stop_fast_retransmit(struct tle_tcp_stream *s) n = tcb->snd.nxt - tcb->snd.una; tcb->snd.cwnd = RTE_MIN(tcb->snd.ssthresh, RTE_MAX(n, tcb->snd.mss) + tcb->snd.mss); + CWND_INFO("stop fast retrans", tcb->snd.cwnd); tcb->snd.fastack = 0; } @@ -1415,8 +1493,10 @@ in_fast_retransmit(struct tle_tcp_stream *s, uint32_t ack_len, uint32_t ack_num, * during fast recovery, also reset the * retransmit timer. */ - if (tcb->snd.fastack == 1) - timer_reset(s); + if (tcb->snd.fastack == 1) { + timer_reset(s, TIMER_RTO, s->tcb.snd.rto); + s->tcb.snd.nb_retx = 0; + } tcb->snd.fastack += ack_num; return 1; @@ -1456,7 +1536,8 @@ process_ack(struct tle_tcp_stream *s, uint32_t acked, /* remain in normal mode */ } else if (acked != 0) { ack_cwnd_update(&s->tcb, acked, tack); - timer_stop(s); + timer_stop(s, TIMER_RTO); + s->tcb.snd.nb_retx = 0; } /* fast retransmit mode */ @@ -1470,7 +1551,7 @@ process_ack(struct tle_tcp_stream *s, uint32_t acked, } else { /* RFC 5682 3.2.3 full ACK */ stop_fast_retransmit(s); - timer_stop(s); + timer_stop(s, TIMER_RTO); /* if we have another series of dup ACKs */ if (tack->dup3.seg != 0 && @@ -1501,17 +1582,22 @@ rx_ackfin(struct tle_tcp_stream *s) uint32_t state; s->tcb.snd.una = s->tcb.snd.fss; + s->tcb.snd.nxt = s->tcb.snd.una; empty_tq(s); state = s->tcb.state; if (state == TCP_ST_LAST_ACK) stream_term(s); else if (state == TCP_ST_FIN_WAIT_1) { - timer_stop(s); + timer_stop(s, TIMER_RTO); s->tcb.state = TCP_ST_FIN_WAIT_2; - } else if (state == TCP_ST_CLOSING) { + /* if stream is closed, should be released + * before timeout even without fin from peer + */ + if (s->tcb.uop & TCP_OP_CLOSE) + timer_start(s, TIMER_RTO, s->tcb.snd.rto_fw); + } 
else if (state == TCP_ST_CLOSING) stream_timewait(s, s->tcb.snd.rto_tw); - } } static inline void @@ -1532,7 +1618,7 @@ rx_process_ack(struct tle_tcp_stream *s, uint32_t ts, /* restart RTO timer. */ if (s->tcb.snd.nxt != s->tcb.snd.una) - timer_start(s); + timer_start(s, TIMER_RTO, s->tcb.snd.rto); /* update rto, if fresh packet is here then calculate rtt */ if (tack->ts.ecr != 0) @@ -1554,15 +1640,9 @@ rx_synack(struct tle_tcp_stream *s, uint32_t ts, uint32_t state, if (state != TCP_ST_SYN_SENT) return -EINVAL; - /* - * RFC 793 3.9: in the SYN-SENT state - * If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send a reset - * <SEQ=SEG.ACK><CTL=RST> - * and discard the segment. - * The connection remains in the same state. - */ + /* invalid SEG.SEQ */ if (si->ack != (uint32_t)s->tcb.snd.nxt) { - send_rst(s, si->ack); + rsp->flags = TCP_FLAG_RST; return 0; } @@ -1574,18 +1654,25 @@ rx_synack(struct tle_tcp_stream *s, uint32_t ts, uint32_t state, s->tcb.snd.una = s->tcb.snd.nxt; s->tcb.snd.mss = calc_smss(so.mss, &s->tx.dst); + if (s->tcb.so.ts.raw != 0) { + s->tcb.snd.mss -= TCP_TX_OPT_LEN_TMS; + } s->tcb.snd.wnd = si->wnd << so.wscale; s->tcb.snd.wu.wl1 = si->seq; s->tcb.snd.wu.wl2 = si->ack; s->tcb.snd.wscale = so.wscale; + s->tcb.snd.cork_ts = 0; /* setup congestion variables */ s->tcb.snd.cwnd = initial_cwnd(s->tcb.snd.mss, s->tcb.snd.cwnd); + CWND_INFO("synack", s->tcb.snd.cwnd); + s->tcb.snd.ssthresh = s->tcb.snd.wnd; s->tcb.rcv.ts = so.ts.val; s->tcb.rcv.irs = si->seq; s->tcb.rcv.nxt = si->seq + 1; + s->tcb.rcv.cpy = si->seq + 1; /* if peer doesn't support WSCALE opt, recalculate RCV.WND */ s->tcb.rcv.wscale = (so.wscale == TCP_WSCALE_NONE) ? 
@@ -1597,9 +1684,14 @@ rx_synack(struct tle_tcp_stream *s, uint32_t ts, uint32_t state, rsp->flags |= TCP_FLAG_ACK; - timer_stop(s); + timer_stop(s, TIMER_RTO); + s->tcb.snd.nb_retx = 0; s->tcb.state = TCP_ST_ESTABLISHED; rte_smp_wmb(); + TCP_INC_STATS_ATOMIC(TCP_MIB_CURRESTAB); + + if (s->s.option.keepalive) + timer_start(s, TIMER_KEEPALIVE, s->s.option.keepidle * MS_PER_S); if (s->tx.ev != NULL) tle_event_raise(s->tx.ev); @@ -1689,8 +1781,8 @@ rx_stream(struct tle_tcp_stream *s, uint32_t ts, * fast-path: all data & FIN was already sent out * and now is acknowledged. */ - if (s->tcb.snd.fss == s->tcb.snd.nxt && - tack.ack == (uint32_t)s->tcb.snd.nxt) + if (s->tcb.snd.fss >= s->tcb.snd.nxt && + tack.ack == (uint32_t)s->tcb.snd.fss) rx_ackfin(s); else rx_process_ack(s, ts, &tack); @@ -1702,27 +1794,44 @@ rx_stream(struct tle_tcp_stream *s, uint32_t ts, * - received segment with INO data and no TX is scheduled * for that stream. */ - if (tack.segs.badseq != 0 || tack.segs.ofo != 0 || - (tack.segs.data != 0 && - rte_atomic32_read(&s->tx.arm) == 0)) + if (tack.segs.badseq != 0 || tack.segs.ofo != 0) + rsp.flags |= TCP_FLAG_ACK; + else if (tack.segs.data != 0 && + rte_atomic32_read(&s->tx.arm) == 0 && + (s->s.option.tcpquickack || + s->tcb.rcv.nxt - s->tcb.snd.ack > 8 * s->tcb.so.mss)) { rsp.flags |= TCP_FLAG_ACK; + if (s->s.option.tcpquickack > 0) + s->s.option.tcpquickack--; + } + else if (tack.segs.data && rsp.flags == 0) + timer_start(s, TIMER_DACK, DELAY_ACK_CHECK_INTERVAL); rx_ofo_fin(s, &rsp); k += num - n; i = num; + if (s->s.option.keepalive) { + s->tcb.snd.nb_keepalive = 0; + timer_reset(s, TIMER_KEEPALIVE, s->s.option.keepidle * MS_PER_S); + } /* unhandled state, drop all packets. */ } else i = 0; /* we have a response packet to send. 
*/ - if (rsp.flags != 0) { + if (rsp.flags == TCP_FLAG_RST) { + send_rst(s, si[i].ack); + stream_term(s); + } else if (rsp.flags != 0) { send_ack(s, ts, rsp.flags); /* start the timer for FIN packet */ - if ((rsp.flags & TCP_FLAG_FIN) != 0) - timer_reset(s); + if ((rsp.flags & TCP_FLAG_FIN) != 0) { + timer_reset(s, TIMER_RTO, s->tcb.snd.rto); + s->tcb.snd.nb_retx = 0; + } } /* unprocessed packets */ @@ -1778,7 +1887,6 @@ rx_postsyn(struct tle_dev *dev, struct stbl *st, uint32_t type, uint32_t ts, state = s->tcb.state; if (state == TCP_ST_LISTEN) { - /* one connection per flow */ cs = NULL; ret = -EINVAL; @@ -1835,6 +1943,74 @@ rx_postsyn(struct tle_dev *dev, struct stbl *st, uint32_t type, uint32_t ts, return num - k; } +static inline void +sync_refuse(struct tle_tcp_stream *s, struct tle_dev *dev, + const union pkt_info *pi, struct rte_mbuf *m) +{ + struct ether_hdr *eth_h; + struct ether_addr eth_addr; + struct ipv4_hdr *ip_h; + uint32_t ip_addr; + struct ipv6_hdr *ipv6_h; + struct in6_addr ipv6_addr; + struct tcp_hdr *th; + uint16_t port; + + /* rst pkt should not contain options for syn */ + rte_pktmbuf_trim(m, m->l4_len - sizeof(*th)); + + eth_h = rte_pktmbuf_mtod(m, struct ether_hdr*); + ether_addr_copy(ð_h->s_addr, ð_addr); + ether_addr_copy(ð_h->d_addr, ð_h->s_addr); + ether_addr_copy(ð_addr, ð_h->d_addr); + + th = rte_pktmbuf_mtod_offset(m, struct tcp_hdr*, + m->l2_len + m->l3_len); + port = th->src_port; + th->src_port = th->dst_port; + th->dst_port = port; + th->tcp_flags = TCP_FLAG_RST | TCP_FLAG_ACK; + th->recv_ack = rte_cpu_to_be_32(rte_be_to_cpu_32(th->sent_seq) + 1); + th->sent_seq = 0; + th->data_off &= 0x0f; + th->data_off |= (sizeof(*th) / 4) << 4; + th->cksum = 0; + + if (pi->tf.type == TLE_V4) { + ip_h = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr*, + m->l2_len); + ip_addr = ip_h->src_addr; + ip_h->src_addr = ip_h->dst_addr; + ip_h->dst_addr = ip_addr; + ip_h->total_length = rte_cpu_to_be_16( + rte_be_to_cpu_16(ip_h->total_length) - + (m->l4_len 
- sizeof(*th))); + ip_h->hdr_checksum = 0; + th->cksum = rte_ipv4_udptcp_cksum(ip_h, th); + ip_h->hdr_checksum = rte_ipv4_cksum(ip_h); + } else { + ipv6_h = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr*, + m->l2_len); + rte_memcpy(&ipv6_addr, ipv6_h->src_addr, + sizeof(struct in6_addr)); + rte_memcpy(ipv6_h->src_addr, ipv6_h->dst_addr, + sizeof(struct in6_addr)); + rte_memcpy(ipv6_h->dst_addr, &ipv6_addr, + sizeof(struct in6_addr)); + ipv6_h->payload_len = rte_cpu_to_be_16( + rte_be_to_cpu_16(ipv6_h->payload_len) - + (m->l4_len - sizeof(*th))); + th->cksum = rte_ipv6_udptcp_cksum(ipv6_h, th); + } + + if (m->pkt_len < ETHER_MIN_LEN) + rte_pktmbuf_append(m, ETHER_MIN_LEN - m->pkt_len); + + if (send_pkt(s, dev, m) != 0) + rte_pktmbuf_free(m); + else + TCP_INC_STATS(TCP_MIB_OUTRSTS); +} static inline uint32_t rx_syn(struct tle_dev *dev, uint32_t type, uint32_t ts, @@ -1846,20 +2022,35 @@ rx_syn(struct tle_dev *dev, uint32_t type, uint32_t ts, uint32_t i, k; int32_t ret; - s = rx_obtain_listen_stream(dev, &pi[0], type); + s = rx_obtain_listen_stream(dev, &pi[0], type, 0); if (s == NULL) { - for (i = 0; i != num; i++) { - rc[i] = ENOENT; - rp[i] = mb[i]; + /* no socket listening this syn, send rst to refuse connect */ + s = TCP_STREAM(get_stream(dev->ctx)); + if (s != NULL) { + sync_refuse(s, dev, &pi[0], mb[0]); + put_stream(dev->ctx, &s->s, 0); + i = 1; + } else { + i = 0; } - return 0; + k = 0; + for (; i != num; i++) { + rc[k] = ENOENT; + rp[k] = mb[i]; + k++; + } + return num - k; } k = 0; for (i = 0; i != num; i++) { - + /* check if stream has space to maintain new connection */ + if (rte_ring_free_count(s->rx.q) == 0 || + (s->s.ctx->streams.nb_free == 0 && + s->s.ctx->streams.nb_cur >= s->s.ctx->prm.max_streams - 1)) + ret = -ENOSPC; /* check that this remote is allowed to connect */ - if (rx_check_stream(s, &pi[i]) != 0) + else if (rx_check_stream(s, &pi[i]) != 0) ret = -ENOENT; else /* syncokie: reply with <SYN,ACK> */ @@ -1882,43 +2073,34 @@ 
tle_tcp_rx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[], { struct stbl *st; struct tle_ctx *ctx; - uint32_t i, j, k, mt, n, t, ts; + uint32_t i, j, k, n, t; + uint64_t ts; union pkt_info pi[num]; union seg_info si[num]; - union { - uint8_t t[TLE_VNUM]; - uint32_t raw; - } stu; + + TCP_ADD_STATS(TCP_MIB_INSEGS, num); ctx = dev->ctx; ts = tcp_get_tms(ctx->cycles_ms_shift); st = CTX_TCP_STLB(ctx); - mt = ((ctx->prm.flags & TLE_CTX_FLAG_ST) == 0); - - stu.raw = 0; /* extract packet info and check the L3/L4 csums */ for (i = 0; i != num; i++) { get_pkt_info(pkt[i], &pi[i], &si[i]); - t = pi[i].tf.type; - pi[i].csf = check_pkt_csum(pkt[i], pi[i].csf, t, IPPROTO_TCP); - stu.t[t] = mt; + pi[i].csf = check_pkt_csum(pkt[i], t, IPPROTO_TCP); } - if (stu.t[TLE_V4] != 0) - stbl_lock(st, TLE_V4); - if (stu.t[TLE_V6] != 0) - stbl_lock(st, TLE_V6); - k = 0; for (i = 0; i != num; i += j) { - t = pi[i].tf.type; /*basic checks for incoming packet */ - if (t >= TLE_VNUM || pi[i].csf != 0 || dev->dp[t] == NULL) { + if (t >= TLE_VNUM || pi[i].csf != 0) { + TCP_INC_STATS(TCP_MIB_INERRS); + if (t < TLE_VNUM) + TCP_INC_STATS(TCP_MIB_CSUMERRORS); rc[k] = EINVAL; rp[k] = pkt[i]; j = 1; @@ -1937,11 +2119,6 @@ tle_tcp_rx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[], } } - if (stu.t[TLE_V4] != 0) - stbl_unlock(st, TLE_V4); - if (stu.t[TLE_V6] != 0) - stbl_unlock(st, TLE_V6); - return num - k; } @@ -1953,21 +2130,37 @@ tle_tcp_stream_accept(struct tle_stream *ts, struct tle_stream *rs[], struct tle_tcp_stream *s; s = TCP_STREAM(ts); - n = _rte_ring_dequeue_burst(s->rx.q, (void **)rs, num); - if (n == 0) - return 0; - /* - * if we still have packets to read, - * then rearm stream RX event. 
- */ - if (n == num && rte_ring_count(s->rx.q) != 0) { - if (tcp_stream_try_acquire(s) > 0 && s->rx.ev != NULL) - tle_event_raise(s->rx.ev); + if (tcp_stream_try_acquire(s) > 0) { + if (s->tcb.state != TCP_ST_LISTEN) { + tcp_stream_release(s); + rte_errno = EINVAL; + return 0; + } + + n = _rte_ring_dequeue_burst(s->rx.q, (void **)rs, num); + if (n == 0) + { + tcp_stream_release(s); + rte_errno = EAGAIN; + return 0; + } + + /* + * if we still have packets to read, + * then rearm stream RX event. + */ + if (n == num && rte_ring_count(s->rx.q) != 0) { + if (s->rx.ev != NULL) + tle_event_raise(s->rx.ev); + } + tcp_stream_release(s); + return n; + } else { tcp_stream_release(s); + rte_errno = EINVAL; + return 0; } - - return n; } uint16_t @@ -1995,6 +2188,7 @@ tle_tcp_tx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[], uint16_t num) stream_drb_free(s, drb + i, j - i); } + TCP_ADD_STATS(TCP_MIB_OUTSEGS, n); return n; } @@ -2010,73 +2204,17 @@ stream_fill_pkt_info(const struct tle_tcp_stream *s, union pkt_info *pi) pi->tf.type = s->s.type; } -static int -stream_fill_addr(struct tle_tcp_stream *s, const struct sockaddr *addr) -{ - const struct sockaddr_in *in4; - const struct sockaddr_in6 *in6; - const struct tle_dev_param *prm; - int32_t rc; - - rc = 0; - s->s.pmsk.raw = UINT32_MAX; - - /* setup L4 src ports and src address fields. 
*/ - if (s->s.type == TLE_V4) { - in4 = (const struct sockaddr_in *)addr; - if (in4->sin_addr.s_addr == INADDR_ANY || in4->sin_port == 0) - return -EINVAL; - - s->s.port.src = in4->sin_port; - s->s.ipv4.addr.src = in4->sin_addr.s_addr; - s->s.ipv4.mask.src = INADDR_NONE; - s->s.ipv4.mask.dst = INADDR_NONE; - - } else if (s->s.type == TLE_V6) { - in6 = (const struct sockaddr_in6 *)addr; - if (memcmp(&in6->sin6_addr, &tle_ipv6_any, - sizeof(tle_ipv6_any)) == 0 || - in6->sin6_port == 0) - return -EINVAL; - - s->s.port.src = in6->sin6_port; - rte_memcpy(&s->s.ipv6.addr.src, &in6->sin6_addr, - sizeof(s->s.ipv6.addr.src)); - rte_memcpy(&s->s.ipv6.mask.src, &tle_ipv6_none, - sizeof(s->s.ipv6.mask.src)); - rte_memcpy(&s->s.ipv6.mask.dst, &tle_ipv6_none, - sizeof(s->s.ipv6.mask.dst)); - } - - /* setup the destination device. */ - rc = stream_fill_dest(s); - if (rc != 0) - return rc; - - /* setup L4 dst address from device param */ - prm = &s->tx.dst.dev->prm; - if (s->s.type == TLE_V4) { - if (s->s.ipv4.addr.dst == INADDR_ANY) - s->s.ipv4.addr.dst = prm->local_addr4.s_addr; - } else if (memcmp(&s->s.ipv6.addr.dst, &tle_ipv6_any, - sizeof(tle_ipv6_any)) == 0) - memcpy(&s->s.ipv6.addr.dst, &prm->local_addr6, - sizeof(s->s.ipv6.addr.dst)); - - return rc; -} - static inline int -tx_syn(struct tle_tcp_stream *s, const struct sockaddr *addr) +tx_syn(struct tle_tcp_stream *s) { int32_t rc; - uint32_t tms, seq; + uint32_t seq; + uint64_t tms; union pkt_info pi; struct stbl *st; struct stbl_entry *se; - /* fill stream address */ - rc = stream_fill_addr(s, addr); + rc = stream_fill_dest(s); if (rc != 0) return rc; @@ -2107,7 +2245,7 @@ tx_syn(struct tle_tcp_stream *s, const struct sockaddr *addr) /* add the stream in stream table */ st = CTX_TCP_STLB(s->s.ctx); - se = stbl_add_stream_lock(st, s); + se = stbl_add_stream(st, &s->s); if (se == NULL) return -ENOBUFS; s->ste = se; @@ -2115,6 +2253,7 @@ tx_syn(struct tle_tcp_stream *s, const struct sockaddr *addr) /* put stream into the 
to-send queue */ txs_enqueue(s->s.ctx, s); + TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); return 0; } @@ -2147,7 +2286,7 @@ tle_tcp_stream_connect(struct tle_stream *ts, const struct sockaddr *addr) /* fill stream, prepare and transmit syn pkt */ s->tcb.uop |= TCP_OP_CONNECT; - rc = tx_syn(s, addr); + rc = tx_syn(s); tcp_stream_release(s); /* error happened, do a cleanup */ @@ -2160,13 +2299,29 @@ tle_tcp_stream_connect(struct tle_stream *ts, const struct sockaddr *addr) uint16_t tle_tcp_stream_recv(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num) { - uint32_t n; + uint32_t n, i; + uint32_t free_slots; struct tle_tcp_stream *s; s = TCP_STREAM(ts); + + free_slots = rte_ring_free_count(s->rx.q); + n = _rte_ring_mcs_dequeue_burst(s->rx.q, (void **)pkt, num); - if (n == 0) + if (n == 0) { + if (s->tcb.err != 0) { + rte_errno = s->tcb.err; + } else { + rte_errno = EAGAIN; + } return 0; + } + + for (i = 0; i < n; ++i) + s->tcb.rcv.cpy += rte_pktmbuf_pkt_len(pkt[i]); + + /* update receive window with left recv buffer*/ + s->tcb.rcv.wnd = calc_rx_wnd(s, s->tcb.rcv.wscale); /* * if we still have packets to read, @@ -2176,28 +2331,99 @@ tle_tcp_stream_recv(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num) if (tcp_stream_try_acquire(s) > 0 && s->rx.ev != NULL) tle_event_raise(s->rx.ev); tcp_stream_release(s); + /* if we have received fin, no more data will come, raise err event. 
*/ + } else if (s->tcb.rcv.frs.on == 2) { + if (tcp_stream_try_acquire(s) > 0 && s->err.ev != NULL) + tle_event_raise(s->err.ev); + tcp_stream_release(s); + } + + /* update recv win to the remote */ + if (free_slots < RECV_WIN_NOTIFY_THRESH && + rte_ring_free_count(s->rx.q) >= RECV_WIN_NOTIFY_THRESH) { + s->tcb.snd.update_rcv = true; + txs_enqueue(s->s.ctx, s); } return n; } +uint16_t +tle_tcp_stream_inq(struct tle_stream *ts) +{ + struct tle_tcp_stream *s; + + s = TCP_STREAM(ts); + return s->tcb.rcv.nxt - s->tcb.rcv.cpy; +} + +#define DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) + +ssize_t +tle_tcp_stream_readv(struct tle_stream *ts, const struct iovec *iov, int iovcnt) +{ + struct msghdr msg = {0}; + + msg.msg_iov = DECONST(struct iovec *, iov); /* Recover const later */ + msg.msg_iovlen = iovcnt; + return tle_tcp_stream_recvmsg(ts, &msg); +} + ssize_t -tle_tcp_stream_readv(struct tle_stream *ts, const struct iovec *iov, - int iovcnt) +tle_tcp_stream_recvmsg(struct tle_stream *ts, struct msghdr *msg) { + size_t sz; int32_t i; uint32_t mn, n, tn; - size_t sz; + uint32_t free_slots; struct tle_tcp_stream *s; struct iovec iv; struct rxq_objs mo[2]; + struct sockaddr_in *addr; + struct sockaddr_in6 *addr6; + const struct iovec *iov = msg->msg_iov; + int iovcnt = msg->msg_iovlen; s = TCP_STREAM(ts); + free_slots = rte_ring_free_count(s->rx.q); + /* get group of packets */ mn = tcp_rxq_get_objs(s, mo); - if (mn == 0) - return 0; + if (mn == 0) { + if (s->tcb.err != 0) + rte_errno = s->tcb.err; + else + rte_errno = EAGAIN; + return -1; + } + + if (!ts->option.timestamp) + ts->timestamp = mo[0].mb[0]->timestamp; + + if (msg->msg_control != NULL) { + if (ts->option.timestamp) + tle_set_timestamp(msg, mo[0].mb[0]); + else + msg->msg_controllen = 0; + } + + if (msg->msg_name != NULL) { + if (s->s.type == TLE_V4) { + addr = (struct sockaddr_in*)msg->msg_name; + addr->sin_family = AF_INET; + addr->sin_addr.s_addr = s->s.ipv4.addr.src; + addr->sin_port = 
s->s.port.src; + msg->msg_namelen = sizeof(struct sockaddr_in); + } else { + addr6 = (struct sockaddr_in6*)msg->msg_name; + addr6->sin6_family = AF_INET6; + rte_memcpy(&addr6->sin6_addr, &s->s.ipv6.addr.src, + sizeof(struct sockaddr_in6)); + addr6->sin6_port = s->s.port.src; + msg->msg_namelen = sizeof(struct sockaddr_in6); + } + } sz = 0; n = 0; @@ -2229,6 +2455,8 @@ tle_tcp_stream_readv(struct tle_stream *ts, const struct iovec *iov, } tcp_rxq_consume(s, tn); + /* update receive window with left recv buffer*/ + s->tcb.rcv.wnd = calc_rx_wnd(s, s->tcb.rcv.wscale); /* * if we still have packets to read, @@ -2238,6 +2466,20 @@ tle_tcp_stream_readv(struct tle_stream *ts, const struct iovec *iov, if (tcp_stream_try_acquire(s) > 0 && s->rx.ev != NULL) tle_event_raise(s->rx.ev); tcp_stream_release(s); + /* if we have received fin, no more data will come, raise err event. */ + } else if (s->tcb.rcv.frs.on == 2) { + if (tcp_stream_try_acquire(s) > 0 && s->err.ev != NULL) + tle_event_raise(s->err.ev); + tcp_stream_release(s); + } + + s->tcb.rcv.cpy += sz; + + /* update recv win to the remote */ + if (free_slots < RECV_WIN_NOTIFY_THRESH && + rte_ring_free_count(s->rx.q) >= RECV_WIN_NOTIFY_THRESH) { + s->tcb.snd.update_rcv = true; + txs_enqueue(s->s.ctx, s); } return sz; @@ -2263,48 +2505,35 @@ tx_segments(struct tle_tcp_stream *s, uint64_t ol_flags, if (i == num) { /* queue packets for further transmission. 
*/ rc = _rte_ring_enqueue_bulk(s->tx.q, (void **)segs, num); - if (rc != 0) + if (rc != 0) { + rc = -EAGAIN; free_mbufs(segs, num); + } } return rc; } -uint16_t -tle_tcp_stream_send(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num) +static inline uint16_t +stream_send(struct tle_tcp_stream *s, struct rte_mbuf *pkt[], + uint16_t num, uint16_t mss, uint64_t ol_flags) { - uint32_t i, j, k, mss, n, state; + uint16_t i, j, k; int32_t rc; - uint64_t ol_flags; - struct tle_tcp_stream *s; + uint32_t n, free_slots; struct rte_mbuf *segs[TCP_MAX_PKT_SEG]; - - s = TCP_STREAM(ts); - - /* mark stream as not closable. */ - if (tcp_stream_acquire(s) < 0) { - rte_errno = EAGAIN; - return 0; - } - - state = s->tcb.state; - if (state != TCP_ST_ESTABLISHED && state != TCP_ST_CLOSE_WAIT) { - rte_errno = ENOTCONN; - tcp_stream_release(s); - return 0; - } - - mss = s->tcb.snd.mss; - ol_flags = s->tx.dst.ol_flags; + int32_t pkt_len; k = 0; rc = 0; + pkt_len = 0; while (k != num) { /* prepare and check for TX */ for (i = k; i != num; i++) { if (pkt[i]->pkt_len > mss || pkt[i]->nb_segs > TCP_MAX_PKT_SEG) break; + pkt_len += pkt[i]->pkt_len; rc = tcp_fill_mbuf(pkt[i], s, &s->tx.dst, ol_flags, s->s.port, 0, TCP_FLAG_ACK, 0, 0); if (rc != 0) @@ -2328,6 +2557,7 @@ tle_tcp_stream_send(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num) pkt[j]->l3_len + pkt[j]->l4_len); pkt[j]->ol_flags &= ol_flags; + pkt_len -= pkt[j]->pkt_len; } break; } @@ -2339,8 +2569,10 @@ tle_tcp_stream_send(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num) /* segment large packet and enqueue for sending */ } else if (i != num) { + free_slots = rte_ring_free_count(s->tx.q); + free_slots = RTE_MIN(free_slots, RTE_DIM(segs)); /* segment the packet. 
*/ - rc = tcp_segmentation(pkt[i], segs, RTE_DIM(segs), + rc = tcp_segmentation(pkt[i], segs, free_slots, &s->tx.dst, mss); if (rc < 0) { rte_errno = -rc; @@ -2351,19 +2583,161 @@ tle_tcp_stream_send(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num) if (rc == 0) { /* free the large mbuf */ rte_pktmbuf_free(pkt[i]); + pkt_len += pkt[i]->pkt_len; /* set the mbuf as consumed */ k++; - } else + } else { /* no space left in tx queue */ + RTE_VERIFY(0); break; + } } } + s->tcb.snd.waitlen += pkt_len; + return k; +} + +static inline uint16_t +stream_send_tso(struct tle_tcp_stream *s, struct rte_mbuf *pkt[], + uint16_t num, uint16_t mss, uint64_t ol_flags) +{ + uint16_t i, k, nb_segs; + int32_t rc, pkt_len; + uint64_t ol_flags1; + struct rte_mbuf *pre_tail; + + k = 0; + rc = 0; + while (k != num) { + /* Make sure there is at least one slot available */ + if (rte_ring_free_count(s->tx.q) == 0) + break; + + /* prepare and check for TX */ + nb_segs = 0; + pkt_len = 0; + pre_tail = NULL; + for (i = k; i != num; i++) { + if (pkt[i]->nb_segs != 1) + rte_panic("chained mbuf: %p\n", pkt[i]); + /* We shall consider cwnd and snd wnd when limit len */ + if (nb_segs + pkt[i]->nb_segs <= TCP_MAX_PKT_SEG && + pkt_len + pkt[i]->pkt_len <= 65535 - RESERVE_HEADER_LEN) { + nb_segs += pkt[i]->nb_segs; + pkt_len += pkt[i]->pkt_len; + if (pre_tail) + pre_tail->next = pkt[i]; + pre_tail = rte_pktmbuf_lastseg(pkt[i]); + } else { + /* enqueue this one now */ + break; + } + } + + if (unlikely(i == k)) { + /* pkt[k] is a too big packet, now we fall back to + * non-tso send; we can optimize it later by + * splitting the mbuf. 
+ */ + if (stream_send(s, &pkt[k], 1, mss, ol_flags) == 1) { + k++; + continue; + } else + break; + } + + pkt[k]->nb_segs = nb_segs; + pkt[k]->pkt_len = pkt_len; + + ol_flags1 = ol_flags; + if (pkt_len > mss) + ol_flags1 |= PKT_TX_TCP_SEG; + + rc = tcp_fill_mbuf(pkt[k], s, &s->tx.dst, ol_flags1, + s->s.port, 0, TCP_FLAG_ACK, 0, 0); + if (rc != 0) /* hard to recover */ + rte_panic("failed to fill mbuf: %p\n", pkt[k]); + + /* correct mss */ + pkt[k]->tso_segsz = mss; + + s->tcb.snd.waitlen += pkt_len; + /* We already make sure there is at least one slot */ + if (_rte_ring_enqueue_burst(s->tx.q, (void **)pkt + k, 1) < 1) + RTE_VERIFY(0); + + k = i; + } + + return k; +} + +uint16_t +tle_tcp_stream_send(struct tle_stream *ts, struct rte_mbuf *pkt[], uint16_t num) +{ + uint16_t k, mss, state; + uint64_t ol_flags; + struct tle_tcp_stream *s; + + s = TCP_STREAM(ts); + + if (s->tcb.err != 0) { + rte_errno = s->tcb.err; + return 0; + } + + /* mark stream as not closable. */ + if (tcp_stream_acquire(s) < 0) { + rte_errno = EAGAIN; + return 0; + } + + state = s->tcb.state; + switch (state) { + case TCP_ST_ESTABLISHED: + case TCP_ST_CLOSE_WAIT: + break; + case TCP_ST_FIN_WAIT_1: + case TCP_ST_FIN_WAIT_2: + case TCP_ST_CLOSING: + case TCP_ST_LAST_ACK: + rte_errno = EPIPE; + tcp_stream_release(s); + return 0; + default: + rte_errno = ENOTCONN; + tcp_stream_release(s); + return 0; + } + + mss = s->tcb.snd.mss; + + ol_flags = s->tx.dst.ol_flags; + + /* Some reference number on the case: + * "<netperf with uss> - tap - <kernel stack> - <netserver>" + * ~2Gbps with tso disabled; + * ~16Gbps with tso enabled. 
+ */ + if (rte_ring_free_count(s->tx.q) == 0) { + /* Block send may try without waiting for tx event (raised by acked + * data), so here we will still put this stream for further process + */ + txs_enqueue(s->s.ctx, s); + rte_errno = EAGAIN; + k = 0; + } else if (s->tx.dst.dev->prm.tx_offload & DEV_TX_OFFLOAD_TCP_TSO) + k = stream_send_tso(s, pkt, num, mss, ol_flags); + else + k = stream_send(s, pkt, num, mss, ol_flags); + /* notify BE about more data to send */ if (k != 0) txs_enqueue(s->s.ctx, s); + /* if possible, re-arm stream write event. */ - if (rte_ring_free_count(s->tx.q) != 0 && s->tx.ev != NULL) + if (rte_ring_free_count(s->tx.q) && s->tx.ev != NULL && k == num) tle_event_raise(s->tx.ev); tcp_stream_release(s); @@ -2382,9 +2756,15 @@ tle_tcp_stream_writev(struct tle_stream *ts, struct rte_mempool *mp, struct tle_tcp_stream *s; struct iovec iv; struct rte_mbuf *mb[2 * MAX_PKT_BURST]; + uint16_t mss; s = TCP_STREAM(ts); + if (s->tcb.err != 0) { + rte_errno = s->tcb.err; + return -1; + } + /* mark stream as not closable. 
*/ if (tcp_stream_acquire(s) < 0) { rte_errno = EAGAIN; @@ -2392,7 +2772,18 @@ tle_tcp_stream_writev(struct tle_stream *ts, struct rte_mempool *mp, } state = s->tcb.state; - if (state != TCP_ST_ESTABLISHED && state != TCP_ST_CLOSE_WAIT) { + switch (state) { + case TCP_ST_ESTABLISHED: + case TCP_ST_CLOSE_WAIT: + break; + case TCP_ST_FIN_WAIT_1: + case TCP_ST_FIN_WAIT_2: + case TCP_ST_CLOSING: + case TCP_ST_LAST_ACK: + rte_errno = EPIPE; + tcp_stream_release(s); + return -1; + default: rte_errno = ENOTCONN; tcp_stream_release(s); return -1; @@ -2403,11 +2794,24 @@ tle_tcp_stream_writev(struct tle_stream *ts, struct rte_mempool *mp, for (i = 0; i != iovcnt; i++) tsz += iov[i].iov_len; + if (tsz == 0) { + tcp_stream_release(s); + return 0; + } + slen = rte_pktmbuf_data_room_size(mp); - slen = RTE_MIN(slen, s->tcb.snd.mss); + mss = s->tcb.snd.mss; + + slen = RTE_MIN(slen, mss); num = (tsz + slen - 1) / slen; n = rte_ring_free_count(s->tx.q); + + if (n == 0) { + tcp_stream_release(s); + return 0; + } + num = RTE_MIN(num, n); n = RTE_MIN(num, RTE_DIM(mb)); @@ -2451,7 +2855,6 @@ tle_tcp_stream_writev(struct tle_stream *ts, struct rte_mempool *mp, k = 0; if (k != j) { - /* free pkts that were not enqueued */ free_mbufs(mb + k, j - k); @@ -2466,14 +2869,16 @@ tle_tcp_stream_writev(struct tle_stream *ts, struct rte_mempool *mp, } } - if (k != 0) { - + if (k != 0) { /* notify BE about more data to send */ txs_enqueue(s->s.ctx, s); /* if possible, re-arm stream write event. 
*/ if (rte_ring_free_count(s->tx.q) != 0 && s->tx.ev != NULL) tle_event_raise(s->tx.ev); + } else { + rte_errno = EAGAIN; + sz = -1; } tcp_stream_release(s); @@ -2485,7 +2890,7 @@ static inline void tx_data_fin(struct tle_tcp_stream *s, uint32_t tms, uint32_t state) { /* try to send some data */ - tx_nxt_data(s, tms); + uint32_t tn = tx_nxt_data(s, tms); /* we also have to send a FIN */ if (state != TCP_ST_ESTABLISHED && @@ -2495,6 +2900,13 @@ tx_data_fin(struct tle_tcp_stream *s, uint32_t tms, uint32_t state) s->tcb.snd.fss = ++s->tcb.snd.nxt; send_ack(s, tms, TCP_FLAG_FIN | TCP_FLAG_ACK); } + + if (s->tcb.snd.update_rcv) { + if (tn == 0) + send_ack(s, tms, TCP_FLAG_ACK); /* update recv window */ + + s->tcb.snd.update_rcv = false; + } } static inline void @@ -2507,7 +2919,7 @@ tx_stream(struct tle_tcp_stream *s, uint32_t tms) if (state == TCP_ST_SYN_SENT) { /* send the SYN, start the rto timer */ send_ack(s, tms, TCP_FLAG_SYN); - timer_start(s); + timer_start(s, TIMER_RTO, s->tcb.snd.rto); } else if (state >= TCP_ST_ESTABLISHED && state <= TCP_ST_LAST_ACK) { @@ -2515,7 +2927,7 @@ tx_stream(struct tle_tcp_stream *s, uint32_t tms) /* start RTO timer. */ if (s->tcb.snd.nxt != s->tcb.snd.una) - timer_start(s); + timer_start(s, TIMER_RTO, s->tcb.snd.rto); } } @@ -2544,7 +2956,6 @@ rto_stream(struct tle_tcp_stream *s, uint32_t tms) if (s->tcb.snd.nb_retx < s->tcb.snd.nb_retm) { if (state >= TCP_ST_ESTABLISHED && state <= TCP_ST_LAST_ACK) { - /* update SND.CWD and SND.SSTHRESH */ rto_cwnd_update(&s->tcb); @@ -2570,50 +2981,131 @@ rto_stream(struct tle_tcp_stream *s, uint32_t tms) * than one SYN or SYN/ACK retransmissions or true loss * detection has been made. 
*/ - if (s->tcb.snd.nb_retx != 0) + if (s->tcb.snd.nb_retx != 0) { s->tcb.snd.cwnd = s->tcb.snd.mss; + CWND_INFO("synsent", s->tcb.snd.cwnd); + } send_ack(s, tms, TCP_FLAG_SYN); - - } else if (state == TCP_ST_TIME_WAIT) { - stream_term(s); + TCP_INC_STATS(TCP_MIB_RETRANSSEGS); } /* RFC6298:5.5 back off the timer */ s->tcb.snd.rto = rto_roundup(2 * s->tcb.snd.rto); s->tcb.snd.nb_retx++; - timer_restart(s); + timer_restart(s, TIMER_RTO, s->tcb.snd.rto); } else { - send_rst(s, s->tcb.snd.nxt); + if (state == TCP_ST_SYN_SENT) { + if (stream_fill_dest(s) != 0 || + is_broadcast_ether_addr((struct ether_addr *)s->tx.dst.hdr)) + s->tcb.err = EHOSTUNREACH; + else + /* TODO: do we send rst on this */ + s->tcb.err = ENOTCONN; + } else + send_rst(s, s->tcb.snd.una); stream_term(s); } } +static inline void +set_keepalive_timer(struct tle_tcp_stream *s) +{ + if (s->s.option.keepalive) { + if (s->tcb.state == TCP_ST_ESTABLISHED) { + if (s->tcb.snd.nb_keepalive == 0) + timer_reset(s, TIMER_KEEPALIVE, + s->s.option.keepidle * MS_PER_S); + else + timer_reset(s, TIMER_KEEPALIVE, + s->s.option.keepintvl * MS_PER_S); + } + } else { + timer_stop(s, TIMER_KEEPALIVE); + s->tcb.snd.nb_keepalive = 0; + } +} + int tle_tcp_process(struct tle_ctx *ctx, uint32_t num) { - uint32_t i, k, tms; + uint8_t type; + uint32_t i, k; + uint64_t tms; struct sdr *dr; struct tle_timer_wheel *tw; struct tle_stream *p; struct tle_tcp_stream *s, *rs[num]; - /* process streams with RTO exipred */ + tms = tcp_get_tms(ctx->cycles_ms_shift); + /* process streams with RTO exipred */ tw = CTX_TCP_TMWHL(ctx); - tms = tcp_get_tms(ctx->cycles_ms_shift); tle_timer_expire(tw, tms); k = tle_timer_get_expired_bulk(tw, (void **)rs, RTE_DIM(rs)); for (i = 0; i != k; i++) { - - s = rs[i]; - s->timer.handle = NULL; - if (tcp_stream_try_acquire(s) > 0) - rto_stream(s, tms); - tcp_stream_release(s); + s = timer_stream(rs[i]); + type = timer_type(rs[i]); + s->timer.handle[type] = NULL; + + switch (type) { + case TIMER_RTO: + /* FE 
cannot change stream into below states, + * that's why we don't put it into lock + */ + if (s->tcb.state == TCP_ST_TIME_WAIT || + s->tcb.state == TCP_ST_FIN_WAIT_2) { + tcp_stream_down(s); + stream_term(s); + tcp_stream_up(s); + } else if (tcp_stream_acquire(s) > 0) { + /* + * stream may be closed in frontend concurrently. + * if stream has already been closed, it need not + * to retransmit anymore. + */ + if (s->tcb.state != TCP_ST_CLOSED) + rto_stream(s, tms); + tcp_stream_release(s); + } + /* Fail to aquire lock? FE is shutdown or close this + * stream, either FIN or RST needs to be sent, which + * means it's in tsq, will be processed later. + */ + break; + case TIMER_DACK: + if (rte_atomic32_read(&s->tx.arm) == 0 && + s->tcb.rcv.nxt != s->tcb.snd.ack && + tcp_stream_acquire(s) > 0) { + s->s.option.tcpquickack = 8; + send_ack(s, tms, TCP_FLAG_ACK); + tcp_stream_release(s); + } + break; + case TIMER_KEEPALIVE: + if (s->tcb.snd.nb_keepalive < s->s.option.keepcnt) { + if (tcp_stream_try_acquire(s) > 0 && + s->tcb.state == TCP_ST_ESTABLISHED) { + send_keepalive(s); + s->tcb.snd.nb_keepalive++; + timer_start(s, TIMER_KEEPALIVE, + s->s.option.keepintvl * MS_PER_S); + } + tcp_stream_release(s); + } else { + tcp_stream_down(s); + send_rst(s, s->tcb.snd.nxt); + s->tcb.err = ETIMEDOUT; + stream_term(s); + tcp_stream_up(s); + } + break; + default: + rte_panic("Invalid timer type: %d\n", type); + } } /* process streams from to-send queue */ @@ -2621,20 +3113,63 @@ tle_tcp_process(struct tle_ctx *ctx, uint32_t num) k = txs_dequeue_bulk(ctx, rs, RTE_DIM(rs)); for (i = 0; i != k; i++) { - s = rs[i]; - rte_atomic32_set(&s->tx.arm, 0); - if (tcp_stream_try_acquire(s) > 0) + if (s->tcb.uop & TCP_OP_RESET) { + /* already put into death row in close() */ + send_rst(s, s->tcb.snd.nxt); + continue; + } + + if (tcp_stream_acquire(s) > 0) { + if (s->tcb.uop & TCP_OP_KEEPALIVE) { + s->tcb.uop &= ~TCP_OP_KEEPALIVE; + set_keepalive_timer(s); + } + + if (s->tcb.state == TCP_ST_FIN_WAIT_2 
&& + s->tcb.uop & TCP_OP_CLOSE) { + /* This could happen after: + * 1) shutdown; + * 2) FIN sent; + * 3) ack received; + * 4) close; + */ + timer_start(s, TIMER_RTO, s->tcb.snd.rto_fw); + tcp_stream_release(s); + continue; + } + + if (s->tcb.state == TCP_ST_ESTABLISHED && + s->s.option.tcpcork) { + if (s->tcb.snd.cork_ts == 0) + s->tcb.snd.cork_ts = (uint32_t)tms; + + if (s->tcb.snd.waitlen < s->tcb.snd.mss && + (uint32_t)tms - s->tcb.snd.cork_ts < 200) { + txs_enqueue(s->s.ctx, s); + tcp_stream_release(s); + continue; + } + + s->tcb.snd.cork_ts = 0; + } + tx_stream(s, tms); - else + tcp_stream_release(s); + continue; + } + + if (s->tcb.state != TCP_ST_CLOSED) txs_enqueue(s->s.ctx, s); - tcp_stream_release(s); + + /* TCP_ST_CLOSED? See close with TCP_ST_CLOSED state */ } /* collect streams to close from the death row */ dr = CTX_TCP_SDR(ctx); + rte_spinlock_lock(&dr->lock); for (k = 0, p = STAILQ_FIRST(&dr->be); k != num && p != NULL; k++, p = STAILQ_NEXT(p, link)) @@ -2645,9 +3180,21 @@ tle_tcp_process(struct tle_ctx *ctx, uint32_t num) else STAILQ_FIRST(&dr->be) = p; + /* if stream still in tsq, wait one more round */ + for (i = 0; i != k; i++) { + if (rte_atomic32_read(&rs[i]->tx.arm) > 0) { + STAILQ_INSERT_TAIL(&dr->be, &rs[i]->s, link); + rs[i] = NULL; + } + } + + rte_spinlock_unlock(&dr->lock); + /* cleanup closed streams */ for (i = 0; i != k; i++) { s = rs[i]; + if (s == NULL) + continue; tcp_stream_down(s); tcp_stream_reset(ctx, s); } diff --git a/lib/libtle_l4p/tcp_rxtx.h b/lib/libtle_l4p/tcp_rxtx.h new file mode 100644 index 0000000..e7f8e3e --- /dev/null +++ b/lib/libtle_l4p/tcp_rxtx.h @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2016-2017 Intel Corporation. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _TCP_RXTX_H_ +#define _TCP_RXTX_H_ + +#include "tcp_stream.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static inline uint32_t +calc_seg_cnt(uint32_t plen, uint32_t mss) +{ + if (plen > mss) + return (plen + mss - 1) / mss; + else + return 1; +} + +static inline uint32_t +get_ip_pid(struct tle_dev *dev, uint32_t num, uint32_t type, uint32_t st) +{ + uint32_t pid; + rte_atomic32_t *pa; + + pa = &dev->tx.packet_id[type]; + + if (st == 0) { + pid = rte_atomic32_add_return(pa, num); + return pid - num; + } else { + pid = rte_atomic32_read(pa); + rte_atomic32_set(pa, pid + num); + return pid; + } +} + +static inline void +fill_tcph(struct tcp_hdr *l4h, const struct tcb *tcb, union l4_ports port, + uint32_t seq, uint8_t hlen, uint8_t flags) +{ + uint16_t wnd; + + l4h->src_port = port.dst; + l4h->dst_port = port.src; + + wnd = (flags & TCP_FLAG_SYN) ? + RTE_MIN(tcb->rcv.wnd, (uint32_t)UINT16_MAX) : + tcb->rcv.wnd >> tcb->rcv.wscale; + + /* ??? use sse shuffle to hton all remaining 16 bytes at once. ??? 
*/ + l4h->sent_seq = rte_cpu_to_be_32(seq); + l4h->recv_ack = rte_cpu_to_be_32(tcb->rcv.nxt); + l4h->data_off = hlen / TCP_DATA_ALIGN << TCP_DATA_OFFSET; + l4h->tcp_flags = flags; + l4h->rx_win = rte_cpu_to_be_16(wnd); + l4h->cksum = 0; + l4h->tcp_urp = 0; + + if (flags & TCP_FLAG_SYN) + fill_syn_opts(l4h + 1, &tcb->so); + else if ((flags & TCP_FLAG_RST) == 0 && tcb->so.ts.raw != 0) + fill_tms_opts(l4h + 1, tcb->snd.ts, tcb->rcv.ts); +} + +static inline int +tcp_fill_mbuf(struct rte_mbuf *m, const struct tle_tcp_stream *s, + const struct tle_dest *dst, uint64_t ol_flags, + union l4_ports port, uint32_t seq, uint32_t flags, + uint32_t pid, uint32_t swcsm) +{ + uint32_t l4, len, plen; + struct tcp_hdr *l4h; + char *l2h, *l3; + + len = dst->l2_len + dst->l3_len; + plen = m->pkt_len; + + if (flags & TCP_FLAG_SYN) { + /* basic length */ + l4 = sizeof(*l4h) + TCP_OPT_LEN_MSS; + + /* add wscale space and nop */ + if (s->tcb.so.wscale) { + l4 += TCP_OPT_LEN_WSC + TCP_OPT_LEN_NOP; + } + + /* add timestamp space and nop */ + if (s->tcb.so.ts.raw) { + l4 += TCP_TX_OPT_LEN_TMS; + } + } else if ((flags & TCP_FLAG_RST) == 0 && s->tcb.rcv.ts != 0) { + l4 = sizeof(*l4h) + TCP_TX_OPT_LEN_TMS; + } else { + l4 = sizeof(*l4h); + } + + /* adjust mbuf to put L2/L3/L4 headers into it. */ + l2h = rte_pktmbuf_prepend(m, len + l4); + if (l2h == NULL) + return -EINVAL; + + /* copy L2/L3 header */ + rte_memcpy(l2h, dst->hdr, len); + + /* setup TCP header & options */ + l4h = (struct tcp_hdr *)(l2h + len); + fill_tcph(l4h, &s->tcb, port, seq, l4, flags); + + /* setup mbuf TX offload related fields. */ + m->tx_offload = _mbuf_tx_offload(dst->l2_len, dst->l3_len, l4, 0, 0, 0); + m->ol_flags |= ol_flags; + + /* update proto specific fields. 
*/ + + l3 = l2h + dst->l2_len; + if (((struct ipv4_hdr*)l3)->version_ihl>>4 == 4) { + struct ipv4_hdr *l3h; + l3h = (struct ipv4_hdr *)l3; + l3h->packet_id = rte_cpu_to_be_16(pid); + l3h->total_length = rte_cpu_to_be_16(plen + dst->l3_len + l4); + + if ((ol_flags & PKT_TX_TCP_CKSUM) != 0) + l4h->cksum = _ipv4x_phdr_cksum(l3h, m->l3_len, + ol_flags); + else if (swcsm != 0) + l4h->cksum = _ipv4_udptcp_mbuf_cksum(m, len, l3h); + + if ((ol_flags & PKT_TX_IP_CKSUM) == 0 && swcsm != 0) + l3h->hdr_checksum = _ipv4x_cksum(l3h, m->l3_len); + } else { + struct ipv6_hdr *l3h; + l3h = (struct ipv6_hdr *)l3; + l3h->payload_len = rte_cpu_to_be_16(plen + l4); + if ((ol_flags & PKT_TX_TCP_CKSUM) != 0) + l4h->cksum = rte_ipv6_phdr_cksum(l3h, ol_flags); + else if (swcsm != 0) + l4h->cksum = _ipv6_udptcp_mbuf_cksum(m, len, l3h); + } + + return 0; +} + +static inline int +stream_drb_empty(struct tle_tcp_stream *s) +{ + return rte_ring_empty(s->tx.drb.r); +} + +static inline void +stream_drb_free(struct tle_tcp_stream *s, struct tle_drb *drbs[], + uint32_t nb_drb) +{ + _rte_ring_enqueue_burst(s->tx.drb.r, (void **)drbs, nb_drb); +} + +static inline uint32_t +stream_drb_alloc(struct tle_tcp_stream *s, struct tle_drb *drbs[], + uint32_t nb_drb) +{ + return _rte_ring_dequeue_burst(s->tx.drb.r, (void **)drbs, nb_drb); +} + +/* + * queue standalone packet to he particular output device + * It assumes that: + * - L2/L3/L4 headers should be already set. + * - packet fits into one segment. + */ +static inline int +send_pkt(struct tle_tcp_stream *s, struct tle_dev *dev, struct rte_mbuf *m) +{ + uint32_t n, nb; + struct tle_drb *drb; + + if (stream_drb_alloc(s, &drb, 1) == 0) + return -ENOBUFS; + + /* enqueue pkt for TX. */ + nb = 1; + n = tle_dring_mp_enqueue(&dev->tx.dr, (const void * const*)&m, 1, + &drb, &nb); + + /* free unused drbs. */ + if (nb != 0) + stream_drb_free(s, &drb, 1); + + return (n == 1) ? 
0 : -ENOBUFS; +} + +#define TCP_OLFLAGS_CKSUM(flags) (flags & (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM)) + +static inline int +send_ctrl_pkt(struct tle_tcp_stream *s, struct rte_mbuf *m, uint32_t seq, + uint32_t flags) +{ + const struct tle_dest *dst; + uint32_t pid, type; + int32_t rc; + + dst = &s->tx.dst; + type = s->s.type; + pid = get_ip_pid(dst->dev, 1, type, (s->flags & TLE_CTX_FLAG_ST) != 0); + + rc = tcp_fill_mbuf(m, s, dst, TCP_OLFLAGS_CKSUM(dst->ol_flags), + s->s.port, seq, flags, pid, 1); + if (rc == 0) + rc = send_pkt(s, dst->dev, m); + + return rc; +} + +static inline int +send_rst(struct tle_tcp_stream *s, uint32_t seq) +{ + struct rte_mbuf *m; + int32_t rc; + + m = rte_pktmbuf_alloc(s->tx.dst.head_mp); + if (m == NULL) + return -ENOMEM; + + rc = send_ctrl_pkt(s, m, seq, TCP_FLAG_RST | TCP_FLAG_ACK); + if (rc != 0) + rte_pktmbuf_free(m); + else + TCP_INC_STATS(TCP_MIB_OUTRSTS); + + return rc; +} + + + +#ifdef __cplusplus +} +#endif + +#endif /* _TCP_RXTX_H_ */ diff --git a/lib/libtle_l4p/tcp_stream.c b/lib/libtle_l4p/tcp_stream.c index 676521b..4a65053 100644 --- a/lib/libtle_l4p/tcp_stream.c +++ b/lib/libtle_l4p/tcp_stream.c @@ -20,6 +20,8 @@ #include <rte_ip.h> #include <rte_tcp.h> +#include <netinet/tcp.h> + #include "tcp_stream.h" #include "tcp_timer.h" #include "stream_table.h" @@ -27,6 +29,7 @@ #include "tcp_ctl.h" #include "tcp_ofo.h" #include "tcp_txq.h" +#include "tcp_rxtx.h" static void unuse_stream(struct tle_tcp_stream *s) @@ -38,25 +41,27 @@ unuse_stream(struct tle_tcp_stream *s) static void fini_stream(struct tle_tcp_stream *s) { - if (s != NULL) { - rte_free(s->rx.q); - tcp_ofo_free(s->rx.ofo); - rte_free(s->tx.q); - rte_free(s->tx.drb.r); - } + rte_free(s); } static void tcp_fini_streams(struct tle_ctx *ctx) { - uint32_t i; struct tcp_streams *ts; + struct tle_stream *s; ts = CTX_TCP_STREAMS(ctx); if (ts != NULL) { stbl_fini(&ts->st); - for (i = 0; i != ctx->prm.max_streams; i++) - fini_stream(&ts->s[i]); + + /* TODO: free those in use? 
may be not necessary, as we assume + * all streams have been closed and are free. + */ + while (ctx->streams.nb_free--) { + s = STAILQ_FIRST(&ctx->streams.free); + STAILQ_FIRST(&ctx->streams.free) = STAILQ_NEXT(s, link); + fini_stream(TCP_STREAM(s)); + } /* free the timer wheel */ tle_timer_free(ts->tmr); @@ -94,61 +99,100 @@ alloc_ring(uint32_t n, uint32_t flags, int32_t socket) return r; } +/* stream memory layout: + * [tle_tcp_stream] [rx.q] [rx.ofo] [tx.q] [tx.drb.r] + */ static int -init_stream(struct tle_ctx *ctx, struct tle_tcp_stream *s) +add_stream(struct tle_ctx *ctx) { - size_t bsz, rsz, sz; - uint32_t f, i, k, n, nb; + size_t sz_s, sz_rxq, sz_ofo, sz_txq, sz_drb_r, sz; + /* for rx.q */ + uint32_t n_rxq; + /* for rx.ofo */ + struct ofo *ofo; + struct rte_mbuf **obj; + uint32_t ndb, nobj; + size_t dsz, osz; + /* for tx.q */ + uint32_t n_txq; + /* for tx.drb.r */ + size_t bsz, rsz; struct tle_drb *drb; - char name[RTE_RING_NAMESIZE]; - - f = ((ctx->prm.flags & TLE_CTX_FLAG_ST) == 0) ? 0 : - (RING_F_SP_ENQ | RING_F_SC_DEQ); - - /* init RX part. */ - - n = RTE_MAX(ctx->prm.max_stream_rbufs, 1U); - s->rx.q = alloc_ring(n, f | RING_F_SP_ENQ, ctx->prm.socket_id); - if (s->rx.q == NULL) - return -ENOMEM; - - s->rx.ofo = tcp_ofo_alloc(n, ctx->prm.socket_id); - if (s->rx.ofo == NULL) - return -ENOMEM; - - /* init TX part. 
*/ + uint32_t k, nb, n_drb; - n = RTE_MAX(ctx->prm.max_stream_sbufs, 1U); - s->tx.q = alloc_ring(n, f | RING_F_SC_DEQ, ctx->prm.socket_id); - if (s->tx.q == NULL) - return -ENOMEM; + uint32_t f, i; + char name[RTE_RING_NAMESIZE]; + struct tle_tcp_stream *s; + // stream + sz_s = RTE_ALIGN_CEIL(sizeof(*s), RTE_CACHE_LINE_SIZE); + + // rx.q + n_rxq = RTE_MAX(ctx->prm.max_stream_rbufs, 1U); + n_rxq = rte_align32pow2(n_rxq); + sz_rxq = rte_ring_get_memsize(n_rxq); + sz_rxq = RTE_ALIGN_CEIL(sz_rxq, RTE_CACHE_LINE_SIZE); + + // rx.ofo + calc_ofo_elems(n_rxq, &nobj, &ndb); + osz = sizeof(*ofo) + sizeof(ofo->db[0]) * ndb; + dsz = sizeof(ofo->db[0].obj[0]) * nobj * ndb; + sz_ofo = osz + dsz; + sz_ofo = RTE_ALIGN_CEIL(sz_ofo, RTE_CACHE_LINE_SIZE); + + // tx.q + n_txq = RTE_MAX(ctx->prm.max_stream_sbufs, 1U); + n_txq = rte_align32pow2(n_txq); + sz_txq = rte_ring_get_memsize(n_txq); + sz_txq = RTE_ALIGN_CEIL(sz_txq, RTE_CACHE_LINE_SIZE); + + // tx.drb.r nb = drb_nb_elem(ctx); k = calc_stream_drb_num(ctx, nb); - n = rte_align32pow2(k); - - /* size of the drbs ring */ - rsz = rte_ring_get_memsize(n); + n_drb = rte_align32pow2(k); + rsz = rte_ring_get_memsize(n_drb); /* size of the drbs ring */ rsz = RTE_ALIGN_CEIL(rsz, RTE_CACHE_LINE_SIZE); + bsz = tle_drb_calc_size(nb); /* size of the drb. */ + sz_drb_r = rsz + bsz * k; /* total stream drbs size. */ + sz_drb_r = RTE_ALIGN_CEIL(sz_drb_r, RTE_CACHE_LINE_SIZE); - /* size of the drb. */ - bsz = tle_drb_calc_size(nb); - - /* total stream drbs size. 
*/ - sz = rsz + bsz * k; - - s->tx.drb.r = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, - ctx->prm.socket_id); - if (s->tx.drb.r == NULL) { - TCP_LOG(ERR, "%s(%p): allocation of %zu bytes on socket %d " + sz = sz_s + sz_rxq + sz_ofo + sz_txq + sz_drb_r; + s = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, + ctx->prm.socket_id); + if (s == NULL) { + TCP_LOG(ERR, "%s: allocation of %zu bytes on socket %d " "failed with error code: %d\n", - __func__, s, sz, ctx->prm.socket_id, rte_errno); + __func__, sz, ctx->prm.socket_id, rte_errno); return -ENOMEM; } - snprintf(name, sizeof(name), "%p@%zu", s, sz); - rte_ring_init(s->tx.drb.r, name, n, f); + s->rx.q = (struct rte_ring *)((uintptr_t)s + sz_s); + s->rx.ofo = (struct ofo *)((uintptr_t)s->rx.q + sz_rxq); + ofo = s->rx.ofo; + s->tx.q = (struct rte_ring *)((uintptr_t)s->rx.ofo + sz_ofo); + s->tx.drb.r = (struct rte_ring *)((uintptr_t)s->tx.q + sz_txq); + // ring flags + f = ((ctx->prm.flags & TLE_CTX_FLAG_ST) == 0) ? 0 : + (RING_F_SP_ENQ | RING_F_SC_DEQ); + + /* init RX part. */ + snprintf(name, sizeof(name), "%p@%zu", s->rx.q, sz_rxq); + rte_ring_init(s->rx.q, name, n_rxq, f); + + obj = (struct rte_mbuf **)&ofo->db[ndb]; + for (i = 0; i != ndb; i++) { + ofo->db[i].nb_max = nobj; + ofo->db[i].obj = obj + i * nobj; + } + ofo->nb_max = ndb; + + /* init TX part. */ + snprintf(name, sizeof(name), "%p@%zu", s->tx.q, sz_txq); + rte_ring_init(s->tx.q, name, n_txq, f); + + snprintf(name, sizeof(name), "%p@%zu", s->tx.drb.r, sz_drb_r); + rte_ring_init(s->tx.drb.r, name, n_drb, f); for (i = 0; i != k; i++) { drb = (struct tle_drb *)((uintptr_t)s->tx.drb.r + rsz + bsz * i); @@ -200,7 +244,7 @@ tcp_init_streams(struct tle_ctx *ctx) f = ((ctx->prm.flags & TLE_CTX_FLAG_ST) == 0) ? 
0 : (RING_F_SP_ENQ | RING_F_SC_DEQ); - sz = sizeof(*ts) + sizeof(ts->s[0]) * ctx->prm.max_streams; + sz = sizeof(*ts); ts = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, ctx->prm.socket_id); if (ts == NULL) { @@ -210,6 +254,7 @@ tcp_init_streams(struct tle_ctx *ctx) return -ENOMEM; } + rte_spinlock_init(&ts->dr.lock); STAILQ_INIT(&ts->dr.fe); STAILQ_INIT(&ts->dr.be); @@ -228,12 +273,11 @@ tcp_init_streams(struct tle_ctx *ctx) if (ts->tsq == NULL) rc = -ENOMEM; else - rc = stbl_init(&ts->st, ctx->prm.max_streams, - ctx->prm.socket_id); + rc = stbl_init(&ts->st, (ctx->prm.flags & TLE_CTX_FLAG_ST) == 0); } - for (i = 0; rc == 0 && i != ctx->prm.max_streams; i++) - rc = init_stream(ctx, &ts->s[i]); + for (i = 0; rc == 0 && i != ctx->prm.min_streams; i++) + rc = add_stream(ctx); if (rc != 0) { TCP_LOG(ERR, "initalisation of %u-th stream failed", i); @@ -243,11 +287,30 @@ tcp_init_streams(struct tle_ctx *ctx) return rc; } -static void __attribute__((constructor)) +/* + * Note this function is not thread-safe, and we did not lock here as we + * have the assumption that this ctx is dedicated to one thread. + */ +static uint32_t +tcp_more_streams(struct tle_ctx *ctx) +{ + uint32_t i, nb; + uint32_t nb_max = ctx->prm.max_streams - 1; + uint32_t nb_cur = ctx->streams.nb_cur; + + nb = RTE_MIN(ctx->prm.delta_streams, nb_max - nb_cur); + for (i = 0; i < nb; i++) + if (add_stream(ctx) != 0) + break; + return i; +} + +static void __attribute__((constructor(101))) tcp_stream_setup(void) { static const struct stream_ops tcp_ops = { .init_streams = tcp_init_streams, + .more_streams = tcp_more_streams, .fini_streams = tcp_fini_streams, .free_drbs = tcp_free_drbs, }; @@ -305,16 +368,12 @@ tle_tcp_stream_open(struct tle_ctx *ctx, s = (struct tle_tcp_stream *)get_stream(ctx); if (s == NULL) { - rte_errno = ENFILE; - return NULL; - - /* some TX still pending for that stream. 
*/ - } else if (TCP_STREAM_TX_PENDING(s)) { - put_stream(ctx, &s->s, 0); rte_errno = EAGAIN; return NULL; } + s->s.option.raw = prm->option; + /* setup L4 ports and L3 addresses fields. */ rc = stream_fill_ctx(ctx, &s->s, (const struct sockaddr *)&prm->addr.local, @@ -336,12 +395,14 @@ tle_tcp_stream_open(struct tle_ctx *ctx, /* store other params */ s->flags = ctx->prm.flags; + s->tcb.err = 0; s->tcb.snd.nb_retm = (prm->cfg.nb_retries != 0) ? prm->cfg.nb_retries : TLE_TCP_DEFAULT_RETRIES; s->tcb.snd.cwnd = (ctx->prm.icw == 0) ? TCP_INITIAL_CWND_MAX : ctx->prm.icw; s->tcb.snd.rto_tw = (ctx->prm.timewait == TLE_TCP_TIMEWAIT_DEFAULT) ? TCP_RTO_2MSL : ctx->prm.timewait; + s->tcb.snd.rto_fw = TLE_TCP_FINWAIT_TIMEOUT; tcp_stream_up(s); return &s->s; @@ -354,9 +415,16 @@ static inline int stream_close(struct tle_ctx *ctx, struct tle_tcp_stream *s) { uint16_t uop; - uint32_t state; static const struct tle_stream_cb zcb; + /* Put uop operation into this wlock; or it may cause this stream + * to be put into death ring twice, for example: + * 1) FE sets OP_CLOSE; + * 2) BE stream_term sets state as TCP_ST_CLOSED, and put in queue; + * 3) FE down the stream, and calls stream_term again. + */ + tcp_stream_down(s); + /* check was close() already invoked */ uop = s->tcb.uop; if ((uop & TCP_OP_CLOSE) != 0) @@ -366,47 +434,66 @@ stream_close(struct tle_ctx *ctx, struct tle_tcp_stream *s) if (rte_atomic16_cmpset(&s->tcb.uop, uop, uop | TCP_OP_CLOSE) == 0) return -EDEADLK; - /* mark stream as unavaialbe for RX/TX. 
*/ - tcp_stream_down(s); - /* reset events/callbacks */ - s->rx.ev = NULL; s->tx.ev = NULL; + s->rx.ev = NULL; s->err.ev = NULL; s->rx.cb = zcb; s->tx.cb = zcb; s->err.cb = zcb; - state = s->tcb.state; - - /* CLOSED, LISTEN, SYN_SENT - we can close the stream straighway */ - if (state <= TCP_ST_SYN_SENT) { + switch (s->tcb.state) { + case TCP_ST_LISTEN: + /* close the stream straightway */ tcp_stream_reset(ctx, s); return 0; - } - - /* generate FIN and proceed with normal connection termination */ - if (state == TCP_ST_ESTABLISHED || state == TCP_ST_CLOSE_WAIT) { - - /* change state */ - s->tcb.state = (state == TCP_ST_ESTABLISHED) ? - TCP_ST_FIN_WAIT_1 : TCP_ST_LAST_ACK; - - /* mark stream as writable/readable again */ + case TCP_ST_CLOSED: + /* it could be put into this state if a RST packet is + * received, but this stream could be still in tsq trying + * to send something. + */ + /* fallthrough */ + case TCP_ST_SYN_SENT: + /* timer on and could be in tsq (SYN retrans) */ + stream_term(s); + /* fallthrough */ + case TCP_ST_FIN_WAIT_1: + /* fallthrough */ + case TCP_ST_CLOSING: + /* fallthrough */ + case TCP_ST_TIME_WAIT: + /* fallthrough */ + case TCP_ST_LAST_ACK: tcp_stream_up(s); - - /* queue stream into to-send queue */ - txs_enqueue(ctx, s); return 0; + case TCP_ST_ESTABLISHED: + /* fallthrough */ + case TCP_ST_CLOSE_WAIT: + if (s->tcb.state == TCP_ST_ESTABLISHED) { + s->tcb.state = TCP_ST_FIN_WAIT_1; + TCP_DEC_STATS_ATOMIC(TCP_MIB_CURRESTAB); + } else + s->tcb.state = TCP_ST_LAST_ACK; + + if (!rte_ring_empty(s->rx.q)) { + TCP_INC_STATS(TCP_MIB_ESTABRESETS); + s->tcb.uop |= TCP_OP_RESET; + stream_term(s); + } + break; + case TCP_ST_FIN_WAIT_2: + /* Can reach this state if shutdown was called, but the timer + * shall be set after this close. + */ + break; + default: + rte_panic("Invalid state when close: %d\n", s->tcb.state); } - /* - * accroding to the state, close() was already invoked, - * should never that point. 
- */ - RTE_ASSERT(0); - return -EINVAL; + tcp_stream_up(s); + txs_enqueue(ctx, s); + return 0; } uint32_t @@ -453,6 +540,64 @@ tle_tcp_stream_close(struct tle_stream *ts) } int +tle_tcp_stream_shutdown(struct tle_stream *ts, int how) +{ + int ret; + bool wakeup; + uint32_t state; + struct tle_tcp_stream *s; + + s = TCP_STREAM(ts); + if (ts == NULL || s->s.type >= TLE_VNUM) + return -EINVAL; + + /* Refer to linux/net/ipv4/tcp.c:tcp_shutdown() */ + if (how == SHUT_RD) + return 0; + + tcp_stream_down(s); + + state = s->tcb.state; + + switch (state) { + case TCP_ST_LISTEN: + /* fallthrough */ + case TCP_ST_SYN_SENT: + s->tcb.state = TCP_ST_CLOSED; + wakeup = true; + ret = 0; + break; + case TCP_ST_ESTABLISHED: + /* fallthrough */ + case TCP_ST_CLOSE_WAIT: + if (state == TCP_ST_ESTABLISHED) { + TCP_DEC_STATS_ATOMIC(TCP_MIB_CURRESTAB); + s->tcb.state = TCP_ST_FIN_WAIT_1; + } else + s->tcb.state = TCP_ST_LAST_ACK; + txs_enqueue(ts->ctx, s); + wakeup = true; + ret = 0; + break; + default: + wakeup = false; + rte_errno = ENOTCONN; + ret = -1; + } + + if (wakeup) { + /* Notify other threads which may wait on the event */ + if (s->tx.ev) + tle_event_raise(s->tx.ev); + if (how == SHUT_RDWR && s->err.ev) + tle_event_raise(s->err.ev); + } + + tcp_stream_up(s); + return ret; +} + +int tle_tcp_stream_get_addr(const struct tle_stream *ts, struct tle_tcp_stream_addr *addr) { @@ -617,3 +762,73 @@ tle_tcp_stream_get_mss(const struct tle_stream * ts) s = TCP_STREAM(ts); return s->tcb.snd.mss; } + +int +tle_tcp_stream_get_info(const struct tle_stream * ts, void *info, socklen_t *optlen) +{ + struct tle_tcp_stream *s; + struct tcp_info i; + + if (ts == NULL) + return -EINVAL; + + s = TCP_STREAM(ts); + + memset(&i, 0, sizeof(struct tcp_info)); + + /* transform from tldk state into linux kernel state */ + switch (s->tcb.state) { + case TCP_ST_CLOSED: + i.tcpi_state = TCP_CLOSE; + break; + case TCP_ST_LISTEN: + i.tcpi_state = TCP_LISTEN; + break; + case TCP_ST_SYN_SENT: + i.tcpi_state = 
TCP_SYN_SENT; + break; + case TCP_ST_SYN_RCVD: + i.tcpi_state = TCP_SYN_RECV; + break; + case TCP_ST_ESTABLISHED: + i.tcpi_state = TCP_ESTABLISHED; + break; + case TCP_ST_FIN_WAIT_1: + i.tcpi_state = TCP_FIN_WAIT1; + break; + case TCP_ST_FIN_WAIT_2: + i.tcpi_state = TCP_FIN_WAIT2; + break; + case TCP_ST_CLOSE_WAIT: + i.tcpi_state = TCP_CLOSE_WAIT; + break; + case TCP_ST_CLOSING: + i.tcpi_state = TCP_CLOSING; + break; + case TCP_ST_LAST_ACK: + i.tcpi_state = TCP_LAST_ACK; + break; + case TCP_ST_TIME_WAIT: + i.tcpi_state = TCP_TIME_WAIT; + break; + } + + /* fix me, total retrans? */ + i.tcpi_total_retrans = s->tcb.snd.nb_retx; + + if (*optlen > sizeof(struct tcp_info)) + *optlen = sizeof(struct tcp_info); + rte_memcpy(info, &i, *optlen); + return 0; +} + +void +tle_tcp_stream_set_keepalive(struct tle_stream *ts) +{ + struct tle_tcp_stream *s; + + s = TCP_STREAM(ts); + + s->tcb.uop |= TCP_OP_KEEPALIVE; + txs_enqueue(ts->ctx, s); +} diff --git a/lib/libtle_l4p/tcp_stream.h b/lib/libtle_l4p/tcp_stream.h index 4629fe6..1202574 100644 --- a/lib/libtle_l4p/tcp_stream.h +++ b/lib/libtle_l4p/tcp_stream.h @@ -17,6 +17,8 @@ #define _TCP_STREAM_H_ #include <rte_vect.h> +#include <rte_mbuf.h> + #include <tle_dring.h> #include <tle_tcp.h> #include <tle_event.h> @@ -45,23 +47,28 @@ enum { }; enum { - TCP_OP_LISTEN = 0x1, - TCP_OP_ACCEPT = 0x2, - TCP_OP_CONNECT = 0x4, - TCP_OP_CLOSE = 0x8, + TCP_OP_LISTEN = 0x1, + TCP_OP_ACCEPT = 0x2, + TCP_OP_CONNECT = 0x4, + TCP_OP_CLOSE = 0x8, + TCP_OP_RESET = 0x10, + TCP_OP_KEEPALIVE = 0x20 }; struct tcb { + int err; volatile uint16_t state; volatile uint16_t uop; /* operations by user performed */ struct { uint32_t nxt; + uint32_t cpy; /* head of yet unread data */ uint32_t irs; /* initial received sequence */ uint32_t wnd; uint32_t ts; struct { uint32_t seq; - uint32_t on; + uint32_t on; /* on == 1: received an out-of-order fin + * on == 2: received an in order fin */ } frs; uint32_t srtt; /* smoothed round trip time (scaled by >> 3) */ 
uint32_t rttvar; /* rtt variance */ @@ -83,15 +90,32 @@ struct tcb { uint32_t ssthresh; /* slow start threshold */ uint32_t rto; /* retransmission timeout */ uint32_t rto_tw; /* TIME_WAIT retransmission timeout */ + uint32_t rto_fw; /* FIN_WAIT_2 waiting timeout */ uint32_t iss; /* initial send sequence */ + uint32_t waitlen; /* total length of unacknowledged pkt */ + uint32_t cork_ts; uint16_t mss; uint8_t wscale; uint8_t nb_retx; /* number of retransmission */ uint8_t nb_retm; /**< max number of retx attempts. */ + uint8_t nb_keepalive;/* number of sended keepalive */ + bool update_rcv; /* Flag for updating recv window */ + uint16_t nxt_offset; /* Partial tx, next data of a segment to tx */ + uint32_t una_offset; /* Partial ack, next data of a mbuf to ack */ + struct rte_mbuf *nxt_pkt; /* Partial tx, next segment to send */ } snd; struct syn_opts so; /* initial syn options. */ }; +enum { + TIMER_RTO, + TIMER_DACK, + TIMER_KEEPALIVE, + TIMER_NUM, + TIMER_MAX_NUM = 8, + TIMER_MASK = TIMER_MAX_NUM - 1 +}; + struct tle_tcp_stream { struct tle_stream s; @@ -103,7 +127,7 @@ struct tle_tcp_stream { struct tcb tcb; struct { - void *handle; + void *handle[TIMER_NUM]; } timer; struct { @@ -155,7 +179,6 @@ struct tcp_streams { struct tle_timer_wheel *tmr; /* timer wheel */ struct rte_ring *tsq; /* to-send streams queue */ struct sdr dr; /* death row for zombie streams */ - struct tle_tcp_stream s[]; /* array of allocated streams. */ }; #define CTX_TCP_STREAMS(ctx) ((struct tcp_streams *)(ctx)->streams.buf) diff --git a/lib/libtle_l4p/tcp_timer.h b/lib/libtle_l4p/tcp_timer.h index 8faefb3..d242556 100644 --- a/lib/libtle_l4p/tcp_timer.h +++ b/lib/libtle_l4p/tcp_timer.h @@ -27,43 +27,53 @@ extern "C" { * all RTO values are in ms. 
*/ #define TCP_RTO_MAX 60000U /* RFC 6298 (2.5) */ -#define TCP_RTO_MIN 1000U /* RFC 6298 (2.4) */ +#define TCP_RTO_MIN 200U /* Linux/include/net/tcp.h: TCP_RTO_MIN */ #define TCP_RTO_2MSL (2 * TCP_RTO_MAX) -#define TCP_RTO_DEFAULT TCP_RTO_MIN /* RFC 6298 (2.1)*/ +#define TCP_RTO_DEFAULT 1000U /* RFC 6298 (2.1)*/ #define TCP_RTO_GRANULARITY 100U +static inline struct tle_tcp_stream * +timer_stream(struct tle_tcp_stream *s) +{ + return (struct tle_tcp_stream *)((unsigned long)s & (~(unsigned long)TIMER_MASK)); +} + +static inline uint8_t +timer_type(struct tle_tcp_stream *s) +{ + return (uint8_t)((unsigned long)s & (unsigned long)TIMER_MASK); +} static inline void -timer_stop(struct tle_tcp_stream *s) +timer_stop(struct tle_tcp_stream *s, uint8_t type) { struct tle_timer_wheel *tw; - if (s->timer.handle != NULL) { + if (s->timer.handle[type] != NULL) { tw = CTX_TCP_TMWHL(s->s.ctx); - tle_timer_stop(tw, s->timer.handle); - s->timer.handle = NULL; + tle_timer_stop(tw, s->timer.handle[type]); + s->timer.handle[type] = NULL; } } static inline void -timer_start(struct tle_tcp_stream *s) +timer_start(struct tle_tcp_stream *s, uint8_t type, uint32_t timeout) { struct tle_timer_wheel *tw; - if (s->timer.handle == NULL) { + if (s->timer.handle[type] == NULL) { tw = CTX_TCP_TMWHL(s->s.ctx); - s->timer.handle = tle_timer_start(tw, s, s->tcb.snd.rto); - s->tcb.snd.nb_retx = 0; + s->timer.handle[type] = tle_timer_start(tw, (void*)((unsigned long)s | type), timeout); } } static inline void -timer_restart(struct tle_tcp_stream *s) +timer_restart(struct tle_tcp_stream *s, uint8_t type, uint32_t timeout) { struct tle_timer_wheel *tw; tw = CTX_TCP_TMWHL(s->s.ctx); - s->timer.handle = tle_timer_start(tw, s, s->tcb.snd.rto); + s->timer.handle[type] = tle_timer_start(tw, (void*)((unsigned long)s | type), timeout); } @@ -71,10 +81,10 @@ timer_restart(struct tle_tcp_stream *s) * reset number of retransmissions and restart RTO timer. 
*/ static inline void -timer_reset(struct tle_tcp_stream *s) +timer_reset(struct tle_tcp_stream *s, uint8_t type, uint32_t timeout) { - timer_stop(s); - timer_start(s); + timer_stop(s, type); + timer_start(s, type, timeout); } static inline uint32_t diff --git a/lib/libtle_l4p/tcp_tx_seg.h b/lib/libtle_l4p/tcp_tx_seg.h index ac2b13b..b64aa77 100644 --- a/lib/libtle_l4p/tcp_tx_seg.h +++ b/lib/libtle_l4p/tcp_tx_seg.h @@ -27,7 +27,7 @@ tcp_segmentation(struct rte_mbuf *mbin, struct rte_mbuf *mbout[], uint16_t num, struct rte_mbuf *in_seg = NULL; uint32_t nbseg, in_seg_data_pos; uint32_t more_in_segs; - uint16_t bytes_left; + uint16_t out_bytes_remain; in_seg = mbin; in_seg_data_pos = 0; @@ -35,7 +35,7 @@ tcp_segmentation(struct rte_mbuf *mbin, struct rte_mbuf *mbout[], uint16_t num, /* Check that pkts_out is big enough to hold all fragments */ if (mss * num < (uint16_t)mbin->pkt_len) - return -ENOSPC; + return -EAGAIN; more_in_segs = 1; while (more_in_segs) { @@ -49,7 +49,7 @@ tcp_segmentation(struct rte_mbuf *mbin, struct rte_mbuf *mbout[], uint16_t num, return -ENOMEM; } - bytes_left = mss; + out_bytes_remain = mss; out_seg_prev = out_pkt; more_out_segs = 1; while (more_out_segs && more_in_segs) { @@ -68,7 +68,7 @@ tcp_segmentation(struct rte_mbuf *mbin, struct rte_mbuf *mbout[], uint16_t num, /* Prepare indirect buffer */ rte_pktmbuf_attach(out_seg, in_seg); - len = bytes_left; + len = out_bytes_remain; if (len > (in_seg->data_len - in_seg_data_pos)) len = in_seg->data_len - in_seg_data_pos; @@ -77,10 +77,10 @@ tcp_segmentation(struct rte_mbuf *mbin, struct rte_mbuf *mbout[], uint16_t num, out_pkt->pkt_len = (uint16_t)(len + out_pkt->pkt_len); out_pkt->nb_segs += 1; in_seg_data_pos += len; - bytes_left -= len; + out_bytes_remain -= len; /* Current output packet (i.e. fragment) done ? */ - if (bytes_left == 0) + if (out_bytes_remain == 0) more_out_segs = 0; /* Current input segment done ? 
*/ diff --git a/lib/libtle_l4p/tcp_txq.h b/lib/libtle_l4p/tcp_txq.h index 78f1d56..303b8fd 100644 --- a/lib/libtle_l4p/tcp_txq.h +++ b/lib/libtle_l4p/tcp_txq.h @@ -68,9 +68,28 @@ tcp_txq_set_nxt_head(struct tle_tcp_stream *s, uint32_t num) static inline void tcp_txq_rst_nxt_head(struct tle_tcp_stream *s) { - struct rte_ring *r; + struct rte_ring *r = s->tx.q; + struct rte_mbuf *m; + uint32_t offset, data_len; + + if (s->tcb.snd.nxt_pkt != NULL) { + s->tcb.snd.nxt_offset = 0; + s->tcb.snd.nxt_pkt = NULL; + } + + offset = s->tcb.snd.una_offset; + if (offset) { + m = (struct rte_mbuf *)(_rte_ring_get_data(r)[r->cons.tail & r->mask]); + data_len = m->data_len - PKT_L234_HLEN(m); + while (offset >= data_len) { + offset -= data_len; + m = m->next; + data_len = m->data_len; + } + s->tcb.snd.nxt_pkt = m; + s->tcb.snd.nxt_offset = offset; + } - r = s->tx.q; r->cons.head = r->cons.tail; } @@ -110,9 +129,13 @@ static inline uint32_t txs_dequeue_bulk(struct tle_ctx *ctx, struct tle_tcp_stream *s[], uint32_t num) { struct rte_ring *r; + uint32_t n, i; r = CTX_TCP_TSQ(ctx); - return _rte_ring_dequeue_burst(r, (void **)s, num); + n = _rte_ring_dequeue_burst(r, (void **)s, num); + for (i = 0; i < n; i++) + rte_atomic32_clear(&s[i]->tx.arm); + return n; } #ifdef __cplusplus diff --git a/lib/libtle_l4p/tle_ctx.h b/lib/libtle_l4p/tle_ctx.h index de78a6b..f0efd51 100644 --- a/lib/libtle_l4p/tle_ctx.h +++ b/lib/libtle_l4p/tle_ctx.h @@ -54,6 +54,43 @@ extern "C" { struct tle_ctx; struct tle_dev; +typedef union tle_stream_options { + struct { + uint32_t reuseaddr: 1; + uint32_t reuseport: 1; + uint32_t keepalive: 1; + uint32_t ipv6only: 1; + uint32_t oobinline: 1; + uint32_t tcpcork: 1; + uint32_t tcpnodelay: 1; + uint32_t mulloop: 1; + uint32_t timestamp: 1; + uint32_t reserve: 3; + uint32_t tcpquickack: 4; + uint32_t multtl: 8; + uint32_t keepcnt: 8; + uint16_t keepidle; + uint16_t keepintvl; + }; + uint64_t raw; +} tle_stream_options_t; + +static inline void +tle_set_timestamp(struct 
msghdr *msg, struct rte_mbuf *m) +{ + struct timeval *tv; + struct cmsghdr *cmsg; + + cmsg = CMSG_FIRSTHDR(msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMP; + cmsg->cmsg_len = CMSG_LEN(sizeof(struct timeval)); + msg->msg_controllen = cmsg->cmsg_len; + tv = (struct timeval*)CMSG_DATA(cmsg); + tv->tv_sec = m->timestamp >> 20; + tv->tv_usec = m->timestamp & 0xFFFFFUL; +} + /** * Blocked L4 ports info. */ @@ -112,6 +149,8 @@ struct tle_ctx_param { int32_t socket_id; /**< socket ID to allocate memory for. */ uint32_t proto; /**< L4 proto to handle. */ uint32_t max_streams; /**< max number of streams in context. */ + uint32_t min_streams; /**< min number of streams at init. */ + uint32_t delta_streams; /**< delta of streams of each allocation. */ uint32_t max_stream_rbufs; /**< max recv mbufs per stream. */ uint32_t max_stream_sbufs; /**< max send mbufs per stream. */ uint32_t send_bulk_size; /**< expected # of packets per send call. */ @@ -145,6 +184,8 @@ struct tle_ctx_param { */ #define TLE_TCP_TIMEWAIT_DEFAULT UINT32_MAX +#define TLE_TCP_FINWAIT_TIMEOUT 60000 + /** * create L4 processing context. * @param ctx_prm diff --git a/lib/libtle_l4p/tle_event.h b/lib/libtle_l4p/tle_event.h index d730345..dd7a997 100644 --- a/lib/libtle_l4p/tle_event.h +++ b/lib/libtle_l4p/tle_event.h @@ -43,7 +43,7 @@ struct tle_event { struct tle_evq *head; const void *data; enum tle_ev_state state; -} __rte_cache_aligned; +}; struct tle_evq { rte_spinlock_t lock; diff --git a/lib/libtle_l4p/tle_stats.h b/lib/libtle_l4p/tle_stats.h new file mode 100644 index 0000000..3588c6d --- /dev/null +++ b/lib/libtle_l4p/tle_stats.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2018 Ant Financial Services Group. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TLE_STATS_H +#define TLE_STATS_H + +#include <rte_per_lcore.h> +#include <rte_memory.h> + +/* tcp mib definitions */ +/* + * RFC 1213: MIB-II TCP group + * RFC 2012 (updates 1213): SNMPv2-MIB-TCP + */ +enum +{ + TCP_MIB_RTOALGORITHM, /* RtoAlgorithm */ + TCP_MIB_RTOMIN, /* RtoMin */ + TCP_MIB_RTOMAX, /* RtoMax */ + TCP_MIB_MAXCONN, /* MaxConn */ + TCP_MIB_ACTIVEOPENS, /* ActiveOpens */ + TCP_MIB_PASSIVEOPENS, /* PassiveOpens */ + TCP_MIB_ATTEMPTFAILS, /* AttemptFails */ + TCP_MIB_ESTABRESETS, /* EstabResets */ + TCP_MIB_CURRESTAB, /* CurrEstab */ + TCP_MIB_INSEGS, /* InSegs */ + TCP_MIB_OUTSEGS, /* OutSegs */ + TCP_MIB_RETRANSSEGS, /* RetransSegs */ + TCP_MIB_INERRS, /* InErrs */ + TCP_MIB_OUTRSTS, /* OutRsts */ + TCP_MIB_CSUMERRORS, /* InCsumErrors */ + TCP_MIB_MAX +}; + +/* udp mib definitions */ +/* + * RFC 1213: MIB-II UDP group + * RFC 2013 (updates 1213): SNMPv2-MIB-UDP + */ +enum +{ + UDP_MIB_INDATAGRAMS, /* InDatagrams */ + UDP_MIB_NOPORTS, /* NoPorts */ + UDP_MIB_INERRORS, /* InErrors */ + UDP_MIB_OUTDATAGRAMS, /* OutDatagrams */ + UDP_MIB_RCVBUFERRORS, /* RcvbufErrors */ + UDP_MIB_SNDBUFERRORS, /* SndbufErrors */ + UDP_MIB_CSUMERRORS, /* InCsumErrors */ + UDP_MIB_IGNOREDMULTI, /* IgnoredMulti */ + UDP_MIB_MAX +}; + +struct tcp_mib { + unsigned long mibs[TCP_MIB_MAX]; +}; + +struct udp_mib { + unsigned long mibs[UDP_MIB_MAX]; +}; + +struct tle_mib { + struct tcp_mib tcp; + struct udp_mib udp; +} __rte_cache_aligned; + +extern struct tle_mib default_mib; + +RTE_DECLARE_PER_LCORE(struct tle_mib *, mib); + +#define PERCPU_MIB 
RTE_PER_LCORE(mib) + +#define SNMP_INC_STATS(mib, field) (mib).mibs[field]++ +#define SNMP_DEC_STATS(mib, field) (mib).mibs[field]-- +#define SNMP_ADD_STATS(mib, field, n) (mib).mibs[field] += n +#define SNMP_ADD_STATS_ATOMIC(mib, field, n) \ + rte_atomic64_add((rte_atomic64_t *)(&(mib).mibs[field]), n) + +#define TCP_INC_STATS(field) SNMP_INC_STATS(PERCPU_MIB->tcp, field) +#define TCP_DEC_STATS(field) SNMP_DEC_STATS(PERCPU_MIB->tcp, field) +#define TCP_ADD_STATS(field, n) SNMP_ADD_STATS(PERCPU_MIB->tcp, field, n) +#define TCP_INC_STATS_ATOMIC(field) SNMP_ADD_STATS_ATOMIC(PERCPU_MIB->tcp, field, 1) +#define TCP_DEC_STATS_ATOMIC(field) SNMP_ADD_STATS_ATOMIC(PERCPU_MIB->tcp, field, (-1)) + +#define UDP_INC_STATS(field) SNMP_INC_STATS(PERCPU_MIB->udp, field) +#define UDP_ADD_STATS(field, n) SNMP_ADD_STATS(PERCPU_MIB->udp, field, n) +#define UDP_ADD_STATS_ATOMIC(field, n) \ + SNMP_ADD_STATS_ATOMIC(PERCPU_MIB->udp, field, n) + +#endif /* TLE_STATS_H */ diff --git a/lib/libtle_l4p/tle_tcp.h b/lib/libtle_l4p/tle_tcp.h index b0cbda6..93e853e 100644 --- a/lib/libtle_l4p/tle_tcp.h +++ b/lib/libtle_l4p/tle_tcp.h @@ -49,6 +49,7 @@ struct tle_tcp_stream_cfg { struct tle_tcp_stream_param { struct tle_tcp_stream_addr addr; struct tle_tcp_stream_cfg cfg; + uint64_t option; }; /** @@ -86,6 +87,25 @@ tle_tcp_stream_open(struct tle_ctx *ctx, int tle_tcp_stream_close(struct tle_stream *s); /** + * shutdown an open stream in SHUT_WR way. + * similar to tle_tcp_stream_close(), except: + * - rx still works + * - er still works + * @param s + * Pointer to the stream to close. + * @return + * zero on successful completion. + * - -EINVAL - invalid parameter passed to function + * - -EDEADLK - close was already invoked on that stream + */ +int tle_tcp_stream_shutdown(struct tle_stream *s, int how); + +/** + * Send rst on this stream. + */ +void tle_tcp_stream_kill(struct tle_stream *s); + +/** * close a group of open streams. 
* if the stream is in connected state, then: * - connection termination would be performed. @@ -268,6 +288,15 @@ uint16_t tle_tcp_stream_recv(struct tle_stream *s, struct rte_mbuf *pkt[], uint16_t num); /** + * Get how many bytes are received in recv window. + * @param ts + * TCP stream to receive data from. + * @return + * bytes of data inside recv window. + */ +uint16_t tle_tcp_stream_inq(struct tle_stream *s); + +/** * Reads iovcnt buffers from the for given TCP stream. * Note that the stream has to be in connected state. * Data ordering is preserved. @@ -290,6 +319,19 @@ ssize_t tle_tcp_stream_readv(struct tle_stream *ts, const struct iovec *iov, int iovcnt); /** + * Like tle_tcp_stream_readv, but with more information returned in msghdr. + * Note that the stream has to be in connected state. + * @param ts + * TCP stream to receive data from. + * @param msg + * If not NULL, generate control message into msg_control field of msg. + * @return + * On success, number of bytes read in the stream receive buffer. + * In case of error, returns -1 and error code will be set in rte_errno. + */ +ssize_t tle_tcp_stream_recvmsg(struct tle_stream *ts, struct msghdr *msg); + +/** * Consume and queue up to *num* packets, that will be sent eventually * by tle_tcp_tx_bulk(). * Note that the stream has to be in connected state. @@ -420,6 +462,24 @@ uint16_t tle_tcp_tx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[], */ int tle_tcp_process(struct tle_ctx *ctx, uint32_t num); +/** + * Get tcp info of a tcp stream. + * This function is not multi-thread safe. + * @param ts + * TCP stream to get info from. + * @param info + * Pointer to store info. + * @param optlen + * Pointer to length of info. + * @return + * zero on successful completion. + * - ENOTCONN - connection is not connected yet. 
+ */ +int +tle_tcp_stream_get_info(const struct tle_stream * ts, void *info, socklen_t *optlen); + +void tle_tcp_stream_set_keepalive(struct tle_stream *ts); + #ifdef __cplusplus } #endif diff --git a/lib/libtle_l4p/tle_udp.h b/lib/libtle_l4p/tle_udp.h index d3a8fe9..640ed64 100644 --- a/lib/libtle_l4p/tle_udp.h +++ b/lib/libtle_l4p/tle_udp.h @@ -35,6 +35,7 @@ struct tle_udp_stream_param { struct tle_event *send_ev; /**< send event to use. */ struct tle_stream_cb send_cb; /**< send callback to use. */ + uint64_t option; }; /** @@ -55,6 +56,36 @@ tle_udp_stream_open(struct tle_ctx *ctx, const struct tle_udp_stream_param *prm); /** + * set an existed stream within given UDP context with new param. + * @param ts + * stream to set with new param + * @param ctx + * UDP context to set the stream within. + * @param prm + * Parameters used to set the stream. + * @return + * Pointer to UDP stream structure that can be used in future UDP API calls, + * or NULL on error, with error code set in rte_errno. + * Possible rte_errno errors include: + * - EINVAL - invalid parameter passed to function + * - ENOFILE - max limit of open streams reached for that context + */ +struct tle_stream * +tle_udp_stream_set(struct tle_stream *ts, struct tle_ctx *ctx, + const struct tle_udp_stream_param *prm); + +/** + * shutdown an open stream. + * + * @param s + * Pointer to the stream to shutdown. + * @return + * zero on successful completion. + * - -EINVAL - invalid parameter passed to function + */ +int tle_udp_stream_shutdown(struct tle_stream *s, int how); + +/** * close an open stream. * All packets still remaining in stream receive buffer will be freed. 
* All packets still remaining in stream transmit buffer will be kept @@ -180,6 +211,24 @@ uint16_t tle_udp_stream_recv(struct tle_stream *s, struct rte_mbuf *pkt[], uint16_t tle_udp_stream_send(struct tle_stream *s, struct rte_mbuf *pkt[], uint16_t num, const struct sockaddr *dst_addr); +/** + * updates configuration (associated events, callbacks, stream parameters) + * for the given streams. + * @param ts + * An array of pointers to the streams to update. + * @param prm + * An array of parameters to update for the given streams. + * @param num + * Number of elements in the *ts* and *prm* arrays. + * @return + * number of streams successfully updated. + * In case of error, error code set in rte_errno. + * Possible rte_errno errors include: + * - EINVAL - invalid parameter passed to function + */ +uint32_t tle_udp_stream_update_cfg(struct tle_stream *ts[], + struct tle_udp_stream_param prm[], uint32_t num); + #ifdef __cplusplus } #endif diff --git a/lib/libtle_l4p/udp_rxtx.c b/lib/libtle_l4p/udp_rxtx.c index 84a13ea..e9539b9 100644 --- a/lib/libtle_l4p/udp_rxtx.c +++ b/lib/libtle_l4p/udp_rxtx.c @@ -13,7 +13,6 @@ * limitations under the License. 
*/ -#include <rte_malloc.h> #include <rte_errno.h> #include <rte_ethdev.h> #include <rte_ip.h> @@ -24,14 +23,11 @@ #include "misc.h" static inline struct tle_udp_stream * -rx_stream_obtain(struct tle_dev *dev, uint32_t type, uint32_t port) +rx_stream_obtain_by_tuples(struct stbl *st, const union pkt_info *pi) { struct tle_udp_stream *s; - if (type >= TLE_VNUM || dev->dp[type] == NULL) - return NULL; - - s = (struct tle_udp_stream *)dev->dp[type]->streams[port]; + s = UDP_STREAM(stbl_find_stream(st, pi)); if (s == NULL) return NULL; @@ -41,6 +37,24 @@ rx_stream_obtain(struct tle_dev *dev, uint32_t type, uint32_t port) return s; } +static inline struct tle_udp_stream * +rx_stream_obtain(struct tle_dev *dev, uint32_t type, const union pkt_info *pi) +{ + struct tle_udp_stream *s; + + if (type == TLE_V4) + s = bhash_lookup4(dev->ctx->bhash[type], + pi->addr4.dst, pi->port.dst, 1); + else + s = bhash_lookup6(dev->ctx->bhash[type], + pi->addr6->dst, pi->port.dst, 1); + + if (s == NULL || rwl_acquire(&s->rx.use) < 0) + return NULL; + + return s; +} + static inline uint16_t get_pkt_type(const struct rte_mbuf *m) { @@ -57,8 +71,9 @@ get_pkt_type(const struct rte_mbuf *m) } static inline union l4_ports -pkt_info(struct rte_mbuf *m, union l4_ports *ports, union ipv4_addrs *addr4, - union ipv6_addrs **addr6) +pkt_info_udp(struct rte_mbuf *m, union l4_ports *ports, + union ipv4_addrs *addr4, union ipv6_addrs **addr6, + union pkt_info *pi) { uint32_t len; union l4_ports ret, *up; @@ -71,15 +86,20 @@ pkt_info(struct rte_mbuf *m, union l4_ports *ports, union ipv4_addrs *addr4, pa4 = rte_pktmbuf_mtod_offset(m, union ipv4_addrs *, len + offsetof(struct ipv4_hdr, src_addr)); addr4->raw = pa4->raw; + pi->addr4.raw = pa4->raw; + pi->tf.type = TLE_V4; } else if (ret.src == TLE_V6) { *addr6 = rte_pktmbuf_mtod_offset(m, union ipv6_addrs *, len + offsetof(struct ipv6_hdr, src_addr)); + pi->addr6 = *addr6; + pi->tf.type = TLE_V6; } len += m->l3_len; up = rte_pktmbuf_mtod_offset(m, union 
l4_ports *, len + offsetof(struct udp_hdr, src_port)); ports->raw = up->raw; + pi->port.raw = up->raw; ret.dst = ports->dst; return ret; } @@ -96,6 +116,11 @@ rx_stream(struct tle_udp_stream *s, void *mb[], struct rte_mbuf *rp[], r = _rte_ring_enqueue_burst(s->rx.q, mb, num); + if (unlikely(r != num)) { + UDP_ADD_STATS(UDP_MIB_RCVBUFERRORS, num - r); + UDP_ADD_STATS(UDP_MIB_INERRORS, num - r); + } + /* if RX queue was empty invoke user RX notification callback. */ if (s->rx.cb.func != NULL && r != 0 && rte_ring_count(s->rx.q) == r) s->rx.cb.func(s->rx.cb.data, &s->s); @@ -164,28 +189,64 @@ rx_stream4(struct tle_udp_stream *s, struct rte_mbuf *pkt[], return rx_stream(s, mb, rp + k, rc + k, n); } +/* + * Consider 2 UDP pkt_info *equal* if their: + * - types (IPv4/IPv6) + * - TCP src and dst ports + * - IP src and dst addresses + * are equal. + */ +static inline int +udp_pkt_info_bulk_eq(const union pkt_info pi[], uint32_t num) +{ + uint32_t i; + + i = 1; + + if (pi[0].tf.type == TLE_V4) { + while (i != num && pi[i].tf.type == TLE_V4 && + pi[0].port.raw == pi[i].port.raw && + pi[0].addr4.raw == pi[i].addr4.raw) + i++; + } else if (pi[0].tf.type == TLE_V6) { + while (i != num && pi[i].tf.type == TLE_V6 && + pi[0].port.raw == pi[i].port.raw && + ymm_cmp(&pi[0].addr6->raw, + &pi[i].addr6->raw) == 0) + i++; + } + + return i; +} + uint16_t tle_udp_rx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[], struct rte_mbuf *rp[], int32_t rc[], uint16_t num) { + struct stbl *st; struct tle_udp_stream *s; - uint32_t i, j, k, n, p, t; + uint32_t i, j, k, n, t; union l4_ports tp[num], port[num]; union ipv4_addrs a4[num]; union ipv6_addrs *pa6[num]; + union pkt_info pi[num]; + + st = CTX_UDP_STLB(dev->ctx); for (i = 0; i != num; i++) - tp[i] = pkt_info(pkt[i], &port[i], &a4[i], &pa6[i]); + tp[i] = pkt_info_udp(pkt[i], &port[i], &a4[i], + &pa6[i], &pi[i]); k = 0; for (i = 0; i != num; i = j) { - for (j = i + 1; j != num && tp[j].raw == tp[i].raw; j++) - ; + j = i + 
udp_pkt_info_bulk_eq(pi + i, num - i); t = tp[i].src; - p = tp[i].dst; - s = rx_stream_obtain(dev, t, p); + + s = rx_stream_obtain_by_tuples(st, &pi[i]); + if (s == NULL) + s = rx_stream_obtain(dev, t, &pi[i]); if (s != NULL) { if (t == TLE_V4) @@ -202,6 +263,7 @@ tle_udp_rx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[], rwl_release(&s->rx.use); } else { + UDP_ADD_STATS(UDP_MIB_NOPORTS, j - i); for (; i != j; i++) { rc[k] = ENOENT; rp[k] = pkt[i]; @@ -262,6 +324,8 @@ tle_udp_tx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[], uint16_t num) stream_drb_release(s, drb + i, j - i); } + UDP_ADD_STATS(UDP_MIB_OUTDATAGRAMS, n); + return n; } @@ -272,24 +336,18 @@ tle_udp_tx_bulk(struct tle_dev *dev, struct rte_mbuf *pkt[], uint16_t num) static inline uint32_t recv_pkt_process(struct rte_mbuf *m[], uint32_t num, uint32_t type) { - uint32_t i, k; - uint64_t flg[num], ofl[num]; - - for (i = 0; i != num; i++) { - flg[i] = m[i]->ol_flags; - ofl[i] = m[i]->tx_offload; - } + uint32_t i, k, offset; - k = 0; - for (i = 0; i != num; i++) { - - /* drop packets with invalid cksum(s). 
*/ - if (check_pkt_csum(m[i], flg[i], type, IPPROTO_UDP) != 0) { + for (i = 0, k = 0; i != num; i++) { + if (check_pkt_csum(m[i], type, IPPROTO_UDP) != 0) { + UDP_INC_STATS(UDP_MIB_CSUMERRORS); rte_pktmbuf_free(m[i]); m[i] = NULL; k++; - } else - rte_pktmbuf_adj(m[i], _tx_offload_l4_offset(ofl[i])); + } else { + offset = _tx_offload_l4_offset(m[i]->tx_offload); + rte_pktmbuf_adj(m[i], offset); + } } return k; @@ -302,9 +360,25 @@ tle_udp_stream_recv(struct tle_stream *us, struct rte_mbuf *pkt[], uint16_t num) struct tle_udp_stream *s; s = UDP_STREAM(us); + n = 0; + +again: n = _rte_ring_mc_dequeue_burst(s->rx.q, (void **)pkt, num); - if (n == 0) + if (n == 0) { + if (rwl_try_acquire(&s->rx.use) > 0) + rte_errno = EAGAIN; + else + rte_errno = ESHUTDOWN; + rwl_release(&s->rx.use); return 0; + } + + k = recv_pkt_process(pkt, n, s->s.type); + if (unlikely(k)) + UDP_ADD_STATS_ATOMIC(UDP_MIB_CSUMERRORS, k); + n = compress_pkt_list(pkt, n, k); + if (n == 0) + goto again; /* * if we still have packets to read, @@ -316,8 +390,8 @@ tle_udp_stream_recv(struct tle_stream *us, struct rte_mbuf *pkt[], uint16_t num) rwl_release(&s->rx.use); } - k = recv_pkt_process(pkt, n, s->s.type); - return compress_pkt_list(pkt, n, k); + UDP_ADD_STATS_ATOMIC(UDP_MIB_INDATAGRAMS, n); + return n; } static inline int @@ -413,7 +487,7 @@ fragment(struct rte_mbuf *pkt, struct rte_mbuf *frag[], uint32_t num, /* Remove the Ethernet header from the input packet */ rte_pktmbuf_adj(pkt, dst->l2_len); - mtu = dst->mtu - dst->l2_len; + mtu = dst->mtu; /* fragment packet */ if (type == TLE_V4) @@ -475,13 +549,22 @@ queue_pkt_out(struct tle_udp_stream *s, struct tle_dev *dev, nb += nbc; /* no free drbs, can't send anything */ - if (nb == 0) + if (unlikely(nb == 0)) { + if (all_or_nothing) + UDP_ADD_STATS_ATOMIC(UDP_MIB_SNDBUFERRORS, 1); + else + UDP_ADD_STATS_ATOMIC(UDP_MIB_SNDBUFERRORS, nb_pkt); return 0; + } /* not enough free drbs, reduce number of packets to send. 
*/ else if (nb != nbm) { - if (all_or_nothing) + if (all_or_nothing) { + UDP_ADD_STATS_ATOMIC(UDP_MIB_SNDBUFERRORS, 1); return 0; + } + + UDP_ADD_STATS_ATOMIC(UDP_MIB_SNDBUFERRORS, nb_pkt - nb * bsz); nb_pkt = nb * bsz; } @@ -509,12 +592,18 @@ tle_udp_stream_send(struct tle_stream *us, struct rte_mbuf *pkt[], const struct sockaddr_in *d4; const struct sockaddr_in6 *d6; struct tle_udp_stream *s; - const void *da; + const void *sa, *da; union udph udph; struct tle_dest dst; struct tle_drb *drb[num]; + uint8_t ufo; s = UDP_STREAM(us); + if (rwl_acquire(&s->tx.use) < 0) { + rte_errno = EPIPE; /* tx is shutdown */ + return 0; + } + type = s->s.type; /* start filling UDP header. */ @@ -523,7 +612,10 @@ tle_udp_stream_send(struct tle_stream *us, struct rte_mbuf *pkt[], /* figure out what destination addr/port to use. */ if (dst_addr != NULL) { - if (dst_addr->sa_family != s->prm.remote_addr.ss_family) { + if (dst_addr->sa_family != s->prm.remote_addr.ss_family && + (s->prm.remote_addr.ss_family == AF_INET || + !IN6_IS_ADDR_UNSPECIFIED(&s->s.ipv6.addr.dst))) { + rwl_release(&s->tx.use); rte_errno = EINVAL; return 0; } @@ -531,21 +623,28 @@ tle_udp_stream_send(struct tle_stream *us, struct rte_mbuf *pkt[], d4 = (const struct sockaddr_in *)dst_addr; da = &d4->sin_addr; udph.ports.dst = d4->sin_port; + sa = &s->s.ipv4.addr.dst; } else { d6 = (const struct sockaddr_in6 *)dst_addr; da = &d6->sin6_addr; udph.ports.dst = d6->sin6_port; + sa = &s->s.ipv6.addr.dst; } } else { udph.ports.dst = s->s.port.src; - if (type == TLE_V4) + if (type == TLE_V4) { da = &s->s.ipv4.addr.src; - else + sa = &s->s.ipv4.addr.dst; + } + else { da = &s->s.ipv6.addr.src; + sa = &s->s.ipv6.addr.dst; + } } - di = stream_get_dest(&s->s, da, &dst); + di = stream_get_dest(type, &s->s, sa, da, &dst); if (di < 0) { + rwl_release(&s->tx.use); rte_errno = -di; return 0; } @@ -553,12 +652,7 @@ tle_udp_stream_send(struct tle_stream *us, struct rte_mbuf *pkt[], pid = 
rte_atomic32_add_return(&dst.dev->tx.packet_id[type], num) - num; mtu = dst.mtu - dst.l2_len - dst.l3_len; - /* mark stream as not closable. */ - if (rwl_acquire(&s->tx.use) < 0) { - rte_errno = EAGAIN; - return 0; - } - + ufo = dst.dev->prm.tx_offload & DEV_TX_OFFLOAD_UDP_TSO; nb = 0; for (i = 0, k = 0; k != num; k = i) { @@ -568,7 +662,7 @@ tle_udp_stream_send(struct tle_stream *us, struct rte_mbuf *pkt[], ol_flags = dst.dev->tx.ol_flags[type]; while (i != num && frg == 0) { - frg = pkt[i]->pkt_len > mtu; + frg = (!ufo) && pkt[i]->pkt_len > mtu; if (frg != 0) ol_flags &= ~PKT_TX_UDP_CKSUM; rc = udp_fill_mbuf(pkt[i], type, ol_flags, pid + i, diff --git a/lib/libtle_l4p/udp_stream.c b/lib/libtle_l4p/udp_stream.c index 29f5a40..0cd5c27 100644 --- a/lib/libtle_l4p/udp_stream.c +++ b/lib/libtle_l4p/udp_stream.c @@ -43,74 +43,87 @@ fini_stream(struct tle_udp_stream *s) static void udp_fini_streams(struct tle_ctx *ctx) { - uint32_t i; - struct tle_udp_stream *s; + struct udp_streams *us; + struct tle_stream *s; + + us = CTX_UDP_STREAMS(ctx); + if (us != NULL) { + stbl_fini(&us->st); + + while (ctx->streams.nb_free--) { + s = STAILQ_FIRST(&ctx->streams.free); + STAILQ_FIRST(&ctx->streams.free) = STAILQ_NEXT(s, link); + fini_stream(UDP_STREAM(s)); + } - s = ctx->streams.buf; - if (s != NULL) { - for (i = 0; i != ctx->prm.max_streams; i++) - fini_stream(s + i); } - rte_free(s); + rte_free(us); ctx->streams.buf = NULL; STAILQ_INIT(&ctx->streams.free); } +/* stream memory layout: + * [tle_udp_stream] [rx.q] [tx.drb.r] + */ static int -init_stream(struct tle_ctx *ctx, struct tle_udp_stream *s) +add_stream(struct tle_ctx *ctx) { - size_t bsz, rsz, sz; - uint32_t i, k, n, nb; + size_t sz_s, sz_rxq, sz_drb_r, sz; + /* for rx.q */ + uint32_t n_rxq; + /* for tx.drb.r */ + size_t bsz, rsz; struct tle_drb *drb; - char name[RTE_RING_NAMESIZE]; + uint32_t k, nb, n_drb; - /* init RX part. 
*/ - - n = RTE_MAX(ctx->prm.max_stream_rbufs, 1U); - n = rte_align32pow2(n); - sz = rte_ring_get_memsize(n); - - s->rx.q = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, - ctx->prm.socket_id); - if (s->rx.q == NULL) { - UDP_LOG(ERR, "%s(%p): allocation of %zu bytes on socket %d " - "failed with error code: %d\n", - __func__, s, sz, ctx->prm.socket_id, rte_errno); - return -ENOMEM; - } + uint32_t i, f; + char name[RTE_RING_NAMESIZE]; + struct tle_udp_stream *s; - snprintf(name, sizeof(name), "%p@%zu", s, sz); - rte_ring_init(s->rx.q, name, n, RING_F_SP_ENQ); + // stream + sz_s = RTE_ALIGN_CEIL(sizeof(*s), RTE_CACHE_LINE_SIZE); - /* init TX part. */ + // rx.q + n_rxq = RTE_MAX(ctx->prm.max_stream_rbufs, 1U); + n_rxq = rte_align32pow2(n_rxq); + sz_rxq = rte_ring_get_memsize(n_rxq); + sz_rxq = RTE_ALIGN_CEIL(sz_rxq, RTE_CACHE_LINE_SIZE); + // tx.drb.r nb = drb_nb_elem(ctx); k = calc_stream_drb_num(ctx, nb); - n = rte_align32pow2(k); - - /* size of the drbs ring */ - rsz = rte_ring_get_memsize(n); + n_drb = rte_align32pow2(k); + rsz = rte_ring_get_memsize(n_drb); /* size of the drbs ring */ rsz = RTE_ALIGN_CEIL(rsz, RTE_CACHE_LINE_SIZE); + bsz = tle_drb_calc_size(nb); /* size of the drb. */ + sz_drb_r = rsz + bsz * k; /* total stream drbs size. */ + sz_drb_r = RTE_ALIGN_CEIL(sz_drb_r, RTE_CACHE_LINE_SIZE); - /* size of the drb. */ - bsz = tle_drb_calc_size(nb); - - /* total stream drbs size. 
*/ - sz = rsz + bsz * k; - - s->tx.drb.r = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, - ctx->prm.socket_id); - if (s->tx.drb.r == NULL) { - UDP_LOG(ERR, "%s(%p): allocation of %zu bytes on socket %d " + sz = sz_s + sz_rxq + sz_drb_r; + s = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, + ctx->prm.socket_id); + if (s == NULL) { + UDP_LOG(ERR, "%s: allocation of %zu bytes on socket %d " "failed with error code: %d\n", - __func__, s, sz, ctx->prm.socket_id, rte_errno); + __func__, sz, ctx->prm.socket_id, rte_errno); return -ENOMEM; } - snprintf(name, sizeof(name), "%p@%zu", s, sz); - rte_ring_init(s->tx.drb.r, name, n, 0); + s->rx.q = (struct rte_ring *)((uintptr_t)s + sz_s); + s->tx.drb.r = (struct rte_ring *)((uintptr_t)s->rx.q + sz_rxq); + + // ring flags + f = ((ctx->prm.flags & TLE_CTX_FLAG_ST) == 0) ? 0 : + (RING_F_SP_ENQ | RING_F_SC_DEQ); + + /* init RX part. */ + snprintf(name, sizeof(name), "%p@%zu", s->rx.q, sz_rxq); + rte_ring_init(s->rx.q, name, n_rxq, f); + /* init TX part. 
*/ + snprintf(name, sizeof(name), "%p@%zu", s->tx.drb.r, sz_drb_r); + rte_ring_init(s->tx.drb.r, name, n_drb, f); for (i = 0; i != k; i++) { drb = (struct tle_drb *)((uintptr_t)s->tx.drb.r + rsz + bsz * i); @@ -146,38 +159,59 @@ udp_init_streams(struct tle_ctx *ctx) size_t sz; uint32_t i; int32_t rc; - struct tle_udp_stream *s; + struct udp_streams *us; - sz = sizeof(*s) * ctx->prm.max_streams; - s = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, + sz = sizeof(*us); + us = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, ctx->prm.socket_id); - if (s == NULL) { + if (us == NULL) { UDP_LOG(ERR, "allocation of %zu bytes on socket %d " "for %u udp_streams failed\n", sz, ctx->prm.socket_id, ctx->prm.max_streams); return -ENOMEM; } - ctx->streams.buf = s; + ctx->streams.buf = us; STAILQ_INIT(&ctx->streams.free); - for (i = 0; i != ctx->prm.max_streams; i++) { - rc = init_stream(ctx, s + i); - if (rc != 0) { - UDP_LOG(ERR, "initalisation of %u-th stream failed", i); - udp_fini_streams(ctx); - return rc; - } + rc = stbl_init(&us->st, (ctx->prm.flags & TLE_CTX_FLAG_ST) == 0); + if (rc < 0) { + UDP_LOG(ERR, "failed to init UDP stbl: rc = %dl\n", rc); + return rc; } - return 0; + for (i = 0; rc == 0 && i != ctx->prm.min_streams; i++) + rc = add_stream(ctx); + + if (rc != 0) { + UDP_LOG(ERR, "initalisation of %u-th stream failed", i); + udp_fini_streams(ctx); + } + + return rc; } -static void __attribute__((constructor)) +static uint32_t +udp_more_streams(struct tle_ctx *ctx) +{ + uint32_t i, nb; + uint32_t nb_max = ctx->prm.max_streams; + uint32_t nb_cur = ctx->streams.nb_cur; + + nb = RTE_MIN(ctx->prm.delta_streams, nb_max - nb_cur); + for (i = 0; i < nb; i++) + if (add_stream(ctx) != 0) + break; + + return i; +} + +static void __attribute__((constructor(101))) udp_stream_setup(void) { static const struct stream_ops udp_ops = { .init_streams = udp_init_streams, + .more_streams = udp_more_streams, .fini_streams = udp_fini_streams, .free_drbs = udp_free_drbs, }; @@ -188,8 
+222,8 @@ udp_stream_setup(void) static inline void stream_down(struct tle_udp_stream *s) { - rwl_down(&s->rx.use); - rwl_down(&s->tx.use); + rwl_try_down(&s->rx.use); + rwl_try_down(&s->tx.use); } static inline void @@ -224,6 +258,59 @@ check_stream_prm(const struct tle_ctx *ctx, } struct tle_stream * +tle_udp_stream_set(struct tle_stream *ts, struct tle_ctx *ctx, + const struct tle_udp_stream_param *prm) +{ + struct tle_udp_stream *s; + int32_t rc; + + if (ctx == NULL || prm == NULL || check_stream_prm(ctx, prm) != 0) { + tle_udp_stream_close(ts); + rte_errno = EINVAL; + return NULL; + } + + s = UDP_STREAM(ts); + + /* free stream's destination port */ + rc = stream_clear_ctx(ctx, &s->s); + + if (s->ste) { + stbl_del_stream(CTX_UDP_STLB(ctx), s->ste, ts); + s->ste = NULL; + } + + /* copy input parameters. */ + s->prm = *prm; + s->s.option.raw = prm->option; + + /* setup L4 ports and L3 addresses fields. */ + rc = stream_fill_ctx(ctx, &s->s, + (const struct sockaddr *)&prm->local_addr, + (const struct sockaddr *)&prm->remote_addr); + + if (rc != 0) + goto error; + + /* add stream to the table for non-listen type stream */ + if (!is_empty_addr((const struct sockaddr *)&prm->remote_addr)) { + s->ste = stbl_add_stream(CTX_UDP_STLB(ctx), &s->s); + if (s->ste == NULL) { + rc = EEXIST; + goto error; + } + } + + return &s->s; + +error: + tle_udp_stream_close(ts); + rte_errno = rc; + return NULL; + +} + +struct tle_stream * tle_udp_stream_open(struct tle_ctx *ctx, const struct tle_udp_stream_param *prm) { @@ -237,42 +324,80 @@ tle_udp_stream_open(struct tle_ctx *ctx, s = (struct tle_udp_stream *)get_stream(ctx); if (s == NULL) { - rte_errno = ENFILE; - return NULL; - - /* some TX still pending for that stream. */ - } else if (UDP_STREAM_TX_PENDING(s)) { - put_stream(ctx, &s->s, 0); rte_errno = EAGAIN; return NULL; } /* copy input parameters. */ s->prm = *prm; + s->s.option.raw = prm->option; /* setup L4 ports and L3 addresses fields. 
*/ rc = stream_fill_ctx(ctx, &s->s, (const struct sockaddr *)&prm->local_addr, (const struct sockaddr *)&prm->remote_addr); - if (rc != 0) { - put_stream(ctx, &s->s, 1); - s = NULL; - rte_errno = rc; - } else { - /* setup stream notification menchanism */ - s->rx.ev = prm->recv_ev; - s->rx.cb = prm->recv_cb; - s->tx.ev = prm->send_ev; - s->tx.cb = prm->send_cb; - - /* mark stream as avaialbe for RX/TX */ - if (s->tx.ev != NULL) - tle_event_raise(s->tx.ev); - stream_up(s); + if (rc != 0) + goto error; + + /* add stream to the table for non-listen type stream */ + if (!is_empty_addr((const struct sockaddr *)&prm->remote_addr)) { + s->ste = stbl_add_stream(CTX_UDP_STLB(ctx), &s->s); + if (s->ste == NULL) { + rc = EEXIST; + goto error; + } } + /* setup stream notification menchanism */ + s->rx.ev = prm->recv_ev; + s->rx.cb = prm->recv_cb; + s->tx.ev = prm->send_ev; + s->tx.cb = prm->send_cb; + + /* mark stream as avaialbe for RX/TX */ + if (s->tx.ev != NULL) + tle_event_raise(s->tx.ev); + stream_up(s); + return &s->s; + +error: + put_stream(ctx, &s->s, 1); + rte_errno = rc; + return NULL; +} + +int +tle_udp_stream_shutdown(struct tle_stream *us, int how) +{ + bool shut_rd = false; + bool shut_wr = false; + struct tle_udp_stream *s = UDP_STREAM(us); + + switch (how) { + case SHUT_RD: + shut_rd = true; + rwl_down(&s->rx.use); + break; + case SHUT_WR: + shut_wr = true; + rwl_down(&s->tx.use); + break; + case SHUT_RDWR: + shut_rd = true; + shut_wr = true; + stream_down(s); + break; + default: + return -EINVAL; + } + + if (shut_rd && s->rx.ev != NULL) + tle_event_raise(s->rx.ev); + if (shut_wr && s->tx.ev != NULL) + tle_event_raise(s->tx.ev); + return 0; } int @@ -312,6 +437,11 @@ tle_udp_stream_close(struct tle_stream *us) /* empty stream's RX queue */ empty_mbuf_ring(s->rx.q); + if (s->ste) { + stbl_del_stream(CTX_UDP_STLB(ctx), s->ste, us); + s->ste = NULL; + } + /* * mark the stream as free again. 
* if there still are pkts queued for TX, @@ -344,3 +474,56 @@ tle_udp_stream_get_param(const struct tle_stream *us, return 0; } + +/* + * helper function, updates stream config + */ +static inline int +stream_update_cfg(struct tle_stream *us, struct tle_udp_stream_param *prm) +{ + struct tle_udp_stream *s; + + s = UDP_STREAM(us); + + /* setup stream notification menchanism */ + s->rx.ev = prm->recv_ev; + s->rx.cb = prm->recv_cb; + s->tx.ev = prm->send_ev; + s->tx.cb = prm->send_cb; + + rte_smp_wmb(); + + /* invoke async notifications, if any */ + if (rte_ring_count(s->rx.q) != 0) { + if (s->rx.ev != NULL) + tle_event_raise(s->rx.ev); + else if (s->rx.cb.func != NULL) + s->rx.cb.func(s->rx.cb.data, &s->s); + } + + /* always ok to write */ + if (s->tx.ev != NULL) + tle_event_raise(s->tx.ev); + else if (s->tx.cb.func != NULL) + s->tx.cb.func(s->tx.cb.data, &s->s); + + return 0; +} + +uint32_t +tle_udp_stream_update_cfg(struct tle_stream *us[], + struct tle_udp_stream_param prm[], uint32_t num) +{ + int32_t rc; + uint32_t i; + + for (i = 0; i != num; i++) { + rc = stream_update_cfg(us[i], &prm[i]); + if (rc != 0) { + rte_errno = -rc; + break; + } + } + + return i; +} diff --git a/lib/libtle_l4p/udp_stream.h b/lib/libtle_l4p/udp_stream.h index a950e56..55a66f8 100644 --- a/lib/libtle_l4p/udp_stream.h +++ b/lib/libtle_l4p/udp_stream.h @@ -24,6 +24,7 @@ #include "osdep.h" #include "ctx.h" #include "stream.h" +#include "stream_table.h" #ifdef __cplusplus extern "C" { @@ -41,6 +42,7 @@ union udph { struct tle_udp_stream { struct tle_stream s; + struct stbl_entry *ste; /* entry in streams table. 
*/ struct { struct rte_ring *q; @@ -63,6 +65,13 @@ struct tle_udp_stream { struct tle_udp_stream_param prm; } __rte_cache_aligned; +struct udp_streams { + struct stbl st; +}; + +#define CTX_UDP_STREAMS(ctx) ((struct udp_streams *)(ctx)->streams.buf) +#define CTX_UDP_STLB(ctx) (&CTX_UDP_STREAMS(ctx)->st) + #define UDP_STREAM(p) \ ((struct tle_udp_stream *)((uintptr_t)(p) - offsetof(struct tle_udp_stream, s))) diff --git a/lib/libtle_timer/timer.c b/lib/libtle_timer/timer.c index 8b89fd6..a0169ef 100644 --- a/lib/libtle_timer/timer.c +++ b/lib/libtle_timer/timer.c @@ -134,6 +134,30 @@ put_timer(struct tle_timer_list *list, struct tle_timer_elmt *e) list->num++; } +static inline struct tle_timer_elmt * +get_free_timer(struct tle_timer_wheel *tw) +{ + unsigned i, n; + struct tle_timer_elmt *e; + + e = LIST_FIRST(&tw->free.head); + if (e == NULL) { + n = 128; + n = RTE_MIN(n, tw->prm.max_timer - tw->free.num); + for (i = 0; i < n; i++) { + e = rte_zmalloc_socket(NULL, sizeof(*e), + sizeof(e), tw->prm.socket_id); + if (e != NULL) + put_timer(&tw->free, e); + else + rte_panic("Failed to allocate timer"); + } + } + + e = get_timer(&tw->free); + return e; +} + static inline void rem_timer(struct tle_timer_list *list, struct tle_timer_elmt *e) { @@ -149,8 +173,6 @@ tle_timer_create(struct tle_timer_wheel_args *prm, uint64_t now) uint32_t i, j; size_t sz; struct tle_timer_wheel *tw; - struct tle_timer_elmt *e; - struct tle_timer_elmt *timers; if (prm == NULL) { rte_errno = -EINVAL; @@ -169,7 +191,7 @@ tle_timer_create(struct tle_timer_wheel_args *prm, uint64_t now) return NULL; } - sz = sizeof(*tw) + prm->max_timer * sizeof(struct tle_timer_elmt); + sz = sizeof(*tw); /* allocate memory */ tw = rte_zmalloc_socket(NULL, sz, RTE_CACHE_LINE_SIZE, @@ -182,17 +204,11 @@ tle_timer_create(struct tle_timer_wheel_args *prm, uint64_t now) tw->last_run_time = now; tw->prm = *prm; - timers = (struct tle_timer_elmt *)(tw + 1); /* initialize the lists */ LIST_INIT(&tw->free.head); 
LIST_INIT(&tw->expired.head); - for (i = 0; i < prm->max_timer; i++) { - e = timers + i; - put_timer(&tw->free, e); - } - for (i = 0; i < TW_N_RINGS; i++) for (j = 0; j < TW_SLOTS_PER_RING; j++) LIST_INIT(&tw->w[i][j].head); @@ -223,11 +239,6 @@ tle_timer_start(struct tle_timer_wheel *tw, void *obj, uint64_t interval) return NULL; } - if (tw->free.num == 0) { - rte_errno = ENOMEM; - return NULL; - } - nb_tick = interval / tw->prm.tick_size; fast_ring_index = nb_tick & TW_RING_MASK; @@ -248,7 +259,7 @@ tle_timer_start(struct tle_timer_wheel *tw, void *obj, uint64_t interval) slow_ring_index %= TW_SLOTS_PER_RING; ts = &tw->w[TW_RING_SLOW][slow_ring_index]; - e = get_timer(&tw->free); + e = get_free_timer(tw); e->obj = obj; e->fast_index = fast_ring_index; put_timer(ts, e); @@ -260,7 +271,7 @@ tle_timer_start(struct tle_timer_wheel *tw, void *obj, uint64_t interval) /* Timer expires less than 51.2 seconds from now */ ts = &tw->w[TW_RING_FAST][fast_ring_index]; - e = get_timer(&tw->free); + e = get_free_timer(tw); e->obj = obj; put_timer(ts, e); |