summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--plugins/lb-plugin/lb/lb.h6
-rw-r--r--plugins/lb-plugin/lb/lbhash.h196
-rw-r--r--plugins/lb-plugin/lb/node.c314
3 files changed, 307 insertions, 209 deletions
diff --git a/plugins/lb-plugin/lb/lb.h b/plugins/lb-plugin/lb/lb.h
index 09cfde3e378..882b9b30f7e 100644
--- a/plugins/lb-plugin/lb/lb.h
+++ b/plugins/lb-plugin/lb/lb.h
@@ -116,8 +116,10 @@ typedef struct {
} lb_new_flow_entry_t;
#define lb_foreach_vip_counter \
- _(TRACKED_SESSION, "tracked session", 0) \
- _(UNTRACKED_PACKET, "untracked packet", 1)
+ _(NEXT_PACKET, "packet from existing sessions", 0) \
+ _(FIRST_PACKET, "first session packet", 1) \
+ _(UNTRACKED_PACKET, "untracked packet", 2) \
+ _(NO_SERVER, "no server configured", 3)
typedef enum {
#define _(a,b,c) LB_VIP_COUNTER_##a = c,
diff --git a/plugins/lb-plugin/lb/lbhash.h b/plugins/lb-plugin/lb/lbhash.h
index 12e892569fe..d47b49828fa 100644
--- a/plugins/lb-plugin/lb/lbhash.h
+++ b/plugins/lb-plugin/lb/lbhash.h
@@ -31,46 +31,63 @@
#include <vnet/vnet.h>
-#define LBHASH_ENTRY_PER_BUCKET_LOG2 2
-#define LBHASH_ENTRY_PER_BUCKET (1 << LBHASH_ENTRY_PER_BUCKET_LOG2)
-#define LBHASH_ENTRY_PER_BUCKET_MASK (LBHASH_ENTRY_PER_BUCKET - 1)
+#include <immintrin.h>
+/*
+ * @brief Number of entries per bucket.
+ */
+#define LBHASH_ENTRY_PER_BUCKET 4
+
+#define LB_HASH_DO_NOT_USE_SSE_BUCKETS 0
+
+/*
+ * @brief One bucket contains 4 entries.
+ * Each bucket takes one 64B cache line in memory.
+ */
typedef struct {
- u64 key[5];
- u32 value;
- u32 last_seen;
-} lb_hash_entry_t;
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ /* Full 32-bit hash stored for each of the bucket's entries. */
+ u32 hash[LBHASH_ENTRY_PER_BUCKET];
+ /* Expiry time of each entry: written as (time_now + table timeout). */
+ u32 timeout[LBHASH_ENTRY_PER_BUCKET];
+ /* VIP identifier of each entry; compared together with the hash on lookup. */
+ u32 vip[LBHASH_ENTRY_PER_BUCKET];
+ /* Value associated with each entry (what a successful lookup returns). */
+ u32 value[LBHASH_ENTRY_PER_BUCKET];
+} lb_hash_bucket_t;
typedef struct {
+ /* Number of buckets minus one; ANDed with a hash to select a bucket. */
u32 buckets_mask;
+ /* Entry lifetime; added to the current time when an entry is written or hit. */
u32 timeout;
- lb_hash_entry_t entries[];
+ /* Bucket array (flexible array member), sized at allocation time. */
+ lb_hash_bucket_t buckets[];
} lb_hash_t;
-#define lb_hash_nbuckets(h) (((h)->buckets_mask >> LBHASH_ENTRY_PER_BUCKET_LOG2) + 1)
+#define lb_hash_nbuckets(h) (((h)->buckets_mask) + 1)
-#define lb_hash_size(h) ((h)->buckets_mask + LBHASH_ENTRY_PER_BUCKET)
+/*
+ * Total number of entries in the table. buckets_mask now counts buckets
+ * (minus one) rather than entries, so the entry count is the bucket count
+ * times the number of entries per bucket.
+ */
+#define lb_hash_size(h) (lb_hash_nbuckets(h) * LBHASH_ENTRY_PER_BUCKET)
-#define lb_hash_foreach_entry(h, e) \
- for (e = (h)->entries; e < h->entries + lb_hash_size(h); e++)
+#define lb_hash_foreach_bucket(h, bucket) \
+ for (bucket = (h)->buckets; \
+ bucket < (h)->buckets + lb_hash_nbuckets(h); \
+ bucket++)
+
+#define lb_hash_foreach_entry(h, bucket, i) \
+ lb_hash_foreach_bucket(h, bucket) \
+ for (i = 0; i < LBHASH_ENTRY_PER_BUCKET; i++)
-#define lb_hash_foreach_valid_entry(h, e, now) \
- lb_hash_foreach_entry(h, e) \
- if (!clib_u32_loop_gt((now), (e)->last_seen + (h)->timeout))
+#define lb_hash_foreach_valid_entry(h, bucket, i, now) \
+ lb_hash_foreach_entry(h, bucket, i) \
+ if (!clib_u32_loop_gt((now), bucket->timeout[i]))
+/*
+ * @brief Allocate a flow table.
+ * @param buckets Number of buckets; must be a non-zero power of two.
+ * @param timeout Entry lifetime, stored in the table and added to the
+ *                current time whenever an entry is written or hit.
+ * @return The new table, or NULL when 'buckets' is not acceptable.
+ */
static_always_inline
lb_hash_t *lb_hash_alloc(u32 buckets, u32 timeout)
{
- if ((!is_pow2(buckets)) ||
- ((buckets << LBHASH_ENTRY_PER_BUCKET_LOG2) == 0))
+ // Keep rejecting a zero bucket count, as the previous check did:
+ // (buckets - 1) would otherwise wrap buckets_mask to all ones while only
+ // a single bucket is allocated.
+ // NOTE(review): is_pow2(0) presumably evaluates true - confirm in clib.
+ if (buckets == 0 || !is_pow2(buckets))
return NULL;
// Allocate 1 more bucket for prefetch
- u32 size = sizeof(lb_hash_t) + ((buckets << LBHASH_ENTRY_PER_BUCKET_LOG2) + 1)* sizeof(lb_hash_entry_t);
+ // Header size is taken as the offset of buckets[0] within lb_hash_t.
+ u32 size = ((u64)&((lb_hash_t *)(0))->buckets[0]) +
+ sizeof(lb_hash_bucket_t) * (buckets + 1);
u8 *mem = 0;
lb_hash_t *h;
vec_alloc_aligned(mem, size, CLIB_CACHE_LINE_BYTES);
h = (lb_hash_t *)mem;
- h->buckets_mask = (buckets - 1) << LBHASH_ENTRY_PER_BUCKET_LOG2;
+ h->buckets_mask = (buckets - 1);
h->timeout = timeout;
return h;
}
@@ -78,102 +95,117 @@ lb_hash_t *lb_hash_alloc(u32 buckets, u32 timeout)
+/*
+ * @brief Release a table obtained from lb_hash_alloc().
+ * The table is handed back to vec_free() through a u8 pointer, the same
+ * pointer type lb_hash_alloc() used when it allocated the memory.
+ */
static_always_inline
void lb_hash_free(lb_hash_t *h)
{
- vec_free(h);
+ u8 *mem = (u8 *)h;
+ vec_free(mem);
}
#if __SSE4_2__
+/*
+ * @brief Hash five 64-bit key words into 32 bits (SSE4.2 variant).
+ * Chains _mm_crc32_u64 over k0..k4 starting from 0 and returns the
+ * result truncated to u32.
+ */
static_always_inline
-u32 lb_hash_crc_u32(u32 data, u32 value)
-{
- __asm__ volatile( "crc32l %[data], %[value];"
- : [value] "+r" (value)
- : [data] "rm" (data));
- return value;
-}
-
-static_always_inline
-u32 lb_hash_hash(u64 k[5])
+u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4)
{
- u32 * dp = (u32 *) k;
- u32 value = 0;
-
- value = lb_hash_crc_u32 (dp[0], value);
- value = lb_hash_crc_u32 (dp[1], value);
- value = lb_hash_crc_u32 (dp[2], value);
- value = lb_hash_crc_u32 (dp[3], value);
- value = lb_hash_crc_u32 (dp[4], value);
- value = lb_hash_crc_u32 (dp[5], value);
- value = lb_hash_crc_u32 (dp[6], value);
- value = lb_hash_crc_u32 (dp[7], value);
- value = lb_hash_crc_u32 (dp[8], value);
- value = lb_hash_crc_u32 (dp[9], value);
- return value;
+ u64 val = 0;
+ val = _mm_crc32_u64(val, k0);
+ val = _mm_crc32_u64(val, k1);
+ val = _mm_crc32_u64(val, k2);
+ val = _mm_crc32_u64(val, k3);
+ val = _mm_crc32_u64(val, k4);
+ return (u32) val;
}
#else
+/*
+ * @brief Hash five 64-bit key words into 32 bits (fallback variant).
+ * The words are XORed together before a single clib_xxhash call, so
+ * identical words cancel each other out and word order does not matter.
+ */
static_always_inline
-u32 lb_hash_hash(u64 k[5])
+u32 lb_hash_hash(u64 k0, u64 k1, u64 k2, u64 k3, u64 k4)
{
- u64 tmp = k[0] ^ k[1] ^ k[2] ^ k[3] ^ k[4];
+ u64 tmp = k0 ^ k1 ^ k2 ^ k3 ^ k4;
return (u32)clib_xxhash (tmp);
}
#endif
-
+/*
+ * @brief Prefetch, for reading, the bucket that 'hash' selects in 'ht'.
+ */
+static_always_inline
+void lb_hash_prefetch_bucket(lb_hash_t *ht, u32 hash)
+{
+ u32 slot = hash & ht->buckets_mask;
+ CLIB_PREFETCH(ht->buckets + slot, sizeof(lb_hash_bucket_t), READ);
+}
+/*
+ * @brief Look up (hash, vip) in the bucket selected by 'hash'.
+ * @param ht Table to search.
+ * @param hash Flow hash; selects the bucket and is part of the match.
+ * @param vip VIP identifier; must match as well.
+ * @param time_now Current time, compared against each entry's expiry time.
+ * @param available_index Out: index within the bucket of an expired entry
+ *        that may be overwritten, or ~0 if none was seen.
+ * @param found_value Out: value of the live matching entry, or ~0 when
+ *        there is no hit. A hit also refreshes the entry's expiry time
+ *        to time_now + ht->timeout.
+ */
static_always_inline
-void lb_hash_get(lb_hash_t *h, u64 k[5], u32 hash, u32 time_now, u32 *available_index, u32 *value)
+void lb_hash_get(lb_hash_t *ht, u32 hash, u32 vip, u32 time_now,
+ u32 *available_index, u32 *found_value)
{
- lb_hash_entry_t *e = &h->entries[hash & h->buckets_mask];
- u32 i;
- *value = ~0;
+ lb_hash_bucket_t *bucket = &ht->buckets[hash & ht->buckets_mask];
+ *found_value = ~0;
*available_index = ~0;
- CLIB_PREFETCH (&(e[1]), sizeof(lb_hash_entry_t), STORE);
- for (i=0; i<LBHASH_ENTRY_PER_BUCKET; i++) {
- CLIB_PREFETCH (&(e[i+2]), sizeof(lb_hash_entry_t), STORE); //+2 somehow performs best
- u64 cmp =
- (e[i].key[0] ^ k[0]) |
- (e[i].key[1] ^ k[1]) |
- (e[i].key[2] ^ k[2]) |
- (e[i].key[3] ^ k[3]) |
- (e[i].key[4] ^ k[4]);
-
- u8 timeouted = clib_u32_loop_gt(time_now, e[i].last_seen + h->timeout);
-
- *value = (cmp || timeouted)?*value:e[i].value;
- e[i].last_seen = (cmp || timeouted)?e[i].last_seen:time_now;
- *available_index = (timeouted && (*available_index == ~0))?(&e[i] - h->entries):*available_index;
-
- if (!cmp)
- return;
+#if __SSE4_2__ && LB_HASH_DO_NOT_USE_SSE_BUCKETS == 0
+ u32 bitmask, found_index;
+ __m128i mask;
+
+ // mask[*] = timeout[*] > now (entry is still live)
+ // NOTE(review): _mm_cmpgt_epi32 is a signed compare, unlike the
+ // clib_u32_loop_gt() test of the scalar path - confirm wrap behaviour.
+ mask = _mm_cmpgt_epi32(_mm_loadu_si128 ((__m128i *) bucket->timeout),
+ _mm_set1_epi32 (time_now));
+ // bitmask[b] = timeout[b/4] <= now (entry b/4 has expired); 4 bits per entry
+ bitmask = (~_mm_movemask_epi8(mask)) & 0xffff;
+ // Get the first expired entry, if any.
+ *available_index = (bitmask)?__builtin_ctz(bitmask)/4:*available_index;
+
+ // mask[*] = (timeout[*] > now) && (hash[*] == hash)
+ mask = _mm_and_si128(mask,
+ _mm_cmpeq_epi32(
+ _mm_loadu_si128 ((__m128i *) bucket->hash),
+ _mm_set1_epi32 (hash)));
+
+ // Load the array of vip values
+ // mask[*] = (timeout[*] > now) && (hash[*] == hash) && (vip[*] == vip)
+ mask = _mm_and_si128(mask,
+ _mm_cmpeq_epi32(
+ _mm_loadu_si128 ((__m128i *) bucket->vip),
+ _mm_set1_epi32 (vip)));
+
+ // bitmask[b] = (timeout[b/4] > now) && (hash[b/4] == hash) && (vip[b/4] == vip)
+ bitmask = _mm_movemask_epi8(mask);
+ // Get first index, if any
+ found_index = (bitmask)?__builtin_ctzll(bitmask)/4:0;
+ ASSERT(found_index < 4);
+ *found_value = (bitmask)?bucket->value[found_index]:*found_value;
+ // Without a hit this rewrites entry 0's timeout with its own value.
+ bucket->timeout[found_index] =
+ (bitmask)?time_now + ht->timeout:bucket->timeout[found_index];
+#else
+ u32 i;
+ for (i = 0; i < LBHASH_ENTRY_PER_BUCKET; i++) {
+ // cmp is non-zero when the entry MATCHES the key (the removed code used
+ // the opposite polarity), so a hit is a matching entry that is still live.
+ u8 cmp = (bucket->hash[i] == hash && bucket->vip[i] == vip);
+ u8 timeouted = clib_u32_loop_gt(time_now, bucket->timeout[i]);
+ u8 hit = (cmp && !timeouted);
+ *found_value = (hit)?bucket->value[i]:*found_value;
+ bucket->timeout[i] = (hit)?time_now + ht->timeout:bucket->timeout[i];
+ *available_index = (timeouted && (*available_index == ~0))?i:*available_index;
+
+ // Stop at the first live match; keep scanning otherwise.
+ if (hit)
+ return;
}
+#endif
}
+/*
+ * @brief Read the value currently stored in a bucket slot.
+ * @param h Table.
+ * @param hash Hash selecting the bucket (masked with buckets_mask).
+ * @param available_index Slot index within that bucket; not range-checked.
+ * @return The value held in that slot.
+ */
static_always_inline
-u32 lb_hash_available_value(lb_hash_t *h, u32 available_index)
+u32 lb_hash_available_value(lb_hash_t *h, u32 hash, u32 available_index)
{
- return h->entries[available_index].value;
+ return h->buckets[hash & h->buckets_mask].value[available_index];
}
+/*
+ * @brief Overwrite one slot of the bucket selected by 'hash'.
+ * @param h Table.
+ * @param hash Hash selecting the bucket; also stored in the slot.
+ * @param value Value to store.
+ * @param vip VIP identifier to store.
+ * @param available_index Slot index within the bucket; not range-checked.
+ * @param time_now Current time; the slot expires at time_now + h->timeout.
+ */
static_always_inline
-u32 lb_hash_put(lb_hash_t *h, u64 k[5], u32 value, u32 available_index, u32 time_now)
+void lb_hash_put(lb_hash_t *h, u32 hash, u32 value, u32 vip,
+ u32 available_index, u32 time_now)
{
- lb_hash_entry_t *e = &h->entries[available_index];
- e->key[0] = k[0];
- e->key[1] = k[1];
- e->key[2] = k[2];
- e->key[3] = k[3];
- e->key[4] = k[4];
- e->value = value;
- e->last_seen = time_now;
- return 0;
+ lb_hash_bucket_t *bucket = &h->buckets[hash & h->buckets_mask];
+ bucket->hash[available_index] = hash;
+ bucket->value[available_index] = value;
+ bucket->timeout[available_index] = time_now + h->timeout;
+ bucket->vip[available_index] = vip;
}
static_always_inline
u32 lb_hash_elts(lb_hash_t *h, u32 time_now)
{
u32 tot = 0;
- lb_hash_entry_t *e;
- lb_hash_foreach_valid_entry(h, e, time_now) {
+ lb_hash_bucket_t *bucket;
+ u32 i;
+ lb_hash_foreach_valid_entry(h, bucket, i, time_now) {
tot++;
}
return tot;
diff --git a/plugins/lb-plugin/lb/node.c b/plugins/lb-plugin/lb/node.c
index 82f0cb529aa..8b763c537d5 100644
--- a/plugins/lb-plugin/lb/node.c
+++ b/plugins/lb-plugin/lb/node.c
@@ -20,8 +20,7 @@
#define foreach_lb_error \
_(NONE, "no error") \
- _(PROTO_NOT_SUPPORTED, "protocol not supported") \
- _(NO_SERVER, "no configured application server")
+ _(PROTO_NOT_SUPPORTED, "protocol not supported")
typedef enum {
#define _(sym,str) LB_ERROR_##sym,
@@ -66,19 +65,20 @@ lb_hash_t *lb_get_sticky_table(u32 cpu_index)
lb_main_t *lbm = &lb_main;
lb_hash_t *sticky_ht = lbm->per_cpu[cpu_index].sticky_ht;
//Check if size changed
- if (PREDICT_FALSE(sticky_ht && (lbm->per_cpu_sticky_buckets != lb_hash_nbuckets(sticky_ht)))) {
+ if (PREDICT_FALSE(sticky_ht && (lbm->per_cpu_sticky_buckets != lb_hash_nbuckets(sticky_ht))))
+ {
+ //Dereference everything in there
+ lb_hash_bucket_t *b;
+ u32 i;
+ lb_hash_foreach_entry(sticky_ht, b, i) {
+ vlib_refcount_add(&lbm->as_refcount, cpu_index, b->value[i], -1);
+ vlib_refcount_add(&lbm->as_refcount, cpu_index, 0, 1);
+ }
- //Dereference everything in there
- lb_hash_entry_t *e;
- lb_hash_foreach_entry(sticky_ht, e) {
- vlib_refcount_add(&lbm->as_refcount, cpu_index, e->value, -1);
- vlib_refcount_add(&lbm->as_refcount, cpu_index, 0, -1);
+ lb_hash_free(sticky_ht);
+ sticky_ht = NULL;
}
- lb_hash_free(sticky_ht);
- sticky_ht = NULL;
- }
-
//Create if necessary
if (PREDICT_FALSE(sticky_ht == NULL)) {
lbm->per_cpu[cpu_index].sticky_ht = lb_hash_alloc(lbm->per_cpu_sticky_buckets, lbm->flow_timeout);
@@ -93,6 +93,58 @@ lb_hash_t *lb_get_sticky_table(u32 cpu_index)
return sticky_ht;
}
+/*
+ * Port-equivalent hash input for IPv4 packets that are neither TCP nor UDP.
+ * Always 0 for now; the header argument is not inspected.
+ */
+u64
+lb_node_get_other_ports4(ip4_header_t *ip40)
+{
+ const u64 no_ports = 0;
+ return no_ports;
+}
+
+/*
+ * Port-equivalent hash input for IPv6 packets that are neither TCP nor UDP.
+ * Always 0 for now; the header argument is not inspected.
+ */
+u64
+lb_node_get_other_ports6(ip6_header_t *ip60)
+{
+ const u64 no_ports = 0;
+ return no_ports;
+}
+
+/*
+ * Compute the flow hash of the packet at the buffer's current position.
+ * For TCP and UDP the two 16-bit port fields found right after the fixed
+ * IP header are folded into one word; other protocols defer to the
+ * lb_node_get_other_ports{4,6} helpers. IPv4 hashes the address pair
+ * plus ports; IPv6 hashes both 128-bit addresses plus ports.
+ */
+static_always_inline u32
+lb_node_get_hash(vlib_buffer_t *p, u8 is_input_v4)
+{
+ u64 l4_ports;
+
+ if (is_input_v4)
+ {
+ ip4_header_t *ip4 = vlib_buffer_get_current (p);
+ udp_header_t *l4 = (udp_header_t *)(ip4 + 1);
+
+ if (PREDICT_TRUE (ip4->protocol == IP_PROTOCOL_TCP ||
+ ip4->protocol == IP_PROTOCOL_UDP))
+ l4_ports = ((u64)l4->src_port << 16) | ((u64)l4->dst_port);
+ else
+ l4_ports = lb_node_get_other_ports4(ip4);
+
+ return lb_hash_hash(*((u64 *)&ip4->address_pair), l4_ports,
+ 0, 0, 0);
+ }
+
+ ip6_header_t *ip6 = vlib_buffer_get_current (p);
+ udp_header_t *l4 = (udp_header_t *)(ip6 + 1);
+
+ if (PREDICT_TRUE (ip6->protocol == IP_PROTOCOL_TCP ||
+ ip6->protocol == IP_PROTOCOL_UDP))
+ l4_ports = ((u64)l4->src_port << 16) | ((u64)l4->dst_port);
+ else
+ l4_ports = lb_node_get_other_ports6(ip6);
+
+ return lb_hash_hash(ip6->src_address.as_u64[0],
+ ip6->src_address.as_u64[1],
+ ip6->dst_address.as_u64[0],
+ ip6->dst_address.as_u64[1],
+ l4_ports);
+}
+
+
static_always_inline uword
lb_node_fn (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame,
@@ -100,7 +152,6 @@ lb_node_fn (vlib_main_t * vm,
u8 is_encap_v4) //Compile-time parameter stating that is GRE encap is v4 (or v6)
{
lb_main_t *lbm = &lb_main;
- vlib_node_runtime_t *error_node = node;
u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
u32 cpu_index = os_get_cpu_number();
u32 lb_time = lb_hash_time_now(vm);
@@ -110,6 +161,10 @@ lb_node_fn (vlib_main_t * vm,
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
+ u32 nexthash0 = 0;
+ if (PREDICT_TRUE(n_left_from > 0))
+ nexthash0 = lb_node_get_hash(vlib_get_buffer (vm, from[0]), is_input_v4);
+
while (n_left_from > 0)
{
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -118,21 +173,30 @@ lb_node_fn (vlib_main_t * vm,
u32 pi0;
vlib_buffer_t *p0;
lb_vip_t *vip0;
- lb_as_t *as0;
- gre_header_t *gre0;
+ u32 asindex0;
u16 len0;
- u32 value0, available_index0, hash0;
- u64 key0[5];
- lb_error_t error0 = LB_ERROR_NONE;
+ u32 available_index0;
+ u8 counter = 0;
+ u32 hash0 = nexthash0;
if (PREDICT_TRUE(n_left_from > 1))
- {
- vlib_buffer_t *p2;
- p2 = vlib_get_buffer(vm, from[1]);
- vlib_prefetch_buffer_header(p2, STORE);
- /* IPv4 + 8 = 28. possibly plus -40 */
- CLIB_PREFETCH (vlib_buffer_get_current(p2) - 40, 128, STORE);
- }
+ {
+ vlib_buffer_t *p1 = vlib_get_buffer (vm, from[1]);
+ //Compute next hash and prefetch bucket
+ nexthash0 = lb_node_get_hash(p1, is_input_v4);
+ lb_hash_prefetch_bucket(sticky_ht, nexthash0);
+ //Prefetch for encap, next
+ CLIB_PREFETCH (vlib_buffer_get_current(p1) - 64, 64, STORE);
+ }
+
+ if (PREDICT_TRUE(n_left_from > 2))
+ {
+ vlib_buffer_t *p2;
+ p2 = vlib_get_buffer(vm, from[2]);
+ /* prefetch packet header and data */
+ vlib_prefetch_buffer_header(p2, STORE);
+ CLIB_PREFETCH (vlib_buffer_get_current(p2), 64, STORE);
+ }
pi0 = to_next[0] = from[0];
from += 1;
@@ -144,112 +208,112 @@ lb_node_fn (vlib_main_t * vm,
vip0 = pool_elt_at_index (lbm->vips,
vnet_buffer (p0)->ip.adj_index[VLIB_TX]);
- if (is_input_v4) {
- ip4_header_t *ip40;
- ip40 = vlib_buffer_get_current (p0);
- len0 = clib_net_to_host_u16(ip40->length);
- key0[0] = (u64) ip40->src_address.as_u32;
- key0[1] = (u64) ip40->dst_address.as_u32;
- key0[2] = 0;
- key0[3] = 0;
- key0[4] = ((u64)((udp_header_t *)(ip40 + 1))->src_port << 32) |
- ((u64)((udp_header_t *)(ip40 + 1))->dst_port << 16);
-
- hash0 = lb_hash_hash(key0);
- } else {
- ip6_header_t *ip60;
- ip60 = vlib_buffer_get_current (p0);
- len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t);
- key0[0] = ip60->src_address.as_u64[0];
- key0[1] = ip60->src_address.as_u64[1];
- key0[2] = ip60->dst_address.as_u64[0];
- key0[3] = ip60->dst_address.as_u64[1];
- key0[4] = ((u64)((udp_header_t *)(ip60 + 1))->src_port << 32) |
- ((u64)((udp_header_t *)(ip60 + 1))->dst_port << 16);
-
- hash0 = lb_hash_hash(key0);
- }
-
- //NOTE: This is an ugly trick to not include the VIP index in the hash calculation
- //but actually use it in the key determination.
- key0[4] |= ((vip0 - lbm->vips));
-
- lb_hash_get(sticky_ht, key0, hash0, lb_time, &available_index0, &value0);
- if (PREDICT_TRUE(value0 != ~0)) {
- //Found an existing entry
- as0 = &lbm->ass[value0];
- } else if (PREDICT_TRUE(available_index0 != ~0)) {
- //There is an available slot for a new flow
- as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index];
- if (PREDICT_FALSE(as0 == lbm->ass)) { //Special first element
- error0 = LB_ERROR_NO_SERVER;
- } else {
- vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_TRACKED_SESSION],
- cpu_index, vip0 - lbm->vips, 1);
- }
-
- //TODO: There are race conditions with as0 and vip0 manipulation.
- //Configuration may be changed, vectors resized, etc...
-
- //Dereference previously used
- vlib_refcount_add(&lbm->as_refcount, cpu_index, lb_hash_available_value(sticky_ht, available_index0), -1);
- vlib_refcount_add(&lbm->as_refcount, cpu_index, as0 - lbm->ass, 1);
-
- //Add sticky entry
- //Note that when there is no AS configured, an entry is configured anyway.
- //But no configured AS is not something that should happen
- lb_hash_put(sticky_ht, key0, as0 - lbm->ass, available_index0, lb_time);
- } else {
- //Could not store new entry in the table
- as0 = &lbm->ass[vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index];
- vlib_increment_simple_counter(&lbm->vip_counters[LB_VIP_COUNTER_UNTRACKED_PACKET],
- cpu_index, vip0 - lbm->vips, 1);
- }
+ if (is_input_v4)
+ {
+ ip4_header_t *ip40;
+ ip40 = vlib_buffer_get_current (p0);
+ len0 = clib_net_to_host_u16(ip40->length);
+ }
+ else
+ {
+ ip6_header_t *ip60;
+ ip60 = vlib_buffer_get_current (p0);
+ len0 = clib_net_to_host_u16(ip60->payload_length) + sizeof(ip6_header_t);
+ }
+
+ lb_hash_get(sticky_ht, hash0, vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ lb_time, &available_index0, &asindex0);
+
+ if (PREDICT_TRUE(asindex0 != ~0))
+ {
+ //Found an existing entry
+ counter = LB_VIP_COUNTER_NEXT_PACKET;
+ }
+ else if (PREDICT_TRUE(available_index0 != ~0))
+ {
+ //There is an available slot for a new flow
+ asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index;
+ counter = LB_VIP_COUNTER_FIRST_PACKET;
+ counter = (asindex0 == 0)?LB_VIP_COUNTER_NO_SERVER:counter;
+
+ //TODO: There are race conditions with as0 and vip0 manipulation.
+ //Configuration may be changed, vectors resized, etc...
+
+ //Dereference previously used
+ vlib_refcount_add(&lbm->as_refcount, cpu_index,
+ lb_hash_available_value(sticky_ht, hash0, available_index0), -1);
+ vlib_refcount_add(&lbm->as_refcount, cpu_index,
+ asindex0, 1);
+
+ //Add sticky entry
+ //Note that when there is no AS configured, an entry is configured anyway.
+ //But no configured AS is not something that should happen
+ lb_hash_put(sticky_ht, hash0, asindex0,
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ available_index0, lb_time);
+ }
+ else
+ {
+ //Could not store new entry in the table
+ asindex0 = vip0->new_flow_table[hash0 & vip0->new_flow_table_mask].as_index;
+ counter = LB_VIP_COUNTER_UNTRACKED_PACKET;
+ }
+
+ vlib_increment_simple_counter(&lbm->vip_counters[counter],
+ cpu_index,
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX],
+ 1);
//Now let's encap
- if (is_encap_v4) {
- ip4_header_t *ip40;
- vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t));
- ip40 = vlib_buffer_get_current(p0);
- gre0 = (gre_header_t *)(ip40 + 1);
- ip40->src_address = lbm->ip4_src_address;
- ip40->dst_address = as0->address.ip4;
- ip40->ip_version_and_header_length = 0x45;
- ip40->ttl = 128;
- ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t));
- ip40->protocol = IP_PROTOCOL_GRE;
- ip40->checksum = ip4_header_checksum (ip40);
- } else {
- ip6_header_t *ip60;
- vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t));
- ip60 = vlib_buffer_get_current(p0);
- gre0 = (gre_header_t *)(ip60 + 1);
- ip60->dst_address = as0->address.ip6;
- ip60->src_address = lbm->ip6_src_address;
- ip60->hop_limit = 128;
- ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28);
- ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t));
- ip60->protocol = IP_PROTOCOL_GRE;
- }
-
- gre0->flags_and_version = 0;
- gre0->protocol = (is_input_v4)?
- clib_host_to_net_u16(0x0800):
- clib_host_to_net_u16(0x86DD);
-
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = as0->dpo.dpoi_index;
-
- if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
{
- lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
- tr->as_index = as0 - lbm->ass;
- tr->vip_index = vip0 - lbm->vips;
+ gre_header_t *gre0;
+ if (is_encap_v4)
+ {
+ ip4_header_t *ip40;
+ vlib_buffer_advance(p0, - sizeof(ip4_header_t) - sizeof(gre_header_t));
+ ip40 = vlib_buffer_get_current(p0);
+ gre0 = (gre_header_t *)(ip40 + 1);
+ ip40->src_address = lbm->ip4_src_address;
+ ip40->dst_address = lbm->ass[asindex0].address.ip4;
+ ip40->ip_version_and_header_length = 0x45;
+ ip40->ttl = 128;
+ ip40->length = clib_host_to_net_u16(len0 + sizeof(gre_header_t) + sizeof(ip4_header_t));
+ ip40->protocol = IP_PROTOCOL_GRE;
+ ip40->checksum = ip4_header_checksum (ip40);
+ }
+ else
+ {
+ ip6_header_t *ip60;
+ vlib_buffer_advance(p0, - sizeof(ip6_header_t) - sizeof(gre_header_t));
+ ip60 = vlib_buffer_get_current(p0);
+ gre0 = (gre_header_t *)(ip60 + 1);
+ ip60->dst_address = lbm->ass[asindex0].address.ip6;
+ ip60->src_address = lbm->ip6_src_address;
+ ip60->hop_limit = 128;
+ ip60->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6<<28);
+ ip60->payload_length = clib_host_to_net_u16(len0 + sizeof(gre_header_t));
+ ip60->protocol = IP_PROTOCOL_GRE;
+ }
+
+ gre0->flags_and_version = 0;
+ gre0->protocol = (is_input_v4)?
+ clib_host_to_net_u16(0x0800):
+ clib_host_to_net_u16(0x86DD);
}
- p0->error = error_node->errors[error0];
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ lb_trace_t *tr = vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->as_index = asindex0;
+ tr->vip_index = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ }
+
+ //Enqueue to next
+ //Note that this is going to error if asindex0 == 0
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbm->ass[asindex0].dpo.dpoi_index;
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, pi0,
- as0->dpo.dpoi_next_node);
+ n_left_to_next, pi0,
+ lbm->ass[asindex0].dpo.dpoi_next_node);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}