diff options
Diffstat (limited to 'extras/deprecated')
36 files changed, 12360 insertions, 0 deletions
diff --git a/extras/deprecated/vppinfra/anneal.c b/extras/deprecated/vppinfra/anneal.c new file mode 100644 index 00000000000..35d10946482 --- /dev/null +++ b/extras/deprecated/vppinfra/anneal.c @@ -0,0 +1,172 @@ +/* + Copyright (c) 2011 Cisco and/or its affiliates. + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#include <vppinfra/anneal.h> + +/* + * Optimize an objective function by simulated annealing + * + * Here are a couple of short, easily-understood + * descriptions of simulated annealing: + * + * http://www.cs.sandia.gov/opt/survey/sa.html + * Numerical Recipes in C, 2nd ed., 444ff + * + * The description in the Wikipedia is not helpful. + * + * The algorithm tries to produce a decent answer to combinatorially + * explosive optimization problems by analogy to slow cooling + * of hot metal, aka annealing. + * + * There are (at least) three problem-dependent annealing parameters + * to consider: + * + * t0, the initial "temperature. Should be set so that the probability + * of accepting a transition to a higher cost configuration is + * initially about 0.8. + * + * ntemps, the number of temperatures to use. Each successive temperature + * is some fraction of the previous temperature. + * + * nmoves_per_temp, the number of configurations to try at each temperature + * + * It is a black art to set ntemps, nmoves_per_temp, and the rate + * at which the temperature drops. 
Go too fast with too few iterations, + * and the computation falls into a local minimum instead of the + * (desired) global minimum. + */ + +void +clib_anneal (clib_anneal_param_t * p) +{ + f64 t; + f64 cost, prev_cost, delta_cost, initial_cost, best_cost; + f64 random_accept, delta_cost_over_t; + f64 total_increase = 0.0, average_increase; + u32 i, j; + u32 number_of_increases = 0; + u32 accepted_this_temperature; + u32 best_saves_this_temperature; + int accept; + + t = p->initial_temperature; + best_cost = initial_cost = prev_cost = p->anneal_metric (p->opaque); + p->anneal_save_best_configuration (p->opaque); + + if (p->flags & CLIB_ANNEAL_VERBOSE) + fformat (stdout, "Initial cost %.2f\n", initial_cost); + + for (i = 0; i < p->number_of_temperatures; i++) + { + accepted_this_temperature = 0; + best_saves_this_temperature = 0; + + p->anneal_restore_best_configuration (p->opaque); + cost = best_cost; + + for (j = 0; j < p->number_of_configurations_per_temperature; j++) + { + p->anneal_new_configuration (p->opaque); + cost = p->anneal_metric (p->opaque); + + delta_cost = cost - prev_cost; + + /* cost function looks better, accept this move */ + if (p->flags & CLIB_ANNEAL_MINIMIZE) + accept = delta_cost < 0.0; + else + accept = delta_cost > 0.0; + + if (accept) + { + if (p->flags & CLIB_ANNEAL_MINIMIZE) + if (cost < best_cost) + { + if (p->flags & CLIB_ANNEAL_VERBOSE) + fformat (stdout, "New best cost %.2f\n", cost); + best_cost = cost; + p->anneal_save_best_configuration (p->opaque); + best_saves_this_temperature++; + } + + accepted_this_temperature++; + prev_cost = cost; + continue; + } + + /* cost function worse, keep stats to suggest t0 */ + total_increase += (p->flags & CLIB_ANNEAL_MINIMIZE) ? + delta_cost : -delta_cost; + + number_of_increases++; + + /* + * Accept a higher cost with Pr { e^(-(delta_cost / T)) }, + * equivalent to rnd[0,1] < e^(-(delta_cost / T)) + * + * AKA, the Boltzmann factor. 
+ */ + random_accept = random_f64 (&p->random_seed); + + delta_cost_over_t = delta_cost / t; + + if (random_accept < exp (-delta_cost_over_t)) + { + accepted_this_temperature++; + prev_cost = cost; + continue; + } + p->anneal_restore_previous_configuration (p->opaque); + } + + if (p->flags & CLIB_ANNEAL_VERBOSE) + { + fformat (stdout, "Temp %.2f, cost %.2f, accepted %d, bests %d\n", t, + prev_cost, accepted_this_temperature, + best_saves_this_temperature); + fformat (stdout, "Improvement %.2f\n", initial_cost - prev_cost); + fformat (stdout, "-------------\n"); + } + + t = t * p->temperature_step; + } + + /* + * Empirically, one wants the probability of accepting a move + * at the initial temperature to be about 0.8. + */ + average_increase = total_increase / (f64) number_of_increases; + p->suggested_initial_temperature = average_increase / 0.22; /* 0.22 = -ln (0.8) */ + + p->final_temperature = t; + p->final_metric = p->anneal_metric (p->opaque); + + if (p->flags & CLIB_ANNEAL_VERBOSE) + { + fformat (stdout, "Average cost increase from a bad move: %.2f\n", + average_increase); + fformat (stdout, "Suggested t0 = %.2f\n", + p->suggested_initial_temperature); + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/anneal.h b/extras/deprecated/vppinfra/anneal.h new file mode 100644 index 00000000000..148d38ba551 --- /dev/null +++ b/extras/deprecated/vppinfra/anneal.h @@ -0,0 +1,89 @@ +/* + Copyright (c) 2011 Cisco and/or its affiliates. + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#ifndef __included_anneal_h__ +#define __included_anneal_h__ + +#include <vppinfra/clib.h> +#include <vppinfra/format.h> +#include <vppinfra/random.h> +#include <math.h> + +typedef struct +{ + /* Initial temperature */ + f64 initial_temperature; + + /* Temperature fraction at each step, 0.95 is reasonable */ + f64 temperature_step; + + /* Number of temperatures used */ + u32 number_of_temperatures; + + /* Number of configurations tried at each temperature */ + u32 number_of_configurations_per_temperature; + + u32 flags; +#define CLIB_ANNEAL_VERBOSE (1<<0) +#define CLIB_ANNEAL_MINIMIZE (1<<1) /* mutually exclusive */ +#define CLIB_ANNEAL_MAXIMIZE (1<<2) /* mutually exclusive */ + + /* Random number seed, set to ensure repeatable results */ + u32 random_seed; + + /* Opaque data passed to callbacks */ + void *opaque; + + /* Final temperature (output) */ + f64 final_temperature; + + /* Final metric (output) */ + f64 final_metric; + + /* Suggested initial temperature (output) */ + f64 suggested_initial_temperature; + + + /*--- Callbacks ---*/ + + /* objective function to minimize */ + f64 (*anneal_metric) (void *opaque); + + /* Generate a new configuration */ + void (*anneal_new_configuration) (void *opaque); + + /* Restore the previous configuration */ + void (*anneal_restore_previous_configuration) (void *opaque); + + /* Save best configuration found e.g at a certain temperature */ + void (*anneal_save_best_configuration) (void *opaque); + + /* restore best configuration found e.g at a certain temperature */ + void 
(*anneal_restore_best_configuration) (void *opaque); + +} clib_anneal_param_t; + +void clib_anneal (clib_anneal_param_t * p); + +#endif /* __included_anneal_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/cuckoo_16_8.h b/extras/deprecated/vppinfra/cuckoo_16_8.h new file mode 100644 index 00000000000..b667ff6cbb8 --- /dev/null +++ b/extras/deprecated/vppinfra/cuckoo_16_8.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#undef CLIB_CUCKOO_TYPE + +#define CLIB_CUCKOO_TYPE _16_8 +#define CLIB_CUCKOO_KVP_PER_BUCKET (4) +#define CLIB_CUCKOO_LOG2_KVP_PER_BUCKET (2) +#define CLIB_CUCKOO_BFS_MAX_STEPS (2000) +#define CLIB_CUCKOO_BFS_MAX_PATH_LENGTH (8) + +#ifndef __included_cuckoo_16_8_h__ +#define __included_cuckoo_16_8_h__ + +#include <vppinfra/heap.h> +#include <vppinfra/format.h> +#include <vppinfra/pool.h> +#include <vppinfra/xxhash.h> +#include <vppinfra/cuckoo_debug.h> +#include <vppinfra/cuckoo_common.h> + +#undef CLIB_CUCKOO_OPTIMIZE_PREFETCH +#undef CLIB_CUCKOO_OPTIMIZE_UNROLL +#undef CLIB_CUCKOO_OPTIMIZE_USE_COUNT_LIMITS_SEARCH +#define CLIB_CUCKOO_OPTIMIZE_PREFETCH 1 +#define CLIB_CUCKOO_OPTIMIZE_UNROLL 1 +#define CLIB_CUCKOO_OPTIMIZE_USE_COUNT_LIMITS_SEARCH 1 + +#if __SSE4_2__ && !defined (__i386__) +#include <x86intrin.h> +#endif + +/** 8 octet key, 8 octet key value pair */ +typedef struct +{ + u64 key[2]; /**< the key */ + u64 value; /**< the value */ +} clib_cuckoo_kv_16_8_t; + +/** Decide if a clib_cuckoo_kv_16_8_t instance is free + @param v- pointer to the (key,value) pair +*/ +always_inline int +clib_cuckoo_kv_is_free_16_8 (const clib_cuckoo_kv_16_8_t * v) +{ + if (v->key[0] == ~0ULL && v->value == ~0ULL) + return 1; + return 0; +} + +always_inline void +clib_cuckoo_kv_set_free_16_8 (clib_cuckoo_kv_16_8_t * v) +{ + clib_memset (v, 0xff, sizeof (*v)); +} + +/** Format a clib_cuckoo_kv_16_8_t instance + @param s - u8 * vector under construction + @param args (vararg) - the (key,value) pair to format + @return s - the u8 * vector under construction +*/ +always_inline u8 * +format_cuckoo_kvp_16_8 (u8 * s, va_list * args) +{ + clib_cuckoo_kv_16_8_t *v = va_arg (*args, clib_cuckoo_kv_16_8_t *); + + if (clib_cuckoo_kv_is_free_16_8 (v)) + { + s = format (s, " -- empty -- "); + } + else + { + s = + format (s, "key %llu%llu value %llu", v->key[0], v->key[1], v->value); + } + return s; +} + +always_inline u64 +clib_cuckoo_hash_16_8 (clib_cuckoo_kv_16_8_t * v) +{ +#ifdef 
clib_crc32c_uses_intrinsics + return clib_crc32c ((u8 *) v->key, 16); +#else + u64 tmp = v->key[0] ^ v->key[1]; + return clib_xxhash (tmp); +#endif +} + +/** Compare two clib_cuckoo_kv_16_8_t instances + @param a - first key + @param b - second key +*/ +always_inline int +clib_cuckoo_key_compare_16_8 (u64 * a, u64 * b) +{ +#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE) + u64x2 v; + v = u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b); + return u64x2_is_all_zero (v); +#else + return ((a[0] ^ b[0]) | (a[1] ^ b[1])) == 0; +#endif +} + +#undef __included_cuckoo_template_h__ +#include <vppinfra/cuckoo_template.h> + +#endif /* __included_cuckoo_16_8_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/cuckoo_8_8.h b/extras/deprecated/vppinfra/cuckoo_8_8.h new file mode 100644 index 00000000000..67af79c0e50 --- /dev/null +++ b/extras/deprecated/vppinfra/cuckoo_8_8.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#undef CLIB_CUCKOO_TYPE + +#define CLIB_CUCKOO_TYPE _8_8 +#define CLIB_CUCKOO_KVP_PER_BUCKET (4) +#define CLIB_CUCKOO_LOG2_KVP_PER_BUCKET (2) +#define CLIB_CUCKOO_BFS_MAX_STEPS (2000) +#define CLIB_CUCKOO_BFS_MAX_PATH_LENGTH (8) + +#ifndef __included_cuckoo_8_8_h__ +#define __included_cuckoo_8_8_h__ + +#include <vppinfra/heap.h> +#include <vppinfra/format.h> +#include <vppinfra/pool.h> +#include <vppinfra/xxhash.h> +#include <vppinfra/cuckoo_debug.h> +#include <vppinfra/cuckoo_common.h> + +#undef CLIB_CUCKOO_OPTIMIZE_PREFETCH +#undef CLIB_CUCKOO_OPTIMIZE_UNROLL +#undef CLIB_CUCKOO_OPTIMIZE_USE_COUNT_LIMITS_SEARCH +#define CLIB_CUCKOO_OPTIMIZE_PREFETCH 1 +#define CLIB_CUCKOO_OPTIMIZE_UNROLL 1 +#define CLIB_CUCKOO_OPTIMIZE_USE_COUNT_LIMITS_SEARCH 1 + +#if __SSE4_2__ && !defined (__i386__) +#include <x86intrin.h> +#endif + +/** 8 octet key, 8 octet key value pair */ +typedef struct +{ + u64 key; /**< the key */ + u64 value; /**< the value */ +} clib_cuckoo_kv_8_8_t; + +/** Decide if a clib_cuckoo_kv_8_8_t instance is free + @param v- pointer to the (key,value) pair +*/ +always_inline int +clib_cuckoo_kv_is_free_8_8 (const clib_cuckoo_kv_8_8_t * v) +{ + if (v->key == ~0ULL && v->value == ~0ULL) + return 1; + return 0; +} + +always_inline void +clib_cuckoo_kv_set_free_8_8 (clib_cuckoo_kv_8_8_t * v) +{ + clib_memset (v, 0xff, sizeof (*v)); +} + +/** Format a clib_cuckoo_kv_8_8_t instance + @param s - u8 * vector under construction + @param args (vararg) - the (key,value) pair to format + @return s - the u8 * vector under construction +*/ +always_inline u8 * +format_cuckoo_kvp_8_8 (u8 * s, va_list * args) +{ + clib_cuckoo_kv_8_8_t *v = va_arg (*args, clib_cuckoo_kv_8_8_t *); + + if (clib_cuckoo_kv_is_free_8_8 (v)) + { + s = format (s, " -- empty -- "); + } + else + { + s = format (s, "key %llu value %llu", v->key, v->value); + } + return s; +} + +always_inline u64 +clib_cuckoo_hash_8_8 (clib_cuckoo_kv_8_8_t * v) +{ +#if defined(clib_crc32c_uses_intrinsics) && 
!defined (__i386__) + return crc32_u64 (0, v->key); +#else + return clib_xxhash (v->key); +#endif +} + +/** Compare two clib_cuckoo_kv_8_8_t instances + @param a - first key + @param b - second key +*/ +always_inline int +clib_cuckoo_key_compare_8_8 (u64 a, u64 b) +{ + return a == b; +} + +#undef __included_cuckoo_template_h__ +#include <vppinfra/cuckoo_template.h> + +#endif /* __included_cuckoo_8_8_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/cuckoo_common.h b/extras/deprecated/vppinfra/cuckoo_common.h new file mode 100644 index 00000000000..ea0fc30084b --- /dev/null +++ b/extras/deprecated/vppinfra/cuckoo_common.h @@ -0,0 +1,59 @@ +/* + Copyright (c) 2017 Cisco and/or its affiliates. + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +/* + * Note: to instantiate the template multiple times in a single file, + * #undef __included_cuckoo_template_h__... 
+ */ +#ifndef __included_cuckoo_common_h__ +#define __included_cuckoo_common_h__ + +#include <vppinfra/types.h> + +#define CLIB_CUCKOO_OPTIMIZE_PREFETCH 1 +#define CLIB_CUCKOO_OPTIMIZE_UNROLL 1 +#define CLIB_CUCKOO_OPTIMIZE_USE_COUNT_LIMITS_SEARCH 1 + +#define foreach_clib_cuckoo_error(F) \ + F (CLIB_CUCKOO_ERROR_SUCCESS, 0, "success") \ + F (CLIB_CUCKOO_ERROR_NOT_FOUND, -1, "object not found") \ + F (CLIB_CUCKOO_ERROR_AGAIN, -2, "object busy") + +typedef enum +{ +#define F(n, v, s) n = v, + foreach_clib_cuckoo_error (F) +#undef F +} clib_cuckoo_error_e; + +typedef struct +{ + uword bucket1; + uword bucket2; + u8 reduced_hash; +} clib_cuckoo_lookup_info_t; + +#endif /* __included_cuckoo_common_h__ */ + +/** @endcond */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/cuckoo_debug.h b/extras/deprecated/vppinfra/cuckoo_debug.h new file mode 100644 index 00000000000..eb236509935 --- /dev/null +++ b/extras/deprecated/vppinfra/cuckoo_debug.h @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file + * @brief cuckoo debugs + */ +#ifndef __included_cuckoo_debug_h__ +#define __included_cuckoo_debug_h__ + +/* controls debug counters */ +#define CLIB_CUCKOO_DEBUG_COUNTERS (0) + +/* controls debug prints */ +#define CLIB_CUCKOO_DEBUG (0) + +/* controls garbage collection related debug prints */ +#define CLIB_CUCKOO_DEBUG_GC (0) + +#if CLIB_CUCKOO_DEBUG +#define CLIB_CUCKOO_DEBUG_FILE_DEF \ + static const char *__file = NULL; \ + { \ + __file = strrchr (__FILE__, '/'); \ + if (__file) \ + { \ + ++__file; \ + } \ + else \ + { \ + __file = __FILE__; \ + } \ + } + +#define CLIB_CUCKOO_DBG(fmt, ...) \ + do \ + { \ + CLIB_CUCKOO_DEBUG_FILE_DEF \ + static u8 *_s = NULL; \ + _s = format (_s, "DBG:%s:%d:%s():" fmt, __file, __LINE__, __func__, \ + ##__VA_ARGS__); \ + printf ("%.*s\n", vec_len (_s), _s); \ + vec_reset_length (_s); \ + } \ + while (0); + +#define CLIB_CUCKOO_ERR(fmt, ...) \ + do \ + { \ + CLIB_CUCKOO_DEBUG_FILE_DEF \ + static u8 *_s = NULL; \ + _s = format (_s, "ERR:%s:%d:%s():" fmt, __file, __LINE__, __func__, \ + ##__VA_ARGS__); \ + printf ("%.*s\n", vec_len (_s), _s); \ + vec_reset_length (_s); \ + } \ + while (0); + +#else +#define CLIB_CUCKOO_DBG(...) +#define CLIB_CUCKOO_ERR(...) +#endif + +#endif /* __included_cuckoo_debug_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/cuckoo_template.c b/extras/deprecated/vppinfra/cuckoo_template.c new file mode 100644 index 00000000000..8cd2a2be2b5 --- /dev/null +++ b/extras/deprecated/vppinfra/cuckoo_template.c @@ -0,0 +1,1002 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * cuckoo hash implementation based on paper + * 'Algorithmic Improvements for Fast Concurrent Cuckoo Hashing' + * by Xiaozhou Li, David G. Andersen, Michael Kaminsky, Michael J. Freedman + * and their libcuckoo implementation (https://github.com/efficient/libcuckoo) + */ + +#include <vppinfra/vec.h> + +int CV (clib_cuckoo_search) (CVT (clib_cuckoo) * h, + CVT (clib_cuckoo_kv) * search_v, + CVT (clib_cuckoo_kv) * return_v) +{ + CVT (clib_cuckoo_kv) tmp = *search_v; + int rv = CV (clib_cuckoo_search_inline) (h, &tmp); + if (CLIB_CUCKOO_ERROR_SUCCESS == rv) + { + *return_v = tmp; + } + return rv; +} + +static +CVT (clib_cuckoo_bucket) * +CV (clib_cuckoo_bucket_at_index) (CVT (clib_cuckoo) * h, uword bucket) +{ + return vec_elt_at_index (h->buckets, bucket); +} + +static uword CV (clib_cuckoo_get_nbuckets) (CVT (clib_cuckoo) * h) +{ + return vec_len (h->buckets); +} + +static inline uword +CV (clib_cuckoo_elt_in_bucket_to_offset) (CVT (clib_cuckoo_bucket) * b, + CVT (clib_cuckoo_kv) * e) +{ + ASSERT (e >= b->elts); + ASSERT (e <= &b->elts[sizeof (b->elts) / sizeof (b->elts[0]) - 1]); + return e - b->elts; +} + +u8 *CV (format_cuckoo_elt) (u8 * s, va_list * args) +{ + CVT (clib_cuckoo_kv) * elt = va_arg (*args, CVT (clib_cuckoo_kv) *); + unsigned reduced_hash = va_arg (*args, unsigned); + if (CV (clib_cuckoo_kv_is_free) (elt)) + { + s = format (s, "[ -- empty -- ]"); + } + else + { + s = format (s, "[%U, reduced hash: %u]", CV (format_cuckoo_kvp), elt, + reduced_hash); + } + return s; +} + +u8 *CV (format_cuckoo_bucket) (u8 * s, va_list * 
args) +{ + CVT (clib_cuckoo_bucket) * bucket = + va_arg (*args, CVT (clib_cuckoo_bucket) *); + int i = 0; + + /* *INDENT-OFF* */ + clib_cuckoo_bucket_foreach_idx (i) + { + CVT (clib_cuckoo_kv) *elt = bucket->elts + i; + s = format (s, "bucket %p, offset %d: %U\n", bucket, i, + CV (format_cuckoo_elt), elt, bucket->reduced_hashes[i]); + } + /* *INDENT-ON* */ + clib_cuckoo_bucket_aux_t aux = bucket->aux; + s = format (s, "version: %lld, use count: %d\n", + clib_cuckoo_bucket_aux_get_version (aux), + clib_cuckoo_bucket_aux_get_use_count (aux)); + return s; +} + +#if CLIB_CUCKOO_DEBUG +static void CV (clib_cuckoo_deep_self_check) (CVT (clib_cuckoo) * h) +{ + CVT (clib_cuckoo_bucket) * bucket; + uword bucket_idx = 0; + /* *INDENT-OFF* */ + clib_cuckoo_foreach_bucket (bucket, h, { + int i = 0; + int used = 0; + clib_cuckoo_bucket_foreach_idx (i) + { + CVT (clib_cuckoo_kv) *elt = bucket->elts + i; + if (!CV (clib_cuckoo_kv_is_free) (elt)) + { + u64 hash = CV (clib_cuckoo_hash) (elt); + clib_cuckoo_lookup_info_t lookup = + CV (clib_cuckoo_calc_lookup) (h->buckets, hash); + CVT (clib_cuckoo_kv) kv = *elt; + int rv = CV (clib_cuckoo_search) (h, &kv, &kv); + if (CLIB_CUCKOO_ERROR_SUCCESS != rv) + { + CLIB_CUCKOO_DBG ("Search for elt `%U' failed!", + CV (format_cuckoo_elt), elt, + bucket->reduced_hashes[i]); + CLIB_CUCKOO_DBG ("%U", CV (format_cuckoo), h, 1); + } + ASSERT (lookup.bucket1 == bucket_idx || lookup.bucket2 == bucket_idx); + ASSERT (CLIB_CUCKOO_ERROR_SUCCESS == rv); + ++used; + } + } + clib_cuckoo_bucket_aux_t aux = bucket->aux; + ASSERT (used == clib_cuckoo_bucket_aux_get_use_count (aux)); + ++bucket_idx; + }); + /* *INDENT-ON* */ + // CLIB_CUCKOO_DBG ("Deep self check passed: %U", CV (format_cuckoo), h); +} + +#define CLIB_CUCKOO_DEEP_SELF_CHECK(h) CV (clib_cuckoo_deep_self_check) (h) +#define CLIB_CUCKOO_ASSERT_BUCKET_SORTED(b) \ + do \ + { \ + int i; \ + int min_free = CLIB_CUCKOO_KVP_PER_BUCKET; \ + int max_used = 0; \ + clib_cuckoo_bucket_foreach_idx (i) \ + { 
\ + if (!CV (clib_cuckoo_kv_is_free) (b->elts + i)) \ + { \ + max_used = i; \ + } \ + if (CV (clib_cuckoo_kv_is_free) (b->elts + \ + (CLIB_CUCKOO_KVP_PER_BUCKET - i))) \ + { \ + min_free = i; \ + } \ + } \ + ASSERT (min_free > max_used); \ + } \ + while (0) + +#else +#define CLIB_CUCKOO_DEEP_SELF_CHECK(h) +#define CLIB_CUCKOO_ASSERT_BUCKET_SORTED(b) +#endif + +void CV (clib_cuckoo_init) (CVT (clib_cuckoo) * h, const char *name, + uword nbuckets, + void (*garbage_callback) (CVT (clib_cuckoo) *, + void *), + void *garbage_ctx) +{ + uword log2_nbuckets = max_log2 (nbuckets); + nbuckets = 1ULL << (log2_nbuckets); + CLIB_CUCKOO_DBG ("New cuckoo, adjusted nbuckets %wu", nbuckets); + CVT (clib_cuckoo_bucket) * buckets = NULL; + vec_validate_aligned (buckets, nbuckets - 1, CLIB_CACHE_LINE_BYTES); + ASSERT (nbuckets == vec_len (buckets)); + h->buckets = buckets; + clib_spinlock_init (&h->writer_lock); + /* mark all elements free ... */ + CVT (clib_cuckoo_bucket) * bucket; + /* *INDENT-OFF* */ + clib_cuckoo_foreach_bucket ( + bucket, h, { clib_memset (bucket->elts, 0xff, sizeof (bucket->elts)); }); + /* *INDENT-ON* */ + h->name = name; + h->garbage_callback = garbage_callback; + h->garbage_ctx = garbage_ctx; +} + +void CV (clib_cuckoo_free) (CVT (clib_cuckoo) * h) +{ + clib_memset (h, 0, sizeof (*h)); +} + +static clib_cuckoo_bucket_aux_t +CV (clib_cuckoo_bucket_version_bump_and_lock) (CVT (clib_cuckoo_bucket) * b) +{ + clib_cuckoo_bucket_aux_t aux = b->aux; + u64 version = clib_cuckoo_bucket_aux_get_version (aux); + u8 use_count = clib_cuckoo_bucket_aux_get_use_count (aux); + u8 writer_flag = clib_cuckoo_bucket_aux_get_writer_flag (aux); + ASSERT (0 == writer_flag); + aux = clib_cuckoo_bucket_aux_pack (version + 1, use_count, 1); + b->aux = aux; + return aux; +} + +static void CV (clib_cuckoo_bucket_unlock) (CVT (clib_cuckoo_bucket) * b, + clib_cuckoo_bucket_aux_t aux) +{ + u64 version = clib_cuckoo_bucket_aux_get_version (aux); + u8 use_count = 
clib_cuckoo_bucket_aux_get_use_count (aux); + u8 writer_flag = clib_cuckoo_bucket_aux_get_writer_flag (aux); + ASSERT (1 == writer_flag); + aux = clib_cuckoo_bucket_aux_pack (version, use_count, 0); + b->aux = aux; +} + +#define CLIB_CUCKOO_DEBUG_PATH (1) +#define CLIB_CUCKOO_DEBUG_PATH_DETAIL (0) + +#if CLIB_CUCKOO_DEBUG && CLIB_CUCKOO_DEBUG_PATH +static u8 *CV (format_cuckoo_path) (u8 * s, va_list * args); +#endif + +static clib_cuckoo_path_t *CV (clib_cuckoo_path_get) (CVT (clib_cuckoo) * h) +{ + clib_cuckoo_path_t *path; + pool_get (h->paths, path); + clib_memset (path, 0, sizeof (*path)); +#if CLIB_CUCKOO_DEBUG_PATH_DETAIL + CLIB_CUCKOO_DBG ("Get path @%lu", (long unsigned) (path - h->paths)); +#endif + return path; +} + +static void CV (clib_cuckoo_path_put) (CVT (clib_cuckoo) * h, uword path_idx) +{ + clib_cuckoo_path_t *path = pool_elt_at_index (h->paths, path_idx); +#if CLIB_CUCKOO_DEBUG_PATH_DETAIL + CLIB_CUCKOO_DBG ("Put path @%lu", (long unsigned) (path - h->paths)); +#endif + pool_put (h->paths, path); +} + +static clib_cuckoo_path_t *CV (clib_cuckoo_path_begin) (CVT (clib_cuckoo) * h, + uword bucket, + uword next_offset) +{ + ASSERT (next_offset < CLIB_CUCKOO_KVP_PER_BUCKET); + clib_cuckoo_path_t *new_path = CV (clib_cuckoo_path_get) (h); + new_path->length = 1; + new_path->data = next_offset; + new_path->start = bucket; + new_path->bucket = bucket; +#if CLIB_CUCKOO_DEBUG_PATH + CLIB_CUCKOO_DBG ("Create new path @%wu, length: %u data: %llu bucket: %wu " + "next-offset: %wu", + new_path - h->paths, new_path->length, + (long long unsigned) new_path->data, new_path->bucket, + next_offset); +#endif + return new_path; +} + +/** + * create a new path based on existing path extended by adding a bucket + * and offset + */ +static uword CV (clib_cuckoo_path_extend) (CVT (clib_cuckoo) * h, + uword path_idx, uword bucket, + unsigned offset) +{ + ASSERT (offset < CLIB_CUCKOO_KVP_PER_BUCKET); + clib_cuckoo_path_t *new_path = CV (clib_cuckoo_path_get) (h); + uword 
new_path_idx = new_path - h->paths; + clib_cuckoo_path_t *path = pool_elt_at_index (h->paths, path_idx); + new_path->start = path->start; + new_path->length = path->length + 1; + new_path->data = (path->data << CLIB_CUCKOO_LOG2_KVP_PER_BUCKET) + offset; + new_path->bucket = bucket; +#if CLIB_CUCKOO_DEBUG_PATH + CLIB_CUCKOO_DBG ("Extend path @%wu, new path @%wu, %U", path_idx, + new_path_idx, CV (format_cuckoo_path), h, new_path_idx); +#endif + return new_path_idx; +} + +/** return the offset of the last element in the path */ +static uword CV (clib_cuckoo_path_peek_offset) (const clib_cuckoo_path_t * + path) +{ + ASSERT (path->length > 0); + uword mask = (1 << CLIB_CUCKOO_LOG2_KVP_PER_BUCKET) - 1; + uword offset = path->data & mask; + return offset; +} + +static +CVT (clib_cuckoo_kv) * +CV (clib_cuckoo_bucket_find_empty) (CVT (clib_cuckoo_bucket) * bucket) +{ + clib_cuckoo_bucket_aux_t aux = bucket->aux; + u8 use_count = clib_cuckoo_bucket_aux_get_use_count (aux); + if (use_count < CLIB_CUCKOO_KVP_PER_BUCKET) + { + return bucket->elts + use_count; + } + return NULL; +} + +/** + * walk the cuckoo path two ways, + * first backwards, extracting offsets, + * then forward, extracting buckets + * + * buckets and offsets are arrays filled with elements extracted from path + * the arrays must be able to contain CLIB_CUCKOO_BFS_MAX_PATH_LENGTH elements + */ +static void +clib_cuckoo_path_walk (CVT (clib_cuckoo) * h, uword path_idx, + uword * buckets, uword * offsets) +{ + clib_cuckoo_path_t *path = pool_elt_at_index (h->paths, path_idx); + ASSERT (path->length > 0); + u64 data = path->data; + uword mask = (1 << CLIB_CUCKOO_LOG2_KVP_PER_BUCKET) - 1; + uword i; + for (i = path->length; i > 0; --i) + { + uword offset = data & mask; + offsets[i - 1] = offset; + data >>= CLIB_CUCKOO_LOG2_KVP_PER_BUCKET; + } + buckets[0] = path->start; + for (i = 1; i < path->length; ++i) + { + CVT (clib_cuckoo_bucket) * b = + CV (clib_cuckoo_bucket_at_index) (h, buckets[i - 1]); + buckets[i] = + 
clib_cuckoo_get_other_bucket (CV (clib_cuckoo_get_nbuckets) (h), + buckets[i - 1], + b->reduced_hashes[offsets[i - 1]]); + } +} + +#if CLIB_CUCKOO_DEBUG && CLIB_CUCKOO_DEBUG_PATH +static u8 *CV (format_cuckoo_path) (u8 * s, va_list * args) +{ + CVT (clib_cuckoo) * h = va_arg (*args, CVT (clib_cuckoo) *); + uword path_idx = va_arg (*args, uword); + clib_cuckoo_path_t *p = pool_elt_at_index (h->paths, path_idx); + uword buckets[CLIB_CUCKOO_BFS_MAX_PATH_LENGTH]; + uword offsets[CLIB_CUCKOO_BFS_MAX_PATH_LENGTH]; + clib_cuckoo_path_walk (h, path_idx, buckets, offsets); + s = format (s, "length %u: ", p->length); + for (uword i = p->length - 1; i > 0; --i) + { + s = format (s, "%wu[%wu]->", buckets[i], offsets[i]); + } + if (p->length) + { + s = format (s, "%wu[%wu]", buckets[0], offsets[0]); + } + return s; +} +#endif + +/* + * perform breadth-first search in the cuckoo hash, finding the closest + * empty slot, i.e. one which requires minimum swaps to move it + * to one of the buckets provided + */ +static int CV (clib_cuckoo_find_empty_slot_bfs) (CVT (clib_cuckoo) * h, + clib_cuckoo_lookup_info_t * + lookup, uword * path_idx_out, + uword * found_bucket, + CVT (clib_cuckoo_kv) * + *found_elt) +{ + uword *tail; + ASSERT (!vec_len (h->bfs_search_queue)); + clib_cuckoo_path_t *tmp; + pool_flush (tmp, h->paths,); + int rv = CLIB_CUCKOO_ERROR_AGAIN; + int counter = 0; + /* start by creating paths starting in each of the buckets ... 
*/ + vec_add2 (h->bfs_search_queue, tail, CLIB_CUCKOO_KVP_PER_BUCKET); + int i; + for (i = 0; i < CLIB_CUCKOO_KVP_PER_BUCKET; ++i) + { + clib_cuckoo_path_t *path = + CV (clib_cuckoo_path_begin) (h, lookup->bucket1, i); + tail[i] = path - h->paths; + } + if (lookup->bucket1 != lookup->bucket2) + { + vec_add2 (h->bfs_search_queue, tail, CLIB_CUCKOO_KVP_PER_BUCKET); + for (i = 0; i < CLIB_CUCKOO_KVP_PER_BUCKET; ++i) + { + clib_cuckoo_path_t *path = + CV (clib_cuckoo_path_begin) (h, lookup->bucket2, i); + tail[i] = path - h->paths; + } + } + while (1) + { + if (counter >= CLIB_CUCKOO_BFS_MAX_STEPS) + { +#if CLIB_CUCKOO_DEBUG_COUNTERS + ++h->steps_exceeded; +#endif + break; + } + if (counter >= vec_len (h->bfs_search_queue)) + { +#if CLIB_CUCKOO_DEBUG_COUNTERS + ++h->bfs_queue_emptied; +#endif + break; + } + const uword path_idx = vec_elt (h->bfs_search_queue, counter); + const clib_cuckoo_path_t *path = pool_elt_at_index (h->paths, path_idx); +#if CLIB_CUCKOO_DEBUG_PATH + CLIB_CUCKOO_DBG ("Examine path @%wu: %U", path_idx, + CV (format_cuckoo_path), h, path_idx); +#endif + /* TODO prefetch ? 
*/ + /* search the alternative bucket for free space */ + int offset = CV (clib_cuckoo_path_peek_offset) (path); + CVT (clib_cuckoo_bucket) * bucket = + CV (clib_cuckoo_bucket_at_index) (h, path->bucket); + uword other_bucket = + clib_cuckoo_get_other_bucket (CV (clib_cuckoo_get_nbuckets) (h), + path->bucket, + bucket->reduced_hashes[offset]); + CLIB_CUCKOO_DBG + ("Path ends in bucket %wu, offset #%wu, other bucket is %wu", + path->bucket, CV (clib_cuckoo_path_peek_offset) (path), + other_bucket); + if (path->bucket != other_bucket) + { + if ((*found_elt = + CV (clib_cuckoo_bucket_find_empty) (CV + (clib_cuckoo_bucket_at_index) + (h, other_bucket)))) + { + /* found empty element */ + *found_bucket = other_bucket; + *path_idx_out = path_idx; + rv = CLIB_CUCKOO_ERROR_SUCCESS; +#if CLIB_CUCKOO_DEBUG_PATH + CLIB_CUCKOO_DBG ("Bucket with empty slot:\n%U", + CV (format_cuckoo_bucket), + CV (clib_cuckoo_bucket_at_index) (h, + other_bucket)); +#endif + goto out; + } + /* extend the current path with possible next buckets and add to + * queue */ + if (path->length < CLIB_CUCKOO_BFS_MAX_PATH_LENGTH && + vec_len (h->bfs_search_queue) < CLIB_CUCKOO_BFS_MAX_STEPS) + { + uword *tail; + vec_add2 (h->bfs_search_queue, tail, + CLIB_CUCKOO_KVP_PER_BUCKET); + for (i = 0; i < CLIB_CUCKOO_KVP_PER_BUCKET; ++i) + { + uword new_path_idx = + CV (clib_cuckoo_path_extend) (h, path_idx, other_bucket, + i); + tail[i] = new_path_idx; + } + } + } + else + { + CLIB_CUCKOO_DBG ("Discard path @%wu, loop detected", path_idx); + } + /* done with this path - put back to pool for later reuse */ + CV (clib_cuckoo_path_put) (h, path_idx); + ++counter; + } +out: + vec_reset_length (h->bfs_search_queue); + return rv; +} + +static void CV (clib_cuckoo_swap_elts_in_bucket) (CVT (clib_cuckoo_bucket) * + b, uword e1, uword e2) +{ + CVT (clib_cuckoo_kv) kv; + clib_memcpy (&kv, b->elts + e1, sizeof (kv)); + clib_memcpy (b->elts + e1, b->elts + e2, sizeof (kv)); + clib_memcpy (b->elts + e2, &kv, sizeof (kv)); + 
u8 reduced_hash = b->reduced_hashes[e1]; + b->reduced_hashes[e1] = b->reduced_hashes[e2]; + b->reduced_hashes[e2] = reduced_hash; +} + +static void CV (clib_cuckoo_bucket_tidy) (CVT (clib_cuckoo_bucket) * b) +{ + int i = 0; + int j = CLIB_CUCKOO_KVP_PER_BUCKET - 1; + while (i != j) + { + int min_free = i; + int max_used = j; + while (!CV (clib_cuckoo_kv_is_free) (&b->elts[min_free])) + { + ++min_free; + } + while (CV (clib_cuckoo_kv_is_free) (&b->elts[max_used])) + { + --max_used; + } + if (min_free < max_used) + { + CV (clib_cuckoo_swap_elts_in_bucket) (b, min_free, max_used); + i = min_free + 1; + j = max_used - 1; + } + else + { + break; + } + } +} + +static void CV (clib_cuckoo_free_locked_elt) (CVT (clib_cuckoo_kv) * elt) +{ + /* + * FIXME - improve performance by getting rid of this clib_memset - make all + * functions in this file not rely on clib_cuckoo_kv_is_free but instead + * take use_count into account */ + clib_memset (elt, 0xff, sizeof (*elt)); +} + +static void CV (clib_cuckoo_free_elt_in_bucket) (CVT (clib_cuckoo_bucket) * b, + CVT (clib_cuckoo_kv) * elt) +{ + clib_cuckoo_bucket_aux_t aux = + CV (clib_cuckoo_bucket_version_bump_and_lock) (b); + int use_count = clib_cuckoo_bucket_aux_get_use_count (aux); + int offset = elt - b->elts; + ASSERT (offset < use_count); + CV (clib_cuckoo_free_locked_elt) (elt); + if (offset != use_count - 1) + { + CV (clib_cuckoo_bucket_tidy) (b); + } + aux = clib_cuckoo_bucket_aux_set_use_count (aux, use_count - 1); + CV (clib_cuckoo_bucket_unlock) (b, aux); +} + +static void CV (clib_cuckoo_set_locked_elt) (CVT (clib_cuckoo_bucket) * b, + CVT (clib_cuckoo_kv) * elt, + CVT (clib_cuckoo_kv) * kvp, + u8 reduced_hash) +{ + int offset = CV (clib_cuckoo_elt_in_bucket_to_offset) (b, elt); + clib_memcpy (elt, kvp, sizeof (*elt)); + b->reduced_hashes[offset] = reduced_hash; + CLIB_CUCKOO_DBG ("Set bucket %p, offset %d, %U", b, offset, + CV (format_cuckoo_elt), elt, b->reduced_hashes[offset]); +} + +static void CV 
/*
 * Slow-path insert.
 *
 * Runs the BFS to find a path ending next to a free slot.  On success the
 * elements along that path are shifted one step each, starting from the far
 * end, until the free slot has travelled to the start of the path; kvp is
 * then written there.
 *
 * Returns whatever the BFS returned; the table is modified only when that
 * is CLIB_CUCKOO_ERROR_SUCCESS.
 */
static int CV (clib_cuckoo_add_slow) (CVT (clib_cuckoo) * h,
				      CVT (clib_cuckoo_kv) * kvp,
				      clib_cuckoo_lookup_info_t * lookup,
				      u8 reduced_hash)
{
  uword path_idx;
  uword empty_bucket_idx;
  CVT (clib_cuckoo_kv) * empty_elt;
  int rv = CV (clib_cuckoo_find_empty_slot_bfs) (h, lookup, &path_idx,
						 &empty_bucket_idx,
						 &empty_elt);
  if (CLIB_CUCKOO_ERROR_SUCCESS == rv)
    {
      uword buckets[CLIB_CUCKOO_BFS_MAX_PATH_LENGTH];
      uword offsets[CLIB_CUCKOO_BFS_MAX_PATH_LENGTH];
      clib_cuckoo_path_walk (h, path_idx, buckets, offsets);
      /*
       * walk back the path, moving the free element forward to one of our
       * buckets ...
       */
      clib_cuckoo_path_t *path = pool_elt_at_index (h->paths, path_idx);
      CVT (clib_cuckoo_bucket) * empty_bucket =
	CV (clib_cuckoo_bucket_at_index) (h, empty_bucket_idx);
      int i;
      for (i = path->length - 1; i >= 0; --i)
	{
	  /* copy the key-value in path to the bucket with empty element */
	  CVT (clib_cuckoo_bucket) * b =
	    CV (clib_cuckoo_bucket_at_index) (h, buckets[i]);
	  CVT (clib_cuckoo_kv) * elt = b->elts + offsets[i];
	  /* only the destination bucket is locked while it is written */
	  clib_cuckoo_bucket_aux_t empty_aux =
	    CV (clib_cuckoo_bucket_version_bump_and_lock) (empty_bucket);
	  CV (clib_cuckoo_set_locked_elt)
	    (empty_bucket, empty_elt, elt, b->reduced_hashes[elt - b->elts]);
	  if (i == path->length - 1)
	    {
	      /* we only need to increase the use count for the bucket with
	       * free element - all other buckets' use counts won't change */
	      empty_aux = clib_cuckoo_bucket_aux_set_use_count (empty_aux,
								clib_cuckoo_bucket_aux_get_use_count
								(empty_aux) +
								1);
	    }
	  CV (clib_cuckoo_bucket_unlock) (empty_bucket, empty_aux);
	  /*
	   * the element now exists in both places - in the previously empty
	   * element and in its original bucket - we can now safely overwrite
	   * the element in the original bucket with previous element in path
	   * without losing data (and we don't need to modify the use count)
	   */
	  empty_bucket = b;
	  empty_elt = elt;
	}
      /* now we have a place to put the kvp in ... */
      CV (clib_cuckoo_set_elt) (empty_bucket, empty_elt, kvp, reduced_hash);
      CLIB_CUCKOO_DBG ("Slow insert success, bucket: %p\n%U", empty_bucket,
		       CV (format_cuckoo_bucket), empty_bucket);
#if CLIB_CUCKOO_DEBUG_COUNTERS
      ++h->slow_adds;
#endif
    }
  return rv;
}
CLIB_CUCKOO_ERROR_AGAIN; +} + +/** + * perform garbage collection + * + * this function assumes there is no other thread touching the cuckoo hash, + * not even a reader, it's meant to be called from main thread + * in a stop-the-world situation + */ +void CV (clib_cuckoo_garbage_collect) (CVT (clib_cuckoo) * h) +{ + CLIB_MEMORY_BARRIER (); + CVT (clib_cuckoo_bucket) * *b; + /* *INDENT-OFF* */ + vec_foreach (b, h->to_be_freed) + { + if (*b == h->buckets) + { + continue; + } +#if CLIB_CUCKOO_DEBUG_GC + fformat (stdout, "gc: free %p\n", *b); +#endif + vec_free (*b); + } + /* *INDENT-ON* */ + vec_free (h->to_be_freed); + CLIB_MEMORY_BARRIER (); +} + +/** + * expand and rehash a cuckoo hash + * + * 1. double the size of the hash table + * 2. move items to new locations derived from the new size + */ +static void CV (clib_cuckoo_rehash) (CVT (clib_cuckoo) * h) +{ + CVT (clib_cuckoo_bucket) * old = h->buckets; + uword old_nbuckets = vec_len (old); + uword new_nbuckets = 2 * old_nbuckets; + CVT (clib_cuckoo_bucket) * new = + vec_dup_aligned (old, CLIB_CACHE_LINE_BYTES); + /* allocate space */ + vec_validate_aligned (new, new_nbuckets - 1, CLIB_CACHE_LINE_BYTES); + ASSERT (new_nbuckets == vec_len (new)); + /* store old pointer in to-be-freed list */ + vec_add1 (h->to_be_freed, old); + /* mark new elements as free */ + CVT (clib_cuckoo_bucket) * bucket; + for (bucket = new + old_nbuckets; bucket < vec_end (new); ++bucket) + { + clib_memset (bucket->elts, 0xff, sizeof (bucket->elts)); + } + /* + * this for loop manipulates the new (unseen) memory, so no locks + * are required here + */ + uword old_bucket_idx; + for (old_bucket_idx = 0; old_bucket_idx < old_nbuckets; ++old_bucket_idx) + { + /* items in old bucket might be moved to new bucket */ + uword new_bucket_idx = old_bucket_idx + old_nbuckets; + CVT (clib_cuckoo_bucket) * old_bucket = new + old_bucket_idx; + CVT (clib_cuckoo_bucket) * new_bucket = new + new_bucket_idx; + int i = 0; + int moved = 0; + 
clib_cuckoo_bucket_aux_t aux = old_bucket->aux; + for (i = 0; i < clib_cuckoo_bucket_aux_get_use_count (aux); ++i) + { + CVT (clib_cuckoo_kv) * elt = old_bucket->elts + i; + u64 hash = CV (clib_cuckoo_hash) (elt); + clib_cuckoo_lookup_info_t old_lookup = + CV (clib_cuckoo_calc_lookup) (old, hash); + clib_cuckoo_lookup_info_t new_lookup = + CV (clib_cuckoo_calc_lookup) (new, hash); + if ((old_bucket_idx == old_lookup.bucket1 && + new_bucket_idx == new_lookup.bucket1) || + (old_bucket_idx == old_lookup.bucket2 && + new_bucket_idx == new_lookup.bucket2)) + { + /* move the item to new bucket */ + CVT (clib_cuckoo_kv) * empty_elt = new_bucket->elts + moved; + ASSERT (empty_elt); + CV (clib_cuckoo_set_locked_elt) + (new_bucket, empty_elt, elt, old_bucket->reduced_hashes[i]); + CV (clib_cuckoo_free_locked_elt) (elt); + ++moved; + } + } + if (moved) + { + CV (clib_cuckoo_bucket_tidy) (old_bucket); + aux = + clib_cuckoo_bucket_aux_set_use_count (aux, + clib_cuckoo_bucket_aux_get_use_count + (aux) - moved); + old_bucket->aux = aux; + aux = new_bucket->aux; + aux = + clib_cuckoo_bucket_aux_set_use_count (aux, + clib_cuckoo_bucket_aux_get_use_count + (aux) + moved); + new_bucket->aux = aux; + } + } + h->buckets = new; +#if CLIB_CUCKOO_DEBUG_COUNTERS + ++h->rehashes; +#endif + h->garbage_callback (h, h->garbage_ctx); +} + +static int CV (clib_cuckoo_bucket_search_internal) (CVT (clib_cuckoo) * h, + uword bucket, + CVT (clib_cuckoo_kv) * + kvp, + CVT (clib_cuckoo_kv) * + *found) +{ + CVT (clib_cuckoo_bucket) * b = CV (clib_cuckoo_bucket_at_index) (h, bucket); + int i; + /* *INDENT-OFF* */ + clib_cuckoo_bucket_foreach_idx_unrolled (i, { + CVT (clib_cuckoo_kv) *elt = &b->elts[i]; + if (CV (clib_cuckoo_key_compare) (elt->key, kvp->key)) + { + *found = elt; + return CLIB_CUCKOO_ERROR_SUCCESS; + } + }); + /* *INDENT-ON* */ + return CLIB_CUCKOO_ERROR_NOT_FOUND; +} + +int CV (clib_cuckoo_add_del) (CVT (clib_cuckoo) * h, + CVT (clib_cuckoo_kv) * kvp, int is_add, + int dont_overwrite) +{ 
+ CLIB_CUCKOO_DBG ("%s %U", is_add ? "Add" : "Del", CV (format_cuckoo_kvp), + kvp); + clib_cuckoo_lookup_info_t lookup; + u64 hash = CV (clib_cuckoo_hash) (kvp); + u8 reduced_hash = clib_cuckoo_reduce_hash (hash); + clib_spinlock_lock (&h->writer_lock); +restart: + lookup = CV (clib_cuckoo_calc_lookup) (h->buckets, hash); + CVT (clib_cuckoo_bucket) * b = + CV (clib_cuckoo_bucket_at_index) (h, lookup.bucket1); + CVT (clib_cuckoo_kv) * found; + int rv = + CV (clib_cuckoo_bucket_search_internal) (h, lookup.bucket1, kvp, &found); + if (CLIB_CUCKOO_ERROR_SUCCESS != rv) + { + ASSERT (CLIB_CUCKOO_ERROR_NOT_FOUND == rv); + b = CV (clib_cuckoo_bucket_at_index) (h, lookup.bucket2); + rv = CV (clib_cuckoo_bucket_search_internal) (h, lookup.bucket2, kvp, + &found); + } + if (CLIB_CUCKOO_ERROR_SUCCESS == rv) + { + if (is_add) + { + if (dont_overwrite) + { + CLIB_CUCKOO_DBG ("Refused replacing existing %U", + CV (format_cuckoo_elt), found, + b->reduced_hashes[found - b->elts]); + rv = CLIB_CUCKOO_ERROR_AGAIN; + } + else + { + /* prevent readers reading this bucket while we switch the values */ + clib_cuckoo_bucket_aux_t aux = + CV (clib_cuckoo_bucket_version_bump_and_lock) (b); + clib_memcpy (&found->value, &kvp->value, sizeof (found->value)); + CLIB_CUCKOO_DBG ("Replaced existing %U", CV (format_cuckoo_elt), + found, b->reduced_hashes[found - b->elts]); + CV (clib_cuckoo_bucket_unlock) (b, aux); + rv = CLIB_CUCKOO_ERROR_SUCCESS; + } + } + else + { + CV (clib_cuckoo_free_elt_in_bucket) (b, found); + rv = CLIB_CUCKOO_ERROR_SUCCESS; + } + CLIB_CUCKOO_DEEP_SELF_CHECK (h); + goto unlock; + } + if (!is_add) + { + CLIB_CUCKOO_DBG ("%U not present in table", CV (format_cuckoo_kvp), + kvp); + rv = CLIB_CUCKOO_ERROR_NOT_FOUND; + goto unlock; + } + /* from this point on, it's add code only */ + ASSERT (CLIB_CUCKOO_ERROR_NOT_FOUND == rv); + /* fast path: try to search for unoccupied slot in one of the buckets */ + rv = CV (clib_cuckoo_add_fast) (h, &lookup, kvp, reduced_hash); + 
CLIB_CUCKOO_DEEP_SELF_CHECK (h); + if (CLIB_CUCKOO_ERROR_SUCCESS != rv) + { + CLIB_CUCKOO_DBG + ("Fast insert failed, bucket 1: %wu, bucket 2: %wu\n%U%U", + lookup.bucket1, lookup.bucket2, CV (format_cuckoo_bucket), + CV (clib_cuckoo_bucket_at_index) (h, lookup.bucket1), + CV (format_cuckoo_bucket), + CV (clib_cuckoo_bucket_at_index) (h, lookup.bucket2)); + /* slow path */ + rv = CV (clib_cuckoo_add_slow) (h, kvp, &lookup, reduced_hash); + CLIB_CUCKOO_DEEP_SELF_CHECK (h); + if (CLIB_CUCKOO_ERROR_SUCCESS != rv) + { + CLIB_CUCKOO_DBG ("Slow insert failed, rehash required:\n%U", + CV (format_cuckoo), h, 1); + /* ultra slow path */ + CV (clib_cuckoo_rehash) (h); + CLIB_CUCKOO_DEEP_SELF_CHECK (h); + CLIB_CUCKOO_DBG ("Restarting add after rehash..."); + goto restart; + } + } +unlock: + clib_spinlock_unlock (&h->writer_lock); + return rv; +} + +u8 *CV (format_cuckoo) (u8 * s, va_list * args) +{ + CVT (clib_cuckoo) * h = va_arg (*args, CVT (clib_cuckoo) *); + int verbose = va_arg (*args, int); + + s = format (s, "Hash table %s\n", h->name ? 
h->name : "(unnamed)"); + + uword free = 0; + uword used = 0; + uword use_count_total = 0; + float load_factor; + CVT (clib_cuckoo_bucket) * b; + /* *INDENT-OFF* */ + clib_cuckoo_foreach_bucket (b, h, { + if (verbose) + { + s = format (s, "%U", CV (format_cuckoo_bucket), b); + } + int i; + clib_cuckoo_bucket_foreach_idx (i) + { + CVT (clib_cuckoo_kv) *elt = &b->elts[i]; + if (CV (clib_cuckoo_kv_is_free) (elt)) + { + ++free; + } + else + { + ++used; + } + } + clib_cuckoo_bucket_aux_t aux = b->aux; + use_count_total += clib_cuckoo_bucket_aux_get_use_count (aux); + }); + /* *INDENT-ON* */ + s = format (s, "Used slots: %wu\n", used); + s = format (s, "Use count total: %wu\n", use_count_total); + s = format (s, "Free slots: %wu\n", free); + if (free + used != 0) + load_factor = ((float) used) / ((float) (free + used)); + else + load_factor = 0.0; + s = format (s, "Load factor: %.2f\n", load_factor); +#if CLIB_CUCKOO_DEBUG_COUNTERS + s = format (s, "BFS attempts limited by max steps: %lld\n", + h->steps_exceeded); + s = format (s, "BFS cutoffs due to empty queue: %lld\n", + h->bfs_queue_emptied); + s = format (s, "Fast adds: %lld\n", h->fast_adds); + s = format (s, "Slow adds: %lld\n", h->slow_adds); + s = format (s, "Rehashes: %lld\n", h->rehashes); +#endif + return s; +} + +float CV (clib_cuckoo_calculate_load_factor) (CVT (clib_cuckoo) * h) +{ + uword nonfree = 0; + uword all = 0; + CVT (clib_cuckoo_bucket) * bucket; + /* *INDENT-OFF* */ + clib_cuckoo_foreach_bucket (bucket, h, { + int i; + clib_cuckoo_bucket_foreach_idx (i) + { + CVT (clib_cuckoo_kv) *elt = bucket->elts + i; + ++all; + if (!CV (clib_cuckoo_kv_is_free) (elt)) + { + ++nonfree; + } + } + }); + /* *INDENT-ON* */ + if (all) + return (float) nonfree / (float) all; + else + return 0.0; +} + +/** @endcond */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/cuckoo_template.h 
b/extras/deprecated/vppinfra/cuckoo_template.h new file mode 100644 index 00000000000..364c2919d96 --- /dev/null +++ b/extras/deprecated/vppinfra/cuckoo_template.h @@ -0,0 +1,460 @@ +/* + Copyright (c) 2017 Cisco and/or its affiliates. + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +/* + * cuckoo hash implementation based on paper + * 'Algorithmic Improvements for Fast Concurrent Cuckoo Hashing' + * by Xiaozhou Li, David G. Andersen, Michael Kaminsky, Michael J. Freedman + * and their libcuckoo implementation (https://github.com/efficient/libcuckoo) + */ + +/* + * Note: to instantiate the template multiple times in a single file, + * #undef __included_cuckoo_template_h__... 
+ */ +#ifndef __included_cuckoo_template_h__ +#define __included_cuckoo_template_h__ + +#include <vppinfra/heap.h> +#include <vppinfra/format.h> +#include <vppinfra/pool.h> +#include <vppinfra/lock.h> +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + +#ifndef CLIB_CUCKOO_TYPE +#error CLIB_CUCKOO_TYPE not defined +#endif + +#ifndef CLIB_CUCKOO_BFS_MAX_STEPS +#error CLIB_CUCKOO_BFS_MAX_STEPS not defined +#endif + +#ifndef CLIB_CUCKOO_KVP_PER_BUCKET +#error CLIB_CUCKOO_KVP_PER_BUCKET not defined +#endif + +#ifndef CLIB_CUCKOO_LOG2_KVP_PER_BUCKET +#error CLIB_CUCKOO_LOG2_KVP_PER_BUCKET not defined +#endif + +#ifndef CLIB_CUCKOO_BFS_MAX_PATH_LENGTH +#error CLIB_CUCKOO_BFS_MAX_PATH_LENGTH not defined +#endif + +STATIC_ASSERT (CLIB_CUCKOO_KVP_PER_BUCKET == + (1 << CLIB_CUCKOO_LOG2_KVP_PER_BUCKET), + "CLIB_CUCKOO_KVP_PER_BUCKET != (1 << CLIB_CUCKOO_LOG2_KVP_PER_BUCKET"); + +#define _cv(a, b) a##b +#define __cv(a, b) _cv (a, b) +#define CV(a) __cv (a, CLIB_CUCKOO_TYPE) + +#define _cvt(a, b) a##b##_t +#define __cvt(a, b) _cvt (a, b) +#define CVT(a) __cvt (a, CLIB_CUCKOO_TYPE) + +typedef u64 clib_cuckoo_bucket_aux_t; + +#define CLIB_CUCKOO_USE_COUNT_BIT_WIDTH (1 + CLIB_CUCKOO_LOG2_KVP_PER_BUCKET) + +always_inline u64 +clib_cuckoo_bucket_aux_get_version (clib_cuckoo_bucket_aux_t aux) +{ + return aux >> (1 + CLIB_CUCKOO_USE_COUNT_BIT_WIDTH); +} + +always_inline int +clib_cuckoo_bucket_aux_get_use_count (clib_cuckoo_bucket_aux_t aux) +{ + u64 use_count_mask = (1 << CLIB_CUCKOO_USE_COUNT_BIT_WIDTH) - 1; + return (aux >> 1) & use_count_mask; +} + +always_inline int +clib_cuckoo_bucket_aux_get_writer_flag (clib_cuckoo_bucket_aux_t aux) +{ + return aux & 1; +} + +always_inline clib_cuckoo_bucket_aux_t +clib_cuckoo_bucket_aux_pack (u64 version, int use_count, int writer_flag) +{ + return (version << (1 + CLIB_CUCKOO_USE_COUNT_BIT_WIDTH)) + + (use_count << 1) + writer_flag; +} + +always_inline clib_cuckoo_bucket_aux_t 
+clib_cuckoo_bucket_aux_set_version (clib_cuckoo_bucket_aux_t aux, u64 version) +{ + int use_count = clib_cuckoo_bucket_aux_get_use_count (aux); + int writer_flag = clib_cuckoo_bucket_aux_get_writer_flag (aux); + return clib_cuckoo_bucket_aux_pack (version, use_count, writer_flag); +} + +always_inline clib_cuckoo_bucket_aux_t +clib_cuckoo_bucket_aux_set_use_count (clib_cuckoo_bucket_aux_t aux, + int use_count) +{ + u64 version = clib_cuckoo_bucket_aux_get_version (aux); + int writer_flag = clib_cuckoo_bucket_aux_get_writer_flag (aux); + return clib_cuckoo_bucket_aux_pack (version, use_count, writer_flag); +} + +always_inline clib_cuckoo_bucket_aux_t +clib_cuckoo_bucket_aux_set_writer_flag (clib_cuckoo_bucket_aux_t aux, + int writer_flag) +{ + u64 version = clib_cuckoo_bucket_aux_get_version (aux); + int use_count = clib_cuckoo_bucket_aux_get_use_count (aux); + return clib_cuckoo_bucket_aux_pack (version, use_count, writer_flag); +} + +#define PATH_BITS_REQ \ + (CLIB_CUCKOO_BFS_MAX_PATH_LENGTH * CLIB_CUCKOO_LOG2_KVP_PER_BUCKET) + +#if PATH_BITS_REQ <= 8 +typedef u8 path_data_t; +#elif PATH_BITS_REQ <= 16 +typedef u16 path_data_t; +#elif PATH_BITS_REQ <= 32 +typedef u32 path_data_t; +#elif PATH_BITS_REQ <= 64 +typedef u64 path_data_t; +#else +#error no suitable datatype for path storage... 
/*
 * Iterate var over every element index of a bucket, i.e. over
 * 0 .. CLIB_CUCKOO_KVP_PER_BUCKET - 1.  The bucket argument is accepted
 * for symmetry with the other iterators but is not used.
 *
 * The increment now acts on var; it used to be a hard-coded "++i", which
 * only worked when the caller's loop variable happened to be called i.
 */
#define clib_cuckoo_bucket_foreach_elt_index(var, bucket) \
  for (var = 0; var < CLIB_CUCKOO_KVP_PER_BUCKET; ++var)
/** main cuckoo hash table object - one instance per table */
typedef struct CV (clib_cuckoo)
{
  /** vector of elements containing key-value pairs and auxiliary data */
  CVT (clib_cuckoo_bucket) * volatile buckets;

  /** garbage to be freed once it's safe to do so .. */
  CVT (clib_cuckoo_bucket) * *to_be_freed;

  /** hash table name */
  const char *name;

  /** pool of cuckoo paths (reused when doing BFS search) */
  clib_cuckoo_path_t *paths;

  /**
   * vector used as queue when doing cuckoo path searches - holds offsets
   * in paths pool
   */
  uword *bfs_search_queue;

  /**
   * writer lock - whether this lock is taken or not has zero effect on
   * readers
   */
  clib_spinlock_t writer_lock;

  /** caller context passed to callback with garbage notification */
  void *garbage_ctx;

  /**
   * garbage notify function - called when some garbage needs to be collected
   * in main thread while other threads are stopped
   */
  void (*garbage_callback) (struct CV (clib_cuckoo) * h, void *garbage_ctx);

#if CLIB_CUCKOO_DEBUG_COUNTERS
  /** BFS attempts limited by max steps */
  u64 steps_exceeded;
  /** BFS cutoffs due to empty queue */
  u64 bfs_queue_emptied;
  /** number of fast-path adds */
  u64 fast_adds;
  /** number of slow-path adds */
  u64 slow_adds;
  /** number of rehashes */
  u64 rehashes;
#endif

} CVT (clib_cuckoo);
(clib_cuckoo) * h); + +format_function_t CV (format_cuckoo); +format_function_t CV (format_cuckoo_kvp); + +always_inline u8 +clib_cuckoo_reduce_hash (u64 hash) +{ + u32 v32 = ((u32) hash) ^ ((u32) (hash >> 32)); + u16 v16 = ((u16) v32) ^ ((u16) (v32 >> 16)); + u8 v8 = ((u8) v16) ^ ((u8) (v16 >> 8)); + return v8; +} + +always_inline u64 +clib_cuckoo_get_other_bucket (u64 nbuckets, u64 bucket, u8 reduced_hash) +{ + u64 mask = (nbuckets - 1); + return (bucket ^ ((reduced_hash + 1) * 0xc6a4a7935bd1e995)) & mask; +} + +always_inline clib_cuckoo_lookup_info_t +CV (clib_cuckoo_calc_lookup) (CVT (clib_cuckoo_bucket) * buckets, u64 hash) +{ + clib_cuckoo_lookup_info_t lookup; + u64 nbuckets = vec_len (buckets); + u64 mask = (nbuckets - 1); + lookup.bucket1 = hash & mask; +#if CLIB_CUCKOO_OPTIMIZE_PREFETCH + CLIB_PREFETCH (vec_elt_at_index (buckets, lookup.bucket1), + sizeof (*buckets), LOAD); +#endif + u8 reduced_hash = clib_cuckoo_reduce_hash (hash); + lookup.bucket2 = + clib_cuckoo_get_other_bucket (nbuckets, lookup.bucket1, reduced_hash); +#if CLIB_CUCKOO_OPTIMIZE_PREFETCH + CLIB_PREFETCH (vec_elt_at_index (buckets, lookup.bucket2), + sizeof (*buckets), LOAD); +#endif + lookup.reduced_hash = reduced_hash; + ASSERT (lookup.bucket1 < nbuckets); + ASSERT (lookup.bucket2 < nbuckets); + return lookup; +} + +/** + * search for key within bucket + */ +always_inline int CV (clib_cuckoo_bucket_search) (CVT (clib_cuckoo_bucket) * + b, + CVT (clib_cuckoo_kv) * kvp, + u8 reduced_hash) +{ + clib_cuckoo_bucket_aux_t bucket_aux; + u8 writer_flag; + do + { + bucket_aux = b->aux; + writer_flag = clib_cuckoo_bucket_aux_get_writer_flag (bucket_aux); + } + while (PREDICT_FALSE (writer_flag)); /* loop while writer flag is set */ + + int i; +#if CLIB_CUCKOO_OPTIMIZE_USE_COUNT_LIMITS_SEARCH + const int use_count = clib_cuckoo_bucket_aux_get_use_count (bucket_aux); +#endif + /* *INDENT-OFF* */ + clib_cuckoo_bucket_foreach_idx_unrolled (i, { +#if CLIB_CUCKOO_OPTIMIZE_USE_COUNT_LIMITS_SEARCH + if 
/**
 * look up kvp->key using a precomputed hash
 *
 * Searches the first candidate bucket, then the alternative one.  A bucket
 * search reporting CLIB_CUCKOO_ERROR_AGAIN restarts the lookup from the
 * first bucket.
 *
 * @return CLIB_CUCKOO_ERROR_SUCCESS when the key was found (the bucket
 *         search has then copied the stored value into kvp->value),
 *         otherwise the result of searching the second bucket
 */
always_inline int
CV (clib_cuckoo_search_inline_with_hash) (CVT (clib_cuckoo) * h, u64 hash,
					  CVT (clib_cuckoo_kv) * kvp)
{
  /* snapshot of the bucket vector - not re-read when the search restarts */
  CVT (clib_cuckoo_bucket) * buckets = h->buckets;
  uword bucket1, bucket2;
  u8 reduced_hash;
  u64 nbuckets = vec_len (buckets);
  /* NOTE(review): masking assumes nbuckets is a power of two - confirm */
  u64 mask = nbuckets - 1;
  int rv;

  bucket1 = hash & mask;
  reduced_hash = clib_cuckoo_reduce_hash (hash);

again:
  rv = CV (clib_cuckoo_bucket_search) (vec_elt_at_index (buckets, bucket1),
				       kvp, reduced_hash);

  if (rv == CLIB_CUCKOO_ERROR_SUCCESS)
    return CLIB_CUCKOO_ERROR_SUCCESS;

  /* bucket changed while it was being read - retry */
  if (PREDICT_FALSE (rv == CLIB_CUCKOO_ERROR_AGAIN))
    goto again;

  bucket2 = clib_cuckoo_get_other_bucket (nbuckets, bucket1, reduced_hash);
  rv = CV (clib_cuckoo_bucket_search) (vec_elt_at_index (buckets, bucket2),
				       kvp, reduced_hash);

  /* change to 2nd bucket could bump the item to 1st bucket and the bucket
   * indexes might not even be valid anymore - restart the search */
  if (PREDICT_FALSE (rv == CLIB_CUCKOO_ERROR_AGAIN))
    goto again;

  return rv;
}
a/extras/deprecated/vppinfra/fheap.c b/extras/deprecated/vppinfra/fheap.c new file mode 100644 index 00000000000..034168e85ab --- /dev/null +++ b/extras/deprecated/vppinfra/fheap.c @@ -0,0 +1,473 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vppinfra/fheap.h> + +/* Fibonacci heaps. Nodes are slots of the f->nodes vector addressed by u32 index; fheap_get_node maps the index ~0 ("no node") to a null pointer. */ +always_inline fheap_node_t * +fheap_get_node (fheap_t * f, u32 i) +{ + return i != ~0 ? vec_elt_at_index (f->nodes, i) : 0; +} +/* Pointer to the current minimum root, or 0 when f->min_root is ~0. */ +always_inline fheap_node_t * +fheap_get_root (fheap_t * f) +{ + return fheap_get_node (f, f->min_root); +} +/* Consistency check called at the end of fheap_add, fheap_del_min, fheap_decrease_key and fheap_del. It does nothing unless CLIB_DEBUG and f->enable_validate are both non-zero, and it only inspects nodes whose is_valid flag is set. */ +static void +fheap_validate (fheap_t * f) +{ + fheap_node_t *n, *m; + uword ni, si; + + if (!CLIB_DEBUG || !f->enable_validate) + return; + + vec_foreach_index (ni, f->nodes) + { + n = vec_elt_at_index (f->nodes, ni); + + if (!n->is_valid) + continue; + + /* Min root must have minimal key. */ + m = vec_elt_at_index (f->nodes, f->min_root); + ASSERT (n->key >= m->key); + + /* Min root must have no parent. */ + if (ni == f->min_root) + ASSERT (n->parent == ~0); + + /* Check sibling linkages. 
*/ + if (n->next_sibling == ~0) + ASSERT (n->prev_sibling == ~0); + else if (n->prev_sibling == ~0) + ASSERT (n->next_sibling == ~0); + else + { + fheap_node_t *prev, *next; + u32 si = n->next_sibling, si_start = si; + do + { + m = vec_elt_at_index (f->nodes, si); + prev = vec_elt_at_index (f->nodes, m->prev_sibling); + next = vec_elt_at_index (f->nodes, m->next_sibling); + ASSERT (prev->next_sibling == si); + ASSERT (next->prev_sibling == si); + si = m->next_sibling; + } + while (si != si_start); + } + + /* Loop through all siblings. */ + { + u32 n_siblings = 0; + + foreach_fheap_node_sibling (f, si, n->next_sibling, ( + { + m = + vec_elt_at_index + (f->nodes, si); + /* All siblings must have same parent. */ + ASSERT (m->parent + == + n-> + parent); + n_siblings += 1;} + )); + + /* A parentless node other than the min root must have siblings. */ + if (n->parent == ~0 && ni != f->min_root) + ASSERT (n_siblings > 0); + } + + /* Loop through all children. */ + { + u32 found_first_child = n->first_child == ~0; + u32 n_children = 0; + + foreach_fheap_node_sibling (f, si, n->first_child, ( + { + m = + vec_elt_at_index + (f->nodes, si); + /* Children must not have smaller keys than their parent. */ + ASSERT (m->key >= + n->key); + if + (!found_first_child) + found_first_child = + si == + n->first_child; + n_children += 1;} + )); + + /* Check that first child is present on list. */ + ASSERT (found_first_child); + + /* Make sure rank is correct. */ + ASSERT (n->rank == n_children); + } + } + + /* Increment serial number for each successful validate. + Failure can be used as condition for gdb breakpoints. */ + f->validate_serial++; +} +/* Insert ni_to_add right after ni in ni's circular sibling list. The new node takes ni's parent, whose rank (if there is a parent) is incremented. */ +always_inline void +fheap_node_add_sibling (fheap_t * f, u32 ni, u32 ni_to_add) +{ + fheap_node_t *n = vec_elt_at_index (f->nodes, ni); + fheap_node_t *n_to_add = vec_elt_at_index (f->nodes, ni_to_add); + fheap_node_t *n_next = fheap_get_node (f, n->next_sibling); + fheap_node_t *parent; + + /* Empty list? 
*/ + if (n->next_sibling == ~0) + { + ASSERT (n->prev_sibling == ~0); + n->next_sibling = n->prev_sibling = ni_to_add; + n_to_add->next_sibling = n_to_add->prev_sibling = ni; + } + else + { + /* Add node after existing node. */ + n_to_add->prev_sibling = ni; + n_to_add->next_sibling = n->next_sibling; + + n->next_sibling = ni_to_add; + n_next->prev_sibling = ni_to_add; + } + + n_to_add->parent = n->parent; + parent = fheap_get_node (f, n->parent); + if (parent) + parent->rank += 1; +} +/* Insert slot ni of f->nodes into the heap with the given key. The slot is reinitialized, linked into the root list, and becomes min_root if its key is below the current minimum. */ +void +fheap_add (fheap_t * f, u32 ni, u32 key) +{ + fheap_node_t *r, *n; + u32 ri; + + n = vec_elt_at_index (f->nodes, ni); + + clib_memset (n, 0, sizeof (n[0])); /* NOTE(review): this also leaves is_valid at 0; the flag is only written by fheap_node_remove_internal (), so fheap_validate () skips nodes that were added but never relinked - confirm this is intended. */ + n->parent = n->first_child = n->next_sibling = n->prev_sibling = ~0; + n->key = key; + + r = fheap_get_root (f); + ri = f->min_root; + if (!r) + { + /* No root? Add node as new root. */ + f->min_root = ni; + } + else + { + /* Add node as sibling of current root. */ + fheap_node_add_sibling (f, ri, ni); + + /* New node may become new root. */ + if (r->key > n->key) + f->min_root = ni; + } + + fheap_validate (f); +} +/* Unlink ni from its sibling list and from its parent (whose rank is decremented), leave it linked to itself with no parent, and set is_valid to !invalidate. Returns the index of the next sibling, or ~0 when ni was the only element of a circular list. */ +always_inline u32 +fheap_node_remove_internal (fheap_t * f, u32 ni, u32 invalidate) +{ + fheap_node_t *n = vec_elt_at_index (f->nodes, ni); + u32 prev_ni = n->prev_sibling; + u32 next_ni = n->next_sibling; + u32 list_has_single_element = prev_ni == ni; + fheap_node_t *prev = fheap_get_node (f, prev_ni); + fheap_node_t *next = fheap_get_node (f, next_ni); + fheap_node_t *p = fheap_get_node (f, n->parent); + + if (p) + { + ASSERT (p->rank > 0); + p->rank -= 1; + p->first_child = list_has_single_element ? ~0 : next_ni; + } + + if (prev) + { + ASSERT (prev->next_sibling == ni); + prev->next_sibling = next_ni; + } + if (next) + { + ASSERT (next->prev_sibling == ni); + next->prev_sibling = prev_ni; + } + + n->prev_sibling = n->next_sibling = ni; + n->parent = ~0; + n->is_valid = invalidate == 0; + + return list_has_single_element ? 
~0 : next_ni; +} +/* Unlink ni and mark it valid; every caller in this file links it again right away. */ +always_inline u32 +fheap_node_remove (fheap_t * f, u32 ni) +{ + return fheap_node_remove_internal (f, ni, /* invalidate */ 0); +} +/* Unlink ni and clear its is_valid flag: the node leaves the heap. */ +always_inline u32 +fheap_node_remove_and_invalidate (fheap_t * f, u32 ni) +{ + return fheap_node_remove_internal (f, ni, /* invalidate */ 1); +} +/* Consolidation step: record root ni in f->root_list_by_rank under its rank. While another root of the same rank is already recorded, the one with the larger key becomes a child of the other and the merged tree is tried again at its new rank. */ +static void +fheap_link_root (fheap_t * f, u32 ni) +{ + fheap_node_t *n = vec_elt_at_index (f->nodes, ni); + fheap_node_t *r, *lo, *hi; + u32 ri, lo_i, hi_i, k; + + while (1) + { + k = n->rank; + vec_validate_init_empty (f->root_list_by_rank, k, ~0); + ri = f->root_list_by_rank[k]; + r = fheap_get_node (f, ri); + if (!r) + { + f->root_list_by_rank[k] = ni; + return; + } + + f->root_list_by_rank[k] = ~0; + + /* Sort n/r into lo/hi by their keys. */ + lo = r, lo_i = ri; + hi = n, hi_i = ni; + if (hi->key < lo->key) + { + u32 ti; + fheap_node_t *tn; + ti = lo_i, tn = lo; + lo = hi, lo_i = hi_i; + hi = tn, hi_i = ti; + } + + /* Remove larger key. */ + fheap_node_remove (f, hi_i); + + /* Add larger key as child of smaller one. */ + if (lo->first_child == ~0) + { + hi->parent = lo_i; + lo->first_child = hi_i; + lo->rank = 1; + } + else + fheap_node_add_sibling (f, lo->first_child, hi_i); + + /* Following Fredman & Tarjan: "When making a root node X a child of another node in a linking step, + we unmark X". */ + hi->is_marked = 0; + + ni = lo_i; + n = lo; + } +} +/* Remove the minimum node. Returns its index, or ~0 when the heap is empty; when min_key is non-null the removed node's key is stored there. The remaining roots are consolidated and a new min_root is chosen. */ +u32 +fheap_del_min (fheap_t * f, u32 * min_key) +{ + fheap_node_t *r = fheap_get_root (f); + u32 to_delete_min_ri = f->min_root; + u32 ri, ni; + + /* Empty heap? */ + if (!r) + return ~0; + + /* Root's children become siblings. Call this step a; see below. */ + if (r->first_child != ~0) + { + u32 ci, cni, rni; + fheap_node_t *c, *cn, *rn; + + /* Splice child & root circular lists together. 
*/ + ci = r->first_child; + c = vec_elt_at_index (f->nodes, ci); + + cni = c->next_sibling; + rni = r->next_sibling; + cn = vec_elt_at_index (f->nodes, cni); + rn = vec_elt_at_index (f->nodes, rni); + + r->next_sibling = cni; + c->next_sibling = rni; + cn->prev_sibling = to_delete_min_ri; + rn->prev_sibling = ci; + } + + /* Remove min root. */ + ri = fheap_node_remove_and_invalidate (f, to_delete_min_ri); + + /* Find new min root from among siblings including the ones we've just added. */ + f->min_root = ~0; + if (ri != ~0) + { + u32 ri_last, ri_next, i, min_ds; + + r = fheap_get_node (f, ri); + ri_last = r->prev_sibling; + while (1) + { + /* Step a above can put children (with r->parent != ~0) on root list. */ + r->parent = ~0; + + ri_next = r->next_sibling; + fheap_link_root (f, ri); + if (ri == ri_last) + break; + ri = ri_next; + r = fheap_get_node (f, ri); + } + + min_ds = ~0; + vec_foreach_index (i, f->root_list_by_rank) + { + ni = f->root_list_by_rank[i]; + if (ni == ~0) + continue; + f->root_list_by_rank[i] = ~0; + r = fheap_get_node (f, ni); + if (f->min_root == ~0 || r->key < min_ds) /* always accept the first root found, so a heap whose remaining keys are all ~0 still gets a min root */ + { + f->min_root = ni; + min_ds = r->key; + ASSERT (r->parent == ~0); + } + } + } + + /* Return deleted min root. */ + r = vec_elt_at_index (f->nodes, to_delete_min_ri); + if (min_key) + *min_key = r->key; + + fheap_validate (f); + + return to_delete_min_ri; +} +/* Cascading cut, called with the parent pi of a node that is being cut. A root is left alone, an unmarked node is marked, and an already marked node is itself cut to the root list, after which its former parent is processed the same way. */ +static void +fheap_mark_parent (fheap_t * f, u32 pi) +{ + fheap_node_t *p = vec_elt_at_index (f->nodes, pi); + u32 ppi = p->parent; /* saved now: cutting p below resets p->parent to ~0 */ + /* Parent is a root: do nothing. */ + if (p->parent == ~0) + return; + + /* If not marked, mark it. */ + if (!p->is_marked) + { + p->is_marked = 1; + return; + } + + /* It's a previously marked, non-root parent. + Cut edge to its parent and add to root list. */ + fheap_node_remove (f, pi); + fheap_node_add_sibling (f, f->min_root, pi); + + /* Unmark it since it's now a root node. */ + p->is_marked = 0; + + /* "Cascading cuts": check parent. 
*/ + if (ppi != ~0) + fheap_mark_parent (f, ppi); +} + +/* Set key to new smaller value. A node that has a parent is always cut and moved to the root list; min_root is updated when the new key is below the previous minimum. */ +void +fheap_decrease_key (fheap_t * f, u32 ni, u32 new_key) +{ + fheap_node_t *n = vec_elt_at_index (f->nodes, ni); + fheap_node_t *r = fheap_get_root (f); + + n->key = new_key; + + if (n->parent != ~0) + { + fheap_mark_parent (f, n->parent); + + /* Remove node and add to root list. */ + fheap_node_remove (f, ni); + fheap_node_add_sibling (f, f->min_root, ni); + } + + if (n->key < r->key) + f->min_root = ni; + + fheap_validate (f); +} +/* Delete node ni. A node with a parent has its children moved to the root list before it is unlinked and invalidated. A parentless node is asserted to be the min root. NOTE(review): fheap_del_min always removes f->min_root, so that assertion is all that keeps a parentless non-minimum node from being handled as the minimum - confirm callers never pass one. */ +void +fheap_del (fheap_t * f, u32 ni) +{ + fheap_node_t *n; + + n = vec_elt_at_index (f->nodes, ni); + + if (n->parent == ~0) + { + ASSERT (ni == f->min_root); + fheap_del_min (f, 0); + } + else + { + u32 ci; + + fheap_mark_parent (f, n->parent); + + /* Add children to root list. */ + foreach_fheap_node_sibling (f, ci, n->first_child, ( + { + fheap_node_remove + (f, ci); + fheap_node_add_sibling + (f, f->min_root, + ci);} + )); + + fheap_node_remove_and_invalidate (f, ni); + } + + fheap_validate (f); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/fheap.h b/extras/deprecated/vppinfra/fheap.h new file mode 100644 index 00000000000..1dbd52bad76 --- /dev/null +++ b/extras/deprecated/vppinfra/fheap.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_clib_fheap_h +#define included_clib_fheap_h + +/* Fibonacci heaps, after Fredman, M. L.; Tarjan, R. E. (1987). + "Fibonacci heaps and their uses in improved network optimization algorithms" */ + +#include <vppinfra/vec.h> + +typedef struct +{ + /* Node index of parent; ~0 if the node has none. */ + u32 parent; + + /* Node index of first child; ~0 if the node has no children. */ + u32 first_child; + + /* Next and previous nodes in the circular doubly linked list of siblings (both ~0 on a freshly added node). */ + u32 next_sibling, prev_sibling; + + /* Key (distance) for this node. Parent always has key + <= keys of its children. */ + u32 key; + + /* Number of children (as opposed to descendants). */ + u32 rank; + /* Set by fheap_mark_parent when a child is cut from this non-root node; cleared when the node is linked under another root or moved to the root list. */ + u32 is_marked; + + /* Set to one when node is inserted; zero when deleted. NOTE(review): fheap_add () zeroes the whole node and does not set this flag; in fheap.c it is only written by fheap_node_remove_internal () - confirm the intended life cycle. */ + u32 is_valid; +} fheap_node_t; +/* Run body once for each node of the circular sibling list that starts at first_ni, with ni set to the current node index; nothing runs when first_ni is ~0. The next index is read before body runs, so body may unlink the current node. fheap_get_node must be visible where the macro is expanded. */ +#define foreach_fheap_node_sibling(f,ni,first_ni,body) \ +do { \ + u32 __fheap_foreach_first_ni = (first_ni); \ + u32 __fheap_foreach_ni = __fheap_foreach_first_ni; \ + u32 __fheap_foreach_next_ni; \ + fheap_node_t * __fheap_foreach_n; \ + if (__fheap_foreach_ni != ~0) \ + while (1) \ + { \ + __fheap_foreach_n = fheap_get_node ((f), __fheap_foreach_ni); \ + __fheap_foreach_next_ni = __fheap_foreach_n -> next_sibling; \ + (ni) = __fheap_foreach_ni; \ + \ + body; \ + \ + /* End of circular list? */ \ + if (__fheap_foreach_next_ni == __fheap_foreach_first_ni) \ + break; \ + \ + __fheap_foreach_ni = __fheap_foreach_next_ni; \ + \ + } \ +} while (0) + +typedef struct +{ + u32 min_root; /* Index of the root with the smallest key; ~0 when the heap is empty. */ + + /* Vector of nodes. */ + fheap_node_t *nodes; + /* Scratch vector indexed by rank, used while fheap_del_min merges roots of equal rank; unused slots hold ~0. */ + u32 *root_list_by_rank; + /* Non-zero to let fheap_validate run (it is also gated on CLIB_DEBUG). */ + u32 enable_validate; + /* Incremented each time fheap_validate completes. */ + u32 validate_serial; +} fheap_t; + +/* Initialize empty heap. 
*/ +always_inline void +fheap_init (fheap_t * f, u32 n_nodes) +{ + fheap_node_t *save_nodes = f->nodes; /* keep both vectors across the memset so their storage is reused */ + u32 *save_root_list = f->root_list_by_rank; + /* f may be zero-initialized or a previously used heap. n_nodes is the number of node slots to make available; 0 is allowed and leaves f->nodes as it was. */ + clib_memset (f, 0, sizeof (f[0])); + + f->nodes = save_nodes; + f->root_list_by_rank = save_root_list; + if (n_nodes > 0) /* n_nodes - 1 would wrap to ~0 and ask for a huge vector */ + vec_validate (f->nodes, n_nodes - 1); + vec_reset_length (f->root_list_by_rank); + + f->min_root = ~0; +} +/* Release the node vector and the rank scratch vector. */ +always_inline void +fheap_free (fheap_t * f) +{ + vec_free (f->nodes); + vec_free (f->root_list_by_rank); +} +/* Index of the minimum node, or ~0 when the heap is empty. Nothing is removed. */ +always_inline u32 +fheap_find_min (fheap_t * f) +{ + return f->min_root; +} +/* Non-zero when the heap holds no node. */ +always_inline u32 +fheap_is_empty (fheap_t * f) +{ + return f->min_root == ~0; +} + +/* Add/delete nodes. ni is the index of a slot of f->nodes chosen by the caller. */ +void fheap_add (fheap_t * f, u32 ni, u32 key); +void fheap_del (fheap_t * f, u32 ni); + +/* Delete and return minimum. Returns ~0 on an empty heap; the key is stored in *min_key when min_key is non-null. */ +u32 fheap_del_min (fheap_t * f, u32 * min_key); + +/* Change key value to a new, smaller one. */ +void fheap_decrease_key (fheap_t * f, u32 ni, u32 new_key); + +#endif /* included_clib_fheap_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/flowhash_24_16.h b/extras/deprecated/vppinfra/flowhash_24_16.h new file mode 100644 index 00000000000..64ee0796c7a --- /dev/null +++ b/extras/deprecated/vppinfra/flowhash_24_16.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef SRC_VPPINFRA_FLOWHASH_24_16_H_ +#define SRC_VPPINFRA_FLOWHASH_24_16_H_ +/* Flowhash variant with 24-byte keys and 16-byte values. The template's include guard is dropped first so that flowhash_template.h can be expanded once more for this variant. */ +#ifdef __included_flowhash_template_h__ +#undef __included_flowhash_template_h__ +#endif + +#include <vppinfra/clib.h> +#include <vppinfra/xxhash.h> +#include <vppinfra/crc32.h> +/* Key as stored in a table entry: three 64-bit words. */ +typedef struct { + u64 as_u64[3]; +} flowhash_skey_24_16_t; +/* Key as passed to lookups: same layout as the stored key. */ +typedef struct { + u64 as_u64[3]; +} flowhash_lkey_24_16_t; +/* Value stored with each key: two 64-bit words. */ +typedef struct { + u64 as_u64[2]; +} flowhash_value_24_16_t; +/* Expand the template; FLOWHASH_TYPE is the suffix pasted onto the generated names. */ +#define FLOWHASH_TYPE _24_16 +#include <vppinfra/flowhash_template.h> +#undef FLOWHASH_TYPE +/* Hash of a lookup key: CRC32C over the 24 key bytes when clib_crc32c_uses_intrinsics is defined, otherwise xxhash of the three words XORed together. */ +static_always_inline +u32 flowhash_hash_24_16(flowhash_lkey_24_16_t *k) +{ +#ifdef clib_crc32c_uses_intrinsics + return clib_crc32c ((u8 *) &k->as_u64[0], 24); +#else + u64 val = 0; + val ^= k->as_u64[0]; + val ^= k->as_u64[1]; + val ^= k->as_u64[2]; + return (u32)clib_xxhash (val); +#endif +} +/* Returns 0 when the stored key and the lookup key are identical, 1 otherwise. */ +static_always_inline +u8 flowhash_cmp_key_24_16(flowhash_skey_24_16_t *a, + flowhash_lkey_24_16_t *b) +{ + u8 val = 0; + val |= (a->as_u64[0] != b->as_u64[0]); + val |= (a->as_u64[1] != b->as_u64[1]); + val |= (a->as_u64[2] != b->as_u64[2]); + return val; +} +/* Copy a lookup key into a stored key, word by word. */ +static_always_inline +void flowhash_cpy_key_24_16(flowhash_skey_24_16_t *dst, + flowhash_lkey_24_16_t *src) +{ + dst->as_u64[0] = src->as_u64[0]; + dst->as_u64[1] = src->as_u64[1]; + dst->as_u64[2] = src->as_u64[2]; +} + +#endif /* SRC_VPPINFRA_FLOWHASH_24_16_H_ */ diff --git a/extras/deprecated/vppinfra/flowhash_8_8.h b/extras/deprecated/vppinfra/flowhash_8_8.h new file mode 100644 index 00000000000..4a5cfc0a0c6 --- /dev/null +++ b/extras/deprecated/vppinfra/flowhash_8_8.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SRC_VPPINFRA_FLOWHASH_8_8_H_ +#define SRC_VPPINFRA_FLOWHASH_8_8_H_ +/* Flowhash variant with 8-byte keys and 8-byte values. The template's include guard is dropped first so that flowhash_template.h can be expanded once more for this variant. */ +#ifdef __included_flowhash_template_h__ +#undef __included_flowhash_template_h__ +#endif + +#include <vppinfra/clib.h> +#include <vppinfra/xxhash.h> +#include <vppinfra/crc32.h> +/* Key as stored in a table entry: one 64-bit word. */ +typedef struct { + u64 as_u64[1]; +} flowhash_skey_8_8_t; +/* Key as passed to lookups: same layout as the stored key. */ +typedef struct { + u64 as_u64[1]; +} flowhash_lkey_8_8_t; +/* Value stored with each key: one 64-bit word. */ +typedef struct { + u64 as_u64[1]; +} flowhash_value_8_8_t; +/* Expand the template; FLOWHASH_TYPE is the suffix pasted onto the generated names. */ +#define FLOWHASH_TYPE _8_8 +#include <vppinfra/flowhash_template.h> +#undef FLOWHASH_TYPE +/* Hash of a lookup key: CRC32C over the 8 key bytes when clib_crc32c_uses_intrinsics is defined, otherwise xxhash of the key word. */ +static_always_inline +u32 flowhash_hash_8_8(flowhash_lkey_8_8_t *k) +{ +#ifdef clib_crc32c_uses_intrinsics + return clib_crc32c ((u8 *) &k->as_u64[0], 8); +#else + return clib_xxhash (k->as_u64[0]); +#endif +} +/* Returns 0 when the stored key and the lookup key are identical, 1 otherwise. */ +static_always_inline +u8 flowhash_cmp_key_8_8(flowhash_skey_8_8_t *a, + flowhash_lkey_8_8_t *b) +{ + return a->as_u64[0] != b->as_u64[0]; +} +/* Copy a lookup key into a stored key. */ +static_always_inline +void flowhash_cpy_key_8_8(flowhash_skey_8_8_t *dst, + flowhash_lkey_8_8_t *src) +{ + dst->as_u64[0] = src->as_u64[0]; +} + +#endif /* SRC_VPPINFRA_FLOWHASH_8_8_H_ */ diff --git a/extras/deprecated/vppinfra/flowhash_template.h b/extras/deprecated/vppinfra/flowhash_template.h new file mode 100644 index 00000000000..d7a621c1754 --- /dev/null +++ b/extras/deprecated/vppinfra/flowhash_template.h @@ -0,0 +1,608 @@ +/* + * Copyright (c) 2012 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Author: Pierre Pfister <ppfister@cisco.com> + * + * DISCLAIMER ! + * + * This most likely is not the hash table you are looking for !! + * + * This structure targets a very specific and quite narrow set of use-cases + * that are not covered by other hash tables. + * + * Read the following text carefully, or ask the author or one of VPP's + * committers to make sure this is what you are looking for. + * + * + * -- Abstract: + * This hash table intends to provide a very fast lookup and insertion of + * key-value pairs for flow tables (although it might be used for other + * purposes), with additional support for lazy timeouts. + * In particular, it was designed to minimize blocking reads, register usage and + * cache-line accesses during a typical lookup. + * This hash table therefore provides stateful packet processing + * without performance degradation even when every single lookup has to fetch + * memory from RAM. + * This hash table is not thread-safe and requires executing a garbage + * collection function to clean up chained buckets. + * + * -- Overview: + * + * A first aspect of this hash table is that it is self-contained in a single + * block of memory. Each entry contains a key, a value, and a 32-bit timeout + * value; occupies a full and single cache line; and is identified by a unique + * 32-bit index. The entry index zero is reserved and used when an entry + * could be neither found nor inserted. 
This means it is not necessary to + * immediately check whether an insertion or lookup was successful before + * behaving accordingly. One can just keep doing business as usual and + * check for the error later. + * + * Each entry is associated with a timeout value (whose unit or clock is up to + * the user of the hash table). An entry whose timeout is strictly smaller + * than the current time is considered empty, whereas an entry whose timeout is + * greater than or equal to the current time contains a valid key-value pair. + * + * Hash table lookup and insertion are equivalent: + * - An entry index is always returned (possibly index 0 if no entry could be + * found or created). + * - The returned entry always has its key set to the provided key. + * - Timeout value will be greater than or equal to the provided current time + * whenever a valid entry was found, strictly smaller otherwise. In particular, + * one cannot differentiate between an entry which was just created, and an + * entry which already existed in the past but timed out in between. + * + * As mentioned earlier, entry index zero is used as an invalid entry which may + * be manipulated as a normal one. Entries whose index goes from 1 to + * N (where N is a power of 2) are used as direct buckets, each containing a + * single entry. In the absence of hash collision, a single entry whose location + * is determined by the key hash and the hash table + * header is accessed (one single cache line, without indirection). This + * allows for efficient pre-fetching of the key-value for more than 95% of + * accesses. + * + * In order to handle hash collisions (i.e. when multiple keys + * end up in the same bucket), entries whose index is greater than N are + * grouped into M groups of 16 collision entries. Such groups are linked + * with regular entries whenever a collision needs to be handled. 
+ * When looking up a key with a bucket where a collision occurred, unused bits + * from the key hash are used to select two entries (from the collision bucket) + * where the new entry might be inserted. + * + * Once an entry is inserted, it will never be moved as long as the entry + * timeout value remains greater than or equal to the provided current time value. + * The entry index can therefore be stored in other data structures as a way + * to bypass the hash lookup. But when doing so, one should check if the + * present key is the actual looked-up key. + * + * -- Garbage Collection: + * + * Since there is no explicit element removal, a garbage collector mechanism + * is required in order to remove buckets used for hash collisions. This + * is done by calling the flowhash_gc function on a regular basis. Each call + * to this function examines a single collision bucket. It shall therefore be called + * as many times as there are collision buckets in the hash table in order to + * ensure a full inspection. + * + * -- Time and timeout mechanism: + * + * The hash table makes use of a time value in [1, 2^32 - 1]. + * The provided time value shall keep increasing, and wrap-around is not handled. + * When seconds are used, the system should run for 136 years without any issue. + * If milliseconds are used, a shift should be operated on all timeout values + * on a regular basis (more often than every 49 days). 
+ */ + +#ifndef __included_flowhash_template_h__ +#define __included_flowhash_template_h__ + +#include <vppinfra/clib.h> +#include <vppinfra/mem.h> +#include <vppinfra/cache.h> + +#ifndef FLOWHASH_TYPE +#error FLOWHASH_TYPE not defined +#endif +/* FV(x) pastes FLOWHASH_TYPE onto x; FVT(x) does the same and appends _t. The extra macro level lets FLOWHASH_TYPE expand before it is pasted. */ +#define _fv(a,b) a##b +#define __fv(a,b) _fv(a,b) +#define FV(a) __fv(a,FLOWHASH_TYPE) + +#define _fvt(a,b) a##b##_t +#define __fvt(a,b) _fvt(a,b) +#define FVT(a) __fvt(a,FLOWHASH_TYPE) + +/* Same for all flowhash variants */ +#ifndef __included_flowhash_common__ +/* Index of the reserved dummy entry, returned when a key can be neither found nor inserted. */ +#define FLOWHASH_INVALID_ENTRY_INDEX 0 +/* A collision bucket holds 1 << 4 = 16 entries. */ +#define FLOWHASH_ENTRIES_PER_BUCKETS_LOG 4 +#define FLOWHASH_ENTRIES_PER_BUCKETS (1 << FLOWHASH_ENTRIES_PER_BUCKETS_LOG) + +#endif /* ifndef __included_flowhash_common__ */ + + /** + * @brief Compare a stored key with a lookup key. + * + * This function must be defined to use this template. It must return 0 + * when the two keys are identical, and a different value otherwise. + */ +static_always_inline +u8 FV(flowhash_cmp_key)(FVT(flowhash_skey) *a, FVT(flowhash_lkey) *b); + + /** + * @brief Hash a lookup key into a 32-bit integer. + * + * This function must be defined to use this template. + * It must provide close to 32 bits of entropy distributed amongst + * all 32 bits of the returned value. + * Keys that are equal must have the same hash. + */ + static_always_inline + u32 FV(flowhash_hash)(FVT(flowhash_lkey) *k); + +/** + * @brief Copy a lookup key into a destination stored key. + * + * This function must be defined to use this template. It must modify the dst + * key such that a later call to flowhash_cmp_key with the same arguments + * would return 0. + */ +static_always_inline +void FV(flowhash_cpy_key)(FVT(flowhash_skey) *dst, FVT(flowhash_lkey) *src); + +/** + * @brief One flow hash entry used for both direct buckets and collision + * buckets. + */ +typedef struct { + /* Each entry is cache-line aligned. */ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /* Key is first to take advantage of alignment. 
*/ + FVT(flowhash_skey) key; + + /* Entry value. */ + FVT(flowhash_value) value; + + /* Timeout value: the entry holds a valid key-value pair while timeout >= current time. */ + u32 timeout; + + /* In a fixed entry: index of the first entry of its chained collision bucket, or FLOWHASH_INVALID_ENTRY_INDEX. In the first entry of a chained collision bucket: back link to that fixed entry. */ + u32 chained_entry_index; +} FVT(flowhash_entry); + +typedef struct FVT(__flowhash_struct) { + /* Cache aligned to simplify allocation. */ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + /* Array going downward (negative indices, stored just before this header) containing free bucket indices */ + u32 free_buckets_indices[0]; + + /* Negative index of the first free bucket; 0 when no free bucket is left */ + i32 free_buckets_position; + + /* Number of fixed entries minus one (used as a mask on the hash) */ + u32 fixed_entries_mask; + + /* Allocated pointer for this hash table */ + void *mem; + /* Number of collision buckets minus one, and total number of entries including the dummy entry 0. */ + u32 collision_buckets_mask; + u32 total_entries; + /* Statistics: lookups that found no free collision bucket, lookups that took the collision path, buckets released by flowhash_gc. */ + u64 not_enough_buckets_counter; + u64 collision_lookup_counter; + u64 garbage_collection_counter; + /* Number of flowhash_gc calls so far; selects the collision bucket inspected by the next call. */ + u32 gc_counter; + + /* Entry array containing: + * - 1 Dummy entry for error return + * - (fixed_entries_mask + 1) Fixed entries + * - (collision_buckets_mask + 1) * FLOWHASH_ENTRIES_PER_BUCKETS Collision entries + */ + FVT(flowhash_entry) entries[0]; +} FVT(flowhash); + +/* Same for all flowhash variants */ +#ifndef __included_flowhash_common__ +#define __included_flowhash_common__ + +/** + * @brief Test whether a returned entry index corresponds to an overflow event. + */ +#define flowhash_is_overflow(ei) \ + ((ei) == FLOWHASH_INVALID_ENTRY_INDEX) + +/** + * @brief Iterate over all entries in the hash table. + * + * Iterate over all entries in the hash table, not including the first invalid + * entry (at index 0), but including all chained hash collision buckets. + * ei must be an assignable integer variable; it is used as the loop counter. + */ +#define flowhash_foreach_entry(h, ei) \ + for (ei = 1; \ + ei < (h)->total_entries; \ + ei++) + +/** + * @brief Iterate over all currently valid entries. + * + * Iterate over all entries in the hash table which timeout value is greater + * or equal to the current time. 
+ */ +#define flowhash_foreach_valid_entry(h, ei, now) \ + flowhash_foreach_entry(h, ei) /* expands to "for (...) if (...)": do not follow the loop body with an else */ \ + if (((now) <= (h)->entries[ei].timeout)) + +/** + * @brief Timeout variable (an lvalue) of a given entry. + */ +#define flowhash_timeout(h, ei) ((h)->entries[ei].timeout) + +/** + * @brief Indicates whether the entry has timed out, i.e. is not in use: true when time_now is strictly greater than the entry timeout. + */ +#define flowhash_is_timeouted(h, ei, time_now) \ + ((time_now) > flowhash_timeout(h, ei)) + +/** + * @brief Get a pointer to the key stored in the entry at the given index. + */ +#define flowhash_key(h, ei) (&(h)->entries[ei].key) + +/** + * @brief Get a pointer to the value stored in the entry at the given index. + */ +#define flowhash_value(h, ei) (&(h)->entries[ei].value) + +/** + * @brief Get the number of octets allocated to this structure. + */ +#define flowhash_memory_size(h) clib_mem_size((h)->mem) + +/** + * @brief Test whether the entry index is in hash table boundaries. + */ +#define flowhash_is_valid_entry_index(h, ei) ((ei) < (h)->total_entries) + +/** + * @brief Adjust, if necessary, provided parameters such as being valid flowhash + * sizes. 
+ */ +static +void flowhash_validate_sizes(u32 *fixed_entries, u32 *collision_buckets) +{ + /* Clamp to [FLOWHASH_ENTRIES_PER_BUCKETS, 1 << 28], then round up to the next power of two by smearing the high bit of (value - 1) downwards */ + if (*fixed_entries < FLOWHASH_ENTRIES_PER_BUCKETS) + *fixed_entries = FLOWHASH_ENTRIES_PER_BUCKETS; + if (*fixed_entries > (1 << (32 - FLOWHASH_ENTRIES_PER_BUCKETS_LOG))) + *fixed_entries = (1 << (32 - FLOWHASH_ENTRIES_PER_BUCKETS_LOG)); + + *fixed_entries -= 1; + *fixed_entries |= *fixed_entries >> 16; + *fixed_entries |= *fixed_entries >> 8; + *fixed_entries |= *fixed_entries >> 4; + *fixed_entries |= *fixed_entries >> 2; + *fixed_entries |= *fixed_entries >> 1; + *fixed_entries += 1; + + if (*collision_buckets != 0) + { /* 0 is kept as is (no collision buckets); otherwise use at least one cache line worth of u32 indices, rounded up to a power of two */ + if (*collision_buckets < CLIB_CACHE_LINE_BYTES/sizeof(u32)) + *collision_buckets = CLIB_CACHE_LINE_BYTES/sizeof(u32); + + *collision_buckets -= 1; + *collision_buckets |= *collision_buckets >> 16; + *collision_buckets |= *collision_buckets >> 8; + *collision_buckets |= *collision_buckets >> 4; + *collision_buckets |= *collision_buckets >> 2; + *collision_buckets |= *collision_buckets >> 1; + *collision_buckets += 1; + } +} + +/** + * @brief Prefetch the fixed hash entry bucket. + * + * This should be performed approximately 200-300 cycles before lookup + * if the table is located in RAM. Or 30-40 cycles before lookup + * in case the table is located in L3. + */ +#define flowhash_prefetch(h, hash) \ + CLIB_PREFETCH (&(h)->entries[((hash) & (h)->fixed_entries_mask) + 1], \ + sizeof((h)->entries[0]), LOAD) + +#endif /* ifndef __included_flowhash_common__ */ + +/** + * @brief Allocate a flowhash structure. + * + * @param[in] fixed_entries The number of fixed entries in the hash table. + * @param[in] collision_buckets The number of chained (collision) buckets. + * + * fixed_entries and collision_buckets parameters may not be used as is but + * modified in order to fit requirements. + * + * Since the flowhash does not support dynamic resizing, it is fairly + * important to choose the parameters carefully. 
In particular the performance + * gain from using this structure comes from an efficient lookup in the + * absence of hash collision. + * As a rule of thumb, if the number of active entries (flows) is M, + * there should be about 16*M fixed entries, and M/16 collision buckets. + * This represents 17*M allocated entries. + * + * For example: + * M = 2^20 total_size ~= 1GiB collision ~= 3% + * M = 2^18 total_size ~= 250MiB collision ~= 3% + * M = 2^10 total_size ~= 1MiB collision ~= 6% + * @return A pointer to the new table, to be released with flowhash_free. + */ +static_always_inline +FVT(flowhash) *FV(flowhash_alloc)(u32 fixed_entries, u32 collision_buckets) +{ + FVT(flowhash) *h; + uword size; + void *mem; + u32 entries; + + flowhash_validate_sizes(&fixed_entries, &collision_buckets); + + entries = 1 + fixed_entries + + collision_buckets * FLOWHASH_ENTRIES_PER_BUCKETS; + size = sizeof(*h) + sizeof(h->entries[0]) * entries + + sizeof(h->free_buckets_indices[0]) * collision_buckets; + + mem = clib_mem_alloc_aligned(size, CLIB_CACHE_LINE_BYTES); + h = mem + collision_buckets * sizeof(h->free_buckets_indices[0]); /* the free bucket index array sits just before the header and is addressed with negative indices */ + h->mem = mem; + + /* Fill free elements list: slot -i holds the first entry index of the i-th collision bucket counted from the end. i is signed because it is negated to index backwards. */ + int i; + clib_memset(h->entries, 0, sizeof(h->entries[0]) * entries); + for (i = 1; i <= collision_buckets; i++) + { + h->free_buckets_indices[-i] = + entries - i * FLOWHASH_ENTRIES_PER_BUCKETS; + } + + /* Init buckets (the memset above already zeroed these fields; FLOWHASH_INVALID_ENTRY_INDEX is 0) */ + for (i=0; i < entries; i++) + { + h->entries[i].chained_entry_index = FLOWHASH_INVALID_ENTRY_INDEX; + h->entries[i].timeout = 0; + } + + h->free_buckets_position = -collision_buckets; /* every bucket is free; moves towards 0 as buckets get chained */ + h->fixed_entries_mask = fixed_entries - 1; + h->collision_buckets_mask = collision_buckets - 1; + h->total_entries = entries; + h->not_enough_buckets_counter = 0; + h->collision_lookup_counter = 0; + h->garbage_collection_counter = 0; + h->gc_counter = 0; + + return h; +} + +/** + * @brief Free the flow hash memory. 
+ */ +static_always_inline +void FV(flowhash_free)(FVT(flowhash) *h) +{ + clib_mem_free(h->mem); /* h points inside this allocation (see flowhash_alloc), so h must not be used afterwards */ +} +/* Forward declaration of the collision path that flowhash_get falls back to; it is defined after flowhash_get. */ +static void +FV(__flowhash_get_chained) (FVT(flowhash) *h, FVT(flowhash_lkey) *k, + u32 hash, u32 time_now, u32 *ei); + +/** + * @brief Retrieves an entry index corresponding to a provided key and its hash. + * + * @param h The hash table pointer. + * @param k[in] A pointer to the key value. + * @param hash[in] The hash of the key. + * @param time_now[in] The current time. + * @param ei[out] A pointer set to the found entry index. + * + * This function always sets ei value to a valid entry index which can then be + * used to access the stored value as well as get or set its associated timeout. + * The key stored in the returned entry is always set to the provided key. + * + * In case the provided key is not found, and no entry could be created + * (either because there is no hash collision bucket available or + * the candidate entries in the collision bucket were already used), ei is + * set to the special value FLOWHASH_INVALID_ENTRY_INDEX (which can be tested + * with the flowhash_is_overflow macro). + * + * The timeout value is never modified during a lookup. + * - Use the flowhash_is_timeouted macro to test whether the returned entry + * was already valid, or is proposed for insertion. + * - Use the flowhash_timeout macro to get and set the entry timeout value. 
+ * + */ +static_always_inline +void FV(flowhash_get) (FVT(flowhash) *h, FVT(flowhash_lkey) *k, + u32 hash, u32 time_now, u32 *ei) +{ + *ei = (hash & h->fixed_entries_mask) + 1; /* fixed entry for this hash; index 0 is the dummy entry */ + + if (PREDICT_FALSE(FV(flowhash_cmp_key)(&h->entries[*ei].key, k) != 0)) + { + if (PREDICT_TRUE(time_now > h->entries[*ei].timeout && + (h->entries[*ei].chained_entry_index == + FLOWHASH_INVALID_ENTRY_INDEX))) + { + FV(flowhash_cpy_key)(&h->entries[*ei].key, k); /* timed out and not chained: reuse the fixed entry */ + } + else + { + FV(__flowhash_get_chained)(h, k, hash, time_now, ei); + } + } +} +/* Collision path of flowhash_get, entered with *ei set to the fixed entry. Makes sure that entry has a collision bucket, then looks the key up in, or claims, one of two candidate entries of the bucket selected by the top hash bits. On overflow *ei is set to FLOWHASH_INVALID_ENTRY_INDEX and the dummy entry is left expired. */ +static_always_inline void +FV(__flowhash_get_chained) (FVT(flowhash) *h, FVT(flowhash_lkey) *k, + u32 hash, u32 time_now, u32 *ei) +{ + h->collision_lookup_counter++; + + if (h->entries[*ei].chained_entry_index == FLOWHASH_INVALID_ENTRY_INDEX) + { + /* No chained entry yet. Let's chain one. */ + if (h->free_buckets_position == 0) + { + /* Oops. No more buckets available. */ + h->not_enough_buckets_counter++; + *ei = FLOWHASH_INVALID_ENTRY_INDEX; + h->entries[FLOWHASH_INVALID_ENTRY_INDEX].timeout = + time_now - 1; + FV(flowhash_cpy_key)( + &h->entries[FLOWHASH_INVALID_ENTRY_INDEX].key, k); + return; + } + + /* Forward link */ + h->entries[*ei].chained_entry_index = + h->free_buckets_indices[h->free_buckets_position]; + + /* Backward link (for garbage collection) */ + h->entries[h->free_buckets_indices[h->free_buckets_position]]. + chained_entry_index = *ei; + + /* Move pointer */ + h->free_buckets_position++; + } + + /* Get the two indexes where to look at. */ + u32 bi0 = h->entries[*ei].chained_entry_index + + (hash >> (32 - FLOWHASH_ENTRIES_PER_BUCKETS_LOG)); + u32 bi1 = bi0 + 1; + bi1 = (bi0 & (FLOWHASH_ENTRIES_PER_BUCKETS - 1)) ? bi1 : + bi1 - FLOWHASH_ENTRIES_PER_BUCKETS; + + /* It is possible that we wait while comparing bi0 key. + * It's better to prefetch bi1 so we don't wait twice. 
*/ + CLIB_PREFETCH(&h->entries[bi1], sizeof (h->entries[0]), READ); + + if (FV(flowhash_cmp_key)(&h->entries[bi0].key, k) == 0) + { + *ei = bi0; + return; + } + + if (FV(flowhash_cmp_key)(&h->entries[bi1].key, k) == 0) + { + *ei = bi1; + return; + } + + if (h->entries[*ei].timeout >= time_now) + { + *ei = FLOWHASH_INVALID_ENTRY_INDEX; + *ei = (time_now > h->entries[bi0].timeout) ? bi0 : *ei; + *ei = (time_now > h->entries[bi1].timeout) ? bi1 : *ei; + } + h->entries[FLOWHASH_INVALID_ENTRY_INDEX].timeout = time_now - 1; /* keep the dummy entry expired, as the no-bucket path does, so an overflow result never reads as a valid hit */ + FV(flowhash_cpy_key)(&h->entries[*ei].key, k); +} +/* Inspect one collision bucket per call, chosen round-robin from gc_counter, and return it to the free list when it is chained and none of its entries is still valid at time_now. freed_index and freed_len are optional out parameters: *freed_len is set to 0, or to FLOWHASH_ENTRIES_PER_BUCKETS when a bucket was freed, in which case *freed_index is its first entry index. */ +static_always_inline void +FV(flowhash_gc)(FVT(flowhash) *h, u32 time_now, + u32 *freed_index, u32 *freed_len) +{ + u32 ei; + if (freed_len) + *freed_len = 0; + + if (PREDICT_FALSE(h->collision_buckets_mask == (((u32)0) - 1))) + return; + + /* prefetch two rounds in advance */ + ei = 2 + h->fixed_entries_mask + + ((h->gc_counter + 2) & h->collision_buckets_mask) * + FLOWHASH_ENTRIES_PER_BUCKETS; + CLIB_PREFETCH(&h->entries[ei], sizeof (h->entries[0]), READ); + + /* prefetch one round in advance */ + ei = 2 + h->fixed_entries_mask + + ((h->gc_counter + 1) & h->collision_buckets_mask) * + FLOWHASH_ENTRIES_PER_BUCKETS; + if (h->entries[ei].chained_entry_index != FLOWHASH_INVALID_ENTRY_INDEX) + { + CLIB_PREFETCH(&h->entries[ei], 4 * CLIB_CACHE_LINE_BYTES, READ); + } + + /* do GC */ + ei = 2 + h->fixed_entries_mask + + ((h->gc_counter) & h->collision_buckets_mask) * + FLOWHASH_ENTRIES_PER_BUCKETS; + if (h->entries[ei].chained_entry_index != FLOWHASH_INVALID_ENTRY_INDEX) + { + u8 found = 0; + int i; + for (i=0; i<FLOWHASH_ENTRIES_PER_BUCKETS; i++) + { + if (time_now <= h->entries[ei + i].timeout) + { + found = 1; + break; + } + } + + if (!found) + { + /* Tell caller we freed this */ + if (freed_index) + *freed_index = ei; + /* freed_len is guarded on its own: the two out pointers are independent */ + if (freed_len) + *freed_len = FLOWHASH_ENTRIES_PER_BUCKETS; + /* The bucket is not used. Let's free it. 
*/ + h->free_buckets_position--; + /* Reset forward link */ + h->entries[h->entries[ei].chained_entry_index].chained_entry_index = + FLOWHASH_INVALID_ENTRY_INDEX; + /* Reset back link */ + h->entries[ei].chained_entry_index = FLOWHASH_INVALID_ENTRY_INDEX; + /* Free element */ + h->free_buckets_indices[h->free_buckets_position] = ei; + /* Count the garbage collection event */ + h->garbage_collection_counter++; + } + } + + h->gc_counter++; +} +/* Count the entries, fixed and collision alike, that are still valid at time_now. Every entry of the table is visited. */ +static_always_inline +u32 FV(flowhash_elts)(FVT(flowhash) *h, u32 time_now) +{ + u32 tot = 0; + u32 ei; + + flowhash_foreach_valid_entry(h, ei, time_now) + tot++; + + return tot; +} + +#endif /* __included_flowhash_template_h__ */ diff --git a/extras/deprecated/vppinfra/pfhash.h b/extras/deprecated/vppinfra/pfhash.h new file mode 100644 index 00000000000..2884fa81cf9 --- /dev/null +++ b/extras/deprecated/vppinfra/pfhash.h @@ -0,0 +1,276 @@ +/* + Copyright (c) 2013 Cisco and/or its affiliates. + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#ifndef included_clib_pfhash_h +#define included_clib_pfhash_h + + +#include <vppinfra/clib.h> +#include <vppinfra/hash.h> +#include <vppinfra/pool.h> + +#if defined(CLIB_HAVE_VEC128) && ! 
defined (__ALTIVEC__) + +typedef struct +{ + /* 3 x 16 = 48 key bytes */ + union + { + u32x4 k_u32x4[3]; + u64 k_u64[6]; + } kb; + /* 3 x 4 = 12 value bytes */ + u32 values[3]; + u32 pad; +} pfhash_kv_16_t; + +typedef struct +{ + /* 5 x 8 = 40 key bytes */ + union + { + u64 k_u64[5]; + } kb; + + /* 5 x 4 = 20 value bytes */ + u32 values[5]; + u32 pad; +} pfhash_kv_8_t; + +typedef struct +{ + /* 4 x 8 = 32 key bytes */ + union + { + u64 k_u64[4]; + } kb; + + /* 4 x 8 = 32 value bytes */ + u64 values[4]; +} pfhash_kv_8v8_t; + +typedef struct +{ + /* 8 x 4 = 32 key bytes */ + union + { + u32x4 k_u32x4[2]; + u32 kb[8]; + } kb; + + /* 8 x 4 = 32 value bytes */ + u32 values[8]; +} pfhash_kv_4_t; + +typedef union +{ + pfhash_kv_16_t kv16; + pfhash_kv_8_t kv8; + pfhash_kv_8v8_t kv8v8; + pfhash_kv_4_t kv4; +} pfhash_kv_t; + +typedef struct +{ + /* Bucket vector */ + u32 *buckets; +#define PFHASH_BUCKET_OVERFLOW (u32)~0 + + /* Pool of key/value pairs */ + pfhash_kv_t *kvp; + + /* overflow plain-o-hash */ + uword *overflow_hash; + + /* Pretty-print name */ + u8 *name; + + u32 key_size; + u32 value_size; + + u32 overflow_count; + u32 nitems; + u32 nitems_in_overflow; +} pfhash_t; + +void pfhash_init (pfhash_t * p, char *name, u32 key_size, u32 value_size, + u32 nbuckets); +void pfhash_free (pfhash_t * p); +u64 pfhash_get (pfhash_t * p, u32 bucket, void *key); +void pfhash_set (pfhash_t * p, u32 bucket, void *key, void *value); +void pfhash_unset (pfhash_t * p, u32 bucket, void *key); + +format_function_t format_pfhash; + +static inline void +pfhash_prefetch_bucket (pfhash_t * p, u32 bucket) +{ + CLIB_PREFETCH (&p->buckets[bucket], CLIB_CACHE_LINE_BYTES, LOAD); +} + +static inline u32 +pfhash_read_bucket_prefetch_kv (pfhash_t * p, u32 bucket) +{ + u32 bucket_contents = p->buckets[bucket]; + if (PREDICT_TRUE ((bucket_contents & PFHASH_BUCKET_OVERFLOW) == 0)) + CLIB_PREFETCH (&p->kvp[bucket_contents], CLIB_CACHE_LINE_BYTES, LOAD); + return bucket_contents; +} + +/* + * 
pfhash_search_kv_16 + * See if the supplied 16-byte key matches one of three 16-byte (key,value) pairs. + * Return the indicated value, or ~0 if no match + * + * Note: including the overflow test, the fast path is 35 instrs + * on x86_64. Elves will steal your keyboard in the middle of the night if + * you "improve" it without checking the generated code! + */ +static inline u32 +pfhash_search_kv_16 (pfhash_t * p, u32 bucket_contents, u32x4 * key) +{ + u32x4 diff0, diff1, diff2; + u32 is_equal0, is_equal1, is_equal2; + u32 no_match; + pfhash_kv_16_t *kv; + u32 rv; + + if (PREDICT_FALSE (bucket_contents == PFHASH_BUCKET_OVERFLOW)) + { + uword *hp; + hp = hash_get_mem (p->overflow_hash, key); + if (hp) + return hp[0]; + return (u32) ~ 0; + } + + kv = &p->kvp[bucket_contents].kv16; + + diff0 = u32x4_sub (kv->kb.k_u32x4[0], key[0]); + diff1 = u32x4_sub (kv->kb.k_u32x4[1], key[0]); + diff2 = u32x4_sub (kv->kb.k_u32x4[2], key[0]); + + no_match = is_equal0 = (i16) u32x4_zero_byte_mask (diff0); + is_equal1 = (i16) u32x4_zero_byte_mask (diff1); + no_match |= is_equal1; + is_equal2 = (i16) u32x4_zero_byte_mask (diff2); + no_match |= is_equal2; + /* If any of the three items matched, no_match will be zero after this line */ + no_match = ~no_match; + + rv = (is_equal0 & kv->values[0]) + | (is_equal1 & kv->values[1]) | (is_equal2 & kv->values[2]) | no_match; + + return rv; +} + +static inline u32 +pfhash_search_kv_8 (pfhash_t * p, u32 bucket_contents, u64 * key) +{ + pfhash_kv_8_t *kv; + u32 rv = (u32) ~ 0; + + if (PREDICT_FALSE (bucket_contents == PFHASH_BUCKET_OVERFLOW)) + { + uword *hp; + hp = hash_get_mem (p->overflow_hash, key); + if (hp) + return hp[0]; + return (u32) ~ 0; + } + + kv = &p->kvp[bucket_contents].kv8; + + rv = (kv->kb.k_u64[0] == key[0]) ? kv->values[0] : rv; + rv = (kv->kb.k_u64[1] == key[0]) ? kv->values[1] : rv; + rv = (kv->kb.k_u64[2] == key[0]) ? kv->values[2] : rv; + rv = (kv->kb.k_u64[3] == key[0]) ? 
kv->values[3] : rv; + rv = (kv->kb.k_u64[4] == key[0]) ? kv->values[4] : rv; + + return rv; +} + +static inline u64 +pfhash_search_kv_8v8 (pfhash_t * p, u32 bucket_contents, u64 * key) +{ + pfhash_kv_8v8_t *kv; + u64 rv = (u64) ~ 0; + + if (PREDICT_FALSE (bucket_contents == PFHASH_BUCKET_OVERFLOW)) + { + uword *hp; + hp = hash_get_mem (p->overflow_hash, key); + if (hp) + return hp[0]; + return (u64) ~ 0; + } + + kv = &p->kvp[bucket_contents].kv8v8; + + rv = (kv->kb.k_u64[0] == key[0]) ? kv->values[0] : rv; + rv = (kv->kb.k_u64[1] == key[0]) ? kv->values[1] : rv; + rv = (kv->kb.k_u64[2] == key[0]) ? kv->values[2] : rv; + rv = (kv->kb.k_u64[3] == key[0]) ? kv->values[3] : rv; + + return rv; +} + +static inline u32 +pfhash_search_kv_4 (pfhash_t * p, u32 bucket_contents, u32 * key) +{ + u32x4 vector_key; + u32x4 is_equal[2]; + u32 zbm[2], winner_index; + pfhash_kv_4_t *kv; + + if (PREDICT_FALSE (bucket_contents == PFHASH_BUCKET_OVERFLOW)) + { + uword *hp; + hp = hash_get_mem (p->overflow_hash, key); + if (hp) + return hp[0]; + return (u32) ~ 0; + } + + kv = &p->kvp[bucket_contents].kv4; + + vector_key = u32x4_splat (key[0]); + + is_equal[0] = (kv->kb.k_u32x4[0] == vector_key); + is_equal[1] = (kv->kb.k_u32x4[1] == vector_key); + zbm[0] = ~u32x4_zero_byte_mask (is_equal[0]) & 0xFFFF; + zbm[1] = ~u32x4_zero_byte_mask (is_equal[1]) & 0xFFFF; + + if (PREDICT_FALSE ((zbm[0] == 0) && (zbm[1] == 0))) + return (u32) ~ 0; + + winner_index = min_log2 (zbm[0]) >> 2; + winner_index = zbm[1] ? 
(4 + (min_log2 (zbm[1]) >> 2)) : winner_index; + + return kv->values[winner_index]; +} + +#endif /* CLIB_HAVE_VEC128 */ + +#endif /* included_clib_pfhash_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/phash.c b/extras/deprecated/vppinfra/phash.c new file mode 100644 index 00000000000..52c29b33f78 --- /dev/null +++ b/extras/deprecated/vppinfra/phash.c @@ -0,0 +1,1017 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + Copyright (c) 2005 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +/* This is all stolen from Bob Jenkins and reworked for clib. Thanks + once again Bob for the great work. */ + +/* +------------------------------------------------------------------------------ +perfect.c: code to generate code for a hash for perfect hashing. +(c) Bob Jenkins, September 1996, December 1999 +You may use this code in any way you wish, and it is free. No warranty. +I hereby place this in the public domain. +Source is http://burtleburtle.net/bob/c/perfect.c + +This generates a minimal perfect hash function. That means, given a +set of n keys, this determines a hash function that maps each of +those keys into a value in 0..n-1 with no collisions. + +The perfect hash function first uses a normal hash function on the key +to determine (a,b) such that the pair (a,b) is distinct for all +keys, then it computes a^scramble[tab[b]] to get the final perfect hash. +tab[] is an array of 1-byte values and scramble[] is a 256-term array of +2-byte or 4-byte values. If there are n keys, the length of tab[] is a +power of two between n/3 and n. + +I found the idea of computing distinct (a,b) values in "Practical minimal +perfect hash functions for large databases", Fox, Heath, Chen, and Daoud, +Communications of the ACM, January 1992. They found the idea in Cichelli +(CACM Jan 1980). Beyond that, our methods differ. + +The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in +0..*blen*-1. A fast hash function determines both a and b +simultaneously. Any decent hash function is likely to produce +hashes so that (a,b) is distinct for all pairs. I try the hash +using different values of *salt* until all pairs are distinct. + +The final hash is (a XOR scramble[tab[b]]).
*scramble* is a +predetermined mapping of 0..255 into 0..smax-1. *tab* is an +array that we fill in in such a way as to make the hash perfect. + +First we fill in all values of *tab* that are used by more than one +key. We try all possible values for each position until one works. + +This leaves m unmapped keys and m values that something could hash to. +If you treat unmapped keys as lefthand nodes and unused hash values +as righthand nodes, and draw a line connecting each key to each hash +value it could map to, you get a bipartite graph. We attempt to +find a perfect matching in this graph. If we succeed, we have +determined a perfect hash for the whole set of keys. + +*scramble* is used because (a^tab[i]) clusters keys around *a*. +------------------------------------------------------------------------------ +*/ + +#include <vppinfra/bitmap.h> +#include <vppinfra/format.h> +#include <vppinfra/phash.h> +#include <vppinfra/random.h> + +static void +init_keys_direct_u32 (phash_main_t * pm) +{ + int n_keys_left, b_mask, a_shift; + u32 seed; + phash_key_t *k; + + seed = pm->hash_seed; + b_mask = (1 << pm->b_bits) - 1; + a_shift = BITS (seed) - pm->a_bits; + + k = pm->keys; + n_keys_left = vec_len (pm->keys); + + while (n_keys_left >= 2) + { + u32 x0, y0, z0; + u32 x1, y1, z1; + + x0 = y0 = z0 = seed; + x1 = y1 = z1 = seed; + x0 += (u32) k[0].key; + x1 += (u32) k[1].key; + + hash_mix32 (x0, y0, z0); + hash_mix32 (x1, y1, z1); + + k[0].b = z0 & b_mask; + k[1].b = z1 & b_mask; + k[0].a = z0 >> a_shift; + k[1].a = z1 >> a_shift; + if (PREDICT_FALSE (a_shift >= BITS (z0))) + k[0].a = k[1].a = 0; + + k += 2; + n_keys_left -= 2; + } + + if (n_keys_left >= 1) + { + u32 x0, y0, z0; + + x0 = y0 = z0 = seed; + x0 += k[0].key; + + hash_mix32 (x0, y0, z0); + + k[0].b = z0 & b_mask; + k[0].a = z0 >> a_shift; + if (PREDICT_FALSE (a_shift >= BITS (z0))) + k[0].a = 0; + + k += 1; + n_keys_left -= 1; + } +} + +static void +init_keys_direct_u64 (phash_main_t * pm) +{ + int 
n_keys_left, b_mask, a_shift; + u64 seed; + phash_key_t *k; + + seed = pm->hash_seed; + b_mask = (1 << pm->b_bits) - 1; + a_shift = BITS (seed) - pm->a_bits; + + k = pm->keys; + n_keys_left = vec_len (pm->keys); + + while (n_keys_left >= 2) + { + u64 x0, y0, z0; + u64 x1, y1, z1; + + x0 = y0 = z0 = seed; + x1 = y1 = z1 = seed; + x0 += (u64) k[0].key; + x1 += (u64) k[1].key; + + hash_mix64 (x0, y0, z0); + hash_mix64 (x1, y1, z1); + + k[0].b = z0 & b_mask; + k[1].b = z1 & b_mask; + k[0].a = z0 >> a_shift; + k[1].a = z1 >> a_shift; + if (PREDICT_FALSE (a_shift >= BITS (z0))) + k[0].a = k[1].a = 0; + + k += 2; + n_keys_left -= 2; + } + + if (n_keys_left >= 1) + { + u64 x0, y0, z0; + + x0 = y0 = z0 = seed; + x0 += k[0].key; + + hash_mix64 (x0, y0, z0); + + k[0].b = z0 & b_mask; + k[0].a = z0 >> a_shift; + if (PREDICT_FALSE (a_shift >= BITS (z0))) + k[0].a = 0; + + k += 1; + n_keys_left -= 1; + } +} + +static void +init_keys_indirect_u32 (phash_main_t * pm) +{ + int n_keys_left, b_mask, a_shift; + u32 seed; + phash_key_t *k; + + seed = pm->hash_seed; + b_mask = (1 << pm->b_bits) - 1; + a_shift = BITS (seed) - pm->a_bits; + + k = pm->keys; + n_keys_left = vec_len (pm->keys); + + while (n_keys_left >= 2) + { + u32 xyz[6]; + u32 x0, y0, z0; + u32 x1, y1, z1; + + pm->key_seed2 (pm->private, k[0].key, k[1].key, &xyz); + + x0 = y0 = z0 = seed; + x1 = y1 = z1 = seed; + x0 += xyz[0]; + y0 += xyz[1]; + z0 += xyz[2]; + x1 += xyz[3]; + y1 += xyz[4]; + z1 += xyz[5]; + + hash_mix32 (x0, y0, z0); + hash_mix32 (x1, y1, z1); + + k[0].b = z0 & b_mask; + k[1].b = z1 & b_mask; + k[0].a = z0 >> a_shift; + k[1].a = z1 >> a_shift; + if (PREDICT_FALSE (a_shift >= BITS (z0))) + k[0].a = k[1].a = 0; + + k += 2; + n_keys_left -= 2; + } + + if (n_keys_left >= 1) + { + u32 xyz[3]; + u32 x0, y0, z0; + + pm->key_seed1 (pm->private, k[0].key, &xyz); + + x0 = y0 = z0 = seed; + x0 += xyz[0]; + y0 += xyz[1]; + z0 += xyz[2]; + + hash_mix32 (x0, y0, z0); + + k[0].b = z0 & b_mask; + k[0].a = z0 >> a_shift; 
+ if (PREDICT_FALSE (a_shift >= BITS (z0))) + k[0].a = 0; + + k += 1; + n_keys_left -= 1; + } +} + +static void +init_keys_indirect_u64 (phash_main_t * pm) +{ + int n_keys_left, b_mask, a_shift; + u64 seed; + phash_key_t *k; + + seed = pm->hash_seed; + b_mask = (1 << pm->b_bits) - 1; + a_shift = BITS (seed) - pm->a_bits; + + k = pm->keys; + n_keys_left = vec_len (pm->keys); + + while (n_keys_left >= 2) + { + u64 xyz[6]; + u64 x0, y0, z0; + u64 x1, y1, z1; + + pm->key_seed2 (pm->private, k[0].key, k[1].key, &xyz); + + x0 = y0 = z0 = seed; + x1 = y1 = z1 = seed; + x0 += xyz[0]; + y0 += xyz[1]; + z0 += xyz[2]; + x1 += xyz[3]; + y1 += xyz[4]; + z1 += xyz[5]; + + hash_mix64 (x0, y0, z0); + hash_mix64 (x1, y1, z1); + + k[0].b = z0 & b_mask; + k[1].b = z1 & b_mask; + k[0].a = z0 >> a_shift; + k[1].a = z1 >> a_shift; + if (PREDICT_FALSE (a_shift >= BITS (z0))) + k[0].a = k[1].a = 0; + + k += 2; + n_keys_left -= 2; + } + + if (n_keys_left >= 1) + { + u64 xyz[3]; + u64 x0, y0, z0; + + pm->key_seed1 (pm->private, k[0].key, &xyz); + + x0 = y0 = z0 = seed; + x0 += xyz[0]; + y0 += xyz[1]; + z0 += xyz[2]; + + hash_mix64 (x0, y0, z0); + + k[0].b = z0 & b_mask; + k[0].a = z0 >> a_shift; + if (PREDICT_FALSE (a_shift >= BITS (z0))) + k[0].a = 0; + + k += 1; + n_keys_left -= 1; + } +} + +/* + * insert keys into table according to key->b + * check if the initial hash might work + */ +static int +init_tabb (phash_main_t * pm) +{ + int no_collisions; + phash_tabb_t *tb; + phash_key_t *k, *l; + + if (pm->key_seed1) + { + if (pm->flags & PHASH_FLAG_MIX64) + init_keys_indirect_u64 (pm); + else + init_keys_indirect_u32 (pm); + } + else + { + if (pm->flags & PHASH_FLAG_MIX64) + init_keys_direct_u64 (pm); + else + init_keys_direct_u32 (pm); + } + + if (!pm->tabb) + vec_resize (pm->tabb, 1 << pm->b_bits); + else + vec_foreach (tb, pm->tabb) phash_tabb_free (tb); + + /* Two keys with the same (a,b) guarantees a collision */ + no_collisions = 1; + vec_foreach (k, pm->keys) + { + u32 i, *ki; + + 
tb = pm->tabb + k->b; + ki = tb->keys; + for (i = 0; i < vec_len (ki); i++) + { + l = pm->keys + ki[i]; + if (k->a == l->a) + { + /* Given keys are supposed to be unique. */ + if (pm->key_is_equal + && pm->key_is_equal (pm->private, l->key, k->key)) + clib_error ("duplicate keys"); + no_collisions = 0; + goto done; + } + } + + vec_add1 (tb->keys, k - pm->keys); + } + +done: + return no_collisions; +} + +/* Try to apply an augmenting list */ +static int +apply (phash_main_t * pm, u32 tail, u32 rollback) +{ + phash_key_t *k; + phash_tabb_t *pb; + phash_tabq_t *q_child, *q_parent; + u32 ki, i, hash, child, parent; + u32 stabb; /* scramble[tab[b]] */ + int no_collision; + + no_collision = 1; + + /* Walk from child to parent until root is reached. */ + for (child = tail - 1; child; child = parent) + { + q_child = &pm->tabq[child]; + parent = q_child->parent_q; + q_parent = &pm->tabq[parent]; + + /* find parent's list of siblings */ + ASSERT (q_parent->b_q < vec_len (pm->tabb)); + pb = pm->tabb + q_parent->b_q; + + /* erase old hash values */ + stabb = pm->scramble[pb->val_b]; + for (i = 0; i < vec_len (pb->keys); i++) + { + ki = pb->keys[i]; + k = pm->keys + ki; + hash = k->a ^ stabb; + + /* Erase hash for all of child's siblings. */ + if (ki == pm->tabh[hash]) + pm->tabh[hash] = ~0; + } + + /* change pb->val_b, which will change the hashes of all parent siblings */ + pb->val_b = rollback ? q_child->oldval_q : q_child->newval_q; + + /* set new hash values */ + stabb = pm->scramble[pb->val_b]; + for (i = 0; i < vec_len (pb->keys); i++) + { + ki = pb->keys[i]; + k = pm->keys + ki; + + hash = k->a ^ stabb; + if (rollback) + { + if (parent == 0) + continue; /* root never had a hash */ + } + else if (pm->tabh[hash] != ~0) + { + /* Very rare case: roll back any changes. 
*/ + apply (pm, tail, /* rollback changes */ 1); + no_collision = 0; + goto done; + } + pm->tabh[hash] = ki; + } + } + +done: + return no_collision; +} + + +/* +------------------------------------------------------------------------------- +augment(): Add item to the mapping. + +Construct a spanning tree of *b*s with *item* as root, where each +parent can have all its hashes changed (by some new val_b) with +at most one collision, and each child is the b of that collision. + +I got this from Tarjan's "Data Structures and Network Algorithms". The +path from *item* to a *b* that can be remapped with no collision is +an "augmenting path". Change values of tab[b] along the path so that +the unmapped key gets mapped and the unused hash value gets used. + +Assuming 1 key per b, if m out of n hash values are still unused, +you should expect the transitive closure to cover n/m nodes before +an unused node is found. Sum(i=1..n)(n/i) is about nlogn, so expect +this approach to take about nlogn time to map all single-key b's. +------------------------------------------------------------------------------- + +high_water: a value higher than any now in tabb[].water_b. +*/ +static int +augment (phash_main_t * pm, u32 b_root, u32 high_water) +{ + u32 q; /* current position walking through the queue */ + u32 tail; /* tail of the queue. 0 is the head of the queue. */ + phash_tabb_t *tb_parent, *tb_child, *tb_hit; + phash_key_t *k_parent, *k_child; + u32 v, v_limit; /* possible value for myb->val_b */ + u32 i, ki, hash; + + v_limit = + 1 << ((pm->flags & PHASH_FLAG_USE_SCRAMBLE) ? pm->s_bits : BITS (u8)); + + /* Initialize the root of the spanning tree. 
*/ + pm->tabq[0].b_q = b_root; + tail = 1; + + /* construct the spanning tree by walking the queue, add children to tail */ + for (q = 0; q < tail; q++) + { + if ((pm->flags & PHASH_FLAG_FAST_MODE) + && !(pm->flags & PHASH_FLAG_MINIMAL) && q == 1) + break; /* don't do transitive closure */ + + tb_parent = pm->tabb + pm->tabq[q].b_q; /* the b for this node */ + + for (v = 0; v < v_limit; v++) + { + tb_child = 0; + + for (i = 0; i < vec_len (tb_parent->keys); i++) + { + ki = tb_parent->keys[i]; + k_parent = pm->keys + ki; + + hash = k_parent->a ^ pm->scramble[v]; + if (hash >= pm->hash_max) + goto try_next_v; /* hash code out of bounds => we can't use this v */ + + ki = pm->tabh[hash]; + if (ki == ~0) + continue; + + k_child = pm->keys + ki; + tb_hit = pm->tabb + k_child->b; + + if (tb_child) + { + /* Hit at most one child b. */ + if (tb_child == tb_hit) + goto try_next_v; + } + else + { + /* Remember this as child b. */ + tb_child = tb_hit; + if (tb_hit->water_b == high_water) + goto try_next_v; /* already explored */ + } + } + + /* tb_parent with v has either one or zero collisions. */ + + /* add child b to the queue of reachable things */ + if (tb_child) + tb_child->water_b = high_water; + pm->tabq[tail].b_q = tb_child ? tb_child - pm->tabb : ~0; + pm->tabq[tail].newval_q = v; /* how to make parent (myb) use this hash */ + pm->tabq[tail].oldval_q = tb_parent->val_b; /* need this for rollback */ + pm->tabq[tail].parent_q = q; + ++tail; + + /* Found a v with no collisions? */ + if (!tb_child) + { + /* Try to apply the augmenting path. */ + if (apply (pm, tail, /* rollback */ 0)) + return 1; /* success, item was added to the perfect hash */ + --tail; /* don't know how to handle such a child! 
*/ + } + + try_next_v: + ; + } + } + return 0; +} + + +static phash_tabb_t *sort_tabb; + +static int +phash_tabb_compare (void *a1, void *a2) +{ + u32 *b1 = a1; + u32 *b2 = a2; + phash_tabb_t *tb1, *tb2; + + tb1 = sort_tabb + b1[0]; + tb2 = sort_tabb + b2[0]; + + return ((int) vec_len (tb2->keys) - (int) vec_len (tb1->keys)); +} + +/* find a mapping that makes this a perfect hash */ +static int +perfect (phash_main_t * pm) +{ + u32 i; + + /* clear any state from previous attempts */ + if (vec_bytes (pm->tabh)) + clib_memset (pm->tabh, ~0, vec_bytes (pm->tabh)); + + vec_validate (pm->tabb_sort, vec_len (pm->tabb) - 1); + for (i = 0; i < vec_len (pm->tabb_sort); i++) + pm->tabb_sort[i] = i; + + sort_tabb = pm->tabb; + + vec_sort_with_function (pm->tabb_sort, phash_tabb_compare); + + /* In descending order by number of keys, map all *b*s */ + for (i = 0; i < vec_len (pm->tabb_sort); i++) + { + if (!augment (pm, pm->tabb_sort[i], i + 1)) + return 0; + } + + /* Success! We found a perfect hash of all keys into 0..nkeys-1. */ + return 1; +} + + +/* + * Find initial a_bits = log2 (a_max), b_bits = log2 (b_max). + * Initial a_max and b_max values were found empirically. Some factors: + * + * If s_max<256 there is no scramble, so tab[b] needs to cover 0..s_max-1. + * + * a_max and b_max must be powers of 2 because the values in 0..a_max-1 and + * 0..b_max-1 are produced by applying a bitmask to the initial hash function. + * + * a_max must be less than s_max, in fact less than n_keys, because otherwise + * there would often be no i such that a^scramble[i] is in 0..n_keys-1 for + * all the *a*s associated with a given *b*, so there would be no legal + * value to assign to tab[b]. This only matters when we're doing a minimal + * perfect hash. + * + * It takes around 800 trials to find distinct (a,b) with nkey=s_max*(5/8) + * and a_max*b_max = s_max*s_max/32. + * + * Values of b_max less than s_max/4 never work, and s_max/2 always works. 
+ * + * We want b_max as small as possible because it is the number of bytes in + * the huge array we must create for the perfect hash. + * + * When nkey <= s_max*(5/8), b_max=s_max/4 works much more often with + * a_max=s_max/8 than with a_max=s_max/4. Above s_max*(5/8), b_max=s_max/4 + * doesn't seem to care whether a_max=s_max/8 or a_max=s_max/4. I think it + * has something to do with 5/8 = 1/8 * 5. For example examine 80000, + * 85000, and 90000 keys with different values of a_max. This only matters + * if we're doing a minimal perfect hash. + * + * When a_max*b_max <= 1<<U32BITS, the initial hash must produce one integer. + * Bigger than that it must produce two integers, which increases the + * cost of the hash per character hashed. + */ +static void +guess_initial_parameters (phash_main_t * pm) +{ + u32 s_bits, s_max, a_max, b_max, n_keys; + int is_minimal, is_fast_mode; + const u32 b_max_use_scramble_threshold = 4096; + + is_minimal = (pm->flags & PHASH_FLAG_MINIMAL) != 0; + is_fast_mode = (pm->flags & PHASH_FLAG_FAST_MODE) != 0; + + n_keys = vec_len (pm->keys); + s_bits = max_log2 (n_keys); + s_max = 1 << s_bits; + a_max = 0; + + if (is_minimal) + { + switch (s_bits) + { + case 0: + a_max = 1; + b_max = 1; + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + /* + * Was: a_max = is_minimal ? s_max / 2 : s_max; + * However, we know that is_minimal must be true, so the + * if-arm of the ternary expression is always executed. + */ + a_max = s_max / 2; + b_max = s_max / 2; + break; + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + case 16: + case 17: + if (is_fast_mode) + { + a_max = s_max / 2; + b_max = s_max / 4; + } + else if (s_max / 4 < b_max_use_scramble_threshold) + { + if (n_keys <= s_max * 0.52) + a_max = b_max = s_max / 8; + else + a_max = b_max = s_max / 4; + } + else + { + a_max = ((n_keys <= s_max * (5.0 / 8.0)) ? s_max / 8 : + (n_keys <= + s_max * (3.0 / 4.0)) ? 
s_max / 4 : s_max / 2); + b_max = s_max / 4; /* always give the small size a shot */ + } + break; + case 18: + if (is_fast_mode) + a_max = b_max = s_max / 2; + else + { + a_max = s_max / 8; /* never require the multiword hash */ + b_max = (n_keys <= s_max * (5.0 / 8.0)) ? s_max / 4 : s_max / 2; + } + break; + case 19: + case 20: + a_max = (n_keys <= s_max * (5.0 / 8.0)) ? s_max / 8 : s_max / 2; + b_max = (n_keys <= s_max * (5.0 / 8.0)) ? s_max / 4 : s_max / 2; + break; + default: + /* Just find a hash as quick as possible. + We'll be thrashing virtual memory at this size. */ + a_max = b_max = s_max / 2; + break; + } + } + else + { + /* Non-minimal perfect hash. */ + if (is_fast_mode && n_keys > s_max * 0.8) + { + s_max *= 2; + s_bits += 1; + } + + if (s_max / 4 <= (1 << 14)) + b_max = ((n_keys <= s_max * 0.56) ? s_max / 32 : + (n_keys <= s_max * 0.74) ? s_max / 16 : s_max / 8); + else + b_max = ((n_keys <= s_max * 0.6) ? s_max / 16 : + (n_keys <= s_max * 0.8) ? s_max / 8 : s_max / 4); + + if (is_fast_mode && b_max < s_max / 8) + b_max = s_max / 8; + + if (a_max < 1) + a_max = 1; + if (b_max < 1) + b_max = 1; + } + + ASSERT (s_max == (1 << s_bits)); + ASSERT (is_pow2 (a_max)); + ASSERT (is_pow2 (b_max)); + pm->s_bits = s_bits; + pm->a_bits = min_log2 (a_max); + pm->b_bits = min_log2 (b_max); + if (b_max >= b_max_use_scramble_threshold) + pm->flags |= PHASH_FLAG_USE_SCRAMBLE; +} + +/* compute p(x), where p is a permutation of 0..(1<<nbits)-1 */ +/* permute(0)=0. This is intended and useful. 
*/ +always_inline u32 +scramble_permute (u32 x, u32 nbits) +{ + int i; + int mask = (1 << nbits) - 1; + int const2 = 1 + nbits / 2; + int const3 = 1 + nbits / 3; + int const4 = 1 + nbits / 4; + int const5 = 1 + nbits / 5; + for (i = 0; i < 20; i++) + { + x = (x + (x << const2)) & mask; + x = (x ^ (x >> const3)); + x = (x + (x << const4)) & mask; + x = (x ^ (x >> const5)); + } + return x; +} + +/* initialize scramble[] with distinct random values in 0..smax-1 */ +static void +scramble_init (phash_main_t * pm) +{ + u32 i; + + /* fill scramble[] with distinct random integers in 0..smax-1 */ + vec_validate (pm->scramble, (1 << (pm->s_bits < 8 ? 8 : pm->s_bits)) - 1); + for (i = 0; i < vec_len (pm->scramble); i++) + pm->scramble[i] = scramble_permute (i, pm->s_bits); +} + +/* Try to find a perfect hash function. */ +clib_error_t * +phash_find_perfect_hash (phash_main_t * pm) +{ + clib_error_t *error = 0; + u32 max_a_bits, n_tries_this_a_b, want_minimal; + + /* guess initial values for s_max, a_max and b_max */ + guess_initial_parameters (pm); + + want_minimal = pm->flags & PHASH_FLAG_MINIMAL; + +new_s: + if (pm->b_bits == 0) + pm->a_bits = pm->s_bits; + + max_a_bits = pm->s_bits - want_minimal; + if (max_a_bits < 1) + max_a_bits = 1; + + pm->hash_max = want_minimal ? vec_len (pm->keys) : (1 << pm->s_bits); + + scramble_init (pm); + + /* Allocate working memory. */ + vec_free (pm->tabh); + vec_validate_init_empty (pm->tabh, pm->hash_max - 1, ~0); + vec_free (pm->tabq); + vec_validate (pm->tabq, 1 << pm->b_bits); + + /* Actually find the perfect hash */ + n_tries_this_a_b = 0; + while (1) + { + /* Choose random hash seeds until keys become unique. */ + pm->hash_seed = random_u64 (&pm->random_seed); + pm->n_seed_trials++; + if (init_tabb (pm)) + { + /* Found unique (A, B). */ + + /* Hash may already be perfect. 
*/ + if (pm->b_bits == 0) + goto done; + + pm->n_perfect_calls++; + if (perfect (pm)) + goto done; + + goto increase_b; + } + + /* Keep trying with different seed value. */ + n_tries_this_a_b++; + if (n_tries_this_a_b < 2048) + continue; + + /* Try to put more bits in (A,B) to make distinct (A,B) more likely */ + if (pm->a_bits < max_a_bits) + pm->a_bits++; + else if (pm->b_bits < pm->s_bits) + { + increase_b: + vec_resize (pm->tabb, vec_len (pm->tabb)); + vec_resize (pm->tabq, vec_len (pm->tabq)); + pm->b_bits++; + } + else + { + /* Can't increase (A, B) any more, so try increasing S. */ + goto new_s; + } + } + +done: + /* Construct mapping table for hash lookups. */ + if (!error) + { + u32 b, v; + + pm->a_shift = ((pm->flags & PHASH_FLAG_MIX64) ? 64 : 32) - pm->a_bits; + pm->b_mask = (1 << pm->b_bits) - 1; + + vec_resize (pm->tab, vec_len (pm->tabb)); + for (b = 0; b < vec_len (pm->tabb); b++) + { + v = pm->tabb[b].val_b; + + /* Apply scramble now for small enough value of b_bits. */ + if (!(pm->flags & PHASH_FLAG_USE_SCRAMBLE)) + v = pm->scramble[v]; + + pm->tab[b] = v; + } + } + + /* Free working memory. */ + phash_main_free_working_memory (pm); + + return error; +} + +/* Slow hash computation for general keys. 
*/ +uword +phash_hash_slow (phash_main_t * pm, uword key) +{ + u32 a, b, v; + + if (pm->flags & PHASH_FLAG_MIX64) + { + u64 x0, y0, z0; + + x0 = y0 = z0 = pm->hash_seed; + + if (pm->key_seed1) + { + u64 xyz[3]; + pm->key_seed1 (pm->private, key, &xyz); + x0 += xyz[0]; + y0 += xyz[1]; + z0 += xyz[2]; + } + else + x0 += key; + + hash_mix64 (x0, y0, z0); + + a = z0 >> pm->a_shift; + b = z0 & pm->b_mask; + } + else + { + u32 x0, y0, z0; + + x0 = y0 = z0 = pm->hash_seed; + + if (pm->key_seed1) + { + u32 xyz[3]; + pm->key_seed1 (pm->private, key, &xyz); + x0 += xyz[0]; + y0 += xyz[1]; + z0 += xyz[2]; + } + else + x0 += key; + + hash_mix32 (x0, y0, z0); + + a = z0 >> pm->a_shift; + b = z0 & pm->b_mask; + } + + v = pm->tab[b]; + if (pm->flags & PHASH_FLAG_USE_SCRAMBLE) + v = pm->scramble[v]; + return a ^ v; +} + +/* Verify that perfect hash is perfect. */ +clib_error_t * +phash_validate (phash_main_t * pm) +{ + phash_key_t *k; + uword *unique_bitmap = 0; + clib_error_t *error = 0; + + vec_foreach (k, pm->keys) + { + uword h = phash_hash_slow (pm, k->key); + + if (h >= pm->hash_max) + { + error = clib_error_return (0, "hash out of range %wd", h); + goto done; + } + + if (clib_bitmap_get (unique_bitmap, h)) + { + error = clib_error_return (0, "hash non-unique"); + goto done; + } + + unique_bitmap = clib_bitmap_ori (unique_bitmap, h); + } + +done: + clib_bitmap_free (unique_bitmap); + return error; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/phash.h b/extras/deprecated/vppinfra/phash.h new file mode 100644 index 00000000000..3dc59c724f7 --- /dev/null +++ b/extras/deprecated/vppinfra/phash.h @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + Copyright (c) 2005 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef included_phash_h +#define included_phash_h + +#include <vppinfra/hash.h> /* for Bob's mixing functions */ + +typedef struct +{ + /* Maybe either pointer to vector or inline word. */ + uword key; + + /* Hash code (A, B). */ + u32 a, b; +} phash_key_t; + +/* Table indexed by B. */ +typedef struct +{ + /* Vector of key indices with this same value of B. 
*/ + u32 *keys; + + /* hash=a^tabb[b].val_b */ + u32 val_b; + + /* High watermark of who has visited this map node. */ + u32 water_b; +} phash_tabb_t; + +always_inline void +phash_tabb_free (phash_tabb_t * b) +{ + vec_free (b->keys); + b->val_b = b->water_b = 0; +} + +typedef struct +{ + /* b that currently occupies this hash */ + u32 b_q; + + /* Queue position of parent that could use this hash. */ + u32 parent_q; + + /* What to change parent tab[b] to use this hash. */ + u32 newval_q; + + /* Original value of tab[b]. */ + u32 oldval_q; +} phash_tabq_t; + +typedef struct +{ + u8 a_bits, b_bits, s_bits, a_shift; + u32 b_mask; + u32 *tab; + u32 *scramble; + + /* Seed value for hash mixer. */ + u64 hash_seed; + + u32 flags; + + /* Key functions want 64 bit keys. + Use hash_mix64 rather than hash_mix32. */ +#define PHASH_FLAG_MIX64 (1 << 0) +#define PHASH_FLAG_MIX32 (0 << 0) + + /* When b_bits is large enough (>= 12) we scramble. */ +#define PHASH_FLAG_USE_SCRAMBLE (1 << 1) + + /* Slow mode gives smaller tables but at the expense of more run time. */ +#define PHASH_FLAG_SLOW_MODE (0 << 2) +#define PHASH_FLAG_FAST_MODE (1 << 2) + + /* Generate minimal perfect hash instead of perfect hash. */ +#define PHASH_FLAG_NON_MINIMAL (0 << 3) +#define PHASH_FLAG_MINIMAL (1 << 3) + + /* vec_len (keys) for minimal hash; + 1 << s_bits for non-minimal hash. */ + u32 hash_max; + + /* Vector of keys. */ + phash_key_t *keys; + + /* Used by callbacks to identify keys. */ + void *private; + + /* Key comparison callback. */ + int (*key_is_equal) (void *private, uword key1, uword key2); + + /* Callback to reduce single key -> hash seeds. */ + void (*key_seed1) (void *private, uword key, void *seed); + + /* Callback to reduce two key2 -> hash seeds. */ + void (*key_seed2) (void *private, uword key1, uword key2, void *seed); + + /* Stuff used to compute perfect hash. */ + u32 random_seed; + + /* Stuff indexed by B. 
*/ + phash_tabb_t *tabb; + + /* Table of B ordered by number of keys in tabb[b]. */ + u32 *tabb_sort; + + /* Unique key (or ~0 if none) for a given hash + H = A ^ scramble[tab[B].val_b]. */ + u32 *tabh; + + /* Stuff indexed by q. */ + phash_tabq_t *tabq; + + /* Stats. */ + u32 n_seed_trials, n_perfect_calls; +} phash_main_t; + +always_inline void +phash_main_free_working_memory (phash_main_t * pm) +{ + vec_free (pm->tabb); + vec_free (pm->tabq); + vec_free (pm->tabh); + vec_free (pm->tabb_sort); + if (!(pm->flags & PHASH_FLAG_USE_SCRAMBLE)) + vec_free (pm->scramble); +} + +always_inline void +phash_main_free (phash_main_t * pm) +{ + phash_main_free_working_memory (pm); + vec_free (pm->tab); + vec_free (pm->keys); + clib_memset (pm, 0, sizeof (pm[0])); +} + +/* Slow hash computation for general keys. */ +uword phash_hash_slow (phash_main_t * pm, uword key); + +/* Main routine to compute perfect hash. */ +clib_error_t *phash_find_perfect_hash (phash_main_t * pm); + +/* Validates that hash is indeed perfect. */ +clib_error_t *phash_validate (phash_main_t * pm); + +/* Unit test. */ +int phash_test_main (unformat_input_t * input); + +#endif /* included_phash_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/pipeline.h b/extras/deprecated/vppinfra/pipeline.h new file mode 100644 index 00000000000..5a9799b455e --- /dev/null +++ b/extras/deprecated/vppinfra/pipeline.h @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * pipeline.h: software pipeline infrastructure + * + * Copyright (c) 2010 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef included_clib_pipeline_h +#define included_clib_pipeline_h + +#define clib_pipeline_stage(F,TYPE,ARG,I,BODY) \ + always_inline void F##_inline (void * _, u32 I) \ + { TYPE ARG = _; { BODY; } } \ + never_inline void F##_no_inline (TYPE ARG, u32 I) \ + { F##_inline (ARG, I); } + +#define clib_pipeline_stage_static(F,TYPE,ARG,I,BODY) \ + static_always_inline void F##_inline (void * _, u32 I) \ + { TYPE ARG = _; { BODY; } } \ + never_inline void F##_no_inline (TYPE ARG, u32 I) \ + { F##_inline (ARG, I); } + +#define clib_pipeline_stage_no_inline(F,TYPE,ARG,I,BODY) \ + never_inline void F##_no_inline (void * _, u32 I) \ + { TYPE ARG = _; { BODY; } } \ + never_inline void F##_inline (TYPE ARG, u32 I) \ + { F##_no_inline (ARG, I); } + +#define _clib_pipeline_var(v) _clib_pipeline_##v + +#define clib_pipeline_stage_execute(F,A,I,S) \ + F##_##S (A, _clib_pipeline_var(i) - (I)) + +#define clib_pipeline_main_stage(F,A,I) \ + clib_pipeline_stage_execute (F, A, I, inline) +#define clib_pipeline_init_stage(F,A,I) \ + if (_clib_pipeline_var(i) >= (I)) clib_pipeline_stage_execute (F, A, I, no_inline) +#define clib_pipeline_exit_stage(F,A,I) \ + if (_clib_pipeline_var(i) >= (I) && _clib_pipeline_var(i) - (I) < _clib_pipeline_var(n_vectors)) \ + clib_pipeline_stage_execute (F, A, I, no_inline) + +#define clib_pipeline_init_loop \ + for (_clib_pipeline_var(i) = 0; \ + _clib_pipeline_var(i) < \ + clib_min (_clib_pipeline_var(n_stages) - 1, \ + _clib_pipeline_var(n_vectors)); \ + _clib_pipeline_var(i)++) + +#define clib_pipeline_main_loop \ + for (; _clib_pipeline_var(i) < _clib_pipeline_var(n_vectors); \ + _clib_pipeline_var(i)++) + +#define clib_pipeline_exit_loop \ + for (; _clib_pipeline_var(i) < (_clib_pipeline_var(n_vectors) \ + + _clib_pipeline_var(n_stages) - 1); \ + _clib_pipeline_var(i)++) + +#define clib_pipeline_run_2_stage(N,ARG,STAGE0,STAGE1) \ +do { \ + uword _clib_pipeline_var(n_vectors) = (N); \ + uword _clib_pipeline_var(n_stages) = 2; \ + uword 
_clib_pipeline_var(i); \ + \ + clib_pipeline_init_loop \ + { \ + clib_pipeline_init_stage (STAGE0, ARG, 0); \ + } \ + \ + clib_pipeline_main_loop \ + { \ + clib_pipeline_main_stage (STAGE0, ARG, 0); \ + clib_pipeline_main_stage (STAGE1, ARG, 1); \ + } \ + \ + clib_pipeline_exit_loop \ + { \ + clib_pipeline_exit_stage (STAGE1, ARG, 1); \ + } \ +} while (0) + +#define clib_pipeline_run_3_stage(N,ARG,STAGE0,STAGE1,STAGE2) \ +do { \ + uword _clib_pipeline_var(n_vectors) = (N); \ + uword _clib_pipeline_var(n_stages) = 3; \ + uword _clib_pipeline_var(i); \ + \ + clib_pipeline_init_loop \ + { \ + clib_pipeline_init_stage (STAGE0, ARG, 0); \ + clib_pipeline_init_stage (STAGE1, ARG, 1); \ + } \ + \ + clib_pipeline_main_loop \ + { \ + clib_pipeline_main_stage (STAGE0, ARG, 0); \ + clib_pipeline_main_stage (STAGE1, ARG, 1); \ + clib_pipeline_main_stage (STAGE2, ARG, 2); \ + } \ + \ + clib_pipeline_exit_loop \ + { \ + clib_pipeline_exit_stage (STAGE1, ARG, 1); \ + clib_pipeline_exit_stage (STAGE2, ARG, 2); \ + } \ +} while (0) + +#define clib_pipeline_run_4_stage(N,ARG,STAGE0,STAGE1,STAGE2,STAGE3) \ +do { \ + uword _clib_pipeline_var(n_vectors) = (N); \ + uword _clib_pipeline_var(n_stages) = 4; \ + uword _clib_pipeline_var(i); \ + \ + clib_pipeline_init_loop \ + { \ + clib_pipeline_init_stage (STAGE0, ARG, 0); \ + clib_pipeline_init_stage (STAGE1, ARG, 1); \ + clib_pipeline_init_stage (STAGE2, ARG, 2); \ + } \ + \ + clib_pipeline_main_loop \ + { \ + clib_pipeline_main_stage (STAGE0, ARG, 0); \ + clib_pipeline_main_stage (STAGE1, ARG, 1); \ + clib_pipeline_main_stage (STAGE2, ARG, 2); \ + clib_pipeline_main_stage (STAGE3, ARG, 3); \ + } \ + \ + clib_pipeline_exit_loop \ + { \ + clib_pipeline_exit_stage (STAGE1, ARG, 1); \ + clib_pipeline_exit_stage (STAGE2, ARG, 2); \ + clib_pipeline_exit_stage (STAGE3, ARG, 3); \ + } \ +} while (0) + +#endif /* included_clib_pipeline_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") 
+ * End: + */ diff --git a/extras/deprecated/vppinfra/qhash.c b/extras/deprecated/vppinfra/qhash.c new file mode 100644 index 00000000000..3b5a175065d --- /dev/null +++ b/extras/deprecated/vppinfra/qhash.c @@ -0,0 +1,859 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + Copyright (c) 2006 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +#include <vppinfra/qhash.h> + +#define QHASH_ALL_VALID ((1 << QHASH_KEYS_PER_BUCKET) - 1) + +void * +_qhash_resize (void *v, uword length, uword elt_bytes) +{ + qhash_t *h; + uword l; + + l = clib_max (max_log2 (length), 2 + QHASH_LOG2_KEYS_PER_BUCKET); + + /* Round up if less than 1/2 full. */ + l += ((f64) length / (f64) (1 << l)) < .5; + + v = _vec_resize (0, 1 << l, elt_bytes << l, sizeof (h[0]), + /* align */ sizeof (uword)); + + h = qhash_header (v); + h->n_elts = 0; + h->log2_hash_size = l; + h->hash_keys = + clib_mem_alloc_aligned_no_fail (sizeof (h->hash_keys[0]) << l, + CLIB_CACHE_LINE_BYTES); + vec_resize (h->hash_key_valid_bitmap, + 1 << (l - QHASH_LOG2_KEYS_PER_BUCKET)); + clib_memset (v, ~0, elt_bytes << l); + + return v; +} + +static u8 min_log2_table[256]; + +static inline uword +qhash_min_log2 (uword x) +{ + ASSERT (is_pow2 (x)); + ASSERT (x < 256); + return min_log2_table[x]; +} + +static void +qhash_min_log2_init () +{ + int i; + for (i = 0; i < 256; i++) + min_log2_table[i] = min_log2 (i); +} + +always_inline uword +qhash_get_valid_elt_mask (qhash_t * h, uword i) +{ + return h->hash_key_valid_bitmap[i / QHASH_KEYS_PER_BUCKET]; +} + +always_inline void +qhash_set_valid_elt_mask (qhash_t * h, uword i, uword mask) +{ + h->hash_key_valid_bitmap[i / QHASH_KEYS_PER_BUCKET] = mask; +} + +always_inline uword +qhash_search_bucket (uword * hash_keys, uword search_key, uword m) +{ + uword t; +#define _(i) ((hash_keys[i] == search_key) << i) + t = (_(0) | _(1) | _(2) | _(3)); + if (QHASH_KEYS_PER_BUCKET > 4) + t |= (_(4) | _(5) | _(6) | _(7)); + if (QHASH_KEYS_PER_BUCKET > 8) + t |= (_(8) | _(9) | _(10) | _(11) | _(12) | _(13) | _(14) | _(15)); +#undef _ + return m & t; +} + +/* Lookup multiple keys in the same hash table. 
*/ +void +qhash_get_multiple (void *v, + uword * search_keys, + uword n_search_keys, u32 * result_indices) +{ + qhash_t *h = qhash_header (v); + uword *k, *hash_keys; + uword n_left, bucket_mask; + u32 *r; + + if (!v) + { + clib_memset (result_indices, ~0, + sizeof (result_indices[0]) * n_search_keys); + return; + } + + bucket_mask = pow2_mask (h->log2_hash_size) & ~(QHASH_KEYS_PER_BUCKET - 1); + + k = search_keys; + n_left = n_search_keys; + hash_keys = h->hash_keys; + r = result_indices; + + while (n_left >= 2) + { + u32 a0, b0, c0, bi0, valid0, match0; + u32 a1, b1, c1, bi1, valid1, match1; + uword k0, k1, *h0, *h1; + + k0 = k[0]; + k1 = k[1]; + n_left -= 2; + k += 2; + + a0 = a1 = h->hash_seeds[0]; + b0 = b1 = h->hash_seeds[1]; + c0 = c1 = h->hash_seeds[2]; + a0 ^= k0; + a1 ^= k1; +#if uword_bits == 64 + b0 ^= k0 >> 32; + b1 ^= k1 >> 32; +#endif + + hash_mix32_step_1 (a0, b0, c0); + hash_mix32_step_1 (a1, b1, c1); + hash_mix32_step_2 (a0, b0, c0); + hash_mix32_step_2 (a1, b1, c1); + hash_mix32_step_3 (a0, b0, c0); + hash_mix32_step_3 (a1, b1, c1); + + bi0 = c0 & bucket_mask; + bi1 = c1 & bucket_mask; + + h0 = hash_keys + bi0; + h1 = hash_keys + bi1; + + /* Search two buckets. */ + valid0 = qhash_get_valid_elt_mask (h, bi0); + valid1 = qhash_get_valid_elt_mask (h, bi1); + + match0 = qhash_search_bucket (h0, k0, valid0); + match1 = qhash_search_bucket (h1, k1, valid1); + + bi0 += qhash_min_log2 (match0); + bi1 += qhash_min_log2 (match1); + + r[0] = match0 ? bi0 : ~0; + r[1] = match1 ? bi1 : ~0; + r += 2; + + /* Full buckets trigger search of overflow hash. */ + if (PREDICT_FALSE (!match0 && valid0 == QHASH_ALL_VALID)) + { + uword *p = hash_get (h->overflow_hash, k0); + r[-2] = p ? p[0] : ~0; + } + + /* Full buckets trigger search of overflow hash. */ + if (PREDICT_FALSE (!match1 && valid1 == QHASH_ALL_VALID)) + { + uword *p = hash_get (h->overflow_hash, k1); + r[-1] = p ? 
p[0] : ~0; + } + } + + while (n_left >= 1) + { + u32 a0, b0, c0, bi0, valid0, match0; + uword k0, *h0; + + k0 = k[0]; + n_left -= 1; + k += 1; + + a0 = h->hash_seeds[0]; + b0 = h->hash_seeds[1]; + c0 = h->hash_seeds[2]; + a0 ^= k0; +#if uword_bits == 64 + b0 ^= k0 >> 32; +#endif + + hash_mix32 (a0, b0, c0); + + bi0 = c0 & bucket_mask; + + h0 = hash_keys + bi0; + + /* Search one bucket. */ + valid0 = qhash_get_valid_elt_mask (h, bi0); + match0 = qhash_search_bucket (h0, k0, valid0); + + bi0 += qhash_min_log2 (match0); + + r[0] = match0 ? bi0 : ~0; + r += 1; + + /* Full buckets trigger search of overflow hash. */ + if (PREDICT_FALSE (!match0 && valid0 == QHASH_ALL_VALID)) + { + uword *p = hash_get (h->overflow_hash, k0); + r[-1] = p ? p[0] : ~0; + } + } +} + +/* Lookup multiple keys in the same hash table. + Returns index of first matching key. */ +u32 +qhash_get_first_match (void *v, + uword * search_keys, + uword n_search_keys, uword * matching_key) +{ + qhash_t *h = qhash_header (v); + uword *k, *hash_keys; + uword n_left, match_mask, bucket_mask; + + if (!v) + return ~0; + + match_mask = 0; + bucket_mask = pow2_mask (h->log2_hash_size) & ~(QHASH_KEYS_PER_BUCKET - 1); + + k = search_keys; + n_left = n_search_keys; + hash_keys = h->hash_keys; + while (n_left >= 2) + { + u32 a0, b0, c0, bi0, valid0; + u32 a1, b1, c1, bi1, valid1; + uword k0, k1, *h0, *h1; + + k0 = k[0]; + k1 = k[1]; + n_left -= 2; + k += 2; + + a0 = a1 = h->hash_seeds[0]; + b0 = b1 = h->hash_seeds[1]; + c0 = c1 = h->hash_seeds[2]; + a0 ^= k0; + a1 ^= k1; +#if uword_bits == 64 + b0 ^= k0 >> 32; + b1 ^= k1 >> 32; +#endif + + hash_mix32_step_1 (a0, b0, c0); + hash_mix32_step_1 (a1, b1, c1); + hash_mix32_step_2 (a0, b0, c0); + hash_mix32_step_2 (a1, b1, c1); + hash_mix32_step_3 (a0, b0, c0); + hash_mix32_step_3 (a1, b1, c1); + + bi0 = c0 & bucket_mask; + bi1 = c1 & bucket_mask; + + h0 = hash_keys + bi0; + h1 = hash_keys + bi1; + + /* Search two buckets. 
*/ + valid0 = qhash_get_valid_elt_mask (h, bi0); + valid1 = qhash_get_valid_elt_mask (h, bi1); + match_mask = qhash_search_bucket (h0, k0, valid0); + match_mask |= (qhash_search_bucket (h1, k1, valid1) + << QHASH_KEYS_PER_BUCKET); + if (match_mask) + { + uword bi, is_match1; + + bi = qhash_min_log2 (match_mask); + is_match1 = bi >= QHASH_KEYS_PER_BUCKET; + + bi += ((is_match1 ? bi1 : bi0) + - (is_match1 << QHASH_LOG2_KEYS_PER_BUCKET)); + *matching_key = (k - 2 - search_keys) + is_match1; + return bi; + } + + /* Full buckets trigger search of overflow hash. */ + if (PREDICT_FALSE (valid0 == QHASH_ALL_VALID + || valid1 == QHASH_ALL_VALID)) + { + uword *p = 0; + uword ki = k - 2 - search_keys; + + if (valid0 == QHASH_ALL_VALID) + p = hash_get (h->overflow_hash, k0); + + if (!p && valid1 == QHASH_ALL_VALID) + { + p = hash_get (h->overflow_hash, k1); + ki++; + } + + if (p) + { + *matching_key = ki; + return p[0]; + } + } + } + + while (n_left >= 1) + { + u32 a0, b0, c0, bi0, valid0; + uword k0, *h0; + + k0 = k[0]; + n_left -= 1; + k += 1; + + a0 = h->hash_seeds[0]; + b0 = h->hash_seeds[1]; + c0 = h->hash_seeds[2]; + a0 ^= k0; +#if uword_bits == 64 + b0 ^= k0 >> 32; +#endif + + hash_mix32 (a0, b0, c0); + + bi0 = c0 & bucket_mask; + + h0 = hash_keys + bi0; + + /* Search one bucket. */ + valid0 = qhash_get_valid_elt_mask (h, bi0); + match_mask = qhash_search_bucket (h0, k0, valid0); + if (match_mask) + { + uword bi; + bi = bi0 + qhash_min_log2 (match_mask); + *matching_key = (k - 1 - search_keys); + return bi; + } + + /* Full buckets trigger search of overflow hash. 
*/ + if (PREDICT_FALSE (valid0 == QHASH_ALL_VALID)) + { + uword *p = hash_get (h->overflow_hash, k0); + if (p) + { + *matching_key = (k - 1 - search_keys); + return p[0]; + } + } + } + + return ~0; +} + +static void * +qhash_set_overflow (void *v, uword elt_bytes, + uword key, uword bi, uword * n_elts, u32 * result) +{ + qhash_t *h = qhash_header (v); + uword *p = hash_get (h->overflow_hash, key); + uword i; + + bi /= QHASH_KEYS_PER_BUCKET; + + if (p) + i = p[0]; + else + { + uword l = vec_len (h->overflow_free_indices); + if (l > 0) + { + i = h->overflow_free_indices[l - 1]; + _vec_len (h->overflow_free_indices) = l - 1; + } + else + i = (1 << h->log2_hash_size) + hash_elts (h->overflow_hash); + hash_set (h->overflow_hash, key, i); + vec_validate (h->overflow_counts, bi); + h->overflow_counts[bi] += 1; + *n_elts += 1; + + l = vec_len (v); + if (i >= l) + { + uword dl = round_pow2 (1 + i - l, 8); + v = _vec_resize (v, dl, (l + dl) * elt_bytes, sizeof (h[0]), + /* align */ sizeof (uword)); + clib_memset (v + l * elt_bytes, ~0, dl * elt_bytes); + } + } + + *result = i; + + return v; +} + +static uword +qhash_unset_overflow (void *v, uword key, uword bi, uword * n_elts) +{ + qhash_t *h = qhash_header (v); + uword *p = hash_get (h->overflow_hash, key); + uword result; + + bi /= QHASH_KEYS_PER_BUCKET; + + if (p) + { + result = p[0]; + hash_unset (h->overflow_hash, key); + ASSERT (bi < vec_len (h->overflow_counts)); + ASSERT (h->overflow_counts[bi] > 0); + ASSERT (*n_elts > 0); + vec_add1 (h->overflow_free_indices, result); + h->overflow_counts[bi] -= 1; + *n_elts -= 1; + } + else + result = ~0; + + return result; +} + +always_inline uword +qhash_find_free (uword i, uword valid_mask) +{ + return first_set (~valid_mask & pow2_mask (QHASH_KEYS_PER_BUCKET)); +} + +void * +_qhash_set_multiple (void *v, + uword elt_bytes, + uword * search_keys, + uword n_search_keys, u32 * result_indices) +{ + qhash_t *h = qhash_header (v); + uword *k, *hash_keys; + uword n_left, n_elts, 
bucket_mask; + u32 *r; + + if (vec_len (v) < n_search_keys) + v = _qhash_resize (v, n_search_keys, elt_bytes); + + if (qhash_min_log2 (2) != 1) + { + qhash_min_log2_init (); + ASSERT (qhash_min_log2 (2) == 1); + } + + ASSERT (v != 0); + + bucket_mask = pow2_mask (h->log2_hash_size) & ~(QHASH_KEYS_PER_BUCKET - 1); + + hash_keys = h->hash_keys; + k = search_keys; + r = result_indices; + n_left = n_search_keys; + n_elts = h->n_elts; + + while (n_left >= 2) + { + u32 a0, b0, c0, bi0, match0, valid0, free0; + u32 a1, b1, c1, bi1, match1, valid1, free1; + uword k0, *h0; + uword k1, *h1; + + k0 = k[0]; + k1 = k[1]; + + /* Keys must be unique. */ + ASSERT (k0 != k1); + + n_left -= 2; + k += 2; + + a0 = a1 = h->hash_seeds[0]; + b0 = b1 = h->hash_seeds[1]; + c0 = c1 = h->hash_seeds[2]; + a0 ^= k0; + a1 ^= k1; +#if uword_bits == 64 + b0 ^= k0 >> 32; + b1 ^= k1 >> 32; +#endif + + hash_mix32_step_1 (a0, b0, c0); + hash_mix32_step_1 (a1, b1, c1); + hash_mix32_step_2 (a0, b0, c0); + hash_mix32_step_2 (a1, b1, c1); + hash_mix32_step_3 (a0, b0, c0); + hash_mix32_step_3 (a1, b1, c1); + + bi0 = c0 & bucket_mask; + bi1 = c1 & bucket_mask; + + h0 = hash_keys + bi0; + h1 = hash_keys + bi1; + + /* Search two buckets. */ + valid0 = qhash_get_valid_elt_mask (h, bi0); + valid1 = qhash_get_valid_elt_mask (h, bi1); + + match0 = qhash_search_bucket (h0, k0, valid0); + match1 = qhash_search_bucket (h1, k1, valid1); + + /* Find first free element starting at hash offset into bucket. */ + free0 = qhash_find_free (c0 & (QHASH_KEYS_PER_BUCKET - 1), valid0); + + valid1 = valid1 | (bi0 == bi1 ? free0 : 0); + free1 = qhash_find_free (c1 & (QHASH_KEYS_PER_BUCKET - 1), valid1); + + n_elts += (match0 == 0) + (match1 == 0); + + match0 = match0 ? match0 : free0; + match1 = match1 ? 
match1 : free1; + + valid0 |= match0; + valid1 |= match1; + + h0 += qhash_min_log2 (match0); + h1 += qhash_min_log2 (match1); + + if (PREDICT_FALSE (!match0 || !match1)) + goto slow_path2; + + h0[0] = k0; + h1[0] = k1; + r[0] = h0 - hash_keys; + r[1] = h1 - hash_keys; + r += 2; + qhash_set_valid_elt_mask (h, bi0, valid0); + qhash_set_valid_elt_mask (h, bi1, valid1); + continue; + + slow_path2: + if (!match0) + { + n_elts -= 1; + v = qhash_set_overflow (v, elt_bytes, k0, bi0, &n_elts, &r[0]); + } + else + { + h0[0] = k0; + r[0] = h0 - hash_keys; + qhash_set_valid_elt_mask (h, bi0, valid0); + } + if (!match1) + { + n_elts -= 1; + v = qhash_set_overflow (v, elt_bytes, k1, bi1, &n_elts, &r[1]); + } + else + { + h1[0] = k1; + r[1] = h1 - hash_keys; + qhash_set_valid_elt_mask (h, bi1, valid1); + } + r += 2; + } + + while (n_left >= 1) + { + u32 a0, b0, c0, bi0, match0, valid0, free0; + uword k0, *h0; + + k0 = k[0]; + n_left -= 1; + k += 1; + + a0 = h->hash_seeds[0]; + b0 = h->hash_seeds[1]; + c0 = h->hash_seeds[2]; + a0 ^= k0; +#if uword_bits == 64 + b0 ^= k0 >> 32; +#endif + + hash_mix32 (a0, b0, c0); + + bi0 = c0 & bucket_mask; + + h0 = hash_keys + bi0; + + valid0 = qhash_get_valid_elt_mask (h, bi0); + + /* Find first free element starting at hash offset into bucket. */ + free0 = qhash_find_free (c0 & (QHASH_KEYS_PER_BUCKET - 1), valid0); + + match0 = qhash_search_bucket (h0, k0, valid0); + + n_elts += (match0 == 0); + + match0 = match0 ? 
match0 : free0; + + valid0 |= match0; + + h0 += qhash_min_log2 (match0); + + if (PREDICT_FALSE (!match0)) + goto slow_path1; + + h0[0] = k0; + r[0] = h0 - hash_keys; + r += 1; + qhash_set_valid_elt_mask (h, bi0, valid0); + continue; + + slow_path1: + n_elts -= 1; + v = qhash_set_overflow (v, elt_bytes, k0, bi0, &n_elts, &r[0]); + r += 1; + } + + h = qhash_header (v); + h->n_elts = n_elts; + + return v; +} + +static uword +unset_slow_path (void *v, uword elt_bytes, + uword k0, uword bi0, uword valid0, uword match0, + uword * n_elts) +{ + qhash_t *h = qhash_header (v); + uword i, j = 0, k, l, t = ~0; + hash_pair_t *p, *found; + + if (!match0) + { + if (valid0 == QHASH_ALL_VALID) + t = qhash_unset_overflow (v, k0, bi0, n_elts); + return t; + } + + i = bi0 / QHASH_KEYS_PER_BUCKET; + t = bi0 + qhash_min_log2 (match0); + + if (valid0 == QHASH_ALL_VALID + && i < vec_len (h->overflow_counts) && h->overflow_counts[i] > 0) + { + found = 0; + /* *INDENT-OFF* */ + hash_foreach_pair (p, h->overflow_hash, ({ + j = qhash_hash_mix (h, p->key) / QHASH_KEYS_PER_BUCKET; + if (j == i) + { + found = p; + break; + } + })); + /* *INDENT-ON* */ + ASSERT (found != 0); + ASSERT (j == i); + + l = found->value[0]; + k = found->key; + hash_unset3 (h->overflow_hash, k, &j); + vec_add1 (h->overflow_free_indices, j); + h->overflow_counts[i] -= 1; + + qhash_set_valid_elt_mask (h, bi0, valid0); + + h->hash_keys[t] = k; + clib_memswap (v + t * elt_bytes, v + l * elt_bytes, elt_bytes); + t = l; + } + else + qhash_set_valid_elt_mask (h, bi0, valid0 ^ match0); + + return t; +} + +void +_qhash_unset_multiple (void *v, + uword elt_bytes, + uword * search_keys, + uword n_search_keys, u32 * result_indices) +{ + qhash_t *h = qhash_header (v); + uword *k, *hash_keys; + uword n_left, n_elts, bucket_mask; + u32 *r; + + if (!v) + { + uword i; + for (i = 0; i < n_search_keys; i++) + result_indices[i] = ~0; + } + + bucket_mask = pow2_mask (h->log2_hash_size) & ~(QHASH_KEYS_PER_BUCKET - 1); + + hash_keys = 
h->hash_keys; + k = search_keys; + r = result_indices; + n_left = n_search_keys; + n_elts = h->n_elts; + + while (n_left >= 2) + { + u32 a0, b0, c0, bi0, match0, valid0; + u32 a1, b1, c1, bi1, match1, valid1; + uword k0, *h0; + uword k1, *h1; + + k0 = k[0]; + k1 = k[1]; + + /* Keys must be unique. */ + ASSERT (k0 != k1); + + n_left -= 2; + k += 2; + + a0 = a1 = h->hash_seeds[0]; + b0 = b1 = h->hash_seeds[1]; + c0 = c1 = h->hash_seeds[2]; + a0 ^= k0; + a1 ^= k1; +#if uword_bits == 64 + b0 ^= k0 >> 32; + b1 ^= k1 >> 32; +#endif + + hash_mix32_step_1 (a0, b0, c0); + hash_mix32_step_1 (a1, b1, c1); + hash_mix32_step_2 (a0, b0, c0); + hash_mix32_step_2 (a1, b1, c1); + hash_mix32_step_3 (a0, b0, c0); + hash_mix32_step_3 (a1, b1, c1); + + bi0 = c0 & bucket_mask; + bi1 = c1 & bucket_mask; + + h0 = hash_keys + bi0; + h1 = hash_keys + bi1; + + /* Search two buckets. */ + valid0 = qhash_get_valid_elt_mask (h, bi0); + valid1 = qhash_get_valid_elt_mask (h, bi1); + + match0 = qhash_search_bucket (h0, k0, valid0); + match1 = qhash_search_bucket (h1, k1, valid1); + + n_elts -= (match0 != 0) + (match1 != 0); + + if (PREDICT_FALSE (valid0 == QHASH_ALL_VALID + || valid1 == QHASH_ALL_VALID)) + goto slow_path2; + + valid0 ^= match0; + qhash_set_valid_elt_mask (h, bi0, valid0); + + valid1 = bi0 == bi1 ? valid0 : valid1; + valid1 ^= match1; + + qhash_set_valid_elt_mask (h, bi1, valid1); + + r[0] = match0 ? bi0 + qhash_min_log2 (match0) : ~0; + r[1] = match1 ? bi1 + qhash_min_log2 (match1) : ~0; + r += 2; + continue; + + slow_path2: + r[0] = unset_slow_path (v, elt_bytes, k0, bi0, valid0, match0, &n_elts); + if (bi0 == bi1) + { + /* Search again in same bucket to test new overflow element. 
*/ + valid1 = qhash_get_valid_elt_mask (h, bi0); + if (!match1) + { + match1 = qhash_search_bucket (h1, k1, valid1); + n_elts -= (match1 != 0); + } + } + r[1] = unset_slow_path (v, elt_bytes, k1, bi1, valid1, match1, &n_elts); + r += 2; + } + + while (n_left >= 1) + { + u32 a0, b0, c0, bi0, match0, valid0; + uword k0, *h0; + + k0 = k[0]; + n_left -= 1; + k += 1; + + a0 = h->hash_seeds[0]; + b0 = h->hash_seeds[1]; + c0 = h->hash_seeds[2]; + a0 ^= k0; +#if uword_bits == 64 + b0 ^= k0 >> 32; +#endif + + hash_mix32 (a0, b0, c0); + + bi0 = c0 & bucket_mask; + + h0 = hash_keys + bi0; + + valid0 = qhash_get_valid_elt_mask (h, bi0); + + match0 = qhash_search_bucket (h0, k0, valid0); + n_elts -= (match0 != 0); + qhash_set_valid_elt_mask (h, bi0, valid0 ^ match0); + + r[0] = match0 ? bi0 + qhash_min_log2 (match0) : ~0; + r += 1; + + if (PREDICT_FALSE (valid0 == QHASH_ALL_VALID)) + r[-1] = unset_slow_path (v, elt_bytes, k0, bi0, valid0, match0, + &n_elts); + } + + h->n_elts = n_elts; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/qhash.h b/extras/deprecated/vppinfra/qhash.h new file mode 100644 index 00000000000..9dbbd971ade --- /dev/null +++ b/extras/deprecated/vppinfra/qhash.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + Copyright (c) 2006 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef included_qhash_h +#define included_qhash_h + +#include <vppinfra/cache.h> +#include <vppinfra/hash.h> + +/* Word hash tables. */ +typedef struct +{ + /* Number of elements in hash. */ + u32 n_elts; + + u32 log2_hash_size; + + /* Jenkins hash seeds. */ + u32 hash_seeds[3]; + + /* Fall back CLIB hash for overflow in fixed sized buckets. */ + uword *overflow_hash; + + u32 *overflow_counts, *overflow_free_indices; + + u8 *hash_key_valid_bitmap; + + uword *hash_keys; +} qhash_t; + +always_inline qhash_t * +qhash_header (void *v) +{ + return vec_header (v, sizeof (qhash_t)); +} + +always_inline uword +qhash_elts (void *v) +{ + return v ? qhash_header (v)->n_elts : 0; +} + +always_inline uword +qhash_n_overflow (void *v) +{ + return v ? 
hash_elts (qhash_header (v)->overflow_hash) : 0; +} + +#define QHASH_LOG2_KEYS_PER_BUCKET 2 +#define QHASH_KEYS_PER_BUCKET (1 << QHASH_LOG2_KEYS_PER_BUCKET) + +always_inline uword +qhash_hash_mix (qhash_t * h, uword key) +{ + u32 a, b, c; + + a = h->hash_seeds[0]; + b = h->hash_seeds[1]; + c = h->hash_seeds[2]; + + a ^= key; +#if uword_bits == 64 + b ^= key >> 32; +#endif + + hash_mix32 (a, b, c); + + return c & pow2_mask (h->log2_hash_size); +} + +#define qhash_resize(v,n) (v) = _qhash_resize ((v), (n), sizeof ((v)[0])) + +#define qhash_foreach(var,v,body) + +#define qhash_set_multiple(v,keys,n,results) \ + (v) = _qhash_set_multiple ((v), sizeof ((v)[0]), (keys), (n), (results)) + +#define qhash_unset_multiple(v,keys,n,results) \ + _qhash_unset_multiple ((v), sizeof ((v)[0]), (keys), (n), (results)) + +#define qhash_get(v,key) \ +({ \ + uword _qhash_get_k = (key); \ + qhash_get_first_match ((v), &_qhash_get_k, 1, &_qhash_get_k); \ +}) + +#define qhash_set(v,k) \ +({ \ + uword _qhash_set_k = (k); \ + qhash_set_multiple ((v), &_qhash_set_k, 1, &_qhash_set_k); \ + _qhash_set_k; \ +}) + +#define qhash_unset(v,k) \ +({ \ + uword _qhash_unset_k = (k); \ + qhash_unset_multiple ((v), &_qhash_unset_k, 1, &_qhash_unset_k); \ + _qhash_unset_k; \ +}) + +void *_qhash_resize (void *v, uword length, uword elt_bytes); + +/* Lookup multiple keys in the same hash table. */ +void +qhash_get_multiple (void *v, + uword * search_keys, + uword n_search_keys, u32 * result_indices); + +/* Lookup multiple keys in the same hash table. + Returns index of first matching key. */ +u32 +qhash_get_first_match (void *v, + uword * search_keys, + uword n_search_keys, uword * matching_key); + +/* Set/unset helper functions. 
*/ +void *_qhash_set_multiple (void *v, + uword elt_bytes, + uword * search_keys, + uword n_search_keys, u32 * result_indices); +void +_qhash_unset_multiple (void *v, + uword elt_bytes, + uword * search_keys, + uword n_search_keys, u32 * result_indices); + +#endif /* included_qhash_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/slist.c b/extras/deprecated/vppinfra/slist.c new file mode 100644 index 00000000000..5598871c884 --- /dev/null +++ b/extras/deprecated/vppinfra/slist.c @@ -0,0 +1,336 @@ +/* + Copyright (c) 2012 Cisco and/or its affiliates. + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#include <vppinfra/slist.h> + +/* + * skip-list implementation + * + * Good news / bad news. As balanced binary tree schemes go, + * this one seems pretty fast and is reasonably simple. There's a very + * limited amount that can be done to mitigate sdram read latency. + * + * Each active clib_slist_elt_t is on from 1 to N lists. Each active element + * is always on the "level-0" list. Since most elements are *only* on + * level 0, we keep the level 0 (and level 1) in the element. For those + * elements on more than two lists, we switch to a vector. Hence, the + * "n" union in clib_slist_elt_t. 
+ * + * The low-order bit of elt->n.next0[0] is 1 for inlined next indices, + * 0 for vector indices (since the allocator always aligns to at least + * a 4-byte boundary). We can only represent 2e9 items, but since the + * practical performance limit is O(1e7), it doesn't matter. + * + * We create a "head" element which (by construction) is always + * lexically lighter than any other element. This makes a large number + * of irritating special cases go away. + * + * User code is in charge of comparing a supplied key with + * the key component of a user pool element. The user tells this code + * to add or delete (opaque key, 32-bit integer) pairs to the skip-list. + * + * The algorithm adds new elements to one or more lists. + * For levels greater than zero, the probability of a new element landing on + * a list is branching_factor**N. Branching_factor = 0.2 seems to work + * OK, yielding about 50 compares per search at O(1e7) items. + */ + +clib_error_t * +clib_slist_init (clib_slist_t * sp, f64 branching_factor, + clib_slist_key_compare_function_t compare, + format_function_t format_user_element) +{ + clib_slist_elt_t *head; + clib_memset (sp, 0, sizeof (sp[0])); + sp->branching_factor = branching_factor; + sp->format_user_element = format_user_element; + sp->compare = compare; + sp->seed = 0xdeaddabe; + pool_get (sp->elts, head); + vec_add1 (head->n.nexts, (u32) ~ 0); + head->user_pool_index = (u32) ~ 0; + vec_validate (sp->path, 1); + vec_validate (sp->occupancy, 0); + + return 0; +} + +/* + * slist_search_internal + */ +static inline clib_slist_search_result_t +slist_search_internal (clib_slist_t * sp, void *key, int need_full_path) +{ + int level, comp_result; + clib_slist_elt_t *search_elt, *head_elt; + + sp->ncompares = 0; + /* + * index 0 is the magic listhead element which is + * lexically lighter than / to the left of every element + */ + search_elt = head_elt = pool_elt_at_index (sp->elts, 0); + + /* + * Initial negotiating position, only the head_elt is 
+ * lighter than the supplied key + */ + clib_memset (sp->path, 0, vec_len (head_elt->n.nexts) * sizeof (u32)); + + /* Walk the fastest lane first */ + level = vec_len (head_elt->n.nexts) - 1; + _vec_len (sp->path) = level + 1; + + while (1) + { + u32 next_index_this_level; + clib_slist_elt_t *prefetch_elt; + + /* + * Prefetching the next element at this level makes a measurable + * difference, but doesn't fix the dependent read stall problem + */ + prefetch_elt = sp->elts + + clib_slist_get_next_at_level (search_elt, level); + + CLIB_PREFETCH (prefetch_elt, CLIB_CACHE_LINE_BYTES, READ); + + /* Compare the key with the current element */ + comp_result = (search_elt == head_elt) ? 1 : + sp->compare (key, search_elt->user_pool_index); + + sp->ncompares++; + /* key "lighter" than this element */ + if (comp_result < 0) + { + /* + * Back up to previous item on this list + * and search the next finer-grained list + * starting there. + */ + search_elt = pool_elt_at_index (sp->elts, sp->path[level]); + next_list: + if (level > 0) + { + level--; + continue; + } + else + { + return CLIB_SLIST_NO_MATCH; + } + } + /* Match */ + if (comp_result == 0) + { + /* + * If we're trying to delete an element, we need to + * track down all of the elements which point at it. + * Otherwise, don't bother with it + */ + if (need_full_path && level > 0) + { + search_elt = pool_elt_at_index (sp->elts, sp->path[level]); + level--; + continue; + } + level = vec_len (head_elt->n.nexts); + sp->path[level] = search_elt - sp->elts; + _vec_len (sp->path) = level + 1; + return CLIB_SLIST_MATCH; + } + /* + * comp_result positive, key is to the right of + * this element + */ + sp->path[level] = search_elt - sp->elts; + + /* Out of list at this level? 
*/ + next_index_this_level = + clib_slist_get_next_at_level (search_elt, level); + if (next_index_this_level == (u32) ~ 0) + goto next_list; + + /* No, try the next element */ + search_elt = pool_elt_at_index (sp->elts, next_index_this_level); + } + return 0; /* notreached */ +} + +u32 +clib_slist_search (clib_slist_t * sp, void *key, u32 * ncompares) +{ + clib_slist_search_result_t rv; + + rv = slist_search_internal (sp, key, 0 /* dont need full path */ ); + if (rv == CLIB_SLIST_MATCH) + { + clib_slist_elt_t *elt; + elt = pool_elt_at_index (sp->elts, sp->path[vec_len (sp->path) - 1]); + if (ncompares) + *ncompares = sp->ncompares; + return elt->user_pool_index; + } + return (u32) ~ 0; +} + +void +clib_slist_add (clib_slist_t * sp, void *key, u32 user_pool_index) +{ + clib_slist_elt_t *new_elt; + clib_slist_search_result_t search_result; + int level; + + search_result = slist_search_internal (sp, key, + 0 /* don't need full path */ ); + + /* Special case: key exists, just replace user_pool_index */ + if (PREDICT_FALSE (search_result == CLIB_SLIST_MATCH)) + { + clib_slist_elt_t *elt; + elt = pool_elt_at_index (sp->elts, sp->path[0]); + elt->user_pool_index = user_pool_index; + return; + } + + pool_get (sp->elts, new_elt); + new_elt->n.nexts = 0; + new_elt->user_pool_index = user_pool_index; + + /* sp->path lists elements to the left of key, by level */ + for (level = 0; level < vec_len (sp->path); level++) + { + clib_slist_elt_t *prev_elt_this_level; + u32 prev_elt_next_index_this_level; + + /* Add to list at the current level */ + prev_elt_this_level = pool_elt_at_index (sp->elts, sp->path[level]); + prev_elt_next_index_this_level = clib_slist_get_next_at_level + (prev_elt_this_level, level); + + clib_slist_set_next_at_level (new_elt, prev_elt_next_index_this_level, + level); + + clib_slist_set_next_at_level (prev_elt_this_level, new_elt - sp->elts, + level); + sp->occupancy[level]++; + + /* Randomly add to the next-higher level */ + if (random_f64 (&sp->seed) > 
sp->branching_factor) + break; + } + { + /* Time to add a new ply? */ + clib_slist_elt_t *head_elt = pool_elt_at_index (sp->elts, 0); + int top_level = vec_len (head_elt->n.nexts) - 1; + if (((f64) sp->occupancy[top_level]) * sp->branching_factor > 1.0) + { + vec_add1 (sp->occupancy, 0); + vec_add1 (head_elt->n.nexts, (u32) ~ 0); + /* full match case returns n+1 items */ + vec_validate (sp->path, vec_len (head_elt->n.nexts)); + } + } +} + +clib_slist_search_result_t +clib_slist_del (clib_slist_t * sp, void *key) +{ + clib_slist_search_result_t search_result; + clib_slist_elt_t *del_elt; + int level; + + search_result = slist_search_internal (sp, key, 1 /* need full path */ ); + + if (PREDICT_FALSE (search_result == CLIB_SLIST_NO_MATCH)) + return search_result; + + del_elt = pool_elt_at_index (sp->elts, sp->path[vec_len (sp->path) - 1]); + ASSERT (vec_len (sp->path) > 1); + + for (level = 0; level < vec_len (sp->path) - 1; level++) + { + clib_slist_elt_t *path_elt; + u32 path_elt_next_index; + + path_elt = pool_elt_at_index (sp->elts, sp->path[level]); + path_elt_next_index = clib_slist_get_next_at_level (path_elt, level); + + /* Splice the item out of the list if it's adjacent to the victim */ + if (path_elt_next_index == del_elt - sp->elts) + { + sp->occupancy[level]--; + path_elt_next_index = clib_slist_get_next_at_level (del_elt, level); + clib_slist_set_next_at_level (path_elt, path_elt_next_index, level); + } + } + + /* If this element is on more than two lists it has a vector of nexts */ + if (!(del_elt->n.next0[0] & 1)) + vec_free (del_elt->n.nexts); + pool_put (sp->elts, del_elt); + return CLIB_SLIST_MATCH; +} + +u8 * +format_slist (u8 * s, va_list * args) +{ + clib_slist_t *sl = va_arg (*args, clib_slist_t *); + int verbose = va_arg (*args, int); + int i; + clib_slist_elt_t *head_elt, *elt; + + s = format (s, "slist 0x%x, %u items, branching_factor %.2f\n", sl, + sl->occupancy ? 
sl->occupancy[0] : 0, sl->branching_factor); + + if (pool_elts (sl->elts) == 0) + return s; + + head_elt = pool_elt_at_index (sl->elts, 0); + + for (i = 0; i < vec_len (head_elt->n.nexts); i++) + { + s = format (s, "level %d: %d elts\n", i, + sl->occupancy ? sl->occupancy[i] : 0); + + if (verbose && head_elt->n.nexts[i] != (u32) ~ 0) + { + elt = pool_elt_at_index (sl->elts, head_elt->n.nexts[i]); + while (elt) + { + u32 next_index; + s = format (s, "%U(%d) ", sl->format_user_element, + elt->user_pool_index, elt - sl->elts); + next_index = clib_slist_get_next_at_level (elt, i); + ASSERT (next_index != 0x7fffffff); + if (next_index == (u32) ~ 0) + break; + else + elt = pool_elt_at_index (sl->elts, next_index); + } + } + s = format (s, "\n"); + } + return s; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/slist.h b/extras/deprecated/vppinfra/slist.h new file mode 100644 index 00000000000..a7c77e27c96 --- /dev/null +++ b/extras/deprecated/vppinfra/slist.h @@ -0,0 +1,145 @@ +/* + Copyright (c) 2012 Cisco and/or its affiliates. + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +#ifndef included_slist_h +#define included_slist_h + +#include <stdarg.h> +#include <vppinfra/clib.h> +#include <vppinfra/vec.h> +#include <vppinfra/pool.h> +#include <vppinfra/error.h> +#include <vppinfra/format.h> +#include <vppinfra/cache.h> + +typedef word (clib_slist_key_compare_function_t) + (void *key, u32 elt_pool_index); + +typedef enum +{ + CLIB_SLIST_MATCH = 0, + CLIB_SLIST_NO_MATCH +} clib_slist_search_result_t; + +typedef struct +{ + /* Vector of next elements. Every valid instance has at least one */ + union + { + u32 next0[2]; + u32 *nexts; + } n; + + /* Index of item in user's pool */ + u32 user_pool_index; + /* $$$ pad to even divisor of cache line */ +} clib_slist_elt_t; + +static inline u32 +clib_slist_get_next_at_level (clib_slist_elt_t * elt, int level) +{ + if (elt->n.next0[0] & 1) + { + ASSERT (level < 2); + if (level == 1) + return elt->n.next0[1]; + /* preserve ~0 (end of list) */ + return (elt->n.next0[0] == (u32) ~ 0) ? elt->n.next0[0] : + (elt->n.next0[0] >> 1); + } + else + { + ASSERT (level < vec_len (elt->n.nexts)); + return elt->n.nexts[level]; + } +} + +static inline void +clib_slist_set_next_at_level (clib_slist_elt_t * elt, u32 index, int level) +{ + u32 old_level0_value[2]; + /* level0 and not a vector */ + if (level < 2 && (elt->n.next0[0] == 0 || elt->n.next0[0] & 1)) + { + if (level == 0) + { + elt->n.next0[0] = (index << 1) | 1; + return; + } + elt->n.next0[1] = index; + return; + } + /* have to save old level0 values? */ + if (elt->n.next0[0] & 1) + { + old_level0_value[0] = (elt->n.next0[0] == (u32) ~ 0) ? 
+ elt->n.next0[0] : elt->n.next0[0] >> 1; + old_level0_value[1] = elt->n.next0[1]; + elt->n.nexts = 0; + vec_add1 (elt->n.nexts, old_level0_value[0]); + vec_add1 (elt->n.nexts, old_level0_value[1]); + } + vec_validate (elt->n.nexts, level); + elt->n.nexts[level] = index; +} + + +typedef struct +{ + /* pool of skip-list elements */ + clib_slist_elt_t *elts; + + /* last search path */ + u32 *path; + + /* last search number of compares */ + u32 ncompares; + + /* occupancy stats */ + u32 *occupancy; + + /* Comparison function */ + clib_slist_key_compare_function_t *compare; + + /* Format function */ + format_function_t *format_user_element; + + /* items appear in successive plies with Pr (1 / branching_factor) */ + f64 branching_factor; + + /* random seed */ + u32 seed; +} clib_slist_t; + +clib_error_t *clib_slist_init (clib_slist_t * sp, f64 branching_factor, + clib_slist_key_compare_function_t compare, + format_function_t format_user_element); + +format_function_t format_slist; + +void clib_slist_add (clib_slist_t * sp, void *key, u32 user_pool_index); +clib_slist_search_result_t clib_slist_del (clib_slist_t * sp, void *key); +u32 clib_slist_search (clib_slist_t * sp, void *key, u32 * ncompares); + +#endif /* included_slist_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/test_cuckoo_bihash.c b/extras/deprecated/vppinfra/test_cuckoo_bihash.c new file mode 100644 index 00000000000..e992e26fa7d --- /dev/null +++ b/extras/deprecated/vppinfra/test_cuckoo_bihash.c @@ -0,0 +1,451 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +__thread int thread_id = 0; + +#include <vppinfra/time.h> +#include <vppinfra/cache.h> +#include <vppinfra/error.h> +#include <vppinfra/heap.h> +#include <vppinfra/format.h> +#include <vppinfra/pool.h> +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + +#define os_get_cpu_number() (thread_id) + +#include <vppinfra/cuckoo_8_8.h> +#include <vppinfra/cuckoo_template.h> +#include <vppinfra/cuckoo_template.c> + +#include <vppinfra/bihash_8_8.h> +#include <vppinfra/bihash_template.h> +#include <vppinfra/bihash_template.c> + +#include <pthread.h> + +#define MAX_THREADS 255 + +typedef struct +{ + void *tm; + int thread_idx; + u64 nlookups; +} thread_data_t; + +typedef struct +{ + u64 deadline; + u64 seed; + u32 nbuckets; + u32 nitems; + u32 runtime; + int verbose; + int non_random_keys; + int nthreads; + int search_iter; + uword *key_hash; + u64 *keys; + CVT (clib_cuckoo) ch; + BVT (clib_bihash) bh; + clib_time_t clib_time; + u64 *key_add_del_sequence; + u8 *key_op_sequence; + u64 *key_search_sequence[MAX_THREADS]; + unformat_input_t *input; + u64 nadds; + u64 ndels; + pthread_t bwriter_thread; + pthread_t breader_threads[MAX_THREADS]; + pthread_t cwriter_thread; + pthread_t creader_threads[MAX_THREADS]; + thread_data_t wthread_data; + thread_data_t rthread_data[MAX_THREADS]; +} test_main_t; + +test_main_t test_main; + +uword +vl (void *v) +{ + return vec_len (v); +} + +#define w_thread(x, guts) \ + void *x##writer_thread (void *v) \ + { \ + test_main_t *tm = v; \ + uword counter = 0; \ + u64 nadds = 0; \ + u64 
/*
 * run_threads (x)
 *
 * Benchmark one table implementation: start x##writer_thread and
 * tm->nthreads instances of x##reader_thread, wait for all of them to
 * finish, then print the per-thread search counts and the overall
 * add/del and search rates.
 *
 * The macro expands in place and uses these names from the enclosing scope:
 *   tm              test_main_t * describing the run
 *   i               int loop counter
 *   before, delta   f64 timestamps / elapsed seconds
 *   total_searches  uword sum of all reader lookups
 *   ops, sps        f64 rates, only updated when delta > 0
 *
 * A failing pthread_create () or pthread_join () aborts the process.
 */
#define run_threads(x)                                                        \
  do                                                                          \
    {                                                                         \
      fformat (stdout, #x "-> Start threads..., runtime is %llu second(s)\n", \
               (long long unsigned) tm->runtime);                             \
      fformat (stdout, #x "-> Writer + %d readers\n", tm->nthreads);          \
                                                                              \
      /* Start the clock and fix the deadline right before the threads */     \
      before = clib_time_now (&tm->clib_time);                                \
      tm->deadline = clib_cpu_time_now () +                                   \
                     tm->runtime * tm->clib_time.clocks_per_second;           \
                                                                              \
      if (0 !=                                                                \
          pthread_create (&tm->x##writer_thread, NULL, x##writer_thread, tm)) \
        {                                                                     \
          perror ("pthread_create()");                                        \
          abort ();                                                           \
        }                                                                     \
                                                                              \
      for (i = 0; i < tm->nthreads; i++)                                      \
        {                                                                     \
          tm->rthread_data[i].nlookups = 0;                                   \
          if (0 != pthread_create (&tm->x##reader_threads[i], NULL,           \
                                   x##reader_thread, &tm->rthread_data[i]))   \
            {                                                                 \
              perror ("pthread_create()");                                    \
              abort ();                                                       \
            }                                                                 \
        }                                                                     \
                                                                              \
      if (0 != pthread_join (tm->x##writer_thread, NULL))                     \
        {                                                                     \
          perror ("pthread_join()");                                          \
          abort ();                                                           \
        }                                                                     \
                                                                              \
      for (i = 0; i < tm->nthreads; i++)                                      \
        {                                                                     \
          if (0 != pthread_join (tm->x##reader_threads[i], NULL))             \
            {                                                                 \
              perror ("pthread_join()");                                      \
              abort ();                                                       \
            }                                                                 \
        }                                                                     \
                                                                              \
      delta = clib_time_now (&tm->clib_time) - before;                        \
                                                                              \
      total_searches = 0;                                                     \
      for (i = 0; i < tm->nthreads; ++i)                                      \
        {                                                                     \
          u64 nlookups = tm->rthread_data[i].nlookups;                        \
          /* nlookups is 64 bits wide, so it needs %llu rather than %u */     \
          fformat (stdout, #x "-> Thread #%d: %llu searches\n", i,            \
                   nlookups);                                                 \
          total_searches += nlookups;                                         \
        }                                                                     \
                                                                              \
      if (delta > 0)                                                          \
        {                                                                     \
          ops = (tm->nadds + tm->ndels) / (f64) delta;                        \
          fformat (stdout, #x "-> %.f add/dels per second\n", ops);           \
          sps = ((f64) total_searches) / delta;                               \
          fformat (stdout, #x "-> %.f searches per second\n", sps);           \
        }                                                                     \
                                                                              \
      /* nadds / ndels are u64 (%llu), total_searches is a uword (%wu) */     \
      fformat (stdout,                                                        \
               #x "-> %llu adds, %llu dels, %wu searches in %.6f seconds\n",  \
               tm->nadds, tm->ndels, total_searches, delta);                  \
    }                                                                         \
  while (0)
1 : 0); + } + + int thread_counter = 0; + tm->wthread_data.tm = tm; + tm->wthread_data.thread_idx = thread_counter; + for (i = 0; i < tm->nthreads; ++i) + { + tm->rthread_data[i].tm = tm; + tm->rthread_data[i].thread_idx = thread_counter; + tm->rthread_data[i].nlookups = 0; + ++thread_counter; + } + + int iter; + for (iter = 0; iter < tm->search_iter; ++iter) + { + fformat (stdout, "Bihash test #%d\n", iter); + run_threads (b); + bops = ops; + bsps = sps; + fformat (stdout, "%U", BV (format_bihash), bh, 0); + fformat (stdout, "Cuckoo test #%d\n", iter); + run_threads (c); + cops = ops; + csps = sps; + fformat (stdout, "%U", CV (format_cuckoo), ch, 0); + fformat (stdout, + "Bihash add/del speed is %.2f%% of cuckoo add/del speed\n", + bops / cops * 100); + fformat (stdout, + "Bihash search speed is %.2f%% of cuckoo search speed\n", + bsps / csps * 100); + } + return 0; +} + +clib_error_t * +test_cuckoo_bihash_main (test_main_t * tm) +{ + unformat_input_t *i = tm->input; + clib_error_t *error; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "seed %u", &tm->seed)) + ; + else if (unformat (i, "nbuckets %d", &tm->nbuckets)) + ; + else if (unformat (i, "non-random-keys")) + tm->non_random_keys = 1; + else if (unformat (i, "nitems %d", &tm->nitems)) + ; + else if (unformat (i, "search_iter %d", &tm->search_iter)) + ; + else if (unformat (i, "verbose %d", &tm->verbose)) + ; + else if (unformat (i, "runtime %d", &tm->runtime)) + ; + else if (unformat (i, "nthreads %d", &tm->nthreads)) + ; + else if (unformat (i, "verbose")) + tm->verbose = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, i); + } + + error = test_cuckoo_bihash (tm); + + return error; +} + +#ifdef CLIB_UNIX +int +main (int argc, char *argv[]) +{ + unformat_input_t i; + clib_error_t *error; + test_main_t *tm = &test_main; + clib_memset (&test_main, 0, sizeof (test_main)); + + clib_mem_init (0, 3ULL << 30); + + tm->input = &i; + tm->seed 
= 0xdeaddabe; + + tm->nbuckets = 2; + tm->nitems = 5; + tm->verbose = 1; + tm->nthreads = 1; + clib_time_init (&tm->clib_time); + tm->runtime = 1; + tm->search_iter = 1; + tm->key_hash = hash_create (0, sizeof (uword)); + + unformat_init_command_line (&i, argv); + error = test_cuckoo_bihash_main (tm); + unformat_free (&i); + + if (error) + { + clib_error_report (error); + return 1; + } + return 0; +} +#endif /* CLIB_UNIX */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/test_cuckoo_template.c b/extras/deprecated/vppinfra/test_cuckoo_template.c new file mode 100644 index 00000000000..9619dc2e802 --- /dev/null +++ b/extras/deprecated/vppinfra/test_cuckoo_template.c @@ -0,0 +1,318 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vppinfra/time.h> +#include <vppinfra/cache.h> +#include <vppinfra/error.h> +#include <vppinfra/heap.h> +#include <vppinfra/format.h> +#include <vppinfra/pool.h> +#include <vppinfra/error.h> +#include <vppinfra/hash.h> +#include <vppinfra/cache.h> + +#include <vppinfra/time.h> +#include <vppinfra/cache.h> +#include <vppinfra/error.h> + +#include <vppinfra/cuckoo_8_8.h> +#include <vppinfra/cuckoo_template.h> +#include <vppinfra/cuckoo_template.c> + +typedef struct +{ + u64 seed; + u32 nbuckets; + u32 nitems; + u32 search_iter; + int careful_delete_tests; + int verbose; + int non_random_keys; + uword *key_hash; + u64 *keys; + CVT (clib_cuckoo) hash; + clib_time_t clib_time; + + unformat_input_t *input; + +} test_main_t; + +test_main_t test_main; + +uword +vl (void *v) +{ + return vec_len (v); +} + +void +do_search (test_main_t * tm, CVT (clib_cuckoo) * h) +{ + int i, j; + CVT (clib_cuckoo_kv) kv; + for (j = 0; j < tm->search_iter; j++) + { + for (i = 0; i < tm->nitems; i++) + { + kv.key = tm->keys[i]; + if (CV (clib_cuckoo_search) (h, &kv, &kv) < 0) + if (CV (clib_cuckoo_search) (h, &kv, &kv) < 0) + clib_warning ("[%d] search for key %llu failed unexpectedly\n", + i, tm->keys[i]); + if (kv.value != (u64) (i + 1)) + clib_warning + ("[%d] search for key %llu returned %llu, not %llu\n", i, + tm->keys[i], kv.value, (u64) (i + 1)); + } + } +} + +static void +cb (CVT (clib_cuckoo) * h, void *ctx) +{ + fformat (stdout, "Garbage callback called..."); + if (clib_cpu_time_now () % 3) + { + fformat (stdout, "collecting garbage...\n"); + CV (clib_cuckoo_garbage_collect) (h); + } + else + { + fformat (stdout, "ignoring for now...\n"); + } +} + +static clib_error_t * +test_cuckoo (test_main_t * tm) +{ + int i; + uword *p; + uword total_searches; + f64 before, delta; + CVT (clib_cuckoo) * h; + CVT (clib_cuckoo_kv) kv; + + h = &tm->hash; + + CV (clib_cuckoo_init) (h, "test", tm->nbuckets, cb, NULL); + + fformat (stdout, "Pick %lld unique %s keys...\n", tm->nitems, + 
tm->non_random_keys ? "non-random" : "random"); + + for (i = 0; i < tm->nitems; i++) + { + u64 rndkey; + + if (tm->non_random_keys == 0) + { + + again: + rndkey = random_u64 (&tm->seed); + + p = hash_get (tm->key_hash, rndkey); + if (p) + goto again; + } + else + rndkey = (u64) (i + 1) << 16; + + hash_set (tm->key_hash, rndkey, i + 1); + vec_add1 (tm->keys, rndkey); + } + + fformat (stdout, "Add items...\n"); + for (i = 0; i < tm->nitems; i++) + { + kv.key = tm->keys[i]; + kv.value = i + 1; + + CV (clib_cuckoo_add_del) (h, &kv, 1 /* is_add */ , + 0 /* overwrite */ ); + + if (tm->verbose > 1) + { + fformat (stdout, "--------------------\n"); + fformat (stdout, "After adding key %llu value %lld...\n", + tm->keys[i], (u64) (i + 1)); + fformat (stdout, "%U", CV (format_cuckoo), h, + 2 /* very verbose */ ); + } + + CVT (clib_cuckoo_kv) kv2; + int rv = CV (clib_cuckoo_search) (h, &kv, &kv2); + ASSERT (CLIB_CUCKOO_ERROR_SUCCESS == rv); + } + + fformat (stdout, "%U", CV (format_cuckoo), h, 0 /* very verbose */ ); + + fformat (stdout, "Search for items %d times...\n", tm->search_iter); + + before = clib_time_now (&tm->clib_time); + + do_search (tm, h); + + delta = clib_time_now (&tm->clib_time) - before; + total_searches = (uword) tm->search_iter * (uword) tm->nitems; + + if (delta > 0) + fformat (stdout, "%.f searches per second\n", + ((f64) total_searches) / delta); + + fformat (stdout, "%lld searches in %.6f seconds\n", total_searches, delta); + +#if 0 + int j; + fformat (stdout, "Standard E-hash search for items %d times...\n", + tm->search_iter); + + before = clib_time_now (&tm->clib_time); + + for (j = 0; j < tm->search_iter; j++) + { + for (i = 0; i < tm->nitems; i++) + { + p = hash_get (tm->key_hash, tm->keys[i]); + if (p == 0 || p[0] != (uword) (i + 1)) + clib_warning ("ugh, couldn't find %lld\n", tm->keys[i]); + } + } + + delta = clib_time_now (&tm->clib_time) - before; + total_searches = (uword) tm->search_iter * (uword) tm->nitems; + + fformat (stdout, "%lld 
searches in %.6f seconds\n", total_searches, delta); + + if (delta > 0) + fformat (stdout, "%.f searches per second\n", + ((f64) total_searches) / delta); + +#endif + fformat (stdout, "Delete items...\n"); + + for (i = 0; i < tm->nitems; i++) + { + int j; + int rv; + + kv.key = tm->keys[i]; + kv.value = (u64) (i + 1); + rv = CV (clib_cuckoo_add_del) (h, &kv, 0 /* is_add */ , + 0 /* dont_overwrite */ ); + + if (rv < 0) + clib_warning ("delete key %lld not ok but should be", tm->keys[i]); + + if (tm->careful_delete_tests) + { + for (j = 0; j < tm->nitems; j++) + { + kv.key = tm->keys[j]; + rv = CV (clib_cuckoo_search) (h, &kv, &kv); + if (j <= i && rv >= 0) + { + clib_warning + ("i %d j %d search ok but should not be, value %lld", i, + j, kv.value); + } + if (j > i && rv < 0) + { + clib_warning ("i %d j %d search not ok but should be", i, + j); + } + } + } + } + + fformat (stdout, "After deletions, should be empty...\n"); + + fformat (stdout, "%U", CV (format_cuckoo), h, 0 /* very verbose */ ); + return 0; +} + +clib_error_t * +test_cuckoo_main (test_main_t * tm) +{ + unformat_input_t *i = tm->input; + clib_error_t *error; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "seed %u", &tm->seed)) + ; + else if (unformat (i, "nbuckets %d", &tm->nbuckets)) + ; + else if (unformat (i, "non-random-keys")) + tm->non_random_keys = 1; + else if (unformat (i, "nitems %d", &tm->nitems)) + ; + else if (unformat (i, "careful %d", &tm->careful_delete_tests)) + ; + else if (unformat (i, "verbose %d", &tm->verbose)) + ; + else if (unformat (i, "search %d", &tm->search_iter)) + ; + else if (unformat (i, "verbose")) + tm->verbose = 1; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, i); + } + + error = test_cuckoo (tm); + + return error; +} + +#ifdef CLIB_UNIX +int +main (int argc, char *argv[]) +{ + unformat_input_t i; + clib_error_t *error; + test_main_t *tm = &test_main; + + clib_mem_init (0, 3ULL << 30); + + 
tm->input = &i; + tm->seed = 0xdeaddabe; + + tm->nbuckets = 2; + tm->nitems = 100000; + tm->verbose = 1; + tm->search_iter = 10000; + tm->careful_delete_tests = 0; + tm->key_hash = hash_create (0, sizeof (uword)); + clib_time_init (&tm->clib_time); + + unformat_init_command_line (&i, argv); + error = test_cuckoo_main (tm); + unformat_free (&i); + + if (error) + { + clib_error_report (error); + return 1; + } + return 0; +} +#endif /* CLIB_UNIX */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/test_flowhash_template.c b/extras/deprecated/vppinfra/test_flowhash_template.c new file mode 100644 index 00000000000..19ac4edf2e2 --- /dev/null +++ b/extras/deprecated/vppinfra/test_flowhash_template.c @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vppinfra/time.h> +#include <vppinfra/cache.h> +#include <vppinfra/error.h> + +#include <vppinfra/heap.h> +#include <vppinfra/format.h> +#include <vppinfra/random.h> +#include <vppinfra/hash.h> + +#include <vppinfra/flowhash_8_8.h> + +/* Not actually tested here. But included for compilation purposes. 
*/ +#include <vppinfra/flowhash_24_16.h> + +typedef struct +{ + u64 seed; + u32 fixed_entries; + u32 collision_buckets; + u32 nitems; + u32 iterations; + u32 prefetch; + int non_random_keys; + uword *key_hash; + flowhash_lkey_8_8_t *keys; + flowhash_8_8_t *hash; + clib_time_t clib_time; + unformat_input_t *input; +} test_main_t; + +test_main_t test_main; + +static clib_error_t * +test_flowhash (test_main_t * tm) +{ + f64 before, delta; + u64 total; + u32 overflow; + int i, j; + uword *p; + tm->hash = flowhash_alloc_8_8 (tm->fixed_entries, tm->collision_buckets); + if (tm->hash == NULL) + return clib_error_return (0, "Could not alloc hash"); + + fformat (stdout, "Allocated hash memory size: %llu\n", + flowhash_memory_size (tm->hash)); + + fformat (stdout, "Pick %lld unique %s keys...\n", + tm->nitems, tm->non_random_keys ? "non-random" : "random"); + + for (i = 0; i < tm->nitems; i++) + { + flowhash_lkey_8_8_t rndkey; + if (tm->non_random_keys == 0) + { + again: + rndkey.as_u64[0] = random_u64 (&tm->seed); + if ((p = hash_get (tm->key_hash, rndkey.as_u64[0]))) + goto again; + } + else + rndkey.as_u64[0] = (u64) (i + 1) << 16; + + hash_set (tm->key_hash, rndkey.as_u64[0], i + 1); + vec_add1 (tm->keys, rndkey); + } + + hash_free (tm->key_hash); + + /* Additions */ + overflow = 0; + before = clib_time_now (&tm->clib_time); + fformat (stdout, "Adding %u items...\n", tm->nitems); + for (i = 0; i < tm->nitems; i++) + { + u32 hash = flowhash_hash_8_8 (&tm->keys[i]); + u32 ei; + flowhash_get_8_8 (tm->hash, &tm->keys[i], hash, 1, &ei); + if (flowhash_is_overflow (ei)) + overflow++; + + /* Set value (No matter if success) */ + flowhash_value (tm->hash, ei)->as_u64[0] = i + 1; + + /* Save value until time > 1 */ + flowhash_timeout (tm->hash, ei) = 1; + } + + delta = clib_time_now (&tm->clib_time) - before; + total = tm->nitems; + fformat (stdout, "%lld additions in %.6f seconds\n", total, delta); + if (delta > 0) + fformat (stdout, "%.f additions per second\n", ((f64) total) / 
delta); + + fformat (stdout, "%u elements in table\n", flowhash_elts_8_8 (tm->hash, 1)); + fformat (stdout, "Flowhash counters:\n"); + fformat (stdout, " collision-lookup: %lu\n", + tm->hash->collision_lookup_counter); + fformat (stdout, " not-enough-buckets: %lu\n", + tm->hash->not_enough_buckets_counter); + fformat (stdout, " overflows: %lu\n", overflow); + + /* Lookups (very similar to additions) */ + overflow = 0; + before = clib_time_now (&tm->clib_time); + fformat (stdout, "Looking up %u items %u times...\n", tm->nitems, + tm->iterations); + + for (j = 0; j < tm->iterations; j++) + { + i = 0; + if (tm->prefetch) + for (; i < tm->nitems - tm->prefetch; i++) + { + u32 ei; + u32 hash = flowhash_hash_8_8 (&tm->keys[i + tm->prefetch]); + flowhash_prefetch (tm->hash, hash); + hash = flowhash_hash_8_8 (&tm->keys[i]); + flowhash_get_8_8 (tm->hash, &tm->keys[i], hash, 1, &ei); + if (flowhash_is_overflow (ei)) + overflow++; + else if (flowhash_timeout (tm->hash, ei) != 1) + clib_warning ("Key not found: %lld\n", tm->keys[i].as_u64[0]); + else if (flowhash_value (tm->hash, ei)->as_u64[0] != i + 1) + clib_warning ("Value mismatch for key %lld\n", + tm->keys[i].as_u64[0]); + } + + for (; i < tm->nitems; i++) + { + u32 ei; + u32 hash = flowhash_hash_8_8 (&tm->keys[i]); + flowhash_get_8_8 (tm->hash, &tm->keys[i], hash, 1, &ei); + if (flowhash_is_overflow (ei)) + overflow++; + else if (flowhash_timeout (tm->hash, ei) != 1) + clib_warning ("Key not found: %lld\n", tm->keys[i].as_u64[0]); + else if (flowhash_value (tm->hash, ei)->as_u64[0] != i + 1) + clib_warning ("Value mismatch for key %lld\n", + tm->keys[i].as_u64[0]); + } + } + + delta = clib_time_now (&tm->clib_time) - before; + total = tm->nitems * tm->iterations; + fformat (stdout, "%lld lookups in %.6f seconds\n", total, delta); + if (delta > 0) + fformat (stdout, "%.f lookups per second\n", ((f64) total) / delta); + + /* Delete */ + for (i = 0; i < tm->nitems; i++) + { + u32 hash = flowhash_hash_8_8 (&tm->keys[i]); + 
u32 ei; + flowhash_get_8_8 (tm->hash, &tm->keys[i], hash, 1, &ei); + flowhash_timeout (tm->hash, ei) = 0; + } + + fformat (stdout, "%u elements in table\n", flowhash_elts_8_8 (tm->hash, 1)); + + vec_free (tm->keys); + flowhash_free_8_8 (tm->hash); + + return NULL; +} + +clib_error_t * +test_flowhash_main (test_main_t * tm) +{ + unformat_input_t *i = tm->input; + clib_error_t *error; + + while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT) + { + if (unformat (i, "seed %u", &tm->seed)) + ; + else if (unformat (i, "fixed-entries %d", &tm->fixed_entries)) + ; + else if (unformat (i, "collision-buckets %d", &tm->collision_buckets)) + ; + else if (unformat (i, "non-random-keys")) + tm->non_random_keys = 1; + else if (unformat (i, "nitems %d", &tm->nitems)) + ; + else if (unformat (i, "prefetch %d", &tm->prefetch)) + ; + else if (unformat (i, "iterations %d", &tm->iterations)) + ; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, i); + } + + error = test_flowhash (tm); + return error; +} + +#ifdef CLIB_UNIX +int +main (int argc, char *argv[]) +{ + + unformat_input_t i; + clib_error_t *error; + test_main_t *tm = &test_main; + + clib_mem_init (0, 3ULL << 30); + + tm->fixed_entries = 8 << 20; + tm->collision_buckets = 1 << 20; + tm->seed = 0xdeadf00l; + tm->iterations = 1; + tm->input = &i; + tm->nitems = 1000; + tm->non_random_keys = 0; + tm->key_hash = hash_create (0, sizeof (uword)); + tm->prefetch = 0; + clib_time_init (&tm->clib_time); + + unformat_init_command_line (&i, argv); + error = test_flowhash_main (tm); + unformat_free (&i); + + if (error) + { + clib_error_report (error); + return 1; + } + return 0; + + return 0; +} +#endif /* CLIB_UNIX */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/test_phash.c b/extras/deprecated/vppinfra/test_phash.c new file mode 100644 index 00000000000..47711c28dbc --- /dev/null +++ 
b/extras/deprecated/vppinfra/test_phash.c @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + Copyright (c) 2005 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#include <vppinfra/phash.h> +#include <vppinfra/format.h> +#include <vppinfra/random.h> + +static int verbose; +#define if_verbose(format,args...) 
\ + if (verbose) { clib_warning(format, ## args); } + +int +test_phash_main (unformat_input_t * input) +{ + phash_main_t _pm = { 0 }, *pm = &_pm; + int n_keys, random_keys; + u32 seed; + clib_error_t *error; + + random_keys = 1; + n_keys = 1000; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (0 == unformat (input, "keys %d", &n_keys) + && 0 == unformat (input, "verbose %=", &verbose, 1) + && 0 == unformat (input, "random-keys %=", &random_keys, 1) + && 0 == unformat (input, "sequential-keys %=", &random_keys, 0) + && 0 == unformat (input, "seed %d", &pm->random_seed) + && 0 == unformat (input, "64-bit %|", &pm->flags, PHASH_FLAG_MIX64) + && 0 == unformat (input, "32-bit %|", &pm->flags, PHASH_FLAG_MIX32) + && 0 == unformat (input, "fast %|", &pm->flags, + PHASH_FLAG_FAST_MODE) + && 0 == unformat (input, "slow %|", &pm->flags, + PHASH_FLAG_SLOW_MODE) + && 0 == unformat (input, "minimal %|", &pm->flags, + PHASH_FLAG_MINIMAL) + && 0 == unformat (input, "non-minimal %|", &pm->flags, + PHASH_FLAG_NON_MINIMAL)) + clib_error ("unknown input `%U'", format_unformat_error, input); + } + + if (!pm->random_seed) + pm->random_seed = random_default_seed (); + + if_verbose + ("%d %d-bit keys, random seed %d, %s mode, looking for %sminimal hash", + n_keys, (pm->flags & PHASH_FLAG_MIX64) ? 64 : 32, pm->random_seed, + (pm->flags & PHASH_FLAG_FAST_MODE) ? "fast" : "slow", + (pm->flags & PHASH_FLAG_MINIMAL) ? "" : "non-"); + + seed = pm->random_seed; + + /* Initialize random keys. 
*/ + { + phash_key_t *k; + + vec_resize (pm->keys, n_keys); + vec_foreach (k, pm->keys) + { + k->key = k - pm->keys; + if (random_keys) + { + if (pm->flags & PHASH_FLAG_MIX64) + k->key = random_u64 (&seed); + else + k->key = random_u32 (&seed); + } + } + } + + error = phash_find_perfect_hash (pm); + if (error) + { + clib_error_report (error); + return 1; + } + else + { + if_verbose ("(%d,%d) (a,b) bits, %d seeds tried, %d tree walks", + pm->a_bits, pm->b_bits, + pm->n_seed_trials, pm->n_perfect_calls); + + error = phash_validate (pm); + if (error) + { + clib_error_report (error); + return 1; + } + } + + return 0; +} + +#ifdef CLIB_UNIX +int +main (int argc, char *argv[]) +{ + unformat_input_t i; + int res; + + clib_mem_init (0, 64ULL << 20); + + verbose = (argc > 1); + unformat_init_command_line (&i, argv); + res = test_phash_main (&i); + unformat_free (&i); + return res; +} +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/test_qhash.c b/extras/deprecated/vppinfra/test_qhash.c new file mode 100644 index 00000000000..a520fa4bd77 --- /dev/null +++ b/extras/deprecated/vppinfra/test_qhash.c @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vppinfra/bitmap.h> +#include <vppinfra/os.h> +#include <vppinfra/qhash.h> +#include <vppinfra/random.h> +#include <vppinfra/time.h> + +typedef struct +{ + u32 n_iter, seed, n_keys, n_hash_keys, verbose; + + u32 max_vector; + + uword *hash; + + uword *keys_in_hash_bitmap; + + u32 *qhash; + + uword *keys; + + uword *lookup_keys; + uword *lookup_key_indices; + u32 *lookup_results; + + u32 *get_multiple_results; + + clib_time_t time; + + f64 overflow_fraction, ave_elts; + f64 get_time, hash_get_time; + f64 set_time, set_count; + f64 unset_time, unset_count; + f64 hash_set_time, hash_unset_time; +} test_qhash_main_t; + +clib_error_t * +test_qhash_main (unformat_input_t * input) +{ + clib_error_t *error = 0; + test_qhash_main_t _tm, *tm = &_tm; + uword i, iter; + + clib_memset (tm, 0, sizeof (tm[0])); + tm->n_iter = 10; + tm->seed = 1; + tm->n_keys = 10; + tm->max_vector = 1; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "iter %d", &tm->n_iter)) + ; + else if (unformat (input, "seed %d", &tm->seed)) + ; + else if (unformat (input, "keys %d", &tm->n_keys)) + ; + else if (unformat (input, "size %d", &tm->n_hash_keys)) + ; + else if (unformat (input, "vector %d", &tm->max_vector)) + ; + else if (unformat (input, "verbose")) + tm->verbose = 1; + else + { + error = clib_error_create ("unknown input `%U'\n", + format_unformat_error, input); + goto done; + } + } + + if (!tm->seed) + tm->seed = random_default_seed (); + + clib_time_init (&tm->time); + + clib_warning ("iter %d, seed %u, keys %d, max vector %d, ", + tm->n_iter, tm->seed, tm->n_keys, tm->max_vector); + + vec_resize (tm->keys, tm->n_keys); + vec_resize (tm->get_multiple_results, tm->n_keys); + for (i = 0; i < vec_len (tm->keys); i++) + tm->keys[i] = random_uword (&tm->seed); + + if (!tm->n_hash_keys) + tm->n_hash_keys = 2 * max_pow2 (tm->n_keys); + tm->n_hash_keys = clib_max (tm->n_keys, tm->n_hash_keys); + qhash_resize (tm->qhash, tm->n_hash_keys); + + 
{ + qhash_t *h = qhash_header (tm->qhash); + int i; + for (i = 0; i < ARRAY_LEN (h->hash_seeds); i++) + h->hash_seeds[i] = random_uword (&tm->seed); + } + + vec_resize (tm->lookup_keys, tm->max_vector); + vec_resize (tm->lookup_key_indices, tm->max_vector); + vec_resize (tm->lookup_results, tm->max_vector); + + for (iter = 0; iter < tm->n_iter; iter++) + { + uword *p, j, n, is_set; + + n = tm->max_vector; + + is_set = random_u32 (&tm->seed) & 1; + is_set |= hash_elts (tm->hash) < (tm->n_keys / 4); + if (hash_elts (tm->hash) > (3 * tm->n_keys) / 4) + is_set = 0; + + _vec_len (tm->lookup_keys) = n; + _vec_len (tm->lookup_key_indices) = n; + j = 0; + while (j < n) + { + i = random_u32 (&tm->seed) % vec_len (tm->keys); + if (clib_bitmap_get (tm->keys_in_hash_bitmap, i) != is_set) + { + f64 t[2]; + tm->lookup_key_indices[j] = i; + tm->lookup_keys[j] = tm->keys[i]; + t[0] = clib_time_now (&tm->time); + if (is_set) + hash_set (tm->hash, tm->keys[i], i); + else + hash_unset (tm->hash, tm->keys[i]); + t[1] = clib_time_now (&tm->time); + if (is_set) + tm->hash_set_time += t[1] - t[0]; + else + tm->hash_unset_time += t[1] - t[0]; + tm->keys_in_hash_bitmap + = clib_bitmap_set (tm->keys_in_hash_bitmap, i, is_set); + j++; + } + } + + { + f64 t[2]; + + if (is_set) + { + t[0] = clib_time_now (&tm->time); + qhash_set_multiple (tm->qhash, + tm->lookup_keys, + vec_len (tm->lookup_keys), + tm->lookup_results); + t[1] = clib_time_now (&tm->time); + tm->set_time += t[1] - t[0]; + tm->set_count += vec_len (tm->lookup_keys); + for (i = 0; i < vec_len (tm->lookup_keys); i++) + { + uword r = tm->lookup_results[i]; + *vec_elt_at_index (tm->qhash, r) = tm->lookup_key_indices[i]; + } + } + else + { + t[0] = clib_time_now (&tm->time); + qhash_unset_multiple (tm->qhash, + tm->lookup_keys, + vec_len (tm->lookup_keys), + tm->lookup_results); + t[1] = clib_time_now (&tm->time); + tm->unset_time += t[1] - t[0]; + tm->unset_count += vec_len (tm->lookup_keys); + + for (i = 0; i < vec_len 
(tm->lookup_keys); i++) + { + uword r = tm->lookup_results[i]; + *vec_elt_at_index (tm->qhash, r) = ~0; + } + } + } + + if (qhash_elts (tm->qhash) != hash_elts (tm->hash)) + os_panic (); + + { + qhash_t *h; + uword i, k, l, count; + + h = qhash_header (tm->qhash); + + for (i = k = 0; k < vec_len (h->hash_key_valid_bitmap); k++) + i += count_set_bits (h->hash_key_valid_bitmap[k]); + k = hash_elts (h->overflow_hash); + l = qhash_elts (tm->qhash); + if (i + k != l) + os_panic (); + + count = hash_elts (h->overflow_hash); + for (i = 0; i < (1 << h->log2_hash_size); i++) + count += tm->qhash[i] != ~0; + if (count != qhash_elts (tm->qhash)) + os_panic (); + + { + u32 *tmp = 0; + + /* *INDENT-OFF* */ + hash_foreach (k, l, h->overflow_hash, ({ + j = qhash_hash_mix (h, k) / QHASH_KEYS_PER_BUCKET; + vec_validate (tmp, j); + tmp[j] += 1; + })); + /* *INDENT-ON* */ + + for (k = 0; k < vec_len (tmp); k++) + { + if (k >= vec_len (h->overflow_counts)) + os_panic (); + if (h->overflow_counts[k] != tmp[k]) + os_panic (); + } + for (; k < vec_len (h->overflow_counts); k++) + if (h->overflow_counts[k] != 0) + os_panic (); + + vec_free (tmp); + } + } + + { + f64 t[2]; + + t[0] = clib_time_now (&tm->time); + qhash_get_multiple (tm->qhash, tm->keys, vec_len (tm->keys), + tm->get_multiple_results); + t[1] = clib_time_now (&tm->time); + tm->get_time += t[1] - t[0]; + + for (i = 0; i < vec_len (tm->keys); i++) + { + u32 r; + + t[0] = clib_time_now (&tm->time); + p = hash_get (tm->hash, tm->keys[i]); + t[1] = clib_time_now (&tm->time); + tm->hash_get_time += t[1] - t[0]; + + r = qhash_get (tm->qhash, tm->keys[i]); + if (p) + { + if (p[0] != i) + os_panic (); + if (*vec_elt_at_index (tm->qhash, r) != i) + os_panic (); + } + else + { + if (r != ~0) + os_panic (); + } + if (r != tm->get_multiple_results[i]) + os_panic (); + } + } + + tm->overflow_fraction += + ((f64) qhash_n_overflow (tm->qhash) / qhash_elts (tm->qhash)); + tm->ave_elts += qhash_elts (tm->qhash); + } + + fformat (stderr, "%d 
iter %.6e overflow, %.4f ave. elts\n", + tm->n_iter, + tm->overflow_fraction / tm->n_iter, tm->ave_elts / tm->n_iter); + + tm->get_time /= tm->n_iter * vec_len (tm->keys); + tm->hash_get_time /= tm->n_iter * vec_len (tm->keys); + + tm->set_time /= tm->set_count; + tm->unset_time /= tm->unset_count; + tm->hash_set_time /= tm->set_count; + tm->hash_unset_time /= tm->unset_count; + + fformat (stderr, + "get/set/unset clocks %.2e %.2e %.2e clib %.2e %.2e %.2e ratio %.2f %.2f %.2f\n", + tm->get_time * tm->time.clocks_per_second, + tm->set_time * tm->time.clocks_per_second, + tm->unset_time * tm->time.clocks_per_second, + tm->hash_get_time * tm->time.clocks_per_second, + tm->hash_set_time * tm->time.clocks_per_second, + tm->hash_unset_time * tm->time.clocks_per_second, + tm->hash_get_time / tm->get_time, tm->hash_set_time / tm->set_time, + tm->hash_unset_time / tm->unset_time); + + +done: + return error; +} + +#ifdef CLIB_UNIX +int +main (int argc, char *argv[]) +{ + unformat_input_t i; + clib_error_t *error; + + clib_mem_init (0, 64ULL << 20); + + unformat_init_command_line (&i, argv); + error = test_qhash_main (&i); + unformat_free (&i); + if (error) + { + clib_error_report (error); + return 1; + } + else + return 0; +} +#endif /* CLIB_UNIX */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/test_slist.c b/extras/deprecated/vppinfra/test_slist.c new file mode 100644 index 00000000000..3c3cbf73ca9 --- /dev/null +++ b/extras/deprecated/vppinfra/test_slist.c @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef CLIB_UNIX +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#endif + +#include <vppinfra/slist.h> + +typedef struct +{ + u32 *random_pool; + u32 seed; + u32 iter; + u32 verbose; + f64 branching_factor; + clib_slist_t slist; +} test_main_t; + +test_main_t test_main; + +#define foreach_simple_test \ +_(2) \ +_(4) \ +_(3) \ +_(1) + + +void +run_test (test_main_t * tm) +{ + int i; + u32 *tv; + u32 ncompares; + u64 total_compares = 0; + + if (1) + { + /* + * Add a bunch of random numbers to the skip-list, + * sorting them. + */ + for (i = 0; i < tm->iter; i++) + { + pool_get (tm->random_pool, tv); + *tv = random_u32 (&tm->seed); + clib_slist_add (&tm->slist, tv, tv - tm->random_pool); + } + /* make sure we can find each one */ + for (i = 0; i < tm->iter; i++) + { + u32 search_result; + tv = pool_elt_at_index (tm->random_pool, i); + + search_result = clib_slist_search (&tm->slist, tv, &ncompares); + ASSERT (search_result == i); + + total_compares += ncompares; + } + + fformat (stdout, "%.2f avg compares/search\n", + (f64) total_compares / (f64) i); + + fformat (stdout, "%U\n", format_slist, &tm->slist, + tm->iter < 1000 /* verbose */ ); + + /* delete half of them */ + for (i = tm->iter / 2; i < tm->iter; i++) + { + tv = pool_elt_at_index (tm->random_pool, i); + (void) clib_slist_del (&tm->slist, tv); + } + + /* make sure we can find the set we should find, and no others */ + for (i = 0; i < tm->iter; i++) + { + u32 search_result; + tv = pool_elt_at_index (tm->random_pool, i); + + search_result = clib_slist_search (&tm->slist, 
tv, &ncompares); + if (i >= tm->iter / 2) + ASSERT (search_result == (u32) ~ 0); + else + ASSERT (search_result == i); + + } + + fformat (stdout, "%U\n", format_slist, &tm->slist, + tm->iter < 1000 /* verbose */ ); + + /* delete the rest */ + for (i = 0; i < tm->iter; i++) + { + tv = pool_elt_at_index (tm->random_pool, i); + + (void) clib_slist_del (&tm->slist, tv); + } + + fformat (stdout, "%U\n", format_slist, &tm->slist, + tm->iter < 1000 /* verbose */ ); + } + else + { + +#define _(n) \ + do { \ + pool_get (tm->random_pool, tv); \ + *tv = n; \ + clib_slist_add (&tm->slist, tv, tv - tm->random_pool); \ + fformat(stdout, "%U\n", format_slist, &tm->slist, 1 /* verbose */); \ + } while (0); + foreach_simple_test; +#undef _ + } + + return; +} + +word +test_compare (void *key, u32 elt_index) +{ + u32 *k = (u32 *) key; + u32 elt = test_main.random_pool[elt_index]; + + if (*k < elt) + return -1; + if (*k > elt) + return 1; + return 0; +} + +u8 * +test_format (u8 * s, va_list * args) +{ + u32 elt_index = va_arg (*args, u32); + u32 elt = test_main.random_pool[elt_index]; + + return format (s, "%u", elt); +} + +void +initialize_slist (test_main_t * tm) +{ + clib_slist_init (&tm->slist, tm->branching_factor, + test_compare, test_format); +} + +int +test_slist_main (unformat_input_t * input) +{ + test_main_t *tm = &test_main; + u32 tmp; + + tm->seed = 0xbabeb00b; + tm->iter = 100000; + tm->verbose = 1; + tm->branching_factor = 1.0 / 5.0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "seed %d", &tm->seed)) + continue; + else if (unformat (input, "iter %d", &tm->iter)) + continue; + else if (unformat (input, "verbose")) + tm->verbose = 1; + else if (unformat (input, "branch %d", &tmp)) + { + if (tmp > 0) + tm->branching_factor = 1.0 / (f64) tmp; + else + fformat (stderr, "warning: branch = 0, ignored\n"); + } + else + { + clib_error ("unknown input `%U'", format_unformat_error, input); + goto usage; + } + } + initialize_slist 
(tm); + run_test (tm); + + return 0; + +usage: + fformat (stderr, "usage: test_slist seed <seed> iter <iter> [verbose]\n"); + return 1; + +} + +#ifdef CLIB_UNIX +int +main (int argc, char *argv[]) +{ + unformat_input_t i; + int ret; + + clib_mem_init (0, (u64) 4 << 30); + + unformat_init_command_line (&i, argv); + ret = test_slist_main (&i); + unformat_free (&i); + + return ret; +} +#endif /* CLIB_UNIX */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/test_timing_wheel.c b/extras/deprecated/vppinfra/test_timing_wheel.c new file mode 100644 index 00000000000..48020d520a0 --- /dev/null +++ b/extras/deprecated/vppinfra/test_timing_wheel.c @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vppinfra/bitmap.h> +#include <vppinfra/error.h> +#include <vppinfra/format.h> +#include <vppinfra/pool.h> +#include <vppinfra/random.h> +#include <vppinfra/time.h> +#include <vppinfra/timing_wheel.h> +#include <vppinfra/zvec.h> + +#include <vppinfra/math.h> + +#if __GNUC__ < 4 +#define SQRT(a) a +#else +#define SQRT(a) sqrt(a) +#endif + +typedef struct +{ + uword n_iter; + + u32 n_events; + u32 seed; + u32 verbose; + + /* Time is "synthetic" e.g. not taken from CPU timer. 
*/ + u32 synthetic_time; + + clib_time_t time; + timing_wheel_t timing_wheel; + + u64 *events; + + f64 max_time; + f64 wait_time; + + f64 total_iterate_time; + f64 time_iterate_start; + + f64 time_per_status_update; + f64 time_next_status_update; +} test_timing_wheel_main_t; + +typedef struct +{ + f64 dt; + f64 fraction; + u64 count; +} test_timing_wheel_tmp_t; + +static void +set_event (test_timing_wheel_main_t * tm, uword i) +{ + timing_wheel_t *w = &tm->timing_wheel; + u64 cpu_time; + + cpu_time = w->current_time_index << w->log2_clocks_per_bin; + if (tm->synthetic_time) + cpu_time += random_u32 (&tm->seed) % tm->n_iter; + else + cpu_time += + random_f64 (&tm->seed) * tm->max_time * tm->time.clocks_per_second; + + timing_wheel_insert (w, cpu_time, i); + timing_wheel_validate (w); + tm->events[i] = cpu_time; +} + +static int +test_timing_wheel_tmp_cmp (void *a1, void *a2) +{ + test_timing_wheel_tmp_t *f1 = a1; + test_timing_wheel_tmp_t *f2 = a2; + + return f1->dt < f2->dt ? -1 : (f1->dt > f2->dt ? 
+1 : 0); +} + +clib_error_t * +test_timing_wheel_main (unformat_input_t * input) +{ + clib_error_t *error = 0; + test_timing_wheel_main_t _tm, *tm = &_tm; + timing_wheel_t *w = &tm->timing_wheel; + uword iter, i; + + clib_memset (tm, 0, sizeof (tm[0])); + tm->n_iter = 10; + tm->time_per_status_update = 0; + tm->n_events = 100; + tm->seed = 1; + tm->synthetic_time = 1; + tm->max_time = 1; + tm->wait_time = 1e-3; + + w->validate = 0; + w->n_wheel_elt_time_bits = 32; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "iter %wd", &tm->n_iter)) + ; + else if (unformat (input, "events %d", &tm->n_events)) + ; + else + if (unformat (input, "elt-time-bits %d", &w->n_wheel_elt_time_bits)) + ; + else if (unformat (input, "seed %d", &tm->seed)) + ; + else if (unformat (input, "verbose")) + tm->verbose = 1; + else if (unformat (input, "validate")) + w->validate = 1; + + else if (unformat (input, "real-time")) + tm->synthetic_time = 0; + else if (unformat (input, "synthetic-time")) + tm->synthetic_time = 1; + else if (unformat (input, "max-time %f", &tm->max_time)) + ; + else if (unformat (input, "wait-time %f", &tm->wait_time)) + ; + else if (unformat (input, "iter-time %f", &tm->total_iterate_time)) + ; + else if (unformat (input, "print %f", &tm->time_per_status_update)) + ; + + else + { + error = clib_error_create ("unknown input `%U'\n", + format_unformat_error, input); + goto done; + } + } + + if (!tm->seed) + tm->seed = random_default_seed (); + + clib_time_init (&tm->time); + + if (tm->synthetic_time) + { + w->min_sched_time = tm->time.seconds_per_clock; + w->max_sched_time = w->min_sched_time * 256; + timing_wheel_init (w, 0, tm->time.clocks_per_second); + } + else + { + timing_wheel_init (w, clib_cpu_time_now (), tm->time.clocks_per_second); + } + + clib_warning ("iter %wd, events %d, seed %u, %U", + tm->n_iter, tm->n_events, tm->seed, + format_timing_wheel, &tm->timing_wheel, /* verbose */ 0); + + /* Make some events. 
*/ + vec_resize (tm->events, tm->n_events); + for (i = 0; i < vec_len (tm->events); i++) + set_event (tm, i); + + { + u32 *expired = 0; + f64 ave_error = 0; + f64 rms_error = 0; + f64 max_error = 0, min_error = 1e30; + u32 *error_hist = 0; + uword n_expired = 0; + uword *expired_bitmap[2] = { 0 }; + uword n_events_in_wheel = vec_len (tm->events); + + vec_resize (expired, 32); + vec_resize (error_hist, 1024); + + tm->time_iterate_start = clib_time_now (&tm->time); + tm->time_next_status_update = + tm->time_iterate_start + tm->time_per_status_update; + + if (tm->total_iterate_time != 0) + tm->n_iter = ~0; + + for (iter = 0; iter < tm->n_iter || n_events_in_wheel > 0; iter++) + { + u64 cpu_time, min_next_time[2]; + + if (tm->synthetic_time) + cpu_time = iter << w->log2_clocks_per_bin; + else + cpu_time = clib_cpu_time_now (); + + _vec_len (expired) = 0; + expired = + timing_wheel_advance (w, cpu_time, expired, &min_next_time[0]); + timing_wheel_validate (w); + + /* Update bitmap of expired events. */ + if (w->validate) + { + for (i = 0; i < vec_len (tm->events); i++) + { + uword is_expired; + + is_expired = + (cpu_time >> w->log2_clocks_per_bin) >= + (tm->events[i] >> w->log2_clocks_per_bin); + expired_bitmap[0] = + clib_bitmap_set (expired_bitmap[0], i, is_expired); + + /* Validate min next time. */ + if (is_expired) + ASSERT (min_next_time[0] > tm->events[i]); + else + ASSERT (min_next_time[0] <= tm->events[i]); + } + } + + n_expired += vec_len (expired); + for (i = 0; i < vec_len (expired); i++) + { + word j, idt; + i64 dt_cpu; + f64 fdt_cpu; + + j = expired[i]; + expired_bitmap[1] = clib_bitmap_ori (expired_bitmap[1], j); + + dt_cpu = cpu_time - tm->events[j]; + + /* Event must be scheduled in correct bin. 
*/ + if (tm->synthetic_time) + ASSERT (dt_cpu >= 0 && dt_cpu <= (1 << w->log2_clocks_per_bin)); + + fdt_cpu = dt_cpu * tm->time.seconds_per_clock; + + ave_error += fdt_cpu; + rms_error += fdt_cpu * fdt_cpu; + + if (fdt_cpu > max_error) + max_error = fdt_cpu; + if (fdt_cpu < min_error) + min_error = fdt_cpu; + + idt = + (cpu_time >> w->log2_clocks_per_bin) - + (tm->events[j] >> w->log2_clocks_per_bin); + idt = zvec_signed_to_unsigned (idt); + vec_validate (error_hist, idt); + error_hist[idt] += 1; + } + + if (w->validate) + for (i = 0; i < vec_len (tm->events); i++) + { + int is_expired = clib_bitmap_get (expired_bitmap[0], i); + int is_expired_w = clib_bitmap_get (expired_bitmap[1], i); + ASSERT (is_expired == is_expired_w); + } + + min_next_time[1] = ~0; + for (i = 0; i < vec_len (tm->events); i++) + { + if (!clib_bitmap_get (expired_bitmap[1], i)) + min_next_time[1] = clib_min (min_next_time[1], tm->events[i]); + } + if (min_next_time[0] != min_next_time[1]) + clib_error ("min next time wrong 0x%Lx != 0x%Lx", min_next_time[0], + min_next_time[1]); + + if (tm->time_per_status_update != 0 + && clib_time_now (&tm->time) >= tm->time_next_status_update) + { + f64 ave = 0, rms = 0; + + tm->time_next_status_update += tm->time_per_status_update; + if (n_expired > 0) + { + ave = ave_error / n_expired; + rms = SQRT (rms_error / n_expired - ave * ave); + } + + clib_warning + ("%12wd iter done %10wd expired; ave. error %.4e +- %.4e, range %.4e %.4e", + iter, n_expired, ave, rms, min_error, max_error); + } + + if (tm->total_iterate_time != 0 + && (clib_time_now (&tm->time) - tm->time_iterate_start + >= tm->total_iterate_time)) + tm->n_iter = iter; + + /* Add new events to wheel to replace expired ones. 
*/ + n_events_in_wheel -= vec_len (expired); + if (iter < tm->n_iter) + { + for (i = 0; i < vec_len (expired); i++) + { + uword j = expired[i]; + set_event (tm, j); + expired_bitmap[1] = + clib_bitmap_andnoti (expired_bitmap[1], j); + } + n_events_in_wheel += vec_len (expired); + } + } + + ave_error /= n_expired; + rms_error = SQRT (rms_error / n_expired - ave_error * ave_error); + + clib_warning + ("%wd iter done %wd expired; ave. error %.4e +- %.4e, range %.4e %.4e", + 1 + iter, n_expired, ave_error, rms_error, min_error, max_error); + + { + test_timing_wheel_tmp_t *fs, *f; + f64 total_fraction; + + fs = 0; + for (i = 0; i < vec_len (error_hist); i++) + { + if (error_hist[i] == 0) + continue; + vec_add2 (fs, f, 1); + f->dt = + (((i64) zvec_unsigned_to_signed (i) << w->log2_clocks_per_bin) * + tm->time.seconds_per_clock); + f->fraction = (f64) error_hist[i] / (f64) n_expired; + f->count = error_hist[i]; + } + + vec_sort_with_function (fs, test_timing_wheel_tmp_cmp); + + total_fraction = 0; + vec_foreach (f, fs) + { + total_fraction += f->fraction; + if (f == fs) + fformat (stdout, "%=12s %=16s %=16s %s\n", "Error max", "Fraction", + "Total", "Count"); + fformat (stdout, "%12.4e %16.4f%% %16.4f%% %Ld\n", f->dt, + f->fraction * 100, total_fraction * 100, f->count); + } + } + + clib_warning ("%U", format_timing_wheel, w, /* verbose */ 1); + } + +done: + return error; +} + +#ifdef CLIB_UNIX +int +main (int argc, char *argv[]) +{ + unformat_input_t i; + clib_error_t *error; + + clib_mem_init (0, 64ULL << 20); + + unformat_init_command_line (&i, argv); + error = test_timing_wheel_main (&i); + unformat_free (&i); + if (error) + { + clib_error_report (error); + return 1; + } + else + return 0; +} +#endif /* CLIB_UNIX */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/test_vhash.c b/extras/deprecated/vppinfra/test_vhash.c new file mode 100644 index 
00000000000..594d46c38df --- /dev/null +++ b/extras/deprecated/vppinfra/test_vhash.c @@ -0,0 +1,759 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + Copyright (c) 2010 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +#if 0 +#ifdef __OPTIMIZE__ +#undef CLIB_DEBUG +#endif +#endif + +#include <vppinfra/bitmap.h> +#include <vppinfra/error.h> +#include <vppinfra/os.h> +#include <vppinfra/random.h> +#include <vppinfra/time.h> +#include <vppinfra/vhash.h> + +#ifdef CLIB_HAVE_VEC128 + +typedef struct +{ + u32 n_iter; + u32 seed; + u32 verbose; + u32 n_keys; + u32 log2_size; + u32 n_key_u32; + + u32 n_vectors_div_4; + u32 n_vectors_mod_4; + + u32 *keys; + u32 *results; + + u32 *vhash_get_key_indices; + u32 *vhash_get_results; + + u32 *vhash_key_indices; + u32 *vhash_results; + + vhash_t vhash; + + uword **key_hash; + + struct + { + u64 n_clocks; + u64 n_vectors; + u64 n_calls; + } get_stats, set_stats, unset_stats; +} test_vhash_main_t; + +always_inline u32 +test_vhash_key_gather (void *_tm, u32 vi, u32 wi, u32 n_key_u32s) +{ + test_vhash_main_t *tm = _tm; + ASSERT (n_key_u32s == tm->n_key_u32); + ASSERT (wi < n_key_u32s); + vi = vec_elt (tm->vhash_key_indices, vi); + return vec_elt (tm->keys, vi * n_key_u32s + wi); +} + +always_inline u32x4 +test_vhash_4key_gather (void *_tm, u32 vi, u32 wi, u32 n_key_u32s) +{ + test_vhash_main_t *tm = _tm; + u32 *p; + u32x4_union_t x; + + ASSERT (n_key_u32s == tm->n_key_u32); + ASSERT (wi < n_key_u32s); + + p = vec_elt_at_index (tm->vhash_key_indices, vi + 0); + x.as_u32[0] = tm->keys[p[0] * n_key_u32s + wi]; + x.as_u32[1] = tm->keys[p[1] * n_key_u32s + wi]; + x.as_u32[2] = tm->keys[p[2] * n_key_u32s + wi]; + x.as_u32[3] = tm->keys[p[3] * n_key_u32s + wi]; + return x.as_u32x4; +} + +always_inline u32 +test_vhash_get_result (void *_tm, + u32 vector_index, u32 result_index, u32 n_key_u32s) +{ + test_vhash_main_t *tm = _tm; + u32 *p = vec_elt_at_index (tm->vhash_results, vector_index); + p[0] = result_index; + return result_index; +} + +always_inline u32x4 +test_vhash_get_4result (void *_tm, + u32 vector_index, u32x4 results, u32 n_key_u32s) +{ + test_vhash_main_t *tm = _tm; + u32 *p = vec_elt_at_index (tm->vhash_results, vector_index); + *(u32x4 
*) p = results; + return results; +} + +always_inline u32 +test_vhash_set_result (void *_tm, + u32 vector_index, u32 old_result, u32 n_key_u32s) +{ + test_vhash_main_t *tm = _tm; + u32 *p = vec_elt_at_index (tm->vhash_results, vector_index); + u32 new_result = p[0]; + p[0] = old_result; + return new_result; +} + +always_inline u32 +test_vhash_unset_result (void *_tm, u32 i, u32 old_result, u32 n_key_u32s) +{ + test_vhash_main_t *tm = _tm; + u32 *p = vec_elt_at_index (tm->vhash_results, i); + p[0] = old_result; + return 0; +} + +#define _(N_KEY_U32) \ + always_inline u32 \ + test_vhash_key_gather_##N_KEY_U32 (void * _tm, u32 vi, u32 i) \ + { return test_vhash_key_gather (_tm, vi, i, N_KEY_U32); } \ + \ + always_inline u32x4 \ + test_vhash_key_gather_4_##N_KEY_U32 (void * _tm, u32 vi, u32 i) \ + { return test_vhash_4key_gather (_tm, vi, i, N_KEY_U32); } \ + \ + clib_pipeline_stage \ + (test_vhash_gather_keys_stage_##N_KEY_U32, \ + test_vhash_main_t *, tm, i, \ + { \ + vhash_gather_4key_stage \ + (&tm->vhash, \ + /* vector_index */ i, \ + test_vhash_key_gather_4_##N_KEY_U32, \ + tm, \ + N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_no_inline \ + (test_vhash_gather_keys_mod_stage_##N_KEY_U32, \ + test_vhash_main_t *, tm, i, \ + { \ + vhash_gather_key_stage \ + (&tm->vhash, \ + /* vector_index */ tm->n_vectors_div_4, \ + /* n_vectors */ tm->n_vectors_mod_4, \ + test_vhash_key_gather_##N_KEY_U32, \ + tm, \ + N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage \ + (test_vhash_hash_finalize_stage_##N_KEY_U32, \ + test_vhash_main_t *, tm, i, \ + { \ + vhash_finalize_stage (&tm->vhash, i, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_no_inline \ + (test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \ + test_vhash_main_t *, tm, i, \ + { \ + vhash_finalize_stage (&tm->vhash, tm->n_vectors_div_4, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage \ + (test_vhash_get_stage_##N_KEY_U32, \ + test_vhash_main_t *, tm, i, \ + { \ + vhash_get_4_stage (&tm->vhash, \ + /* vector_index */ i, \ + 
test_vhash_get_4result, \ + tm, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_no_inline \ + (test_vhash_get_mod_stage_##N_KEY_U32, \ + test_vhash_main_t *, tm, i, \ + { \ + vhash_get_stage (&tm->vhash, \ + /* vector_index */ tm->n_vectors_div_4, \ + /* n_vectors */ tm->n_vectors_mod_4, \ + test_vhash_get_result, \ + tm, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage \ + (test_vhash_set_stage_##N_KEY_U32, \ + test_vhash_main_t *, tm, i, \ + { \ + vhash_set_stage (&tm->vhash, \ + /* vector_index */ i, \ + /* n_vectors */ VECTOR_WORD_TYPE_LEN (u32), \ + test_vhash_set_result, \ + tm, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_no_inline \ + (test_vhash_set_mod_stage_##N_KEY_U32, \ + test_vhash_main_t *, tm, i, \ + { \ + vhash_set_stage (&tm->vhash, \ + /* vector_index */ tm->n_vectors_div_4, \ + /* n_vectors */ tm->n_vectors_mod_4, \ + test_vhash_set_result, \ + tm, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage \ + (test_vhash_unset_stage_##N_KEY_U32, \ + test_vhash_main_t *, tm, i, \ + { \ + vhash_unset_stage (&tm->vhash, \ + /* vector_index */ i, \ + /* n_vectors */ VECTOR_WORD_TYPE_LEN (u32), \ + test_vhash_unset_result, \ + tm, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_no_inline \ + (test_vhash_unset_mod_stage_##N_KEY_U32, \ + test_vhash_main_t *, tm, i, \ + { \ + vhash_unset_stage (&tm->vhash, \ + /* vector_index */ tm->n_vectors_div_4, \ + /* n_vectors */ tm->n_vectors_mod_4, \ + test_vhash_unset_result, \ + tm, N_KEY_U32); \ + }) + +_(1); +_(2); +_(3); +_(4); +_(5); +_(6); + +#undef _ + +#define _(N_KEY_U32) \ + clib_pipeline_stage \ + (test_vhash_hash_mix_stage_##N_KEY_U32, \ + test_vhash_main_t *, tm, i, \ + { \ + vhash_mix_stage (&tm->vhash, i, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_no_inline \ + (test_vhash_hash_mix_mod_stage_##N_KEY_U32, \ + test_vhash_main_t *, tm, i, \ + { \ + vhash_mix_stage (&tm->vhash, tm->n_vectors_div_4, N_KEY_U32); \ + }) + +_(4); +_(5); +_(6); + +#undef _ + +typedef enum +{ + GET, SET, UNSET, +} test_vhash_op_t; 
+ +static void +test_vhash_op (test_vhash_main_t * tm, + u32 * key_indices, + u32 * results, uword n_keys, test_vhash_op_t op) +{ + vhash_validate_sizes (&tm->vhash, tm->n_key_u32, n_keys); + + tm->vhash_results = results; + tm->vhash_key_indices = key_indices; + tm->n_vectors_div_4 = n_keys / 4; + tm->n_vectors_mod_4 = n_keys % 4; + + if (tm->n_vectors_div_4 > 0) + { + switch (tm->n_key_u32) + { + default: + ASSERT (0); + break; + +#define _(N_KEY_U32) \ + case N_KEY_U32: \ + if (op == GET) \ + clib_pipeline_run_3_stage \ + (tm->n_vectors_div_4, \ + tm, \ + test_vhash_gather_keys_stage_##N_KEY_U32, \ + test_vhash_hash_finalize_stage_##N_KEY_U32, \ + test_vhash_get_stage_##N_KEY_U32); \ + else if (op == SET) \ + clib_pipeline_run_3_stage \ + (tm->n_vectors_div_4, \ + tm, \ + test_vhash_gather_keys_stage_##N_KEY_U32, \ + test_vhash_hash_finalize_stage_##N_KEY_U32, \ + test_vhash_set_stage_##N_KEY_U32); \ + else \ + clib_pipeline_run_3_stage \ + (tm->n_vectors_div_4, \ + tm, \ + test_vhash_gather_keys_stage_##N_KEY_U32, \ + test_vhash_hash_finalize_stage_##N_KEY_U32, \ + test_vhash_unset_stage_##N_KEY_U32); \ + break; + + _(1); + _(2); + _(3); + +#undef _ + +#define _(N_KEY_U32) \ + case N_KEY_U32: \ + if (op == GET) \ + clib_pipeline_run_4_stage \ + (tm->n_vectors_div_4, \ + tm, \ + test_vhash_gather_keys_stage_##N_KEY_U32, \ + test_vhash_hash_mix_stage_##N_KEY_U32, \ + test_vhash_hash_finalize_stage_##N_KEY_U32, \ + test_vhash_get_stage_##N_KEY_U32); \ + else if (op == SET) \ + clib_pipeline_run_4_stage \ + (tm->n_vectors_div_4, \ + tm, \ + test_vhash_gather_keys_stage_##N_KEY_U32, \ + test_vhash_hash_mix_stage_##N_KEY_U32, \ + test_vhash_hash_finalize_stage_##N_KEY_U32, \ + test_vhash_set_stage_##N_KEY_U32); \ + else \ + clib_pipeline_run_4_stage \ + (tm->n_vectors_div_4, \ + tm, \ + test_vhash_gather_keys_stage_##N_KEY_U32, \ + test_vhash_hash_mix_stage_##N_KEY_U32, \ + test_vhash_hash_finalize_stage_##N_KEY_U32, \ + test_vhash_unset_stage_##N_KEY_U32); \ + 
break; + + _(4); + _(5); + _(6); + +#undef _ + } + } + + + if (tm->n_vectors_mod_4 > 0) + { + switch (tm->n_key_u32) + { + default: + ASSERT (0); + break; + +#define _(N_KEY_U32) \ + case N_KEY_U32: \ + if (op == GET) \ + clib_pipeline_run_3_stage \ + (1, \ + tm, \ + test_vhash_gather_keys_mod_stage_##N_KEY_U32, \ + test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \ + test_vhash_get_mod_stage_##N_KEY_U32); \ + else if (op == SET) \ + clib_pipeline_run_3_stage \ + (1, \ + tm, \ + test_vhash_gather_keys_mod_stage_##N_KEY_U32, \ + test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \ + test_vhash_set_mod_stage_##N_KEY_U32); \ + else \ + clib_pipeline_run_3_stage \ + (1, \ + tm, \ + test_vhash_gather_keys_mod_stage_##N_KEY_U32, \ + test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \ + test_vhash_unset_mod_stage_##N_KEY_U32); \ + break; + + _(1); + _(2); + _(3); + +#undef _ + +#define _(N_KEY_U32) \ + case N_KEY_U32: \ + if (op == GET) \ + clib_pipeline_run_4_stage \ + (1, \ + tm, \ + test_vhash_gather_keys_mod_stage_##N_KEY_U32, \ + test_vhash_hash_mix_mod_stage_##N_KEY_U32, \ + test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \ + test_vhash_get_mod_stage_##N_KEY_U32); \ + else if (op == SET) \ + clib_pipeline_run_4_stage \ + (1, \ + tm, \ + test_vhash_gather_keys_mod_stage_##N_KEY_U32, \ + test_vhash_hash_mix_mod_stage_##N_KEY_U32, \ + test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \ + test_vhash_set_mod_stage_##N_KEY_U32); \ + else \ + clib_pipeline_run_4_stage \ + (1, \ + tm, \ + test_vhash_gather_keys_mod_stage_##N_KEY_U32, \ + test_vhash_hash_mix_mod_stage_##N_KEY_U32, \ + test_vhash_hash_finalize_mod_stage_##N_KEY_U32, \ + test_vhash_unset_mod_stage_##N_KEY_U32); \ + break; + + _(4); + _(5); + _(6); + +#undef _ + } + } +} + +int +test_vhash_main (unformat_input_t * input) +{ + clib_error_t *error = 0; + test_vhash_main_t _tm, *tm = &_tm; + vhash_t *vh = &tm->vhash; + uword i, j; + + clib_memset (tm, 0, sizeof (tm[0])); + tm->n_iter = 100; + tm->seed = 1; + tm->n_keys = 
1; + tm->n_key_u32 = 1; + tm->log2_size = 8; + tm->verbose = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "iter %d", &tm->n_iter)) + ; + else if (unformat (input, "seed %d", &tm->seed)) + ; + else if (unformat (input, "n-keys %d", &tm->n_keys)) + ; + else if (unformat (input, "log2-size %d", &tm->log2_size)) + ; + else if (unformat (input, "key-words %d", &tm->n_key_u32)) + ; + else if (unformat (input, "verbose %=", &tm->verbose, 1)) + ; + else + { + error = clib_error_create ("unknown input `%U'\n", + format_unformat_error, input); + goto done; + } + } + + if (tm->seed == 0) + tm->seed = random_default_seed (); + + clib_warning ("iter %d seed %d n-keys %d log2-size %d key-words %d", + tm->n_iter, tm->seed, tm->n_keys, tm->log2_size, + tm->n_key_u32); + + { + u32 seeds[3]; + seeds[0] = seeds[1] = seeds[2] = 0xdeadbeef; + vhash_init (vh, tm->log2_size, tm->n_key_u32, seeds); + } + + /* Choose unique keys. */ + vec_resize (tm->keys, tm->n_keys * tm->n_key_u32); + vec_resize (tm->key_hash, tm->n_key_u32); + for (i = j = 0; i < vec_len (tm->keys); i++, j++) + { + j = j == tm->n_key_u32 ? 
0 : j; + do + { + tm->keys[i] = random_u32 (&tm->seed); + } + while (hash_get (tm->key_hash[j], tm->keys[i])); + hash_set (tm->key_hash[j], tm->keys[i], 0); + } + + vec_resize (tm->results, tm->n_keys); + for (i = 0; i < vec_len (tm->results); i++) + { + do + { + tm->results[i] = random_u32 (&tm->seed); + } + while (tm->results[i] == ~0); + } + + vec_resize_aligned (tm->vhash_get_results, tm->n_keys, + CLIB_CACHE_LINE_BYTES); + vec_clone (tm->vhash_get_key_indices, tm->results); + for (i = 0; i < vec_len (tm->vhash_get_key_indices); i++) + tm->vhash_get_key_indices[i] = i; + + { + uword *is_set_bitmap = 0; + uword *to_set_bitmap = 0; + uword *to_unset_bitmap = 0; + u32 *to_set = 0, *to_unset = 0; + u32 *to_set_results = 0, *to_unset_results = 0; + u64 t[2]; + + for (i = 0; i < tm->n_iter; i++) + { + vec_reset_length (to_set); + vec_reset_length (to_unset); + vec_reset_length (to_set_results); + vec_reset_length (to_unset_results); + + do + { + to_set_bitmap = clib_bitmap_random (to_set_bitmap, + tm->n_keys, &tm->seed); + } + while (clib_bitmap_is_zero (to_set_bitmap)); + to_unset_bitmap = clib_bitmap_dup_and (to_set_bitmap, is_set_bitmap); + to_set_bitmap = clib_bitmap_andnot (to_set_bitmap, to_unset_bitmap); + + /* *INDENT-OFF* */ + clib_bitmap_foreach (j, to_set_bitmap, ({ + vec_add1 (to_set, j); + vec_add1 (to_set_results, tm->results[j]); + })); + /* *INDENT-ON* */ + /* *INDENT-OFF* */ + clib_bitmap_foreach (j, to_unset_bitmap, ({ + vec_add1 (to_unset, j); + vec_add1 (to_unset_results, 0xdeadbeef); + })); + /* *INDENT-ON* */ + + if (vec_len (to_set) > 0) + { + t[0] = clib_cpu_time_now (); + test_vhash_op (tm, to_set, to_set_results, vec_len (to_set), SET); + t[1] = clib_cpu_time_now (); + tm->set_stats.n_clocks += t[1] - t[0]; + tm->set_stats.n_vectors += vec_len (to_set); + tm->set_stats.n_calls += 1; + is_set_bitmap = clib_bitmap_or (is_set_bitmap, to_set_bitmap); + } + + t[0] = clib_cpu_time_now (); + test_vhash_op (tm, tm->vhash_get_key_indices, + 
tm->vhash_get_results, + vec_len (tm->vhash_get_key_indices), GET); + t[1] = clib_cpu_time_now (); + tm->get_stats.n_clocks += t[1] - t[0]; + tm->get_stats.n_vectors += vec_len (tm->vhash_get_key_indices); + tm->get_stats.n_calls += 1; + + for (j = 0; j < vec_len (tm->vhash_get_results); j++) + { + u32 r0 = tm->vhash_get_results[j]; + u32 r1 = tm->results[j]; + if (clib_bitmap_get (is_set_bitmap, j)) + { + if (r0 != r1) + os_panic (); + } + else + { + if (r0 != ~0) + os_panic (); + } + } + + if (vh->n_elts != clib_bitmap_count_set_bits (is_set_bitmap)) + os_panic (); + + if (vec_len (to_unset) > 0) + { + t[0] = clib_cpu_time_now (); + test_vhash_op (tm, to_unset, to_unset_results, + vec_len (to_unset), UNSET); + t[1] = clib_cpu_time_now (); + tm->unset_stats.n_clocks += t[1] - t[0]; + tm->unset_stats.n_vectors += vec_len (to_unset); + tm->unset_stats.n_calls += 1; + is_set_bitmap = + clib_bitmap_andnot (is_set_bitmap, to_unset_bitmap); + } + + t[0] = clib_cpu_time_now (); + test_vhash_op (tm, tm->vhash_get_key_indices, + tm->vhash_get_results, + vec_len (tm->vhash_get_key_indices), GET); + t[1] = clib_cpu_time_now (); + tm->get_stats.n_clocks += t[1] - t[0]; + tm->get_stats.n_vectors += vec_len (tm->vhash_get_key_indices); + tm->get_stats.n_calls += 1; + + for (j = 0; j < vec_len (tm->vhash_get_results); j++) + { + u32 r0 = tm->vhash_get_results[j]; + u32 r1 = tm->results[j]; + if (clib_bitmap_get (is_set_bitmap, j)) + { + if (r0 != r1) + os_panic (); + } + else + { + if (r0 != ~0) + os_panic (); + } + } + + if (vh->n_elts != clib_bitmap_count_set_bits (is_set_bitmap)) + os_panic (); + } + + vhash_resize (vh, tm->log2_size + 1); + + test_vhash_op (tm, tm->vhash_get_key_indices, + tm->vhash_get_results, + vec_len (tm->vhash_get_key_indices), GET); + + for (j = 0; j < vec_len (tm->vhash_get_results); j++) + { + u32 r0 = tm->vhash_get_results[j]; + u32 r1 = tm->results[j]; + if (clib_bitmap_get (is_set_bitmap, j)) + { + if (r0 != r1) + os_panic (); + } + else + { + if 
(r0 != ~0) + os_panic (); + } + } + + if (vh->n_elts != clib_bitmap_count_set_bits (is_set_bitmap)) + os_panic (); + } + + { + clib_time_t ct; + + clib_time_init (&ct); + + clib_warning ("%.4e clocks/get %.4e gets/call %.4e gets/sec", + (f64) tm->get_stats.n_clocks / + (f64) tm->get_stats.n_vectors, + (f64) tm->get_stats.n_vectors / (f64) tm->get_stats.n_calls, + (f64) tm->get_stats.n_vectors / + (f64) (tm->get_stats.n_clocks * ct.seconds_per_clock)); + if (tm->set_stats.n_calls > 0) + clib_warning ("%.4e clocks/set %.4e sets/call %.4e sets/sec", + (f64) tm->set_stats.n_clocks / + (f64) tm->set_stats.n_vectors, + (f64) tm->set_stats.n_vectors / + (f64) tm->set_stats.n_calls, + (f64) tm->set_stats.n_vectors / + (f64) (tm->set_stats.n_clocks * ct.seconds_per_clock)); + if (tm->unset_stats.n_calls > 0) + clib_warning ("%.4e clocks/unset %.4e unsets/call %.4e unsets/sec", + (f64) tm->unset_stats.n_clocks / + (f64) tm->unset_stats.n_vectors, + (f64) tm->unset_stats.n_vectors / + (f64) tm->unset_stats.n_calls, + (f64) tm->unset_stats.n_vectors / + (f64) (tm->unset_stats.n_clocks * ct.seconds_per_clock)); + } + +done: + if (error) + clib_error_report (error); + return 0; +} + +#endif /* CLIB_HAVE_VEC128 */ + +#ifndef CLIB_HAVE_VEC128 +int +test_vhash_main (unformat_input_t * input) +{ + clib_error ("compiled without vector support"); + return 0; +} +#endif + +#ifdef CLIB_UNIX +int +main (int argc, char *argv[]) +{ + unformat_input_t i; + int r; + + clib_mem_init (0, 64ULL << 20); + + unformat_init_command_line (&i, argv); + r = test_vhash_main (&i); + unformat_free (&i); + return r; +} +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/test_zvec.c b/extras/deprecated/vppinfra/test_zvec.c new file mode 100644 index 00000000000..7d777fabf83 --- /dev/null +++ b/extras/deprecated/vppinfra/test_zvec.c @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2015 Cisco and/or 
its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + Copyright (c) 2005 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#include <vppinfra/zvec.h> +#include <vppinfra/format.h> +#include <vppinfra/random.h> + +static int verbose; +#define if_verbose(format,args...) 
\ + if (verbose) { clib_warning(format, ## args); } + +int +test_zvec_main (unformat_input_t * input) +{ + uword n_iterations; + uword i; + u32 seed; + + n_iterations = 1024; + seed = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (0 == unformat (input, "iter %d", &n_iterations) + && 0 == unformat (input, "seed %d", &seed)) + clib_error ("unknown input `%U'", format_unformat_error, input); + } + + if_verbose ("%d iterations, seed %d\n", n_iterations, seed); + + for (i = 0; i < n_iterations; i++) + { + uword coding, data, d[2], limit, n_zdata_bits[2]; + + if (seed) + coding = random_u32 (&seed); + else + coding = i; + + limit = coding - 1; + if (limit > (1 << 16)) + limit = 1 << 16; + for (data = 0; data <= limit; data++) + { + d[0] = zvec_encode (coding, data, &n_zdata_bits[0]); + + if (coding != 0) + ASSERT ((d[0] >> n_zdata_bits[0]) == 0); + + d[1] = zvec_decode (coding, d[0], &n_zdata_bits[1]); + ASSERT (data == d[1]); + + ASSERT (n_zdata_bits[0] == n_zdata_bits[1]); + } + } + + return 0; +} + +#ifdef CLIB_UNIX +int +main (int argc, char *argv[]) +{ + unformat_input_t i; + int ret; + + clib_mem_init (0, 64ULL << 20); + + verbose = (argc > 1); + unformat_init_command_line (&i, argv); + ret = test_zvec_main (&i); + unformat_free (&i); + + return ret; +} +#endif /* CLIB_UNIX */ + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/unix-kelog.c b/extras/deprecated/vppinfra/unix-kelog.c new file mode 100644 index 00000000000..88428ee8f2e --- /dev/null +++ b/extras/deprecated/vppinfra/unix-kelog.c @@ -0,0 +1,415 @@ +/* + Copyright (c) 2010 Cisco and/or its affiliates. + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#include <vppinfra/error.h> +#include <vppinfra/unix.h> +#include <vppinfra/elog.h> +#include <vppinfra/format.h> +#include <vppinfra/os.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> + +typedef enum +{ + RUNNING = 0, + WAKEUP, +} sched_event_type_t; + +typedef struct +{ + u32 cpu; + u8 *task; + u32 pid; + f64 timestamp; + sched_event_type_t type; +} sched_event_t; + +void +kelog_init (elog_main_t * em, char *kernel_tracer, u32 n_events) +{ + int enable_fd, current_tracer_fd, data_fd; + int len; + struct timespec ts, ts2; + char *trace_enable = "/debug/tracing/tracing_enabled"; + char *current_tracer = "/debug/tracing/current_tracer"; + char *trace_data = "/debug/tracing/trace"; + f64 realtime, monotonic; + f64 freq, secs_per_clock; + + ASSERT (kernel_tracer); + + /*$$$$ fixme */ + n_events = 1 << 18; + + /* init first so we won't hurt ourselves if we bail */ + elog_init (em, n_events); + + enable_fd = open (trace_enable, O_RDWR); + if (enable_fd < 0) + { + clib_warning ("Couldn't open %s", trace_enable); + return; + } + /* disable kernel tracing */ + if (write (enable_fd, "0\n", 2) != 2) + { + clib_unix_warning ("disable tracing"); + close (enable_fd); + return; + } + + /* + * open + clear the data buffer. 
+ * see .../linux/kernel/trace/trace.c:tracing_open() + */ + data_fd = open (trace_data, O_RDWR | O_TRUNC); + if (data_fd < 0) + { + clib_warning ("Couldn't open+clear %s", trace_data); + return; + } + close (data_fd); + + /* configure tracing */ + current_tracer_fd = open (current_tracer, O_RDWR); + + if (current_tracer_fd < 0) + { + clib_warning ("Couldn't open %s", current_tracer); + close (enable_fd); + return; + } + + len = strlen (kernel_tracer); + + if (write (current_tracer_fd, kernel_tracer, len) != len) + { + clib_unix_warning ("configure trace"); + close (current_tracer_fd); + close (enable_fd); + return; + } + + close (current_tracer_fd); + + /* + * The kernel event log uses CLOCK_MONOTONIC timestamps, + * not CLOCK_REALTIME timestamps. These differ by a constant + * but the constant is not available in user mode. + * This estimate will be off by one syscall round-trip. + */ + clib_time_init (&em->cpu_timer); + em->init_time.cpu = em->cpu_timer.init_cpu_time; + syscall (SYS_clock_gettime, CLOCK_MONOTONIC, &ts); + + /* enable kernel tracing */ + if (write (enable_fd, "1\n", 2) != 2) + { + clib_unix_warning ("enable tracing"); + close (enable_fd); + return; + } + + close (enable_fd); +} + + +u8 * +format_sched_event (u8 * s, va_list * va) +{ + sched_event_t *e = va_arg (*va, sched_event_t *); + + s = format (s, "cpu %d task %10s type %s timestamp %12.6f\n", + e->cpu, e->task, e->type ? 
"WAKEUP " : "RUNNING", e->timestamp); + + return s; +} + +sched_event_t * +parse_sched_switch_trace (u8 * tdata, u32 * index) +{ + u8 *cp = tdata + *index; + u8 *limit = tdata + vec_len (tdata); + int colons; + static sched_event_t event; + sched_event_t *e = &event; + static u8 *task_name; + u32 secs, usecs; + int i; + +again: + /* eat leading w/s */ + while (cp < limit && (*cp == ' ' && *cp == '\t')) + cp++; + if (cp == limit) + return 0; + + /* header line */ + if (*cp == '#') + { + while (cp < limit && (*cp != '\n')) + cp++; + if (*cp == '\n') + { + cp++; + goto again; + } + clib_warning ("bugger 0"); + return 0; + } + + while (cp < limit && *cp != ']') + cp++; + + if (*cp == 0) + return 0; + + if (*cp != ']') + { + clib_warning ("bugger 0.1"); + return 0; + } + + cp++; + while (cp < limit && (*cp == ' ' && *cp == '\t')) + cp++; + if (cp == limit) + { + clib_warning ("bugger 0.2"); + return 0; + } + + secs = atoi (cp); + + while (cp < limit && (*cp != '.')) + cp++; + + if (cp == limit) + { + clib_warning ("bugger 0.3"); + return 0; + } + + cp++; + + usecs = atoi (cp); + + e->timestamp = ((f64) secs) + ((f64) usecs) * 1e-6; + + /* eat up to third colon */ + for (i = 0; i < 3; i++) + { + while (cp < limit && *cp != ':') + cp++; + cp++; + } + --cp; + if (*cp != ':') + { + clib_warning ("bugger 1"); + return 0; + } + /* aim at '>' (switch-to) / '+' (wakeup) */ + cp += 5; + if (cp >= limit) + { + clib_warning ("bugger 2"); + return 0; + } + if (*cp == '>') + e->type = RUNNING; + else if (*cp == '+') + e->type = WAKEUP; + else + { + clib_warning ("bugger 3"); + return 0; + } + + cp += 3; + if (cp >= limit) + { + clib_warning ("bugger 4"); + return 0; + } + + e->cpu = atoi (cp); + cp += 4; + + if (cp >= limit) + { + clib_warning ("bugger 4"); + return 0; + } + while (cp < limit && (*cp == ' ' || *cp == '\t')) + cp++; + + e->pid = atoi (cp); + + for (i = 0; i < 2; i++) + { + while (cp < limit && *cp != ':') + cp++; + cp++; + } + --cp; + if (*cp != ':') + { + 
clib_warning ("bugger 5"); + return 0; + } + + cp += 3; + if (cp >= limit) + { + clib_warning ("bugger 6"); + return 0; + } + while (cp < limit && (*cp != ' ' && *cp != '\n')) + { + vec_add1 (task_name, *cp); + cp++; + } + vec_add1 (task_name, 0); + /* _vec_len() = 0 in caller */ + e->task = task_name; + + if (cp < limit) + cp++; + + *index = cp - tdata; + return e; +} + +static u32 +elog_id_for_pid (elog_main_t * em, u8 * name, u32 pid) +{ + uword *p, r; + mhash_t *h = &em->string_table_hash; + + if (!em->string_table_hash.hash) + mhash_init (h, sizeof (uword), sizeof (pid)); + + p = mhash_get (h, &pid); + if (p) + return p[0]; + r = elog_string (em, "%s(%d)", name, pid); + mhash_set (h, &pid, r, /* old_value */ 0); + return r; +} + +void +kelog_collect_sched_switch_trace (elog_main_t * em) +{ + int enable_fd, data_fd; + char *trace_enable = "/debug/tracing/tracing_enabled"; + char *trace_data = "/debug/tracing/trace"; + u8 *data = 0; + u8 *dp; + int bytes, total_bytes; + u32 pos; + sched_event_t *evt; + u64 nsec_to_add; + u32 index; + f64 clocks_per_sec; + + enable_fd = open (trace_enable, O_RDWR); + if (enable_fd < 0) + { + clib_warning ("Couldn't open %s", trace_enable); + return; + } + /* disable kernel tracing */ + if (write (enable_fd, "0\n", 2) != 2) + { + clib_unix_warning ("disable tracing"); + close (enable_fd); + return; + } + close (enable_fd); + + /* Read the trace data */ + data_fd = open (trace_data, O_RDWR); + if (data_fd < 0) + { + clib_warning ("Couldn't open %s", trace_data); + return; + } + + /* + * Extract trace into a vector. Note that seq_printf() [kernel] + * is not guaranteed to produce 4096 bytes at a time. 
+ */ + vec_validate (data, 4095); + total_bytes = 0; + pos = 0; + while (1) + { + bytes = read (data_fd, data + pos, 4096); + if (bytes <= 0) + break; + + total_bytes += bytes; + _vec_len (data) = total_bytes; + + pos = vec_len (data); + vec_validate (data, vec_len (data) + 4095); + } + vec_add1 (data, 0); + + /* Synthesize events */ + em->is_enabled = 1; + + index = 0; + while ((evt = parse_sched_switch_trace (data, &index))) + { + u64 fake_cpu_clock; + + fake_cpu_clock = evt->timestamp * em->cpu_timer.clocks_per_second; + { + ELOG_TYPE_DECLARE (e) = + { + .format = "%d: %s %s",.format_args = "i4T4t4",.n_enum_strings = + 2,.enum_strings = + { + "running", "wakeup",} + ,}; + struct + { + u32 cpu, string_table_offset, which; + } *ed; + + ed = elog_event_data_not_inline (em, &__ELOG_TYPE_VAR (e), + &em->default_track, fake_cpu_clock); + ed->cpu = evt->cpu; + ed->string_table_offset = elog_id_for_pid (em, evt->task, evt->pid); + ed->which = evt->type; + } + _vec_len (evt->task) = 0; + } + em->is_enabled = 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/vhash.c b/extras/deprecated/vppinfra/vhash.c new file mode 100644 index 00000000000..9120f502c91 --- /dev/null +++ b/extras/deprecated/vppinfra/vhash.c @@ -0,0 +1,772 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + Copyright (c) 2010 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#include <vppinfra/vhash.h> + +#ifdef CLIB_HAVE_VEC128 + +/* Overflow search buckets have an extra u32x4 for saving key_hash data. + This makes it easier to refill main search bucket from overflow vector. */ +typedef struct +{ + /* 4 results for this bucket. */ + u32x4_union_t result; + + /* 4 hash codes for this bucket. These are used to refill main + search buckets from overflow buckets when space becomes available. */ + u32x4_union_t key_hash; + + /* n_key_u32s u32x4s of key data follow. 
*/ + u32x4_union_t key[0]; +} vhash_overflow_search_bucket_t; + +always_inline void +set_overflow_result (vhash_overflow_search_bucket_t * b, + u32 i, u32 result, u32 key_hash) +{ + b->result.as_u32[i] = result; + b->key_hash.as_u32[i] = key_hash; +} + +always_inline void +free_overflow_bucket (vhash_overflow_buckets_t * ob, + vhash_overflow_search_bucket_t * b, u32 i) +{ + u32 o = (u32x4_union_t *) b - ob->search_buckets; + ASSERT (o < vec_len (ob->search_buckets)); + vec_add1 (ob->free_indices, 4 * o + i); +} + +always_inline vhash_overflow_search_bucket_t * +get_overflow_search_bucket (vhash_overflow_buckets_t * obs, u32 i, + u32 n_key_u32s) +{ + return ((vhash_overflow_search_bucket_t *) + vec_elt_at_index (obs->search_buckets, i)); +} + +always_inline vhash_overflow_search_bucket_t * +next_overflow_bucket (vhash_overflow_search_bucket_t * b, u32 n_key_u32s) +{ + return (vhash_overflow_search_bucket_t *) & b->key[n_key_u32s]; +} + +#define foreach_vhash_overflow_bucket(b,ob,n_key_u32s) \ + for ((b) = (vhash_overflow_search_bucket_t *) ob->search_buckets; \ + (u32x4_union_t *) (b) < vec_end (ob->search_buckets); \ + b = next_overflow_bucket (b, n_key_u32s)) + +u32 +vhash_get_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s) +{ + vhash_overflow_buckets_t *ob = vhash_get_overflow_buckets (h, key_hash); + vhash_overflow_search_bucket_t *b; + u32 i, result = 0; + + foreach_vhash_overflow_bucket (b, ob, n_key_u32s) + { + u32x4 r = b->result.as_u32x4; + + for (i = 0; i < n_key_u32s; i++) + r &= vhash_bucket_compare (h, &b->key[0], i, vi); + + result = vhash_merge_results (r); + if (result) + break; + } + + return result; +} + +u32 +vhash_set_overflow (vhash_t * h, + u32 key_hash, u32 vi, u32 new_result, u32 n_key_u32s) +{ + vhash_overflow_buckets_t *ob = vhash_get_overflow_buckets (h, key_hash); + vhash_overflow_search_bucket_t *b; + u32 i_set, i, old_result; + + foreach_vhash_overflow_bucket (b, ob, n_key_u32s) + { + u32x4 r; + + r = b->result.as_u32x4; + 
for (i = 0; i < n_key_u32s; i++) + r &= vhash_bucket_compare (h, &b->key[0], i, vi); + + old_result = vhash_merge_results (r); + if (old_result) + { + i_set = vhash_non_empty_result_index (r); + set_overflow_result (b, i_set, new_result, key_hash); + return old_result; + } + } + + /* Check free list. */ + if (vec_len (ob->free_indices) == 0) + { + /* Out of free overflow buckets. Resize. */ + u32 j, *p; + i = vec_len (ob->search_buckets); + vec_resize_aligned (ob->search_buckets, + sizeof (b[0]) / sizeof (u32x4) + n_key_u32s, + CLIB_CACHE_LINE_BYTES); + vec_add2 (ob->free_indices, p, 4); + for (j = 0; j < 4; j++) + p[j] = 4 * i + j; + } + + i = vec_pop (ob->free_indices); + + i_set = i & 3; + b = ((vhash_overflow_search_bucket_t *) + vec_elt_at_index (ob->search_buckets, i / 4)); + + /* Insert result. */ + set_overflow_result (b, i_set, new_result, key_hash); + + /* Insert key. */ + for (i = 0; i < n_key_u32s; i++) + b->key[i].as_u32[i_set] = vhash_get_key_word (h, i, vi); + + ob->n_overflow++; + h->n_elts++; + + return /* old result was invalid */ 0; +} + +u32 +vhash_unset_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s) +{ + vhash_overflow_buckets_t *ob = vhash_get_overflow_buckets (h, key_hash); + vhash_overflow_search_bucket_t *b; + u32 i_set, i, old_result; + + foreach_vhash_overflow_bucket (b, ob, n_key_u32s) + { + u32x4 r; + + r = b->result.as_u32x4; + for (i = 0; i < n_key_u32s; i++) + r &= vhash_bucket_compare (h, &b->key[0], i, vi); + + old_result = vhash_merge_results (r); + if (old_result) + { + i_set = vhash_non_empty_result_index (r); + + /* Invalidate result and invert key hash so that this will + never match since all keys in this overflow bucket have + matching key hashs. */ + set_overflow_result (b, i_set, 0, ~key_hash); + + free_overflow_bucket (ob, b, i_set); + + ASSERT (ob->n_overflow > 0); + ob->n_overflow--; + h->n_elts--; + return old_result; + } + } + + /* Could not find key. 
*/ + return 0; +} + +void +vhash_unset_refill_from_overflow (vhash_t * h, + vhash_search_bucket_t * sb, + u32 key_hash, u32 n_key_u32s) +{ + vhash_overflow_buckets_t *obs = vhash_get_overflow_buckets (h, key_hash); + vhash_overflow_search_bucket_t *ob; + u32 i, j, i_refill, bucket_mask = h->bucket_mask.as_u32[0]; + + /* Find overflow element with matching key hash. */ + foreach_vhash_overflow_bucket (ob, obs, n_key_u32s) + { + for (i = 0; i < 4; i++) + { + if (!ob->result.as_u32[i]) + continue; + if ((ob->key_hash.as_u32[i] & bucket_mask) + != (key_hash & bucket_mask)) + continue; + + i_refill = vhash_empty_result_index (sb->result.as_u32x4); + sb->result.as_u32[i_refill] = ob->result.as_u32[i]; + for (j = 0; j < n_key_u32s; j++) + sb->key[j].as_u32[i_refill] = ob->key[j].as_u32[i]; + set_overflow_result (ob, i, 0, ~key_hash); + free_overflow_bucket (obs, ob, i); + return; + } + } +} + +void +vhash_init (vhash_t * h, u32 log2_n_keys, u32 n_key_u32, u32 * hash_seeds) +{ + uword i, j, m; + vhash_search_bucket_t *b; + + clib_memset (h, 0, sizeof (h[0])); + + /* Must have at least 4 keys (e.g. one search bucket). */ + log2_n_keys = clib_max (log2_n_keys, 2); + + h->log2_n_keys = log2_n_keys; + h->n_key_u32 = n_key_u32; + m = pow2_mask (h->log2_n_keys) & ~3; + for (i = 0; i < VECTOR_WORD_TYPE_LEN (u32); i++) + h->bucket_mask.as_u32[i] = m; + + /* Allocate and zero search buckets. 
*/ + i = (sizeof (b[0]) / sizeof (u32x4) + n_key_u32) << (log2_n_keys - 2); + vec_validate_aligned (h->search_buckets, i - 1, CLIB_CACHE_LINE_BYTES); + + for (i = 0; i < ARRAY_LEN (h->find_first_zero_table); i++) + h->find_first_zero_table[i] = min_log2 (first_set (~i)); + + for (i = 0; i < ARRAY_LEN (h->hash_seeds); i++) + for (j = 0; j < VECTOR_WORD_TYPE_LEN (u32); j++) + h->hash_seeds[i].as_u32[j] = hash_seeds[i]; +} + +static_always_inline u32 +vhash_main_key_gather (void *_vm, u32 vi, u32 wi, u32 n_key_u32) +{ + vhash_main_t *vm = _vm; + return vec_elt (vm->keys, vi * n_key_u32 + wi); +} + +static_always_inline u32x4 +vhash_main_4key_gather (void *_vm, u32 vi, u32 wi, u32 n_key_u32s) +{ + vhash_main_t *vm = _vm; + u32x4_union_t x; + + ASSERT (n_key_u32s == vm->n_key_u32); + ASSERT (wi < n_key_u32s); + + x.as_u32[0] = vec_elt (vm->keys, (vi + 0) * n_key_u32s + wi); + x.as_u32[1] = vec_elt (vm->keys, (vi + 1) * n_key_u32s + wi); + x.as_u32[2] = vec_elt (vm->keys, (vi + 2) * n_key_u32s + wi); + x.as_u32[3] = vec_elt (vm->keys, (vi + 3) * n_key_u32s + wi); + return x.as_u32x4; +} + +static_always_inline u32 +vhash_main_set_result (void *_vm, u32 vi, u32 old_result, u32 n_key_u32) +{ + vhash_main_t *vm = _vm; + u32 *p = vec_elt_at_index (vm->results, vi); + u32 new_result = p[0]; + p[0] = old_result; + return new_result; +} + +static_always_inline u32 +vhash_main_get_result (void *_vm, u32 vi, u32 old_result, u32 n_key_u32) +{ + vhash_main_t *vm = _vm; + vec_elt (vm->results, vi) = old_result; + return old_result; +} + +static_always_inline u32x4 +vhash_main_get_4result (void *_vm, u32 vi, u32x4 old_result, u32 n_key_u32) +{ + vhash_main_t *vm = _vm; + u32x4 *p = (u32x4 *) vec_elt_at_index (vm->results, vi); + p[0] = old_result; + return old_result; +} + +#define _(N_KEY_U32) \ + static_always_inline u32 \ + vhash_main_key_gather_##N_KEY_U32 (void * _vm, u32 vi, u32 i) \ + { return vhash_main_key_gather (_vm, vi, i, N_KEY_U32); } \ + \ + static_always_inline u32x4 
\ + vhash_main_4key_gather_##N_KEY_U32 (void * _vm, u32 vi, u32 i) \ + { return vhash_main_4key_gather (_vm, vi, i, N_KEY_U32); } \ + \ + clib_pipeline_stage_static \ + (vhash_main_gather_keys_stage_##N_KEY_U32, \ + vhash_main_t *, vm, i, \ + { \ + vhash_gather_4key_stage \ + (vm->vhash, \ + /* vector_index */ i, \ + vhash_main_4key_gather_##N_KEY_U32, \ + vm, \ + N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_no_inline \ + (vhash_main_gather_keys_mod_stage_##N_KEY_U32, \ + vhash_main_t *, vm, i, \ + { \ + vhash_gather_key_stage \ + (vm->vhash, \ + /* vector_index */ vm->n_vectors_div_4, \ + /* n_vectors */ vm->n_vectors_mod_4, \ + vhash_main_key_gather_##N_KEY_U32, \ + vm, \ + N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage \ + (vhash_main_hash_finalize_stage_##N_KEY_U32, \ + vhash_main_t *, vm, i, \ + { \ + vhash_finalize_stage (vm->vhash, i, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_no_inline \ + (vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \ + vhash_main_t *, vm, i, \ + { \ + vhash_finalize_stage (vm->vhash, vm->n_vectors_div_4, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_static \ + (vhash_main_get_stage_##N_KEY_U32, \ + vhash_main_t *, vm, i, \ + { \ + vhash_get_4_stage (vm->vhash, \ + /* vector_index */ i, \ + vhash_main_get_4result, \ + vm, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_no_inline \ + (vhash_main_get_mod_stage_##N_KEY_U32, \ + vhash_main_t *, vm, i, \ + { \ + vhash_get_stage (vm->vhash, \ + /* vector_index */ vm->n_vectors_div_4, \ + /* n_vectors */ vm->n_vectors_mod_4, \ + vhash_main_get_result, \ + vm, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_static \ + (vhash_main_set_stage_##N_KEY_U32, \ + vhash_main_t *, vm, i, \ + { \ + vhash_set_stage (vm->vhash, \ + /* vector_index */ i, \ + /* n_vectors */ VECTOR_WORD_TYPE_LEN (u32), \ + vhash_main_set_result, \ + vm, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_no_inline \ + (vhash_main_set_mod_stage_##N_KEY_U32, \ + vhash_main_t *, vm, i, \ + { \ + vhash_set_stage (vm->vhash, \ + 
/* vector_index */ vm->n_vectors_div_4, \ + /* n_vectors */ vm->n_vectors_mod_4, \ + vhash_main_set_result, \ + vm, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_static \ + (vhash_main_unset_stage_##N_KEY_U32, \ + vhash_main_t *, vm, i, \ + { \ + vhash_unset_stage (vm->vhash, \ + /* vector_index */ i, \ + /* n_vectors */ VECTOR_WORD_TYPE_LEN (u32), \ + vhash_main_get_result, \ + vm, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_no_inline \ + (vhash_main_unset_mod_stage_##N_KEY_U32, \ + vhash_main_t *, vm, i, \ + { \ + vhash_unset_stage (vm->vhash, \ + /* vector_index */ vm->n_vectors_div_4, \ + /* n_vectors */ vm->n_vectors_mod_4, \ + vhash_main_get_result, \ + vm, N_KEY_U32); \ + }) + +_(1); +_(2); +_(3); +_(4); +_(5); +_(6); + +#undef _ + +#define _(N_KEY_U32) \ + clib_pipeline_stage \ + (vhash_main_hash_mix_stage_##N_KEY_U32, \ + vhash_main_t *, vm, i, \ + { \ + vhash_mix_stage (vm->vhash, i, N_KEY_U32); \ + }) \ + \ + clib_pipeline_stage_no_inline \ + (vhash_main_hash_mix_mod_stage_##N_KEY_U32, \ + vhash_main_t *, vm, i, \ + { \ + vhash_mix_stage (vm->vhash, vm->n_vectors_div_4, N_KEY_U32); \ + }) + +_(4); +_(5); +_(6); + +#undef _ + +typedef enum +{ + GET, SET, UNSET, +} vhash_main_op_t; + +static void +vhash_main_op (vhash_main_t * vm, vhash_main_op_t op) +{ + u32 n_keys = vec_len (vm->results); + + vm->n_key_u32 = vm->vhash->n_key_u32; + + vhash_validate_sizes (vm->vhash, vm->n_key_u32, n_keys); + + vm->n_vectors_div_4 = n_keys / 4; + vm->n_vectors_mod_4 = n_keys % 4; + + if (vm->n_vectors_div_4 > 0) + { + switch (vm->n_key_u32) + { + default: + ASSERT (0); + break; + +#define _(N_KEY_U32) \ + case N_KEY_U32: \ + if (op == GET) \ + clib_pipeline_run_3_stage \ + (vm->n_vectors_div_4, \ + vm, \ + vhash_main_gather_keys_stage_##N_KEY_U32, \ + vhash_main_hash_finalize_stage_##N_KEY_U32, \ + vhash_main_get_stage_##N_KEY_U32); \ + else if (op == SET) \ + clib_pipeline_run_3_stage \ + (vm->n_vectors_div_4, \ + vm, \ + vhash_main_gather_keys_stage_##N_KEY_U32, \ + 
vhash_main_hash_finalize_stage_##N_KEY_U32, \ + vhash_main_set_stage_##N_KEY_U32); \ + else \ + clib_pipeline_run_3_stage \ + (vm->n_vectors_div_4, \ + vm, \ + vhash_main_gather_keys_stage_##N_KEY_U32, \ + vhash_main_hash_finalize_stage_##N_KEY_U32, \ + vhash_main_unset_stage_##N_KEY_U32); \ + break; + + _(1); + _(2); + _(3); + +#undef _ + +#define _(N_KEY_U32) \ + case N_KEY_U32: \ + if (op == GET) \ + clib_pipeline_run_4_stage \ + (vm->n_vectors_div_4, \ + vm, \ + vhash_main_gather_keys_stage_##N_KEY_U32, \ + vhash_main_hash_mix_stage_##N_KEY_U32, \ + vhash_main_hash_finalize_stage_##N_KEY_U32, \ + vhash_main_get_stage_##N_KEY_U32); \ + else if (op == SET) \ + clib_pipeline_run_4_stage \ + (vm->n_vectors_div_4, \ + vm, \ + vhash_main_gather_keys_stage_##N_KEY_U32, \ + vhash_main_hash_mix_stage_##N_KEY_U32, \ + vhash_main_hash_finalize_stage_##N_KEY_U32, \ + vhash_main_set_stage_##N_KEY_U32); \ + else \ + clib_pipeline_run_4_stage \ + (vm->n_vectors_div_4, \ + vm, \ + vhash_main_gather_keys_stage_##N_KEY_U32, \ + vhash_main_hash_mix_stage_##N_KEY_U32, \ + vhash_main_hash_finalize_stage_##N_KEY_U32, \ + vhash_main_unset_stage_##N_KEY_U32); \ + break; + + _(4); + _(5); + _(6); + +#undef _ + } + } + + + if (vm->n_vectors_mod_4 > 0) + { + switch (vm->n_key_u32) + { + default: + ASSERT (0); + break; + +#define _(N_KEY_U32) \ + case N_KEY_U32: \ + if (op == GET) \ + clib_pipeline_run_3_stage \ + (1, \ + vm, \ + vhash_main_gather_keys_mod_stage_##N_KEY_U32, \ + vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \ + vhash_main_get_mod_stage_##N_KEY_U32); \ + else if (op == SET) \ + clib_pipeline_run_3_stage \ + (1, \ + vm, \ + vhash_main_gather_keys_mod_stage_##N_KEY_U32, \ + vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \ + vhash_main_set_mod_stage_##N_KEY_U32); \ + else \ + clib_pipeline_run_3_stage \ + (1, \ + vm, \ + vhash_main_gather_keys_mod_stage_##N_KEY_U32, \ + vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \ + vhash_main_unset_mod_stage_##N_KEY_U32); \ + break; 
+ + _(1); + _(2); + _(3); + +#undef _ + +#define _(N_KEY_U32) \ + case N_KEY_U32: \ + if (op == GET) \ + clib_pipeline_run_4_stage \ + (1, \ + vm, \ + vhash_main_gather_keys_mod_stage_##N_KEY_U32, \ + vhash_main_hash_mix_mod_stage_##N_KEY_U32, \ + vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \ + vhash_main_get_mod_stage_##N_KEY_U32); \ + else if (op == SET) \ + clib_pipeline_run_4_stage \ + (1, \ + vm, \ + vhash_main_gather_keys_mod_stage_##N_KEY_U32, \ + vhash_main_hash_mix_mod_stage_##N_KEY_U32, \ + vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \ + vhash_main_set_mod_stage_##N_KEY_U32); \ + else \ + clib_pipeline_run_4_stage \ + (1, \ + vm, \ + vhash_main_gather_keys_mod_stage_##N_KEY_U32, \ + vhash_main_hash_mix_mod_stage_##N_KEY_U32, \ + vhash_main_hash_finalize_mod_stage_##N_KEY_U32, \ + vhash_main_unset_mod_stage_##N_KEY_U32); \ + break; + + _(4); + _(5); + _(6); + +#undef _ + } + } +} + +void +vhash_main_get (vhash_main_t * vm) +{ + vhash_main_op (vm, GET); +} + +void +vhash_main_set (vhash_main_t * vm) +{ + vhash_main_op (vm, SET); +} + +void +vhash_main_unset (vhash_main_t * vm) +{ + vhash_main_op (vm, UNSET); +} + +u32 +vhash_resize_incremental (vhash_resize_t * vr, u32 vector_index, + u32 n_keys_this_call) +{ + vhash_t *old = vr->old; + vhash_main_t *vm = &vr->new; + vhash_t *new = vm->vhash; + uword i, j, n_key_u32; + + n_key_u32 = old->n_key_u32; + + if (vector_index == 0) + { + u32 hash_seeds[3]; + hash_seeds[0] = old->hash_seeds[0].as_u32[0]; + hash_seeds[1] = old->hash_seeds[1].as_u32[0]; + hash_seeds[2] = old->hash_seeds[2].as_u32[0]; + vhash_init (new, old->log2_n_keys + 1, n_key_u32, hash_seeds); + } + + vec_reset_length (vm->keys); + vec_reset_length (vm->results); + + if (0 == (vector_index >> old->log2_n_keys)) + { + for (i = vector_index; 0 == (i >> (old->log2_n_keys - 2)); i++) + { + vhash_search_bucket_t *b = + vhash_get_search_bucket_with_index (old, 4 * i, n_key_u32); + u32 r, *k; + +#define _(I) \ + if ((r = b->result.as_u32[I]) != 
0) \ + { \ + vec_add1 (vm->results, r - 1); \ + vec_add2 (vm->keys, k, n_key_u32); \ + for (j = 0; j < n_key_u32; j++) \ + k[j] = b->key[j].as_u32[I]; \ + } + + _(0); + _(1); + _(2); + _(3); + +#undef _ + + if (vec_len (vm->results) >= n_keys_this_call) + { + vhash_main_op (vm, SET); + return i; + } + } + } + + /* Add overflow buckets. */ + { + vhash_overflow_buckets_t *ob; + vhash_overflow_search_bucket_t *b; + + for (ob = old->overflow_buckets; + ob < old->overflow_buckets + ARRAY_LEN (old->overflow_buckets); ob++) + { + foreach_vhash_overflow_bucket (b, ob, old->n_key_u32) + { + u32 r, *k; + +#define _(I) \ + if ((r = b->result.as_u32[I]) != 0) \ + { \ + vec_add1 (vm->results, r - 1); \ + vec_add2 (vm->keys, k, n_key_u32); \ + for (j = 0; j < n_key_u32; j++) \ + k[j] = b->key[j].as_u32[I]; \ + } + + _(0); + _(1); + _(2); + _(3); + +#undef _ + } + } + } + + vhash_main_op (vm, SET); + + /* Let caller know we are done. */ + return ~0; +} + +void +vhash_resize (vhash_t * old, u32 log2_n_keys) +{ + static vhash_resize_t vr; + vhash_t new; + u32 i = 0; + + vr.old = old; + vr.new.vhash = &new; + + while (1) + { + i = vhash_resize_incremental (&vr, i, 1024); + if (i == ~0) + break; + } + + vhash_free (old); + *old = new; +} + +#endif /* CLIB_HAVE_VEC128 */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/vhash.h b/extras/deprecated/vppinfra/vhash.h new file mode 100644 index 00000000000..85dfb788308 --- /dev/null +++ b/extras/deprecated/vppinfra/vhash.h @@ -0,0 +1,850 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + Copyright (c) 2010 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef included_clib_vhash_h +#define included_clib_vhash_h + +#include <vppinfra/vector.h> + +#ifdef CLIB_HAVE_VEC128 + +#include <vppinfra/cache.h> +#include <vppinfra/hash.h> +#include <vppinfra/pipeline.h> + +/* Gathers 32 bits worth of key with given index. 
*/ +typedef u32 (vhash_key_function_t) (void *state, u32 vector_index, + u32 key_word_index); +typedef u32x4 (vhash_4key_function_t) (void *state, u32 vector_index, + u32 key_word_index); +/* Sets/gets result of hash lookup. */ +typedef u32 (vhash_result_function_t) (void *state, u32 vector_index, + u32 result, u32 n_key_u32); +typedef u32x4 (vhash_4result_function_t) (void *state, u32 vector_index, + u32x4 results, u32 n_key_u32); + +typedef struct +{ + u32x4_union_t hashed_key[3]; +} vhash_hashed_key_t; + +/* Search buckets are really this structure. */ +typedef struct +{ + /* 4 results for this bucket. + Zero is used to mark empty results. This means user can't use the result ~0 + since user results differ from internal results stored in buckets by 1. + e.g. internal result = user result + 1. */ + u32x4_union_t result; + + /* n_key_u32s u32x4s of key data follow. */ + u32x4_union_t key[0]; +} vhash_search_bucket_t; + +typedef struct +{ + u32x4_union_t *search_buckets; + + /* Vector of bucket free indices. */ + u32 *free_indices; + + /* Number of entries in this overflow bucket. */ + u32 n_overflow; +} vhash_overflow_buckets_t; + +typedef struct +{ + /* 2^log2_n_keys keys grouped in groups of 4. + Each bucket contains 4 results plus 4 keys for a + total of (1 + n_key_u32) u32x4s. */ + u32x4_union_t *search_buckets; + + /* When a bucket of 4 results/keys are full we search + the overflow. hash_key is used to select which overflow + bucket. */ + vhash_overflow_buckets_t overflow_buckets[16]; + + /* Total count of occupied elements in hash table. */ + u32 n_elts; + + u32 log2_n_keys; + + /* Number of 32 bit words in a hash key. */ + u32 n_key_u32; + + u32x4_union_t bucket_mask; + + /* table[i] = min_log2 (first_set (~i)). */ + u8 find_first_zero_table[16]; + + /* Hash seeds for Jenkins hash. */ + u32x4_union_t hash_seeds[3]; + + /* Key work space is a vector of length + n_key_u32s << log2_n_key_word_len_u32x. 
*/ + u32 log2_n_key_word_len_u32x; + + /* Work space to store keys between pipeline stages. */ + u32x4_union_t *key_work_space; + + /* Hash work space to store Jenkins hash values between + pipeline stages. */ + vhash_hashed_key_t *hash_work_space; +} vhash_t; + +always_inline vhash_overflow_buckets_t * +vhash_get_overflow_buckets (vhash_t * h, u32 key) +{ + u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf); + ASSERT (i < ARRAY_LEN (h->overflow_buckets)); + return h->overflow_buckets + i; +} + +always_inline uword +vhash_is_non_empty_overflow_bucket (vhash_t * h, u32 key) +{ + u32 i = (((key & h->bucket_mask.as_u32[0]) >> 2) & 0xf); + ASSERT (i < ARRAY_LEN (h->overflow_buckets)); + return h->overflow_buckets[i].n_overflow > 0; +} + +always_inline void +vhash_free_overflow_buckets (vhash_overflow_buckets_t * obs) +{ + vec_free (obs->search_buckets); + vec_free (obs->free_indices); +} + +always_inline void +vhash_free (vhash_t * h) +{ + uword i; + for (i = 0; i < ARRAY_LEN (h->overflow_buckets); i++) + vhash_free_overflow_buckets (&h->overflow_buckets[i]); + vec_free (h->search_buckets); + vec_free (h->key_work_space); + vec_free (h->hash_work_space); +} + +always_inline void +vhash_set_key_word (vhash_t * h, u32 wi, u32 vi, u32 value) +{ + u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4); + u32 i1 = vi % 4; + vec_elt (h->key_work_space, i0).as_u32[i1] = value; +} + +always_inline void +vhash_set_key_word_u32x (vhash_t * h, u32 wi, u32 vi, u32x value) +{ + u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4); + vec_elt (h->key_work_space, i0).as_u32x4 = value; +} + +always_inline u32 +vhash_get_key_word (vhash_t * h, u32 wi, u32 vi) +{ + u32 i0 = (wi << h->log2_n_key_word_len_u32x) + (vi / 4); + u32 i1 = vi % 4; + return vec_elt (h->key_work_space, i0).as_u32[i1]; +} + +always_inline u32x +vhash_get_key_word_u32x (vhash_t * h, u32 wi, u32 vi) +{ + u32 i0 = (wi << h->log2_n_key_word_len_u32x) + vi; + return vec_elt (h->key_work_space, i0).as_u32x4; +} 
+ +always_inline void +vhash_validate_sizes (vhash_t * h, u32 n_key_u32, u32 n_vectors) +{ + u32 n, l; + + n = max_pow2 (n_vectors) / 4; + n = clib_max (n, 8); + + h->log2_n_key_word_len_u32x = l = min_log2 (n); + vec_validate_aligned (h->key_work_space, (n_key_u32 << l) - 1, + CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (h->hash_work_space, n - 1, CLIB_CACHE_LINE_BYTES); +} + +always_inline void +vhash_gather_key_stage (vhash_t * h, + u32 vector_index, + u32 n_vectors, + vhash_key_function_t key_function, + void *state, u32 n_key_u32s) +{ + u32 i, j, vi; + + /* Gather keys for 4 packets (for 128 bit vector length e.g. u32x4). */ + for (i = 0; i < n_vectors; i++) + { + vi = vector_index * 4 + i; + for (j = 0; j < n_key_u32s; j++) + vhash_set_key_word (h, j, vi, key_function (state, vi, j)); + } +} + +always_inline void +vhash_gather_4key_stage (vhash_t * h, + u32 vector_index, + vhash_4key_function_t key_function, + void *state, u32 n_key_u32s) +{ + u32 j, vi; + vi = vector_index * 4; + for (j = 0; j < n_key_u32s; j++) + vhash_set_key_word_u32x (h, j, vi, key_function (state, vi, j)); +} + +always_inline void +vhash_mix_stage (vhash_t * h, u32 vector_index, u32 n_key_u32s) +{ + i32 i, n_left; + u32x a, b, c; + + /* Only need to do this for keys longer than 12 bytes. */ + ASSERT (n_key_u32s > 3); + + a = h->hash_seeds[0].as_u32x4; + b = h->hash_seeds[1].as_u32x4; + c = h->hash_seeds[2].as_u32x4; + for (i = 0, n_left = n_key_u32s - 3; n_left > 0; n_left -= 3, i += 3) + { + a += + vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 0), vector_index); + if (n_left > 1) + b += + vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 1), vector_index); + if (n_left > 2) + c += + vhash_get_key_word_u32x (h, n_key_u32s - 1 - (i + 2), vector_index); + + hash_v3_mix_u32x (a, b, c); + } + + /* Save away a, b, c for later finalize. 
*/ + { + vhash_hashed_key_t *hk = + vec_elt_at_index (h->hash_work_space, vector_index); + hk->hashed_key[0].as_u32x4 = a; + hk->hashed_key[1].as_u32x4 = b; + hk->hashed_key[2].as_u32x4 = c; + } +} + +always_inline vhash_search_bucket_t * +vhash_get_search_bucket_with_index (vhash_t * h, u32 i, u32 n_key_u32s) +{ + return ((vhash_search_bucket_t *) + vec_elt_at_index (h->search_buckets, + (i / 4) * + ((sizeof (vhash_search_bucket_t) / + sizeof (u32x4)) + n_key_u32s))); +} + +always_inline vhash_search_bucket_t * +vhash_get_search_bucket (vhash_t * h, u32 key_hash, u32 n_key_u32s) +{ + u32 i = key_hash & h->bucket_mask.as_u32[0]; + return vhash_get_search_bucket_with_index (h, i, n_key_u32s); +} + +always_inline u32x4 +vhash_get_4_search_bucket_byte_offsets (vhash_t * h, u32x4 key_hash, + u32 n_key_u32s) +{ + vhash_search_bucket_t *b; + u32 n_bytes_per_bucket = sizeof (b[0]) + n_key_u32s * sizeof (b->key[0]); + u32x4 r = key_hash & h->bucket_mask.as_u32x4; + + /* Multiply with shifts and adds to get bucket byte offset. 
*/ +#define _(x) u32x4_ishift_left (r, (x) - 2) + if (n_bytes_per_bucket == (1 << 5)) + r = _(5); + else if (n_bytes_per_bucket == ((1 << 5) + (1 << 4))) + r = _(5) + _(4); + else if (n_bytes_per_bucket == (1 << 6)) + r = _(6); + else if (n_bytes_per_bucket == ((1 << 6) + (1 << 4))) + r = _(6) + _(4); + else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5))) + r = _(6) + _(5); + else if (n_bytes_per_bucket == ((1 << 6) + (1 << 5) + (1 << 4))) + r = _(6) + _(5) + _(4); + else + ASSERT (0); +#undef _ + return r; +} + +always_inline void +vhash_finalize_stage (vhash_t * h, u32 vector_index, u32 n_key_u32s) +{ + i32 n_left; + u32x a, b, c; + vhash_hashed_key_t *hk = + vec_elt_at_index (h->hash_work_space, vector_index); + + if (n_key_u32s <= 3) + { + a = h->hash_seeds[0].as_u32x4; + b = h->hash_seeds[1].as_u32x4; + c = h->hash_seeds[2].as_u32x4; + n_left = n_key_u32s; + } + else + { + a = hk->hashed_key[0].as_u32x4; + b = hk->hashed_key[1].as_u32x4; + c = hk->hashed_key[2].as_u32x4; + n_left = 3; + } + + if (n_left > 0) + a += vhash_get_key_word_u32x (h, 0, vector_index); + if (n_left > 1) + b += vhash_get_key_word_u32x (h, 1, vector_index); + if (n_left > 2) + c += vhash_get_key_word_u32x (h, 2, vector_index); + + hash_v3_finalize_u32x (a, b, c); + + /* Only save away last 32 bits of hash code. */ + hk->hashed_key[2].as_u32x4 = c; + + /* Prefetch buckets. This costs a bit for small tables but saves + big for large ones. 
*/ + { + vhash_search_bucket_t *b0, *b1, *b2, *b3; + u32x4_union_t kh; + + kh.as_u32x4 = vhash_get_4_search_bucket_byte_offsets (h, c, n_key_u32s); + hk->hashed_key[1].as_u32x4 = kh.as_u32x4; + + b0 = (void *) h->search_buckets + kh.as_u32[0]; + b1 = (void *) h->search_buckets + kh.as_u32[1]; + b2 = (void *) h->search_buckets + kh.as_u32[2]; + b3 = (void *) h->search_buckets + kh.as_u32[3]; + + CLIB_PREFETCH (b0, sizeof (b0[0]) + n_key_u32s * sizeof (b0->key[0]), + READ); + CLIB_PREFETCH (b1, sizeof (b1[0]) + n_key_u32s * sizeof (b1->key[0]), + READ); + CLIB_PREFETCH (b2, sizeof (b2[0]) + n_key_u32s * sizeof (b2->key[0]), + READ); + CLIB_PREFETCH (b3, sizeof (b3[0]) + n_key_u32s * sizeof (b3->key[0]), + READ); + } +} + +always_inline u32 +vhash_merge_results (u32x4 r) +{ + r = r | u32x4_word_shift_right (r, 2); + r = r | u32x4_word_shift_right (r, 1); + return u32x4_get0 (r); +} + +/* Bucket is full if none of its 4 results are 0. */ +always_inline u32 +vhash_search_bucket_is_full (u32x4 r) +{ + return u32x4_zero_byte_mask (r) == 0; +} + +always_inline u32 +vhash_non_empty_result_index (u32x4 x) +{ + u32 empty_mask = u32x4_zero_byte_mask (x); + ASSERT (empty_mask != 0xffff); + return min_log2 (0xffff & ~empty_mask) / 4; +} + +always_inline u32 +vhash_empty_result_index (u32x4 x) +{ + u32 empty_mask = u32x4_zero_byte_mask (x); + ASSERT (empty_mask != 0); + return min_log2 (0xffff & empty_mask) / 4; +} + +always_inline u32x4 +vhash_bucket_compare (vhash_t * h, + u32x4_union_t * bucket, u32 key_word_index, u32 vi) +{ + u32 k = vhash_get_key_word (h, key_word_index, vi); + u32x4 x = { k, k, k, k }; + return (bucket[key_word_index].as_u32x4 == x); +} + +#define vhash_bucket_compare_4(h,wi,vi,b0,b1,b2,b3,cmp0,cmp1,cmp2,cmp3) \ +do { \ + u32x4 _k4 = vhash_get_key_word_u32x ((h), (wi), (vi)); \ + u32x4 _k0 = u32x4_splat_word (_k4, 0); \ + u32x4 _k1 = u32x4_splat_word (_k4, 1); \ + u32x4 _k2 = u32x4_splat_word (_k4, 2); \ + u32x4 _k3 = u32x4_splat_word (_k4, 3); \ + \ + 
cmp0 = (b0->key[wi].as_u32x4 == _k0); \ + cmp1 = (b1->key[wi].as_u32x4 == _k1); \ + cmp2 = (b2->key[wi].as_u32x4 == _k2); \ + cmp3 = (b3->key[wi].as_u32x4 == _k3); \ +} while (0) + +u32 vhash_get_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s); + +always_inline void +vhash_get_stage (vhash_t * h, + u32 vector_index, + u32 n_vectors, + vhash_result_function_t result_function, + void *state, u32 n_key_u32s) +{ + u32 i, j; + vhash_hashed_key_t *hk = + vec_elt_at_index (h->hash_work_space, vector_index); + vhash_search_bucket_t *b; + + for (i = 0; i < n_vectors; i++) + { + u32 vi = vector_index * 4 + i; + u32 key_hash = hk->hashed_key[2].as_u32[i]; + u32 result; + u32x4 r, r0; + + b = vhash_get_search_bucket (h, key_hash, n_key_u32s); + + r = r0 = b->result.as_u32x4; + for (j = 0; j < n_key_u32s; j++) + r &= vhash_bucket_compare (h, &b->key[0], j, vi); + + /* At this point only one of 4 results should be non-zero. + So we can or all 4 together and get the valid result (if there is one). 
*/ + result = vhash_merge_results (r); + + if (!result && vhash_search_bucket_is_full (r0)) + result = vhash_get_overflow (h, key_hash, vi, n_key_u32s); + + result_function (state, vi, result - 1, n_key_u32s); + } +} + +always_inline void +vhash_get_4_stage (vhash_t * h, + u32 vector_index, + vhash_4result_function_t result_function, + void *state, u32 n_key_u32s) +{ + u32 i, vi; + vhash_hashed_key_t *hk = + vec_elt_at_index (h->hash_work_space, vector_index); + vhash_search_bucket_t *b0, *b1, *b2, *b3; + u32x4 r0, r1, r2, r3, r0_before, r1_before, r2_before, r3_before; + u32x4_union_t kh; + + kh.as_u32x4 = hk->hashed_key[1].as_u32x4; + + b0 = (void *) h->search_buckets + kh.as_u32[0]; + b1 = (void *) h->search_buckets + kh.as_u32[1]; + b2 = (void *) h->search_buckets + kh.as_u32[2]; + b3 = (void *) h->search_buckets + kh.as_u32[3]; + + r0 = r0_before = b0->result.as_u32x4; + r1 = r1_before = b1->result.as_u32x4; + r2 = r2_before = b2->result.as_u32x4; + r3 = r3_before = b3->result.as_u32x4; + + vi = vector_index * 4; + + for (i = 0; i < n_key_u32s; i++) + { + u32x4 c0, c1, c2, c3; + vhash_bucket_compare_4 (h, i, vector_index, + b0, b1, b2, b3, c0, c1, c2, c3); + r0 &= c0; + r1 &= c1; + r2 &= c2; + r3 &= c3; + } + + u32x4_transpose (r0, r1, r2, r3); + + /* Gather together 4 results. */ + { + u32x4_union_t r; + u32x4 ones = { 1, 1, 1, 1 }; + u32 not_found_mask; + + r.as_u32x4 = r0 | r1 | r2 | r3; + not_found_mask = u32x4_zero_byte_mask (r.as_u32x4); + not_found_mask &= ((vhash_search_bucket_is_full (r0_before) << (4 * 0)) + | (vhash_search_bucket_is_full (r1_before) << (4 * 1)) + | (vhash_search_bucket_is_full (r2_before) << (4 * 2)) + | (vhash_search_bucket_is_full (r3_before) << + (4 * 3))); + if (not_found_mask) + { + u32x4_union_t key_hash; + + key_hash.as_u32x4 = + hk->hashed_key[2].as_u32x4 & h->bucket_mask.as_u32x4; + + /* Slow path: one of the buckets may have been full and we need to search overflow. 
*/ + if (not_found_mask & (1 << (4 * 0))) + r.as_u32[0] = vhash_get_overflow (h, key_hash.as_u32[0], + vi + 0, n_key_u32s); + if (not_found_mask & (1 << (4 * 1))) + r.as_u32[1] = vhash_get_overflow (h, key_hash.as_u32[1], + vi + 1, n_key_u32s); + if (not_found_mask & (1 << (4 * 2))) + r.as_u32[2] = vhash_get_overflow (h, key_hash.as_u32[2], + vi + 2, n_key_u32s); + if (not_found_mask & (1 << (4 * 3))) + r.as_u32[3] = vhash_get_overflow (h, key_hash.as_u32[3], + vi + 3, n_key_u32s); + } + + result_function (state, vi, r.as_u32x4 - ones, n_key_u32s); + } +} + +u32 +vhash_set_overflow (vhash_t * h, + u32 key_hash, u32 vi, u32 new_result, u32 n_key_u32s); + +always_inline void +vhash_set_stage (vhash_t * h, + u32 vector_index, + u32 n_vectors, + vhash_result_function_t result_function, + void *state, u32 n_key_u32s) +{ + u32 i, j, n_new_elts = 0; + vhash_hashed_key_t *hk = + vec_elt_at_index (h->hash_work_space, vector_index); + vhash_search_bucket_t *b; + + for (i = 0; i < n_vectors; i++) + { + u32 vi = vector_index * 4 + i; + u32 key_hash = hk->hashed_key[2].as_u32[i]; + u32 old_result, new_result; + u32 i_set; + u32x4 r, r0, cmp; + + b = vhash_get_search_bucket (h, key_hash, n_key_u32s); + + cmp = vhash_bucket_compare (h, &b->key[0], 0, vi); + for (j = 1; j < n_key_u32s; j++) + cmp &= vhash_bucket_compare (h, &b->key[0], j, vi); + + r0 = b->result.as_u32x4; + r = r0 & cmp; + + /* At this point only one of 4 results should be non-zero. + So we can or all 4 together and get the valid result (if there is one). */ + old_result = vhash_merge_results (r); + + if (!old_result && vhash_search_bucket_is_full (r0)) + old_result = vhash_get_overflow (h, key_hash, vi, n_key_u32s); + + /* Get new result; possibly do something with old result. */ + new_result = result_function (state, vi, old_result - 1, n_key_u32s); + + /* User cannot use ~0 as a hash result since a result of 0 is + used to mark unused bucket entries. 
*/ + ASSERT (new_result + 1 != 0); + new_result += 1; + + /* Set over-writes existing result. */ + if (old_result) + { + i_set = vhash_non_empty_result_index (r); + b->result.as_u32[i_set] = new_result; + } + else + { + /* Set allocates new result. */ + u32 valid_mask; + + valid_mask = (((b->result.as_u32[0] != 0) << 0) + | ((b->result.as_u32[1] != 0) << 1) + | ((b->result.as_u32[2] != 0) << 2) + | ((b->result.as_u32[3] != 0) << 3)); + + /* Rotate 4 bit valid mask so that key_hash corresponds to bit 0. */ + i_set = key_hash & 3; + valid_mask = + ((valid_mask >> i_set) | (valid_mask << (4 - i_set))) & 0xf; + + /* Insert into first empty position in bucket after key_hash. */ + i_set = (i_set + h->find_first_zero_table[valid_mask]) & 3; + + if (valid_mask != 0xf) + { + n_new_elts += 1; + + b->result.as_u32[i_set] = new_result; + + /* Insert new key into search bucket. */ + for (j = 0; j < n_key_u32s; j++) + b->key[j].as_u32[i_set] = vhash_get_key_word (h, j, vi); + } + else + vhash_set_overflow (h, key_hash, vi, new_result, n_key_u32s); + } + } + + h->n_elts += n_new_elts; +} + +u32 vhash_unset_overflow (vhash_t * h, u32 key_hash, u32 vi, u32 n_key_u32s); + +void +vhash_unset_refill_from_overflow (vhash_t * h, + vhash_search_bucket_t * b, + u32 key_hash, u32 n_key_u32s); + +/* Note: Eliot tried doing 4 unsets at once and could not get a speed up + and abandoned vhash_unset_4_stage. 
*/ +always_inline void +vhash_unset_stage (vhash_t * h, + u32 vector_index, + u32 n_vectors, + vhash_result_function_t result_function, + void *state, u32 n_key_u32s) +{ + u32 i, j, n_elts_unset = 0; + vhash_hashed_key_t *hk = + vec_elt_at_index (h->hash_work_space, vector_index); + vhash_search_bucket_t *b; + + for (i = 0; i < n_vectors; i++) + { + u32 vi = vector_index * 4 + i; + u32 key_hash = hk->hashed_key[2].as_u32[i]; + u32 old_result; + u32x4 cmp, r0; + + b = vhash_get_search_bucket (h, key_hash, n_key_u32s); + + cmp = vhash_bucket_compare (h, &b->key[0], 0, vi); + for (j = 1; j < n_key_u32s; j++) + cmp &= vhash_bucket_compare (h, &b->key[0], j, vi); + + r0 = b->result.as_u32x4; + + /* At this point cmp is all ones where key matches and zero otherwise. + So, this will invalidate results for matching key and do nothing otherwise. */ + b->result.as_u32x4 = r0 & ~cmp; + + old_result = vhash_merge_results (r0 & cmp); + + n_elts_unset += old_result != 0; + + if (vhash_search_bucket_is_full (r0)) + { + if (old_result) + vhash_unset_refill_from_overflow (h, b, key_hash, n_key_u32s); + else + old_result = vhash_unset_overflow (h, key_hash, vi, n_key_u32s); + } + + result_function (state, vi, old_result - 1, n_key_u32s); + } + ASSERT (h->n_elts >= n_elts_unset); + h->n_elts -= n_elts_unset; +} + +void vhash_init (vhash_t * h, u32 log2_n_keys, u32 n_key_u32, + u32 * hash_seeds); + +void vhash_resize (vhash_t * old, u32 log2_n_keys); + +typedef struct +{ + vhash_t *vhash; + + union + { + struct + { + u32 *keys; + u32 *results; + }; + + /* Vector layout for get keys. 
*/ + struct + { + u32x4_union_t *get_keys; + u32x4_union_t *get_results; + }; + }; + + u32 n_vectors_div_4; + u32 n_vectors_mod_4; + + u32 n_key_u32; + + u32 n_keys; +} vhash_main_t; + +always_inline u32 +vhash_get_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32) +{ + u32 i, n; + + i = vm->n_keys; + vm->n_keys = i + n_keys; + + n = (round_pow2 (vm->n_keys, 4) / 4) * n_key_u32; + + vec_validate_aligned (vm->get_keys, n - 1, sizeof (vm->get_keys[0])); + vec_validate_aligned (vm->get_results, n - 1, sizeof (vm->get_results[0])); + + return i; +} + +always_inline void +vhash_get_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32, + u32 value) +{ + u32x4_union_t *k = vec_elt_at_index (vm->get_keys, (vi / 4) * n_key_u32); + ASSERT (wi < n_key_u32); + k[wi].as_u32[vi % 4] = value; +} + +always_inline u32 +vhash_get_fetch_result (vhash_main_t * vm, u32 vi) +{ + u32x4_union_t *r = vec_elt_at_index (vm->get_results, vi / 4); + return r->as_u32[vi % 4]; +} + +void vhash_main_get (vhash_main_t * vm); + +always_inline u32 +vhash_set_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32) +{ + u32 i; + + i = vm->n_keys; + vm->n_keys = i + n_keys; + + vec_resize (vm->keys, n_keys * n_key_u32); + vec_resize (vm->results, n_keys); + + return i; +} + +always_inline void +vhash_set_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32, + u32 value) +{ + u32 *k = vec_elt_at_index (vm->keys, vi * n_key_u32); + ASSERT (wi < n_key_u32); + k[wi] = value; +} + +always_inline void +vhash_set_set_result (vhash_main_t * vm, u32 vi, u32 result) +{ + u32 *r = vec_elt_at_index (vm->results, vi); + r[0] = result; +} + +always_inline u32 +vhash_set_fetch_old_result (vhash_main_t * vm, u32 vi) +{ + u32 *r = vec_elt_at_index (vm->results, vi); + return r[0]; +} + +void vhash_main_set (vhash_main_t * vm); + +always_inline u32 +vhash_unset_alloc_keys (vhash_main_t * vm, u32 n_keys, u32 n_key_u32) +{ + return vhash_set_alloc_keys (vm, n_keys, n_key_u32); +} + 
+always_inline void +vhash_unset_set_key_word (vhash_main_t * vm, u32 vi, u32 wi, u32 n_key_u32, + u32 value) +{ + vhash_set_set_key_word (vm, vi, wi, n_key_u32, value); +} + +always_inline void +vhash_unset_set_result (vhash_main_t * vm, u32 vi, u32 result) +{ + vhash_set_set_result (vm, vi, result); +} + +always_inline u32 +vhash_unset_fetch_old_result (vhash_main_t * vm, u32 vi) +{ + return vhash_set_fetch_old_result (vm, vi); +} + +void vhash_main_unset (vhash_main_t * vm); + +typedef struct +{ + vhash_main_t new; + + vhash_t *old; +} vhash_resize_t; + +u32 vhash_resize_incremental (vhash_resize_t * vr, u32 vector_index, + u32 n_vectors); + +#endif /* CLIB_HAVE_VEC128 */ + +#endif /* included_clib_vhash_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/xy.h b/extras/deprecated/vppinfra/xy.h new file mode 100644 index 00000000000..fb562161a62 --- /dev/null +++ b/extras/deprecated/vppinfra/xy.h @@ -0,0 +1,56 @@ +/* (X,Y) coordinates. */ + +/* + Copyright (c) 2008 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef included_clib_xy_h +#define included_clib_xy_h + +#include <vppinfra/types.h> + +/* Basic definitions: coordinates and points. */ +typedef double xy_float_t; +typedef __complex__ double xy_t; +typedef __complex__ int ixy_t; + +typedef __complex__ char i8xy_t; +typedef __complex__ short i16xy_t; +typedef __complex__ int i32xy_t; + +/* X/Y components of a point: can be used as either rvalue/lvalue. */ +#define xy_x(x) __real__ (x) +#define xy_y(x) __imag__ (x) + +/* Unit vectors in x/y directions. */ +#define xy_x_unit_vector (1) +#define xy_y_unit_vector (1I) + +#endif /* included_clib_xy_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/zvec.c b/extras/deprecated/vppinfra/zvec.c new file mode 100644 index 00000000000..d062e5f7db1 --- /dev/null +++ b/extras/deprecated/vppinfra/zvec.c @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + Copyright (c) 2001, 2002, 2003, 2005 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#include <vppinfra/bitmap.h> +#include <vppinfra/bitops.h> /* for next_with_same_number_of_set_bits */ +#include <vppinfra/error.h> /* for ASSERT */ +#include <vppinfra/mem.h> +#include <vppinfra/os.h> /* for os_panic */ +#include <vppinfra/vec.h> +#include <vppinfra/zvec.h> + +/* Consider coding as bitmap, coding = 2^c_0 + 2^c_1 + ... + 2^c_n + With c_0 < c_1 < ... < c_n. coding == 0 represents c_n = BITS (uword). + + Unsigned integers i = 0 ... are represented as follows: + + 0 <= i < 2^c_0 (i << 1) | (1 << 0) binary: i 1 + 2^c_0 <= i < 2^c_0 + 2^c_1 (i << 2) | (1 << 1) binary: i 1 0 + ... binary: i 0 ... 0 + + Smaller numbers use less bits. Coding is chosen so that encoding + of given histogram of typical values gives smallest number of bits. + The number and position of coding bits c_i are used to best fit the + histogram of typical values. 
+*/ + +/* Decode given compressed data. Return number of compressed data + bits used. */ +uword +zvec_decode (uword coding, uword zdata, uword * n_zdata_bits) +{ + uword c, d, result, n_bits; + uword explicit_end, implicit_end; + + result = 0; + n_bits = 0; + while (1) + { + c = first_set (coding); + implicit_end = c == coding; + explicit_end = (zdata & 1) & ~implicit_end; + d = (zdata >> explicit_end) & (c - 1); + if (explicit_end | implicit_end) + { + result += d; + n_bits += min_log2 (c) + explicit_end; + break; + } + n_bits += 1; + result += c; + coding ^= c; + zdata >>= 1; + } + + if (coding == 0) + n_bits = BITS (uword); + + *n_zdata_bits = n_bits; + return result; +} + +uword +zvec_encode (uword coding, uword data, uword * n_result_bits) +{ + uword c, shift, result; + uword explicit_end, implicit_end; + + /* Data must be in range. Note special coding == 0 + would break for data - 1 <= coding. */ + ASSERT (data <= coding - 1); + + shift = 0; + while (1) + { + c = first_set (coding); + implicit_end = c == coding; + explicit_end = ((data & (c - 1)) == data); + if (explicit_end | implicit_end) + { + uword t = explicit_end & ~implicit_end; + result = ((data << t) | t) << shift; + *n_result_bits = + /* data bits */ (c == 0 ? BITS (uword) : min_log2 (c)) + /* shift bits */ + shift + t; + return result; + } + data -= c; + coding ^= c; + shift++; + } + + /* Never reached. */ + ASSERT (0); + return ~0; +} + +always_inline uword +get_data (void *data, uword data_bytes, uword is_signed) +{ + if (data_bytes == 1) + return is_signed ? zvec_signed_to_unsigned (*(i8 *) data) : *(u8 *) data; + else if (data_bytes == 2) + return is_signed ? zvec_signed_to_unsigned (*(i16 *) data) : *(u16 *) + data; + else if (data_bytes == 4) + return is_signed ? zvec_signed_to_unsigned (*(i32 *) data) : *(u32 *) + data; + else if (data_bytes == 8) + return is_signed ? 
zvec_signed_to_unsigned (*(i64 *) data) : *(u64 *) + data; + else + { + os_panic (); + return ~0; + } +} + +always_inline void +put_data (void *data, uword data_bytes, uword is_signed, uword x) +{ + if (data_bytes == 1) + { + if (is_signed) + *(i8 *) data = zvec_unsigned_to_signed (x); + else + *(u8 *) data = x; + } + else if (data_bytes == 2) + { + if (is_signed) + *(i16 *) data = zvec_unsigned_to_signed (x); + else + *(u16 *) data = x; + } + else if (data_bytes == 4) + { + if (is_signed) + *(i32 *) data = zvec_unsigned_to_signed (x); + else + *(u32 *) data = x; + } + else if (data_bytes == 8) + { + if (is_signed) + *(i64 *) data = zvec_unsigned_to_signed (x); + else + *(u64 *) data = x; + } + else + { + os_panic (); + } +} + +always_inline uword * +zvec_encode_inline (uword * zvec, + uword * zvec_n_bits, + uword coding, + void *data, + uword data_stride, + uword n_data, uword data_bytes, uword is_signed) +{ + uword i; + + i = *zvec_n_bits; + while (n_data >= 1) + { + uword d0, z0, l0; + + d0 = get_data (data + 0 * data_stride, data_bytes, is_signed); + data += 1 * data_stride; + n_data -= 1; + + z0 = zvec_encode (coding, d0, &l0); + zvec = clib_bitmap_set_multiple (zvec, i, z0, l0); + i += l0; + } + + *zvec_n_bits = i; + return zvec; +} + +#define _(TYPE,IS_SIGNED) \ + uword * zvec_encode_##TYPE (uword * zvec, \ + uword * zvec_n_bits, \ + uword coding, \ + void * data, \ + uword data_stride, \ + uword n_data) \ + { \ + return zvec_encode_inline (zvec, zvec_n_bits, \ + coding, \ + data, data_stride, n_data, \ + /* data_bytes */ sizeof (TYPE), \ + /* is_signed */ IS_SIGNED); \ + } + +_(u8, /* is_signed */ 0); +_(u16, /* is_signed */ 0); +_(u32, /* is_signed */ 0); +_(u64, /* is_signed */ 0); +_(i8, /* is_signed */ 1); +_(i16, /* is_signed */ 1); +_(i32, /* is_signed */ 1); +_(i64, /* is_signed */ 1); + +#undef _ + +always_inline uword +coding_max_n_bits (uword coding) +{ + uword n_bits; + (void) zvec_decode (coding, 0, &n_bits); + return n_bits; +} + +always_inline 
void +zvec_decode_inline (uword * zvec, + uword * zvec_n_bits, + uword coding, + void *data, + uword data_stride, + uword n_data, uword data_bytes, uword is_signed) +{ + uword i, n_max; + + i = *zvec_n_bits; + n_max = coding_max_n_bits (coding); + while (n_data >= 1) + { + uword d0, z0, l0; + + z0 = clib_bitmap_get_multiple (zvec, i, n_max); + d0 = zvec_decode (coding, z0, &l0); + i += l0; + put_data (data + 0 * data_stride, data_bytes, is_signed, d0); + data += 1 * data_stride; + n_data -= 1; + } + *zvec_n_bits = i; +} + +#define _(TYPE,IS_SIGNED) \ + void zvec_decode_##TYPE (uword * zvec, \ + uword * zvec_n_bits, \ + uword coding, \ + void * data, \ + uword data_stride, \ + uword n_data) \ + { \ + return zvec_decode_inline (zvec, zvec_n_bits, \ + coding, \ + data, data_stride, n_data, \ + /* data_bytes */ sizeof (TYPE), \ + /* is_signed */ IS_SIGNED); \ + } + +_(u8, /* is_signed */ 0); +_(u16, /* is_signed */ 0); +_(u32, /* is_signed */ 0); +_(u64, /* is_signed */ 0); +_(i8, /* is_signed */ 1); +_(i16, /* is_signed */ 1); +_(i32, /* is_signed */ 1); +_(i64, /* is_signed */ 1); + +#undef _ + +/* Compute number of bits needed to encode given histogram. */ +static uword +zvec_coding_bits (uword coding, uword * histogram_counts, uword min_bits) +{ + uword n_type_bits, n_bits; + uword this_count, last_count, max_count_index; + uword i, b, l; + + n_bits = 0; + n_type_bits = 1; + last_count = 0; + max_count_index = vec_len (histogram_counts) - 1; + + /* Coding is not large enough to encode given data. */ + if (coding <= max_count_index) + return ~0; + + i = 0; + while (coding != 0) + { + b = first_set (coding); + l = min_log2 (b); + i += b; + + this_count = + histogram_counts[i > max_count_index ? max_count_index : i - 1]; + + /* No more data to encode? */ + if (this_count == last_count) + break; + + /* Last coding is i 0 ... 0 so we don't need an extra type bit. 
*/ + if (coding == b) + n_type_bits--; + + n_bits += (this_count - last_count) * (n_type_bits + l); + + /* This coding cannot be minimal: so return. */ + if (n_bits >= min_bits) + return ~0; + + last_count = this_count; + coding ^= b; + n_type_bits++; + } + + return n_bits; +} + +uword +_zvec_coding_from_histogram (void *histogram, + uword histogram_len, + uword histogram_elt_count_offset, + uword histogram_elt_bytes, + uword max_value_to_encode, + zvec_coding_info_t * coding_return) +{ + uword coding, min_coding; + uword min_coding_bits, coding_bits; + uword i, n_bits_set, total_count; + uword *counts; + zvec_histogram_count_t *h_count = histogram + histogram_elt_count_offset; + + if (histogram_len < 1) + { + coding_return->coding = 0; + coding_return->min_coding_bits = 0; + coding_return->n_data = 0; + coding_return->n_codes = 0; + coding_return->ave_coding_bits = 0; + return 0; + } + + total_count = 0; + counts = vec_new (uword, histogram_len); + for (i = 0; i < histogram_len; i++) + { + zvec_histogram_count_t this_count = h_count[0]; + total_count += this_count; + counts[i] = total_count; + h_count = + (zvec_histogram_count_t *) ((void *) h_count + histogram_elt_bytes); + } + + min_coding = 0; + min_coding_bits = ~0; + + { + uword base_coding = + max_value_to_encode != + ~0 ? 
(1 + max_value_to_encode) : vec_len (counts); + uword max_coding = max_pow2 (2 * base_coding); + + for (n_bits_set = 1; n_bits_set <= 8; n_bits_set++) + { + for (coding = pow2_mask (n_bits_set); + coding < max_coding; + coding = next_with_same_number_of_set_bits (coding)) + { + coding_bits = zvec_coding_bits (coding, counts, min_coding_bits); + if (coding_bits >= min_coding_bits) + continue; + min_coding_bits = coding_bits; + min_coding = coding; + } + } + } + + if (coding_return) + { + coding_return->coding = min_coding; + coding_return->min_coding_bits = min_coding_bits; + coding_return->n_data = total_count; + coding_return->n_codes = vec_len (counts); + coding_return->ave_coding_bits = + (f64) min_coding_bits / (f64) total_count; + } + + vec_free (counts); + + return min_coding; +} + +u8 * +format_zvec_coding (u8 * s, va_list * args) +{ + zvec_coding_info_t *c = va_arg (*args, zvec_coding_info_t *); + return format (s, + "zvec coding 0x%x, %d elts, %d codes, %d bits total, %.4f ave bits/code", + c->coding, c->n_data, c->n_codes, c->min_coding_bits, + c->ave_coding_bits); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/extras/deprecated/vppinfra/zvec.h b/extras/deprecated/vppinfra/zvec.h new file mode 100644 index 00000000000..7d35a3fe41f --- /dev/null +++ b/extras/deprecated/vppinfra/zvec.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#ifndef included_zvec_h +#define included_zvec_h + +#include <vppinfra/clib.h> +#include <vppinfra/error.h> /* for ASSERT */ +#include <vppinfra/format.h> + +/* zvec: compressed vectors. + + Data is entropy coded with 32 bit "codings". + + Consider coding as bitmap, coding = 2^c_0 + 2^c_1 + ... + 2^c_n + With c_0 < c_1 < ... < c_n. coding == 0 represents c_n = BITS (uword). + + Unsigned integers i = 0 ... are represented as follows: + + 0 <= i < 2^c_0 (i << 1) | (1 << 0) binary: i 1 + 2^c_0 <= i < 2^c_0 + 2^c_1 (i << 2) | (1 << 1) binary: i 1 0 + ... binary: i 0 ... 0 + + Smaller numbers use less bits. Coding is chosen so that encoding + of given histogram of typical values gives smallest number of bits. 
+ The number and position of coding bits c_i are used to best fit the + histogram of typical values. +*/ + +typedef struct +{ + /* Smallest coding for given histogram of typical data. */ + u32 coding; + + /* Number of data in histogram. */ + u32 n_data; + + /* Number of codes (unique values) in histogram. */ + u32 n_codes; + + /* Number of bits in smallest coding of data. */ + u32 min_coding_bits; + + /* Average number of bits per code. */ + f64 ave_coding_bits; +} zvec_coding_info_t; + +/* Encode/decode data. */ +uword zvec_encode (uword coding, uword data, uword * n_result_bits); +uword zvec_decode (uword coding, uword zdata, uword * n_zdata_bits); + +format_function_t format_zvec_coding; + +typedef u32 zvec_histogram_count_t; + +#define zvec_coding_from_histogram(h,count_field,len,max_value_to_encode,zc) \ + _zvec_coding_from_histogram ((h), (len), \ + STRUCT_OFFSET_OF_VAR (h, count_field), \ + sizeof (h[0]), \ + max_value_to_encode, \ + (zc)) + +uword +_zvec_coding_from_histogram (void *_histogram, + uword histogram_len, + uword histogram_elt_count_offset, + uword histogram_elt_bytes, + uword max_value_to_encode, + zvec_coding_info_t * coding_info_return); + +#define _(TYPE,IS_SIGNED) \ + uword * zvec_encode_##TYPE (uword * zvec, uword * zvec_n_bits, uword coding, \ + void * data, uword data_stride, uword n_data); + +_(u8, /* is_signed */ 0); +_(u16, /* is_signed */ 0); +_(u32, /* is_signed */ 0); +_(u64, /* is_signed */ 0); +_(i8, /* is_signed */ 1); +_(i16, /* is_signed */ 1); +_(i32, /* is_signed */ 1); +_(i64, /* is_signed */ 1); + +#undef _ + +#define _(TYPE,IS_SIGNED) \ + void zvec_decode_##TYPE (uword * zvec, \ + uword * zvec_n_bits, \ + uword coding, \ + void * data, \ + uword data_stride, \ + uword n_data) + +_(u8, /* is_signed */ 0); +_(u16, /* is_signed */ 0); +_(u32, /* is_signed */ 0); +_(u64, /* is_signed */ 0); +_(i8, /* is_signed */ 1); +_(i16, /* is_signed */ 1); +_(i32, /* is_signed */ 1); +_(i64, /* is_signed */ 1); + +#undef _ + +/* Signed 
<=> unsigned conversion. + -1, -2, -3, ... => 1, 3, 5, ... odds + 0, +1, +2, +3, ... => 0, 2, 4, 6, ... evens */ +always_inline uword +zvec_signed_to_unsigned (word s) +{ + uword a = s < 0; + s = 2 * s + a; + return a ? -s : s; +} + +always_inline word +zvec_unsigned_to_signed (uword u) +{ + uword a = u & 1; + u >>= 1; + return a ? -u : u; +} + +#endif /* included_zvec_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |