/* * Copyright (c) 2015 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* Copyright (c) 2005 Eliot Dresselhaus Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* This is all stolen from Bob Jenkins and reworked for clib. Thanks once again Bob for the great work. */ /* ------------------------------------------------------------------------------ perfect.c: code to generate code for a hash for perfect hashing. 
(c) Bob Jenkins, September 1996, December 1999 You may use this code in any way you wish, and it is free. No warranty. I hereby place this in the public domain. Source is http://burtleburtle.net/bob/c/perfect.c This generates a minimal perfect hash function. That means, given a set of n keys, this determines a hash function that maps each of those keys into a value in 0..n-1 with no collisions. The perfect hash function first uses a normal hash function on the key to determine (a,b) such that the pair (a,b) is distinct for all keys, then it computes a^scramble[tab[b]] to get the final perfect hash. tab[] is an array of 1-byte values and scramble[] is a 256-term array of 2-byte or 4-byte values. If there are n keys, the length of tab[] is a power of two between n/3 and n. I found the idea of computing distinct (a,b) values in "Practical minimal perfect hash functions for large databases", Fox, Heath, Chen, and Daoud, Communications of the ACM, January 1992. They found the idea in Cichelli (CACM Jan 1980). Beyond that, our methods differ. The key is hashed to a pair (a,b) where a in 0..*alen*-1 and b in 0..*blen*-1. A fast hash function determines both a and b simultaneously. Any decent hash function is likely to produce hashes so that (a,b) is distinct for all pairs. I try the hash using different values of *salt* until all pairs are distinct. The final hash is (a XOR scramble[tab[b]]). *scramble* is a predetermined mapping of 0..255 into 0..smax-1. *tab* is an array that we fill in in such a way as to make the hash perfect. First we fill in all values of *tab* that are used by more than one key. We try all possible values for each position until one works. This leaves m unmapped keys and m values that something could hash to. If you treat unmapped keys as lefthand nodes and unused hash values as righthand nodes, and draw a line connecting each key to each hash value it could map to, you get a bipartite graph. We attempt to find a perfect matching in this graph.
If we succeed, we have determined a perfect hash for the whole set of keys. *scramble* is used because (a^tab[i]) clusters keys around *a*. ------------------------------------------------------------------------------ */ #include #include #include #include static void init_keys_direct_u32 (phash_main_t * pm) { int n_keys_left, b_mask, a_shift; u32 seed; phash_key_t *k; seed = pm->hash_seed; b_mask = (1 << pm->b_bits) - 1; a_shift = BITS (seed) - pm->a_bits; k = pm->keys; n_keys_left = vec_len (pm->keys); while (n_keys_left >= 2) { u32 x0, y0, z0; u32 x1, y1, z1; x0 = y0 = z0 = seed; x1 = y1 = z1 = seed; x0 += (u32) k[0].key; x1 += (u32) k[1].key; hash_mix32 (x0, y0, z0); hash_mix32 (x1, y1, z1); k[0].b = z0 & b_mask; k[1].b = z1 & b_mask; k[0].a = z0 >> a_shift; k[1].a = z1 >> a_shift; if (PREDICT_FALSE (a_shift >= BITS (z0))) k[0].a = k[1].a = 0; k += 2; n_keys_left -= 2; } if (n_keys_left >= 1) { u32 x0, y0, z0; x0 = y0 = z0 = seed; x0 += k[0].key; hash_mix32 (x0, y0, z0); k[0].b = z0 & b_mask; k[0].a = z0 >> a_shift; if (PREDICT_FALSE (a_shift >= BITS (z0))) k[0].a = 0; k += 1; n_keys_left -= 1; } } static void init_keys_direct_u64 (phash_main_t * pm) { int n_keys_left, b_mask, a_shift; u64 seed; phash_key_t *k; seed = pm->hash_seed; b_mask = (1 << pm->b_bits) - 1; a_shift = BITS (seed) - pm->a_bits; k = pm->keys; n_keys_left = vec_len (pm->keys); while (n_keys_left >= 2) { u64 x0, y0, z0; u64 x1, y1, z1; x0 = y0 = z0 = seed; x1 = y1 = z1 = seed; x0 += (u64) k[0].key; x1 += (u64) k[1].key; hash_mix64 (x0, y0, z0); hash_mix64 (x1, y1, z1); k[0].b = z0 & b_mask; k[1].b = z1 & b_mask; k[0].a = z0 >> a_shift; k[1].a = z1 >> a_shift; if (PREDICT_FALSE (a_shift >= BITS (z0))) k[0].a = k[1].a = 0; k += 2; n_keys_left -= 2; } if (n_keys_left >= 1) { u64 x0, y0, z0; x0 = y0 = z0 = seed; x0 += k[0].key; hash_mix64 (x0, y0, z0); k[0].b = z0 & b_mask; k[0].a = z0 >> a_shift; if (PREDICT_FALSE (a_shift >= BITS (z0))) k[0].a = 0; k += 1; n_keys_left -= 1; } } static void 
init_keys_indirect_u32 (phash_main_t * pm) { int n_keys_left, b_mask, a_shift; u32 seed; phash_key_t *k; seed = pm->hash_seed; b_mask = (1 << pm->b_bits) - 1; a_shift = BITS (seed) - pm->a_bits; k = pm->keys; n_keys_left = vec_len (pm->keys); while (n_keys_left >= 2) { u32 xyz[6]; u32 x0, y0, z0; u32 x1, y1, z1; pm->key_seed2 (pm->private, k[0].key, k[1].key, &xyz); x0 = y0 = z0 = seed; x1 = y1 = z1 = seed; x0 += xyz[0]; y0 += xyz[1]; z0 += xyz[2]; x1 += xyz[3]; y1 += xyz[4]; z1 += xyz[5]; hash_mix32 (x0, y0, z0); hash_mix32 (x1, y1, z1); k[0].b = z0 & b_mask; k[1].b = z1 & b_mask; k[0].a = z0 >> a_shift; k[1].a = z1 >> a_shift; if (PREDICT_FALSE (a_shift >= BITS (z0))) k[0].a = k[1].a = 0; k += 2; n_keys_left -= 2; } if (n_keys_left >= 1) { u32 xyz[3]; u32 x0, y0, z0; pm->key_seed1 (pm->private, k[0].key, &xyz); x0 = y0 = z0 = seed; x0 += xyz[0]; y0 += xyz[1]; z0 += xyz[2]; hash_mix32 (x0, y0, z0); k[0].b = z0 & b_mask; k[0].a = z0 >> a_shift; if (PREDICT_FALSE (a_shift >= BITS (z0))) k[0].a = 0; k += 1; n_keys_left -= 1; } } static void init_keys_indirect_u64 (phash_main_t * pm) { int n_keys_left, b_mask, a_shift; u64 seed; phash_key_t *k; seed = pm->hash_seed; b_mask = (1 << pm->b_bits) - 1; a_shift = BITS (seed) - pm->a_bits; k = pm->keys; n_keys_left = vec_len (pm->keys); while (n_keys_left >= 2) { u64 xyz[6]; u64 x0, y0, z0; u64 x1, y1, z1; pm->key_seed2 (pm->private, k[0].key, k[1].key, &xyz); x0 = y0 = z0 = seed; x1 = y1 = z1 = seed; x0 += xyz[0]; y0 += xyz[1]; z0 += xyz[2]; x1 += xyz[3]; y1 += xyz[4]; z1 += xyz[5]; hash_mix64 (x0, y0, z0); hash_mix64 (x1, y1, z1); k[0].b = z0 & b_mask; k[1].b = z1 & b_mask; k[0].a = z0 >> a_shift; k[1].a = z1 >> a_shift; if (PREDICT_FALSE (a_shift >= BITS (z0))) k[0].a = k[1].a = 0; k += 2; n_keys_left -= 2; } if (n_keys_left >= 1) { u64 xyz[3]; u64 x0, y0, z0; pm->key_seed1 (pm->private, k[0].key, &xyz); x0 = y0 = z0 = seed; x0 += xyz[0]; y0 += xyz[1]; z0 += xyz[2]; hash_mix64 (x0, y0, z0); k[0].b = z0 & b_mask; k[0].a = 
z0 >> a_shift; if (PREDICT_FALSE (a_shift >= BITS (z0))) k[0].a = 0; k += 1; n_keys_left -= 1; } } /* * insert keys into table according to key->b * check if the initial hash might work */ static int init_tabb (phash_main_t * pm) { int no_collisions; phash_tabb_t *tb; phash_key_t *k, *l; if (pm->key_seed1) { if (pm->flags & PHASH_FLAG_MIX64) init_keys_indirect_u64 (pm); else init_keys_indirect_u32 (pm); } else { if (pm->flags & PHASH_FLAG_MIX64) init_keys_direct_u64 (pm); else init_keys_direct_u32 (pm); } if (!pm->tabb) vec_resize (pm->tabb, 1 << pm->b_bits); else vec_foreach (tb, pm->tabb) phash_tabb_free (tb); /* Two keys with the same (a,b) guarantees a collision */ no_collisions = 1; vec_foreach (k, pm->keys) { u32 i, *ki; tb = pm->tabb + k->b; ki = tb->keys; for (i = 0; i < vec_len (ki); i++) { l = pm->keys + ki[i]; if (k->a == l->a) { /* Given keys are supposed to be unique. */ if (pm->key_is_equal && pm->key_is_equal (pm->private, l->key, k->key)) clib_error ("duplicate keys"); no_collisions = 0; goto done; } } vec_add1 (tb->keys, k - pm->keys); } done: return no_collisions; } /* Try to apply an augmenting list */ static int apply (phash_main_t * pm, u32 tail, u32 rollback) { phash_key_t *k; phash_tabb_t *pb; phash_tabq_t *q_child, *q_parent; u32 ki, i, hash, child, parent; u32 stabb; /* scramble[tab[b]] */ int no_collision; no_collision = 1; /* Walk from child to parent until root is reached. */ for (child = tail - 1; child; child = parent) { q_child = &pm->tabq[child]; parent = q_child->parent_q; q_parent = &pm->tabq[parent]; /* find parent's list of siblings */ ASSERT (q_parent->b_q < vec_len (pm->tabb)); pb = pm->tabb + q_parent->b_q; /* erase old hash values */ stabb = pm->scramble[pb->val_b]; for (i = 0; i < vec_len (pb->keys); i++) { ki = pb->keys[i]; k = pm->keys + ki; hash = k->a ^ stabb; /* Erase hash for all of child's siblings. 
*/ if (ki == pm->tabh[hash]) pm->tabh[hash] = ~0; } /* change pb->val_b, which will change the hashes of all parent siblings */ pb->val_b = rollback ? q_child->oldval_q : q_child->newval_q; /* set new hash values */ stabb = pm->scramble[pb->val_b]; for (i = 0; i < vec_len (pb->keys); i++) { ki = pb->keys[i]; k = pm->keys + ki; hash = k->a ^ stabb; if (rollback) { if (parent == 0) continue; /* root never had a hash */ } else if (pm->tabh[hash] != ~0) { /* Very rare case: roll back any changes. */ apply (pm, tail, /* rollback changes */ 1); no_collision = 0; goto done; } pm->tabh[hash] = ki; } } done: return no_collision; } /* ------------------------------------------------------------------------------- augment(): Add item to the mapping. Construct a spanning tree of *b*s with *item* as root, where each parent can have all its hashes changed (by some new val_b) with at most one collision, and each child is the b of that collision. I got this from Tarjan's "Data Structures and Network Algorithms". The path from *item* to a *b* that can be remapped with no collision is an "augmenting path". Change values of tab[b] along the path so that the unmapped key gets mapped and the unused hash value gets used. Assuming 1 key per b, if m out of n hash values are still
/*
 * Copyright (c) 2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef included_clib_memcpy_avx2_h
#define included_clib_memcpy_avx2_h

#include <stdint.h>
#include <x86intrin.h>

static inline void
clib_mov16 (u8 * dst, const u8