summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOle Troan <ot@cisco.com>2018-09-06 22:58:05 +0200
committerNeale Ranns <nranns@cisco.com>2018-09-07 11:08:06 +0000
commitc84cbad785fa11c1dba92b8e87ad234d779d1cbd (patch)
treee712213958abe851573beb9cb06ad581404df9c6
parenta85e0de8aebe0775fefc03872cd3162081c8885b (diff)
PAPI: Unpack embedded types with variable length arrays.
Change-Id: Ic952ed5b837ac8409fd95e2b5cb92eb028ba0c40 Signed-off-by: Ole Troan <ot@cisco.com>
-rwxr-xr-xsrc/vpp-api/python/vpp_papi/tests/test_vpp_serializer.py76
-rw-r--r--src/vpp-api/python/vpp_papi/vpp_papi.py6
-rw-r--r--src/vpp-api/python/vpp_papi/vpp_serializer.py51
3 files changed, 97 insertions, 36 deletions
diff --git a/src/vpp-api/python/vpp_papi/tests/test_vpp_serializer.py b/src/vpp-api/python/vpp_papi/tests/test_vpp_serializer.py
index 9fee161684d..4e8a417c6fd 100755
--- a/src/vpp-api/python/vpp_papi/tests/test_vpp_serializer.py
+++ b/src/vpp-api/python/vpp_papi/tests/test_vpp_serializer.py
@@ -16,8 +16,8 @@ class TestAddType(unittest.TestCase):
['u32', 'is_int']])
b = un.pack({'is_int': 0x12345678})
- self.assertEqual(len(b), 4)
- nt = un.unpack(b)
+ nt, size = un.unpack(b)
+ self.assertEqual(len(b), size)
self.assertEqual(nt.is_bool, 0x12)
self.assertEqual(nt.is_int, 0x12345678)
@@ -31,12 +31,23 @@ class TestAddType(unittest.TestCase):
[["vl_api_ip4_address_t", "ip4"],
["vl_api_ip6_address_t", "ip6"]])
- address = VPPType('address', [['vl_api_address_family_t', 'af'],
- ['vl_api_address_union_t', 'un']])
+ address = VPPType('vl_api_address_t',
+ [['vl_api_address_family_t', 'af'],
+ ['vl_api_address_union_t', 'un']])
+
+ va_address_list = VPPType('list_addresses',
+ [['u8', 'count'],
+ ['vl_api_address_t', 'addresses',
+ 0, 'count']])
+
+ message_with_va_address_list = VPPType('msg_with_vla',
+ [['list_addresses',
+ 'vla_address'],
+ ['u8', 'is_cool']])
b = ip4.pack({'address': inet_pton(AF_INET, '1.1.1.1')})
self.assertEqual(len(b), 4)
- nt = ip4.unpack(b)
+ nt, size = ip4.unpack(b)
self.assertEqual(nt.address, inet_pton(AF_INET, '1.1.1.1'))
b = ip6.pack({'address': inet_pton(AF_INET6, '1::1')})
@@ -48,19 +59,45 @@ class TestAddType(unittest.TestCase):
{'address': inet_pton(AF_INET, '2.2.2.2')}}})
self.assertEqual(len(b), 20)
- nt = address.unpack(b)
+ nt, size = address.unpack(b)
self.assertEqual(nt.af, af.ADDRESS_IP4)
self.assertEqual(nt.un.ip4.address,
inet_pton(AF_INET, '2.2.2.2'))
self.assertEqual(nt.un.ip6.address,
inet_pton(AF_INET6, '0202:0202::'))
+ # List of addresses
+ address_list = []
+ for i in range(4):
+ address_list.append({'af': af.ADDRESS_IP4,
+ 'un':
+ {'ip4':
+ {'address': inet_pton(AF_INET, '2.2.2.2')}}})
+ b = va_address_list.pack({'count': len(address_list),
+ 'addresses': address_list})
+ self.assertEqual(len(b), 81)
+
+ nt, size = va_address_list.unpack(b)
+ self.assertEqual(nt.addresses[0].un.ip4.address,
+ inet_pton(AF_INET, '2.2.2.2'))
+
+ b = message_with_va_address_list.pack({'vla_address':
+ {'count': len(address_list),
+ 'addresses': address_list},
+ 'is_cool': 100})
+ self.assertEqual(len(b), 82)
+ nt, size = message_with_va_address_list.unpack(b)
+ self.assertEqual(nt.is_cool, 100)
+
def test_arrays(self):
# Test cases
# 1. Fixed list
# 2. Fixed list of variable length sub type
# 3. Variable length type
#
+ s = VPPType('str', [['u32', 'length'],
+ ['u8', 'string', 0, 'length']])
+
ip4 = VPPType('ip4_address', [['u8', 'address', 4]])
listip4 = VPPType('list_ip4_t', [['ip4_address', 'addresses', 4]])
valistip4 = VPPType('list_ip4_t',
@@ -76,26 +113,39 @@ class TestAddType(unittest.TestCase):
addresses.append({'address': inet_pton(AF_INET, '2.2.2.2')})
b = listip4.pack({'addresses': addresses})
self.assertEqual(len(b), 16)
- nt = listip4.unpack(b)
-
+ nt, size = listip4.unpack(b)
self.assertEqual(nt.addresses[0].address,
inet_pton(AF_INET, '2.2.2.2'))
b = valistip4.pack({'count': len(addresses), 'addresses': addresses})
self.assertEqual(len(b), 17)
- nt = valistip4.unpack(b)
+ nt, size = valistip4.unpack(b)
self.assertEqual(nt.count, 4)
self.assertEqual(nt.addresses[0].address,
inet_pton(AF_INET, '2.2.2.2'))
b = valistip4_legacy.pack({'foo': 1, 'addresses': addresses})
self.assertEqual(len(b), 17)
- nt = valistip4_legacy.unpack(b)
+ nt, size = valistip4_legacy.unpack(b)
self.assertEqual(len(nt.addresses), 4)
self.assertEqual(nt.addresses[0].address,
inet_pton(AF_INET, '2.2.2.2'))
+ string = 'foobar foobar'
+ b = s.pack({'length': len(string), 'string': string})
+ nt, size = s.unpack(b)
+ self.assertEqual(len(b), size)
+
+ def test_string(self):
+ s = VPPType('str', [['u32', 'length'],
+ ['u8', 'string', 0, 'length']])
+
+ string = ''
+ b = s.pack({'length': len(string), 'string': string})
+ nt, size = s.unpack(b)
+ self.assertEqual(len(b), size)
+
def test_message(self):
foo = VPPMessage('foo', [['u16', '_vl_msg_id'],
['u8', 'client_index'],
@@ -103,8 +153,8 @@ class TestAddType(unittest.TestCase):
{"crc": "0x559b9f3c"}])
b = foo.pack({'_vl_msg_id': 1, 'client_index': 5,
'something': 200})
- self.assertEqual(len(b), 4)
- nt = foo.unpack(b)
+ nt, size = foo.unpack(b)
+ self.assertEqual(len(b), size)
self.assertEqual(nt.something, 200)
def test_abf(self):
@@ -189,7 +239,7 @@ class TestAddType(unittest.TestCase):
'_vl_msg_id': 1066,
'policy': policy})
- nt = abf_policy_add_del.unpack(b)
+ nt, size = abf_policy_add_del.unpack(b)
self.assertEqual(nt.policy.paths[0].next_hop,
b'\x10\x02\x02\xac\x00\x00\x00\x00'
b'\x00\x00\x00\x00\x00\x00\x00\x00')
diff --git a/src/vpp-api/python/vpp_papi/vpp_papi.py b/src/vpp-api/python/vpp_papi/vpp_papi.py
index d5ad837331e..4f765ecbd18 100644
--- a/src/vpp-api/python/vpp_papi/vpp_papi.py
+++ b/src/vpp-api/python/vpp_papi/vpp_papi.py
@@ -523,8 +523,7 @@ class VPP():
if not msg:
self.logger.warning('vpp_api.read failed')
return
-
- i, ci = self.header.unpack(msg, 0)
+ (i, ci), size = self.header.unpack(msg, 0)
if self.id_names[i] == 'rx_thread_exit':
return
@@ -535,8 +534,7 @@ class VPP():
if not msgobj:
raise IOError(2, 'Reply message undefined')
- r = msgobj.unpack(msg)
-
+ r, size = msgobj.unpack(msg)
return r
def msg_handler_async(self, msg):
diff --git a/src/vpp-api/python/vpp_papi/vpp_serializer.py b/src/vpp-api/python/vpp_papi/vpp_serializer.py
index cd9a281964a..2177cdbb2e4 100644
--- a/src/vpp-api/python/vpp_papi/vpp_serializer.py
+++ b/src/vpp-api/python/vpp_papi/vpp_serializer.py
@@ -48,7 +48,7 @@ class BaseTypes():
return self.packer.pack(data)
def unpack(self, data, offset, result=None):
- return self.packer.unpack_from(data, offset)[0]
+ return self.packer.unpack_from(data, offset)[0], self.packer.size
types = {}
@@ -102,15 +102,18 @@ class FixedList():
def unpack(self, data, offset=0, result=None):
# Return a list of arguments
result = []
+ total = 0
for e in range(self.num):
- x = self.packer.unpack(data, offset)
+ x, size = self.packer.unpack(data, offset)
result.append(x)
- offset += self.packer.size
- return result
+ offset += size
+ total += size
+ return result, total
class VLAList():
def __init__(self, name, field_type, len_field_name, index):
+ self.name = name
self.index = index
self.packer = types[field_type]
self.size = self.packer.size
@@ -132,21 +135,22 @@ class VLAList():
def unpack(self, data, offset=0, result=None):
# Return a list of arguments
+ total = 0
# u8 array
if self.packer.size == 1:
if result[self.index] == 0:
- return b''
+ return b'', 0
p = BaseTypes('u8', result[self.index])
- r = p.unpack(data, offset)
- return r
+ return p.unpack(data, offset)
r = []
for e in range(result[self.index]):
- x = self.packer.unpack(data, offset)
+ x, size = self.packer.unpack(data, offset)
r.append(x)
- offset += self.packer.size
- return r
+ offset += size
+ total += size
+ return r, total
class VLAList_legacy():
@@ -164,16 +168,18 @@ class VLAList_legacy():
return b
def unpack(self, data, offset=0, result=None):
+ total = 0
# Return a list of arguments
if (len(data) - offset) % self.packer.size:
raise ValueError('Legacy Variable Length Array length mismatch.')
elements = int((len(data) - offset) / self.packer.size)
r = []
for e in range(elements):
- x = self.packer.unpack(data, offset)
+ x, size = self.packer.unpack(data, offset)
r.append(x)
offset += self.packer.size
- return r
+ total += size
+ return r, total
class VPPEnumType():
@@ -198,8 +204,8 @@ class VPPEnumType():
return types['u32'].pack(data, kwargs)
def unpack(self, data, offset=0, result=None):
- x = types['u32'].unpack(data, offset)
- return self.enum(x)
+ x, size = types['u32'].unpack(data, offset)
+ return self.enum(x), size
class VPPUnionType():
@@ -239,9 +245,13 @@ class VPPUnionType():
def unpack(self, data, offset=0, result=None):
r = []
+ maxsize = 0
for k, p in self.packers.items():
- r.append(p.unpack(data, offset))
- return self.tuple._make(r)
+ x, size = p.unpack(data, offset)
+ if size > maxsize:
+ maxsize = size
+ r.append(x)
+ return self.tuple._make(r), maxsize
class VPPType():
@@ -310,13 +320,16 @@ class VPPType():
def unpack(self, data, offset=0, result=None):
# Return a list of arguments
result = []
+ total = 0
for p in self.packers:
- x = p.unpack(data, offset, result)
+ x, size = p.unpack(data, offset, result)
if type(x) is tuple and len(x) == 1:
x = x[0]
result.append(x)
- offset += p.size
- return self.tuple._make(result)
+ offset += size
+ total += size
+ t = self.tuple._make(result)
+ return t, total
class VPPMessage(VPPType):
709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/** @cond DOCUMENTATION_IS_IN_BIHASH_DOC_H */

/*
 * Carve nbytes out of the table's arena and return the address of the
 * carved region. The request is first rounded up to a whole number of
 * cache lines. os_out_of_memory () is called when the arena's next-free
 * offset moves past the arena size.
 */
static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes)
{
  uword offset;

  /* Round the request up to a multiple of the cache line size */
  nbytes = (nbytes + CLIB_CACHE_LINE_BYTES - 1)
    & ~(CLIB_CACHE_LINE_BYTES - 1);

  /* The new region starts at the current next-free offset */
  offset = alloc_arena_next (h);
  alloc_arena_next (h) += nbytes;

  if (alloc_arena_next (h) > alloc_arena_size (h))
    os_out_of_memory ();

  /* Offsets are relative to the arena base */
  return (void *) (uword) (alloc_arena (h) + offset);
}

/*
 * Back the table with memory: allocate the arena of h->memory_size bytes,
 * reset the arena bookkeeping, carve the bucket array out of the arena and
 * finally flag the table as instantiated.
 */
void BV (clib_bihash_instantiate) (BVT (clib_bihash) * h)
{
  /* Arena base address, next-free offset and total size */
  alloc_arena (h) = (uword) clib_mem_vm_alloc (h->memory_size);
  alloc_arena_next (h) = 0;
  alloc_arena_size (h) = h->memory_size;

  /* The bucket array is the first allocation taken from the arena */
  h->buckets = BV (alloc_aligned) (h, h->nbuckets * sizeof (h->buckets[0]));

  /* Barrier sits between storing h->buckets and setting the flag */
  CLIB_MEMORY_BARRIER ();
  h->instantiated = 1;
}

/*
 * Initialize the table a->h from an argument block.
 *
 * Copies name, bucket count, memory size and format function into the
 * table, optionally records the table on clib_all_bihashes, (re)creates
 * the allocation lock, and instantiates the table right away when
 * a->instantiate_immediately is set. a->nbuckets is rewritten in place
 * to 1 << max_log2 (a->nbuckets).
 */
void BV (clib_bihash_init2) (BVT (clib_bihash_init2_args) * a)
{
  BVT (clib_bihash) * h = a->h;
  void *saved_heap;
  int idx;
  int already_listed = 0;

  /* Normalize the bucket count before copying it into the table */
  a->nbuckets = 1 << (max_log2 (a->nbuckets));

  h->name = (u8 *) a->name;
  h->nbuckets = a->nbuckets;
  h->log2_nbuckets = max_log2 (a->nbuckets);
  h->memory_size = a->memory_size;
  h->instantiated = 0;
  h->fmt_fn = a->fmt_fn;

  /* No arena yet; it is allocated when the table is instantiated */
  alloc_arena (h) = 0;

  /*
   * Sanity-check the requested size: it must be representable in
   * BIHASH_BUCKET_OFFSET_BITS bits. The original note here says the
   * limit is 64 Gbytes without shifting offsets by
   * CLIB_LOG2_CACHE_LINE_BYTES.
   */
  ASSERT (h->memory_size < (1ULL << BIHASH_BUCKET_OFFSET_BITS));

  /* Record the table on the global list, unless it is already there */
  if (a->dont_add_to_all_bihash_list == 0)
    {
      for (idx = 0; idx < vec_len (clib_all_bihashes); idx++)
	{
	  if (clib_all_bihashes[idx] == h)
	    {
	      already_listed = 1;
	      break;
	    }
	}

      if (!already_listed)
	{
	  saved_heap = clib_all_bihash_set_heap ();
	  vec_add1 (clib_all_bihashes, (void *) h);
	  clib_mem_set_heap (saved_heap);
	}
    }

  /* Drop a lock left over from an earlier initialization */
  if (h->alloc_lock)
    clib_mem_free ((void *) h->alloc_lock);

  /*
   * The lock is created now so that it can serialize the first add,
   * which may have to instantiate the table.
   */
  h->alloc_lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
					  CLIB_CACHE_LINE_BYTES);
  h->alloc_lock[0] = 0;

  if (a->instantiate_immediately)
    BV (clib_bihash_instantiate) (h);
}

/*
 * Convenience initializer: fills an otherwise all-zero init2 argument
 * block with the table, name, bucket count and memory size, then hands
 * it to clib_bihash_init2.
 */
void BV (clib_bihash_init)
  (BVT (clib_bihash) * h, char *name, u32 nbuckets, uword memory_size)
{
  BVT (clib_bihash_init2_args) args;

  /* Every option not set explicitly below stays zero */
  memset (&args, 0, sizeof (args));

  args.h = h;
  args.name = name;
  args.nbuckets = nbuckets;
  args.memory_size = memory_size;

  BV (clib_bihash_init2) (&args);
}

#if BIHASH_32_64_SVM
#if !defined (MFD_ALLOW_SEALING)
#define MFD_ALLOW_SEALING 0x0002U
#endif

/*
 * Create a memfd-backed table and set it up from the creating side.
 *
 * A memfd of memory_size bytes is created and mapped shared; the bucket
 * array, the allocation lock and the freelist vector are all carved out
 * of that mapping, and their offsets are published in the shared header
 * (h->sh) at the start of the mapping.
 *
 * On memfd_create, ftruncate or mmap failure a warning is logged and the
 * function returns without initializing the table; any descriptor that
 * was created is closed first. The function returns void, so a caller
 * cannot observe the failure through the return value.
 */
void BV (clib_bihash_master_init_svm)
  (BVT (clib_bihash) * h, char *name, u32 nbuckets, u64 memory_size)
{
  uword bucket_size;
  u8 *mmap_addr;
  vec_header_t *freelist_vh;
  int fd;

  ASSERT (memory_size < (1ULL << 32));
  /* Set up for memfd sharing */
  if ((fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1)
    {
      clib_unix_warning ("memfd_create");
      return;
    }

  if (ftruncate (fd, memory_size) < 0)
    {
      clib_unix_warning ("ftruncate");
      /* Do not leak the descriptor created above */
      (void) close (fd);
      return;
    }

  /* Not mission-critical, complain and continue */
  if ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)
    clib_unix_warning ("fcntl (F_ADD_SEALS)");

  mmap_addr = mmap (0, memory_size,
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 /* offset */ );

  if (mmap_addr == MAP_FAILED)
    {
      clib_unix_warning ("mmap failed");
      ASSERT (0);
      /*
       * ASSERT compiles away in non-debug images: bail out instead of
       * dereferencing MAP_FAILED below.
       */
      (void) close (fd);
      return;
    }

  h->sh = (void *) mmap_addr;
  h->memfd = fd;
  nbuckets = 1 << (max_log2 (nbuckets));

  h->name = (u8 *) name;
  h->sh->nbuckets = h->nbuckets = nbuckets;
  h->log2_nbuckets = max_log2 (nbuckets);

  /*
   * Arena allocations start one cache line into the mapping.
   * NOTE(review): presumably this reserves room for the shared header
   * at the mapping base - confirm against the header's size.
   */
  alloc_arena (h) = (u64) (uword) mmap_addr;
  alloc_arena_next (h) = CLIB_CACHE_LINE_BYTES;
  alloc_arena_size (h) = memory_size;

  bucket_size = nbuckets * sizeof (h->buckets[0]);
  h->buckets = BV (alloc_aligned) (h, bucket_size);
  h->sh->buckets_as_u64 = (u64) BV (clib_bihash_get_offset) (h, h->buckets);

  /* The lock lives inside the mapping rather than on the heap */
  h->alloc_lock = BV (alloc_aligned) (h, CLIB_CACHE_LINE_BYTES);
  h->alloc_lock[0] = 0;

  h->sh->alloc_lock_as_u64 =
    (u64) BV (clib_bihash_get_offset) (h, (void *) h->alloc_lock);

  /* Hand-build a fixed-length vector for the freelists in the arena */
  freelist_vh =
    BV (alloc_aligned) (h,
			sizeof (vec_header_t) +
			BIHASH_FREELIST_LENGTH * sizeof (u64));
  freelist_vh->len = BIHASH_FREELIST_LENGTH;
  freelist_vh->dlmalloc_header_offset = 0xDEADBEEF;
  h->sh->freelists_as_u64 =
    (u64) BV (clib_bihash_get_offset) (h, freelist_vh->vector_data);
  h->freelists = (void *) (freelist_vh->vector_data);

  h->fmt_fn = NULL;
}

/*
 * Attach to an existing memfd-backed table through descriptor fd.
 *
 * The first 4096 bytes are mapped read-only to read the segment size
 * from the shared header, then the whole segment is mapped read/write
 * and the bucket array, allocation lock and freelists are located via
 * the offsets stored in the shared header.
 *
 * fd is closed by this function, on success and on failure alike. On a
 * mapping failure a warning is logged and the function returns without
 * initializing the table; the function returns void, so a caller cannot
 * observe the failure through the return value.
 */
void BV (clib_bihash_slave_init_svm)
  (BVT (clib_bihash) * h, char *name, int fd)
{
  u8 *mmap_addr;
  u64 memory_size;
  BVT (clib_bihash_shared_header) * sh;

  /* Trial mapping, to learn the segment size */
  mmap_addr = mmap (0, 4096, PROT_READ, MAP_SHARED, fd, 0 /* offset */ );
  if (mmap_addr == MAP_FAILED)
    {
      clib_unix_warning ("trial mmap failed");
      ASSERT (0);
      /*
       * ASSERT compiles away in non-debug images: bail out instead of
       * reading the header through MAP_FAILED.
       */
      (void) close (fd);
      return;
    }

  sh = (BVT (clib_bihash_shared_header) *) mmap_addr;

  memory_size = sh->alloc_arena_size;

  munmap (mmap_addr, 4096);

  /* Actual mapping, at the required size */
  mmap_addr = mmap (0, memory_size,
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 /* offset */ );

  if (mmap_addr == MAP_FAILED)
    {
      clib_unix_warning ("mmap failed");
      ASSERT (0);
      /* Same reasoning as for the trial mapping above */
      (void) close (fd);
      return;
    }

  /* The mapping stays valid after the descriptor is closed */
  (void) close (fd);

  h->sh = (void *) mmap_addr;
  alloc_arena (h) = (u64) (uword) mmap_addr;
  h->memfd = -1;

  h->name = (u8 *) name;
  h->buckets = BV (clib_bihash_get_value) (h, h->sh->buckets_as_u64);
  h->nbuckets = h->sh->nbuckets;
  h->log2_nbuckets = max_log2 (h->nbuckets);

  h->alloc_lock = BV (clib_bihash_get_value) (h, h->sh->alloc_lock_as_u64);
  h->freelists = BV (clib_bihash_get_value) (h, h->sh->freelists_as_u64);
  h->fmt_fn = NULL;
}
#endif /* BIHASH_32_64_SVM */

/*
 * Record fmt_fn as the table's format function (h->fmt_fn), replacing
 * whatever was set before.
 */
void
BV (clib_bihash_set_kvp_format_fn) (BVT (clib_bihash) * h,
				    format_function_t * fmt_fn)
{
  h->fmt_fn = fmt_fn;
}

/*
 * Release everything owned by the table, zero the table structure and
 * remove the table from clib_all_bihashes.
 *
 * For an instantiated table the per-thread working-copy vectors, the
 * freelist vector (non-SVM builds) or the memfd (SVM builds) and the
 * arena itself are released. The allocation lock created by
 * clib_bihash_init2 is released as well, whether or not the table was
 * ever instantiated. A warning is logged when the table is not found on
 * the global list.
 */
void BV (clib_bihash_free) (BVT (clib_bihash) * h)
{
  int i;

  if (PREDICT_FALSE (h->instantiated == 0))
    goto never_initialized;

  h->instantiated = 0;
  vec_free (h->working_copies);
  vec_free (h->working_copy_lengths);
#if BIHASH_32_64_SVM == 0
  vec_free (h->freelists);
#else
  if (h->memfd > 0)
    (void) close (h->memfd);
#endif
  clib_mem_vm_free ((void *) (uword) (alloc_arena (h)), alloc_arena_size (h));
never_initialized:
#if BIHASH_32_64_SVM == 0
  /*
   * clib_bihash_init2 allocates the lock from the heap before the table
   * is instantiated; free it here or the pointer is lost when the
   * structure is zeroed below. In SVM builds the lock is carved out of
   * the shared arena by the SVM init path, so it must not be passed to
   * clib_mem_free.
   */
  if (h->alloc_lock)
    clib_mem_free ((void *) h->alloc_lock);
#endif
  clib_memset (h, 0, sizeof (*h));
  for (i = 0; i < vec_len (clib_all_bihashes); i++)
    {
      if ((void *) h == clib_all_bihashes[i])
	{
	  vec_delete (clib_all_bihashes, 1, i);
	  return;
	}
    }
  /* Go through uword so the pointer-to-u64 cast is clean on 32-bit */
  clib_warning ("Couldn't find hash table %llx on clib_all_bihashes...",
		(u64) (uword) h);
}

/*
 * Return a block of (1 << log2_pages) value pages with every byte set
 * to 0xff. The block is popped from the matching freelist when one is
 * available, otherwise it is carved fresh from the arena. The caller
 * must hold the allocation lock.
 */
static
BVT (clib_bihash_value) *
BV (value_alloc) (BVT (clib_bihash) * h, u32 log2_pages)
{
  BVT (clib_bihash_value) * pages = 0;

  ASSERT (h->alloc_lock[0]);

#if BIHASH_32_64_SVM
  ASSERT (log2_pages < vec_len (h->freelists));
#endif

  if (log2_pages < vec_len (h->freelists) && h->freelists[log2_pages] != 0)
    {
      /* Pop the head of the freelist for this size class */
      pages =
	BV (clib_bihash_get_value) (h, (uword) h->freelists[log2_pages]);
      h->freelists[log2_pages] = pages->next_free_as_u64;
    }
  else
    {
      /* Nothing to reuse: make sure the slot exists, then allocate */
      vec_validate_init_empty (h->freelists, log2_pages, 0);
      pages = BV (alloc_aligned) (h, (sizeof (*pages) * (1 << log2_pages)));
    }

  ASSERT (pages);
  /*
   * The length is spelled out with (1 << log2_pages) on purpose: the
   * original note here reports that gcc complains about a zero length
   * argument when vec_len () of the block is used instead.
   */
  clib_memset (pages, 0xff, sizeof (*pages) * (1 << log2_pages));
  return pages;
}

/*
 * Push the block v of (1 << log2_pages) value pages onto the freelist
 * for its size class. The caller must hold the allocation lock. When
 * CLIB_DEBUG > 0 the block is first filled with 0xFE.
 */
static void
BV (value_free) (BVT (clib_bihash) * h, BVT (clib_bihash_value) * v,
		 u32 log2_pages)
{
  u64 *list_head;

  ASSERT (h->alloc_lock[0]);
  ASSERT (vec_len (h->freelists) > log2_pages);

  if (CLIB_DEBUG > 0)
    clib_memset (v, 0xFE, sizeof (*v) * (1 << log2_pages));

  /* Link the block in front of the current head, stored as an offset */
  list_head = &h->freelists[log2_pages];
  v->next_free_as_u64 = (u64) * list_head;
  *list_head = (u64) BV (clib_bihash_get_offset) (h, v);
}

/*
 * Copy bucket b's current pages into the calling thread's working copy
 * and repoint the bucket at that copy.
 *
 * The bucket's original contents are remembered in h->saved_bucket so
 * the caller can still find (and later free) the original pages. The
 * per-thread working copy is grown from the arena when the bucket has
 * more pages than the thread's current working copy. The caller must
 * hold the allocation lock.
 */
static inline void
BV (make_working_copy) (BVT (clib_bihash) * h, BVT (clib_bihash_bucket) * b)
{
  BVT (clib_bihash_value) * v;
  BVT (clib_bihash_bucket) working_bucket __attribute__ ((aligned (8)));
  BVT (clib_bihash_value) * working_copy;
  u32 thread_index = os_get_thread_index ();
  int log2_working_copy_length;

  ASSERT (h->alloc_lock[0]);

  /* First use by this thread: make room in both per-thread vectors */
  if (thread_index >= vec_len (h->working_copies))
    {
      vec_validate (h->working_copies, thread_index);
      vec_validate_init_empty (h->working_copy_lengths, thread_index, ~0);
    }

  /*
   * working_copies are per-cpu so that near-simultaneous
   * updates from multiple threads will not result in sporadic, spurious
   * lookup failures.
   */
  working_copy = h->working_copies[thread_index];
  log2_working_copy_length = h->working_copy_lengths[thread_index];

  /* Remember the bucket as it was before it gets repointed below */
  h->saved_bucket.as_u64 = b->as_u64;

  if (b->log2_pages > log2_working_copy_length)
    {
      /*
       * It's not worth the bookkeeping to free working copies
       *   if (working_copy)
       *     clib_mem_free (working_copy);
       */
      working_copy = BV (alloc_aligned)
	(h, sizeof (working_copy[0]) * (1 << b->log2_pages));
      h->working_copy_lengths[thread_index] = b->log2_pages;
      h->working_copies[thread_index] = working_copy;

      BV (clib_bihash_increment_stat) (h, BIHASH_STAT_working_copy_lost,
				       1ULL << b->log2_pages);
    }

  v = BV (clib_bihash_get_value) (h, b->offset);

  /* Fill the working copy first, then publish it with a single store */
  clib_memcpy_fast (working_copy, v, sizeof (*v) * (1 << b->log2_pages));
  working_bucket.as_u64 = b->as_u64;
  working_bucket.offset = BV (clib_bihash_get_offset) (h, working_copy);
  CLIB_MEMORY_BARRIER ();
  b->as_u64 = working_bucket.as_u64;
  h->working_copies[thread_index] = working_copy;
}

/*
 * Rehash every in-use entry of old_values into a freshly allocated
 * block of (1 << new_log2_pages) pages.
 *
 * Each entry goes to the page selected by its hash (shifted right by
 * h->log2_nbuckets and masked to the new page count), in the first free
 * slot of that page. Returns the new block, or 0 after releasing the
 * new block when some page has no free slot left. The caller must hold
 * the allocation lock.
 */
static
BVT (clib_bihash_value) *
BV (split_and_rehash)
  (BVT (clib_bihash) * h,
   BVT (clib_bihash_value) * old_values, u32 old_log2_pages,
   u32 new_log2_pages)
{
  BVT (clib_bihash_value) * new_values, *home_page;
  int old_idx, slot, n_old_kvs, placed;

  ASSERT (h->alloc_lock[0]);

  new_values = BV (value_alloc) (h, new_log2_pages);
  n_old_kvs = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE;

  for (old_idx = 0; old_idx < n_old_kvs; old_idx++)
    {
      u64 page_index;

      /* Unused entries are not carried over */
      if (BV (clib_bihash_is_free) (&(old_values->kvp[old_idx])))
	continue;

      /* Pick the entry's home page in the new block */
      page_index = BV (clib_bihash_hash) (&(old_values->kvp[old_idx]));
      page_index >>= h->log2_nbuckets;
      page_index &= (1 << new_log2_pages) - 1;
      home_page = &new_values[page_index];

      /* Take the first free slot on that page, if there is one */
      placed = 0;
      for (slot = 0; slot < BIHASH_KVP_PER_PAGE && !placed; slot++)
	{
	  if (BV (clib_bihash_is_free) (&(home_page->kvp[slot])))
	    {
	      clib_memcpy_fast (&(home_page->kvp[slot]),
				&(old_values->kvp[old_idx]),
				sizeof (home_page->kvp[slot]));
	      placed = 1;
	    }
	}

      if (!placed)
	{
	  /* Home page is full: give up so the caller can try again */
	  BV (value_free) (h, new_values, new_log2_pages);
	  return 0;
	}
    }

  return new_values;
}

/*
 * Copy every in-use entry of old_values, in order, into a freshly
 * allocated block of (1 << new_log2_pages) pages that is treated as one
 * flat array of entries (no per-page hashing).
 *
 * Returns the new block. Returns 0, after logging a warning and
 * releasing the new block, when an in-use entry cannot be placed because
 * the new block has no free slot left. The caller must hold the
 * allocation lock.
 */
static
BVT (clib_bihash_value) *
BV (split_and_rehash_linear)
  (BVT (clib_bihash) * h,
   BVT (clib_bihash_value) * old_values, u32 old_log2_pages,
   u32 new_log2_pages)
{
  BVT (clib_bihash_value) * new_values;
  int i, j, new_length, old_length;

  ASSERT (h->alloc_lock[0]);

  new_values = BV (value_alloc) (h, new_log2_pages);
  new_length = (1 << new_log2_pages) * BIHASH_KVP_PER_PAGE;
  old_length = (1 << old_log2_pages) * BIHASH_KVP_PER_PAGE;

  j = 0;
  /* Across the old value array */
  for (i = 0; i < old_length; i++)
    {
      /*
       * Old value not in use? Forget it. This test is made before the
       * scan of the new array, so a free old entry is skipped even once
       * the new array has been scanned to its end; it must not be
       * reported as a failure.
       */
      if (BV (clib_bihash_is_free) (&(old_values->kvp[i])))
	continue;

      /* Find a free slot in the new linear scan bucket */
      for (; j < new_length; j++)
	{
	  if (BV (clib_bihash_is_free) (&(new_values->kvp[j])))
	    break;
	}

      if (j >= new_length)
	{
	  /* This should never happen... */
	  clib_warning ("BUG: linear rehash failed!");
	  BV (value_free) (h, new_values, new_log2_pages);
	  return 0;
	}

      /* Copy the old value and move along */
      clib_memcpy_fast (&(new_values->kvp[j]), &(old_values->kvp[i]),
			sizeof (new_values->kvp[j]));
      j++;
    }
  return new_values;
}

/*
 * Add, replace or delete one key/value pair.
 *
 * is_add:      0 = delete, 1 = add (replacing an existing entry with the
 *              same key), 2 = add but do not overwrite an existing key.
 * is_stale_cb: optional. On an add that finds neither the key nor a free
 *              slot, the first entry for which is_stale_cb (entry, arg)
 *              returns non-zero is overwritten instead of splitting the
 *              bucket.
 * arg:         passed through to is_stale_cb.
 *
 * Returns 0 on success, -1 for a delete on a table that is not yet
 * instantiated or on an empty bucket, -2 when is_add == 2 and the key
 * already exists, -3 when the key to delete is not found.
 *
 * The bucket lock is held while the bucket is examined and updated in
 * place. The allocation lock is taken around arena/freelist operations
 * and around the whole split path.
 */
static inline int BV (clib_bihash_add_del_inline)
  (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, int is_add,
   int (*is_stale_cb) (BVT (clib_bihash_kv) *, void *), void *arg)
{
  u32 bucket_index;
  BVT (clib_bihash_bucket) * b, tmp_b;
  BVT (clib_bihash_value) * v, *new_v, *save_new_v, *working_copy;
  int i, limit;
  u64 hash, new_hash;
  u32 new_log2_pages, old_log2_pages;
  u32 thread_index = os_get_thread_index ();
  int mark_bucket_linear;
  int resplit_once;

  /*
   * Create the table (is_add=1,2), or flunk the request now (is_add=0)
   * Use the alloc_lock to protect the instantiate operation.
   */
  if (PREDICT_FALSE (h->instantiated == 0))
    {
      if (is_add == 0)
	return (-1);

      BV (clib_bihash_alloc_lock) (h);
      /* Re-check under the lock: another thread may have instantiated */
      if (h->instantiated == 0)
	BV (clib_bihash_instantiate) (h);
      BV (clib_bihash_alloc_unlock) (h);
    }

  hash = BV (clib_bihash_hash) (add_v);

  /* Low hash bits select the bucket */
  bucket_index = hash & (h->nbuckets - 1);
  b = &h->buckets[bucket_index];

  /* The remaining hash bits select the page within the bucket */
  hash >>= h->log2_nbuckets;

  BV (clib_bihash_lock_bucket) (b);

  /* First elt in the bucket? */
  if (BV (clib_bihash_bucket_is_empty) (b))
    {
      if (is_add == 0)
	{
	  BV (clib_bihash_unlock_bucket) (b);
	  return (-1);
	}

      BV (clib_bihash_alloc_lock) (h);
      v = BV (value_alloc) (h, 0);
      BV (clib_bihash_alloc_unlock) (h);

      *v->kvp = *add_v;
      tmp_b.as_u64 = 0;		/* clears bucket lock */
      tmp_b.offset = BV (clib_bihash_get_offset) (h, v);
      tmp_b.refcnt = 1;
      CLIB_MEMORY_BARRIER ();

      b->as_u64 = tmp_b.as_u64;	/* unlocks the bucket */
      BV (clib_bihash_increment_stat) (h, BIHASH_STAT_alloc_add, 1);

      return (0);
    }

  /* WARNING: we're still looking at the live copy... */
  limit = BIHASH_KVP_PER_PAGE;
  v = BV (clib_bihash_get_value) (h, b->offset);

  /*
   * Hashed buckets: look only at the key's home page.
   * Linear buckets: scan every page, starting from the first.
   */
  v += (b->linear_search == 0) ? hash & ((1 << b->log2_pages) - 1) : 0;
  if (b->linear_search)
    limit <<= b->log2_pages;

  if (is_add)
    {
      /*
       * Because reader threads are looking at live data,
       * we have to be extra careful. Readers do NOT hold the
       * bucket lock. We need to be SLOWER than a search, past the
       * point where readers CHECK the bucket lock.
       */

      /*
       * For obvious (in hindsight) reasons, see if we're supposed to
       * replace an existing key, then look for an empty slot.
       */
      for (i = 0; i < limit; i++)
	{
	  if (BV (clib_bihash_key_compare) (v->kvp[i].key, add_v->key))
	    {
	      /* Add but do not overwrite? */
	      if (is_add == 2)
		{
		  BV (clib_bihash_unlock_bucket) (b);
		  return (-2);
		}

	      CLIB_MEMORY_BARRIER ();	/* Add a delay */
	      clib_memcpy_fast (&(v->kvp[i]), add_v, sizeof (*add_v));
	      BV (clib_bihash_unlock_bucket) (b);
	      BV (clib_bihash_increment_stat) (h, BIHASH_STAT_replace, 1);
	      return (0);
	    }
	}
      /*
       * Look for an empty slot. If found, use it
       */
      for (i = 0; i < limit; i++)
	{
	  if (BV (clib_bihash_is_free) (&(v->kvp[i])))
	    {
	      /*
	       * Copy the value first, so that if a reader manages
	       * to match the new key, the value will be right...
	       */
	      clib_memcpy_fast (&(v->kvp[i].value),
				&add_v->value, sizeof (add_v->value));
	      CLIB_MEMORY_BARRIER ();	/* Make sure the value has settled */
	      clib_memcpy_fast (&(v->kvp[i]), &add_v->key,
				sizeof (add_v->key));
	      b->refcnt++;
	      ASSERT (b->refcnt > 0);
	      BV (clib_bihash_unlock_bucket) (b);
	      BV (clib_bihash_increment_stat) (h, BIHASH_STAT_add, 1);
	      return (0);
	    }
	}
      /* look for stale data to overwrite */
      if (is_stale_cb)
	{
	  for (i = 0; i < limit; i++)
	    {
	      if (is_stale_cb (&(v->kvp[i]), arg))
		{
		  CLIB_MEMORY_BARRIER ();
		  clib_memcpy_fast (&(v->kvp[i]), add_v, sizeof (*add_v));
		  BV (clib_bihash_unlock_bucket) (b);
		  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_replace, 1);
		  return (0);
		}
	    }
	}
      /* Out of space in this bucket, split the bucket... */
    }
  else				/* delete case */
    {
      for (i = 0; i < limit; i++)
	{
	  /* Found the key? Kill it... */
	  if (BV (clib_bihash_key_compare) (v->kvp[i].key, add_v->key))
	    {
	      clib_memset (&(v->kvp[i]), 0xff, sizeof (*(add_v)));
	      /* Is the bucket empty? */
	      if (PREDICT_TRUE (b->refcnt > 1))
		{
		  b->refcnt--;
		  BV (clib_bihash_unlock_bucket) (b);
		  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_del, 1);
		  return (0);
		}
	      else		/* yes, free it */
		{
		  /* Save old bucket value, need log2_pages to free it */
		  tmp_b.as_u64 = b->as_u64;
		  CLIB_MEMORY_BARRIER ();

		  /* Kill and unlock the bucket */
		  b->as_u64 = 0;

		  /* And free the backing storage */
		  BV (clib_bihash_alloc_lock) (h);
		  /* Note: v currently points into the middle of the bucket */
		  v = BV (clib_bihash_get_value) (h, tmp_b.offset);
		  BV (value_free) (h, v, tmp_b.log2_pages);
		  BV (clib_bihash_alloc_unlock) (h);
		  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_del_free,
						   1);
		  return (0);
		}
	    }
	}
      /* Not found... */
      BV (clib_bihash_unlock_bucket) (b);
      return (-3);
    }

  /*
   * Split path: only reached by an add that found neither its key, a
   * free slot, nor a stale entry. The bucket lock is still held.
   */

  /* Move readers to a (locked) temp copy of the bucket */
  BV (clib_bihash_alloc_lock) (h);
  BV (make_working_copy) (h, b);

  v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset);

  old_log2_pages = h->saved_bucket.log2_pages;
  new_log2_pages = old_log2_pages + 1;
  mark_bucket_linear = 0;
  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_split_add, 1);
  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_splits, old_log2_pages);

  working_copy = h->working_copies[thread_index];
  resplit_once = 0;
  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_splits, 1);

  /* First attempt: double the number of pages */
  new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages,
				 new_log2_pages);
  if (new_v == 0)
    {
    try_resplit:
      resplit_once = 1;
      new_log2_pages++;
      /* Try re-splitting. If that fails, fall back to linear search */
      new_v = BV (split_and_rehash) (h, working_copy, old_log2_pages,
				     new_log2_pages);
      if (new_v == 0)
	{
	mark_linear:
	  new_log2_pages--;
	  /* pinned collisions, use linear search */
	  new_v =
	    BV (split_and_rehash_linear) (h, working_copy, old_log2_pages,
					  new_log2_pages);
	  mark_bucket_linear = 1;
	  BV (clib_bihash_increment_stat) (h, BIHASH_STAT_linear, 1);
	}
      BV (clib_bihash_increment_stat) (h, BIHASH_STAT_resplit, 1);
      BV (clib_bihash_increment_stat) (h, BIHASH_STAT_splits,
				       old_log2_pages + 1);
    }

  /* Try to add the new entry */
  save_new_v = new_v;
  new_hash = BV (clib_bihash_hash) (add_v);
  limit = BIHASH_KVP_PER_PAGE;
  if (mark_bucket_linear)
    limit <<= new_log2_pages;
  new_hash >>= h->log2_nbuckets;
  new_hash &= (1 << new_log2_pages) - 1;
  new_v += mark_bucket_linear ? 0 : new_hash;

  for (i = 0; i < limit; i++)
    {
      if (BV (clib_bihash_is_free) (&(new_v->kvp[i])))
	{
	  clib_memcpy_fast (&(new_v->kvp[i]), add_v, sizeof (*add_v));
	  goto expand_ok;
	}
    }

  /* Crap. Try again */
  BV (value_free) (h, save_new_v, new_log2_pages);
  /*
   * If we've already doubled the size of the bucket once,
   * fall back to linear search now.
   */
  if (resplit_once)
    goto mark_linear;
  else
    goto try_resplit;

expand_ok:
  /* Build the new bucket word locally, then publish it with one store */
  tmp_b.log2_pages = new_log2_pages;
  tmp_b.offset = BV (clib_bihash_get_offset) (h, save_new_v);
  tmp_b.linear_search = mark_bucket_linear;
  tmp_b.refcnt = h->saved_bucket.refcnt + 1;
  ASSERT (tmp_b.refcnt > 0);
  tmp_b.lock = 0;
  CLIB_MEMORY_BARRIER ();
  b->as_u64 = tmp_b.as_u64;
  /* free the old bucket */
  v = BV (clib_bihash_get_value) (h, h->saved_bucket.offset);
  BV (value_free) (h, v, h->saved_bucket.log2_pages);
  BV (clib_bihash_alloc_unlock) (h);
  return (0);
}

/*
 * Add (is_add != 0) or delete (is_add == 0) a key/value pair, with no
 * stale-entry callback. Returns whatever clib_bihash_add_del_inline
 * returns.
 */
int BV (clib_bihash_add_del)
  (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, int is_add)
{
  int rv;

  rv = BV (clib_bihash_add_del_inline) (h, add_v, is_add, 0, 0);
  return rv;
}

/*
 * Add a key/value pair (is_add = 1), passing stale_callback and arg
 * through to clib_bihash_add_del_inline as its stale-entry callback.
 * Returns whatever clib_bihash_add_del_inline returns.
 */
int BV (clib_bihash_add_or_overwrite_stale)
  (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v,
   int (*stale_callback) (BVT (clib_bihash_kv) *, void *), void *arg)
{
  int rv;

  rv = BV (clib_bihash_add_del_inline) (h, add_v, 1, stale_callback, arg);
  return rv;
}

/**
 * Look up search_key in the table and copy the matching entry out.
 *
 * @param h          the hash table
 * @param search_key entry whose key is hashed and compared
 * @param valuep     receives a copy of the matching key/value pair;
 *                   asserted non-NULL
 * @return 0 if a matching key was found (and *valuep written), -1 if the
 *         arena is not allocated, the bucket is empty, or no key matched
 */
int BV (clib_bihash_search)
  (BVT (clib_bihash) * h,
   BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep)
{
  u64 hash;
  u32 bucket_index;
  BVT (clib_bihash_value) * v;
  BVT (clib_bihash_bucket) * b;
  int i, limit;

  ASSERT (valuep);

  /* No arena: nothing can be stored, so nothing can be found */
  if (PREDICT_FALSE (alloc_arena (h) == 0))
    return -1;

  hash = BV (clib_bihash_hash) (search_key);

  /*
   * Low-order hash bits pick the bucket.
   * NOTE(review): the mask assumes nbuckets is a power of two - confirm
   * against the init code.
   */
  bucket_index = hash & (h->nbuckets - 1);
  b = &h->buckets[bucket_index];

  if (BV (clib_bihash_bucket_is_empty) (b))
    return -1;

  /*
   * Bucket is locked: spin until the lock field clears. The volatile
   * alias forces the lock field to be re-read on every iteration.
   */
  if (PREDICT_FALSE (b->lock))
    {
      volatile BVT (clib_bihash_bucket) * bv = b;
      while (bv->lock)
	CLIB_PAUSE ();
    }

  /* Discard the bits already consumed by bucket selection */
  hash >>= h->log2_nbuckets;

  v = BV (clib_bihash_get_value) (h, b->offset);
  limit = BIHASH_KVP_PER_PAGE;
  /*
   * Normal bucket: the next log2_pages hash bits select one page and only
   * that page is scanned. Linear-search bucket: start at the first page
   * and widen the scan to every slot of every page.
   */
  v += (b->linear_search == 0) ? hash & ((1 << b->log2_pages) - 1) : 0;
  if (PREDICT_FALSE (b->linear_search))
    limit <<= b->log2_pages;

  for (i = 0; i < limit; i++)
    {
      if (BV (clib_bihash_key_compare) (v->kvp[i].key, search_key->key))
	{
	  /* Hit: hand back a copy of the whole key/value pair */
	  *valuep = v->kvp[i];
	  return 0;
	}
    }
  return -1;
}

/**
 * format() helper that renders a summary of a bihash table.
 *
 * Variadic arguments (pulled from args, in order):
 *   - BVT (clib_bihash) *h : the table to describe
 *   - int verbose          : 0 prints totals only; non-zero adds a line per
 *                            occupied bucket and entry; > 1 additionally
 *                            prints empty buckets and empty slots
 *
 * @param s    vector being appended to
 * @param args va_list holding the arguments described above
 * @return the (possibly reallocated) output vector
 */
u8 *BV (format_bihash) (u8 * s, va_list * args)
{
  BVT (clib_bihash) * h = va_arg (*args, BVT (clib_bihash) *);
  int verbose = va_arg (*args, int);
  BVT (clib_bihash_bucket) * b;
  BVT (clib_bihash_value) * v;
  int bkt, page, slot, fl;
  u64 n_elts = 0, n_buckets = 0, n_linear = 0;
  u64 used_bytes;

  s = format (s, "Hash table %s\n", h->name ? h->name : (u8 *) "(unnamed)");

  /* Nothing more to report when the arena was never allocated */
  if (PREDICT_FALSE (alloc_arena (h) == 0))
    return format (s, "[empty, uninitialized]");

  /* Pass 1: walk every bucket, counting (and optionally printing) entries */
  for (bkt = 0; bkt < h->nbuckets; bkt++)
    {
      b = &h->buckets[bkt];

      if (BV (clib_bihash_bucket_is_empty) (b))
	{
	  if (verbose > 1)
	    s = format (s, "[%d]: empty\n", bkt);
	  continue;
	}

      n_buckets += 1;
      n_linear += b->linear_search ? 1 : 0;

      if (verbose)
	s = format (s, "[%d]: heap offset %lld, len %d, linear %d\n", bkt,
		    b->offset, (1 << b->log2_pages), b->linear_search);

      v = BV (clib_bihash_get_value) (h, b->offset);

      for (page = 0; page < (1 << b->log2_pages); page++, v++)
	{
	  for (slot = 0; slot < BIHASH_KVP_PER_PAGE; slot++)
	    {
	      if (BV (clib_bihash_is_free) (&v->kvp[slot]))
		{
		  if (verbose > 1)
		    s = format (s, "    %d: empty\n",
				page * BIHASH_KVP_PER_PAGE + slot);
		  continue;
		}

	      n_elts += 1;

	      if (!verbose)
		continue;

	      /* Prefer the table's own entry formatter when one is set */
	      if (h->fmt_fn)
		s = format (s, "    %d: %U\n",
			    page * BIHASH_KVP_PER_PAGE + slot,
			    h->fmt_fn, &(v->kvp[slot]), verbose);
	      else
		s = format (s, "    %d: %U\n",
			    page * BIHASH_KVP_PER_PAGE + slot,
			    BV (format_bihash_kvp), &(v->kvp[slot]));
	    }
	}
    }

  s = format (s, "    %lld active elements %lld active buckets\n",
	      n_elts, n_buckets);
  s = format (s, "    %d free lists\n", vec_len (h->freelists));

  /* Pass 2: count the elements chained on each free list */
  for (fl = 0; fl < vec_len (h->freelists); fl++)
    {
      BVT (clib_bihash_value) * free_elt;
      u64 cursor;
      u32 nfree = 0;

      /* A zero link terminates the chain */
      for (cursor = h->freelists[fl]; cursor;
	   cursor = free_elt->next_free_as_u64)
	{
	  free_elt = BV (clib_bihash_get_value) (h, cursor);
	  nfree += 1;
	}

      if (nfree || verbose)
	s = format (s, "       [len %d] %u free elts\n", 1 << fl, nfree);
    }

  s = format (s, "    %lld linear search buckets\n", n_linear);

  /* Arena usage summary */
  used_bytes = alloc_arena_next (h);
  s = format (s,
	      "    arena: base %llx, next %llx\n"
	      "           used %lld b (%lld Mbytes) of %lld b (%lld Mbytes)\n",
	      alloc_arena (h), alloc_arena_next (h),
	      used_bytes, used_bytes >> 20,
	      alloc_arena_size (h), alloc_arena_size (h) >> 20);

  return s;
}

/**
 * Invoke a callback on every occupied key/value slot in the table.
 *
 * Does nothing if the arena has not been allocated. After each callback
 * invocation the bucket is re-checked; if it has become empty the rest of
 * that bucket is skipped and the walk moves on to the next bucket.
 *
 * @param h        the hash table
 * @param callback function called as
 *                 void (*) (BVT (clib_bihash_kv) *, void *)
 * @param arg      opaque pointer passed as the callback's second argument
 */
void BV (clib_bihash_foreach_key_value_pair)
  (BVT (clib_bihash) * h, void *callback, void *arg)
{
  void (*visit) (BVT (clib_bihash_kv) *, void *) = callback;
  BVT (clib_bihash_bucket) * b;
  BVT (clib_bihash_value) * v;
  int bkt, page, slot;

  if (PREDICT_FALSE (alloc_arena (h) == 0))
    return;

  for (bkt = 0; bkt < h->nbuckets; bkt++)
    {
      int bucket_emptied = 0;

      b = &h->buckets[bkt];
      if (BV (clib_bihash_bucket_is_empty) (b))
	continue;

      v = BV (clib_bihash_get_value) (h, b->offset);

      /*
       * The page count is re-read from the bucket on every pass since
       * the callback may have modified the bucket.
       */
      for (page = 0; page < (1 << b->log2_pages); page++)
	{
	  for (slot = 0; slot < BIHASH_KVP_PER_PAGE; slot++)
	    {
	      if (!BV (clib_bihash_is_free) (&v->kvp[slot]))
		{
		  (*visit) (&v->kvp[slot], arg);

		  /* The callback may have deleted the bucket's last entry */
		  if (BV (clib_bihash_bucket_is_empty) (b))
		    {
		      bucket_emptied = 1;
		      break;
		    }
		}
	    }

	  if (bucket_emptied)
	    break;

	  v++;
	}
    }
}

/** @endcond */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */