/*
 *------------------------------------------------------------------
 * Copyright (c) 2017 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#include <stddef.h>
#include <netinet/in.h>

#include <vlibapi/api.h>
#include <vlibmemory/api.h>

#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/pg/pg.h>
#include <vppinfra/error.h>
#include <vnet/plugin/plugin.h>
#include <acl/acl.h>
#include <vppinfra/bihash_48_8.h>

#include "hash_lookup.h"
#include "hash_lookup_private.h"


always_inline applied_hash_ace_entry_t **get_applied_hash_aces(acl_main_t *am, u32 lc_index)
{
  applied_hash_ace_entry_t **applied_hash_aces = vec_elt_at_index(am->hash_entry_vec_by_lc_index, lc_index);

/*is_input ? vec_elt_at_index(am->input_hash_entry_vec_by_sw_if_index, sw_if_index)
                                                          : vec_elt_at_index(am->output_hash_entry_vec_by_sw_if_index, sw_if_index);
*/
  return applied_hash_aces;
}


static void
hashtable_add_del(acl_main_t *am, clib_bihash_kv_48_8_t *kv, int is_add)
{
    DBG("HASH ADD/DEL: %016llx %016llx %016llx %016llx %016llx %016llx %016llx add %d",
                        kv->key[0], kv->key[1], kv->key[2],
                        kv->key[3], kv->key[4], kv->key[5], kv->value, is_add);
    BV (clib_bihash_add_del) (&am->acl_lookup_hash, kv, is_add);
}

/*
 * TupleMerge
 *
 * Initial adaptation by Valerio Bruschi (valerio.bruschi@telecom-paristech.fr)
 * based on the TupleMerge [1] simulator kindly made available
 * by  James Daly (dalyjamese@gmail.com) and  Eric Torng (torng@cse.msu.edu)
 * ( http://www.cse.msu.edu/~dalyjame/ or http://www.cse.msu.edu/~torng/ ),
 * refactoring by Andrew Yourtchenko.
 *
 * [1] James Daly, Eric Torng "TupleMerge: Building Online Packet Classifiers
 * by Omitting Bits", In Proc. IEEE ICCCN 2017, pp. 1-10
 *
 */

static int
count_bits (u64 word)
{
  int counter = 0;
  while (word)
    {
      counter += word & 1;
      word >>= 1;
    }
  return counter;
}

/* check if mask2 can be contained by mask1 */
static u8
first_mask_contains_second_mask(int is_ip6, fa_5tuple_t * mask1, fa_5tuple_t * mask2)
{
  int i;
  if (is_ip6)
    {
      for (i = 0; i < 2; i++)
        {
          if ((mask1->ip6_addr[0].as_u64[i] & mask2->ip6_addr[0].as_u64[i]) !=
              mask1->ip6_addr[0].as_u64[i])
            return 0;
          if ((mask1->ip6_addr[1].as_u64[i] & mask2->ip6_addr[1].as_u64[i]) !=
              mask1->ip6_addr[1].as_u64[i])
            return 0;
        }
    }
  else
    {
      /* check the pads, both masks must have it 0 */
      u32 padcheck = 0;
      int i;
      for (i=0; i<6; i++) {
        padcheck |= mask1->l3_zero_pad[i];
        padcheck |= mask2->l3_zero_pad[i];
      }
      if (padcheck != 0)
        return 0;
      if ((mask1->ip4_addr[0].as_u32 & mask2->ip4_addr[0].as_u32) !=
          mask1->ip4_addr[0].as_u32)
        return 0;
      if ((mask1->ip4_addr[1].as_u32 & mask2->ip4_addr[1].as_u32) !=
          mask1->ip4_addr[1].as_u32)
        return 0;
    }

  /* take care if port are not exact-match  */
  if ((mask1->l4.as_u64 & mask2->l4.as_u64) != mask1->l4.as_u64)
    return 0;

  if ((mask1->pkt.as_u64 & mask2->pkt.as_u64) != mask1->pkt.as_u64)
    return 0;

  return 1;
}



/*
 * TupleMerge:
 *
 * Consider the situation when we have to create a new table
 * T for a given rule R. This occurs for the first rule inserted and
 * for later rules if it is incompatible with all existing tables.
 * In this event, we need to determine mT for a new table.
 * Setting mT = mR is not a good strategy; if another similar,
 * but slightly less specific, rule appears we will be unable to
 * add it to T and will thus have to create another new table. We
 * thus consider two factors: is the rule more strongly aligned
 * with source or destination addresses (usually the two most
 * important fields) and how much slack needs to be given to
 * allow for other rules. If the source and destination addresses
 * are close together (within 4 bits for our experiments), we use
 * both of them. Otherwise, we drop the smaller (less specific)
 * address and its associated port field from consideration; R is
 * predominantly aligned with one of the two fields and should
 * be grouped with other similar rules. This is similar to TSS
 * dropping port fields, but since it is based on observable rule
 * characteristics it is more likely to keep important fields and
 * discard less useful ones.
 * We then look at the absolute lengths of the addresses. If
 * the address is long, we are more likely to try to add shorter
 * lengths and likewise the reverse. We thus remove a few bits
 * from both address fields with more bits removed from longer
 * addresses. For 32 bit addresses, we remove 4 bits, 3 for more
 * than 24, 2 for more than 16, and so on (so 8 and fewer bits
 * don’t have any removed). We only do this for prefix fields like
 * addresses; both range fields (like ports) and exact match fields
 * (like protocol) should remain as they are.
 */


static u32
shift_ip4_if(u32 mask, u32 thresh, int numshifts, u32 else_val)
{
  if (mask > thresh)
     return clib_host_to_net_u32((clib_net_to_host_u32(mask) << numshifts) & 0xFFFFFFFF);
  else
     return else_val;
}

static void
relax_ip4_addr(ip4_address_t *ip4_mask, int relax2) {
  int shifts_per_relax[2][4] = { { 6, 5, 4, 2 }, { 3, 2, 1, 1 } };

  int *shifts = shifts_per_relax[relax2];
  if(ip4_mask->as_u32 == 0xffffffff)
    ip4_mask->as_u32 = clib_host_to_net_u32((clib_net_to_host_u32(ip4_mask->as_u32) << shifts[0])&0xFFFFFFFF);
  else
    ip4_mask->as_u32 = shift_ip4_if(ip4_mask->as_u32, 0xffffff00, shifts[1],
                        shift_ip4_if(ip4_mask->as_u32, 0xffff0000, shifts[2],
                          shift_ip4_if(ip4_mask->as_u32, 0xff000000, shifts[3], ip4_mask->as_u32)));
}

static void
relax_ip6_addr(ip6_address_t *ip6_mask, int relax2) {
  /*
   * This "better than nothing" relax logic is based on heuristics
   * from IPv6 knowledge, and may not be optimal.
   * Some further tuning may be needed in the future.
   */
  if (ip6_mask->as_u64[0] == 0xffffffffffffffffULL) {
    if (ip6_mask->as_u64[1] == 0xffffffffffffffffULL) {
      /* relax a /128 down to /64  - likely to have more hosts */
      ip6_mask->as_u64[1] = 0;
    } else if (ip6_mask->as_u64[1] == 0) {
      /* relax a /64 down to /56 - likely to have more subnets */
      ip6_mask->as_u64[0] = clib_host_to_net_u64(0xffffffffffffff00ULL);
    }
  }
}

static void
relax_tuple(fa_5tuple_t *mask, int is_ip6, int relax2){
	fa_5tuple_t save_mask = *mask;

	int counter_s = 0, counter_d = 0;
        if (is_ip6) {
	  int i;
	  for(i=0; i<2; i++){
		counter_s += count_bits(mask->ip6_addr[0].as_u64[i]);
		counter_d += count_bits(mask->ip6_addr[1].as_u64[i]);
	  }
        } else {
		counter_s += count_bits(mask->ip4_addr[0].as_u32);
		counter_d += count_bits(mask->ip4_addr[1].as_u32);
        }

/*
 * is the rule more strongly aligned with source or destination addresses
 * (usually the two most important fields) and how much slack needs to be
 * given to allow for other rules. If the source and destination addresses
 * are close together (within 4 bits for our experiments), we use both of them.
 * Otherwise, we drop the smaller (less specific) address and its associated
 * port field from consideration
 */
	const int deltaThreshold = 4;
	/* const int deltaThreshold = 8; if IPV6? */
	int delta = counter_s - counter_d;
	if (-delta > deltaThreshold) {
                if (is_ip6)
		  mask->ip6_addr[0].as_u64[1] = mask->ip6_addr[0].as_u64[0] = 0;
                else
		  mask->ip4_addr[0].as_u32 = 0;
		mask->l4.port[0] = 0;
        } else if (delta > deltaThreshold) {
                if (is_ip6)
		  mask->ip6_addr[1].as_u64[1] = mask->ip6_addr[1].as_u64[0] = 0;
                else
		  mask->ip4_addr[1].as_u32 = 0;
		mask->l4.port[1] = 0;
        }

        if (is_ip6) {
          relax_ip6_addr(&mask->ip6_addr[0], relax2);
          relax_ip6_addr(&mask->ip6_addr[1], relax2);
        } else {
          relax_ip4_addr(&mask->ip4_addr[0], relax2);
          relax_ip4_addr(&mask->ip4_addr[1], relax2);
        }
	mask->pkt.is_nonfirst_fragment = 0;
	mask->pkt.l4_valid = 0;
	if(!first_mask_contains_second_mask(is_ip6, mask, &save_mask)){
		DBG( "TM-relaxing-ERROR");
                *mask = save_mask;
	}
	DBG( "TM-relaxing-end");
}

static u32
find_mask_type_index(acl_main_t *am, fa_5tuple_t *mask)
{
  ace_mask_type_entry_t *mte;
  /* *INDENT-OFF* */
  pool_foreach(mte, am->ace_mask_type_pool,
  ({
    if(memcmp(&mte->mask, mask, sizeof(*mask)) == 0)
      return (mte - am->ace_mask_type_pool);
  }));
  /* *INDENT-ON* */
  return ~0;
}

static u32
assign_mask_type_index(acl_main_t *am, fa_5tuple_t *mask)
{
  u32 mask_type_index = find_mask_type_index(am, mask);
  ace_mask_type_entry_t *mte;
  if(~0 == mask_type_index) {
    pool_get_aligned (am->ace_mask_type_pool, mte, CLIB_CACHE_LINE_BYTES);
    mask_type_index = mte - am->ace_mask_type_pool;
    clib_memcpy_fast(&mte->mask, mask, sizeof(mte->mask));
    mte->refcount = 0;

    /*
     * We can use only 16 bits, since in the match there is only u16 field.
     * Realistically, once you go to 64K of mask types, it is a huge
     * problem anyway, so we might as well stop half way.
     */
    ASSERT(mask_type_index < 32768);
  }
  mte = am->ace_mask_type_pool + mask_type_index;
  mte->refcount++;
  DBG0("ASSIGN MTE index %d new refcount %d", mask_type_index, mte->refcount);
  return mask_type_index;
}

static void
lock_mask_type_index(acl_main_t *am, u32 mask_type_index)
{
  DBG0("LOCK MTE index %d", mask_type_index);
  ace_mask_type_entry_t *mte = pool_elt_at_index(am->ace_mask_type_pool, mask_type_index);
  mte->refcount++;
  DBG0("LOCK MTE index %d new refcount %d", mask_type_index, mte->refcount);
}


static void
release_mask_type_index(acl_main_t *am, u32 mask_type_index)
{
  DBG0("RELEAS MTE index %d", mask_type_index);
  ace_mask_type_entry_t *mte = pool_elt_at_index(am->ace_mask_type_pool, mask_type_index);
  mte->refcount--;
  DBG0("RELEAS MTE index %d new refcount %d", mask_type_index, mte->refcount);
  if (mte->refcount == 0) {
    /* we are not using this entry anymore */
    clib_memset(mte, 0xae, sizeof(*mte));
    pool_put(am->ace_mask_type_pool, mte);
  }
}


static u32
tm_assign_mask_type_index(acl_main_t *am, fa_5tuple_t *mask, int is_ip6, u32 lc_index)
{
	u32 mask_type_index = ~0;
	u32 for_mask_type_index = ~0;
	ace_mask_type_entry_t *mte = 0;
	int order_index;
	/* look for existing mask comparable with the one in input */

	hash_applied_mask_info_t **hash_applied_mask_info_vec = vec_elt_at_index(am->hash_applied_mask_info_vec_by_lc_index, lc_index);
	hash_applied_mask_info_t *minfo;

        if (vec_len(*hash_applied_mask_info_vec) > 0) {
	    for(order_index = vec_len((*hash_applied_mask_info_vec)) -1; order_index >= 0; order_index--) {
		minfo = vec_elt_at_index((*hash_applied_mask_info_vec), order_index);
		for_mask_type_index = minfo->mask_type_index;
		mte = vec_elt_at_index(am->ace_mask_type_pool, for_mask_type_index);
		if(first_mask_contains_second_mask(is_ip6, &mte->mask, mask)){
			mask_type_index = (mte - am->ace_mask_type_pool);
			lock_mask_type_index(am, mask_type_index);
			break;
		}
            }
	}

	if(~0 == mask_type_index) {
		/* if no mask is found, then let's use a relaxed version of the original one, in order to be used by new ace_entries */
		DBG( "TM-assigning mask type index-new one");
		fa_5tuple_t relaxed_mask = *mask;
		relax_tuple(&relaxed_mask, is_ip6, 0);
		mask_type_index = assign_mask_type_index(am, &relaxed_mask);

		hash_applied_mask_info_t **hash_applied_mask_info_vec = vec_elt_at_index(am->hash_applied_mask_info_vec_by_lc_index, lc_index);

		int spot = vec_len((*hash_applied_mask_info_vec));
		vec_validate((*hash_applied_mask_info_vec), spot);
		minfo = vec_elt_at_index((*hash_applied_mask_info_vec), spot);
		minfo->mask_type_index = mask_type_index;
		minfo->num_entries = 0;
		minfo->max_collisions = 0;
		minfo->first_rule_index = ~0;

		/*
		 * We can use only 16 bits, since in the match there is only u16 field.
		 * Realistically, once you go to 64K of mask types, it is a huge
		 * problem anyway, so we might as well stop half way.
		 */
		ASSERT(mask_type_index < 32768);
	}
	mte = am->ace_mask_type_pool + mask_type_index;
	DBG0("TM-ASSIGN MTE index %d new refcount %d", mask_type_index, mte->refcount);
	return mask_type_index;
}


static void
fill_applied_hash_ace_kv(acl_main_t *am,
                            applied_hash_ace_entry_t **applied_hash_aces,
                            u32 lc_index,
                            u32 new_index, clib_bihash_kv_48_8_t *kv)
{
  fa_5tuple_t *kv_key = (fa_5tuple_t *)kv->key;
  hash_acl_lookup_value_t *kv_val = (hash_acl_lookup_value_t *)&kv->value;
  applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), new_index);
  hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, pae->acl_index);

  /* apply the mask to ace key */
  hash_ace_info_t *ace_info = vec_elt_at_index(ha->rules, pae->hash_ace_info_index);
  ace_mask_type_entry_t *mte = vec_elt_at_index(am->ace_mask_type_pool, pae->mask_type_index);

  u64 *pmatch = (u64 *) &ace_info->match;
  u64 *pmask = (u64 *)&mte->mask;
  u64 *pkey = (u64 *)kv->key;

  *pkey++ = *pmatch++ & *pmask++;
  *pkey++ = *pmatch++ & *pmask++;
  *pkey++ = *pmatch++ & *pmask++;
  *pkey++ = *pmatch++ & *pmask++;
  *pkey++ = *pmatch++ & *pmask++;
  *pkey++ = *pmatch++ & *pmask++;

  kv_key->pkt.mask_type_index_lsb = pae->mask_type_index;
  kv_key->pkt.lc_index = lc_index;
  kv_val->as_u64 = 0;
  kv_val->applied_entry_index = new_index;
}

static void
add_del_hashtable_entry(acl_main_t *am,
                            u32 lc_index,
                            applied_hash_ace_entry_t **applied_hash_aces,
			    u32 index, int is_add)
{
  clib_bihash_kv_48_8_t kv;

  fill_applied_hash_ace_kv(am, applied_hash_aces, lc_index, index, &kv);
  hashtable_add_del(am, &kv, is_add);
}


static void
remake_hash_applied_mask_info_vec (acl_main_t * am,
                                   applied_hash_ace_entry_t **
                                   applied_hash_aces, u32 lc_index)
{
  DBG0("remake applied hash mask info lc_index %d", lc_index);
  hash_applied_mask_info_t *new_hash_applied_mask_info_vec =
    vec_new (hash_applied_mask_info_t, 0);

  hash_applied_mask_info_t *minfo;
  int i;
  for (i = 0; i < vec_len ((*applied_hash_aces)); i++)
    {
      applied_hash_ace_entry_t *pae =
        vec_elt_at_index ((*applied_hash_aces), i);

      /* check if mask_type_index is already there */
      u32 new_pointer = vec_len (new_hash_applied_mask_info_vec);
      int search;
      for (search = 0; search < vec_len (new_hash_applied_mask_info_vec);
           search++)
        {
          minfo = vec_elt_at_index (new_hash_applied_mask_info_vec, search);
          if (minfo->mask_type_index == pae->mask_type_index)
            break;
        }
       
      vec_validate ((new_hash_applied_mask_info_vec), search);
      minfo = vec_elt_at_index ((new_hash_applied_mask_info_vec), search);
      if (search == new_pointer)
        {
          DBG0("remaking index %d", search);
          minfo->mask_type_index = pae->mask_type_index;
          minfo->num_entries = 0;
          minfo->max_collisions = 0;
          minfo->first_rule_index = ~0;
        }

      minfo->num_entries = minfo->num_entries + 1;

      if (vec_len (pae->colliding_rules) > minfo->max_collisions)
        minfo->max_collisions = vec_len (pae->colliding_rules);

      if (minfo->first_rule_index > i)
        minfo->first_rule_index = i;
    }

  hash_applied_mask_info_t **hash_applied_mask_info_vec =
    vec_elt_at_index (am->hash_applied_mask_info_vec_by_lc_index, lc_index);

  vec_free ((*hash_applied_mask_info_vec));
  (*hash_applied_mask_info_vec) = new_hash_applied_mask_info_vec;
}

static void
vec_del_collision_rule (collision_match_rule_t ** pvec,
                        u32 applied_entry_index)
{
  u32 i = 0;
  u32 deleted = 0;
  while (i < _vec_len ((*pvec)))
    {
      collision_match_rule_t *cr = vec_elt_at_index ((*pvec), i);
      if (cr->applied_entry_index == applied_entry_index)
        {
          /* vec_del1 ((*pvec), i) would be more efficient but would reorder the elements. */
          vec_delete((*pvec), 1, i);
          deleted++;
          DBG0("vec_del_collision_rule deleting one at index %d", i);
        }
      else
        {
          i++;
        }
    }
  ASSERT(deleted > 0);
}

static void
acl_plugin_print_pae (vlib_main_t * vm, int j, applied_hash_ace_entry_t * pae);

static void
del_colliding_rule (applied_hash_ace_entry_t ** applied_hash_aces,
                    u32 head_index, u32 applied_entry_index)
{
  DBG0("DEL COLLIDING RULE: head_index %d applied index %d", head_index, applied_entry_index);


  applied_hash_ace_entry_t *head_pae =
    vec_elt_at_index ((*applied_hash_aces), head_index);
  if (ACL_HASH_LOOKUP_DEBUG > 0)
    acl_plugin_print_pae(acl_main.vlib_main, head_index, head_pae);
  vec_del_collision_rule (&head_pae->colliding_rules, applied_entry_index);
  if (vec_len(head_pae->colliding_rules) == 0) {
    vec_free(head_pae->colliding_rules);
  }
  if (ACL_HASH_LOOKUP_DEBUG > 0)
    acl_plugin_print_pae(acl_main.vlib_main, head_index, head_pae);
}

static void
add_colliding_rule (acl_main_t * am,
                    applied_hash_ace_entry_t ** applied_hash_aces,
                    u32 head_index, u32 applied_entry_index)
{
  applied_hash_ace_entry_t *head_pae =
    vec_elt_at_index ((*applied_hash_aces), head_index);
  applied_hash_ace_entry_t *pae =
    vec_elt_at_index ((*applied_hash_aces), applied_entry_index);
  DBG0("ADD COLLIDING RULE: head_index %d applied index %d", head_index, applied_entry_index);
  if (ACL_HASH_LOOKUP_DEBUG > 0)
    acl_plugin_print_pae(acl_main.vlib_main, head_index, head_pae);

  collision_match_rule_t cr;

  cr.acl_index = pae->acl_index;
  cr.ace_index = pae->ace_index;
  cr.acl_position = pae->acl_position;
  cr.applied_entry_index = applied_entry_index;
  cr.rule = am->acls[pae->acl_index].rules[pae->ace_index];
  pae->collision_head_ae_index = head_index;
  vec_add1 (head_pae->colliding_rules, cr);
  if (ACL_HASH_LOOKUP_DEBUG > 0)
    acl_plugin_print_pae(acl_main.vlib_main, head_index, head_pae);
}

static u32
activate_applied_ace_hash_entry(acl_main_t *am,
                            u32 lc_index,
                            applied_hash_ace_entry_t **applied_hash_aces,
                            u32 new_index)
{
  clib_bihash_kv_48_8_t kv;
  ASSERT(new_index != ~0);
  DBG("activate_applied_ace_hash_entry lc_index %d new_index %d", lc_index, new_index);

  fill_applied_hash_ace_kv(am, applied_hash_aces, lc_index, new_index, &kv);

  DBG("APPLY ADD KY: %016llx %016llx %016llx %016llx %016llx %016llx",
			kv.key[0], kv.key[1], kv.key[2],
			kv.key[3], kv.key[4], kv.key[5]);

  clib_bihash_kv_48_8_t result;
  hash_acl_lookup_value_t *result_val = (hash_acl_lookup_value_t *)&result.value;
  int res = BV (clib_bihash_search) (&am->acl_lookup_hash, &kv, &result);
  ASSERT(new_index != ~0);
  ASSERT(new_index < vec_len((*applied_hash_aces)));
  if (res == 0) {
    u32 first_index = result_val->applied_entry_index;
    ASSERT(first_index != ~0);
    ASSERT(first_index < vec_len((*applied_hash_aces)));
    /* There already exists an entry or more. Append at the end. */
    DBG("A key already exists, with applied entry index: %d", first_index);
    add_colliding_rule(am, applied_hash_aces, first_index, new_index);
    return first_index;
  } else {
    /* It's the very first entry */
    hashtable_add_del(am, &kv, 1);
    ASSERT(new_index != ~0);
    add_colliding_rule(am, applied_hash_aces, new_index, new_index);
    return new_index;
  }
}


static void *
hash_acl_set_heap(acl_main_t *am)
{
  if (0 == am->hash_lookup_mheap) {
    am->hash_lookup_mheap = mheap_alloc_with_lock (0 /* use VM */ , 
                                                   am->hash_lookup_mheap_size,
                                                   1 /* locked */);
    if (0 == am->hash_lookup_mheap) {
        clib_error("ACL plugin failed to allocate lookup heap of %U bytes", 
                   format_memory_size, am->hash_lookup_mheap_size);
    }
#if USE_DLMALLOC != 0
    /*
     * DLMALLOC is being "helpful" in that it ignores the heap size parameter
     * by default and tries to allocate the larger amount of memory.
     *
     * Pin the heap so this does not happen and if we run out of memory
     * in this heap, we will bail out with "out of memory", rather than
     * an obscure error sometime later.
     */
    mspace_disable_expand(am->hash_lookup_mheap);
#endif
  }
  void *oldheap = clib_mem_set_heap(am->hash_lookup_mheap);
  return oldheap;
}

void
acl_plugin_hash_acl_set_validate_heap(int on)
{
  acl_main_t *am = &acl_main;
  clib_mem_set_heap(hash_acl_set_heap(am));
#if USE_DLMALLOC == 0
  mheap_t *h = mheap_header (am->hash_lookup_mheap);
  if (on) {
    h->flags |= MHEAP_FLAG_VALIDATE;
    h->flags &= ~MHEAP_FLAG_SMALL_OBJECT_CACHE;
    mheap_validate(h);
  } else {
    h->flags &= ~MHEAP_FLAG_VALIDATE;
    h->flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
  }
#endif
}

void
acl_plugin_hash_acl_set_trace_heap(int on)
{
  acl_main_t *am = &acl_main;
  clib_mem_set_heap(hash_acl_set_heap(am));
#if USE_DLMALLOC == 0
  mheap_t *h = mheap_header (am->hash_lookup_mheap);
  if (on) {
    h->flags |= MHEAP_FLAG_TRACE;
  } else {
    h->flags &= ~MHEAP_FLAG_TRACE;
  }
#endif
}

static void
assign_mask_type_index_to_pae(acl_main_t *am, u32 lc_index, int is_ip6, applied_hash_ace_entry_t *pae)
{
  hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, pae->acl_index);
  hash_ace_info_t *ace_info = vec_elt_at_index(ha->rules, pae->hash_ace_info_index);

  ace_mask_type_entry_t *mte;
  fa_5tuple_t mask;
  /*
   * Start taking base_mask associated to ace, and essentially copy it.
   * With TupleMerge we will assign a relaxed mask here.
   */
  mte = vec_elt_at_index(am->ace_mask_type_pool, ace_info->base_mask_type_index);
  mask = mte->mask;
  if (am->use_tuple_merge)
    pae->mask_type_index = tm_assign_mask_type_index(am, &mask, is_ip6, lc_index);
  else
    pae->mask_type_index = assign_mask_type_index(am, &mask);
}

static void
split_partition(acl_main_t *am, u32 first_index,
                            u32 lc_index, int is_ip6);


static void
check_collision_count_and_maybe_split(acl_main_t *am, u32 lc_index, int is_ip6, u32 first_index)
{
  applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, lc_index);
  applied_hash_ace_entry_t *first_pae = vec_elt_at_index((*applied_hash_aces), first_index);
  if (vec_len(first_pae->colliding_rules) > am->tuple_merge_split_threshold) {
    split_partition(am, first_index, lc_index, is_ip6);
  }
}

void
hash_acl_apply(acl_main_t *am, u32 lc_index, int acl_index, u32 acl_position)
{
  int i;

  DBG0("HASH ACL apply: lc_index %d acl %d", lc_index, acl_index);
  if (!am->acl_lookup_hash_initialized) {
    BV (clib_bihash_init) (&am->acl_lookup_hash, "ACL plugin rule lookup bihash",
                           am->hash_lookup_hash_buckets, am->hash_lookup_hash_memory);
    am->acl_lookup_hash_initialized = 1;
  }

  void *oldheap = hash_acl_set_heap(am);
  vec_validate(am->hash_entry_vec_by_lc_index, lc_index);
  vec_validate(am->hash_acl_infos, acl_index);
  applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, lc_index);

  hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index);
  u32 **hash_acl_applied_lc_index = &ha->lc_index_list;

  int base_offset = vec_len(*applied_hash_aces);

  /* Update the bitmap of the mask types with which the lookup
     needs to happen for the ACLs applied to this lc_index */
  applied_hash_acl_info_t **applied_hash_acls = &am->applied_hash_acl_info_by_lc_index;
  vec_validate((*applied_hash_acls), lc_index);
  applied_hash_acl_info_t *pal = vec_elt_at_index((*applied_hash_acls), lc_index);

  /* ensure the list of applied hash acls is initialized and add this acl# to it */
  u32 index = vec_search(pal->applied_acls, acl_index);
  if (index != ~0) {
    clib_warning("BUG: trying to apply twice acl_index %d on lc_index %d, according to lc",
                 acl_index, lc_index);
    goto done;
  }
  vec_add1(pal->applied_acls, acl_index);
  u32 index2 = vec_search((*hash_acl_applied_lc_index), lc_index);
  if (index2 != ~0) {
    clib_warning("BUG: trying to apply twice acl_index %d on lc_index %d, according to hash h-acl info",
                 acl_index, lc_index);
    goto done;
  }
  vec_add1((*hash_acl_applied_lc_index), lc_index);

  /*
   * if the applied ACL is empty, the current code will cause a
   * different behavior compared to current linear search: an empty ACL will
   * simply fallthrough to the next ACL, or the default deny in the end.
   *
   * This is not a problem, because after vpp-dev discussion,
   * the consensus was it should not be possible to apply the non-existent
   * ACL, so the change adding this code also takes care of that.
   */


  vec_validate(am->hash_applied_mask_info_vec_by_lc_index, lc_index);

  /* since we know (in case of no split) how much we expand, preallocate that space */
  if (vec_len(ha->rules) > 0) {
    int old_vec_len = vec_len(*applied_hash_aces);
    vec_validate((*applied_hash_aces), old_vec_len + vec_len(ha->rules) - 1);
    _vec_len((*applied_hash_aces)) = old_vec_len;
  }

  /* add the rules from the ACL to the hash table for lookup and append to the vector*/
  for(i=0; i < vec_len(ha->rules); i++) {
    /*
     * Expand the applied aces vector to fit a new entry.
     * One by one not to upset split_partition() if it is called.
     */
    vec_resize((*applied_hash_aces), 1);

    int is_ip6 = ha->rules[i].match.pkt.is_ip6;
    u32 new_index = base_offset + i;
    applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), new_index);
    pae->acl_index = acl_index;
    pae->ace_index = ha->rules[i].ace_index;
    pae->acl_position = acl_position;
    pae->action = ha->rules[i].action;
    pae->hitcount = 0;
    pae->hash_ace_info_index = i;
    /* we might link it in later */
    pae->collision_head_ae_index = ~0;
    pae->colliding_rules = NULL;
    pae->mask_type_index = ~0;
    assign_mask_type_index_to_pae(am, lc_index, is_ip6, pae);
    u32 first_index = activate_applied_ace_hash_entry(am, lc_index, applied_hash_aces, new_index);
    if (am->use_tuple_merge)
      check_collision_count_and_maybe_split(am, lc_index, is_ip6, first_index);
  }
  remake_hash_applied_mask_info_vec(am, applied_hash_aces, lc_index);
done:
  clib_mem_set_heap (oldheap);
}

static u32
find_head_applied_ace_index(applied_hash_ace_entry_t **applied_hash_aces, u32 curr_index)
{
  ASSERT(curr_index != ~0);
  applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), curr_index);
  ASSERT(pae);
  ASSERT(pae->collision_head_ae_index != ~0);
  return pae->collision_head_ae_index;
}

static void
set_collision_head_ae_index(applied_hash_ace_entry_t **applied_hash_aces, collision_match_rule_t *colliding_rules, u32 new_index)
{
	collision_match_rule_t *cr;
	vec_foreach(cr, colliding_rules) {
            applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), cr->applied_entry_index);
            pae->collision_head_ae_index = new_index;
	}
}

static void
move_applied_ace_hash_entry(acl_main_t *am,
                            u32 lc_index,
                            applied_hash_ace_entry_t **applied_hash_aces,
                            u32 old_index, u32 new_index)
{
  ASSERT(old_index != ~0);
  ASSERT(new_index != ~0);
  /* move the entry */
  *vec_elt_at_index((*applied_hash_aces), new_index) = *vec_elt_at_index((*applied_hash_aces), old_index);

  /* update the linkage and hash table if necessary */
  applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), old_index);
  applied_hash_ace_entry_t *new_pae = vec_elt_at_index((*applied_hash_aces), new_index);

  if (ACL_HASH_LOOKUP_DEBUG > 0) {
    clib_warning("Moving pae from %d to %d", old_index, new_index);
    acl_plugin_print_pae(am->vlib_main, old_index, pae);
  }

  if (pae->collision_head_ae_index == old_index) {
    /* first entry - so the hash points to it, update */
    add_del_hashtable_entry(am, lc_index,
                            applied_hash_aces, new_index, 1);
  }
  if (new_pae->colliding_rules) {
    /* update the information within the collision rule entry */
    ASSERT(vec_len(new_pae->colliding_rules) > 0);
    collision_match_rule_t *cr = vec_elt_at_index (new_pae->colliding_rules, 0);
    ASSERT(cr->applied_entry_index == old_index);
    cr->applied_entry_index = new_index;
    set_collision_head_ae_index(applied_hash_aces, new_pae->colliding_rules, new_index);
  } else {
    /* find the index in the collision rule entry on the head element */
    u32 head_index = find_head_applied_ace_index(applied_hash_aces, new_index);
    ASSERT(head_index != ~0);
    applied_hash_ace_entry_t *head_pae = vec_elt_at_index((*applied_hash_aces), head_index);
    ASSERT(vec_len(head_pae->colliding_rules) > 0);
    u32 i;
    for (i=0; i<vec_len(head_pae->colliding_rules); i++) {
      collision_match_rule_t *cr = vec_elt_at_index (head_pae->colliding_rules, i);
      if (cr->applied_entry_index == old_index) {
        cr->applied_entry_index = new_index;
      }
    }
    if (ACL_HASH_LOOKUP_DEBUG > 0) {
      clib_warning("Head pae at index %d after adjustment", head_index);
      acl_plugin_print_pae(am->vlib_main, head_index, head_pae);
    }
  }
  /* invalidate the old entry */
  pae->collision_head_ae_index = ~0;
  pae->colliding_rules = NULL;
}

static void
deactivate_applied_ace_hash_entry(acl_main_t *am,
                            u32 lc_index,
                            applied_hash_ace_entry_t **applied_hash_aces,
                            u32 old_index)
{
  applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), old_index);
  DBG("UNAPPLY DEACTIVATE: lc_index %d applied index %d", lc_index, old_index);
  if (ACL_HASH_LOOKUP_DEBUG > 0) {
    clib_warning("Deactivating pae at index %d", old_index);
    acl_plugin_print_pae(am->vlib_main, old_index, pae);
  }

  if (pae->collision_head_ae_index != old_index) {
    DBG("UNAPPLY = index %d has collision head %d", old_index, pae->collision_head_ae_index);

    u32 head_index = find_head_applied_ace_index(applied_hash_aces, old_index);
    ASSERT(head_index != ~0);
    del_colliding_rule(applied_hash_aces, head_index, old_index);

  } else {
    /* It was the first entry. We need either to reset the hash entry or delete it */
    /* delete our entry from the collision vector first */
    del_colliding_rule(applied_hash_aces, old_index, old_index);
    if (vec_len(pae->colliding_rules) > 0) {
      u32 next_pae_index = pae->colliding_rules[0].applied_entry_index;
      applied_hash_ace_entry_t *next_pae = vec_elt_at_index((*applied_hash_aces), next_pae_index);
      /* Remove ourselves and transfer the ownership of the colliding rules vector */
      next_pae->colliding_rules = pae->colliding_rules;
      set_collision_head_ae_index(applied_hash_aces, next_pae->colliding_rules, next_pae_index);
      add_del_hashtable_entry(am, lc_index,
                              applied_hash_aces, next_pae_index, 1);
    } else {
      /* no next entry, so just delete the entry in the hash table */
      add_del_hashtable_entry(am, lc_index,
                              applied_hash_aces, old_index, 0);
    }
  }
  DBG0("Releasing mask type index %d for pae index %d on lc_index %d", pae->mask_type_index, old_index, lc_index);
  release_mask_type_index(am, pae->mask_type_index);
  /* invalidate the old entry */
  pae->mask_type_index = ~0;
  pae->collision_head_ae_index = ~0;
  /* always has to be 0 */
  pae->colliding_rules = NULL;
}


void
hash_acl_unapply(acl_main_t *am, u32 lc_index, int acl_index)
{
  int i;

  DBG0("HASH ACL unapply: lc_index %d acl %d", lc_index, acl_index);
  applied_hash_acl_info_t **applied_hash_acls = &am->applied_hash_acl_info_by_lc_index;
  applied_hash_acl_info_t *pal = vec_elt_at_index((*applied_hash_acls), lc_index);

  hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index);
  u32 **hash_acl_applied_lc_index = &ha->lc_index_list;

  if (ACL_HASH_LOOKUP_DEBUG > 0) {
    clib_warning("unapplying acl %d", acl_index);
    acl_plugin_show_tables_mask_type();
    acl_plugin_show_tables_acl_hash_info(acl_index);
    acl_plugin_show_tables_applied_info(lc_index);
  }

  /* remove this acl# from the list of applied hash acls */
  u32 index = vec_search(pal->applied_acls, acl_index);
  if (index == ~0) {
    clib_warning("BUG: trying to unapply unapplied acl_index %d on lc_index %d, according to lc",
                 acl_index, lc_index);
    return;
  }
  vec_del1(pal->applied_acls, index);

  u32 index2 = vec_search((*hash_acl_applied_lc_index), lc_index);
  if (index2 == ~0) {
    clib_warning("BUG: trying to unapply twice acl_index %d on lc_index %d, according to h-acl info",
                 acl_index, lc_index);
    return;
  }
  vec_del1((*hash_acl_applied_lc_index), index2);

  applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, lc_index);

  for(i=0; i < vec_len((*applied_hash_aces)); i++) {
    if (vec_elt_at_index(*applied_hash_aces,i)->acl_index == acl_index) {
      DBG("Found applied ACL#%d at applied index %d", acl_index, i);
      break;
    }
  }
  if (vec_len((*applied_hash_aces)) <= i) {
    DBG("Did not find applied ACL#%d at lc_index %d", acl_index, lc_index);
    /* we went all the way without finding any entries. Probably a list was empty. */
    return;
  }

  void *oldheap = hash_acl_set_heap(am);
  int base_offset = i;
  int tail_offset = base_offset + vec_len(ha->rules);
  int tail_len = vec_len((*applied_hash_aces)) - tail_offset;
  DBG("base_offset: %d, tail_offset: %d, tail_len: %d", base_offset, tail_offset, tail_len);

  for(i=0; i < vec_len(ha->rules); i ++) {
    deactivate_applied_ace_hash_entry(am, lc_index,
                                      applied_hash_aces, base_offset + i);
  }
  for(i=0; i < tail_len; i ++) {
    /* move the entry at tail offset to base offset */
    /* that is, from (tail_offset+i) -> (base_offset+i) */
    DBG0("UNAPPLY MOVE: lc_index %d, applied index %d -> %d", lc_index, tail_offset+i, base_offset + i);
    move_applied_ace_hash_entry(am, lc_index, applied_hash_aces, tail_offset + i, base_offset + i);
  }
  /* trim the end of the vector */
  _vec_len((*applied_hash_aces)) -= vec_len(ha->rules);

  remake_hash_applied_mask_info_vec(am, applied_hash_aces, lc_index);

  if (vec_len((*applied_hash_aces)) == 0) {
    vec_free((*applied_hash_aces));
  }

  clib_mem_set_heap (oldheap);
}

/*
 * Create the applied ACEs and update the hash table,
 * taking into account that the ACL may not be the last
 * in the vector of applied ACLs.
 *
 * For now, walk from the end of the vector and unapply the ACLs,
 * then apply the one in question and reapply the rest.
 */

void
hash_acl_reapply(acl_main_t *am, u32 lc_index, int acl_index)
{
  acl_lookup_context_t *acontext = pool_elt_at_index(am->acl_lookup_contexts, lc_index);
  u32 **applied_acls = &acontext->acl_indices;
  int i;
  int start_index = vec_search((*applied_acls), acl_index);

  DBG0("Start index for acl %d in lc_index %d is %d", acl_index, lc_index, start_index);
  /*
   * This function is called after we find out the lc_index where ACL is applied.
   * If the by-lc_index vector does not have the ACL#, then it's a bug.
   */
  ASSERT(start_index < vec_len(*applied_acls));

  /* unapply all the ACLs at the tail side, up to the current one */
  for(i = vec_len(*applied_acls) - 1; i > start_index; i--) {
    hash_acl_unapply(am, lc_index, *vec_elt_at_index(*applied_acls, i));
  }
  for(i = start_index; i < vec_len(*applied_acls); i++) {
    hash_acl_apply(am, lc_index, *vec_elt_at_index(*applied_acls, i), i);
  }
}

static void
make_ip6_address_mask(ip6_address_t *addr, u8 prefix_len)
{
  ip6_address_mask_from_width(addr, prefix_len);
}


/* Maybe should be moved into the core somewhere */
always_inline void
ip4_address_mask_from_width (ip4_address_t * a, u32 width)
{
  int i, byte, bit, bitnum;
  ASSERT (width <= 32);
  clib_memset (a, 0, sizeof (a[0]));
  for (i = 0; i < width; i++)
    {
      bitnum = (7 - (i & 7));
      byte = i / 8;
      bit = 1 << bitnum;
      a->as_u8[byte] |= bit;
    }
}


static void
make_ip4_address_mask(ip4_address_t *addr, u8 prefix_len)
{
  ip4_address_mask_from_width(addr, prefix_len);
}

static void
make_port_mask(u16 *portmask, u16 port_first, u16 port_last)
{
  if (port_first == port_last) {
    *portmask = 0xffff;
    /* single port is representable by masked value */
    return;
  }

  *portmask = 0;
  return;
}

static void
make_mask_and_match_from_rule(fa_5tuple_t *mask, acl_rule_t *r, hash_ace_info_t *hi)
{
  clib_memset(mask, 0, sizeof(*mask));
  clib_memset(&hi->match, 0, sizeof(hi->match));
  hi->action = r->is_permit;

  /* we will need to be matching based on lc_index and mask_type_index when applied */
  mask->pkt.lc_index = ~0;
  /* we will assign the match of mask_type_index later when we find it*/
  mask->pkt.mask_type_index_lsb = ~0;

  mask->pkt.is_ip6 = 1;
  hi->match.pkt.is_ip6 = r->is_ipv6;
  if (r->is_ipv6) {
    make_ip6_address_mask(&mask->ip6_addr[0], r->src_prefixlen);
    hi->match.ip6_addr[0] = r->src.ip6;
    make_ip6_address_mask(&mask->ip6_addr[1], r->dst_prefixlen);
    hi->match.ip6_addr[1] = r->dst.ip6;
  } else {
    clib_memset(hi->match.l3_zero_pad, 0, sizeof(hi->match.l3_zero_pad));
    make_ip4_address_mask(&mask->ip4_addr[0], r->src_prefixlen);
    hi->match.ip4_addr[0] = r->src.ip4;
    make_ip4_address_mask(&mask->ip4_addr[1], r->dst_prefixlen);
    hi->match.ip4_addr[1] = r->dst.ip4;
  }

  if (r->proto != 0) {
    mask->l4.proto = ~0; /* L4 proto needs to be matched */
    hi->match.l4.proto = r->proto;

    /* Calculate the src/dst port masks and make the src/dst port matches accordingly */
    make_port_mask(&mask->l4.port[0], r->src_port_or_type_first, r->src_port_or_type_last);
    hi->match.l4.port[0] = r->src_port_or_type_first & mask->l4.port[0];

    make_port_mask(&mask->l4.port[1], r->dst_port_or_code_first, r->dst_port_or_code_last);
    hi->match.l4.port[1] = r->dst_port_or_code_first & mask->l4.port[1];
    /* L4 info must be valid in order to match */
    mask->pkt.l4_valid = 1;
    hi->match.pkt.l4_valid = 1;
    /* And we must set the mask to check that it is an initial fragment */
    mask->pkt.is_nonfirst_fragment = 1;
    hi->match.pkt.is_nonfirst_fragment = 0;
    if ((r->proto == IPPROTO_TCP) && (r->tcp_flags_mask != 0)) {
      /* if we want to match on TCP flags, they must be masked off as well */
      mask->pkt.tcp_flags = r->tcp_flags_mask;
      hi->match.pkt.tcp_flags = r->tcp_flags_value;
      /* and the flags need to be present within the packet being matched */
      mask->pkt.tcp_flags_valid = 1;
      hi->match.pkt.tcp_flags_valid = 1;
    }
  }
  /* Sanitize the mask and the match */
  u64 *pmask = (u64 *)mask;
  u64 *pmatch = (u64 *)&hi->match;
  int j;
  for(j=0; j<6; j++) {
    pmatch[j] = pmatch[j] & pmask[j];
  }
}


int hash_acl_exists(acl_main_t *am, int acl_index)
{
  if (acl_index >= vec_len(am->hash_acl_infos))
    return 0;

  hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index);
  return ha->hash_acl_exists;
}

void hash_acl_add(acl_main_t *am, int acl_index)
{
  void *oldheap = hash_acl_set_heap(am);
  DBG("HASH ACL add : %d", acl_index);
  int i;
  acl_rule_t *acl_rules = am->acls[acl_index].rules;
  vec_validate(am->hash_acl_infos, acl_index);
  hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index);
  clib_memset(ha, 0, sizeof(*ha));
  ha->hash_acl_exists = 1;

  /* walk the newly added ACL entries and ensure that for each of them there
     is a mask type, increment a reference count for that mask type */

  /* avoid small requests by preallocating the entire vector before running the additions */
  if (vec_len(acl_rules) > 0) {
    vec_validate(ha->rules, vec_len(acl_rules)-1);
    vec_reset_length(ha->rules);
  }

  for(i=0; i < vec_len(acl_rules); i++) {
    hash_ace_info_t ace_info;
    fa_5tuple_t mask;
    clib_memset(&ace_info, 0, sizeof(ace_info));
    ace_info.acl_index = acl_index;
    ace_info.ace_index = i;

    make_mask_and_match_from_rule(&mask, &acl_rules[i], &ace_info);
    mask.pkt.flags_reserved = 0b000;
    ace_info.base_mask_type_index = assign_mask_type_index(am, &mask);
    /* assign the mask type index for matching itself */
    ace_info.match.pkt.mask_type_index_lsb = ace_info.base_mask_type_index;
    DBG("ACE: %d mask_type_index: %d", i, ace_info.base_mask_type_index);
    vec_add1(ha->rules, ace_info);
  }
  /*
   * if an ACL is applied somewhere, fill the corresponding lookup data structures.
   * We need to take care if the ACL is not the last one in the vector of ACLs applied to the interface.
   */
  if (acl_index < vec_len(am->lc_index_vec_by_acl)) {
    u32 *lc_index;
    vec_foreach(lc_index, am->lc_index_vec_by_acl[acl_index]) {
      hash_acl_reapply(am, *lc_index, acl_index);
    }
  }
  clib_mem_set_heap (oldheap);
}

void hash_acl_delete(acl_main_t *am, int acl_index)
{
  void *oldheap = hash_acl_set_heap(am);
  DBG0("HASH ACL delete : %d", acl_index);
  /*
   * If the ACL is applied somewhere, remove the references of it (call hash_acl_unapply)
   * this is a different behavior from the linear lookup where an empty ACL is "deny all",
   *
   * However, following vpp-dev discussion the ACL that is referenced elsewhere
   * should not be possible to delete, and the change adding this also adds
   * the safeguards to that respect, so this is not a problem.
   *
   * The part to remember is that this routine is called in process of reapplication
   * during the acl_add_replace() API call - the old acl ruleset is deleted, then
   * the new one is added, without the change in the applied ACLs - so this case
   * has to be handled.
   */
  hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, acl_index);
  u32 *lc_list_copy = 0;
  {
    u32 *lc_index;
    lc_list_copy = vec_dup(ha->lc_index_list);
    vec_foreach(lc_index, lc_list_copy) {
      hash_acl_unapply(am, *lc_index, acl_index);
    }
    vec_free(lc_list_copy);
  }
  vec_free(ha->lc_index_list);

  /* walk the mask types for the ACL about-to-be-deleted, and decrease
   * the reference count, possibly freeing up some of them */
  int i;
  for(i=0; i < vec_len(ha->rules); i++) {
    release_mask_type_index(am, ha->rules[i].base_mask_type_index);
  }
  ha->hash_acl_exists = 0;
  vec_free(ha->rules);
  clib_mem_set_heap (oldheap);
}


void
show_hash_acl_hash (vlib_main_t * vm, acl_main_t *am, u32 verbose)
{
  vlib_cli_output(vm, "\nACL lookup hash table:\n%U\n",
                  BV (format_bihash), &am->acl_lookup_hash, verbose);
}

void
acl_plugin_show_tables_mask_type (void)
{
  acl_main_t *am = &acl_main;
  vlib_main_t *vm = am->vlib_main;
  ace_mask_type_entry_t *mte;

  vlib_cli_output (vm, "Mask-type entries:");
    /* *INDENT-OFF* */
    pool_foreach(mte, am->ace_mask_type_pool,
    ({
      vlib_cli_output(vm, "     %3d: %016llx %016llx %016llx %016llx %016llx %016llx  refcount %d",
		    mte - am->ace_mask_type_pool,
		    mte->mask.kv_40_8.key[0], mte->mask.kv_40_8.key[1], mte->mask.kv_40_8.key[2],
		    mte->mask.kv_40_8.key[3], mte->mask.kv_40_8.key[4], mte->mask.kv_40_8.value, mte->refcount);
    }));
    /* *INDENT-ON* */
}

void
acl_plugin_show_tables_acl_hash_info (u32 acl_index)
{
  acl_main_t *am = &acl_main;
  vlib_main_t *vm = am->vlib_main;
  u32 i, j;
  u64 *m;
  vlib_cli_output (vm, "Mask-ready ACL representations\n");
  for (i = 0; i < vec_len (am->hash_acl_infos); i++)
    {
      if ((acl_index != ~0) && (acl_index != i))
	{
	  continue;
	}
      hash_acl_info_t *ha = &am->hash_acl_infos[i];
      vlib_cli_output (vm, "acl-index %u bitmask-ready layout\n", i);
      vlib_cli_output (vm, "  applied lc_index list: %U\n",
		       format_vec32, ha->lc_index_list, "%d");
      for (j = 0; j < vec_len (ha->rules); j++)
	{
	  hash_ace_info_t *pa = &ha->rules[j];
	  m = (u64 *) & pa->match;
	  vlib_cli_output (vm,
			   "    %4d: %016llx %016llx %016llx %016llx %016llx %016llx base mask index %d acl %d rule %d action %d\n",
			   j, m[0], m[1], m[2], m[3], m[4], m[5],
			   pa->base_mask_type_index, pa->acl_index, pa->ace_index,
			   pa->action);
	}
    }
}

static void
acl_plugin_print_colliding_rule (vlib_main_t * vm, int j, collision_match_rule_t *cr) {
  vlib_cli_output(vm,
                  "        %4d: acl %d ace %d acl pos %d pae index: %d",
                  j, cr->acl_index, cr->ace_index, cr->acl_position, cr->applied_entry_index);
}

static void
acl_plugin_print_pae (vlib_main_t * vm, int j, applied_hash_ace_entry_t * pae)
{
  vlib_cli_output (vm,
		   "    %4d: acl %d rule %d action %d bitmask-ready rule %d mask type index: %d colliding_rules: %d collision_head_ae_idx %d hitcount %lld acl_pos: %d",
		   j, pae->acl_index, pae->ace_index, pae->action,
		   pae->hash_ace_info_index, pae->mask_type_index, vec_len(pae->colliding_rules), pae->collision_head_ae_index,
		   pae->hitcount, pae->acl_position);
  int jj;
  for(jj=0; jj<vec_len(pae->colliding_rules); jj++)
    acl_plugin_print_colliding_rule(vm, jj, vec_elt_at_index(pae->colliding_rules, jj));
}

static void
acl_plugin_print_applied_mask_info (vlib_main_t * vm, int j, hash_applied_mask_info_t *mi)
{
  vlib_cli_output (vm,
		   "    %4d: mask type index %d first rule index %d num_entries %d max_collisions %d",
		   j, mi->mask_type_index, mi->first_rule_index, mi->num_entries, mi->max_collisions);
}

void
acl_plugin_show_tables_applied_info (u32 lc_index)
{
  acl_main_t *am = &acl_main;
  vlib_main_t *vm = am->vlib_main;
  u32 lci, j;
  vlib_cli_output (vm, "Applied lookup entries for lookup contexts");

  for (lci = 0;
       (lci < vec_len(am->applied_hash_acl_info_by_lc_index)); lci++)
    {
      if ((lc_index != ~0) && (lc_index != lci))
	{
	  continue;
	}
      vlib_cli_output (vm, "lc_index %d:", lci);
      if (lci < vec_len (am->applied_hash_acl_info_by_lc_index))
	{
	  applied_hash_acl_info_t *pal =
	    &am->applied_hash_acl_info_by_lc_index[lci];
	  vlib_cli_output (vm, "  applied acls: %U", format_vec32,
			   pal->applied_acls, "%d");
	}
      if (lci < vec_len (am->hash_applied_mask_info_vec_by_lc_index))
	{
	  vlib_cli_output (vm, "  applied mask info entries:");
	  for (j = 0;
	       j < vec_len (am->hash_applied_mask_info_vec_by_lc_index[lci]);
	       j++)
	    {
	      acl_plugin_print_applied_mask_info (vm, j,
				    &am->hash_applied_mask_info_vec_by_lc_index
				    [lci][j]);
	    }
	}
      if (lci < vec_len (am->hash_entry_vec_by_lc_index))
	{
	  vlib_cli_output (vm, "  lookup applied entries:");
	  for (j = 0;
	       j < vec_len (am->hash_entry_vec_by_lc_index[lci]);
	       j++)
	    {
	      acl_plugin_print_pae (vm, j,
				    &am->hash_entry_vec_by_lc_index
				    [lci][j]);
	    }
	}
    }
}

void
acl_plugin_show_tables_bihash (u32 show_bihash_verbose)
{
  acl_main_t *am = &acl_main;
  vlib_main_t *vm = am->vlib_main;
  show_hash_acl_hash (vm, am, show_bihash_verbose);
}

/*
 * Split of the partition needs to happen when the collision count
 * goes over a specified threshold.
 *
 * This is a signal that we ignored too many bits in
 * mT and we need to split the table into two tables. We select
 * all of the colliding rules L and find their maximum common
 * tuple mL. Normally mL is specific enough to hash L with few
 * or no collisions. We then create a new table T2 with tuple mL
 * and transfer all compatible rules from T to T2. If mL is not
 * specific enough, we find the field with the biggest difference
 * between the minimum and maximum tuple lengths for all of
 * the rules in L and set that field to be the average of those two
 * values. We then transfer all compatible rules as before. This
 * guarantees that some rules from L will move and that T2 will
 * have a smaller number of collisions than T did.
 */


static void
ensure_ip6_min_addr (ip6_address_t * min_addr, ip6_address_t * mask_addr)
{
  int update =
    (clib_net_to_host_u64 (mask_addr->as_u64[0]) <
     clib_net_to_host_u64 (min_addr->as_u64[0]))
    ||
    ((clib_net_to_host_u64 (mask_addr->as_u64[0]) ==
      clib_net_to_host_u64 (min_addr->as_u64[0]))
     && (clib_net_to_host_u64 (mask_addr->as_u64[1]) <
	 clib_net_to_host_u64 (min_addr->as_u64[1])));
  if (update)
    {
      min_addr->as_u64[0] = mask_addr->as_u64[0];
      min_addr->as_u64[1] = mask_addr->as_u64[1];
    }
}

static void
ensure_ip6_max_addr (ip6_address_t * max_addr, ip6_address_t * mask_addr)
{
  int update =
    (clib_net_to_host_u64 (mask_addr->as_u64[0]) >
     clib_net_to_host_u64 (max_addr->as_u64[0]))
    ||
    ((clib_net_to_host_u64 (mask_addr->as_u64[0]) ==
      clib_net_to_host_u64 (max_addr->as_u64[0]))
     && (clib_net_to_host_u64 (mask_addr->as_u64[1]) >
	 clib_net_to_host_u64 (max_addr->as_u64[1])));
  if (update)
    {
      max_addr->as_u64[0] = mask_addr->as_u64[0];
      max_addr->as_u64[1] = mask_addr->as_u64[1];
    }
}

static void
ensure_ip4_min_addr (ip4_address_t * min_addr, ip4_address_t * mask_addr)
{
  int update =
    (clib_net_to_host_u32 (mask_addr->as_u32) <
     clib_net_to_host_u32 (min_addr->as_u32));
  if (update)
    min_addr->as_u32 = mask_addr->as_u32;
}

static void
ensure_ip4_max_addr (ip4_address_t * max_addr, ip4_address_t * mask_addr)
{
  int update =
    (clib_net_to_host_u32 (mask_addr->as_u32) >
     clib_net_to_host_u32 (max_addr->as_u32));
  if (update)
    max_addr->as_u32 = mask_addr->as_u32;
}

enum {
  DIM_SRC_ADDR = 0,
  DIM_DST_ADDR,
  DIM_SRC_PORT,
  DIM_DST_PORT,
  DIM_PROTO,
};



static void
split_partition(acl_main_t *am, u32 first_index,
                            u32 lc_index, int is_ip6){
	DBG( "TM-split_partition - first_entry:%d", first_index);
        applied_hash_ace_entry_t **applied_hash_aces = get_applied_hash_aces(am, lc_index);
	ace_mask_type_entry_t *mte;
	fa_5tuple_t the_min_tuple, *min_tuple = &the_min_tuple;
        fa_5tuple_t the_max_tuple, *max_tuple = &the_max_tuple;
	applied_hash_ace_entry_t *pae = vec_elt_at_index((*applied_hash_aces), first_index);
	hash_acl_info_t *ha = vec_elt_at_index(am->hash_acl_infos, pae->acl_index);
	hash_ace_info_t *ace_info;
	u32 coll_mask_type_index = pae->mask_type_index;
        clib_memset(&the_min_tuple, 0, sizeof(the_min_tuple));
        clib_memset(&the_max_tuple, 0, sizeof(the_max_tuple));

	int i=0;
	collision_match_rule_t *colliding_rules = pae->colliding_rules;
	u64 collisions = vec_len(pae->colliding_rules);
	for(i=0; i<collisions; i++){
                /* reload the hash acl info as it might be a different ACL# */
		pae = vec_elt_at_index((*applied_hash_aces), colliding_rules[i].applied_entry_index);
	        ha = vec_elt_at_index(am->hash_acl_infos, pae->acl_index);

		DBG( "TM-collision: base_ace:%d (ace_mask:%d, first_collision_mask:%d)",
				pae->ace_index, pae->mask_type_index, coll_mask_type_index);

		ace_info = vec_elt_at_index(ha->rules, pae->hash_ace_info_index);
		mte = vec_elt_at_index(am->ace_mask_type_pool, ace_info->base_mask_type_index);
		fa_5tuple_t *mask = &mte->mask;

		if(pae->mask_type_index != coll_mask_type_index) continue;
		/* Computing min_mask and max_mask for colliding rules */
		if(i==0){
                        clib_memcpy_fast(min_tuple, mask, sizeof(fa_5tuple_t));
			clib_memcpy_fast(max_tuple, mask, sizeof(fa_5tuple_t));
		}else{
			int j;
			for(j=0; j<2; j++){
                                if (is_ip6)
                                  ensure_ip6_min_addr(&min_tuple->ip6_addr[j], &mask->ip6_addr[j]);
                                else
                                  ensure_ip4_min_addr(&min_tuple->ip4_addr[j], &mask->ip4_addr[j]);

				if ((mask->l4.port[j] < min_tuple->l4.port[j]))
					min_tuple->l4.port[j] = mask->l4.port[j];
			}

			if ((mask->l4.proto < min_tuple->l4.proto))
				min_tuple->l4.proto = mask->l4.proto;

			if(mask->pkt.as_u64 < min_tuple->pkt.as_u64)
				min_tuple->pkt.as_u64 = mask->pkt.as_u64;


			for(j=0; j<2; j++){
                                if (is_ip6)
                                  ensure_ip6_max_addr(&max_tuple->ip6_addr[j], &mask->ip6_addr[j]);
                                else
                                  ensure_ip4_max_addr(&max_tuple->ip4_addr[j], &mask->ip4_addr[j]);

				if ((mask->l4.port[j] > max_tuple->l4.port[j]))
					max_tuple->l4.port[j] = mask->l4.port[j];
			}

			if ((mask->l4.proto < max_tuple->l4.proto))
				max_tuple->l4.proto = mask->l4.proto;

			if(mask->pkt.as_u64 > max_tuple->pkt.as_u64)
				max_tuple->pkt.as_u64 = mask->pkt.as_u64;
		}
	}

	/* Computing field with max difference between (min/max)_mask */
	int best_dim=-1, best_delta=0, delta=0;

	/* SRC_addr dimension */
        if (is_ip6) {
	  int i;
	  for(i=0; i<2; i++){
		delta += count_bits(max_tuple->ip6_addr[0].as_u64[i]) - count_bits(min_tuple->ip6_addr[0].as_u64[i]);
	  }
        } else {
		delta += count_bits(max_tuple->ip4_addr[0].as_u32) - count_bits(min_tuple->ip4_addr[0].as_u32);
        }
	if(delta > best_delta){
		best_delta = delta;
		best_dim = DIM_SRC_ADDR;
	}

	/* DST_addr dimension */
	delta = 0;
        if (is_ip6) {
	  int i;
	  for(i=0; i<2; i++){
		delta += count_bits(max_tuple->ip6_addr[1].as_u64[i]) - count_bits(min_tuple->ip6_addr[1].as_u64[i]);
	  }
        } else {
		delta += count_bits(max_tuple->ip4_addr[1].as_u32) - count_bits(min_tuple->ip4_addr[1].as_u32);
        }
	if(delta > best_delta){
		best_delta = delta;
		best_dim = DIM_DST_ADDR;
	}

	/* SRC_port dimension */
	delta = count_bits(max_tuple->l4.port[0]) - count_bits(min_tuple->l4.port[0]);
	if(delta > best_delta){
		best_delta = delta;
		best_dim = DIM_SRC_PORT;
	}

	/* DST_port dimension */
	delta = count_bits(max_tuple->l4.port[1]) - count_bits(min_tuple->l4.port[1]);
	if(delta > best_delta){
		best_delta = delta;
		best_dim = DIM_DST_PORT;
	}

	/* Proto dimension */
	delta = count_bits(max_tuple->l4.proto) - count_bits(min_tuple->l4.proto);
	if(delta > best_delta){
		best_delta = delta;
		best_dim = DIM_PROTO;
	}

	int shifting = 0; //, ipv4_block = 0;
	switch(best_dim){
		case DIM_SRC_ADDR:
			shifting = (best_delta)/2; // FIXME IPV4-only
			// ipv4_block = count_bits(max_tuple->ip4_addr[0].as_u32);
			min_tuple->ip4_addr[0].as_u32 =
					clib_host_to_net_u32((clib_net_to_host_u32(max_tuple->ip4_addr[0].as_u32) << (shifting))&0xFFFFFFFF);

			break;
		case DIM_DST_ADDR:
			shifting = (best_delta)/2;
/*
			ipv4_block = count_bits(max_tuple->addr[1].as_u64[1]);
			if(ipv4_block > shifting)
				min_tuple->addr[1].as_u64[1] =
					clib_host_to_net_u64((clib_net_to_host_u64(max_tuple->addr[1].as_u64[1]) << (shifting))&0xFFFFFFFF);
			else{
				shifting = shifting - ipv4_block;
				min_tuple->addr[1].as_u64[1] = 0;
				min_tuple->addr[1].as_u64[0] =
					clib_host_to_net_u64((clib_net_to_host_u64(max_tuple->addr[1].as_u64[0]) << (shifting))&0xFFFFFFFF);
			}
*/
			min_tuple->ip4_addr[1].as_u32 =
					clib_host_to_net_u32((clib_net_to_host_u32(max_tuple->ip4_addr[1].as_u32) << (shifting))&0xFFFFFFFF);

			break;
		case DIM_SRC_PORT: min_tuple->l4.port[0] = max_tuple->l4.port[0]  << (best_delta)/2;
			break;
		case DIM_DST_PORT: min_tuple->l4.port[1] = max_tuple->l4.port[1] << (best_delta)/2;
			break;
		case DIM_PROTO: min_tuple->l4.proto = max_tuple->l4.proto << (best_delta)/2;
			break;
		default: relax_tuple(min_tuple, is_ip6, 1);
			break;
	}

	min_tuple->pkt.is_nonfirst_fragment = 0;
        u32 new_mask_type_index = assign_mask_type_index(am, min_tuple);

	hash_applied_mask_info_t **hash_applied_mask_info_vec = vec_elt_at_index(am->hash_applied_mask_info_vec_by_lc_index, lc_index);

	hash_applied_mask_info_t *minfo;
	//search in order pool if mask_type_index is already there
	int search;
	for (search=0; search < vec_len((*hash_applied_mask_info_vec)); search++){
		minfo = vec_elt_at_index((*hash_applied_mask_info_vec), search);
		if(minfo->mask_type_index == new_mask_type_index)
			break;
	}

	vec_validate((*hash_applied_mask_info_vec), search);
	minfo = vec_elt_at_index((*hash_applied_mask_info_vec), search);
	minfo->mask_type_index = new_mask_type_index;
	minfo->num_entries = 0;
	minfo->max_collisions = 0;
	minfo->first_rule_index = ~0;

	DBG( "TM-split_partition - mask type index-assigned!! -> %d", new_mask_type_index);

	if(coll_mask_type_index == new_mask_type_index){
		//vlib_cli_output(vm, "TM-There are collisions over threshold, but i'm not able to split! %d %d", coll_mask_type_index, new_mask_type_index);
		return;
	}


	/* populate new partition */
	DBG( "TM-Populate new partition");
	u32 r_ace_index = first_index;
        int repopulate_count = 0;

	collision_match_rule_t *temp_colliding_rules = vec_dup(colliding_rules);
	collisions = vec_len(temp_colliding_rules);

	for(i=0; i<collisions; i++){

	        r_ace_index = temp_colliding_rules[i].applied_entry_index;

		applied_hash_ace_entry_t *pop_pae = vec_elt_at_index((*applied_hash_aces), r_ace_index);
	        ha = vec_elt_at_index(am->hash_acl_infos, pop_pae->acl_index);
		DBG( "TM-Population-collision: base_ace:%d (ace_mask:%d, first_collision_mask:%d)",
				pop_pae->ace_index, pop_pae->mask_type_index, coll_mask_type_index);

		ASSERT(pop_pae->mask_type_index == coll_mask_type_index);

		ace_info = vec_elt_at_index(ha->rules, pop_pae->hash_ace_info_index);
		mte = vec_elt_at_index(am->ace_mask_type_pool, ace_info->base_mask_type_index);
		//can insert rule?
		//mte = vec_elt_at_index(am->ace_mask_type_pool, pop_pae->mask_type_index);
		fa_5tuple_t *pop_mask = &mte->mask;

		if(!first_mask_contains_second_mask(is_ip6, min_tuple, pop_mask)) continue;
		DBG( "TM-new partition can insert -> applied_ace:%d", r_ace_index);

		//delete and insert in new format
		deactivate_applied_ace_hash_entry(am, lc_index, applied_hash_aces, r_ace_index);

		/* insert the new entry */
		pop_pae->mask_type_index = new_mask_type_index;
                /* The very first repopulation gets the lock by virtue of a new mask being created above */
                if (++repopulate_count > 1)
                  lock_mask_type_index(am, new_mask_type_index);

		activate_applied_ace_hash_entry(am, lc_index, applied_hash_aces, r_ace_index);

	}
	vec_free(temp_colliding_rules);

	DBG( "TM-Populate new partition-END");
	DBG( "TM-split_partition - END");

}