summaryrefslogtreecommitdiffstats
path: root/src/vnet/pg
AgeCommit message (Expand)AuthorFilesLines
2017-11-21dpdk: add l2_hdr_offset and l3_hdr_offset in vlib_buffer_tDamjan Marion2-3/+3
2017-11-10Break up vpe.apiNeale Ranns2-0/+269
2017-10-04[aarch64] Fixes CLI crashes on dpaa2 platform.Christophe Fontaine2-2/+2
2017-07-10vlib: store buffer memory information in the buffer_mainDamjan Marion2-4/+4
2017-04-06Use thread local storage for thread indexDamjan Marion1-2/+2
2017-03-17Fix IP feature ordering.Neale Ranns1-4/+0
2017-03-07DHCP Multiple Servers (VPP-602, VPP-605)Neale Ranns1-0/+6
2017-03-01dpdk: be a pluginDamjan Marion2-8/+8
2017-02-22VPP-635: CLI Memory leak with invalid parameterBilly McFall1-10/+29
2017-02-03pg: add trace information for pg tx nodeDamjan Marion3-9/+46
2016-12-28Reorganize source tree to use single autotools instanceDamjan Marion9-0/+3742
f='#n227'>227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608
/*
 * Copyright (c) 2012 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Author: Pierre Pfister <ppfister@cisco.com>
 *
 * DISCLAIMER !
 *
 *       This most likely is not the hash table you are looking for !!
 *
 *       This structure targets a very specific and quite narrow set of use-cases 
 *         that are not covered by other hash tables.
 *
 *       Read the following text carefully, or ask the author or one of VPP's
 *         committers to make sure this is what you are looking for.
 *
 *
 *  -- Abstract:
 * This hash table intends to provide a very fast lookup and insertion of
 * key-value pairs for flow tables (although it might be used for other
 * purposes), with additional support for lazy-timeouts.
 * In particular, it was designed to minimize blocking reads, register usage and
 * cache-lines accesses during a typical lookup.
 * This hash table therefore provides stateful packet processing
 * without performance degradation even when every single lookup has to fetch
 * memory from RAM.
 * This hash table is not thread-safe and requires executing a garbage
 * collection function to clean-up chained buckets.
 *
 *  -- Overview:
 *
 * One first aspect of this hash table is that it is self-contained in a single
 * bulk of memory. Each entry contains a key, a value, and a 32 bits timeout
 * value; occupies a full and single cache line; and is identified by a unique
 * 32 bits index. The entry index zero is reserved and used when an entry
 * could not be found nor inserted. Which means it is not necessary to
 * immediately check whether an insertion or lookup was successful before
 * behaving accordingly. One can just keep doing business as usual and
 * check for the error later.
 *
 * Each entry is associated with a timeout value (which unit or clock is up to
 * the user of the hash table). An entry which timeout is strictly smaller
 * than the current time is considered empty, whereas an entry which timeout is
 * greater or equal to the current time contains a valid key-value pair.
 *
 * Hash table lookup and insertion are equivalent:
 *  - An entry index is always returned (possibly index 0 if no entry could be
 *   found nor created).
 *  - The returned entry always has its key set to the provided key.
 *  - Timeout value will be greater than the provided current time whenever a
 *    valid entry was found, strictly smaller otherwise. In particular, one can
 *    not differentiate between an entry which was just created, and an entry
 *    which already existed in the past but got timeouted in between.
 *
 * As mentioned earlier, entry index zero is used as an invalid entry which may
 * be manipulated as a normal one. Entries which index go from 1 to
 * N (where N is a power of 2) are used as direct buckets, each containing a
 * single entry. In the absence of hash collision, a single entry which location
 * can deterministically be determined from the key-hash and the hash table
 * header is accessed (One single cache line, without indirection). This
 * allows for efficient pre-fetching of the key-value for more than 95% of
 * accesses.
 *
 * In order to handle hash collisions (i.e. when multiple keys
 * end-up in the same bucket), entries which index are greater than N are
 * grouped into M groups of 16 collision entries. Such groups are linked
 * with regular entries whenever a collision needs to be handled.
 * When looking up a key with a bucket where a collision occurred, unused bits
 * from the key hash are used to select two entries (from the collision bucket)
 * where the new entry might be inserted.
 *
 * Once an entry is inserted, it will never be moved as long as the entry
 * timeout value remains greater or equal to the provided current time value.
 * The entry index can therefore be stored in other data structure as a way
 * to bypass the hash lookup. But when doing so, one should check if the
 * present key is the actual looked-up key.
 *
 *  -- Garbage Collection:
 *
 *  Since there is no explicit element removal, a garbage collector mechanism
 *  is required in order to remove buckets used for hash collisions. This
 *  is done by calling the flowhash_gc function on a regular basis. Each call
 *  to this function examines a single fixed entry. It shall therefore be called
 *  as many times as there are fixed entries in the hash table in order to
 *  ensure a full inspection.
 *
 *  -- Time and timeout mechanism:
 *
 *  The hash table makes use of a time value between in [1, 2^32 - 1].
 *  The provided time value shall keep increasing, and looping is not handled.
 *  When seconds are used, the system should run for 136 years without any issue.
 *  If milliseconds are used, a shift should be operated on all timeout values
 *  on a regular basis (more than every 49 days).
 */

#ifndef __included_flowhash_template_h__
#define __included_flowhash_template_h__

#include <vppinfra/clib.h>
#include <vppinfra/mem.h>
#include <vppinfra/cache.h>

#ifndef FLOWHASH_TYPE
#error FLOWHASH_TYPE not defined
#endif

#define _fv(a,b) a##b
#define __fv(a,b) _fv(a,b)
#define FV(a) __fv(a,FLOWHASH_TYPE)

#define _fvt(a,b) a##b##_t
#define __fvt(a,b) _fvt(a,b)
#define FVT(a) __fvt(a,FLOWHASH_TYPE)

/* Same for all flowhash variants */
#ifndef __included_flowhash_common__

#define FLOWHASH_INVALID_ENTRY_INDEX 0

#define FLOWHASH_ENTRIES_PER_BUCKETS_LOG 4
#define FLOWHASH_ENTRIES_PER_BUCKETS (1 << FLOWHASH_ENTRIES_PER_BUCKETS_LOG)

#endif /* ifndef __included_flowhash_common__ */

 /**
  * @brief Compare a stored key with a lookup key.
  *
  * This function must be defined to use this template. It must return 0
  * when the two keys are identical, and a different value otherwise.
  */
static_always_inline
u8 FV(flowhash_cmp_key)(FVT(flowhash_skey) *a, FVT(flowhash_lkey) *b);

 /**
  * @brief Hash a lookup key into a 32 bit integer.
  *
  * This function must be defined to use this template.
  * It must provides close to 32 bits of entropy distributed amongst
  * all 32 bits of the provided value.
  * Keys that are equal must have the same hash.
  */
 static_always_inline
 u32 FV(flowhash_hash)(FVT(flowhash_lkey) *k);

/**
 * @brief Copy a lookup key into a destination stored key.
 *
 * This function must be defined to use this template. It must modify the dst
 * key such that a later call to flowhash_cmp_key with the same arguments
 * would return 0.
 */
static_always_inline
void FV(flowhash_cpy_key)(FVT(flowhash_skey) *dst, FVT(flowhash_lkey) *src);

/**
 * @brief One flow hash entry used for both direct buckets and collision
 *        buckets.
 */
typedef struct {
  /* Each entry is cache-line aligned. */
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);

  /* Key is first to take advantage of alignment. */
  FVT(flowhash_skey) key;

  /* Entry value. */
  FVT(flowhash_value) value;

  /* Timeout value */
  u32 timeout;

  /* Entry index to the chained bucket. */
  u32 chained_entry_index;
} FVT(flowhash_entry);

typedef struct FVT(__flowhash_struct) {
  /* Cache aligned to simplify allocation. */
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);

  /* Array going downward containing free bucket indices */
  u32 free_buckets_indices[0];

  /* Negative index of the first free bucket */
  i32 free_buckets_position;

  /* Number of fixed buckets minus one */
  u32 fixed_entries_mask;

  /* Allocated pointer for this hash table */
  void *mem;

  u32 collision_buckets_mask;
  u32 total_entries;

  u64 not_enough_buckets_counter;
  u64 collision_lookup_counter;
  u64 garbage_collection_counter;

  u32 gc_counter;

  /* Entry array containing:
   * - 1 Dummy entry for error return
   * - (buckets_mask + 1) Fixed buckets
   * - chained_buckets Chained Buckets
   */
  FVT(flowhash_entry) entries[0];
} FVT(flowhash);

/* Same for all flowhash variants */
#ifndef __included_flowhash_common__
#define __included_flowhash_common__

/**
 * @brief Test whether a returned entry index corresponds to an overflow event.
 */
#define flowhash_is_overflow(ei) \
    ((ei) == FLOWHASH_INVALID_ENTRY_INDEX)

/**
 * @brief Iterate over all entries in the hash table.
 *
 * Iterate over all entries in the hash table, not including the first invalid
 * entry (at index 0), but including all chained hash collision buckets.
 *
 */
#define flowhash_foreach_entry(h, ei) \
      for (ei = 1; \
           ei < (h)->total_entries; \
           ei++)

/**
 * @brief Iterate over all currently valid entries.
 *
 * Iterate over all entries in the hash table which timeout value is greater
 * or equal to the current time.
 */
#define flowhash_foreach_valid_entry(h, ei, now) \
    flowhash_foreach_entry(h, ei) \
      if (((now) <= (h)->entries[ei].timeout))

/**
 * @brief Timeout variable from a given entry.
 */
#define flowhash_timeout(h, ei) (h)->entries[ei].timeout

/**
 * @brief Indicates whether the entry is being used.
 */
#define flowhash_is_timeouted(h, ei, time_now) \
    ((time_now) > flowhash_timeout(h, ei))

/**
 * @brief Get the key from the entry index, casted to the provided type.
 */
#define flowhash_key(h, ei) (&(h)->entries[ei].key)

/**
 * @brief Get the value from the entry index, casted to the provided type.
 */
#define flowhash_value(h, ei) (&(h)->entries[ei].value)

/**
 * @brief Get the number of octets allocated to this structure.
 */
#define flowhash_memory_size(h) clib_mem_size((h)->mem)

/**
 * @brief Test whether the entry index is in hash table boundaries.
 */
#define flowhash_is_valid_entry_index(h, ei) (ei < (h)->total_entries)

/**
 * @brief Adjust, if necessary, provided parameters such as being valid flowhash
 *        sizes.
 */
static
void flowhash_validate_sizes(u32 *fixed_entries, u32 *collision_buckets)
{
  /* Find power of two greater or equal to the provided value */
  if (*fixed_entries < FLOWHASH_ENTRIES_PER_BUCKETS)
    *fixed_entries = FLOWHASH_ENTRIES_PER_BUCKETS;
  if (*fixed_entries > (1 << (32 - FLOWHASH_ENTRIES_PER_BUCKETS_LOG)))
    *fixed_entries = (1 << (32 - FLOWHASH_ENTRIES_PER_BUCKETS_LOG));

  *fixed_entries -= 1;
  *fixed_entries |= *fixed_entries >> 16;
  *fixed_entries |= *fixed_entries >> 8;
  *fixed_entries |= *fixed_entries >> 4;
  *fixed_entries |= *fixed_entries >> 2;
  *fixed_entries |= *fixed_entries >> 1;
  *fixed_entries += 1;

  if (*collision_buckets != 0)
    {
      if (*collision_buckets < CLIB_CACHE_LINE_BYTES/sizeof(u32))
        *collision_buckets = CLIB_CACHE_LINE_BYTES/sizeof(u32);

      *collision_buckets -= 1;
      *collision_buckets |= *collision_buckets >> 16;
      *collision_buckets |= *collision_buckets >> 8;
      *collision_buckets |= *collision_buckets >> 4;
      *collision_buckets |= *collision_buckets >> 2;
      *collision_buckets |= *collision_buckets >> 1;
      *collision_buckets += 1;
    }
}

/**
 * @brief Prefetch the the hash entry bucket.
 *
 * This should be performed approximately 200-300 cycles before lookup
 * if the table is located in RAM. Or 30-40 cycles before lookup
 * in case the table is located in L3.
 */
#define flowhash_prefetch(h, hash) \
    CLIB_PREFETCH (&(h)->entries[((hash) & (h)->fixed_entries_mask) + 1], \
        	   sizeof((h)->entries[0]), LOAD)

#endif /* ifndef __included_flowhash_common__ */

/**
 * @brief Allocate a flowhash structure.
 *
 * @param[in] fixed_entries The number of fixed entries in the hash table.
 * @param[in] chained_buckets The number of chained buckets.
 *
 * fixed_entries and chained_buckets parameters may not be used as is but
 * modified in order to fit requirements.
 *
 * Since the flowhash does not support dynamic resizing, it is fairly
 * important to choose the parameters carefully. In particular the performance
 * gain from using this structure comes from an efficient lookup in the
 * absence of hash collision.
 * As a rule of thumbs, if the number of active entries (flows) is M,
 * there should be about 16*M fixed entries, and M/16 collision buckets.
 * Which represents 17*M allocated entries.
 *
 * For example:
 * M = 2^20 total_size ~= 1GiB    collision ~= 3%
 * M = 2^18 total_size ~= 250MiB  collision ~= 3%
 * M = 2^10 total_size ~= 1MiB    collision ~= 6%
 *
 */
static_always_inline
FVT(flowhash) *FV(flowhash_alloc)(u32 fixed_entries, u32 collision_buckets)
{
  FVT(flowhash) *h;
  uword size;
  void *mem;
  u32 entries;

  flowhash_validate_sizes(&fixed_entries, &collision_buckets);

  entries = 1 + fixed_entries +
      collision_buckets * FLOWHASH_ENTRIES_PER_BUCKETS;
  size = sizeof(*h) + sizeof(h->entries[0]) * entries +
      sizeof(h->free_buckets_indices[0]) * collision_buckets;

  mem = clib_mem_alloc_aligned(size, CLIB_CACHE_LINE_BYTES);
  h = mem + collision_buckets * sizeof(h->free_buckets_indices[0]);
  h->mem = mem;

  /* Fill free elements list */
  int i;
  clib_memset(h->entries, 0, sizeof(h->entries[0]) * entries);
  for (i = 1; i <= collision_buckets; i++)
    {
      h->free_buckets_indices[-i] =
          entries - i * FLOWHASH_ENTRIES_PER_BUCKETS;
    }

  /* Init buckets */
  for (i=0; i < entries; i++)
    {
      h->entries[i].chained_entry_index = FLOWHASH_INVALID_ENTRY_INDEX;
      h->entries[i].timeout = 0;
    }

  h->free_buckets_position = -collision_buckets;
  h->fixed_entries_mask = fixed_entries - 1;
  h->collision_buckets_mask = collision_buckets - 1;
  h->total_entries = entries;
  h->not_enough_buckets_counter = 0;
  h->collision_lookup_counter = 0;
  h->garbage_collection_counter = 0;
  h->gc_counter = 0;

  return h;
}

/**
 * @brief Free the flow hash memory.
 */
static_always_inline
void FV(flowhash_free)(FVT(flowhash) *h)
{
  clib_mem_free(h->mem);
}

static void
FV(__flowhash_get_chained) (FVT(flowhash) *h, FVT(flowhash_lkey) *k,
                            u32 hash, u32 time_now, u32 *ei);

/**
 * @brief Retrieves an entry index corresponding to a provided key and its hash.
 *
 * @param h            The hash table pointer.
 * @param k[in]        A pointer to the key value.
 * @param hash[in]     The hash of the key.
 * @param time_now[in] The current time.
 * @param ei[out]      A pointer set to the found entry index.
 *
 * This function always sets ei value to a valid entry index which can then be
 * used to access the stored value as well as get or set its associated timeout.
 * The key stored in the returned entry is always set to the provided key.
 *
 * In case the provided key is not found, and no entry could be created
 * (either because there is no hash collision bucket available or
 * the candidate entries in the collision bucket were already used), ei is
 * set to the special value FLOWHASH_INVALID_ENTRY_INDEX (which can be tested
 * with the flowhash_is_overflow macro).
 *
 * The timeout value is never modified during a lookup.
 * - Use the flowhash_is_timeouted macro to test whether the returned entry
 *   was already valid, or is proposed for insertion.
 * - Use the flowhash_timeout macro to get and set the entry timeout value.
 *
 */
static_always_inline
void FV(flowhash_get) (FVT(flowhash) *h, FVT(flowhash_lkey) *k,
                       u32 hash, u32 time_now, u32 *ei)
{
  *ei = (hash & h->fixed_entries_mask) + 1;

  if (PREDICT_FALSE(FV(flowhash_cmp_key)(&h->entries[*ei].key, k) != 0))
    {
      if (PREDICT_TRUE(time_now > h->entries[*ei].timeout &&
                       (h->entries[*ei].chained_entry_index ==
                           FLOWHASH_INVALID_ENTRY_INDEX)))
        {
          FV(flowhash_cpy_key)(&h->entries[*ei].key, k);
        }
      else
        {
          FV(__flowhash_get_chained)(h, k, hash, time_now, ei);
        }
    }
}

static_always_inline void
FV(__flowhash_get_chained) (FVT(flowhash) *h, FVT(flowhash_lkey) *k,
                            u32 hash, u32 time_now, u32 *ei)
{
  h->collision_lookup_counter++;

  if (h->entries[*ei].chained_entry_index == FLOWHASH_INVALID_ENTRY_INDEX)
    {
      /* No chained entry yet. Let's chain one. */
      if (h->free_buckets_position == 0)
        {
          /* Oops. No more buckets available. */
          h->not_enough_buckets_counter++;
          *ei = FLOWHASH_INVALID_ENTRY_INDEX;
          h->entries[FLOWHASH_INVALID_ENTRY_INDEX].timeout =
              time_now - 1;
          FV(flowhash_cpy_key)(
              &h->entries[FLOWHASH_INVALID_ENTRY_INDEX].key, k);
          return;
        }

      /* Forward link */
      h->entries[*ei].chained_entry_index =
          h->free_buckets_indices[h->free_buckets_position];

      /* Backward link (for garbage collection) */
      h->entries[h->free_buckets_indices[h->free_buckets_position]].
              chained_entry_index = *ei;

      /* Move pointer */
      h->free_buckets_position++;
    }

  /* Get the two indexes where to look at. */
  u32 bi0 = h->entries[*ei].chained_entry_index +
      (hash >> (32 - FLOWHASH_ENTRIES_PER_BUCKETS_LOG));
  u32 bi1 = bi0 + 1;
  bi1 = (bi0 & (FLOWHASH_ENTRIES_PER_BUCKETS - 1)) ? bi1 :
      bi1 - FLOWHASH_ENTRIES_PER_BUCKETS;

  /* It is possible that we wait while comparing bi0 key.
   * It's better to prefetch bi1 so we don't wait twice. */
  CLIB_PREFETCH(&h->entries[bi1], sizeof (h->entries[0]), READ);

  if (FV(flowhash_cmp_key)(&h->entries[bi0].key, k) == 0)
    {
      *ei = bi0;
      return;
    }

  if (FV(flowhash_cmp_key)(&h->entries[bi1].key, k) == 0)
    {
      *ei = bi1;
      return;
    }

  if (h->entries[*ei].timeout >= time_now)
    {
      *ei = FLOWHASH_INVALID_ENTRY_INDEX;
      *ei = (time_now > h->entries[bi0].timeout) ? bi0 : *ei;
      *ei = (time_now > h->entries[bi1].timeout) ? bi1 : *ei;
    }

  FV(flowhash_cpy_key)(&h->entries[*ei].key, k);
}

static_always_inline void
FV(flowhash_gc)(FVT(flowhash) *h, u32 time_now,
    u32 *freed_index, u32 *freed_len)
{
  u32 ei;
  if (freed_index)
    *freed_len = 0;

  if (PREDICT_FALSE(h->collision_buckets_mask == (((u32)0) - 1)))
    return;

  /* prefetch two rounds in advance */
  ei = 2 + h->fixed_entries_mask +
      ((h->gc_counter + 2) & h->collision_buckets_mask) *
        FLOWHASH_ENTRIES_PER_BUCKETS;
  CLIB_PREFETCH(&h->entries[ei], sizeof (h->entries[0]), READ);

  /* prefetch one round in advance */
  ei = 2 + h->fixed_entries_mask +
      ((h->gc_counter + 1) & h->collision_buckets_mask) *
        FLOWHASH_ENTRIES_PER_BUCKETS;
  if (h->entries[ei].chained_entry_index != FLOWHASH_INVALID_ENTRY_INDEX)
    {
      CLIB_PREFETCH(&h->entries[ei], 4 * CLIB_CACHE_LINE_BYTES, READ);
    }

  /* do GC */
  ei = 2 + h->fixed_entries_mask +
      ((h->gc_counter) & h->collision_buckets_mask) *
      FLOWHASH_ENTRIES_PER_BUCKETS;
  if (h->entries[ei].chained_entry_index != FLOWHASH_INVALID_ENTRY_INDEX)
    {
      u8 found = 0;
      int i;
      for (i=0; i<FLOWHASH_ENTRIES_PER_BUCKETS; i++)
        {
          if (time_now <= h->entries[ei + i].timeout)
            {
              found = 1;
              break;
            }
        }

      if (!found)
        {
          /* Tell caller we freed this */
          if (freed_index)
            {
              *freed_index = ei;
              *freed_len = FLOWHASH_ENTRIES_PER_BUCKETS;
            }
          /* The bucket is not used. Let's free it. */
          h->free_buckets_position--;
          /* Reset forward link */
          h->entries[h->entries[ei].chained_entry_index].chained_entry_index =
              FLOWHASH_INVALID_ENTRY_INDEX;
          /* Reset back link */
          h->entries[ei].chained_entry_index = FLOWHASH_INVALID_ENTRY_INDEX;
          /* Free element */
          h->free_buckets_indices[h->free_buckets_position] = ei;
          /* Count the garbage collection event */
          h->garbage_collection_counter++;
        }
    }

  h->gc_counter++;
}

static_always_inline
u32 FV(flowhash_elts)(FVT(flowhash) *h, u32 time_now)
{
  u32 tot = 0;
  u32 ei;

  flowhash_foreach_valid_entry(h, ei, time_now)
    tot++;

  return tot;
}

#endif /* __included_flowhash_template_h__ */