/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * main.c: main vector processing loop
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include
#include
#include
#include
#include
#include

/* Actually allocate a few extra slots of vector data to support
   speculative vector enqueues which overflow vector data in next frame. */
#define VLIB_FRAME_SIZE_ALLOC (VLIB_FRAME_SIZE + 4)

always_inline u32
vlib_frame_bytes (u32 n_scalar_bytes, u32 n_vector_bytes)
{
  u32 n_bytes;

  /* Make room for vlib_frame_t plus scalar arguments. */
  n_bytes = vlib_frame_vector_byte_offset (n_scalar_bytes);

  /* Make room for vector arguments.
     Allocate a few extra slots of vector data to support
     speculative vector enqueues which overflow vector data in next frame. */
#define VLIB_FRAME_SIZE_EXTRA 4
  n_bytes += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * n_vector_bytes;

  /* Magic number is first 32bit number after vector data.
     Used to make sure that vector data is never overrun. */
#define VLIB_FRAME_MAGIC (0xabadc0ed)
  n_bytes += sizeof (u32);

  /* Pad to cache line. */
  n_bytes = round_pow2 (n_bytes, CLIB_CACHE_LINE_BYTES);

  return n_bytes;
}

always_inline u32 *
vlib_frame_find_magic (vlib_frame_t * f, vlib_node_t * node)
{
  void *p = f;

  p += vlib_frame_vector_byte_offset (node->scalar_size);

  p += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * node->vector_size;

  return p;
}

static inline vlib_frame_size_t *
get_frame_size_info (vlib_node_main_t * nm,
		     u32 n_scalar_bytes, u32 n_vector_bytes)
{
#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES
  uword key = (n_scalar_bytes << 16) | n_vector_bytes;
  uword *p, i;

  p = hash_get (nm->frame_size_hash, key);
  if (p)
    i = p[0];
  else
    {
      i = vec_len (nm->frame_sizes);
      vec_validate (nm->frame_sizes, i);
      hash_set (nm->frame_size_hash, key, i);
    }

  return vec_elt_at_index (nm->frame_sizes, i);
#else
  ASSERT (vlib_frame_bytes (n_scalar_bytes, n_vector_bytes)
	  == (vlib_frame_bytes (0, 4)));
  return vec_elt_at_index (nm->frame_sizes, 0);
#endif
}

static vlib_frame_t *
vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index,
			  u32 frame_flags)
{
  vlib_node_main_t *nm = &vm->node_main;
  vlib_frame_size_t *fs;
  vlib_node_t *to_node;
  vlib_frame_t *f;
  u32 l, n, scalar_size, vector_size;

  ASSERT (vm == vlib_get_main ());

  to_node = vlib_get_node (vm, to_node_index);

  scalar_size = to_node->scalar_size;
  vector_size = to_node->vector_size;

  fs = get_frame_size_info (nm, scalar_size, vector_size);
  n = vlib_frame_bytes (scalar_size, vector_size);
  if ((l = vec_len (fs->free_frames)) > 0)
    {
      /* Allocate from end of free list. */
      f = fs->free_frames[l - 1];
      _vec_len (fs->free_frames) = l - 1;
    }
  else
    {
      f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN);
    }

  /* Poison frame when debugging. */
  if (CLIB_DEBUG > 0)
    clib_memset (f, 0xfe, n);

  /* Insert magic number. */
  {
    u32 *magic;

    magic = vlib_frame_find_magic (f, to_node);
    *magic = VLIB_FRAME_MAGIC;
  }

  f->frame_flags = VLIB_FRAME_IS_ALLOCATED | frame_flags;
  f->n_vectors = 0;
  f->scalar_size = scalar_size;
  f->vector_size = vector_size;
  f->flags = 0;

  fs->n_alloc_frames += 1;

  return f;
}

/* Allocate a frame for from FROM_NODE to TO_NODE via TO_NEXT_INDEX.
   Returns frame index. */
static vlib_frame_t *
vlib_frame_alloc (vlib_main_t * vm, vlib_node_runtime_t * from_node_runtime,
		  u32 to_next_index)
{
  vlib_node_t *from_node;

  from_node = vlib_get_node (vm, from_node_runtime->node_index);
  ASSERT (to_next_index < vec_len (from_node->next_nodes));

  return vlib_frame_alloc_to_node (vm, from_node->next_nodes[to_next_index],
				   /* frame_flags */ 0);
}

vlib_frame_t *
vlib_get_frame_to_node (vlib_main_t * vm, u32 to_node_index)
{
  vlib_frame_t *f = vlib_frame_alloc_to_node (vm, to_node_index,
					      /* frame_flags */
					      VLIB_FRAME_FREE_AFTER_DISPATCH);
  return vlib_get_frame (vm, f);
}

static inline void
vlib_validate_frame_indices (vlib_frame_t * f)
{
  if (CLIB_DEBUG > 0)
    {
      int i;
      u32 *from = vlib_frame_vector_args (f);

      /* Check for bad buffer index values */
      for (i = 0; i < f->n_vectors; i++)
	{
	  if (from[i] == 0)
	    {
	      clib_warning ("BUG: buffer index 0 at index %d", i);
	      ASSERT (0);
	    }
	  else if (from[i] == 0xfefefefe)
	    {
	      clib_warning ("BUG: frame poison pattern at index %d", i);
	      ASSERT (0);
	    }
	}
    }
}

void
vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_t * f)
{
  vlib_pending_frame_t *p;
  vlib_node_t *to_node;

  if (f->n_vectors == 0)
    return;

  ASSERT (vm == vlib_get_main ());

  vlib_validate_frame_indices (f);

  to_node = vlib_get_node (vm, to_node_index);

  vec_add2 (vm->node_main.pending_frames, p, 1);

  f->frame_flags |= VLIB_FRAME_PENDING;
  p->frame = vlib_get_frame (vm, f);
  p->node_runtime_index = to_node->runtime_index;
  p->next_frame_index = VLIB_PENDING_FRAME_NO_NEXT_FRAME;
}

/* Free given frame. */
void
vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f)
{
  vlib_node_main_t *nm = &vm->node_main;
  vlib_node_t *node;
  vlib_frame_size_t *fs;

  ASSERT (vm == vlib_get_main ());
  ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED);

  node = vlib_get_node (vm, r->node_index);
  fs = get_frame_size_info (nm, node->scalar_size, node->vector_size);

  ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED);

  /* No next frames may point to freed frame. */
  if (CLIB_DEBUG > 0)
    {
      vlib_next_frame_t *nf;
      vec_foreach (nf, vm->node_main.next_frames) ASSERT (nf->frame != f);
    }

  f->frame_flags &= ~(VLIB_FRAME_IS_ALLOCATED | VLIB_FRAME_NO_APPEND);

  vec_add1 (fs->free_frames, f);
  ASSERT (fs->n_alloc_frames > 0);
  fs->n_alloc_frames -= 1;
}

static clib_error_t *
show_frame_stats (vlib_main_t * vm,
		  unformat_input_t * input, vlib_cli_command_t * cmd)
{
  vlib_node_main_t *nm = &vm->node_main;
  vlib_frame_size_t *fs;

  vlib_cli_output (vm, "%=6s%=12s%=12s", "Size", "# Alloc", "# Free");
  vec_foreach (fs, nm->frame_sizes)
  {
    u32 n_alloc = fs->n_alloc_frames;
    u32 n_free = vec_len (fs->free_frames);

    if (n_alloc + n_free > 0)
      vlib_cli_output (vm, "%=6d%=12d%=12d",
		       fs - nm->frame_sizes, n_alloc, n_free);
  }

  return 0;
}

/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_frame_stats_cli, static) = {
  .path = "show vlib frame-allocation",
  .short_help = "Show node dispatch frame statistics",
  .function = show_frame_stats,
};
/* *INDENT-ON* */

/* Change ownership of enqueue rights to given next node. */
static void
vlib_next_frame_change_ownership (vlib_main_t * vm,
				  vlib_node_runtime_t * node_runtime,
				  u32 next_index)
{
  vlib_node_main_t *nm = &vm->node_main;
  vlib_next_frame_t *next_frame;
  vlib_node_t *node, *next_node;

  node = vec_elt (nm->nodes, node_runtime->node_index);

  /* Only internal & input nodes are allowed to call other nodes. */
  ASSERT (node->type == VLIB_NODE_TYPE_INTERNAL
	  || node->type == VLIB_NODE_TYPE_INPUT
	  || node->type == VLIB_NODE_TYPE_PROCESS);

  ASSERT (vec_len (node->next_nodes) == node_runtime->n_next_nodes);

  next_frame =
    vlib_node_runtime_get_next_frame (vm, node_runtime, next_index);
  next_node = vec_elt (nm->nodes, node->next_nodes[next_index]);

  if (next_node->owner_node_index != VLIB_INVALID_NODE_INDEX)
    {
      /* Get frame from previous owner. */
      vlib_next_frame_t *owner_next_frame;
      vlib_next_frame_t tmp;

      owner_next_frame =
	vlib_node_get_next_frame (vm,
				  next_node->owner_node_index,
				  next_node->owner_next_index);

      /* Swap target next frame with owner's. */
      tmp = owner_next_frame[0];
      owner_next_frame[0] = next_frame[0];
      next_frame[0] = tmp;

      /*
       * If next_frame is already pending, we have to track down
       * all pending frames and fix their next_frame_index fields.
       */
      if (next_frame->flags & VLIB_FRAME_PENDING)
	{
	  vlib_pending_frame_t *p;
	  if (next_frame->frame != NULL)
	    {
	      vec_foreach (p, nm->pending_frames)
	      {
		if (p->frame == next_frame->frame)
		  {
		    p->next_frame_index =
		      next_frame - vm->node_main.next_frames;
		  }
	      }
	    }
	}
    }
  else
    {
      /* No previous owner. Take ownership. */
      next_frame->flags |= VLIB_FRAME_OWNER;
    }

  /* Record new owner. */
  next_node->owner_node_index = node->index;
  next_node->owner_next_index = next_index;

  /* Now we should be owner. */
  ASSERT (next_frame->flags & VLIB_FRAME_OWNER);
}

/* Make sure that magic number is still there.
   Otherwise, it is likely that caller has overrun frame arguments. */
always_inline void
validate_frame_magic (vlib_main_t * vm,
		      vlib_frame_t * f, vlib_node_t * n, uword next_index)
{
  vlib_node_t *next_node = vlib_get_node (vm, n->next_nodes[next_index]);
  u32 *magic = vlib_frame_find_magic (f, next_node);
  ASSERT (VLIB_FRAME_MAGIC == magic[0]);
}

vlib_frame_t *
vlib_get_next_frame_internal (vlib_main_t * vm,
			      vlib_node_runtime_t * node,
			      u32 next_index, u32 allocate_new_next_frame)
{
  vlib_frame_t *f;
  vlib_next_frame_t *nf;
  u32 n_used;

  nf = vlib_node_runtime_get_next_frame (vm, node, next_index);

  /* Make sure this next frame owns right to enqueue to destination frame. */
  if (PREDICT_FALSE (!(nf->flags & VLIB_FRAME_OWNER)))
    vlib_next_frame_change_ownership (vm, node, next_index);

  /* ??? Don't need valid flag: can use frame_index == ~0 */
  if (PREDICT_FALSE (!(nf->flags & VLIB_FRAME_IS_ALLOCATED)))
    {
      nf->frame = vlib_frame_alloc (vm, node, next_index);
      nf->flags |= VLIB_FRAME_IS_ALLOCATED;
    }

  f = nf->frame;

  /* Has frame been removed from pending vector (e.g. finished dispatching)?
     If so we can reuse frame. */
  if ((nf->flags & VLIB_FRAME_PENDING)
      && !(f->frame_flags & VLIB_FRAME_PENDING))
    {
      nf->flags &= ~VLIB_FRAME_PENDING;
      f->n_vectors = 0;
      f->flags = 0;
    }

  /* Allocate new frame if current one is marked as no-append or
     it is already full. */
  n_used = f->n_vectors;
  if (n_used >= VLIB_FRAME_SIZE || (allocate_new_next_frame && n_used > 0) ||
      (f->frame_flags & VLIB_FRAME_NO_APPEND))
    {
      /* Old frame may need to be freed after dispatch, since we'll have
         two redundant frames from node -> next node. */
      if (!(nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH))
	{
	  vlib_frame_t *f_old = vlib_get_frame (vm, nf->frame);
	  f_old->frame_flags |= VLIB_FRAME_FREE_AFTER_DISPATCH;
	}

      /* Allocate new frame to replace full one. */
      f = nf->frame = vlib_frame_alloc (vm, node, next_index);
      n_used = f->n_vectors;
    }

  /* Should have free vectors in frame now. */
  ASSERT (n_used < VLIB_FRAME_SIZE);

  if (CLIB_DEBUG > 0)
    {
      validate_frame_magic (vm, f,
			    vlib_get_node (vm, node->node_index), next_index);
    }

  return f;
}

static void
vlib_put_next_frame_validate (vlib_main_t * vm,
			      vlib_node_runtime_t * rt,
			      u32 next_index, u32 n_vectors_left)
{
  vlib_node_main_t *nm = &vm->node_main;
  vlib_next_frame_t *nf;
  vlib_frame_t *f;
  vlib_node_runtime_t *next_rt;
  vlib_node_t *next_node;
  u32 n_before, n_after;

  nf = vlib_node_runtime_get_next_frame (vm, rt, next_index);
  f = vlib_get_frame (vm, nf->frame);

  ASSERT (n_vectors_left <= VLIB_FRAME_SIZE);

  vlib_validate_frame_indices (f);

  n_after = VLIB_FRAME_SIZE - n_vectors_left;
  n_before = f->n_vectors;

  ASSERT (n_after >= n_before);

  next_rt = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
			      nf->node_runtime_index);
  next_node = vlib_get_node (vm, next_rt->node_index);
  if (n_after > 0 && next_node->validate_frame)
    {
      u8 *msg = next_node->validate_frame (vm, rt, f);
      if (msg)
	{
	  clib_warning ("%v", msg);
	  ASSERT (0);
	}
      vec_free (msg);
    }
}

void
vlib_put_next_frame (vlib_main_t * vm,
		     vlib_node_runtime_t * r,
		     u32 next_index, u32 n_vectors_left)
{
  vlib_node_main_t *nm = &vm->node_main;
  vlib_next_frame_t *nf;
  vlib_frame_t *f;
  u32 n_vectors_in_frame;

  if (CLIB_DEBUG > 0)
    vlib_put_next_frame_validate (vm, r, next_index, n_vectors_left);

  nf = vlib_node_runtime_get_next_frame (vm, r, next_index);
  f = vlib_get_frame (vm, nf->frame);

  /* Make sure that magic number is still there.  Otherwise, caller
     has overrun frame meta data. */
  if (CLIB_DEBUG > 0)
    {
      vlib_node_t *node = vlib_get_node (vm, r->node_index);
      validate_frame_magic (vm, f, node, next_index);
    }

  /* Convert # of vectors left -> number of vectors there. */
  ASSERT (n_vectors_left <= VLIB_FRAME_SIZE);
  n_vectors_in_frame = VLIB_FRAME_SIZE - n_vectors_left;

  f->n_vectors = n_vectors_in_frame;

  /* If vectors were added to frame, add to pending vector. */
  if (PREDICT_TRUE (n_vectors_in_frame > 0))
    {
      vlib_pending_frame_t *p;
      u32 v0, v1;

      r->cached_next_index = next_index;

      if (!(f->frame_flags & VLIB_FRAME_PENDING))
	{
	  __attribute__ ((unused)) vlib_node_t *node;
	  vlib_node_t *next_node;
	  vlib_node_runtime_t *next_runtime;

	  node = vlib_get_node (vm, r->node_index);
	  next_node = vlib_get_next_node (vm, r->node_index, next_index);
	  next_runtime = vlib_node_get_runtime (vm, next_node->index);

	  vec_add2 (nm->pending_frames, p, 1);

	  p->frame = nf->frame;
	  p->node_runtime_index = nf->node_runtime_index;
	  p->next_frame_index = nf - nm->next_frames;
	  nf->flags |= VLIB_FRAME_PENDING;
	  f->frame_flags |= VLIB_FRAME_PENDING;

	  /*
	   * If we're going to dispatch this frame on another thread,
	   * force allocation of a new frame. Otherwise, we create
	   * a dangling frame reference. Each thread has its own copy of
	   * the next_frames vector.
	   */
	  if (0 && r->thread_index != next_runtime->thread_index)
	    {
	      nf->frame = NULL;
	      nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED);
	    }
	}

      /* Copy trace flag from next_frame and from runtime. */
      nf->flags |=
	(nf->flags & VLIB_NODE_FLAG_TRACE) | (r->flags & VLIB_NODE_FLAG_TRACE);

      v0 = nf->vectors_since_last_overflow;
      v1 = v0 + n_vectors_in_frame;
      nf->vectors_since_last_overflow = v1;

      if (PREDICT_FALSE (v1 < v0))
	{
	  vlib_node_t *node = vlib_get_node (vm, r->node_index);
	  vec_elt (node->n_vectors_by_next_node, next_index) += v0;
	}
    }
}

/* Sync up runtime (32 bit counters) and main node stats (64 bit counters). */
void
vlib_node_runtime_sync_stats_node (vlib_node_t *n, vlib_node_runtime_t *r,
				   uword n_calls, uword n_vectors,
				   uword n_clocks)
{
  n->stats_total.calls += n_calls + r->calls_since_last_overflow;
  n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow;
  n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow;
  n->stats_total.max_clock = r->max_clock;
  n->stats_total.max_clock_n = r->max_clock_n;

  r->calls_since_last_overflow = 0;
  r->vectors_since_last_overflow = 0;
  r->clocks_since_last_overflow = 0;
}

void
vlib_node_runtime_sync_stats (vlib_main_t *vm, vlib_node_runtime_t *r,
			      uword n_calls, uword n_vectors, uword n_clocks)
{
  vlib_node_t *n = vlib_get_node (vm, r->node_index);

  vlib_node_runtime_sync_stats_node (n, r, n_calls, n_vectors, n_clocks);
}

always_inline void __attribute__ ((unused))
vlib_process_sync_stats (vlib_main_t * vm,
			 vlib_process_t * p,
			 uword n_calls, uword n_vectors, uword n_clocks)
{
  vlib_node_runtime_t *rt = &p->node_runtime;
  vlib_node_t *n = vlib_get_node (vm, rt->node_index);
  vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks);
  n->stats_total.suspends += p->n_suspends;
  p->n_suspends = 0;
}

void
vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
{
  vlib_node_runtime_t *rt;

  if (n->type == VLIB_NODE_TYPE_PROCESS)
    {
      /* Nothing to do for PROCESS nodes except in main thread */
      if (vm != vlib_get_first_main ())
	return;

      vlib_process_t *p = vlib_get_process_from_node (vm, n);
      n->stats_total.suspends += p->n_suspends;
      p->n_suspends = 0;
      rt = &p->node_runtime;
    }
  else
    rt =
      vec_elt_at_index (vm->node_main.nodes_by_type[n->type],
			n->runtime_index);

  vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0);

  /* Sync up runtime next frame vector counters with main node structure. */
  {
    vlib_next_frame_t *nf;
    uword i;
    for (i = 0; i < rt->n_next_nodes; i++)
      {
	nf = vlib_node_runtime_get_next_frame (vm, rt, i);
	vec_elt (n->n_vectors_by_next_node, i) +=
	  nf->vectors_since_last_overflow;
	nf->vectors_since_last_overflow = 0;
      }
  }
}

always_inline u32
vlib_node_runtime_update_stats (vlib_main_t * vm,
				vlib_node_runtime_t * node,
				uword n_calls,
				uword n_vectors, uword n_clocks)
{
  u32 ca0, ca1, v0, v1, cl0, cl1, r;

  cl0 = cl1 = node->clocks_since_last_overflow;
  ca0 = ca1 = node->calls_since_last_overflow;
  v0 = v1 = node->vectors_since_last_overflow;

  ca1 = ca0 + n_calls;
  v1 = v0 + n_vectors;
  cl1 = cl0 + n_clocks;

  node->calls_since_last_overflow = ca1;
  node->clocks_since_last_overflow = cl1;
  node->vectors_since_last_overflow = v1;

  node->max_clock_n = node->max_clock > n_clocks ?
    node->max_clock_n : n_vectors;
  node->max_clock = node->max_clock > n_clocks ? node->max_clock : n_clocks;

  r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors);

  if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0))
    {
      node->calls_since_last_overflow = ca0;
      node->clocks_since_last_overflow = cl0;
      node->vectors_since_last_overflow = v0;

      vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks);
    }

  return r;
}

always_inline void
vlib_process_update_stats (vlib_main_t * vm,
			   vlib_process_t * p,
			   uword n_calls, uword n_vectors, uword n_clocks)
{
  vlib_node_runtime_update_stats (vm, &p->node_runtime,
				  n_calls, n_vectors, n_clocks);
}

static clib_error_t *
vlib_cli_elog_clear (vlib_main_t * vm,
		     unformat_input_t * input, vlib_cli_command_t * cmd)
{
  elog_reset_buffer (&vlib_global_main.elog_main);
  return 0;
}

/* *INDENT-OFF* */
VLIB_CLI_COMMAND (elog_clear_cli, static) = {
  .path = "event-logger clear",
  .short_help = "Clear the event log",
  .function = vlib_cli_elog_clear,
};
/* *INDENT-ON* */

#ifdef CLIB_UNIX
static clib_error_t *
elog_save_buffer (vlib_main_t * vm,
		  unformat_input_t * input, vlib_cli_command_t * cmd)
{
  elog_main_t *em = &vlib_global_main.elog_main;
  char *file, *chroot_file;
  clib_error_t *error = 0;

  if (!unformat (input, "%s", &file))
    {
      vlib_cli_output (vm, "expected file name, got `%U'",
		       format_unformat_error, input);
      return 0;
    }

  /* It's fairly hard to get "../oopsie" through unformat; just in case */
  if (strstr (file, "..") || index (file, '/'))
    {
      vlib_cli_output (vm, "illegal characters in filename '%s'", file);
      return 0;
    }

  chroot_file = (char *) format (0, "/tmp/%s%c", file, 0);

  vec_free (file);

  vlib_cli_output (vm, "Saving %wd of %wd events to %s",
		   elog_n_events_in_buffer (em),
		   elog_buffer_capacity (em), chroot_file);

  vlib_worker_thread_barrier_sync (vm);
  error = elog_write_file (em, chroot_file, 1 /* flush ring */ );
  vlib_worker_thread_barrier_release (vm);
  vec_free (chroot_file);
  return error;
}

void
vlib_post_mortem_dump (void)
{
  vlib_global_main_t *vgm = vlib_get_global_main ();

  for (int i = 0; i < vec_len (vgm->post_mortem_callbacks); i++)
    (vgm->post_mortem_callbacks[i]) ();
}

/* *INDENT-OFF* */
VLIB_CLI_COMMAND (elog_save_cli, static) = {
  .path = "event-logger save",
  .short_help = "event-logger save (
/*
 * ipsec_itf.c: IPSec dedicated interface type
 *
 * Copyright (c) 2020 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vnet/ip/ip.h>
#include <vnet/ipsec/ipsec_itf.h>
#include <vnet/ipsec/ipsec_tun.h>
#include <vnet/ipsec/ipsec.h>
#include <vnet/adj/adj_midchain.h>
#include <vnet/ethernet/mac_address.h>

/* bitmap of allocated ipsec_itf instances */
static uword *ipsec_itf_instances;

/* pool of interfaces */
static ipsec_itf_t *ipsec_itf_pool;

/* map of sw_if_index to pool index; ~0 means no ipsec interface */
static u32 *ipsec_itf_index_by_sw_if_index;

ipsec_itf_t *
ipsec_itf_get (index_t ii)
{
  return (pool_elt_at_index (ipsec_itf_pool, ii));
}

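/* Look up the ipsec interface, if any, attached to a sw_if_index;
 * returns NULL when the index is out of range or unused. */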
static ipsec_itf_t *
ipsec_itf_find_by_sw_if_index (u32 sw_if_index)
{
  if (vec_len (ipsec_itf_index_by_sw_if_index) <= sw_if_index)
    return NULL;
  u32 ti = ipsec_itf_index_by_sw_if_index[sw_if_index];
  if (ti == ~0)
    return NULL;
  return pool_elt_at_index (ipsec_itf_pool, ti);
}

static u8 *
format_ipsec_itf_name (u8 * s, va_list * args)
{
  u32 dev_instance = va_arg (*args, u32);
  return format (s, "ipsec%d", dev_instance);
}

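/* Detach the adjacency from whatever FIB entry it was tracking. */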
void
ipsec_itf_adj_unstack (adj_index_t ai)
{
  adj_midchain_delegate_unstack (ai);
}

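/* Stack the adjacency on the FIB entry for the SA's tunnel destination
 * while the interface link is up; otherwise leave it unstacked. */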
void
ipsec_itf_adj_stack (adj_index_t ai, u32 sai)
{
  const vnet_hw_interface_t *hw;

  hw = vnet_get_sup_hw_interface (vnet_get_main (), adj_get_sw_if_index (ai));

  if (hw->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
    {
      const ipsec_sa_t *sa;
      fib_prefix_t dst;

      sa = ipsec_sa_get (sai);
      ip_address_to_fib_prefix (&sa->tunnel.t_dst, &dst);
      adj_midchain_delegate_stack (ai, sa->tunnel.t_fib_index, &dst);
    }
  else
    adj_midchain_delegate_unstack (ai);
}

static adj_walk_rc_t
ipsec_itf_adj_stack_cb (adj_index_t ai, void *arg)
{
  ipsec_tun_protect_t *itp = arg;

  ipsec_itf_adj_stack (ai, itp->itp_out_sa);

  return (ADJ_WALK_RC_CONTINUE);
}

static void
ipsec_itf_restack (index_t itpi, const ipsec_itf_t * itf)
{
  ipsec_tun_protect_t *itp;
  fib_protocol_t proto;

  itp = ipsec_tun_protect_get (itpi);

  /*
   * walk all the adjacencies on the interface and restack them
   */
  FOR_EACH_FIB_IP_PROTOCOL (proto)
  {
    adj_nbr_walk (itf->ii_sw_if_index, proto, ipsec_itf_adj_stack_cb, itp);
  }
}

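/* Callback for ipsec_tun_protect_walk_itf: restack the interface's
 * adjacencies against each tunnel-protect entry. */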
static walk_rc_t
ipsec_tun_protect_walk_state_change (index_t itpi, void *arg)
{
  const ipsec_itf_t *itf = arg;

  ipsec_itf_restack (itpi, itf);

  return (WALK_CONTINUE);
}

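/* Admin up/down handler: mirror the admin state onto the hardware link
 * state and restack the tunnel-protect adjacencies to match. */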
static clib_error_t *
ipsec_itf_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
{
  vnet_hw_interface_t *hi;
  ipsec_itf_t *itf;
  u32 hw_flags;

  hi = vnet_get_hw_interface (vnm, hw_if_index);
  hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ?
	      VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
  vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);

  itf = ipsec_itf_find_by_sw_if_index (hi->sw_if_index);

  if (itf)
    ipsec_tun_protect_walk_itf (itf->ii_sw_if_index,
				ipsec_tun_protect_walk_state_change, itf);

  return (NULL);
}

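/* Tunnel descriptor callback: the interface itself carries no fixed
 * src/dst addresses (encap comes from the protecting SA), so report
 * empty addresses and L3. */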
static int
ipsec_itf_tunnel_desc (u32 sw_if_index,
		       ip46_address_t * src, ip46_address_t * dst, u8 * is_l2)
{
  ip46_address_reset (src);
  ip46_address_reset (dst);
  *is_l2 = 0;

  return (0);
}

static u8 *
ipsec_itf_build_rewrite (void)
{
  /*
   * Passing the adj code a NULL rewrite means "I don't have one because
   * the far end is unresolved".  That's not the case here: for the ipsec
   * tunnel there are simply no bytes of encap to apply in the adj, so
   * return a zero-length rewrite.  Encap will be added by a tunnel mode SA.
   */
  u8 *rewrite = NULL;

  vec_validate (rewrite, 0);
  vec_reset_length (rewrite);

  return (rewrite);
}

static u8 *
ipsec_itf_build_rewrite_i (vnet_main_t * vnm,
			   u32 sw_if_index,
			   vnet_link_t link_type, const void *dst_address)
{
  return (ipsec_itf_build_rewrite ());
}

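/* Convert each adjacency created on the interface into a midchain with a
 * zero-length rewrite; the actual stacking is done in ipsec_itf_adj_stack. */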
void
ipsec_itf_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
{
  adj_nbr_midchain_update_rewrite
    (ai, NULL, NULL, ADJ_FLAG_MIDCHAIN_IP_STACK, ipsec_itf_build_rewrite ());
}

/* *INDENT-OFF* */
VNET_DEVICE_CLASS (ipsec_itf_device_class) = {
  .name = "IPSEC Tunnel",
  .format_device_name = format_ipsec_itf_name,
  .admin_up_down_function = ipsec_itf_admin_up_down,
  .ip_tun_desc = ipsec_itf_tunnel_desc,
};

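/* Two hardware interface classes: point-to-point and point-to-multipoint
 * (NBMA) flavours of the same zero-rewrite midchain behaviour. */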
VNET_HW_INTERFACE_CLASS(ipsec_hw_interface_class) = {
  .name = "IPSec",
  .build_rewrite = ipsec_itf_build_rewrite_i,
  .update_adjacency = ipsec_itf_update_adj,
  .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
VNET_HW_INTERFACE_CLASS(ipsec_p2mp_hw_interface_class) = {
  .name = "IPSec",
  .build_rewrite = ipsec_itf_build_rewrite_i,
  .update_adjacency = ipsec_itf_update_adj,
  .flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
};
/* *INDENT-ON* */

/*
 * Maintain a bitmap of allocated ipsec_itf instance numbers.
 */
#define IPSEC_ITF_MAX_INSTANCE		(16 * 1024)

static u32
ipsec_itf_instance_alloc (u32 want)
{
  /*
   * Check for dynamically allocated instance number.
   */
  if (~0 == want)
    {
      u32 bit;

      bit = clib_bitmap_first_clear (ipsec_itf_instances);
      if (bit >= IPSEC_ITF_MAX_INSTANCE)
	{
	  return ~0;
	}
      ipsec_itf_instances = clib_bitmap_set (ipsec_itf_instances, bit, 1);
      return bit;
    }

  /*
   * In range?
   */
  if (want >= IPSEC_ITF_MAX_INSTANCE)
    {
      return ~0;
    }

  /*
   * Already in use?
   */
  if (clib_bitmap_get (ipsec_itf_instances, want))
    {
      return ~0;
    }

  /*
   * Grant allocation request.
   */
  ipsec_itf_instances = clib_bitmap_set (ipsec_itf_instances, want, 1);

  return want;
}

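/* Return an instance number to the free pool; fails if the instance is
 * out of range or was never allocated. */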
static int
ipsec_itf_instance_free (u32 instance)
{
  if (instance >= IPSEC_ITF_MAX_INSTANCE)
    {
      return -1;
    }

  if (clib_bitmap_get (ipsec_itf_instances, instance) == 0)
    {
      return -1;
    }

  ipsec_itf_instances = clib_bitmap_set (ipsec_itf_instances, instance, 0);
  return 0;
}

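/*
 * Sketch of a typical call, assuming caller-side variable names (not
 * taken from this file):
 *
 *   u32 sw_if_index;
 *   int rv = ipsec_itf_create (~0, TUNNEL_MODE_P2P, &sw_if_index);
 *
 * A non-zero rv (e.g. VNET_API_ERROR_INVALID_REGISTRATION) means no
 * instance number could be allocated; passing ~0 as user_instance asks
 * the bitmap allocator to pick the first free instance.
 */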
int
ipsec_itf_create (u32 user_instance, tunnel_mode_t mode, u32 * sw_if_indexp)
{
  vnet_main_t *vnm = vnet_get_main ();
  u32 instance, hw_if_index;
  vnet_hw_interface_t *hi;
  ipsec_itf_t *ipsec_itf;

  ASSERT (sw_if_indexp);

  *sw_if_indexp = (u32) ~ 0;

  /*
   * Allocate an ipsec_itf instance: either select one dynamically
   * or try to use the requested user_instance number.
   */
  instance = ipsec_itf_instance_alloc (user_instance);
  if (instance == ~0)
    return VNET_API_ERROR_INVALID_REGISTRATION;

  pool_get (ipsec_itf_pool, ipsec_itf);

  /* tunnel index (or instance) */
  u32 t_idx = ipsec_itf - ipsec_itf_pool;

  ipsec_itf->ii_mode = mode;
  ipsec_itf->ii_user_instance = instance;

  hw_if_index = vnet_register_interface (vnm,
					 ipsec_itf_device_class.index,
					 ipsec_itf->ii_user_instance,
					 (mode == TUNNEL_MODE_P2P ?
					  ipsec_hw_interface_class.index :
					  ipsec_p2mp_hw_interface_class.index),
					 t_idx