author     Andrew Yourtchenko <ayourtch@gmail.com>  2017-06-08 20:03:35 +0200
committer  Ole Trøan <otroan@employees.org>         2017-06-15 20:27:39 +0000
commit     779c3e3a632f887a7249a5cae8cce6eeacb67e3f (patch)
tree       f240f028618e3a064302823b45d75085387c862e
parent     b2d5ff349d2c6cb2b733375dca4952cdeab2e7d3 (diff)
acl-plugin: store sessions in a single hash table instead of a per-interface
A bihash per interface is convenient, but turns out tricky to maintain with a large number of interfaces. This patch makes the sessions reside in a single hash table shared by all interfaces, adding the lower 16 bits of sw_if_index to the key in the previously unused space. The tradeoff is that a session with an identical 5-tuple and the same sw_if_index modulo 65536 will match on either of the interfaces. The probability of that is deemed sufficiently small not to worry about. In case it still happens before the heat death of the universe, there is a clib_warning and the colliding packet is dropped; at that point we would need to grow the hash key by another u64, but we would rather not pay that cost right now.

Change-Id: I2747839cfcceda73e597cbcafbe1e377fb8f1889
Signed-off-by: Andrew Yourtchenko <ayourtch@gmail.com>
-rw-r--r--  src/plugins/acl/acl.c      5
-rw-r--r--  src/plugins/acl/acl.h      9
-rw-r--r--  src/plugins/acl/fa_node.c  45
-rw-r--r--  src/plugins/acl/fa_node.h  2
4 files changed, 39 insertions, 22 deletions
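
Conceptually, the change folds the low 16 bits of the interface index into the previously reserved u16 of the L4 part of the session key. A minimal sketch of that idea follows; the as_u64 view of the union and the helper function name are assumptions for illustration, only the struct members appear in the diff below.

/* Sketch only: mirrors the key layout introduced by this patch. */
typedef union
{
  u64 as_u64;                  /* assumed flat view, not shown in the diff */
  struct
  {
    u16 port[2];               /* L4 source/destination ports */
    u16 proto;                 /* IP protocol */
    u16 lsb_of_sw_if_index;    /* was "rsvd"; low 16 bits of sw_if_index */
  };
} fa_session_l4_key_t;

/* The datapath fills the new field before building the bihash key, so
 * interfaces whose indices differ by a multiple of 65536 share the same
 * key space - hence the collision check added further down. */
static inline void
fa_l4_key_set_sw_if_index (fa_session_l4_key_t * l4, u32 sw_if_index)
{
  l4->lsb_of_sw_if_index = sw_if_index & 0xffff;
}
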
diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c
index 4174a570183..e7b8549535d 100644
--- a/src/plugins/acl/acl.c
+++ b/src/plugins/acl/acl.c
@@ -1823,6 +1823,11 @@ acl_show_aclplugin_fn (vlib_main_t * vm,
u64 n_dels = sw_if_index < vec_len(am->fa_session_dels_by_sw_if_index) ? am->fa_session_dels_by_sw_if_index[sw_if_index] : 0;
out0 = format(out0, "sw_if_index %d: add %lu - del %lu = %lu\n", sw_if_index, n_adds, n_dels, n_adds - n_dels);
}));
+ {
+ u64 n_adds = am->fa_session_total_adds;
+ u64 n_dels = am->fa_session_total_dels;
+ out0 = format(out0, "TOTAL: add %lu - del %lu = %lu\n", n_adds, n_dels, n_adds - n_dels);
+ }
out0 = format(out0, "\n\nPer-worker data:\n");
for (wk = 0; wk < vec_len (am->per_worker_data); wk++) {
acl_fa_per_worker_data_t *pw = &am->per_worker_data[wk];
diff --git a/src/plugins/acl/acl.h b/src/plugins/acl/acl.h
index 02623a9ce4b..65bc9e74ead 100644
--- a/src/plugins/acl/acl.h
+++ b/src/plugins/acl/acl.h
@@ -135,9 +135,9 @@ typedef struct {
/* bitmaps when set the processing is enabled on the interface */
uword *fa_in_acl_on_sw_if_index;
uword *fa_out_acl_on_sw_if_index;
- /* bitmap, when set the hash is initialized */
- uword *fa_sessions_on_sw_if_index;
- clib_bihash_40_8_t *fa_sessions_by_sw_if_index;
+ /* bihash holding all of the sessions */
+ int fa_sessions_hash_is_initialized;
+ clib_bihash_40_8_t fa_sessions_hash;
/* The process node which orcherstrates the cleanup */
u32 fa_cleaner_node_index;
/* FA session timeouts, in seconds */
@@ -145,6 +145,9 @@ typedef struct {
/* session add/delete counters */
u64 *fa_session_adds_by_sw_if_index;
u64 *fa_session_dels_by_sw_if_index;
+ /* total session adds/dels */
+ u64 fa_session_total_adds;
+ u64 fa_session_total_dels;
/* L2 datapath glue */
diff --git a/src/plugins/acl/fa_node.c b/src/plugins/acl/fa_node.c
index 78b10dc9504..66621b6ba78 100644
--- a/src/plugins/acl/fa_node.c
+++ b/src/plugins/acl/fa_node.c
@@ -494,9 +494,7 @@ acl_make_5tuple_session_key (int is_input, fa_5tuple_t * p5tuple_pkt,
static int
acl_fa_ifc_has_sessions (acl_main_t * am, int sw_if_index0)
{
- int has_sessions =
- clib_bitmap_get (am->fa_sessions_on_sw_if_index, sw_if_index0);
- return has_sessions;
+ return am->fa_sessions_hash_is_initialized;
}
static int
@@ -594,13 +592,11 @@ acl_fa_ifc_init_sessions (acl_main_t * am, int sw_if_index0)
sw_if_index0, am->fa_conn_table_hash_num_buckets,
am->fa_conn_table_hash_memory_size);
#endif
- vec_validate (am->fa_sessions_by_sw_if_index, sw_if_index0);
- BV (clib_bihash_init) (&am->fa_sessions_by_sw_if_index
- [sw_if_index0], "ACL plugin FA session bihash",
+ BV (clib_bihash_init) (&am->fa_sessions_hash,
+ "ACL plugin FA session bihash",
am->fa_conn_table_hash_num_buckets,
am->fa_conn_table_hash_memory_size);
- am->fa_sessions_on_sw_if_index =
- clib_bitmap_set (am->fa_sessions_on_sw_if_index, sw_if_index0, 1);
+ am->fa_sessions_hash_is_initialized = 1;
}
static inline fa_session_t *get_session_ptr(acl_main_t *am, u16 thread_index, u32 session_index)
@@ -715,7 +711,7 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t se
{
fa_session_t *sess = get_session_ptr(am, sess_id.thread_index, sess_id.session_index);
ASSERT(sess->thread_index == os_get_thread_index ());
- BV (clib_bihash_add_del) (&am->fa_sessions_by_sw_if_index[sw_if_index],
+ BV (clib_bihash_add_del) (&am->fa_sessions_hash,
&sess->info.kv, 0);
acl_fa_per_worker_data_t *pw = &am->per_worker_data[sess_id.thread_index];
pool_put_index (pw->fa_sessions_pool, sess_id.session_index);
@@ -723,18 +719,15 @@ acl_fa_delete_session (acl_main_t * am, u32 sw_if_index, fa_full_session_id_t se
as the caller must have dealt with the timers. */
vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index);
am->fa_session_dels_by_sw_if_index[sw_if_index]++;
+ clib_smp_atomic_add(&am->fa_session_total_dels, 1);
}
static int
acl_fa_can_add_session (acl_main_t * am, int is_input, u32 sw_if_index)
{
- u64 curr_sess;
- vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index);
- vec_validate (am->fa_session_dels_by_sw_if_index, sw_if_index);
- curr_sess =
- am->fa_session_adds_by_sw_if_index[sw_if_index] -
- am->fa_session_dels_by_sw_if_index[sw_if_index];
- return (curr_sess < am->fa_conn_table_max_entries);
+ u64 curr_sess_count;
+ curr_sess_count = am->fa_session_total_adds - am->fa_session_total_dels;
+ return (curr_sess_count < am->fa_conn_table_max_entries);
}
static u64
@@ -889,12 +882,13 @@ acl_fa_add_session (acl_main_t * am, int is_input, u32 sw_if_index, u64 now,
acl_fa_ifc_init_sessions (am, sw_if_index);
}
- BV (clib_bihash_add_del) (&am->fa_sessions_by_sw_if_index[sw_if_index],
+ BV (clib_bihash_add_del) (&am->fa_sessions_hash,
&kv, 1);
acl_fa_conn_list_add_session(am, f_sess_id, now);
vec_validate (am->fa_session_adds_by_sw_if_index, sw_if_index);
am->fa_session_adds_by_sw_if_index[sw_if_index]++;
+ clib_smp_atomic_add(&am->fa_session_total_adds, 1);
}
static int
@@ -902,7 +896,7 @@ acl_fa_find_session (acl_main_t * am, u32 sw_if_index0, fa_5tuple_t * p5tuple,
clib_bihash_kv_40_8_t * pvalue_sess)
{
return (BV (clib_bihash_search)
- (&am->fa_sessions_by_sw_if_index[sw_if_index0], &p5tuple->kv,
+ (&am->fa_sessions_hash, &p5tuple->kv,
pvalue_sess) == 0);
}
@@ -977,6 +971,7 @@ acl_fa_node_fn (vlib_main_t * vm,
*/
acl_fill_5tuple (am, b0, is_ip6, is_input, is_l2_path, &fa_5tuple);
+ fa_5tuple.l4.lsb_of_sw_if_index = sw_if_index0 & 0xffff;
acl_make_5tuple_session_key (is_input, &fa_5tuple, &kv_sess);
#ifdef FA_NODE_VERBOSE_DEBUG
clib_warning
@@ -1024,6 +1019,20 @@ acl_fa_node_fn (vlib_main_t * vm,
0x00010000 + ((0xff & old_timeout_type) << 8) +
(0xff & new_timeout_type);
}
+ /*
+ * I estimate the likelihood to be very low - the VPP needs
+ * to have >64K interfaces to start with and then on
+ * exactly 64K indices apart needs to be exactly the same
+ * 5-tuple... Anyway, since this probability is nonzero -
+ * print an error and drop the unlucky packet.
+ * If this shows up in real world, we would need to bump
+ * the hash key length.
+ */
+ if (PREDICT_FALSE(sess->sw_if_index != sw_if_index0)) {
+ clib_warning("BUG: session LSB16(sw_if_index) and 5-tuple collision!");
+ acl_check_needed = 0;
+ action = 0;
+ }
}
}
diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h
index a94e7db9eea..671593a8c99 100644
--- a/src/plugins/acl/fa_node.h
+++ b/src/plugins/acl/fa_node.h
@@ -36,7 +36,7 @@ typedef union {
struct {
u16 port[2];
u16 proto;
- u16 rsvd;
+ u16 lsb_of_sw_if_index;
};
} fa_session_l4_key_t;
/*
 * Copyright (c) 2018-2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file
 * @brief Unidirectional shared-memory multi-ring message queue
 */

#ifndef SRC_SVM_MESSAGE_QUEUE_H_
#define SRC_SVM_MESSAGE_QUEUE_H_

#include <vppinfra/clib.h>
#include <vppinfra/error.h>
#include <svm/queue.h>

typedef struct svm_msg_q_ring_
{
  volatile u32 cursize;			/**< current size of the ring */
  u32 nitems;				/**< max size of the ring */
  volatile u32 head;			/**< current head (for dequeue) */
  volatile u32 tail;			/**< current tail (for enqueue) */
  u32 elsize;				/**< size of an element */
  u8 *data;				/**< chunk of memory for msg data */
} __clib_packed svm_msg_q_ring_t;

typedef struct svm_msg_q_
{
  svm_queue_t *q;			/**< queue for exchanging messages */
  svm_msg_q_ring_t *rings;		/**< rings with message data*/
} __clib_packed svm_msg_q_t;

typedef struct svm_msg_q_ring_cfg_
{
  u32 nitems;
  u32 elsize;
  void *data;
} svm_msg_q_ring_cfg_t;

typedef struct svm_msg_q_cfg_
{
  int consumer_pid;			/**< pid of msg consumer */
  u32 q_nitems;				/**< msg queue size (not rings) */
  u32 n_rings;				/**< number of msg rings */
  svm_msg_q_ring_cfg_t *ring_cfgs;	/**< array of ring cfgs */
} svm_msg_q_cfg_t;

typedef union
{
  struct
  {
    u32 ring_index;			/**< ring index, could be u8 */
    u32 elt_index;			/**< index in ring */
  };
  u64 as_u64;
} svm_msg_q_msg_t;

#define SVM_MQ_INVALID_MSG { .as_u64 = ~0 }
/**
 * Allocate message queue
 *
 * Allocates a message queue on the heap. Based on the configuration options,
 * apart from the message queue this also allocates (one or multiple)
 * shared-memory rings for the messages.
 *
 * @param cfg 		configuration options: queue len, consumer pid,
 * 			ring configs
 * @return		message queue
 */
svm_msg_q_t *svm_msg_q_alloc (svm_msg_q_cfg_t * cfg);
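
/*
 * Illustrative sketch only (not part of this API): allocate a queue with a
 * single 32-slot ring of 64-byte messages. The field values are arbitrary
 * assumptions; getpid () requires <unistd.h>.
 *
 *   svm_msg_q_ring_cfg_t rc[1] = { { .nitems = 32, .elsize = 64, .data = 0 } };
 *   svm_msg_q_cfg_t cfg = { .consumer_pid = getpid (),
 *                           .q_nitems = 32,
 *                           .n_rings = 1,
 *                           .ring_cfgs = rc };
 *   svm_msg_q_t *mq = svm_msg_q_alloc (&cfg);
 */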

/**
 * Free message queue
 *
 * @param mq		message queue to be freed
 */
void svm_msg_q_free (svm_msg_q_t * mq);

/**
 * Allocate message buffer
 *
 * Message is allocated on the first available ring capable of holding
 * the requested number of bytes.
 *
 * @param mq		message queue
 * @param nbytes	number of bytes needed for message
 * @return		message structure pointing to the ring and position
 * 			allocated
 */
svm_msg_q_msg_t svm_msg_q_alloc_msg (svm_msg_q_t * mq, u32 nbytes);

/**
 * Allocate message buffer on ring
 *
 * Message is allocated on the requested ring. The caller MUST check that
 * the ring is not full.
 *
 * @param mq		message queue
 * @param ring_index	ring on which the allocation should occur
 * @return		message structure pointing to the ring and position
 * 			allocated
 */
svm_msg_q_msg_t svm_msg_q_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index);

/**
 * Lock message queue and allocate message buffer on ring
 *
 * This should be used when multiple writers/readers are expected to
 * compete for the rings/queue. The message should be enqueued by calling
 * @ref svm_msg_q_add_and_unlock, which releases the queue lock once
 * the message is enqueued.
 *
 * @param mq		message queue
 * @param ring_index	ring on which the allocation should occur
 * @param noblock	flag that indicates if request should block
 * @param msg		pointer to message to be filled in
 * @return		0 on success, negative number otherwise
 */
int svm_msg_q_lock_and_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index,
					 u8 noblock, svm_msg_q_msg_t * msg);

/**
 * Free message buffer
 *
 * Marks message buffer on ring as free.
 *
 * @param mq		message queue
 * @param msg		message to be freed
 */
void svm_msg_q_free_msg (svm_msg_q_t * mq, svm_msg_q_msg_t * msg);

/**
 * Producer enqueue one message to queue
 *
 * Prior to calling this, the producer should've obtained a message buffer
 * from one of the rings by calling @ref svm_msg_q_alloc_msg.
 *
 * @param mq		message queue
 * @param msg		message (pointer to ring position) to be enqueued
 * @param nowait	flag to indicate if request is blocking or not
 * @return		success status
 */
int svm_msg_q_add (svm_msg_q_t * mq, svm_msg_q_msg_t * msg, int nowait);
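
/*
 * Illustrative producer sketch (assumptions: "my_msg_t"/"my_msg" are
 * hypothetical and 0 is passed for a blocking enqueue): allocate a buffer,
 * copy the payload into the ring, then post the message handle.
 *
 *   svm_msg_q_msg_t msg = svm_msg_q_alloc_msg (mq, sizeof (my_msg_t));
 *   if (!svm_msg_q_msg_is_invalid (&msg))
 *     {
 *       memcpy (svm_msg_q_msg_data (mq, &msg), &my_msg, sizeof (my_msg));
 *       svm_msg_q_add (mq, &msg, 0);
 *     }
 */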

/**
 * Producer enqueue one message to queue with mutex held
 *
 * Prior to calling this, the producer should've obtained a message buffer
 * from one of the rings by calling @ref svm_msg_q_alloc_msg. It assumes
 * the queue mutex is held.
 *
 * @param mq		message queue
 * @param msg		message (pointer to ring position) to be enqueued
 */
void svm_msg_q_add_and_unlock (svm_msg_q_t * mq, svm_msg_q_msg_t * msg);
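
/*
 * Illustrative multi-writer sketch: the alloc call takes the queue mutex
 * and svm_msg_q_add_and_unlock releases it after enqueueing. "payload",
 * "len" and "ring_index" are hypothetical; noblock is 0, so the call may
 * block until a slot frees up.
 *
 *   svm_msg_q_msg_t msg;
 *   if (!svm_msg_q_lock_and_alloc_msg_w_ring (mq, ring_index, 0, &msg))
 *     {
 *       memcpy (svm_msg_q_msg_data (mq, &msg), payload, len);
 *       svm_msg_q_add_and_unlock (mq, &msg);
 *     }
 */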

/**
 * Consumer dequeue one message from queue
 *
 * This returns the message pointing to the data in the message rings.
 * The consumer is expected to call @ref svm_msg_q_free_msg once it
 * finishes processing or copying the message data.
 *
 * @param mq		message queue
 * @param msg		pointer to structure where message is to be received
 * @param cond		flag that indicates if request should block or not
 * @param time		time to wait if condition is SVM_Q_TIMEDWAIT
 * @return		success status
 */
int svm_msg_q_sub (svm_msg_q_t * mq, svm_msg_q_msg_t * msg,
		   svm_q_conditional_wait_t cond, u32 time);

/**
 * Consumer dequeue one message from queue with mutex held
 *
 * Returns the message pointing to the data in the message rings under the
 * assumption that the message queue lock is already held. The consumer is
 * expected to call @ref svm_msg_q_free_msg once it finishes
 * processing or copying the message data.
 *
 * @param mq		message queue
 * @param msg		pointer to structure where message is to be received
 */
void svm_msg_q_sub_w_lock (svm_msg_q_t * mq, svm_msg_q_msg_t * msg);

/**
 * Get data for message in queue
 *
 * @param mq		message queue
 * @param msg		message for which the data is requested
 * @return		pointer to data
 */
void *svm_msg_q_msg_data (svm_msg_q_t * mq, svm_msg_q_msg_t * msg);

/**
 * Get message queue ring
 *
 * @param mq		message queue
 * @param ring_index	index of ring
 * @return		pointer to ring
 */
svm_msg_q_ring_t *svm_msg_q_ring (svm_msg_q_t * mq, u32 ring_index);

/**
 * Set event fd for queue consumer
 *
 * If set, queue will exclusively use eventfds for signaling. Moreover,
 * afterwards, the queue should only be used in non-blocking mode. Waiting
 * for events should be done externally using something like epoll.
 *
 * @param mq		message queue
 * @param fd		consumer eventfd
 */
void svm_msg_q_set_consumer_eventfd (svm_msg_q_t * mq, int fd);

/**
 * Set event fd for queue producer
 *
 * If set, queue will exclusively use eventfds for signaling. Moreover,
 * afterwards, the queue should only be used in non-blocking mode. Waiting
 * for events should be done externally using something like epoll.
 *
 * @param mq		message queue
 * @param fd		producer eventfd
 */
void svm_msg_q_set_producer_eventfd (svm_msg_q_t * mq, int fd);
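
/*
 * Illustrative eventfd consumer sketch (assumes Linux <sys/epoll.h>;
 * "epfd" is a hypothetical epoll instance and error handling is omitted):
 * switch the queue to eventfd signaling and poll the fd externally, then
 * drain the queue in non-blocking mode.
 *
 *   svm_msg_q_alloc_consumer_eventfd (mq);
 *   struct epoll_event ev = { .events = EPOLLIN };
 *   epoll_ctl (epfd, EPOLL_CTL_ADD, svm_msg_q_get_consumer_eventfd (mq), &ev);
 *   ... on EPOLLIN, drain with svm_msg_q_sub (mq, &msg, SVM_Q_NOWAIT, 0) ...
 */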

/**
 * Allocate event fd for queue consumer
 */
int svm_msg_q_alloc_consumer_eventfd (svm_msg_q_t * mq);

/**
 * Allocate event fd for queue producer
 */
int svm_msg_q_alloc_producer_eventfd (svm_msg_q_t * mq);

/**
 * Check if message queue is full
 */
static inline u8
svm_msg_q_is_full (svm_msg_q_t * mq)
{
  return (mq->q->cursize == mq->q->maxsize);
}

static inline u8
svm_msg_q_ring_is_full (svm_msg_q_t * mq, u32 ring_index)
{
  ASSERT (ring_index < vec_len (mq->rings));
  return (mq->rings[ring_index].cursize == mq->rings[ring_index].nitems);
}

/**
 * Check if message queue is empty
 */
static inline u8
svm_msg_q_is_empty (svm_msg_q_t * mq)
{
  return (mq->q->cursize == 0);
}

/**
 * Check length of message queue
 */
static inline u32
svm_msg_q_size (svm_msg_q_t * mq)
{
  return mq->q->cursize;
}

/**
 * Check if message is invalid
 */
static inline u8
svm_msg_q_msg_is_invalid (svm_msg_q_msg_t * msg)
{
  return (msg->as_u64 == (u64) ~ 0);
}

/**
 * Try locking message queue
 */
static inline int
svm_msg_q_try_lock (svm_msg_q_t * mq)
{
  return pthread_mutex_trylock (&mq->q->mutex);
}

/**
 * Lock the message queue, blocking until the lock is acquired
 */
static inline int
svm_msg_q_lock (svm_msg_q_t * mq)
{
  return pthread_mutex_lock (&mq->q->mutex);
}

/**
 * Unlock message queue
 */
static inline void
svm_msg_q_unlock (svm_msg_q_t * mq)
{
  pthread_mutex_unlock (&mq->q->mutex);
}

/**
 * Wait for message queue event
 *
 * Must be called with mutex held. The queue only works non-blocking
 * with eventfds, so handle blocking calls as an exception here.
 */
static inline void
svm_msg_q_wait (svm_msg_q_t * mq)
{
  svm_queue_wait (mq->q);
}

/**
 * Timed wait for message queue event
 *
 * Must be called with mutex held.
 *
 * @param mq 		message queue
 * @param timeout	time in seconds
 */
static inline int
svm_msg_q_timedwait (svm_msg_q_t * mq, double timeout)
{
  return svm_queue_timedwait (mq->q, timeout);
}
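
/*
 * Illustrative sketch of the locked consumer pattern implied by the two
 * wait helpers above (the 1.0 second timeout is an arbitrary assumption):
 * take the mutex, wait while the queue is empty, dequeue under the lock,
 * then release it.
 *
 *   svm_msg_q_lock (mq);
 *   if (svm_msg_q_is_empty (mq))
 *     svm_msg_q_timedwait (mq, 1.0);
 *   if (!svm_msg_q_is_empty (mq))
 *     svm_msg_q_sub_w_lock (mq, &msg);
 *   svm_msg_q_unlock (mq);
 */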

static inline int
svm_msg_q_get_consumer_eventfd (svm_msg_q_t * mq)
{
  return mq->q->consumer_evtfd;
}

static inline int
svm_msg_q_get_producer_eventfd (svm_msg_q_t * mq)
{
  return mq->q->producer_evtfd;
}

#endif /* SRC_SVM_MESSAGE_QUEUE_H_ */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */