summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2018-07-10 14:15:35 +0200
committerDamjan Marion <dmarion@me.com>2018-07-17 21:27:13 +0000
commit96f40a0563d0a32fe1008c7cc8443367c528b902 (patch)
tree4159cc1868ba77c3f14738b54468c63922bbcace
parent6a0f7dd3021b63b5eb5d4286a9d84cbb5a60b01a (diff)
memif: vectorized buffer enqueue in input node
Change-Id: If6970788396c85415634f12304f49eed0d812281 Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r--src/plugins/memif/node.c267
1 files changed, 121 insertions, 146 deletions
diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c
index 2b8c4f2d228..726adb886a5 100644
--- a/src/plugins/memif/node.c
+++ b/src/plugins/memif/node.c
@@ -181,11 +181,9 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
memif_ring_t *ring;
memif_queue_t *mq;
u16 buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
- u32 next_index;
uword n_trace = vlib_get_trace_count (vm, node);
u32 n_rx_packets = 0, n_rx_bytes = 0;
- u32 n_left, *to_next = 0;
- u32 bi0, bi1, bi2, bi3;
+ u32 n_left;
vlib_buffer_t *b0, *b1, *b2, *b3;
u32 thread_index = vm->thread_index;
memif_per_thread_data_t *ptd = vec_elt_at_index (mm->per_thread_data,
@@ -204,9 +202,6 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
ring_size = 1 << mq->log2_ring_size;
mask = ring_size - 1;
- next_index = (mode == MEMIF_INTERFACE_MODE_IP) ?
- VNET_DEVICE_INPUT_NEXT_IP6_INPUT : VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
-
/* asume that somebody will want to add ethernet header on the packet
so start with IP header at offset 14 */
start_offset = (mode == MEMIF_INTERFACE_MODE_IP) ? 14 : 0;
@@ -340,162 +335,142 @@ memif_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
mq->last_tail = cur_slot;
}
- u32 n_from = n_rx_packets;
- po = ptd->packet_ops;
+ u16 nexts[MEMIF_RX_VECTOR_SZ], *next = nexts;
+ u32 to_next_buffers[MEMIF_RX_VECTOR_SZ], *bi = to_next_buffers;
+ /* prepare buffer template and next indices */
vnet_buffer (bt)->sw_if_index[VLIB_RX] = mif->sw_if_index;
+ vnet_buffer (bt)->feature_arc_index = 0;
bt->current_data = start_offset;
+ bt->current_config_index = 0;
- while (n_from)
+ if (mode == MEMIF_INTERFACE_MODE_ETHERNET)
{
- u32 n_left_to_next;
- u32 next0, next1, next2, next3;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
- while (n_from >= 8 && n_left_to_next >= 4)
- {
- b0 = vlib_get_buffer (vm, po[4].first_buffer_vec_index);
- b1 = vlib_get_buffer (vm, po[5].first_buffer_vec_index);
- b2 = vlib_get_buffer (vm, po[6].first_buffer_vec_index);
- b3 = vlib_get_buffer (vm, po[7].first_buffer_vec_index);
- vlib_prefetch_buffer_header (b0, STORE);
- vlib_prefetch_buffer_header (b1, STORE);
- vlib_prefetch_buffer_header (b2, STORE);
- vlib_prefetch_buffer_header (b3, STORE);
-
- /* enqueue buffer */
- u32 fbvi0 = po[0].first_buffer_vec_index;
- u32 fbvi1 = po[1].first_buffer_vec_index;
- u32 fbvi2 = po[2].first_buffer_vec_index;
- u32 fbvi3 = po[3].first_buffer_vec_index;
- to_next[0] = bi0 = ptd->buffers[fbvi0];
- to_next[1] = bi1 = ptd->buffers[fbvi1];
- to_next[2] = bi2 = ptd->buffers[fbvi2];
- to_next[3] = bi3 = ptd->buffers[fbvi3];
- to_next += 4;
- n_left_to_next -= 4;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
- b2 = vlib_get_buffer (vm, bi2);
- b3 = vlib_get_buffer (vm, bi3);
-
- clib_memcpy64_x4 (b0, b1, b2, b3, bt);
-
- b0->current_length = po[0].packet_len;
- n_rx_bytes += b0->current_length;
- b1->current_length = po[1].packet_len;
- n_rx_bytes += b1->current_length;
- b2->current_length = po[2].packet_len;
- n_rx_bytes += b2->current_length;
- b3->current_length = po[3].packet_len;
- n_rx_bytes += b3->current_length;
-
- memif_add_to_chain (vm, b0, ptd->buffers + fbvi0 + 1, buffer_size);
- memif_add_to_chain (vm, b1, ptd->buffers + fbvi1 + 1, buffer_size);
- memif_add_to_chain (vm, b2, ptd->buffers + fbvi2 + 1, buffer_size);
- memif_add_to_chain (vm, b3, ptd->buffers + fbvi3 + 1, buffer_size);
-
- if (mode == MEMIF_INTERFACE_MODE_IP)
- {
- next0 = memif_next_from_ip_hdr (node, b0);
- next1 = memif_next_from_ip_hdr (node, b1);
- next2 = memif_next_from_ip_hdr (node, b2);
- next3 = memif_next_from_ip_hdr (node, b3);
- }
- else if (mode == MEMIF_INTERFACE_MODE_ETHERNET)
- {
- if (PREDICT_FALSE (mif->per_interface_next_index != ~0))
- {
- next0 = mif->per_interface_next_index;
- next1 = mif->per_interface_next_index;
- next2 = mif->per_interface_next_index;
- next3 = mif->per_interface_next_index;
- }
- else
- {
- next0 = next1 = next2 = next3 = next_index;
- /* redirect if feature path enabled */
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next0, b0);
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next1, b1);
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next2, b2);
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next3, b3);
- }
- }
-
- /* trace */
- if (PREDICT_FALSE (n_trace > 0))
- {
- memif_trace_buffer (vm, node, mif, b0, next0, qid, &n_trace);
- if (PREDICT_FALSE (n_trace > 0))
- memif_trace_buffer (vm, node, mif, b1, next1, qid, &n_trace);
- if (PREDICT_FALSE (n_trace > 0))
- memif_trace_buffer (vm, node, mif, b2, next2, qid, &n_trace);
- if (PREDICT_FALSE (n_trace > 0))
- memif_trace_buffer (vm, node, mif, b3, next3, qid, &n_trace);
- }
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ if (mif->per_interface_next_index != ~0)
+ next_index = mif->per_interface_next_index;
+ else
+ vnet_feature_start_device_input_x1 (mif->sw_if_index, &next_index,
+ bt);
+ clib_memset_u16 (nexts, next_index, n_rx_packets);
+ }
- /* enqueue */
- vlib_validate_buffer_enqueue_x4 (vm, node, next_index, to_next,
- n_left_to_next, bi0, bi1, bi2, bi3,
- next0, next1, next2, next3);
+ /* process buffer metadata */
+ u32 n_from = n_rx_packets;
+ po = ptd->packet_ops;
- /* next */
- n_from -= 4;
- po += 4;
- }
- while (n_from && n_left_to_next)
+ while (n_from >= 8)
+ {
+ b0 = vlib_get_buffer (vm, po[4].first_buffer_vec_index);
+ b1 = vlib_get_buffer (vm, po[5].first_buffer_vec_index);
+ b2 = vlib_get_buffer (vm, po[6].first_buffer_vec_index);
+ b3 = vlib_get_buffer (vm, po[7].first_buffer_vec_index);
+ vlib_prefetch_buffer_header (b0, STORE);
+ vlib_prefetch_buffer_header (b1, STORE);
+ vlib_prefetch_buffer_header (b2, STORE);
+ vlib_prefetch_buffer_header (b3, STORE);
+
+ /* enqueue buffer */
+ u32 fbvi[4];
+ fbvi[0] = po[0].first_buffer_vec_index;
+ fbvi[1] = po[1].first_buffer_vec_index;
+ fbvi[2] = po[2].first_buffer_vec_index;
+ fbvi[3] = po[3].first_buffer_vec_index;
+
+ bi[0] = ptd->buffers[fbvi[0]];
+ bi[1] = ptd->buffers[fbvi[1]];
+ bi[2] = ptd->buffers[fbvi[2]];
+ bi[3] = ptd->buffers[fbvi[3]];
+
+ b0 = vlib_get_buffer (vm, bi[0]);
+ b1 = vlib_get_buffer (vm, bi[1]);
+ b2 = vlib_get_buffer (vm, bi[2]);
+ b3 = vlib_get_buffer (vm, bi[3]);
+
+ clib_memcpy64_x4 (b0, b1, b2, b3, bt);
+
+ b0->current_length = po[0].packet_len;
+ n_rx_bytes += b0->current_length;
+ b1->current_length = po[1].packet_len;
+ n_rx_bytes += b1->current_length;
+ b2->current_length = po[2].packet_len;
+ n_rx_bytes += b2->current_length;
+ b3->current_length = po[3].packet_len;
+ n_rx_bytes += b3->current_length;
+
+ memif_add_to_chain (vm, b0, ptd->buffers + fbvi[0] + 1, buffer_size);
+ memif_add_to_chain (vm, b1, ptd->buffers + fbvi[1] + 1, buffer_size);
+ memif_add_to_chain (vm, b2, ptd->buffers + fbvi[2] + 1, buffer_size);
+ memif_add_to_chain (vm, b3, ptd->buffers + fbvi[3] + 1, buffer_size);
+
+ if (mode == MEMIF_INTERFACE_MODE_IP)
{
- /* enqueue buffer */
- u32 fbvi0 = po->first_buffer_vec_index;
- to_next[0] = bi0 = ptd->buffers[fbvi0];
- to_next += 1;
- n_left_to_next--;
-
- b0 = vlib_get_buffer (vm, bi0);
- clib_memcpy (b0, bt, 64);
- b0->current_length = po->packet_len;
- n_rx_bytes += b0->current_length;
-
- memif_add_to_chain (vm, b0, ptd->buffers + fbvi0 + 1, buffer_size);
-
- if (mode == MEMIF_INTERFACE_MODE_IP)
- {
- next0 = memif_next_from_ip_hdr (node, b0);
- }
- else if (mode == MEMIF_INTERFACE_MODE_ETHERNET)
- {
- if (PREDICT_FALSE (mif->per_interface_next_index != ~0))
- next0 = mif->per_interface_next_index;
- else
- {
- next0 = next_index;
- /* redirect if feature path enabled */
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next0, b0);
- }
+ next[0] = memif_next_from_ip_hdr (node, b0);
+ next[1] = memif_next_from_ip_hdr (node, b1);
+ next[2] = memif_next_from_ip_hdr (node, b2);
+ next[3] = memif_next_from_ip_hdr (node, b3);
+ }
- }
+ /* next */
+ n_from -= 4;
+ po += 4;
+ bi += 4;
+ next += 4;
+ }
+ while (n_from)
+ {
+ u32 fbvi[4];
+ /* enqueue buffer */
+ fbvi[0] = po[0].first_buffer_vec_index;
+ bi[0] = ptd->buffers[fbvi[0]];
+ b0 = vlib_get_buffer (vm, bi[0]);
+ clib_memcpy (b0, bt, 64);
+ b0->current_length = po->packet_len;
+ n_rx_bytes += b0->current_length;
+
+ memif_add_to_chain (vm, b0, ptd->buffers + fbvi[0] + 1, buffer_size);
+
+ if (mode == MEMIF_INTERFACE_MODE_IP)
+ {
+ next[0] = memif_next_from_ip_hdr (node, b0);
+ }
- /* trace */
- if (PREDICT_FALSE (n_trace > 0))
- memif_trace_buffer (vm, node, mif, b0, next0, qid, &n_trace);
+ /* next */
+ n_from -= 1;
+ po += 1;
+ bi += 1;
+ next += 1;
+ }
- /* enqueue */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
+ /* packet trace if enabled */
+ if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
+ {
+ u32 n_left = n_rx_packets;
+ bi = to_next_buffers;
+ next = nexts;
+ while (n_trace && n_left)
+ {
+ vlib_buffer_t *b;
+ memif_input_trace_t *tr;
+ b = vlib_get_buffer (vm, bi[0]);
+ vlib_trace_buffer (vm, node, next[0], b, /* follow_chain */ 0);
+ tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+ tr->next_index = next[0];
+ tr->hw_if_index = mif->hw_if_index;
+ tr->ring = qid;
/* next */
- n_from--;
- po++;
+ n_trace--;
+ n_left--;
+ bi++;
+ next++;
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_set_trace_count (vm, node, n_trace);
}
+ vlib_buffer_enqueue_to_next (vm, node, to_next_buffers, nexts,
+ n_rx_packets);
+
vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters
+ VNET_INTERFACE_COUNTER_RX, thread_index,
mif->hw_if_index, n_rx_packets,
ht .cpf { color: #75715e } /* Comment.PreprocFile */ .highlight .c1 { color: #75715e } /* Comment.Single */ .highlight .cs { color: #75715e } /* Comment.Special */ .highlight .gd { color: #f92672 } /* Generic.Deleted */ .highlight .ge { font-style: italic } /* Generic.Emph */ .highlight .gi { color: #a6e22e } /* Generic.Inserted */ .highlight .gs { font-weight: bold } /* Generic.Strong */ .highlight .gu { color: #75715e } /* Generic.Subheading */ .highlight .kc { color: #66d9ef } /* Keyword.Constant */ .highlight .kd { color: #66d9ef } /* Keyword.Declaration */ .highlight .kn { color: #f92672 } /* Keyword.Namespace */ .highlight .kp { color: #66d9ef } /* Keyword.Pseudo */ .highlight .kr { color: #66d9ef } /* Keyword.Reserved */ .highlight .kt { color: #66d9ef } /* Keyword.Type */ .highlight .ld { color: #e6db74 } /* Literal.Date */ .highlight .m { color: #ae81ff } /* Literal.Number */ .highlight .s { color: #e6db74 } /* Literal.String */ .highlight .na { color: #a6e22e } /* Name.Attribute */ .highlight .nb { color: #f8f8f2 } /* Name.Builtin */ .highlight .nc { color: #a6e22e } /* Name.Class */ .highlight .no { color: #66d9ef } /* Name.Constant */ .highlight .nd { color: #a6e22e } /* Name.Decorator */ .highlight .ni { color: #f8f8f2 } /* Name.Entity */ .highlight .ne { color: #a6e22e } /* Name.Exception */ .highlight .nf { color: #a6e22e } /* Name.Function */ .highlight .nl { color: #f8f8f2 } /* Name.Label */ .highlight .nn { color: #f8f8f2 } /* Name.Namespace */ .highlight .nx { color: #a6e22e } /* Name.Other */ .highlight .py { color: #f8f8f2 } /* Name.Property */ .highlight .nt { color: #f92672 } /* Name.Tag */ .highlight .nv { color: #f8f8f2 } /* Name.Variable */ .highlight .ow { color: #f92672 } /* Operator.Word */ .highlight .w { color: #f8f8f2 } /* Text.Whitespace */ .highlight .mb { color: #ae81ff } /* Literal.Number.Bin */ .highlight .mf { color: #ae81ff } /* Literal.Number.Float */ .highlight .mh { color: #ae81ff } /* Literal.Number.Hex */ .highlight .mi { color: #ae81ff } /* Literal.Number.Integer */ .highlight .mo { color: #ae81ff } /* Literal.Number.Oct */ .highlight .sa { color: #e6db74 } /* Literal.String.Affix */ .highlight .sb { color: #e6db74 } /* Literal.String.Backtick */ .highlight .sc { color: #e6db74 } /* Literal.String.Char */ .highlight .dl { color: #e6db74 } /* Literal.String.Delimiter */ .highlight .sd { color: #e6db74 } /* Literal.String.Doc */ .highlight .s2 { color: #e6db74 } /* Literal.String.Double */ .highlight .se { color: #ae81ff } /* Literal.String.Escape */ .highlight .sh { color: #e6db74 } /* Literal.String.Heredoc */ .highlight .si { color: #e6db74 } /* Literal.String.Interpol */ .highlight .sx { color: #e6db74 } /* Literal.String.Other */ .highlight .sr { color: #e6db74 } /* Literal.String.Regex */ .highlight .s1 { color: #e6db74 } /* Literal.String.Single */ .highlight .ss { color: #e6db74 } /* Literal.String.Symbol */ .highlight .bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #a6e22e } /* Name.Function.Magic */ .highlight .vc { color: #f8f8f2 } /* Name.Variable.Class */ .highlight .vg { color: #f8f8f2 } /* Name.Variable.Global */ .highlight .vi { color: #f8f8f2 } /* Name.Variable.Instance */ .highlight .vm { color: #f8f8f2 } /* Name.Variable.Magic */ .highlight .il { color: #ae81ff } /* Literal.Number.Integer.Long */ } @media (prefers-color-scheme: light) { .highlight .hll { background-color: #ffffcc } .highlight .c { color: #888888 } /* Comment */ .highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */ .highlight .k { color: #008800; font-weight: bold } /* Keyword */ .highlight .ch { color: #888888 } /* Comment.Hashbang */ .highlight .cm { color: #888888 } /* Comment.Multiline */ .highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */ .highlight .cpf { color: #888888 } /* Comment.PreprocFile */ .highlight .c1 { color: #888888 } /* Comment.Single */ .highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */ .highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */ .highlight .ge { font-style: italic } /* Generic.Emph */ .highlight .gr { color: #aa0000 } /* Generic.Error */ .highlight .gh { color: #333333 } /* Generic.Heading */ .highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */ .highlight .go { color: #888888 } /* Generic.Output */ .highlight .gp { color: #555555 } /* Generic.Prompt */ .highlight .gs { font-weight: bold } /* Generic.Strong */ .highlight .gu { color: #666666 } /* Generic.Subheading */ .highlight .gt { color: #aa0000 } /* Generic.Traceback */ .highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */ .highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */ .highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */ .highlight .kp { color: #008800 } /* Keyword.Pseudo */ .highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */ .highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */ .highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */ .highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */ .highlight .na { color: #336699 } /* Name.Attribute */ .highlight .nb { color: #003388 } /* Name.Builtin */ .highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */ .highlight .no { color: #003366; font-weight: bold } /* Name.Constant */ .highlight .nd { color: #555555 } /* Name.Decorator */ .highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */ .highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */ .highlight .nl { color: #336699; font-style: italic } /* Name.Label */ .highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */ .highlight .py { color: #336699; font-weight: bold } /* Name.Property */ .highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */ .highlight .nv { color: #336699 } /* Name.Variable */ .highlight .ow { color: #008800 } /* Operator.Word */ .highlight .w { color: #bbbbbb } /* Text.Whitespace */ .highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */ .highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */ .highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */ .highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */ .highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */ .highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */ .highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */ .highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */ .highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */ .highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */ .highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */ .highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */ .highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */ .highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */ .highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */ .highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */ .highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */ .highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */ .highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm { color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */ }
/*
 * Copyright (c) 2017-2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef __included_session_h__
#define __included_session_h__

#include <vppinfra/llist.h>
#include <vnet/session/session_types.h>
#include <vnet/session/session_lookup.h>
#include <vnet/session/session_debug.h>
#include <svm/message_queue.h>
#include <svm/ssvm.h>

#define foreach_session_input_error                                    	\
_(NO_SESSION, "No session drops")                                       \
_(NO_LISTENER, "No listener for dst port drops")                        \
_(ENQUEUED, "Packets pushed into rx fifo")                              \
_(NOT_READY, "Session not ready packets")                               \
_(FIFO_FULL, "Packets dropped for lack of rx fifo space")               \
_(EVENT_FIFO_FULL, "Events not sent for lack of event fifo space")      \
_(API_QUEUE_FULL, "Sessions not created for lack of API queue space")   \
_(NEW_SEG_NO_SPACE, "Created segment, couldn't allocate a fifo pair")   \
_(NO_SPACE, "Couldn't allocate a fifo pair")				\
_(SEG_CREATE, "Couldn't create a new segment")

typedef enum
{
#define _(sym,str) SESSION_ERROR_##sym,
  foreach_session_input_error
#undef _
    SESSION_N_ERROR,
} session_error_t;

typedef struct session_tx_context_
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
  session_t *s;
  transport_proto_vft_t *transport_vft;
  transport_connection_t *tc;
  u32 max_dequeue;
  u32 snd_space;
  u32 left_to_snd;
  u32 tx_offset;
  u32 max_len_to_snd;
  u16 deq_per_first_buf;
  u16 deq_per_buf;
  u16 snd_mss;
  u16 n_segs_per_evt;
  u8 n_bufs_per_seg;
    CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
  session_dgram_hdr_t hdr;
} session_tx_context_t;

typedef struct session_evt_elt
{
  clib_llist_anchor_t evt_list;
  session_event_t evt;
} session_evt_elt_t;

typedef struct session_ctrl_evt_data_
{
  u8 data[SESSION_CTRL_MSG_MAX_SIZE];
} session_evt_ctrl_data_t;

typedef struct session_worker_
{
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);

  /** Worker session pool */
  session_t *sessions;

  /** vpp event message queue for worker */
  svm_msg_q_t *vpp_event_queue;

  /** vlib_time_now last time around the track */
  clib_time_type_t last_vlib_time;

  /** vlib_time_now rounded to us precision and as u64 */
  clib_us_time_t last_vlib_us_time;

  /** Convenience pointer to this worker's vlib_main */
  vlib_main_t *vm;

  /** Per-proto vector of sessions to enqueue */
  u32 *session_to_enqueue[TRANSPORT_N_PROTO];

  /** Context for session tx */
  session_tx_context_t ctx;

  /** Vector of tx buffer free lists */
  u32 *tx_buffers;

  /** Pool of session event list elements */
  session_evt_elt_t *event_elts;

  /** Pool of ctrl events data buffers */
  session_evt_ctrl_data_t *ctrl_evts_data;

  /** Head of control events list */
  clib_llist_index_t ctrl_head;

  /** Head of list of elements */
  clib_llist_index_t new_head;

  /** Head of list of pending events */
  clib_llist_index_t old_head;

  /** Peekers rw lock */
  clib_rwlock_t peekers_rw_locks;

  /** Vector of buffers to be sent */
  u32 *pending_tx_buffers;

  /** Vector of nexts for the pending tx buffers */
  u16 *pending_tx_nexts;

#if SESSION_DEBUG
  /** last event poll time by thread */
  clib_time_type_t last_event_poll;
#endif
} session_worker_t;

typedef int (session_fifo_rx_fn) (session_worker_t * wrk,
				  vlib_node_runtime_t * node,
				  session_evt_elt_t * e, int *n_tx_packets);

extern session_fifo_rx_fn session_tx_fifo_peek_and_snd;
extern session_fifo_rx_fn session_tx_fifo_dequeue_and_snd;
extern session_fifo_rx_fn session_tx_fifo_dequeue_internal;

u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e);

typedef struct session_main_
{
  /** Worker contexts */
  session_worker_t *wrk;

  /** Event queues memfd segment initialized only if so configured */
  ssvm_private_t evt_qs_segment;

  /** Unique segment name counter */
  u32 unique_segment_name_counter;

  /** Per transport rx function that can either dequeue or peek */
  session_fifo_rx_fn **session_tx_fns;

  /** Per session type output nodes. Could optimize to group nodes by
   * fib but lookup would then require session type parsing in session node.
   * Trade memory for speed, for now */
  u32 *session_type_to_next;

  /*
   * Config parameters
   */

  /** Session manager is enabled */
  u8 is_enabled;
  /** Enable session manager at startup */
  u8 session_enable_asap;

  /** vpp fifo event queue configured length */
  u32 configured_event_queue_length;

  /** Session ssvm segment configs*/
  uword session_baseva;
  uword session_va_space_size;
  uword evt_qs_segment_size;
  u8 evt_qs_use_memfd_seg;

  /** Session table size parameters */
  u32 configured_v4_session_table_buckets;
  u32 configured_v4_session_table_memory;
  u32 configured_v4_halfopen_table_buckets;
  u32 configured_v4_halfopen_table_memory;
  u32 configured_v6_session_table_buckets;
  u32 configured_v6_session_table_memory;
  u32 configured_v6_halfopen_table_buckets;
  u32 configured_v6_halfopen_table_memory;

  /** Transport table (preallocation) size parameters */
  u32 local_endpoints_table_memory;
  u32 local_endpoints_table_buckets;

  /** Preallocate session config parameter */
  u32 preallocated_sessions;

} session_main_t;

extern session_main_t session_main;
extern vlib_node_registration_t session_queue_node;
extern vlib_node_registration_t session_queue_process_node;
extern vlib_node_registration_t session_queue_pre_input_node;

#define SESSION_Q_PROCESS_FLUSH_FRAMES	1
#define SESSION_Q_PROCESS_STOP		2

static inline session_evt_elt_t *
session_evt_elt_alloc (session_worker_t * wrk)
{
  session_evt_elt_t *elt;
  pool_get (wrk->event_elts, elt);
  return elt;
}

static inline void
session_evt_elt_free (session_worker_t * wrk, session_evt_elt_t * elt)
{
  pool_put (wrk->event_elts, elt);
}

static inline void
session_evt_add_old (session_worker_t * wrk, session_evt_elt_t * elt)
{
  clib_llist_add_tail (wrk->event_elts, evt_list, elt,
		       pool_elt_at_index (wrk->event_elts, wrk->old_head));
}

static inline void
session_evt_add_head_old (session_worker_t * wrk, session_evt_elt_t * elt)
{
  clib_llist_add (wrk->event_elts, evt_list, elt,
		  pool_elt_at_index (wrk->event_elts, wrk->old_head));
}


static inline u32
session_evt_ctrl_data_alloc (session_worker_t * wrk)
{
  session_evt_ctrl_data_t *data;
  pool_get (wrk->ctrl_evts_data, data);
  return (data - wrk->ctrl_evts_data);
}

static inline session_evt_elt_t *
session_evt_alloc_ctrl (session_worker_t * wrk)
{
  session_evt_elt_t *elt;
  elt = session_evt_elt_alloc (wrk);
  clib_llist_add_tail (wrk->event_elts, evt_list, elt,
		       pool_elt_at_index (wrk->event_elts, wrk->ctrl_head));
  return elt;
}

static inline void *
session_evt_ctrl_data (session_worker_t * wrk, session_evt_elt_t * elt)
{
  return (void *) (pool_elt_at_index (wrk->ctrl_evts_data,
				      elt->evt.ctrl_data_index));
}

static inline void
session_evt_ctrl_data_free (session_worker_t * wrk, session_evt_elt_t * elt)
{
  ASSERT (elt->evt.event_type > SESSION_IO_EVT_BUILTIN_TX);
  pool_put_index (wrk->ctrl_evts_data, elt->evt.ctrl_data_index);
}

static inline session_evt_elt_t *
session_evt_alloc_new (session_worker_t * wrk)
{
  session_evt_elt_t *elt;
  elt = session_evt_elt_alloc (wrk);
  clib_llist_add_tail (wrk->event_elts, evt_list, elt,
		       pool_elt_at_index (wrk->event_elts, wrk->new_head));
  return elt;
}

static inline session_evt_elt_t *
session_evt_alloc_old (session_worker_t * wrk)
{
  session_evt_elt_t *elt;
  elt = session_evt_elt_alloc (wrk);
  clib_llist_add_tail (wrk->event_elts, evt_list, elt,
		       pool_elt_at_index (wrk->event_elts, wrk->old_head));
  return elt;
}

session_t *session_alloc (u32 thread_index);
void session_free (session_t * s);
void session_free_w_fifos (session_t * s);
u8 session_is_valid (u32 si, u8 thread_index);

always_inline session_t *
session_get (u32 si, u32 thread_index)
{
  ASSERT (session_is_valid (si, thread_index));
  return pool_elt_at_index (session_main.wrk[thread_index].sessions, si);
}

always_inline session_t *
session_get_if_valid (u64 si, u32 thread_index)
{
  if (thread_index >= vec_len (session_main.wrk))
    return 0;

  if (pool_is_free_index (session_main.wrk[thread_index].sessions, si))
    return 0;

  ASSERT (session_is_valid (si, thread_index));
  return pool_elt_at_index (session_main.wrk[thread_index].sessions, si);
}

always_inline session_t *
session_get_from_handle (session_handle_t handle)
{
  session_main_t *smm = &session_main;
  u32 session_index, thread_index;
  session_parse_handle (handle, &session_index, &thread_index);
  return pool_elt_at_index (smm->wrk[thread_index].sessions, session_index);
}

always_inline session_t *
session_get_from_handle_if_valid (session_handle_t handle)
{
  u32 session_index, thread_index;
  session_parse_handle (handle, &session_index, &thread_index);
  return session_get_if_valid (session_index, thread_index);
}

u64 session_segment_handle (session_t * s);

/**
 * Acquires a lock that blocks a session pool from expanding.
 *
 * This is typically used for safely peeking into other threads'
 * pools in order to clone elements. Lock should be dropped as soon
 * as possible by calling @ref session_pool_remove_peeker.
 *
 * NOTE: Avoid using pool_elt_at_index while the lock is held because
 * it may lead to free elt bitmap expansion/contraction!
 */
always_inline void
session_pool_add_peeker (u32 thread_index)
{
  session_worker_t *wrk = &session_main.wrk[thread_index];
  if (thread_index == vlib_get_thread_index ())
    return;
  clib_rwlock_reader_lock (&wrk->peekers_rw_locks);
}

always_inline void
session_pool_remove_peeker (u32 thread_index)
{
  session_worker_t *wrk = &session_main.wrk[thread_index];
  if (thread_index == vlib_get_thread_index ())
    return;
  clib_rwlock_reader_unlock (&wrk->peekers_rw_locks);
}

/**
 * Get session from handle and 'lock' pool resize if not in same thread
 *
 * Caller should drop the peek 'lock' as soon as possible.
 */
always_inline session_t *
session_get_from_handle_safe (u64 handle)
{
  u32 thread_index = session_thread_from_handle (handle);
  session_worker_t *wrk = &session_main.wrk[thread_index];

  if (thread_index == vlib_get_thread_index ())
    {
      return pool_elt_at_index (wrk->sessions,
				session_index_from_handle (handle));
    }
  else
    {
      session_pool_add_peeker (thread_index);
      /* Don't use pool_elt_at index. See @ref session_pool_add_peeker */
      return wrk->sessions + session_index_from_handle (handle);
    }
}

always_inline u32
session_get_index (session_t * s)
{
  return (s - session_main.wrk[s->thread_index].sessions);
}

always_inline session_t *
session_clone_safe (u32 session_index, u32 thread_index)
{
  session_t *old_s, *new_s;
  u32 current_thread_index = vlib_get_thread_index ();

  /* If during the memcpy pool is reallocated AND the memory allocator
   * decides to give the old chunk of memory to somebody in a hurry to
   * scribble something on it, we have a problem. So add this thread as
   * a session pool peeker.
   */
  session_pool_add_peeker (thread_index);
  new_s = session_alloc (current_thread_index);
  old_s = session_main.wrk[thread_index].sessions + session_index;
  clib_memcpy_fast (new_s, old_s, sizeof (*new_s));
  session_pool_remove_peeker (thread_index);
  new_s->thread_index = current_thread_index;
  new_s->session_index = session_get_index (new_s);
  return new_s;
}

int session_open (u32 app_index, session_endpoint_t * tep, u32 opaque);
int session_listen (session_t * s, session_endpoint_cfg_t * sep);
int session_stop_listen (session_t * s);
void session_close (session_t * s);
void session_reset (session_t * s);
void session_transport_close (session_t * s);
void session_transport_reset (session_t * s);
void session_transport_cleanup (session_t * s);
int session_send_io_evt_to_thread (svm_fifo_t * f,
				   session_evt_type_t evt_type);
int session_enqueue_notify (session_t * s);
int session_dequeue_notify (session_t * s);
int session_send_io_evt_to_thread_custom (void *data, u32 thread_index,
					  session_evt_type_t evt_type);
void session_send_rpc_evt_to_thread (u32 thread_index, void *fp,
				     void *rpc_args);
void session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp,
					   void *rpc_args);
void session_add_self_custom_tx_evt (transport_connection_t * tc,
				     u8 has_prio);
transport_connection_t *session_get_transport (session_t * s);
void session_get_endpoint (session_t * s, transport_endpoint_t * tep,
			   u8 is_lcl);

u8 *format_session (u8 * s, va_list * args);
uword unformat_session (unformat_input_t * input, va_list * args);
uword unformat_transport_connection (unformat_input_t * input,
				     va_list * args);

/*
 * Interface to transport protos
 */

int session_enqueue_stream_connection (transport_connection_t * tc,
				       vlib_buffer_t * b, u32 offset,
				       u8 queue_event, u8 is_in_order);
int session_enqueue_dgram_connection (session_t * s,
				      session_dgram_hdr_t * hdr,
				      vlib_buffer_t * b, u8 proto,
				      u8 queue_event);
int session_stream_connect_notify (transport_connection_t * tc, u8 is_fail);
int session_dgram_connect_notify (transport_connection_t * tc,
				  u32 old_thread_index,
				  session_t ** new_session);
int session_stream_accept_notify (transport_connection_t * tc);
void session_transport_closing_notify (transport_connection_t * tc);
void session_transport_delete_notify (transport_connection_t * tc);
void session_transport_closed_notify (transport_connection_t * tc);
void session_transport_reset_notify (transport_connection_t * tc);
int session_stream_accept (transport_connection_t * tc, u32 listener_index,
			   u32 thread_index, u8 notify);
void session_register_transport (transport_proto_t transport_proto,
				 const transport_proto_vft_t * vft, u8 is_ip4,
				 u32 output_node);
int session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer,
				u32 offset, u32 max_bytes);
u32 session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes);

always_inline u32
transport_max_rx_enqueue (transport_connection_t * tc)
{
  session_t *s = session_get (tc->s_index, tc->thread_index);
  return svm_fifo_max_enqueue_prod (s->rx_fifo);
}

always_inline u32
transport_max_tx_dequeue (transport_connection_t * tc)
{
  session_t *s = session_get (tc->s_index, tc->thread_index);
  return svm_fifo_max_dequeue_cons (s->tx_fifo);
}

always_inline u32
transport_max_rx_dequeue (transport_connection_t * tc)
{
  session_t *s = session_get (tc->s_index, tc->thread_index);
  return svm_fifo_max_dequeue (s->rx_fifo);
}

always_inline u32
transport_rx_fifo_size (transport_connection_t * tc)
{
  session_t *s = session_get (tc->s_index, tc->thread_index);
  return s->rx_fifo->nitems;
}

always_inline u32
transport_tx_fifo_size (transport_connection_t * tc)
{
  session_t *s = session_get (tc->s_index, tc->thread_index);
  return s->tx_fifo->nitems;
}

always_inline u8
transport_rx_fifo_has_ooo_data (transport_connection_t * tc)
{
  session_t *s = session_get (tc->c_index, tc->thread_index);
  return svm_fifo_has_ooo_data (s->rx_fifo);
}

always_inline clib_time_type_t
transport_time_now (u32 thread_index)
{
  return session_main.wrk[thread_index].last_vlib_time;
}

always_inline clib_us_time_t
transport_us_time_now (u32 thread_index)
{
  return session_main.wrk[thread_index].last_vlib_us_time;
}

always_inline void
transport_add_tx_event (transport_connection_t * tc)
{
  session_t *s = session_get (tc->s_index, tc->thread_index);
  if (svm_fifo_has_event (s->tx_fifo))
    return;
  session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX);
}

/*
 * Listen sessions
 */

always_inline u64
listen_session_get_handle (session_t * s)
{
  ASSERT (s->session_state == SESSION_STATE_LISTENING ||
	  session_get_transport_proto (s) == TRANSPORT_PROTO_QUIC);
  return session_handle (s);
}

always_inline session_t *
listen_session_get_from_handle (session_handle_t handle)
{
  return session_get_from_handle (handle);
}

always_inline void
listen_session_parse_handle (session_handle_t handle, u32 * index,
			     u32 * thread_index)
{
  session_parse_handle (handle, index, thread_index);
}

always_inline session_t *
listen_session_alloc (u8 thread_index, session_type_t type)
{
  session_t *s;
  s = session_alloc (thread_index);
  s->session_type = type;
  s->session_state = SESSION_STATE_LISTENING;
  return s;
}

always_inline session_t *
listen_session_get (u32 ls_index)
{
  return session_get (ls_index, 0);
}

always_inline void
listen_session_free (session_t * s)
{
  session_free (s);
}

transport_connection_t *listen_session_get_transport (session_t * s);

/*
 * Session layer functions
 */

always_inline session_main_t *
vnet_get_session_main ()
{
  return &session_main;
}

always_inline session_worker_t *
session_main_get_worker (u32 thread_index)
{
  return &session_main.wrk[thread_index];
}

static inline session_worker_t *
session_main_get_worker_if_valid (u32 thread_index)
{
  if (thread_index > vec_len (session_main.wrk))
    return 0;
  return &session_main.wrk[thread_index];
}

always_inline svm_msg_q_t *
session_main_get_vpp_event_queue (u32 thread_index)
{
  return session_main.wrk[thread_index].vpp_event_queue;
}

always_inline u8
session_main_is_enabled ()
{
  return session_main.is_enabled == 1;
}

#define session_cli_return_if_not_enabled()				\
do {									\
    if (!session_main.is_enabled)					\
      return clib_error_return (0, "session layer is not enabled");	\
} while (0)

int session_main_flush_enqueue_events (u8 proto, u32 thread_index);
int session_main_flush_all_enqueue_events (u8 transport_proto);
void session_flush_frames_main_thread (vlib_main_t * vm);

always_inline void
session_add_pending_tx_buffer (session_type_t st, u32 thread_index, u32 bi)
{
  session_worker_t *wrk = session_main_get_worker (thread_index);
  vec_add1 (wrk->pending_tx_buffers, bi);
  vec_add1 (wrk->pending_tx_nexts, session_main.session_type_to_next[st]);
}

ssvm_private_t *session_main_get_evt_q_segment (void);
void session_node_enable_disable (u8 is_en);
clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en);

#endif /* __included_session_h__ */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */