From 04e5344a358a9ad42d896486d2d226149fd326f4 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Sun, 16 Jul 2017 17:12:15 -0700 Subject: Cleanup/refactor session layer code Change-Id: Ica99e8cb919fca6b069c37c969d60e8ccc2c6bf9 Signed-off-by: Florin Coras --- src/vnet/session/hashes.c | 28 -- src/vnet/session/node.c | 685 --------------------------------- src/vnet/session/session.c | 506 +----------------------- src/vnet/session/session.h | 164 +------- src/vnet/session/session_lookup.c | 620 +++++++++++++++++++++++++++++ src/vnet/session/session_lookup.h | 101 +++++ src/vnet/session/session_node.c | 685 +++++++++++++++++++++++++++++++++ src/vnet/session/stream_session.h | 98 +++++ src/vnet/session/transport.c | 64 --- src/vnet/session/transport.h | 174 +-------- src/vnet/session/transport_interface.c | 106 +++++ src/vnet/session/transport_interface.h | 82 ++++ 12 files changed, 1729 insertions(+), 1584 deletions(-) delete mode 100644 src/vnet/session/hashes.c delete mode 100644 src/vnet/session/node.c create mode 100644 src/vnet/session/session_lookup.c create mode 100644 src/vnet/session/session_lookup.h create mode 100644 src/vnet/session/session_node.c create mode 100644 src/vnet/session/stream_session.h delete mode 100644 src/vnet/session/transport.c create mode 100644 src/vnet/session/transport_interface.c create mode 100644 src/vnet/session/transport_interface.h (limited to 'src/vnet/session') diff --git a/src/vnet/session/hashes.c b/src/vnet/session/hashes.c deleted file mode 100644 index 1808dd73f90..00000000000 --- a/src/vnet/session/hashes.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Generate typed init functions for multiple hash table styles... */ - -#include -#include - -#include - -#undef __included_bihash_template_h__ - -#include -#include - -#include diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c deleted file mode 100644 index 8d703b0b302..00000000000 --- a/src/vnet/session/node.c +++ /dev/null @@ -1,685 +0,0 @@ -/* - * Copyright (c) 2017 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -vlib_node_registration_t session_queue_node; - -typedef struct -{ - u32 session_index; - u32 server_thread_index; -} session_queue_trace_t; - -/* packet trace format function */ -static u8 * -format_session_queue_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - session_queue_trace_t *t = va_arg (*args, session_queue_trace_t *); - - s = format (s, "SESSION_QUEUE: session index %d, server thread index %d", - t->session_index, t->server_thread_index); - return s; -} - -vlib_node_registration_t session_queue_node; - -#define foreach_session_queue_error \ -_(TX, "Packets transmitted") \ -_(TIMER, "Timer events") \ -_(NO_BUFFER, "Out of buffers") - -typedef enum -{ -#define _(sym,str) SESSION_QUEUE_ERROR_##sym, - foreach_session_queue_error -#undef _ - SESSION_QUEUE_N_ERROR, -} session_queue_error_t; - -static char *session_queue_error_strings[] = { -#define _(sym,string) string, - foreach_session_queue_error -#undef _ -}; - -static u32 session_type_to_next[] = { - SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT, - SESSION_QUEUE_NEXT_IP4_LOOKUP, - SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT, - SESSION_QUEUE_NEXT_IP6_LOOKUP, -}; - -always_inline void -session_tx_fifo_chain_tail (session_manager_main_t * smm, vlib_main_t * vm, - u8 thread_index, svm_fifo_t * fifo, - vlib_buffer_t * b0, u32 bi0, u8 n_bufs_per_seg, - u32 * left_to_snd0, u16 * n_bufs, u32 * rx_offset, - u16 deq_per_buf, u8 peek_data) -{ - vlib_buffer_t *chain_b0, *prev_b0; - u32 chain_bi0; - u16 len_to_deq0, n_bytes_read; - u8 *data0, j; - - chain_bi0 = bi0; - chain_b0 = b0; - for (j = 1; j < n_bufs_per_seg; j++) - { - prev_b0 = chain_b0; - len_to_deq0 = clib_min (*left_to_snd0, deq_per_buf); - - *n_bufs -= 1; - chain_bi0 = smm->tx_buffers[thread_index][*n_bufs]; - _vec_len (smm->tx_buffers[thread_index]) = *n_bufs; - - 
chain_b0 = vlib_get_buffer (vm, chain_bi0); - chain_b0->current_data = 0; - data0 = vlib_buffer_get_current (chain_b0); - if (peek_data) - { - n_bytes_read = svm_fifo_peek (fifo, *rx_offset, len_to_deq0, data0); - *rx_offset += n_bytes_read; - } - else - { - n_bytes_read = svm_fifo_dequeue_nowait (fifo, len_to_deq0, data0); - } - ASSERT (n_bytes_read == len_to_deq0); - chain_b0->current_length = n_bytes_read; - b0->total_length_not_including_first_buffer += chain_b0->current_length; - - /* update previous buffer */ - prev_b0->next_buffer = chain_bi0; - prev_b0->flags |= VLIB_BUFFER_NEXT_PRESENT; - - /* update current buffer */ - chain_b0->next_buffer = 0; - - *left_to_snd0 -= n_bytes_read; - if (*left_to_snd0 == 0) - break; - } -} - -always_inline int -session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, - session_manager_main_t * smm, - session_fifo_event_t * e0, - stream_session_t * s0, u32 thread_index, - int *n_tx_packets, u8 peek_data) -{ - u32 n_trace = vlib_get_trace_count (vm, node); - u32 left_to_snd0, max_len_to_snd0, len_to_deq0, snd_space0; - u32 n_bufs_per_evt, n_frames_per_evt; - transport_connection_t *tc0; - transport_proto_vft_t *transport_vft; - u32 next_index, next0, *to_next, n_left_to_next, bi0; - vlib_buffer_t *b0; - u32 rx_offset = 0, max_dequeue0, n_bytes_per_seg; - u16 snd_mss0, n_bufs_per_seg, n_bufs; - u8 *data0; - int i, n_bytes_read; - u32 n_bytes_per_buf, deq_per_buf; - u32 buffers_allocated, buffers_allocated_this_call; - - next_index = next0 = session_type_to_next[s0->session_type]; - - transport_vft = session_get_transport_vft (s0->session_type); - tc0 = transport_vft->get_connection (s0->connection_index, thread_index); - - /* Make sure we have space to send and there's something to dequeue */ - snd_mss0 = transport_vft->send_mss (tc0); - snd_space0 = transport_vft->send_space (tc0); - - /* Can't make any progress */ - if (snd_space0 == 0 || snd_mss0 == 0) - { - vec_add1 
(smm->pending_event_vector[thread_index], *e0); - return 0; - } - - if (peek_data) - { - /* Offset in rx fifo from where to peek data */ - rx_offset = transport_vft->tx_fifo_offset (tc0); - } - - /* Check how much we can pull. If buffering, subtract the offset */ - max_dequeue0 = svm_fifo_max_dequeue (s0->server_tx_fifo) - rx_offset; - - /* Nothing to read return */ - if (max_dequeue0 == 0) - { - svm_fifo_unset_event (s0->server_tx_fifo); - return 0; - } - - /* Ensure we're not writing more than transport window allows */ - if (max_dequeue0 < snd_space0) - { - /* Constrained by tx queue. Try to send only fully formed segments */ - max_len_to_snd0 = (max_dequeue0 > snd_mss0) ? - max_dequeue0 - max_dequeue0 % snd_mss0 : max_dequeue0; - /* TODO Nagle ? */ - } - else - { - max_len_to_snd0 = snd_space0; - } - - n_bytes_per_buf = vlib_buffer_free_list_buffer_size - (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - n_bytes_per_seg = MAX_HDRS_LEN + snd_mss0; - n_bufs_per_seg = ceil ((double) n_bytes_per_seg / n_bytes_per_buf); - n_bufs_per_evt = (ceil ((double) max_len_to_snd0 / n_bytes_per_seg)) - * n_bufs_per_seg; - n_frames_per_evt = ceil ((double) n_bufs_per_evt / VLIB_FRAME_SIZE); - - deq_per_buf = clib_min (snd_mss0, n_bytes_per_buf); - - n_bufs = vec_len (smm->tx_buffers[thread_index]); - left_to_snd0 = max_len_to_snd0; - for (i = 0; i < n_frames_per_evt; i++) - { - /* Make sure we have at least one full frame of buffers ready */ - if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE)) - { - vec_validate (smm->tx_buffers[thread_index], - n_bufs + 2 * VLIB_FRAME_SIZE - 1); - - buffers_allocated = 0; - do - { - buffers_allocated_this_call = - vlib_buffer_alloc - (vm, - &smm->tx_buffers[thread_index][n_bufs + buffers_allocated], - 2 * VLIB_FRAME_SIZE - buffers_allocated); - buffers_allocated += buffers_allocated_this_call; - } - while (buffers_allocated_this_call > 0 - && ((buffers_allocated + n_bufs < VLIB_FRAME_SIZE))); - - n_bufs += buffers_allocated; - - _vec_len 
(smm->tx_buffers[thread_index]) = n_bufs; - - if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE)) - { - vec_add1 (smm->pending_event_vector[thread_index], *e0); - return -1; - } - } - /* Allow enqueuing of a new event */ - svm_fifo_unset_event (s0->server_tx_fifo); - - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (left_to_snd0 && n_left_to_next >= n_bufs_per_seg) - { - /* - * Handle first buffer in chain separately - */ - - /* Get free buffer */ - ASSERT (n_bufs >= 1); - bi0 = smm->tx_buffers[thread_index][--n_bufs]; - ASSERT (bi0); - _vec_len (smm->tx_buffers[thread_index]) = n_bufs; - - /* usual speculation, or the enqueue_x1 macro will barf */ - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - - b0 = vlib_get_buffer (vm, bi0); - b0->error = 0; - b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID - | VNET_BUFFER_F_LOCALLY_ORIGINATED; - b0->current_data = 0; - b0->total_length_not_including_first_buffer = 0; - - len_to_deq0 = clib_min (left_to_snd0, deq_per_buf); - - data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN); - if (peek_data) - { - n_bytes_read = svm_fifo_peek (s0->server_tx_fifo, rx_offset, - len_to_deq0, data0); - /* Keep track of progress locally, transport is also supposed to - * increment it independently when pushing the header */ - rx_offset += n_bytes_read; - } - else - { - n_bytes_read = svm_fifo_dequeue_nowait (s0->server_tx_fifo, - len_to_deq0, data0); - } - - if (n_bytes_read <= 0) - goto dequeue_fail; - - b0->current_length = n_bytes_read; - - left_to_snd0 -= n_bytes_read; - *n_tx_packets = *n_tx_packets + 1; - - /* - * Fill in the remaining buffers in the chain, if any - */ - if (PREDICT_FALSE (n_bufs_per_seg > 1)) - session_tx_fifo_chain_tail (smm, vm, thread_index, - s0->server_tx_fifo, b0, bi0, - n_bufs_per_seg, &left_to_snd0, - &n_bufs, &rx_offset, deq_per_buf, - peek_data); - - /* Ask transport to push header after current_length and - * total_length_not_including_first_buffer are updated */ - 
transport_vft->push_header (tc0, b0); - - /* *INDENT-OFF* */ - SESSION_EVT_DBG(SESSION_EVT_DEQ, s0, ({ - ed->data[0] = e0->event_id; - ed->data[1] = max_dequeue0; - ed->data[2] = len_to_deq0; - ed->data[3] = left_to_snd0; - })); - /* *INDENT-ON* */ - - - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - if (PREDICT_FALSE (n_trace > 0)) - { - session_queue_trace_t *t0; - vlib_trace_buffer (vm, node, next_index, b0, - 1 /* follow_chain */ ); - vlib_set_trace_count (vm, node, --n_trace); - t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); - t0->session_index = s0->session_index; - t0->server_thread_index = s0->thread_index; - } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, - to_next, n_left_to_next, - bi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); - } - - /* If we couldn't dequeue all bytes mark as partially read */ - if (max_len_to_snd0 < max_dequeue0) - { - /* If we don't already have new event */ - if (svm_fifo_set_event (s0->server_tx_fifo)) - { - vec_add1 (smm->pending_event_vector[thread_index], *e0); - } - } - return 0; - -dequeue_fail: - /* - * Can't read from fifo. 
If we don't already have an event, save as partially - * read, return buff to free list and return - */ - clib_warning ("dequeue fail"); - - if (svm_fifo_set_event (s0->server_tx_fifo)) - { - vec_add1 (smm->pending_event_vector[thread_index], *e0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1); - _vec_len (smm->tx_buffers[thread_index]) += 1; - - return 0; -} - -int -session_tx_fifo_peek_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node, - session_manager_main_t * smm, - session_fifo_event_t * e0, - stream_session_t * s0, u32 thread_index, - int *n_tx_pkts) -{ - return session_tx_fifo_read_and_snd_i (vm, node, smm, e0, s0, thread_index, - n_tx_pkts, 1); -} - -int -session_tx_fifo_dequeue_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node, - session_manager_main_t * smm, - session_fifo_event_t * e0, - stream_session_t * s0, u32 thread_index, - int *n_tx_pkts) -{ - return session_tx_fifo_read_and_snd_i (vm, node, smm, e0, s0, thread_index, - n_tx_pkts, 0); -} - -always_inline stream_session_t * -session_event_get_session (session_fifo_event_t * e, u8 thread_index) -{ - ASSERT (e->fifo->master_thread_index == thread_index); - return stream_session_get_if_valid (e->fifo->master_session_index, - thread_index); -} - -void -dump_thread_0_event_queue (void) -{ - session_manager_main_t *smm = vnet_get_session_manager_main (); - vlib_main_t *vm = &vlib_global_main; - u32 my_thread_index = vm->thread_index; - session_fifo_event_t _e, *e = &_e; - stream_session_t *s0; - int i, index; - i8 *headp; - - unix_shared_memory_queue_t *q; - q = smm->vpp_event_queues[my_thread_index]; - - index = q->head; - - for (i = 0; i < q->cursize; i++) - { - headp = (i8 *) (&q->data[0] + q->elsize * index); - clib_memcpy (e, headp, q->elsize); - - switch (e->event_type) - { - case FIFO_EVENT_APP_TX: - s0 = session_event_get_session (e, my_thread_index); - fformat (stdout, "[%04d] TX session %d\n", i, s0->session_index); - break; - - case FIFO_EVENT_DISCONNECT: - s0 = 
stream_session_get_from_handle (e->session_handle); - fformat (stdout, "[%04d] disconnect session %d\n", i, - s0->session_index); - break; - - case FIFO_EVENT_BUILTIN_RX: - s0 = session_event_get_session (e, my_thread_index); - fformat (stdout, "[%04d] builtin_rx %d\n", i, s0->session_index); - break; - - case FIFO_EVENT_RPC: - fformat (stdout, "[%04d] RPC call %llx with %llx\n", - i, (u64) (e->rpc_args.fp), (u64) (e->rpc_args.arg)); - break; - - default: - fformat (stdout, "[%04d] unhandled event type %d\n", - i, e->event_type); - break; - } - - index++; - - if (index == q->maxsize) - index = 0; - } -} - -static u8 -session_node_cmp_event (session_fifo_event_t * e, svm_fifo_t * f) -{ - stream_session_t *s; - switch (e->event_type) - { - case FIFO_EVENT_APP_RX: - case FIFO_EVENT_APP_TX: - case FIFO_EVENT_BUILTIN_RX: - if (e->fifo == f) - return 1; - break; - case FIFO_EVENT_DISCONNECT: - break; - case FIFO_EVENT_RPC: - s = stream_session_get_from_handle (e->session_handle); - if (!s) - { - clib_warning ("session has event but doesn't exist!"); - break; - } - if (s->server_rx_fifo == f || s->server_tx_fifo == f) - return 1; - break; - default: - break; - } - return 0; -} - -u8 -session_node_lookup_fifo_event (svm_fifo_t * f, session_fifo_event_t * e) -{ - session_manager_main_t *smm = vnet_get_session_manager_main (); - unix_shared_memory_queue_t *q; - session_fifo_event_t *pending_event_vector, *evt; - int i, index, found = 0; - i8 *headp; - u8 thread_index; - - ASSERT (e); - thread_index = f->master_thread_index; - /* - * Search evt queue - */ - q = smm->vpp_event_queues[thread_index]; - index = q->head; - for (i = 0; i < q->cursize; i++) - { - headp = (i8 *) (&q->data[0] + q->elsize * index); - clib_memcpy (e, headp, q->elsize); - found = session_node_cmp_event (e, f); - if (found) - break; - if (++index == q->maxsize) - index = 0; - } - /* - * Search pending events vector - */ - pending_event_vector = smm->pending_event_vector[thread_index]; - vec_foreach (evt, 
pending_event_vector) - { - found = session_node_cmp_event (evt, f); - if (found) - { - clib_memcpy (e, evt, sizeof (*evt)); - break; - } - } - return found; -} - -static uword -session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - session_manager_main_t *smm = vnet_get_session_manager_main (); - session_fifo_event_t *my_pending_event_vector, *e; - session_fifo_event_t *my_fifo_events; - u32 n_to_dequeue, n_events; - unix_shared_memory_queue_t *q; - application_t *app; - int n_tx_packets = 0; - u32 my_thread_index = vm->thread_index; - int i, rv; - f64 now = vlib_time_now (vm); - void (*fp) (void *); - - SESSION_EVT_DBG (SESSION_EVT_POLL_GAP_TRACK, smm, my_thread_index); - - /* - * Update TCP time - */ - tcp_update_time (now, my_thread_index); - - /* - * Get vpp queue events - */ - q = smm->vpp_event_queues[my_thread_index]; - if (PREDICT_FALSE (q == 0)) - return 0; - - my_fifo_events = smm->free_event_vector[my_thread_index]; - - /* min number of events we can dequeue without blocking */ - n_to_dequeue = q->cursize; - my_pending_event_vector = smm->pending_event_vector[my_thread_index]; - - if (n_to_dequeue == 0 && vec_len (my_pending_event_vector) == 0) - return 0; - - SESSION_EVT_DBG (SESSION_EVT_DEQ_NODE, 0); - - /* - * If we didn't manage to process previous events try going - * over them again without dequeuing new ones. 
- */ - /* XXX: Block senders to sessions that can't keep up */ - if (0 && vec_len (my_pending_event_vector) >= 100) - { - clib_warning ("too many fifo events unsolved"); - goto skip_dequeue; - } - - /* See you in the next life, don't be late */ - if (pthread_mutex_trylock (&q->mutex)) - return 0; - - for (i = 0; i < n_to_dequeue; i++) - { - vec_add2 (my_fifo_events, e, 1); - unix_shared_memory_queue_sub_raw (q, (u8 *) e); - } - - /* The other side of the connection is not polling */ - if (q->cursize < (q->maxsize / 8)) - (void) pthread_cond_broadcast (&q->condvar); - pthread_mutex_unlock (&q->mutex); - - vec_append (my_fifo_events, my_pending_event_vector); - - _vec_len (my_pending_event_vector) = 0; - smm->pending_event_vector[my_thread_index] = my_pending_event_vector; - -skip_dequeue: - n_events = vec_len (my_fifo_events); - for (i = 0; i < n_events; i++) - { - stream_session_t *s0; /* $$$ prefetch 1 ahead maybe */ - session_fifo_event_t *e0; - - e0 = &my_fifo_events[i]; - - switch (e0->event_type) - { - case FIFO_EVENT_APP_TX: - s0 = session_event_get_session (e0, my_thread_index); - - if (CLIB_DEBUG && !s0) - { - clib_warning ("It's dead, Jim!"); - continue; - } - - if (PREDICT_FALSE (s0->session_state == SESSION_STATE_CLOSED)) - continue; - /* Spray packets in per session type frames, since they go to - * different nodes */ - rv = (smm->session_tx_fns[s0->session_type]) (vm, node, smm, e0, s0, - my_thread_index, - &n_tx_packets); - /* Out of buffers */ - if (PREDICT_FALSE (rv < 0)) - { - vlib_node_increment_counter (vm, node->node_index, - SESSION_QUEUE_ERROR_NO_BUFFER, 1); - continue; - } - break; - case FIFO_EVENT_DISCONNECT: - s0 = stream_session_get_from_handle (e0->session_handle); - stream_session_disconnect (s0); - break; - case FIFO_EVENT_BUILTIN_RX: - s0 = session_event_get_session (e0, my_thread_index); - svm_fifo_unset_event (s0->server_rx_fifo); - app = application_get (s0->app_index); - app->cb_fns.builtin_server_rx_callback (s0); - break; - case 
FIFO_EVENT_RPC: - fp = e0->rpc_args.fp; - (*fp) (e0->rpc_args.arg); - break; - - default: - clib_warning ("unhandled event type %d", e0->event_type); - } - } - - _vec_len (my_fifo_events) = 0; - smm->free_event_vector[my_thread_index] = my_fifo_events; - - vlib_node_increment_counter (vm, session_queue_node.index, - SESSION_QUEUE_ERROR_TX, n_tx_packets); - - SESSION_EVT_DBG (SESSION_EVT_DEQ_NODE, 1); - - return n_tx_packets; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (session_queue_node) = -{ - .function = session_queue_node_fn, - .name = "session-queue", - .format_trace = format_session_queue_trace, - .type = VLIB_NODE_TYPE_INPUT, - .n_errors = ARRAY_LEN (session_queue_error_strings), - .error_strings = session_queue_error_strings, - .n_next_nodes = SESSION_QUEUE_N_NEXT, - .state = VLIB_NODE_STATE_DISABLED, - .next_nodes = - { - [SESSION_QUEUE_NEXT_DROP] = "error-drop", - [SESSION_QUEUE_NEXT_IP4_LOOKUP] = "ip4-lookup", - [SESSION_QUEUE_NEXT_IP6_LOOKUP] = "ip6-lookup", - [SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT] = "tcp4-output", - [SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT] = "tcp6-output", - }, -}; -/* *INDENT-ON* */ - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 09bc00e745a..48000a6fee6 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -18,455 +18,15 @@ */ #include +#include +#include #include #include #include -#include #include -#include - -/** - * Per-type vector of transport protocol virtual function tables - */ -static transport_proto_vft_t *tp_vfts; session_manager_main_t session_manager_main; - -transport_connection_t * -stream_session_lookup_half_open (transport_connection_t * tc) -{ - session_manager_main_t *smm = &session_manager_main; - session_kv4_t kv4; - int rv; - if (tc->is_ip4) - { - make_v4_ss_kv_from_tc (&kv4, tc); - rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4); 
- if (rv == 0) - return tp_vfts[tc->proto].get_half_open (kv4.value & 0xFFFFFFFFULL); - } - return 0; -} - -/* - * Session lookup key; (src-ip, dst-ip, src-port, dst-port, session-type) - * Value: (owner thread index << 32 | session_index); - */ -void -stream_session_table_add_for_tc (transport_connection_t * tc, u64 value) -{ - session_manager_main_t *smm = &session_manager_main; - session_kv4_t kv4; - session_kv6_t kv6; - - switch (tc->proto) - { - case SESSION_TYPE_IP4_UDP: - case SESSION_TYPE_IP4_TCP: - make_v4_ss_kv_from_tc (&kv4, tc); - kv4.value = value; - clib_bihash_add_del_16_8 (&smm->v4_session_hash, &kv4, 1 /* is_add */ ); - break; - case SESSION_TYPE_IP6_UDP: - case SESSION_TYPE_IP6_TCP: - make_v6_ss_kv_from_tc (&kv6, tc); - kv6.value = value; - clib_bihash_add_del_48_8 (&smm->v6_session_hash, &kv6, 1 /* is_add */ ); - break; - default: - clib_warning ("Session type not supported"); - ASSERT (0); - } -} - -void -stream_session_table_add (session_manager_main_t * smm, stream_session_t * s, - u64 value) -{ - transport_connection_t *tc; - - tc = tp_vfts[s->session_type].get_connection (s->connection_index, - s->thread_index); - stream_session_table_add_for_tc (tc, value); -} - -static void -stream_session_half_open_table_add (session_type_t sst, - transport_connection_t * tc, u64 value) -{ - session_manager_main_t *smm = &session_manager_main; - session_kv4_t kv4; - session_kv6_t kv6; - - switch (sst) - { - case SESSION_TYPE_IP4_UDP: - case SESSION_TYPE_IP4_TCP: - make_v4_ss_kv_from_tc (&kv4, tc); - kv4.value = value; - clib_bihash_add_del_16_8 (&smm->v4_half_open_hash, &kv4, - 1 /* is_add */ ); - break; - case SESSION_TYPE_IP6_UDP: - case SESSION_TYPE_IP6_TCP: - make_v6_ss_kv_from_tc (&kv6, tc); - kv6.value = value; - clib_bihash_add_del_48_8 (&smm->v6_half_open_hash, &kv6, - 1 /* is_add */ ); - break; - default: - clib_warning ("Session type not supported"); - ASSERT (0); - } -} - -int -stream_session_table_del_for_tc (transport_connection_t * tc) -{ - 
session_manager_main_t *smm = &session_manager_main; - session_kv4_t kv4; - session_kv6_t kv6; - switch (tc->proto) - { - case SESSION_TYPE_IP4_UDP: - case SESSION_TYPE_IP4_TCP: - make_v4_ss_kv_from_tc (&kv4, tc); - return clib_bihash_add_del_16_8 (&smm->v4_session_hash, &kv4, - 0 /* is_add */ ); - break; - case SESSION_TYPE_IP6_UDP: - case SESSION_TYPE_IP6_TCP: - make_v6_ss_kv_from_tc (&kv6, tc); - return clib_bihash_add_del_48_8 (&smm->v6_session_hash, &kv6, - 0 /* is_add */ ); - break; - default: - clib_warning ("Session type not supported"); - ASSERT (0); - } - - return 0; -} - -static int -stream_session_table_del (session_manager_main_t * smm, stream_session_t * s) -{ - transport_connection_t *ts; - - ts = tp_vfts[s->session_type].get_connection (s->connection_index, - s->thread_index); - return stream_session_table_del_for_tc (ts); -} - -static void -stream_session_half_open_table_del (session_manager_main_t * smm, u8 sst, - transport_connection_t * tc) -{ - session_kv4_t kv4; - session_kv6_t kv6; - - switch (sst) - { - case SESSION_TYPE_IP4_UDP: - case SESSION_TYPE_IP4_TCP: - make_v4_ss_kv_from_tc (&kv4, tc); - clib_bihash_add_del_16_8 (&smm->v4_half_open_hash, &kv4, - 0 /* is_add */ ); - break; - case SESSION_TYPE_IP6_UDP: - case SESSION_TYPE_IP6_TCP: - make_v6_ss_kv_from_tc (&kv6, tc); - clib_bihash_add_del_48_8 (&smm->v6_half_open_hash, &kv6, - 0 /* is_add */ ); - break; - default: - clib_warning ("Session type not supported"); - ASSERT (0); - } -} - -stream_session_t * -stream_session_lookup_listener4 (ip4_address_t * lcl, u16 lcl_port, u8 proto) -{ - session_manager_main_t *smm = &session_manager_main; - session_kv4_t kv4; - int rv; - - make_v4_listener_kv (&kv4, lcl, lcl_port, proto); - rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); - if (rv == 0) - return pool_elt_at_index (smm->listen_sessions[proto], (u32) kv4.value); - - /* Zero out the lcl ip */ - kv4.key[0] = 0; - rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, 
&kv4); - if (rv == 0) - return pool_elt_at_index (smm->listen_sessions[proto], kv4.value); - - return 0; -} - -/** Looks up a session based on the 5-tuple passed as argument. - * - * First it tries to find an established session, if this fails, it tries - * finding a listener session if this fails, it tries a lookup with a - * wildcarded local source (listener bound to all interfaces) - */ -stream_session_t * -stream_session_lookup4 (ip4_address_t * lcl, ip4_address_t * rmt, - u16 lcl_port, u16 rmt_port, u8 proto) -{ - session_manager_main_t *smm = &session_manager_main; - session_kv4_t kv4; - stream_session_t *s; - int rv; - - /* Lookup session amongst established ones */ - make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); - rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); - if (rv == 0) - return stream_session_get_from_handle (kv4.value); - - /* If nothing is found, check if any listener is available */ - if ((s = stream_session_lookup_listener4 (lcl, lcl_port, proto))) - return s; - - /* Finally, try half-open connections */ - rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4); - if (rv == 0) - return stream_session_get_from_handle (kv4.value); - return 0; -} - -stream_session_t * -stream_session_lookup_listener6 (ip6_address_t * lcl, u16 lcl_port, u8 proto) -{ - session_manager_main_t *smm = &session_manager_main; - session_kv6_t kv6; - int rv; - - make_v6_listener_kv (&kv6, lcl, lcl_port, proto); - rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); - if (rv == 0) - return pool_elt_at_index (smm->listen_sessions[proto], kv6.value); - - /* Zero out the lcl ip */ - kv6.key[0] = kv6.key[1] = 0; - rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); - if (rv == 0) - return pool_elt_at_index (smm->listen_sessions[proto], kv6.value); - - return 0; -} - -/* Looks up a session based on the 5-tuple passed as argument. 
- * First it tries to find an established session, if this fails, it tries - * finding a listener session if this fails, it tries a lookup with a - * wildcarded local source (listener bound to all interfaces) */ -stream_session_t * -stream_session_lookup6 (ip6_address_t * lcl, ip6_address_t * rmt, - u16 lcl_port, u16 rmt_port, u8 proto) -{ - session_manager_main_t *smm = vnet_get_session_manager_main (); - session_kv6_t kv6; - stream_session_t *s; - int rv; - - make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); - rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); - if (rv == 0) - return stream_session_get_from_handle (kv6.value); - - /* If nothing is found, check if any listener is available */ - if ((s = stream_session_lookup_listener6 (lcl, lcl_port, proto))) - return s; - - /* Finally, try half-open connections */ - rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6); - if (rv == 0) - return stream_session_get_from_handle (kv6.value); - return 0; -} - -stream_session_t * -stream_session_lookup_listener (ip46_address_t * lcl, u16 lcl_port, u8 proto) -{ - switch (proto) - { - case SESSION_TYPE_IP4_UDP: - case SESSION_TYPE_IP4_TCP: - return stream_session_lookup_listener4 (&lcl->ip4, lcl_port, proto); - break; - case SESSION_TYPE_IP6_UDP: - case SESSION_TYPE_IP6_TCP: - return stream_session_lookup_listener6 (&lcl->ip6, lcl_port, proto); - break; - } - return 0; -} - -static u64 -stream_session_half_open_lookup (session_manager_main_t * smm, - ip46_address_t * lcl, ip46_address_t * rmt, - u16 lcl_port, u16 rmt_port, u8 proto) -{ - session_kv4_t kv4; - session_kv6_t kv6; - int rv; - - switch (proto) - { - case SESSION_TYPE_IP4_UDP: - case SESSION_TYPE_IP4_TCP: - make_v4_ss_kv (&kv4, &lcl->ip4, &rmt->ip4, lcl_port, rmt_port, proto); - rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4); - - if (rv == 0) - return kv4.value; - - return (u64) ~ 0; - break; - case SESSION_TYPE_IP6_UDP: - case SESSION_TYPE_IP6_TCP: - 
make_v6_ss_kv (&kv6, &lcl->ip6, &rmt->ip6, lcl_port, rmt_port, proto); - rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6); - - if (rv == 0) - return kv6.value; - - return (u64) ~ 0; - break; - } - return 0; -} - -transport_connection_t * -stream_session_lookup_transport_wt4 (ip4_address_t * lcl, ip4_address_t * rmt, - u16 lcl_port, u16 rmt_port, u8 proto, - u32 my_thread_index) -{ - session_manager_main_t *smm = &session_manager_main; - session_kv4_t kv4; - stream_session_t *s; - int rv; - - /* Lookup session amongst established ones */ - make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); - rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); - if (rv == 0) - { - s = stream_session_get_tsi (kv4.value, my_thread_index); - - return tp_vfts[s->session_type].get_connection (s->connection_index, - my_thread_index); - } - - /* If nothing is found, check if any listener is available */ - s = stream_session_lookup_listener4 (lcl, lcl_port, proto); - if (s) - return tp_vfts[s->session_type].get_listener (s->connection_index); - - /* Finally, try half-open connections */ - rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4); - if (rv == 0) - return tp_vfts[proto].get_half_open (kv4.value & 0xFFFFFFFF); - return 0; -} - -transport_connection_t * -stream_session_lookup_transport4 (ip4_address_t * lcl, ip4_address_t * rmt, - u16 lcl_port, u16 rmt_port, u8 proto) -{ - session_manager_main_t *smm = &session_manager_main; - session_kv4_t kv4; - stream_session_t *s; - int rv; - - /* Lookup session amongst established ones */ - make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); - rv = clib_bihash_search_inline_16_8 (&smm->v4_session_hash, &kv4); - if (rv == 0) - { - s = stream_session_get_from_handle (kv4.value); - return tp_vfts[s->session_type].get_connection (s->connection_index, - s->thread_index); - } - - /* If nothing is found, check if any listener is available */ - s = stream_session_lookup_listener4 (lcl, 
lcl_port, proto); - if (s) - return tp_vfts[s->session_type].get_listener (s->connection_index); - - /* Finally, try half-open connections */ - rv = clib_bihash_search_inline_16_8 (&smm->v4_half_open_hash, &kv4); - if (rv == 0) - return tp_vfts[proto].get_half_open (kv4.value & 0xFFFFFFFF); - return 0; -} - -transport_connection_t * -stream_session_lookup_transport_wt6 (ip6_address_t * lcl, ip6_address_t * rmt, - u16 lcl_port, u16 rmt_port, u8 proto, - u32 my_thread_index) -{ - session_manager_main_t *smm = &session_manager_main; - stream_session_t *s; - session_kv6_t kv6; - int rv; - - make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); - rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); - if (rv == 0) - { - s = stream_session_get_tsi (kv6.value, my_thread_index); - - return tp_vfts[s->session_type].get_connection (s->connection_index, - my_thread_index); - } - - /* If nothing is found, check if any listener is available */ - s = stream_session_lookup_listener6 (lcl, lcl_port, proto); - if (s) - return tp_vfts[s->session_type].get_listener (s->connection_index); - - /* Finally, try half-open connections */ - rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6); - if (rv == 0) - return tp_vfts[proto].get_half_open (kv6.value & 0xFFFFFFFF); - - return 0; -} - -transport_connection_t * -stream_session_lookup_transport6 (ip6_address_t * lcl, ip6_address_t * rmt, - u16 lcl_port, u16 rmt_port, u8 proto) -{ - session_manager_main_t *smm = &session_manager_main; - stream_session_t *s; - session_kv6_t kv6; - int rv; - - make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); - rv = clib_bihash_search_inline_48_8 (&smm->v6_session_hash, &kv6); - if (rv == 0) - { - s = stream_session_get_from_handle (kv6.value); - return tp_vfts[s->session_type].get_connection (s->connection_index, - s->thread_index); - } - - /* If nothing is found, check if any listener is available */ - s = stream_session_lookup_listener6 (lcl, lcl_port, proto); - 
if (s) - return tp_vfts[s->session_type].get_listener (s->connection_index); - - /* Finally, try half-open connections */ - rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6); - if (rv == 0) - return tp_vfts[proto].get_half_open (kv6.value & 0xFFFFFFFF); - - return 0; -} +extern transport_proto_vft_t *tp_vfts; int stream_session_create_i (segment_manager_t * sm, transport_connection_t * tc, @@ -797,16 +357,15 @@ int stream_session_connect_notify (transport_connection_t * tc, u8 sst, u8 is_fail) { - session_manager_main_t *smm = &session_manager_main; application_t *app; stream_session_t *new_s = 0; u64 handle; u32 api_context = 0; int error = 0; - handle = stream_session_half_open_lookup (smm, &tc->lcl_ip, &tc->rmt_ip, - tc->lcl_port, tc->rmt_port, - tc->proto); + handle = stream_session_half_open_lookup_handle (&tc->lcl_ip, &tc->rmt_ip, + tc->lcl_port, tc->rmt_port, + tc->proto); if (handle == HALF_OPEN_LOOKUP_INVALID_VALUE) { clib_warning ("This can't be good!"); @@ -847,7 +406,7 @@ stream_session_connect_notify (transport_connection_t * tc, u8 sst, } /* Cleanup session lookup */ - stream_session_half_open_table_del (smm, sst, tc); + stream_session_half_open_table_del (sst, tc); return error; } @@ -891,7 +450,7 @@ stream_session_delete (stream_session_t * s) int rv; /* Delete from the main lookup table. 
*/ - if ((rv = stream_session_table_del (smm, s))) + if ((rv = stream_session_table_del (s))) clib_warning ("hash delete error, rv %d", rv); /* Cleanup fifo segments */ @@ -986,14 +545,14 @@ stream_session_accept (transport_connection_t * tc, u32 listener_index, */ int stream_session_open (u32 app_index, session_type_t st, - transport_endpoint_t * tep, + transport_endpoint_t * rmt, transport_connection_t ** res) { transport_connection_t *tc; int rv; u64 handle; - rv = tp_vfts[st].open (&tep->ip, tep->port); + rv = tp_vfts[st].open (rmt); if (rv < 0) { clib_warning ("Transport failed to open connection."); @@ -1030,7 +589,7 @@ stream_session_listen (stream_session_t * s, transport_endpoint_t * tep) u32 tci; /* Transport bind/listen */ - tci = tp_vfts[s->session_type].bind (s->session_index, &tep->ip, tep->port); + tci = tp_vfts[s->session_type].bind (s->session_index, tep); if (tci == (u32) ~ 0) return -1; @@ -1132,41 +691,18 @@ stream_session_disconnect (stream_session_t * s) void stream_session_cleanup (stream_session_t * s) { - session_manager_main_t *smm = &session_manager_main; int rv; s->session_state = SESSION_STATE_CLOSED; /* Delete from the main lookup table to avoid more enqueues */ - rv = stream_session_table_del (smm, s); + rv = stream_session_table_del (s); if (rv) clib_warning ("hash delete error, rv %d", rv); tp_vfts[s->session_type].cleanup (s->connection_index, s->thread_index); } -void -session_register_transport (u8 type, const transport_proto_vft_t * vft) -{ - session_manager_main_t *smm = vnet_get_session_manager_main (); - - vec_validate (tp_vfts, type); - tp_vfts[type] = *vft; - - /* If an offset function is provided, then peek instead of dequeue */ - smm->session_tx_fns[type] = - (vft->tx_fifo_offset) ? 
session_tx_fifo_peek_and_snd : - session_tx_fifo_dequeue_and_snd; -} - -transport_proto_vft_t * -session_get_transport_vft (u8 type) -{ - if (type >= vec_len (tp_vfts)) - return 0; - return &tp_vfts[type]; -} - /** * Allocate vpp event queue (once) per worker thread */ @@ -1269,19 +805,7 @@ session_manager_main_enable (vlib_main_t * vm) for (i = 0; i < smm->preallocated_sessions; i++) pool_put_index (smm->sessions[0], i); - clib_bihash_init_16_8 (&smm->v4_session_hash, "v4 session table", - 200000 /* $$$$ config parameter nbuckets */ , - (64 << 20) /*$$$ config parameter table size */ ); - clib_bihash_init_48_8 (&smm->v6_session_hash, "v6 session table", - 200000 /* $$$$ config parameter nbuckets */ , - (64 << 20) /*$$$ config parameter table size */ ); - - clib_bihash_init_16_8 (&smm->v4_half_open_hash, "v4 half-open table", - 200000 /* $$$$ config parameter nbuckets */ , - (64 << 20) /*$$$ config parameter table size */ ); - clib_bihash_init_48_8 (&smm->v6_half_open_hash, "v6 half-open table", - 200000 /* $$$$ config parameter nbuckets */ , - (64 << 20) /*$$$ config parameter table size */ ); + session_lookup_init (); smm->is_enabled = 1; @@ -1328,11 +852,7 @@ clib_error_t * session_manager_main_init (vlib_main_t * vm) { session_manager_main_t *smm = &session_manager_main; - - smm->vlib_main = vm; - smm->vnet_main = vnet_get_main (); smm->is_enabled = 0; - return 0; } diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index 6c6163260f8..bb22f100529 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -15,7 +15,9 @@ #ifndef __included_session_h__ #define __included_session_h__ -#include +#include +#include +#include #include #include #include @@ -66,37 +68,6 @@ typedef enum SESSION_QUEUE_N_NEXT, } session_queue_next_t; -#define foreach_session_type \ - _(IP4_TCP, ip4_tcp) \ - _(IP4_UDP, ip4_udp) \ - _(IP6_TCP, ip6_tcp) \ - _(IP6_UDP, ip6_udp) - -typedef enum -{ -#define _(A, a) SESSION_TYPE_##A, - foreach_session_type 
-#undef _ - SESSION_N_TYPES, -} session_type_t; - - -session_type_t -session_type_from_proto_and_ip (transport_proto_t proto, u8 is_ip4); - -/* - * Application session state - */ -typedef enum -{ - SESSION_STATE_LISTENING, - SESSION_STATE_CONNECTING, - SESSION_STATE_ACCEPTING, - SESSION_STATE_READY, - SESSION_STATE_CLOSED, - SESSION_STATE_N_STATES, -} stream_session_state_t; - typedef struct { void *fp; @@ -116,48 +87,6 @@ typedef CLIB_PACKED (struct { }) session_fifo_event_t; /* *INDENT-ON* */ -typedef struct _stream_session_t -{ - /** fifo pointers. Once allocated, these do not move */ - svm_fifo_t *server_rx_fifo; - svm_fifo_t *server_tx_fifo; - - /** Type */ - u8 session_type; - - /** State */ - u8 session_state; - - u8 thread_index; - - /** To avoid n**2 "one event per frame" check */ - u8 enqueue_epoch; - - /** Pad to a multiple of 8 octets */ - u8 align_pad[4]; - - /** svm segment index where fifos were allocated */ - u32 svm_segment_index; - - /** Session index in per_thread pool */ - u32 session_index; - - /** Transport specific */ - u32 connection_index; - - /** Application specific */ - u32 pid; - - /** stream server pool index */ - u32 app_index; - - /** Parent listener session if the result of an accept */ - u32 listener_index; - - /** Opaque, pad to a 64-octet boundary */ - u64 opaque[2]; -} stream_session_t; - /* Forward definition */ typedef struct _session_manager_main session_manager_main_t; @@ -174,14 +103,6 @@ u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_fifo_event_t * e); struct _session_manager_main { - /** Lookup tables for established sessions and listeners */ - clib_bihash_16_8_t v4_session_hash; - clib_bihash_48_8_t v6_session_hash; - - /** Lookup tables for half-open sessions */ - clib_bihash_16_8_t v4_half_open_hash; - clib_bihash_48_8_t v6_half_open_hash; - /** Per worker thread session pools */ stream_session_t **sessions; @@ -224,10 +145,6 @@ struct _session_manager_main /** Preallocate session config parameter */ u32 
preallocated_sessions; - /* Convenience */ - vlib_main_t *vlib_main; - vnet_main_t *vnet_main; - #if SESSION_DBG /** * last event poll time by thread @@ -250,60 +167,6 @@ vnet_get_session_manager_main () return &session_manager_main; } -/* - * Stream session functions - */ - -stream_session_t *stream_session_lookup_listener4 (ip4_address_t * lcl, - u16 lcl_port, u8 proto); -stream_session_t *stream_session_lookup4 (ip4_address_t * lcl, - ip4_address_t * rmt, u16 lcl_port, - u16 rmt_port, u8 proto); -stream_session_t *stream_session_lookup_listener6 (ip6_address_t * lcl, - u16 lcl_port, u8 proto); -stream_session_t *stream_session_lookup6 (ip6_address_t * lcl, - ip6_address_t * rmt, u16 lcl_port, - u16 rmt_port, u8 proto); -transport_connection_t - * stream_session_lookup_transport_wt4 (ip4_address_t * lcl, - ip4_address_t * rmt, u16 lcl_port, - u16 rmt_port, u8 proto, - u32 thread_index); -transport_connection_t *stream_session_lookup_transport4 (ip4_address_t * lcl, - ip4_address_t * rmt, - u16 lcl_port, - u16 rmt_port, - u8 proto); -transport_connection_t *stream_session_lookup_transport_wt6 (ip6_address_t * - lcl, - ip6_address_t * - rmt, - u16 lcl_port, - u16 rmt_port, - u8 proto, - u32 - thread_index); -transport_connection_t *stream_session_lookup_transport6 (ip6_address_t * lcl, - ip6_address_t * rmt, - u16 lcl_port, - u16 rmt_port, - u8 proto); - -stream_session_t *stream_session_lookup_listener (ip46_address_t * lcl, - u16 lcl_port, u8 proto); -transport_connection_t - * stream_session_lookup_half_open (transport_connection_t * tc); -void stream_session_table_add_for_tc (transport_connection_t * tc, u64 value); -int stream_session_table_del_for_tc (transport_connection_t * tc); - -always_inline stream_session_t * -stream_session_get_tsi (u64 ti_and_si, u32 thread_index) -{ - ASSERT ((u32) (ti_and_si >> 32) == thread_index); - return pool_elt_at_index (session_manager_main.sessions[thread_index], - ti_and_si & 0xFFFFFFFFULL); -} - always_inline u8 
stream_session_is_valid (u32 si, u8 thread_index) { @@ -445,9 +308,6 @@ send_session_connected_callback (u32 app_index, u32 api_context, stream_session_t * s, u8 is_fail); -void session_register_transport (u8 type, const transport_proto_vft_t * vft); -transport_proto_vft_t *session_get_transport_vft (u8 type); - clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en); always_inline unix_shared_memory_queue_t * @@ -510,6 +370,24 @@ listen_session_del (stream_session_t * s) pool_put (session_manager_main.listen_sessions[s->session_type], s); } +always_inline stream_session_t * +session_manager_get_listener (u8 type, u32 index) +{ + return pool_elt_at_index (session_manager_main.listen_sessions[type], + index); +} + +always_inline void +session_manager_set_transport_rx_fn (u8 type, u8 is_peek) +{ + /* If an offset function is provided, then peek instead of dequeue */ + session_manager_main.session_tx_fns[type] = (is_peek) ? + session_tx_fifo_peek_and_snd : session_tx_fifo_dequeue_and_snd; +} + +session_type_t +session_type_from_proto_and_ip (transport_proto_t proto, u8 is_ip4); + always_inline u8 session_manager_is_enabled () { diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c new file mode 100644 index 00000000000..b3862ee3920 --- /dev/null +++ b/src/vnet/session/session_lookup.c @@ -0,0 +1,620 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** Generate typed init functions for multiple hash table styles... */ +#include +#include + +#include + +#undef __included_bihash_template_h__ + +#include +#include + +#include +#include +#include + +static session_lookup_t session_lookup; +extern transport_proto_vft_t *tp_vfts; + +/* *INDENT-OFF* */ +/* 16 octets */ +typedef CLIB_PACKED (struct { + union + { + struct + { + ip4_address_t src; + ip4_address_t dst; + u16 src_port; + u16 dst_port; + /* align by making this 4 octets even though its a 1-bit field + * NOTE: avoid key overlap with other transports that use 5 tuples for + * session identification. + */ + u32 proto; + }; + u64 as_u64[2]; + }; +}) v4_connection_key_t; + +typedef CLIB_PACKED (struct { + union + { + struct + { + /* 48 octets */ + ip6_address_t src; + ip6_address_t dst; + u16 src_port; + u16 dst_port; + u32 proto; + u64 unused; + }; + u64 as_u64[6]; + }; +}) v6_connection_key_t; +/* *INDENT-ON* */ + +typedef clib_bihash_kv_16_8_t session_kv4_t; +typedef clib_bihash_kv_48_8_t session_kv6_t; + +always_inline void +make_v4_ss_kv (session_kv4_t * kv, ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + v4_connection_key_t *key = (v4_connection_key_t *) kv->key; + + key->src.as_u32 = lcl->as_u32; + key->dst.as_u32 = rmt->as_u32; + key->src_port = lcl_port; + key->dst_port = rmt_port; + key->proto = proto; + + kv->value = ~0ULL; +} + +always_inline void +make_v4_listener_kv (session_kv4_t * kv, ip4_address_t * lcl, u16 lcl_port, + u8 proto) +{ + v4_connection_key_t *key = (v4_connection_key_t *) kv->key; + + key->src.as_u32 = lcl->as_u32; + key->dst.as_u32 = 0; + key->src_port = lcl_port; + key->dst_port = 0; + key->proto = proto; + + kv->value = ~0ULL; +} + +always_inline void +make_v4_ss_kv_from_tc (session_kv4_t * kv, transport_connection_t * t) +{ + return make_v4_ss_kv (kv, &t->lcl_ip.ip4, &t->rmt_ip.ip4, t->lcl_port, + t->rmt_port, t->proto); +} + +always_inline void +make_v6_ss_kv (session_kv6_t * kv, 
ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + v6_connection_key_t *key = (v6_connection_key_t *) kv->key; + + key->src.as_u64[0] = lcl->as_u64[0]; + key->src.as_u64[1] = lcl->as_u64[1]; + key->dst.as_u64[0] = rmt->as_u64[0]; + key->dst.as_u64[1] = rmt->as_u64[1]; + key->src_port = lcl_port; + key->dst_port = rmt_port; + key->proto = proto; + key->unused = 0; + + kv->value = ~0ULL; +} + +always_inline void +make_v6_listener_kv (session_kv6_t * kv, ip6_address_t * lcl, u16 lcl_port, + u8 proto) +{ + v6_connection_key_t *key = (v6_connection_key_t *) kv->key; + + key->src.as_u64[0] = lcl->as_u64[0]; + key->src.as_u64[1] = lcl->as_u64[1]; + key->dst.as_u64[0] = 0; + key->dst.as_u64[1] = 0; + key->src_port = lcl_port; + key->dst_port = 0; + key->proto = proto; + key->unused = 0; + + kv->value = ~0ULL; +} + +always_inline void +make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * t) +{ + make_v6_ss_kv (kv, &t->lcl_ip.ip6, &t->rmt_ip.ip6, t->lcl_port, + t->rmt_port, t->proto); +} + +/* + * Session lookup key; (src-ip, dst-ip, src-port, dst-port, session-type) + * Value: (owner thread index << 32 | session_index); + */ +void +stream_session_table_add_for_tc (transport_connection_t * tc, u64 value) +{ + session_lookup_t *sl = &session_lookup; + session_kv4_t kv4; + session_kv6_t kv6; + + switch (tc->proto) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + kv4.value = value; + clib_bihash_add_del_16_8 (&sl->v4_session_hash, &kv4, 1 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + kv6.value = value; + clib_bihash_add_del_48_8 (&sl->v6_session_hash, &kv6, 1 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } +} + +void +stream_session_table_add (session_manager_main_t * smm, stream_session_t * s, + u64 value) +{ + transport_connection_t *tc; + + tc = 
tp_vfts[s->session_type].get_connection (s->connection_index, + s->thread_index); + stream_session_table_add_for_tc (tc, value); +} + +void +stream_session_half_open_table_add (session_type_t sst, + transport_connection_t * tc, u64 value) +{ + session_lookup_t *sl = &session_lookup; + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + kv4.value = value; + clib_bihash_add_del_16_8 (&sl->v4_half_open_hash, &kv4, + 1 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + kv6.value = value; + clib_bihash_add_del_48_8 (&sl->v6_half_open_hash, &kv6, + 1 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } +} + +int +stream_session_table_del_for_tc (transport_connection_t * tc) +{ + session_lookup_t *sl = &session_lookup; + session_kv4_t kv4; + session_kv6_t kv6; + switch (tc->proto) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + return clib_bihash_add_del_16_8 (&sl->v4_session_hash, &kv4, + 0 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + return clib_bihash_add_del_48_8 (&sl->v6_session_hash, &kv6, + 0 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } + + return 0; +} + +int +stream_session_table_del (stream_session_t * s) +{ + transport_connection_t *ts; + ts = tp_vfts[s->session_type].get_connection (s->connection_index, + s->thread_index); + return stream_session_table_del_for_tc (ts); +} + +void +stream_session_half_open_table_del (u8 sst, transport_connection_t * tc) +{ + session_lookup_t *sl = &session_lookup; + session_kv4_t kv4; + session_kv6_t kv6; + + switch (sst) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv_from_tc (&kv4, tc); + 
clib_bihash_add_del_16_8 (&sl->v4_half_open_hash, &kv4, + 0 /* is_add */ ); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + make_v6_ss_kv_from_tc (&kv6, tc); + clib_bihash_add_del_48_8 (&sl->v6_half_open_hash, &kv6, + 0 /* is_add */ ); + break; + default: + clib_warning ("Session type not supported"); + ASSERT (0); + } +} + +stream_session_t * +stream_session_lookup_listener4 (ip4_address_t * lcl, u16 lcl_port, u8 proto) +{ + session_lookup_t *sl = &session_lookup; + session_kv4_t kv4; + int rv; + + make_v4_listener_kv (&kv4, lcl, lcl_port, proto); + rv = clib_bihash_search_inline_16_8 (&sl->v4_session_hash, &kv4); + if (rv == 0) + return session_manager_get_listener (proto, (u32) kv4.value); + + /* Zero out the lcl ip */ + kv4.key[0] = 0; + rv = clib_bihash_search_inline_16_8 (&sl->v4_session_hash, &kv4); + if (rv == 0) + return session_manager_get_listener (proto, (u32) kv4.value); + + return 0; +} + +/** Looks up a session based on the 5-tuple passed as argument. 
+ * + * First it tries to find an established session, if this fails, it tries + * finding a listener session if this fails, it tries a lookup with a + * wildcarded local source (listener bound to all interfaces) + */ +stream_session_t * +stream_session_lookup4 (ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + session_lookup_t *sl = &session_lookup; + session_kv4_t kv4; + stream_session_t *s; + int rv; + + /* Lookup session amongst established ones */ + make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&sl->v4_session_hash, &kv4); + if (rv == 0) + return stream_session_get_from_handle (kv4.value); + + /* If nothing is found, check if any listener is available */ + if ((s = stream_session_lookup_listener4 (lcl, lcl_port, proto))) + return s; + + /* Finally, try half-open connections */ + rv = clib_bihash_search_inline_16_8 (&sl->v4_half_open_hash, &kv4); + if (rv == 0) + return stream_session_get_from_handle (kv4.value); + return 0; +} + +stream_session_t * +stream_session_lookup_listener6 (ip6_address_t * lcl, u16 lcl_port, u8 proto) +{ + session_lookup_t *sl = &session_lookup; + session_kv6_t kv6; + int rv; + + make_v6_listener_kv (&kv6, lcl, lcl_port, proto); + rv = clib_bihash_search_inline_48_8 (&sl->v6_session_hash, &kv6); + if (rv == 0) + return session_manager_get_listener (proto, (u32) kv6.value); + + /* Zero out the lcl ip */ + kv6.key[0] = kv6.key[1] = 0; + rv = clib_bihash_search_inline_48_8 (&sl->v6_session_hash, &kv6); + if (rv == 0) + return session_manager_get_listener (proto, (u32) kv6.value); + + return 0; +} + +/* Looks up a session based on the 5-tuple passed as argument. 
+ * First it tries to find an established session, if this fails, it tries + * finding a listener session if this fails, it tries a lookup with a + * wildcarded local source (listener bound to all interfaces) */ +stream_session_t * +stream_session_lookup6 (ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + session_lookup_t *sl = &session_lookup; + session_kv6_t kv6; + stream_session_t *s; + int rv; + + make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&sl->v6_session_hash, &kv6); + if (rv == 0) + return stream_session_get_from_handle (kv6.value); + + /* If nothing is found, check if any listener is available */ + if ((s = stream_session_lookup_listener6 (lcl, lcl_port, proto))) + return s; + + /* Finally, try half-open connections */ + rv = clib_bihash_search_inline_48_8 (&sl->v6_half_open_hash, &kv6); + if (rv == 0) + return stream_session_get_from_handle (kv6.value); + return 0; +} + +stream_session_t * +stream_session_lookup_listener (ip46_address_t * lcl, u16 lcl_port, u8 proto) +{ + switch (proto) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + return stream_session_lookup_listener4 (&lcl->ip4, lcl_port, proto); + break; + case SESSION_TYPE_IP6_UDP: + case SESSION_TYPE_IP6_TCP: + return stream_session_lookup_listener6 (&lcl->ip6, lcl_port, proto); + break; + } + return 0; +} + +u64 +stream_session_half_open_lookup_handle (ip46_address_t * lcl, + ip46_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto) +{ + session_lookup_t *sl = &session_lookup; + session_kv4_t kv4; + session_kv6_t kv6; + int rv; + + switch (proto) + { + case SESSION_TYPE_IP4_UDP: + case SESSION_TYPE_IP4_TCP: + make_v4_ss_kv (&kv4, &lcl->ip4, &rmt->ip4, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&sl->v4_half_open_hash, &kv4); + + if (rv == 0) + return kv4.value; + + return HALF_OPEN_LOOKUP_INVALID_VALUE; + break; + case SESSION_TYPE_IP6_UDP: + case 
SESSION_TYPE_IP6_TCP: + make_v6_ss_kv (&kv6, &lcl->ip6, &rmt->ip6, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&sl->v6_half_open_hash, &kv6); + + if (rv == 0) + return kv6.value; + + return HALF_OPEN_LOOKUP_INVALID_VALUE; + break; + } + return HALF_OPEN_LOOKUP_INVALID_VALUE; +} + +transport_connection_t * +stream_session_half_open_lookup (ip46_address_t * lcl, ip46_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + u64 handle; + handle = + stream_session_half_open_lookup_handle (lcl, rmt, lcl_port, rmt_port, + proto); + if (handle != HALF_OPEN_LOOKUP_INVALID_VALUE) + return tp_vfts[proto].get_half_open (handle & 0xFFFFFFFF); + return 0; +} + +always_inline stream_session_t * +stream_session_get_tsi (u64 ti_and_si, u32 thread_index) +{ + ASSERT ((u32) (ti_and_si >> 32) == thread_index); + return pool_elt_at_index (session_manager_main.sessions[thread_index], + ti_and_si & 0xFFFFFFFFULL); +} + +transport_connection_t * +stream_session_lookup_transport_wt4 (ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_lookup_t *sl = &session_lookup; + session_kv4_t kv4; + stream_session_t *s; + int rv; + + /* Lookup session amongst established ones */ + make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&sl->v4_session_hash, &kv4); + if (rv == 0) + { + s = stream_session_get_tsi (kv4.value, my_thread_index); + return tp_vfts[s->session_type].get_connection (s->connection_index, + my_thread_index); + } + + /* If nothing is found, check if any listener is available */ + s = stream_session_lookup_listener4 (lcl, lcl_port, proto); + if (s) + return tp_vfts[s->session_type].get_listener (s->connection_index); + + /* Finally, try half-open connections */ + rv = clib_bihash_search_inline_16_8 (&sl->v4_half_open_hash, &kv4); + if (rv == 0) + return tp_vfts[proto].get_half_open (kv4.value & 0xFFFFFFFF); + return 0; +} + 
+transport_connection_t * +stream_session_lookup_transport4 (ip4_address_t * lcl, ip4_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + session_lookup_t *sl = &session_lookup; + session_kv4_t kv4; + stream_session_t *s; + int rv; + + /* Lookup session amongst established ones */ + make_v4_ss_kv (&kv4, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_16_8 (&sl->v4_session_hash, &kv4); + if (rv == 0) + { + s = stream_session_get_from_handle (kv4.value); + return tp_vfts[s->session_type].get_connection (s->connection_index, + s->thread_index); + } + + /* If nothing is found, check if any listener is available */ + s = stream_session_lookup_listener4 (lcl, lcl_port, proto); + if (s) + return tp_vfts[s->session_type].get_listener (s->connection_index); + + /* Finally, try half-open connections */ + rv = clib_bihash_search_inline_16_8 (&sl->v4_half_open_hash, &kv4); + if (rv == 0) + return tp_vfts[proto].get_half_open (kv4.value & 0xFFFFFFFF); + return 0; +} + +transport_connection_t * +stream_session_lookup_transport_wt6 (ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto, + u32 my_thread_index) +{ + session_lookup_t *sl = &session_lookup; + stream_session_t *s; + session_kv6_t kv6; + int rv; + + make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&sl->v6_session_hash, &kv6); + if (rv == 0) + { + s = stream_session_get_tsi (kv6.value, my_thread_index); + return tp_vfts[s->session_type].get_connection (s->connection_index, + my_thread_index); + } + + /* If nothing is found, check if any listener is available */ + s = stream_session_lookup_listener6 (lcl, lcl_port, proto); + if (s) + return tp_vfts[s->session_type].get_listener (s->connection_index); + + /* Finally, try half-open connections */ + rv = clib_bihash_search_inline_48_8 (&sl->v6_half_open_hash, &kv6); + if (rv == 0) + return tp_vfts[proto].get_half_open (kv6.value & 0xFFFFFFFF); + + return 0; +} + 
+transport_connection_t * +stream_session_lookup_transport6 (ip6_address_t * lcl, ip6_address_t * rmt, + u16 lcl_port, u16 rmt_port, u8 proto) +{ + session_lookup_t *sl = &session_lookup; + stream_session_t *s; + session_kv6_t kv6; + int rv; + + make_v6_ss_kv (&kv6, lcl, rmt, lcl_port, rmt_port, proto); + rv = clib_bihash_search_inline_48_8 (&sl->v6_session_hash, &kv6); + if (rv == 0) + { + s = stream_session_get_from_handle (kv6.value); + return tp_vfts[s->session_type].get_connection (s->connection_index, + s->thread_index); + } + + /* If nothing is found, check if any listener is available */ + s = stream_session_lookup_listener6 (lcl, lcl_port, proto); + if (s) + return tp_vfts[s->session_type].get_listener (s->connection_index); + + /* Finally, try half-open connections */ + rv = clib_bihash_search_inline_48_8 (&sl->v6_half_open_hash, &kv6); + if (rv == 0) + return tp_vfts[proto].get_half_open (kv6.value & 0xFFFFFFFF); + + return 0; +} + +void +session_lookup_init (void) +{ + session_lookup_t *sl = &session_lookup; + clib_bihash_init_16_8 (&sl->v4_session_hash, "v4 session table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + clib_bihash_init_48_8 (&sl->v6_session_hash, "v6 session table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + + clib_bihash_init_16_8 (&sl->v4_half_open_hash, "v4 half-open table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); + clib_bihash_init_48_8 (&sl->v6_half_open_hash, "v6 half-open table", + 200000 /* $$$$ config parameter nbuckets */ , + (64 << 20) /*$$$ config parameter table size */ ); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h new file mode 100644 index 00000000000..9e92dab1b1d --- /dev/null +++ 
b/src/vnet/session/session_lookup.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef SRC_VNET_SESSION_SESSION_LOOKUP_H_ +#define SRC_VNET_SESSION_SESSION_LOOKUP_H_ + +#include +#include + +typedef struct _session_lookup +{ + /** Lookup tables for established sessions and listeners */ + clib_bihash_16_8_t v4_session_hash; + clib_bihash_48_8_t v6_session_hash; + + /** Lookup tables for half-open sessions */ + clib_bihash_16_8_t v4_half_open_hash; + clib_bihash_48_8_t v6_half_open_hash; +} session_lookup_t; + +stream_session_t *stream_session_lookup_listener4 (ip4_address_t * lcl, + u16 lcl_port, u8 proto); +stream_session_t *stream_session_lookup4 (ip4_address_t * lcl, + ip4_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto); +stream_session_t *stream_session_lookup_listener6 (ip6_address_t * lcl, + u16 lcl_port, u8 proto); +stream_session_t *stream_session_lookup6 (ip6_address_t * lcl, + ip6_address_t * rmt, u16 lcl_port, + u16 rmt_port, u8 proto); +transport_connection_t *stream_session_lookup_transport_wt4 (ip4_address_t * + lcl, + ip4_address_t * + rmt, + u16 lcl_port, + u16 rmt_port, + u8 proto, + u32 + thread_index); +transport_connection_t *stream_session_lookup_transport4 (ip4_address_t * lcl, + ip4_address_t * rmt, + u16 lcl_port, + u16 rmt_port, + u8 proto); +transport_connection_t *stream_session_lookup_transport_wt6 (ip6_address_t * + lcl, + ip6_address_t * 
+ rmt, + u16 lcl_port, + u16 rmt_port, + u8 proto, + u32 + thread_index); +transport_connection_t *stream_session_lookup_transport6 (ip6_address_t * lcl, + ip6_address_t * rmt, + u16 lcl_port, + u16 rmt_port, + u8 proto); + +stream_session_t *stream_session_lookup_listener (ip46_address_t * lcl, + u16 lcl_port, u8 proto); +u64 stream_session_half_open_lookup_handle (ip46_address_t * lcl, + ip46_address_t * rmt, + u16 lcl_port, + u16 rmt_port, u8 proto); +transport_connection_t *stream_session_half_open_lookup (ip46_address_t * lcl, + ip46_address_t * rmt, + u16 lcl_port, + u16 rmt_port, + u8 proto); +void stream_session_table_add_for_tc (transport_connection_t * tc, u64 value); +int stream_session_table_del_for_tc (transport_connection_t * tc); +int stream_session_table_del (stream_session_t * s); +void stream_session_half_open_table_del (u8 sst, transport_connection_t * tc); +void stream_session_half_open_table_add (session_type_t sst, + transport_connection_t * tc, + u64 value); + +void session_lookup_init (void); + +#endif /* SRC_VNET_SESSION_SESSION_LOOKUP_H_ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c new file mode 100644 index 00000000000..8d703b0b302 --- /dev/null +++ b/src/vnet/session/session_node.c @@ -0,0 +1,685 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+vlib_node_registration_t session_queue_node;
+
+/** Trace record stored for every traced buffer the node sends */
+typedef struct
+{
+  u32 session_index;		/**< session_index of the sending session */
+  u32 server_thread_index;	/**< thread_index of the sending session */
+} session_queue_trace_t;
+
+/**
+ * Packet trace format function.
+ *
+ * @param s - string the formatted trace is appended to
+ * @param args - vlib_main_t *, vlib_node_t * (both unused) followed by the
+ *               session_queue_trace_t * to print
+ * @return the extended string
+ */
+static u8 *
+format_session_queue_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  session_queue_trace_t *t = va_arg (*args, session_queue_trace_t *);
+
+  s = format (s, "SESSION_QUEUE: session index %d, server thread index %d",
+	      t->session_index, t->server_thread_index);
+  return s;
+}
+
+/** Node error counters: symbol suffix and description string */
+#define foreach_session_queue_error		\
+_(TX, "Packets transmitted")			\
+_(TIMER, "Timer events")			\
+_(NO_BUFFER, "Out of buffers")
+
+typedef enum
+{
+#define _(sym,str) SESSION_QUEUE_ERROR_##sym,
+  foreach_session_queue_error
+#undef _
+    SESSION_QUEUE_N_ERROR,
+} session_queue_error_t;
+
+static char *session_queue_error_strings[] = {
+#define _(sym,string) string,
+  foreach_session_queue_error
+#undef _
+};
+
+/**
+ * Next node for each session type.
+ *
+ * Indexed with stream_session_t.session_type, so the entries have to stay
+ * in session type order (IP4 TCP, IP4 UDP, IP6 TCP, IP6 UDP).
+ */
+static u32 session_type_to_next[] = {
+  SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT,
+  SESSION_QUEUE_NEXT_IP4_LOOKUP,
+  SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT,
+  SESSION_QUEUE_NEXT_IP6_LOOKUP,
+};
+
+/**
+ * Fill buffers 2..n_bufs_per_seg of a segment and chain them behind b0.
+ *
+ * Each extra buffer is popped from the tail of
+ * smm->tx_buffers[thread_index], receives up to deq_per_buf bytes read from
+ * the fifo and is linked to the previous buffer. The loop stops early once
+ * *left_to_snd0 reaches zero.
+ *
+ * @param b0 - first buffer of the chain; its
+ *             total_length_not_including_first_buffer is updated
+ * @param bi0 - buffer index of b0
+ * @param left_to_snd0 - in/out, bytes still to be sent for this event
+ * @param n_bufs - in/out, number of buffers left in tx_buffers
+ * @param rx_offset - in/out, fifo offset to peek at (peek mode only)
+ * @param deq_per_buf - maximum number of bytes copied into one buffer
+ * @param peek_data - non-zero: read with svm_fifo_peek at *rx_offset,
+ *                    zero: read with svm_fifo_dequeue_nowait
+ */
+always_inline void
+session_tx_fifo_chain_tail (session_manager_main_t * smm, vlib_main_t * vm,
+			    u8 thread_index, svm_fifo_t * fifo,
+			    vlib_buffer_t * b0, u32 bi0, u8 n_bufs_per_seg,
+			    u32 * left_to_snd0, u16 * n_bufs, u32 * rx_offset,
+			    u16 deq_per_buf, u8 peek_data)
+{
+  vlib_buffer_t *chain_b0, *prev_b0;
+  u32 chain_bi0;
+  u16 len_to_deq0, n_bytes_read;
+  u8 *data0, j;
+
+  chain_bi0 = bi0;
+  chain_b0 = b0;
+  for (j = 1; j < n_bufs_per_seg; j++)
+    {
+      prev_b0 = chain_b0;
+      len_to_deq0 = clib_min (*left_to_snd0, deq_per_buf);
+
+      /* Take the last free buffer and shrink the free vector accordingly */
+      *n_bufs -= 1;
+      chain_bi0 = smm->tx_buffers[thread_index][*n_bufs];
+      _vec_len (smm->tx_buffers[thread_index]) = *n_bufs;
+
+      chain_b0 = vlib_get_buffer (vm, chain_bi0);
+      chain_b0->current_data = 0;
+      data0 = vlib_buffer_get_current (chain_b0);
+      if (peek_data)
+	{
+	  n_bytes_read = svm_fifo_peek (fifo, *rx_offset, len_to_deq0, data0);
+	  *rx_offset += n_bytes_read;
+	}
+      else
+	{
+	  n_bytes_read = svm_fifo_dequeue_nowait (fifo, len_to_deq0, data0);
+	}
+      ASSERT (n_bytes_read == len_to_deq0);
+      chain_b0->current_length = n_bytes_read;
+      b0->total_length_not_including_first_buffer += chain_b0->current_length;
+
+      /* update previous buffer */
+      prev_b0->next_buffer = chain_bi0;
+      prev_b0->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+      /* update current buffer */
+      chain_b0->next_buffer = 0;
+
+      *left_to_snd0 -= n_bytes_read;
+      if (*left_to_snd0 == 0)
+	break;
+    }
+}
+
+/**
+ * Read data from a session's tx fifo into buffers and enqueue them to the
+ * next node selected by the session type.
+ *
+ * The amount sent is bounded by what the fifo holds and by the transport's
+ * send space. When the fifo is the limiting factor and holds more than one
+ * mss, only whole mss sized segments are sent.
+ *
+ * @param e0 - event being serviced. A copy is appended to the thread's
+ *             pending event vector when no progress can be made or buffers
+ *             run out, and - if svm_fifo_set_event allows it - when data is
+ *             left in the fifo or a read fails
+ * @param s0 - session that owns the tx fifo
+ * @param n_tx_packets - in/out, incremented once per segment sent
+ * @param peek_data - non-zero: peek starting at the offset supplied by the
+ *                    transport's tx_fifo_offset, zero: dequeue
+ * @return 0, or -1 if a full frame of buffers could not be allocated
+ */
+always_inline int
+session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
+				session_manager_main_t * smm,
+				session_fifo_event_t * e0,
+				stream_session_t * s0, u32 thread_index,
+				int *n_tx_packets, u8 peek_data)
+{
+  u32 n_trace = vlib_get_trace_count (vm, node);
+  u32 left_to_snd0, max_len_to_snd0, len_to_deq0, snd_space0;
+  u32 n_bufs_per_evt, n_frames_per_evt;
+  transport_connection_t *tc0;
+  transport_proto_vft_t *transport_vft;
+  u32 next_index, next0, *to_next, n_left_to_next, bi0;
+  vlib_buffer_t *b0;
+  u32 rx_offset = 0, max_dequeue0, n_bytes_per_seg;
+  u16 snd_mss0, n_bufs_per_seg, n_bufs;
+  u8 *data0;
+  int i, n_bytes_read;
+  u32 n_bytes_per_buf, deq_per_buf;
+  u32 buffers_allocated, buffers_allocated_this_call;
+
+  next_index = next0 = session_type_to_next[s0->session_type];
+
+  transport_vft = session_get_transport_vft (s0->session_type);
+  tc0 = transport_vft->get_connection (s0->connection_index, thread_index);
+
+  /* Make sure we have space to send and there's something to dequeue */
+  snd_mss0 = transport_vft->send_mss (tc0);
+  snd_space0 = transport_vft->send_space (tc0);
+
+  /* Can't make any progress */
+  if (snd_space0 == 0 || snd_mss0 == 0)
+    {
+      vec_add1 (smm->pending_event_vector[thread_index], *e0);
+      return 0;
+    }
+
+  if (peek_data)
+    {
+      /* Offset in tx fifo from where to peek data */
+      rx_offset = transport_vft->tx_fifo_offset (tc0);
+    }
+
+  /* Check how much we can pull. If buffering, subtract the offset */
+  max_dequeue0 = svm_fifo_max_dequeue (s0->server_tx_fifo) - rx_offset;
+
+  /* Nothing to read return */
+  if (max_dequeue0 == 0)
+    {
+      svm_fifo_unset_event (s0->server_tx_fifo);
+      return 0;
+    }
+
+  /* Ensure we're not writing more than transport window allows */
+  if (max_dequeue0 < snd_space0)
+    {
+      /* Constrained by tx queue. Try to send only fully formed segments */
+      max_len_to_snd0 = (max_dequeue0 > snd_mss0) ?
+	max_dequeue0 - max_dequeue0 % snd_mss0 : max_dequeue0;
+      /* TODO Nagle ? */
+    }
+  else
+    {
+      max_len_to_snd0 = snd_space0;
+    }
+
+  /* Work out how many buffers and frames this event needs */
+  n_bytes_per_buf = vlib_buffer_free_list_buffer_size
+    (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+  n_bytes_per_seg = MAX_HDRS_LEN + snd_mss0;
+  n_bufs_per_seg = ceil ((double) n_bytes_per_seg / n_bytes_per_buf);
+  n_bufs_per_evt = (ceil ((double) max_len_to_snd0 / n_bytes_per_seg))
+    * n_bufs_per_seg;
+  n_frames_per_evt = ceil ((double) n_bufs_per_evt / VLIB_FRAME_SIZE);
+
+  deq_per_buf = clib_min (snd_mss0, n_bytes_per_buf);
+
+  n_bufs = vec_len (smm->tx_buffers[thread_index]);
+  left_to_snd0 = max_len_to_snd0;
+  for (i = 0; i < n_frames_per_evt; i++)
+    {
+      /* Make sure we have at least one full frame of buffers ready */
+      if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE))
+	{
+	  vec_validate (smm->tx_buffers[thread_index],
+			n_bufs + 2 * VLIB_FRAME_SIZE - 1);
+
+	  /* Keep asking until a frame's worth is available or the
+	   * allocator returns nothing */
+	  buffers_allocated = 0;
+	  do
+	    {
+	      buffers_allocated_this_call =
+		vlib_buffer_alloc
+		(vm,
+		 &smm->tx_buffers[thread_index][n_bufs + buffers_allocated],
+		 2 * VLIB_FRAME_SIZE - buffers_allocated);
+	      buffers_allocated += buffers_allocated_this_call;
+	    }
+	  while (buffers_allocated_this_call > 0
+		 && ((buffers_allocated + n_bufs < VLIB_FRAME_SIZE)));
+
+	  n_bufs += buffers_allocated;
+
+	  _vec_len (smm->tx_buffers[thread_index]) = n_bufs;
+
+	  if (PREDICT_FALSE (n_bufs < VLIB_FRAME_SIZE))
+	    {
+	      vec_add1 (smm->pending_event_vector[thread_index], *e0);
+	      return -1;
+	    }
+	}
+      /* Allow enqueuing of a new event */
+      svm_fifo_unset_event (s0->server_tx_fifo);
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+      while (left_to_snd0 && n_left_to_next >= n_bufs_per_seg)
+	{
+	  /*
+	   * Handle first buffer in chain separately
+	   */
+
+	  /* Get free buffer */
+	  ASSERT (n_bufs >= 1);
+	  bi0 = smm->tx_buffers[thread_index][--n_bufs];
+	  ASSERT (bi0);
+	  _vec_len (smm->tx_buffers[thread_index]) = n_bufs;
+
+	  /* usual speculation, or the enqueue_x1 macro will barf */
+	  to_next[0] = bi0;
+	  to_next += 1;
+	  n_left_to_next -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b0->error = 0;
+	  b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID
+	    | VNET_BUFFER_F_LOCALLY_ORIGINATED;
+	  b0->current_data = 0;
+	  b0->total_length_not_including_first_buffer = 0;
+
+	  len_to_deq0 = clib_min (left_to_snd0, deq_per_buf);
+
+	  data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN);
+	  if (peek_data)
+	    {
+	      n_bytes_read = svm_fifo_peek (s0->server_tx_fifo, rx_offset,
+					    len_to_deq0, data0);
+	      /* Keep track of progress locally, transport is also supposed to
+	       * increment it independently when pushing the header */
+	      rx_offset += n_bytes_read;
+	    }
+	  else
+	    {
+	      n_bytes_read = svm_fifo_dequeue_nowait (s0->server_tx_fifo,
+						      len_to_deq0, data0);
+	    }
+
+	  if (n_bytes_read <= 0)
+	    goto dequeue_fail;
+
+	  b0->current_length = n_bytes_read;
+
+	  left_to_snd0 -= n_bytes_read;
+	  *n_tx_packets = *n_tx_packets + 1;
+
+	  /*
+	   * Fill in the remaining buffers in the chain, if any
+	   */
+	  if (PREDICT_FALSE (n_bufs_per_seg > 1))
+	    session_tx_fifo_chain_tail (smm, vm, thread_index,
+					s0->server_tx_fifo, b0, bi0,
+					n_bufs_per_seg, &left_to_snd0,
+					&n_bufs, &rx_offset, deq_per_buf,
+					peek_data);
+
+	  /* Ask transport to push header after current_length and
+	   * total_length_not_including_first_buffer are updated */
+	  transport_vft->push_header (tc0, b0);
+
+	  /* *INDENT-OFF* */
+	  SESSION_EVT_DBG(SESSION_EVT_DEQ, s0, ({
+	      ed->data[0] = e0->event_id;
+	      ed->data[1] = max_dequeue0;
+	      ed->data[2] = len_to_deq0;
+	      ed->data[3] = left_to_snd0;
+	  }));
+	  /* *INDENT-ON* */
+
+
+	  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
+	  if (PREDICT_FALSE (n_trace > 0))
+	    {
+	      session_queue_trace_t *t0;
+	      vlib_trace_buffer (vm, node, next_index, b0,
+				 1 /* follow_chain */ );
+	      vlib_set_trace_count (vm, node, --n_trace);
+	      t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+	      t0->session_index = s0->session_index;
+	      t0->server_thread_index = s0->thread_index;
+	    }
+
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, next0);
+	}
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* If we couldn't dequeue all bytes mark as partially read */
+  if (max_len_to_snd0 < max_dequeue0)
+    {
+      /* If we don't already have new event */
+      if (svm_fifo_set_event (s0->server_tx_fifo))
+	{
+	  vec_add1 (smm->pending_event_vector[thread_index], *e0);
+	}
+    }
+  return 0;
+
+dequeue_fail:
+  /*
+   * Can't read from fifo. If we don't already have an event, save as partially
+   * read, return buff to free list and return
+   */
+  clib_warning ("dequeue fail");
+
+  if (svm_fifo_set_event (s0->server_tx_fifo))
+    {
+      vec_add1 (smm->pending_event_vector[thread_index], *e0);
+    }
+  /* Undo the speculative enqueue of bi0 and hand the buffer back */
+  vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1);
+  _vec_len (smm->tx_buffers[thread_index]) += 1;
+
+  return 0;
+}
+
+/**
+ * Tx function that peeks data out of the tx fifo.
+ *
+ * Thin wrapper around session_tx_fifo_read_and_snd_i with peek_data = 1.
+ */
+int
+session_tx_fifo_peek_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node,
+			      session_manager_main_t * smm,
+			      session_fifo_event_t * e0,
+			      stream_session_t * s0, u32 thread_index,
+			      int *n_tx_pkts)
+{
+  return session_tx_fifo_read_and_snd_i (vm, node, smm, e0, s0, thread_index,
+					 n_tx_pkts, 1);
+}
+
+/**
+ * Tx function that dequeues data from the tx fifo.
+ *
+ * Thin wrapper around session_tx_fifo_read_and_snd_i with peek_data = 0.
+ */
+int
+session_tx_fifo_dequeue_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node,
+				 session_manager_main_t * smm,
+				 session_fifo_event_t * e0,
+				 stream_session_t * s0, u32 thread_index,
+				 int *n_tx_pkts)
+{
+  return session_tx_fifo_read_and_snd_i (vm, node, smm, e0, s0, thread_index,
+					 n_tx_pkts, 0);
+}
+
+/**
+ * Map a fifo event to its session.
+ *
+ * The session is looked up with the master session index stored in the
+ * event's fifo. Callers in this file treat a NULL result as "session no
+ * longer valid".
+ */
+always_inline stream_session_t *
+session_event_get_session (session_fifo_event_t * e, u8 thread_index)
+{
+  ASSERT (e->fifo->master_thread_index == thread_index);
+  return stream_session_get_if_valid (e->fifo->master_session_index,
+				      thread_index);
+}
+
+/**
+ * Debug helper: print every event currently in an event queue to stdout.
+ *
+ * The queue walked is the one indexed by vlib_global_main's thread_index.
+ * Entries are copied out for printing; the queue's head and size are not
+ * modified.
+ *
+ * NOTE(review): the session lookups below can presumably return NULL (the
+ * node function checks for it) but s0 is dereferenced unchecked here.
+ */
+void
+dump_thread_0_event_queue (void)
+{
+  session_manager_main_t *smm = vnet_get_session_manager_main ();
+  vlib_main_t *vm = &vlib_global_main;
+  u32 my_thread_index = vm->thread_index;
+  session_fifo_event_t _e, *e = &_e;
+  stream_session_t *s0;
+  int i, index;
+  i8 *headp;
+
+  unix_shared_memory_queue_t *q;
+  q = smm->vpp_event_queues[my_thread_index];
+
+  index = q->head;
+
+  for (i = 0; i < q->cursize; i++)
+    {
+      headp = (i8 *) (&q->data[0] + q->elsize * index);
+      clib_memcpy (e, headp, q->elsize);
+
+      switch (e->event_type)
+	{
+	case FIFO_EVENT_APP_TX:
+	  s0 = session_event_get_session (e, my_thread_index);
+	  fformat (stdout, "[%04d] TX session %d\n", i, s0->session_index);
+	  break;
+
+	case FIFO_EVENT_DISCONNECT:
+	  s0 = stream_session_get_from_handle (e->session_handle);
+	  fformat (stdout, "[%04d] disconnect session %d\n", i,
+		   s0->session_index);
+	  break;
+
+	case FIFO_EVENT_BUILTIN_RX:
+	  s0 = session_event_get_session (e, my_thread_index);
+	  fformat (stdout, "[%04d] builtin_rx %d\n", i, s0->session_index);
+	  break;
+
+	case FIFO_EVENT_RPC:
+	  fformat (stdout, "[%04d] RPC call %llx with %llx\n",
+		   i, (u64) (e->rpc_args.fp), (u64) (e->rpc_args.arg));
+	  break;
+
+	default:
+	  fformat (stdout, "[%04d] unhandled event type %d\n",
+		   i, e->event_type);
+	  break;
+	}
+
+      index++;
+
+      /* The queue is circular */
+      if (index == q->maxsize)
+	index = 0;
+    }
+}
+
+/**
+ * Check whether an event refers to a given fifo.
+ *
+ * Rx/tx events carry the fifo itself. Disconnect events carry a session
+ * handle, so the fifo is compared against both fifos of that session. RPC
+ * events are consumed through rpc_args elsewhere in this file and are not
+ * tied to a fifo, so they never match.
+ *
+ * @return 1 if the event refers to f, 0 otherwise
+ */
+static u8
+session_node_cmp_event (session_fifo_event_t * e, svm_fifo_t * f)
+{
+  stream_session_t *s;
+  switch (e->event_type)
+    {
+    case FIFO_EVENT_APP_RX:
+    case FIFO_EVENT_APP_TX:
+    case FIFO_EVENT_BUILTIN_RX:
+      if (e->fifo == f)
+	return 1;
+      break;
+    case FIFO_EVENT_DISCONNECT:
+      s = stream_session_get_from_handle (e->session_handle);
+      if (!s)
+	{
+	  clib_warning ("session has event but doesn't exist!");
+	  break;
+	}
+      if (s->server_rx_fifo == f || s->server_tx_fifo == f)
+	return 1;
+      break;
+    case FIFO_EVENT_RPC:
+      break;
+    default:
+      break;
+    }
+  return 0;
+}
+
+/**
+ * Find an event that refers to fifo f.
+ *
+ * The event queue of the fifo's master thread is searched first, then that
+ * thread's pending event vector. The search stops at the first match.
+ *
+ * @param f - fifo to look for
+ * @param e - out, receives a copy of the matching event. When nothing
+ *            matches its content is unspecified (it may hold the last queue
+ *            entry inspected)
+ * @return 1 if a matching event was found, 0 otherwise
+ */
+u8
+session_node_lookup_fifo_event (svm_fifo_t * f, session_fifo_event_t * e)
+{
+  session_manager_main_t *smm = vnet_get_session_manager_main ();
+  unix_shared_memory_queue_t *q;
+  session_fifo_event_t *pending_event_vector, *evt;
+  int i, index, found = 0;
+  i8 *headp;
+  u8 thread_index;
+
+  ASSERT (e);
+  thread_index = f->master_thread_index;
+  /*
+   * Search evt queue
+   */
+  q = smm->vpp_event_queues[thread_index];
+  index = q->head;
+  for (i = 0; i < q->cursize; i++)
+    {
+      headp = (i8 *) (&q->data[0] + q->elsize * index);
+      clib_memcpy (e, headp, q->elsize);
+      found = session_node_cmp_event (e, f);
+      /* Return right away: falling through to the pending vector scan
+       * would overwrite the result with that scan's outcome */
+      if (found)
+	return 1;
+      if (++index == q->maxsize)
+	index = 0;
+    }
+  /*
+   * Search pending events vector
+   */
+  pending_event_vector = smm->pending_event_vector[thread_index];
+  vec_foreach (evt, pending_event_vector)
+  {
+    found = session_node_cmp_event (evt, f);
+    if (found)
+      {
+	clib_memcpy (e, evt, sizeof (*evt));
+	break;
+      }
+  }
+  return found;
+}
+
+/**
+ * Session queue node function.
+ *
+ * Drains the calling thread's event queue (skipping the whole pass if the
+ * queue mutex is busy), appends the events left pending by the previous
+ * pass and dispatches each one by type: tx events go to the per session
+ * type tx function, disconnects to stream_session_disconnect, builtin rx
+ * events to the application's rx callback and RPCs to their function
+ * pointer.
+ *
+ * @param frame - unused
+ * @return number of packets sent
+ */
+static uword
+session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+		       vlib_frame_t * frame)
+{
+  session_manager_main_t *smm = vnet_get_session_manager_main ();
+  session_fifo_event_t *my_pending_event_vector, *e;
+  session_fifo_event_t *my_fifo_events;
+  u32 n_to_dequeue, n_events;
+  unix_shared_memory_queue_t *q;
+  application_t *app;
+  int n_tx_packets = 0;
+  u32 my_thread_index = vm->thread_index;
+  int i, rv;
+  f64 now = vlib_time_now (vm);
+  void (*fp) (void *);
+
+  SESSION_EVT_DBG (SESSION_EVT_POLL_GAP_TRACK, smm, my_thread_index);
+
+  /*
+   *  Update TCP time
+   */
+  tcp_update_time (now, my_thread_index);
+
+  /*
+   * Get vpp queue events
+   */
+  q = smm->vpp_event_queues[my_thread_index];
+  if (PREDICT_FALSE (q == 0))
+    return 0;
+
+  my_fifo_events = smm->free_event_vector[my_thread_index];
+
+  /* min number of events we can dequeue without blocking */
+  n_to_dequeue = q->cursize;
+  my_pending_event_vector = smm->pending_event_vector[my_thread_index];
+
+  if (n_to_dequeue == 0 && vec_len (my_pending_event_vector) == 0)
+    return 0;
+
+  SESSION_EVT_DBG (SESSION_EVT_DEQ_NODE, 0);
+
+  /*
+   * If we didn't manage to process previous events try going
+   * over them again without dequeuing new ones.
+   */
+  /* XXX: Block senders to sessions that can't keep up */
+  if (0 && vec_len (my_pending_event_vector) >= 100)
+    {
+      clib_warning ("too many fifo events unsolved");
+      goto skip_dequeue;
+    }
+
+  /* See you in the next life, don't be late */
+  if (pthread_mutex_trylock (&q->mutex))
+    return 0;
+
+  for (i = 0; i < n_to_dequeue; i++)
+    {
+      vec_add2 (my_fifo_events, e, 1);
+      unix_shared_memory_queue_sub_raw (q, (u8 *) e);
+    }
+
+  /* The other side of the connection is not polling */
+  if (q->cursize < (q->maxsize / 8))
+    (void) pthread_cond_broadcast (&q->condvar);
+  pthread_mutex_unlock (&q->mutex);
+
+  /* Previously pending events are retried after the fresh ones; the
+   * pending vector is emptied so handlers can refill it */
+  vec_append (my_fifo_events, my_pending_event_vector);
+
+  _vec_len (my_pending_event_vector) = 0;
+  smm->pending_event_vector[my_thread_index] = my_pending_event_vector;
+
+skip_dequeue:
+  n_events = vec_len (my_fifo_events);
+  for (i = 0; i < n_events; i++)
+    {
+      stream_session_t *s0;	/* $$$ prefetch 1 ahead maybe */
+      session_fifo_event_t *e0;
+
+      e0 = &my_fifo_events[i];
+
+      switch (e0->event_type)
+	{
+	case FIFO_EVENT_APP_TX:
+	  s0 = session_event_get_session (e0, my_thread_index);
+
+	  /* Checked in all builds: s0 is dereferenced right below */
+	  if (PREDICT_FALSE (!s0))
+	    {
+	      clib_warning ("It's dead, Jim!");
+	      continue;
+	    }
+
+	  if (PREDICT_FALSE (s0->session_state == SESSION_STATE_CLOSED))
+	    continue;
+	  /* Spray packets in per session type frames, since they go to
+	   * different nodes */
+	  rv = (smm->session_tx_fns[s0->session_type]) (vm, node, smm, e0, s0,
+							my_thread_index,
+							&n_tx_packets);
+	  /* Out of buffers */
+	  if (PREDICT_FALSE (rv < 0))
+	    {
+	      vlib_node_increment_counter (vm, node->node_index,
+					   SESSION_QUEUE_ERROR_NO_BUFFER, 1);
+	      continue;
+	    }
+	  break;
+	case FIFO_EVENT_DISCONNECT:
+	  s0 = stream_session_get_from_handle (e0->session_handle);
+	  stream_session_disconnect (s0);
+	  break;
+	case FIFO_EVENT_BUILTIN_RX:
+	  s0 = session_event_get_session (e0, my_thread_index);
+	  /* Same lookup as the tx case, so the session may be gone */
+	  if (PREDICT_FALSE (!s0))
+	    continue;
+	  svm_fifo_unset_event (s0->server_rx_fifo);
+	  app = application_get (s0->app_index);
+	  app->cb_fns.builtin_server_rx_callback (s0);
+	  break;
+	case FIFO_EVENT_RPC:
+	  fp = e0->rpc_args.fp;
+	  (*fp) (e0->rpc_args.arg);
+	  break;
+
+	default:
+	  clib_warning ("unhandled event type %d", e0->event_type);
+	}
+    }
+
+  /* Hand the (now empty) event vector back for reuse on the next pass */
+  _vec_len (my_fifo_events) = 0;
+  smm->free_event_vector[my_thread_index] = my_fifo_events;
+
+  vlib_node_increment_counter (vm, session_queue_node.index,
+			       SESSION_QUEUE_ERROR_TX, n_tx_packets);
+
+  SESSION_EVT_DBG (SESSION_EVT_DEQ_NODE, 1);
+
+  return n_tx_packets;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (session_queue_node) =
+{
+  .function = session_queue_node_fn,
+  .name = "session-queue",
+  .format_trace = format_session_queue_trace,
+  .type = VLIB_NODE_TYPE_INPUT,
+  .n_errors = ARRAY_LEN (session_queue_error_strings),
+  .error_strings = session_queue_error_strings,
+  .n_next_nodes = SESSION_QUEUE_N_NEXT,
+  .state = VLIB_NODE_STATE_DISABLED,
+  .next_nodes =
+  {
+      [SESSION_QUEUE_NEXT_DROP] = "error-drop",
+      [SESSION_QUEUE_NEXT_IP4_LOOKUP] = "ip4-lookup",
+      [SESSION_QUEUE_NEXT_IP6_LOOKUP] = "ip6-lookup",
+      [SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT] = "tcp4-output",
+      [SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT] = "tcp6-output",
+  },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/stream_session.h b/src/vnet/session/stream_session.h
new file mode 100644
index 00000000000..82bbf521ce9
--- /dev/null
+++ b/src/vnet/session/stream_session.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_VNET_SESSION_STREAM_SESSION_H_
+#define SRC_VNET_SESSION_STREAM_SESSION_H_
+
+#include
+#include
+/*
+ * Session types, one per (network, transport) protocol pair.
+ *
+ * The order of the entries fixes the numeric value of each SESSION_TYPE_*
+ * constant. session_node.c indexes its per-type next node table and the
+ * per-type tx function table with these values, so new types have to be
+ * reflected there as well.
+ */
+#define foreach_session_type                    \
+  _(IP4_TCP, ip4_tcp)                           \
+  _(IP4_UDP, ip4_udp)                           \
+  _(IP6_TCP, ip6_tcp)                           \
+  _(IP6_UDP, ip6_udp)
+
+typedef enum
+{
+#define _(A, a) SESSION_TYPE_##A,
+  foreach_session_type
+#undef _
+    SESSION_N_TYPES,
+} session_type_t;
+
+/*
+ * Application session state
+ *
+ * SESSION_STATE_CLOSED is the state the session-queue node tests for: tx
+ * events that belong to a closed session are skipped.
+ */
+typedef enum
+{
+  SESSION_STATE_LISTENING,
+  SESSION_STATE_CONNECTING,
+  SESSION_STATE_ACCEPTING,
+  SESSION_STATE_READY,
+  SESSION_STATE_CLOSED,
+  SESSION_STATE_N_STATES,
+} stream_session_state_t;
+/*
+ * Stream session.
+ *
+ * The member comments below account for every octet (pad to 8, pad to 64),
+ * so treat the member order and sizes as part of the layout.
+ */
+typedef struct _stream_session_t
+{
+  /** fifo pointers. Once allocated, these do not move.
+   *  The session-queue node reads outgoing data from server_tx_fifo and
+   *  clears the event flag on server_rx_fifo before a builtin rx callback */
+  svm_fifo_t *server_rx_fifo;
+  svm_fifo_t *server_tx_fifo;
+
+  /** Type, used as index into the per session type tables */
+  u8 session_type;
+
+  /** State, compared against stream_session_state_t values */
+  u8 session_state;
+
+  u8 thread_index;		/**< reported as "server thread index" in
+				   session-queue packet traces; presumably
+				   the thread that owns the session - TODO
+				   confirm */
+
+  /** To avoid n**2 "one event per frame" check */
+  u8 enqueue_epoch;
+
+  /** Pad to a multiple of 8 octets */
+  u8 align_pad[4];
+
+  /** svm segment index where fifos were allocated */
+  u32 svm_segment_index;
+
+  /** Session index in per_thread pool */
+  u32 session_index;
+
+  /** Transport specific. Passed, together with the thread index, to the
+   *  transport's get_connection() by the tx path */
+  u32 connection_index;
+
+  /** stream server pool index, resolved with application_get() */
+  u32 app_index;
+
+  /** Parent listener session if the result of an accept */
+  u32 listener_index;
+
+  u32 opaque2;
+
+  /** Opaque, pad to a 64-octet boundary */
+  u64 opaque[2];
+} stream_session_t;
+
+#endif /* SRC_VNET_SESSION_STREAM_SESSION_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
deleted file mode 100644
index abd94ba4f1d..00000000000
--- a/src/vnet/session/transport.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2017 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -u32 -transport_endpoint_lookup (transport_endpoint_table_t *ht, ip46_address_t *ip, - u16 port) -{ - clib_bihash_kv_24_8_t kv; - int rv; - - kv.key[0] = ip->as_u64[0]; - kv.key[1] = ip->as_u64[1]; - kv.key[2] = port; - - rv = clib_bihash_search_inline_24_8 (ht, &kv); - if (rv == 0) - return kv.value; - - return TRANSPORT_ENDPOINT_INVALID_INDEX; -} - -void -transport_endpoint_table_add (transport_endpoint_table_t *ht, - transport_endpoint_t *te, u32 value) -{ - clib_bihash_kv_24_8_t kv; - - kv.key[0] = te->ip.as_u64[0]; - kv.key[1] = te->ip.as_u64[1]; - kv.key[2] = te->port; - kv.value = value; - - clib_bihash_add_del_24_8 (ht, &kv, 1); -} - -void -transport_endpoint_table_del (transport_endpoint_table_t *ht, - transport_endpoint_t *te) -{ - clib_bihash_kv_24_8_t kv; - - kv.key[0] = te->ip.as_u64[0]; - kv.key[1] = te->ip.as_u64[1]; - kv.key[2] = te->port; - - clib_bihash_add_del_24_8 (ht, &kv, 0); -} - - - diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h index 9c38bab9c4f..3895a60af48 100644 --- a/src/vnet/session/transport.h +++ b/src/vnet/session/transport.h @@ -21,6 +21,7 @@ #include #include #include + /* * Protocol independent transport properties associated to a session */ @@ -31,6 +32,7 @@ typedef struct _transport_connection u16 lcl_port; /**< Local port */ u16 rmt_port; /**< Remote port */ u8 proto; /**< Protocol id (also session type) */ + u32 vrf; /**< FIB 
table id */ u32 s_index; /**< Parent session index */ u32 c_index; /**< Connection index in transport pool */ @@ -55,6 +57,7 @@ typedef struct _transport_connection #define c_lcl_port connection.lcl_port #define c_rmt_port connection.rmt_port #define c_proto connection.proto +#define c_vrf connection.vrf #define c_state connection.state #define c_s_index connection.s_index #define c_c_index connection.c_index @@ -66,165 +69,6 @@ typedef struct _transport_connection #define c_rmt_dpo connection.rmt_dpo } transport_connection_t; -/* - * Transport protocol virtual function table - */ -typedef struct _transport_proto_vft -{ - /* - * Setup - */ - u32 (*bind) (u32, ip46_address_t *, u16); - u32 (*unbind) (u32); - int (*open) (ip46_address_t * addr, u16 port_host_byte_order); - void (*close) (u32 conn_index, u32 thread_index); - void (*cleanup) (u32 conn_index, u32 thread_index); - - /* - * Transmission - */ - u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b); - u16 (*send_mss) (transport_connection_t * tc); - u32 (*send_space) (transport_connection_t * tc); - u32 (*tx_fifo_offset) (transport_connection_t * tc); - - /* - * Connection retrieval - */ - transport_connection_t *(*get_connection) (u32 conn_idx, u32 thread_idx); - transport_connection_t *(*get_listener) (u32 conn_index); - transport_connection_t *(*get_half_open) (u32 conn_index); - - /* - * Format - */ - u8 *(*format_connection) (u8 * s, va_list * args); - u8 *(*format_listener) (u8 * s, va_list * args); - u8 *(*format_half_open) (u8 * s, va_list * args); -} transport_proto_vft_t; - -/* *INDENT-OFF* */ -/* 16 octets */ -typedef CLIB_PACKED (struct { - union - { - struct - { - ip4_address_t src; - ip4_address_t dst; - u16 src_port; - u16 dst_port; - /* align by making this 4 octets even though its a 1-bit field - * NOTE: avoid key overlap with other transports that use 5 tuples for - * session identification. 
- */ - u32 proto; - }; - u64 as_u64[2]; - }; -}) v4_connection_key_t; - -typedef CLIB_PACKED (struct { - union - { - struct - { - /* 48 octets */ - ip6_address_t src; - ip6_address_t dst; - u16 src_port; - u16 dst_port; - u32 proto; - u64 unused; - }; - u64 as_u64[6]; - }; -}) v6_connection_key_t; -/* *INDENT-ON* */ - -typedef clib_bihash_kv_16_8_t session_kv4_t; -typedef clib_bihash_kv_48_8_t session_kv6_t; - -always_inline void -make_v4_ss_kv (session_kv4_t * kv, ip4_address_t * lcl, ip4_address_t * rmt, - u16 lcl_port, u16 rmt_port, u8 proto) -{ - v4_connection_key_t *key = (v4_connection_key_t *) kv->key; - - key->src.as_u32 = lcl->as_u32; - key->dst.as_u32 = rmt->as_u32; - key->src_port = lcl_port; - key->dst_port = rmt_port; - key->proto = proto; - - kv->value = ~0ULL; -} - -always_inline void -make_v4_listener_kv (session_kv4_t * kv, ip4_address_t * lcl, u16 lcl_port, - u8 proto) -{ - v4_connection_key_t *key = (v4_connection_key_t *) kv->key; - - key->src.as_u32 = lcl->as_u32; - key->dst.as_u32 = 0; - key->src_port = lcl_port; - key->dst_port = 0; - key->proto = proto; - - kv->value = ~0ULL; -} - -always_inline void -make_v4_ss_kv_from_tc (session_kv4_t * kv, transport_connection_t * t) -{ - return make_v4_ss_kv (kv, &t->lcl_ip.ip4, &t->rmt_ip.ip4, t->lcl_port, - t->rmt_port, t->proto); -} - -always_inline void -make_v6_ss_kv (session_kv6_t * kv, ip6_address_t * lcl, ip6_address_t * rmt, - u16 lcl_port, u16 rmt_port, u8 proto) -{ - v6_connection_key_t *key = (v6_connection_key_t *) kv->key; - - key->src.as_u64[0] = lcl->as_u64[0]; - key->src.as_u64[1] = lcl->as_u64[1]; - key->dst.as_u64[0] = rmt->as_u64[0]; - key->dst.as_u64[1] = rmt->as_u64[1]; - key->src_port = lcl_port; - key->dst_port = rmt_port; - key->proto = proto; - key->unused = 0; - - kv->value = ~0ULL; -} - -always_inline void -make_v6_listener_kv (session_kv6_t * kv, ip6_address_t * lcl, u16 lcl_port, - u8 proto) -{ - v6_connection_key_t *key = (v6_connection_key_t *) kv->key; - - 
key->src.as_u64[0] = lcl->as_u64[0]; - key->src.as_u64[1] = lcl->as_u64[1]; - key->dst.as_u64[0] = 0; - key->dst.as_u64[1] = 0; - key->src_port = lcl_port; - key->dst_port = 0; - key->proto = proto; - key->unused = 0; - - kv->value = ~0ULL; -} - -always_inline void -make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * t) -{ - make_v6_ss_kv (kv, &t->lcl_ip.ip6, &t->rmt_ip.ip6, t->lcl_port, - t->rmt_port, t->proto); -} - typedef enum _transport_proto { TRANSPORT_PROTO_TCP, @@ -239,18 +83,6 @@ typedef struct _transport_endpoint u32 vrf; /** fib table the endpoint is associated with */ } transport_endpoint_t; -typedef clib_bihash_24_8_t transport_endpoint_table_t; - -#define TRANSPORT_ENDPOINT_INVALID_INDEX ((u32)~0) - -u32 -transport_endpoint_lookup (transport_endpoint_table_t * ht, - ip46_address_t * ip, u16 port); -void transport_endpoint_table_add (transport_endpoint_table_t * ht, - transport_endpoint_t * te, u32 value); -void transport_endpoint_table_del (transport_endpoint_table_t * ht, - transport_endpoint_t * te); - #endif /* VNET_VNET_URI_TRANSPORT_H_ */ /* diff --git a/src/vnet/session/transport_interface.c b/src/vnet/session/transport_interface.c new file mode 100644 index 00000000000..eb12aa69475 --- /dev/null +++ b/src/vnet/session/transport_interface.c @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include
+#include
+
+/**
+ * Transport virtual function tables, one slot per session type
+ */
+transport_proto_vft_t *tp_vfts;
+
+/**
+ * Look up the value stored for an (ip, port) endpoint.
+ *
+ * @param ht - endpoint table
+ * @param ip - endpoint address; both 64-bit words are part of the key
+ * @param port - endpoint port
+ * @return the stored value, or TRANSPORT_ENDPOINT_INVALID_INDEX if the
+ *         search fails
+ */
+u32
+transport_endpoint_lookup (transport_endpoint_table_t * ht,
+			   ip46_address_t * ip, u16 port)
+{
+  clib_bihash_kv_24_8_t entry;
+
+  entry.key[0] = ip->as_u64[0];
+  entry.key[1] = ip->as_u64[1];
+  entry.key[2] = port;
+
+  if (clib_bihash_search_inline_24_8 (ht, &entry) != 0)
+    return TRANSPORT_ENDPOINT_INVALID_INDEX;
+
+  return entry.value;
+}
+
+/**
+ * Store a value under an endpoint's (ip, port) key.
+ *
+ * @param ht - endpoint table
+ * @param te - endpoint supplying the address and port of the key
+ * @param value - value to associate with the endpoint
+ */
+void
+transport_endpoint_table_add (transport_endpoint_table_t * ht,
+			      transport_endpoint_t * te, u32 value)
+{
+  clib_bihash_kv_24_8_t entry;
+
+  entry.value = value;
+  entry.key[2] = te->port;
+  entry.key[1] = te->ip.as_u64[1];
+  entry.key[0] = te->ip.as_u64[0];
+
+  clib_bihash_add_del_24_8 (ht, &entry, 1 /* is_add */ );
+}
+
+/**
+ * Remove the entry stored under an endpoint's (ip, port) key.
+ *
+ * @param ht - endpoint table
+ * @param te - endpoint supplying the address and port of the key
+ */
+void
+transport_endpoint_table_del (transport_endpoint_table_t * ht,
+			      transport_endpoint_t * te)
+{
+  clib_bihash_kv_24_8_t entry;
+
+  entry.key[2] = te->port;
+  entry.key[1] = te->ip.as_u64[1];
+  entry.key[0] = te->ip.as_u64[0];
+
+  clib_bihash_add_del_24_8 (ht, &entry, 0 /* is_add */ );
+}
+
+/**
+ * Register a transport's virtual function table for a session type.
+ *
+ * The table is copied, so the caller's copy need not outlive the call.
+ *
+ * @param session_type - session type (not protocol type)
+ * @param vft - virtual function table
+ */
+void
+session_register_transport (u8 session_type,
+			    const transport_proto_vft_t * vft)
+{
+  /* A transport that supplies an offset function gets peek semantics,
+   * any other transport gets dequeue semantics */
+  u8 peek_not_dequeue = (vft->tx_fifo_offset != 0);
+
+  vec_validate (tp_vfts, session_type);
+  tp_vfts[session_type] = *vft;
+
+  session_manager_set_transport_rx_fn (session_type, peek_not_dequeue);
+}
+
+/**
+ * Fetch the virtual function table registered for a session type.
+ *
+ * @param session_type - session type (not protocol type)
+ * @return the table, or 0 when session_type lies beyond the end of the
+ *         registration vector
+ */
+transport_proto_vft_t *
+session_get_transport_vft (u8 session_type)
+{
+  return (session_type < vec_len (tp_vfts)) ? &tp_vfts[session_type] : 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/transport_interface.h b/src/vnet/session/transport_interface.h
new file mode 100644
index 00000000000..b7e86ee7960
--- /dev/null
+++ b/src/vnet/session/transport_interface.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_VNET_SESSION_TRANSPORT_INTERFACE_H_
+#define SRC_VNET_SESSION_TRANSPORT_INTERFACE_H_
+
+#include
+#include
+
+/*
+ * Transport protocol virtual function table
+ *
+ * One table is registered per session type with
+ * session_register_transport(), which stores a copy of it.
+ */
+typedef struct _transport_proto_vft
+{
+  /*
+   * Setup
+   */
+  u32 (*bind) (u32 session_index, transport_endpoint_t * lcl);
+  u32 (*unbind) (u32);
+  int (*open) (transport_endpoint_t * rmt);
+  void (*close) (u32 conn_index, u32 thread_index);
+  void (*cleanup) (u32 conn_index, u32 thread_index);
+
+  /*
+   * Transmission
+   *
+   * Hooks used by the session-queue tx path (session_node.c):
+   *  push_header    - called once per segment, after the first buffer's
+   *                   current_length and
+   *                   total_length_not_including_first_buffer are set
+   *  send_mss       - used as the payload size of a full segment; 0 means
+   *                   nothing can be sent and the event is kept pending
+   *  send_space     - upper bound on the bytes sent for one event; 0 means
+   *                   nothing can be sent and the event is kept pending
+   *  tx_fifo_offset - optional. When it is set, registration selects peek
+   *                   mode and the returned value is the tx fifo offset
+   *                   peeking starts from; when it is 0 data is dequeued
+   */
+  u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b);
+  u16 (*send_mss) (transport_connection_t * tc);
+  u32 (*send_space) (transport_connection_t * tc);
+  u32 (*tx_fifo_offset) (transport_connection_t * tc);
+
+  /*
+   * Connection retrieval
+   *
+   * The tx path calls get_connection with the session's connection_index
+   * and the index of the thread it runs on.
+   */
+  transport_connection_t *(*get_connection) (u32 conn_idx, u32 thread_idx);
+  transport_connection_t *(*get_listener) (u32 conn_index);
+  transport_connection_t *(*get_half_open) (u32 conn_index);
+
+  /*
+   * Format
+   */
+  u8 *(*format_connection) (u8 * s, va_list * args);
+  u8 *(*format_listener) (u8 * s, va_list * args);
+  u8 *(*format_half_open) (u8 * s, va_list * args);
+} transport_proto_vft_t;
+
+typedef clib_bihash_24_8_t transport_endpoint_table_t;
+
+#define TRANSPORT_ENDPOINT_INVALID_INDEX ((u32)~0)
+/*
+ * Endpoint table: maps an endpoint (the two 64-bit words of its ip
+ * address plus its port) to a u32 value. Lookup returns the stored value,
+ * or TRANSPORT_ENDPOINT_INVALID_INDEX when the search fails.
+ */
+u32 transport_endpoint_lookup (transport_endpoint_table_t * ht,
+			       ip46_address_t * ip, u16 port);
+void transport_endpoint_table_add (transport_endpoint_table_t * ht,
+				   transport_endpoint_t * te, u32 value);
+void transport_endpoint_table_del (transport_endpoint_table_t * ht,
+				   transport_endpoint_t * te);
+/*
+ * Transport registration. session_type is the session type, not the
+ * protocol type. session_get_transport_vft() returns 0 for a session type
+ * beyond the end of the registration vector.
+ */
+void session_register_transport (u8 session_type,
+				 const transport_proto_vft_t * vft);
+transport_proto_vft_t *session_get_transport_vft (u8 session_type);
+
+#endif /* SRC_VNET_SESSION_TRANSPORT_INTERFACE_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
-- cgit 1.2.3-korg