diff options
author | Marvin Liu <yong.liu@intel.com> | 2022-08-16 06:49:09 +0000 |
---|---|---|
committer | Florin Coras <florin.coras@gmail.com> | 2022-09-15 23:12:55 +0000 |
commit | 0654242d1ef51566f0d58445a16053cf376e5a6e (patch) | |
tree | ff86c0b35806f76dfd95e98bf85e5bdf3521a2e4 /src/vnet | |
parent | 6e1eaad216c41ce1cb4af13a2214f4d86e094414 (diff) |
session: support dma option
Add DMA support to session, accelerate host-stack with DMA
Type: feature
Signed-off-by: Marvin Liu <yong.liu@intel.com>
Signed-off-by: Junfeng Wang <drenfong.wang@intel.com>
Change-Id: I3d492921d69d9e3e0b34d33adc33fba3bde9e1cc
Diffstat (limited to 'src/vnet')
-rw-r--r-- | src/vnet/session/session.c | 86 | ||||
-rw-r--r-- | src/vnet/session/session.h | 20 | ||||
-rw-r--r-- | src/vnet/session/session_node.c | 171 |
3 files changed, 257 insertions, 20 deletions
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 852f87da677..3643e91a33a 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -22,6 +22,7 @@ #include <vnet/dpo/load_balance.h> #include <vnet/fib/ip4_fib.h> #include <vlib/stats/stats.h> +#include <vlib/dma/dma.h> session_main_t session_main; @@ -1988,6 +1989,86 @@ session_manager_main_disable (vlib_main_t * vm) transport_enable_disable (vm, 0 /* is_en */ ); } +/* in this new callback, cookie hint the index */ +void +session_dma_completion_cb (vlib_main_t *vm, struct vlib_dma_batch *batch) +{ + session_worker_t *wrk; + wrk = session_main_get_worker (vm->thread_index); + session_dma_transfer *dma_transfer; + + dma_transfer = &wrk->dma_trans[wrk->trans_head]; + vec_add (wrk->pending_tx_buffers, dma_transfer->pending_tx_buffers, + vec_len (dma_transfer->pending_tx_buffers)); + vec_add (wrk->pending_tx_nexts, dma_transfer->pending_tx_nexts, + vec_len (dma_transfer->pending_tx_nexts)); + vec_reset_length (dma_transfer->pending_tx_buffers); + vec_reset_length (dma_transfer->pending_tx_nexts); + wrk->trans_head++; + if (wrk->trans_head == wrk->trans_size) + wrk->trans_head = 0; + return; +} + +static void +session_prepare_dma_args (vlib_dma_config_t *args) +{ + args->max_transfers = DMA_TRANS_SIZE; + args->max_transfer_size = 65536; + args->features = 0; + args->sw_fallback = 1; + args->barrier_before_last = 1; + args->callback_fn = session_dma_completion_cb; +} + +static void +session_node_enable_dma (u8 is_en, int n_vlibs) +{ + vlib_dma_config_t args; + session_prepare_dma_args (&args); + session_worker_t *wrk; + vlib_main_t *vm; + + int config_index = -1; + + if (is_en) + { + vm = vlib_get_main_by_index (0); + config_index = vlib_dma_config_add (vm, &args); + } + else + { + vm = vlib_get_main_by_index (0); + wrk = session_main_get_worker (0); + if (wrk->config_index >= 0) + vlib_dma_config_del (vm, wrk->config_index); + } + int i; + for (i = 0; i < n_vlibs; i++) + { + vm 
= vlib_get_main_by_index (i); + wrk = session_main_get_worker (vm->thread_index); + wrk->config_index = config_index; + if (is_en) + { + if (config_index >= 0) + wrk->dma_enabled = true; + wrk->dma_trans = (session_dma_transfer *) clib_mem_alloc ( + sizeof (session_dma_transfer) * DMA_TRANS_SIZE); + bzero (wrk->dma_trans, + sizeof (session_dma_transfer) * DMA_TRANS_SIZE); + } + else + { + if (wrk->dma_trans) + clib_mem_free (wrk->dma_trans); + } + wrk->trans_head = 0; + wrk->trans_tail = 0; + wrk->trans_size = DMA_TRANS_SIZE; + } +} + void session_node_enable_disable (u8 is_en) { @@ -2028,6 +2109,9 @@ session_node_enable_disable (u8 is_en) if (sm->use_private_rx_mqs) application_enable_rx_mqs_nodes (is_en); + + if (sm->dma_enabled) + session_node_enable_dma (is_en, n_vlibs); } clib_error_t * @@ -2170,6 +2254,8 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input) smm->use_private_rx_mqs = 1; else if (unformat (input, "no-adaptive")) smm->no_adaptive = 1; + else if (unformat (input, "use-dma")) + smm->dma_enabled = 1; /* * Deprecated but maintained for compatibility */ diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index 68ed8431605..a68e51239bd 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -21,6 +21,7 @@ #include <vnet/session/session_debug.h> #include <svm/message_queue.h> #include <svm/fifo_segment.h> +#include <vlib/dma/dma.h> #define foreach_session_input_error \ _(NO_SESSION, "No session drops") \ @@ -85,6 +86,13 @@ typedef enum session_wrk_flags_ SESSION_WRK_F_ADAPTIVE = 1 << 0, } __clib_packed session_wrk_flag_t; +#define DMA_TRANS_SIZE 1024 +typedef struct +{ + u32 *pending_tx_buffers; + u16 *pending_tx_nexts; +} session_dma_transfer; + typedef struct session_worker_ { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); @@ -155,6 +163,15 @@ typedef struct session_worker_ /** List head for first worker evts pending handling on main */ clib_llist_index_t evts_pending_main; + int config_index; + u8 
dma_enabled; + session_dma_transfer *dma_trans; + u16 trans_head; + u16 trans_tail; + u16 trans_size; + u16 batch_num; + vlib_dma_batch_t *batch; + #if SESSION_DEBUG /** last event poll time by thread */ clib_time_type_t last_event_poll; @@ -237,6 +254,9 @@ typedef struct session_main_ /** Session ssvm segment configs*/ uword wrk_mqs_segment_size; + /** Session enable dma*/ + u8 dma_enabled; + /** Session table size parameters */ u32 configured_v4_session_table_buckets; u32 configured_v4_session_table_memory; diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index 510ffa2617f..1908a58f08f 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -899,13 +899,111 @@ session_tx_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_set_trace_count (vm, node, n_trace); } +always_inline int +session_tx_fill_dma_transfers (session_worker_t *wrk, + session_tx_context_t *ctx, vlib_buffer_t *b) +{ + vlib_main_t *vm = wrk->vm; + u32 len_to_deq; + u8 *data0 = NULL; + int n_bytes_read, len_write; + svm_fifo_seg_t data_fs[2]; + + u32 n_segs = 2; + u16 n_transfers = 0; + /* + * Start with the first buffer in chain + */ + b->error = 0; + b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED; + b->current_data = 0; + data0 = vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN); + len_to_deq = clib_min (ctx->left_to_snd, ctx->deq_per_first_buf); + + n_bytes_read = svm_fifo_segments (ctx->s->tx_fifo, ctx->sp.tx_offset, + data_fs, &n_segs, len_to_deq); + + len_write = n_bytes_read; + ASSERT (n_bytes_read == len_to_deq); + + while (n_bytes_read) + { + wrk->batch_num++; + vlib_dma_batch_add (vm, wrk->batch, data0, data_fs[n_transfers].data, + data_fs[n_transfers].len); + data0 += data_fs[n_transfers].len; + n_bytes_read -= data_fs[n_transfers].len; + n_transfers++; + } + return len_write; +} + +always_inline int +session_tx_fill_dma_transfers_tail (session_worker_t *wrk, + session_tx_context_t *ctx, + vlib_buffer_t *b, u32 
len_to_deq, u8 *data) +{ + vlib_main_t *vm = wrk->vm; + int n_bytes_read, len_write; + svm_fifo_seg_t data_fs[2]; + u32 n_segs = 2; + u16 n_transfers = 0; + + n_bytes_read = svm_fifo_segments (ctx->s->tx_fifo, ctx->sp.tx_offset, + data_fs, &n_segs, len_to_deq); + + len_write = n_bytes_read; + + ASSERT (n_bytes_read == len_to_deq); + + while (n_bytes_read) + { + wrk->batch_num++; + vlib_dma_batch_add (vm, wrk->batch, data, data_fs[n_transfers].data, + data_fs[n_transfers].len); + data += data_fs[n_transfers].len; + n_bytes_read -= data_fs[n_transfers].len; + n_transfers++; + } + + return len_write; +} + +always_inline int +session_tx_copy_data (session_worker_t *wrk, session_tx_context_t *ctx, + vlib_buffer_t *b, u32 len_to_deq, u8 *data0) +{ + int n_bytes_read; + if (PREDICT_TRUE (!wrk->dma_enabled)) + n_bytes_read = + svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, len_to_deq, data0); + else + n_bytes_read = session_tx_fill_dma_transfers (wrk, ctx, b); + return n_bytes_read; +} + +always_inline int +session_tx_copy_data_tail (session_worker_t *wrk, session_tx_context_t *ctx, + vlib_buffer_t *b, u32 len_to_deq, u8 *data) +{ + int n_bytes_read; + if (PREDICT_TRUE (!wrk->dma_enabled)) + n_bytes_read = + svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, len_to_deq, data); + else + n_bytes_read = + session_tx_fill_dma_transfers_tail (wrk, ctx, b, len_to_deq, data); + return n_bytes_read; +} + always_inline void -session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx, - vlib_buffer_t * b, u16 * n_bufs, u8 peek_data) +session_tx_fifo_chain_tail (session_worker_t *wrk, session_tx_context_t *ctx, + vlib_buffer_t *b, u16 *n_bufs, u8 peek_data) { + vlib_main_t *vm = wrk->vm; vlib_buffer_t *chain_b, *prev_b; u32 chain_bi0, to_deq, left_from_seg; - u16 len_to_deq, n_bytes_read; + int len_to_deq, n_bytes_read; u8 *data, j; b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; @@ -927,8 +1025,8 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * 
ctx, data = vlib_buffer_get_current (chain_b); if (peek_data) { - n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo, - ctx->sp.tx_offset, len_to_deq, data); + n_bytes_read = + session_tx_copy_data_tail (wrk, ctx, b, len_to_deq, data); ctx->sp.tx_offset += n_bytes_read; } else @@ -985,13 +1083,12 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx, } always_inline void -session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, - vlib_buffer_t * b, u16 * n_bufs, u8 peek_data) +session_tx_fill_buffer (session_worker_t *wrk, session_tx_context_t *ctx, + vlib_buffer_t *b, u16 *n_bufs, u8 peek_data) { u32 len_to_deq; u8 *data0; int n_bytes_read; - /* * Start with the first buffer in chain */ @@ -1004,8 +1101,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, if (peek_data) { - n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, - len_to_deq, data0); + n_bytes_read = session_tx_copy_data (wrk, ctx, b, len_to_deq, data0); ASSERT (n_bytes_read > 0); /* Keep track of progress locally, transport is also supposed to * increment it independently when pushing the header */ @@ -1052,6 +1148,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, ASSERT (n_bytes_read > 0); } } + b->current_length = n_bytes_read; ctx->left_to_snd -= n_bytes_read; @@ -1059,7 +1156,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, * Fill in the remaining buffers in the chain, if any */ if (PREDICT_FALSE (ctx->n_bufs_per_seg > 1 && ctx->left_to_snd)) - session_tx_fifo_chain_tail (vm, ctx, b, n_bufs, peek_data); + session_tx_fifo_chain_tail (wrk, ctx, b, n_bufs, peek_data); } always_inline u8 @@ -1252,6 +1349,22 @@ session_tx_maybe_reschedule (session_worker_t * wrk, } } +always_inline void +session_tx_add_pending_buffer (session_worker_t *wrk, u32 bi, u32 next_index) +{ + if (PREDICT_TRUE (!wrk->dma_enabled)) + { + vec_add1 (wrk->pending_tx_buffers, bi); + vec_add1 (wrk->pending_tx_nexts, 
next_index); + } + else + { + session_dma_transfer *dma_transfer = &wrk->dma_trans[wrk->trans_tail]; + vec_add1 (dma_transfer->pending_tx_buffers, bi); + vec_add1 (dma_transfer->pending_tx_nexts, next_index); + } +} + always_inline int session_tx_fifo_read_and_snd_i (session_worker_t * wrk, vlib_node_runtime_t * node, @@ -1393,17 +1506,15 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, b0 = vlib_get_buffer (vm, bi0); b1 = vlib_get_buffer (vm, bi1); - session_tx_fill_buffer (vm, ctx, b0, &n_bufs, peek_data); - session_tx_fill_buffer (vm, ctx, b1, &n_bufs, peek_data); + session_tx_fill_buffer (wrk, ctx, b0, &n_bufs, peek_data); + session_tx_fill_buffer (wrk, ctx, b1, &n_bufs, peek_data); ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left] = b0; ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left + 1] = b1; n_left -= 2; - vec_add1 (wrk->pending_tx_buffers, bi0); - vec_add1 (wrk->pending_tx_buffers, bi1); - vec_add1 (wrk->pending_tx_nexts, next_index); - vec_add1 (wrk->pending_tx_nexts, next_index); + session_tx_add_pending_buffer (wrk, bi0, next_index); + session_tx_add_pending_buffer (wrk, bi1, next_index); } while (n_left) { @@ -1419,13 +1530,12 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk, bi0 = ctx->tx_buffers[--n_bufs]; b0 = vlib_get_buffer (vm, bi0); - session_tx_fill_buffer (vm, ctx, b0, &n_bufs, peek_data); + session_tx_fill_buffer (wrk, ctx, b0, &n_bufs, peek_data); ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left] = b0; n_left -= 1; - vec_add1 (wrk->pending_tx_buffers, bi0); - vec_add1 (wrk->pending_tx_nexts, next_index); + session_tx_add_pending_buffer (wrk, bi0, next_index); } /* Ask transport to push headers */ @@ -1825,6 +1935,13 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, n_tx_packets = vec_len (wrk->pending_tx_buffers); SESSION_EVT (SESSION_EVT_DSP_CNTRS, UPDATE_TIME, wrk); + if (PREDICT_FALSE (wrk->dma_enabled)) + { + if (wrk->trans_head == ((wrk->trans_tail + 1) & (wrk->trans_size 
- 1))) + return 0; + wrk->batch = vlib_dma_batch_new (vm, wrk->config_index); + } + /* * Dequeue new internal mq events */ @@ -1894,6 +2011,20 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, }; } + if (PREDICT_FALSE (wrk->dma_enabled)) + { + if (wrk->batch_num) + { + vlib_dma_batch_set_cookie (vm, wrk->batch, wrk->trans_tail); + wrk->batch_num = 0; + wrk->trans_tail++; + if (wrk->trans_tail == wrk->trans_size) + wrk->trans_tail = 0; + } + + vlib_dma_batch_submit (vm, wrk->batch); + } + SESSION_EVT (SESSION_EVT_DSP_CNTRS, OLD_IO_EVTS, wrk); if (vec_len (wrk->pending_tx_buffers)) |