Diffstat (limited to 'src/plugins/dpdk')
-rw-r--r--   src/plugins/dpdk/CMakeLists.txt                        |   20
-rw-r--r--   src/plugins/dpdk/buffer.c                              |   28
-rw-r--r--   src/plugins/dpdk/cryptodev/cryptodev.c                 |  432
-rw-r--r--   src/plugins/dpdk/cryptodev/cryptodev.h                 |  204
-rw-r--r--   src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c    |  467
-rw-r--r--   src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c   |  428
-rw-r--r--   src/plugins/dpdk/device/cli.c                          |   53
-rw-r--r--   src/plugins/dpdk/device/common.c                       |  276
-rw-r--r--   src/plugins/dpdk/device/device.c                       |  137
-rw-r--r--   src/plugins/dpdk/device/dpdk.h                         |  287
-rw-r--r--   src/plugins/dpdk/device/dpdk_priv.h                    |  142
-rw-r--r--   src/plugins/dpdk/device/driver.c                       |  154
-rw-r--r--   src/plugins/dpdk/device/flow.c                         |  226
-rw-r--r--   src/plugins/dpdk/device/format.c                       |  601
-rw-r--r--   src/plugins/dpdk/device/init.c                         | 1343
-rw-r--r--   src/plugins/dpdk/device/node.c                         |  150
-rw-r--r--   src/plugins/dpdk/main.c                                |   13
-rw-r--r--   src/plugins/dpdk/thread.c                              |   85
18 files changed, 2787 insertions, 2259 deletions
diff --git a/src/plugins/dpdk/CMakeLists.txt b/src/plugins/dpdk/CMakeLists.txt
index 5de75e76289..48c56f35282 100644
--- a/src/plugins/dpdk/CMakeLists.txt
+++ b/src/plugins/dpdk/CMakeLists.txt
@@ -90,8 +90,10 @@ else()
##############################################################################
# libnuma
##############################################################################
- vpp_plugin_find_library(dpdk NUMA_LIB "numa")
- list(APPEND DPDK_LINK_LIBRARIES ${NUMA_LIB})
+ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
+ vpp_plugin_find_library(dpdk NUMA_LIB "numa")
+ list(APPEND DPDK_LINK_LIBRARIES ${NUMA_LIB})
+ endif()
##############################################################################
# Mellanox libraries
@@ -103,12 +105,10 @@ else()
list(APPEND DPDK_LINK_LIBRARIES "${MNL_LIB}")
else()
message(WARNING "EXPERIMENTAL: DPDK plugin without dlopen mode")
- vpp_plugin_find_library(dpdk IBVERBS_LIB "libibverbs.a")
- vpp_plugin_find_library(dpdk MLX5_LIB "libmlx5.a")
- vpp_plugin_find_library(dpdk MLX4_LIB "libmlx4.a")
- vpp_plugin_find_library(dpdk CCAN_LIB "libccan.a")
- vpp_plugin_find_library(dpdk RDMA_UTIL_LIB "rdma_util")
- string_append(DPDK_LINK_FLAGS "-Wl,--whole-archive,${IBVERBS_LIB},${MLX5_LIB},${MLX4_LIB},${CCAN_LIB},${RDMA_UTIL_LIB},--no-whole-archive")
+ vpp_plugin_find_library(dpdk IBVERBS_LIB "libibverbs.a")
+ vpp_plugin_find_library(dpdk MLX5_LIB "libmlx5.a")
+ vpp_plugin_find_library(dpdk MLX4_LIB "libmlx4.a")
+ string_append(DPDK_LINK_FLAGS "-Wl,--whole-archive,${IBVERBS_LIB},${MLX5_LIB},${MLX4_LIB} -Wl,--no-whole-archive,--exclude-libs,ALL")
endif()
endif()
endif()
@@ -131,10 +131,10 @@ add_vpp_plugin(dpdk
SOURCES
buffer.c
main.c
- thread.c
device/cli.c
device/common.c
device/device.c
+ device/driver.c
device/flow.c
device/format.c
device/init.c
@@ -158,7 +158,7 @@ add_vpp_plugin(dpdk
${DPDK_LINK_LIBRARIES}
LINK_LIBRARIES
- ${OPENSSL_LIBRARIES}
+ ${OPENSSL_CRYPTO_LIBRARIES}
COMPONENT
vpp-plugin-dpdk
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c
index 97184519428..f3137a996d6 100644
--- a/src/plugins/dpdk/buffer.c
+++ b/src/plugins/dpdk/buffer.c
@@ -19,6 +19,7 @@
#include <rte_config.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>
+#include <rte_cryptodev.h>
#include <rte_vfio.h>
#include <rte_version.h>
@@ -115,6 +116,9 @@ dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp)
mp->populated_size++;
nmp->populated_size++;
}
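+  /* DPDK marks mempools it believes are unused for device IO with
+   * RTE_MEMPOOL_F_NON_IO; clear the flag so this externally populated pool
+   * stays eligible for the DMA mapping performed below. */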
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 3, 0, 0)
+ mp->flags &= ~RTE_MEMPOOL_F_NON_IO;
+#endif
/* call the object initializers */
rte_mempool_obj_iter (mp, rte_pktmbuf_init, 0);
@@ -131,11 +135,11 @@ dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp)
{
vlib_buffer_t *b;
b = vlib_buffer_ptr_from_index (buffer_mem_start, bp->buffers[i], 0);
- vlib_buffer_copy_template (b, &bp->buffer_template);
+ b->template = bp->buffer_template;
}
/* map DMA pages if at least one physical device exists */
- if (rte_eth_dev_count_avail ())
+ if (rte_eth_dev_count_avail () || rte_cryptodev_count ())
{
uword i;
size_t page_sz;
@@ -193,7 +197,7 @@ dpdk_ops_vpp_free (struct rte_mempool *mp)
#endif
static_always_inline void
-dpdk_ops_vpp_enqueue_one (vlib_buffer_t * bt, void *obj)
+dpdk_ops_vpp_enqueue_one (vlib_buffer_template_t *bt, void *obj)
{
/* Only non-replicated packets (b->ref_count == 1) expected */
@@ -201,7 +205,7 @@ dpdk_ops_vpp_enqueue_one (vlib_buffer_t * bt, void *obj)
vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
ASSERT (b->ref_count == 1);
ASSERT (b->buffer_pool_index == bt->buffer_pool_index);
- vlib_buffer_copy_template (b, bt);
+ b->template = *bt;
}
int
@@ -210,14 +214,14 @@ CLIB_MULTIARCH_FN (dpdk_ops_vpp_enqueue) (struct rte_mempool * mp,
{
const int batch_size = 32;
vlib_main_t *vm = vlib_get_main ();
- vlib_buffer_t bt;
+ vlib_buffer_template_t bt;
u8 buffer_pool_index = mp->pool_id;
vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
u32 bufs[batch_size];
u32 n_left = n;
void *const *obj = obj_table;
- vlib_buffer_copy_template (&bt, &bp->buffer_template);
+ bt = bp->buffer_template;
while (n_left >= 4)
{
@@ -259,9 +263,9 @@ CLIB_MULTIARCH_FN (dpdk_ops_vpp_enqueue) (struct rte_mempool * mp,
CLIB_MARCH_FN_REGISTRATION (dpdk_ops_vpp_enqueue);
static_always_inline void
-dpdk_ops_vpp_enqueue_no_cache_one (vlib_main_t * vm, struct rte_mempool *old,
+dpdk_ops_vpp_enqueue_no_cache_one (vlib_main_t *vm, struct rte_mempool *old,
struct rte_mempool *new, void *obj,
- vlib_buffer_t * bt)
+ vlib_buffer_template_t *bt)
{
struct rte_mbuf *mb = obj;
vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
@@ -269,7 +273,7 @@ dpdk_ops_vpp_enqueue_no_cache_one (vlib_main_t * vm, struct rte_mempool *old,
if (clib_atomic_sub_fetch (&b->ref_count, 1) == 0)
{
u32 bi = vlib_get_buffer_index (vm, b);
- vlib_buffer_copy_template (b, bt);
+ b->template = *bt;
vlib_buffer_pool_put (vm, bt->buffer_pool_index, &bi, 1);
return;
}
@@ -281,12 +285,12 @@ CLIB_MULTIARCH_FN (dpdk_ops_vpp_enqueue_no_cache) (struct rte_mempool * cmp,
unsigned n)
{
vlib_main_t *vm = vlib_get_main ();
- vlib_buffer_t bt;
+ vlib_buffer_template_t bt;
struct rte_mempool *mp;
mp = dpdk_mempool_by_buffer_pool_index[cmp->pool_id];
u8 buffer_pool_index = cmp->pool_id;
vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
- vlib_buffer_copy_template (&bt, &bp->buffer_template);
+ bt = bp->buffer_template;
while (n >= 4)
{
@@ -456,11 +460,9 @@ dpdk_buffer_pools_create (vlib_main_t * vm)
ops.dequeue = dpdk_ops_vpp_dequeue_no_cache;
rte_mempool_register_ops (&ops);
- /* *INDENT-OFF* */
vec_foreach (bp, vm->buffer_main->buffer_pools)
if (bp->start && (err = dpdk_buffer_pool_init (vm, bp)))
return err;
- /* *INDENT-ON* */
return 0;
}
diff --git a/src/plugins/dpdk/cryptodev/cryptodev.c b/src/plugins/dpdk/cryptodev/cryptodev.c
index d52fa407ec5..43c2c879aab 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev.c
+++ b/src/plugins/dpdk/cryptodev/cryptodev.c
@@ -29,7 +29,6 @@
#include <rte_cryptodev.h>
#include <rte_crypto_sym.h>
#include <rte_crypto.h>
-#include <rte_cryptodev_pmd.h>
#include <rte_config.h>
#include "cryptodev.h"
@@ -52,12 +51,19 @@ prepare_aead_xform (struct rte_crypto_sym_xform *xform,
xform->type = RTE_CRYPTO_SYM_XFORM_AEAD;
xform->next = 0;
- if (key->alg != VNET_CRYPTO_ALG_AES_128_GCM &&
- key->alg != VNET_CRYPTO_ALG_AES_192_GCM &&
- key->alg != VNET_CRYPTO_ALG_AES_256_GCM)
+ if (key->alg == VNET_CRYPTO_ALG_AES_128_GCM ||
+ key->alg == VNET_CRYPTO_ALG_AES_192_GCM ||
+ key->alg == VNET_CRYPTO_ALG_AES_256_GCM)
+ {
+ aead_xform->algo = RTE_CRYPTO_AEAD_AES_GCM;
+ }
+ else if (key->alg == VNET_CRYPTO_ALG_CHACHA20_POLY1305)
+ {
+ aead_xform->algo = RTE_CRYPTO_AEAD_CHACHA20_POLY1305;
+ }
+ else
return -1;
- aead_xform->algo = RTE_CRYPTO_AEAD_AES_GCM;
aead_xform->op = (op_type == CRYPTODEV_OP_TYPE_ENCRYPT) ?
RTE_CRYPTO_AEAD_OP_ENCRYPT : RTE_CRYPTO_AEAD_OP_DECRYPT;
aead_xform->aad_length = aad_len;
@@ -135,7 +141,7 @@ prepare_linked_xform (struct rte_crypto_sym_xform *xforms,
}
static_always_inline void
-cryptodev_session_del (struct rte_cryptodev_sym_session *sess)
+cryptodev_session_del (cryptodev_session_t *sess)
{
u32 n_devs, i;
@@ -145,9 +151,14 @@ cryptodev_session_del (struct rte_cryptodev_sym_session *sess)
n_devs = rte_cryptodev_count ();
for (i = 0; i < n_devs; i++)
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (rte_cryptodev_sym_session_free (i, sess) == 0)
+ break;
+#else
rte_cryptodev_sym_session_clear (i, sess);
rte_cryptodev_sym_session_free (sess);
+#endif
}
static int
@@ -310,7 +321,7 @@ cryptodev_sess_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop,
if (cryptodev_check_supported_vnet_alg (key) == 0)
return;
- vec_validate (ckey->keys, idx);
+ vec_validate (ckey->keys, vec_len (cmt->per_numa_data) - 1);
vec_foreach_index (i, ckey->keys)
vec_validate (ckey->keys[i], CRYPTODEV_N_OP_TYPES - 1);
}
@@ -322,6 +333,59 @@ cryptodev_key_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop,
cryptodev_sess_handler (vm, kop, idx, 8);
}
+clib_error_t *
+allocate_session_pools (u32 numa_node,
+ cryptodev_session_pool_t *sess_pools_elt, u32 len)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ u8 *name;
+ clib_error_t *error = NULL;
+
+ name = format (0, "vcrypto_sess_pool_%u_%04x%c", numa_node, len, 0);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ sess_pools_elt->sess_pool = rte_cryptodev_sym_session_pool_create (
+ (char *) name, CRYPTODEV_NB_SESSION, cmt->sess_sz, 0, 0, numa_node);
+#else
+ sess_pools_elt->sess_pool = rte_cryptodev_sym_session_pool_create (
+ (char *) name, CRYPTODEV_NB_SESSION, 0, 0, 0, numa_node);
+#endif
+
+ if (!sess_pools_elt->sess_pool)
+ {
+ error = clib_error_return (0, "Not enough memory for mp %s", name);
+ goto clear_mempools;
+ }
+ vec_free (name);
+
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ name = format (0, "crypto_sess_pool_%u_%04x%c", numa_node, len, 0);
+ sess_pools_elt->sess_priv_pool = rte_mempool_create (
+ (char *) name, CRYPTODEV_NB_SESSION * (cmt->drivers_cnt), cmt->sess_sz, 0,
+ 0, NULL, NULL, NULL, NULL, numa_node, 0);
+
+ if (!sess_pools_elt->sess_priv_pool)
+ {
+ error = clib_error_return (0, "Not enough memory for mp %s", name);
+ goto clear_mempools;
+ }
+ vec_free (name);
+#endif
+
+clear_mempools:
+ if (error)
+ {
+ vec_free (name);
+ if (sess_pools_elt->sess_pool)
+ rte_mempool_free (sess_pools_elt->sess_pool);
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ if (sess_pools_elt->sess_priv_pool)
+ rte_mempool_free (sess_pools_elt->sess_priv_pool);
+#endif
+ return error;
+ }
+ return 0;
+}
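The version split above follows the DPDK 22.11 session API rework: the separate per-driver private-data pool disappears, and rte_cryptodev_sym_session_pool_create() takes the private session size (cmt->sess_sz) directly. A minimal sketch of the two layouts, assuming sess_sz and numa_node are in scope:

/* Sketch only: one combined pool on DPDK >= 22.11, two pools before. */
#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
struct rte_mempool *sess_pool = rte_cryptodev_sym_session_pool_create (
  "sketch_sess_pool", 4096, sess_sz, 0, 0, numa_node);
#else
struct rte_mempool *sess_pool = rte_cryptodev_sym_session_pool_create (
  "sketch_sess_pool", 4096, 0, 0, 0, numa_node);
struct rte_mempool *sess_priv_pool =
  rte_mempool_create ("sketch_sess_priv_pool", 4096, sess_sz, 0, 0, NULL,
		      NULL, NULL, NULL, numa_node, 0);
#endif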
+
int
cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx,
u32 aad_len)
@@ -330,52 +394,106 @@ cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx,
cryptodev_numa_data_t *numa_data;
cryptodev_inst_t *dev_inst;
vnet_crypto_key_t *key = vnet_crypto_get_key (idx);
- struct rte_mempool *sess_pool, *sess_priv_pool;
+ struct rte_mempool *sess_pool;
+ cryptodev_session_pool_t *sess_pools_elt;
cryptodev_key_t *ckey = vec_elt_at_index (cmt->keys, idx);
struct rte_crypto_sym_xform xforms_enc[2] = { { 0 } };
struct rte_crypto_sym_xform xforms_dec[2] = { { 0 } };
- struct rte_cryptodev_sym_session *sessions[CRYPTODEV_N_OP_TYPES] = { 0 };
+ cryptodev_session_t *sessions[CRYPTODEV_N_OP_TYPES] = { 0 };
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ struct rte_mempool *sess_priv_pool;
+ struct rte_cryptodev_info dev_info;
+#endif
u32 numa_node = vm->numa_node;
- int ret;
+ clib_error_t *error;
+ int ret = 0;
+ u8 found = 0;
numa_data = vec_elt_at_index (cmt->per_numa_data, numa_node);
- sess_pool = numa_data->sess_pool;
- sess_priv_pool = numa_data->sess_priv_pool;
- sessions[CRYPTODEV_OP_TYPE_ENCRYPT] =
- rte_cryptodev_sym_session_create (sess_pool);
- if (!sessions[CRYPTODEV_OP_TYPE_ENCRYPT])
+ clib_spinlock_lock (&cmt->tlock);
+ vec_foreach (sess_pools_elt, numa_data->sess_pools)
{
- ret = -1;
- goto clear_key;
+ if (sess_pools_elt->sess_pool == NULL)
+ {
+ error = allocate_session_pools (numa_node, sess_pools_elt,
+ vec_len (numa_data->sess_pools) - 1);
+ if (error)
+ {
+ ret = -1;
+ goto clear_key;
+ }
+ }
+ if (rte_mempool_avail_count (sess_pools_elt->sess_pool) >= 2)
+ {
+ found = 1;
+ break;
+ }
}
- sessions[CRYPTODEV_OP_TYPE_DECRYPT] =
- rte_cryptodev_sym_session_create (sess_pool);
- if (!sessions[CRYPTODEV_OP_TYPE_DECRYPT])
+ if (found == 0)
{
- ret = -1;
- goto clear_key;
+ vec_add2 (numa_data->sess_pools, sess_pools_elt, 1);
+ error = allocate_session_pools (numa_node, sess_pools_elt,
+ vec_len (numa_data->sess_pools) - 1);
+ if (error)
+ {
+ ret = -1;
+ goto clear_key;
+ }
}
+ sess_pool = sess_pools_elt->sess_pool;
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ sess_priv_pool = sess_pools_elt->sess_priv_pool;
+
+ sessions[CRYPTODEV_OP_TYPE_ENCRYPT] =
+ rte_cryptodev_sym_session_create (sess_pool);
+
+ sessions[CRYPTODEV_OP_TYPE_DECRYPT] =
+ rte_cryptodev_sym_session_create (sess_pool);
+#endif
+
if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
ret = prepare_linked_xform (xforms_enc, CRYPTODEV_OP_TYPE_ENCRYPT, key);
else
ret =
prepare_aead_xform (xforms_enc, CRYPTODEV_OP_TYPE_ENCRYPT, key, aad_len);
if (ret)
- return 0;
+ {
+ ret = -1;
+ goto clear_key;
+ }
if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
prepare_linked_xform (xforms_dec, CRYPTODEV_OP_TYPE_DECRYPT, key);
else
prepare_aead_xform (xforms_dec, CRYPTODEV_OP_TYPE_DECRYPT, key, aad_len);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ dev_inst = vec_elt_at_index (cmt->cryptodev_inst, 0);
+ u32 dev_id = dev_inst->dev_id;
+ sessions[CRYPTODEV_OP_TYPE_ENCRYPT] =
+ rte_cryptodev_sym_session_create (dev_id, xforms_enc, sess_pool);
+ sessions[CRYPTODEV_OP_TYPE_DECRYPT] =
+ rte_cryptodev_sym_session_create (dev_id, xforms_dec, sess_pool);
+ if (!sessions[CRYPTODEV_OP_TYPE_ENCRYPT] ||
+ !sessions[CRYPTODEV_OP_TYPE_DECRYPT])
+ {
+ ret = -1;
+ goto clear_key;
+ }
+
+ rte_cryptodev_sym_session_opaque_data_set (
+ sessions[CRYPTODEV_OP_TYPE_ENCRYPT], aad_len);
+ rte_cryptodev_sym_session_opaque_data_set (
+ sessions[CRYPTODEV_OP_TYPE_DECRYPT], aad_len);
+#else
vec_foreach (dev_inst, cmt->cryptodev_inst)
{
u32 dev_id = dev_inst->dev_id;
- struct rte_cryptodev *cdev = rte_cryptodev_pmd_get_dev (dev_id);
- u32 driver_id = cdev->driver_id;
+ rte_cryptodev_info_get (dev_id, &dev_info);
+ u32 driver_id = dev_info.driver_id;
/* if the session is already configured for the driver type, avoid
configuring it again to increase the session data's refcnt */
@@ -390,11 +508,12 @@ cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx,
dev_id, sessions[CRYPTODEV_OP_TYPE_DECRYPT], xforms_dec,
sess_priv_pool);
if (ret < 0)
- return ret;
+ goto clear_key;
}
sessions[CRYPTODEV_OP_TYPE_ENCRYPT]->opaque_data = aad_len;
sessions[CRYPTODEV_OP_TYPE_DECRYPT]->opaque_data = aad_len;
+#endif
CLIB_MEMORY_STORE_BARRIER ();
ckey->keys[numa_node][CRYPTODEV_OP_TYPE_ENCRYPT] =
@@ -408,6 +527,7 @@ clear_key:
cryptodev_session_del (sessions[CRYPTODEV_OP_TYPE_ENCRYPT]);
cryptodev_session_del (sessions[CRYPTODEV_OP_TYPE_DECRYPT]);
}
+ clib_spinlock_unlock (&cmt->tlock);
return ret;
}
@@ -459,14 +579,14 @@ cryptodev_assign_resource (cryptodev_engine_thread_t * cet,
return -EBUSY;
vec_foreach_index (idx, cmt->cryptodev_inst)
- {
- cinst = cmt->cryptodev_inst + idx;
- if (cinst->dev_id == cet->cryptodev_id &&
- cinst->q_id == cet->cryptodev_q)
- break;
- }
+ {
+ cinst = cmt->cryptodev_inst + idx;
+ if (cinst->dev_id == cet->cryptodev_id &&
+ cinst->q_id == cet->cryptodev_q)
+ break;
+ }
/* invalid existing worker resource assignment */
- if (idx == vec_len (cmt->cryptodev_inst))
+ if (idx >= vec_len (cmt->cryptodev_inst))
return -EINVAL;
clib_spinlock_lock (&cmt->tlock);
clib_bitmap_set_no_check (cmt->active_cdev_inst_mask, idx, 0);
@@ -547,6 +667,90 @@ VLIB_CLI_COMMAND (show_cryptodev_assignment, static) = {
};
static clib_error_t *
+cryptodev_show_cache_rings_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ u32 thread_index = 0;
+ u16 i;
+ vec_foreach_index (thread_index, cmt->per_thread_data)
+ {
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 head = ring->head;
+ u16 tail = ring->tail;
+ u16 n_cached = (CRYPTODEV_CACHE_QUEUE_SIZE - tail + head) &
+ CRYPTODEV_CACHE_QUEUE_MASK;
+
+ u16 enq_head = ring->enq_head;
+ u16 deq_tail = ring->deq_tail;
+ u16 n_frames_inflight =
+ (enq_head == deq_tail) ?
+ 0 :
+ ((CRYPTODEV_CACHE_QUEUE_SIZE + enq_head - deq_tail) &
+ CRYPTODEV_CACHE_QUEUE_MASK);
+      /* even if some elements of the dequeued frame are still pending
+       * dequeue, we consider the frame processed */
+ u16 n_frames_processed =
+ ((tail == deq_tail) && (ring->frames[deq_tail].f == 0)) ?
+ 0 :
+ ((CRYPTODEV_CACHE_QUEUE_SIZE - tail + deq_tail) &
+ CRYPTODEV_CACHE_QUEUE_MASK) +
+ 1;
+      /* even if some elements of the enqueued frame are still pending
+       * enqueue, we consider the frame enqueued */
+ u16 n_frames_pending =
+ (head == enq_head) ? 0 :
+ ((CRYPTODEV_CACHE_QUEUE_SIZE - enq_head + head) &
+ CRYPTODEV_CACHE_QUEUE_MASK) -
+ 1;
+
+ u16 elts_to_enq =
+ (ring->frames[enq_head].n_elts - ring->frames[enq_head].enq_elts_head);
+ u16 elts_to_deq =
+ (ring->frames[deq_tail].n_elts - ring->frames[deq_tail].deq_elts_tail);
+
+ u32 elts_total = 0;
+
+ for (i = 0; i < CRYPTODEV_CACHE_QUEUE_SIZE; i++)
+ elts_total += ring->frames[i].n_elts;
+
+ if (vlib_num_workers () > 0 && thread_index == 0)
+ continue;
+
+ vlib_cli_output (vm, "\n\n");
+ vlib_cli_output (vm, "Frames cached in the ring: %u", n_cached);
+ vlib_cli_output (vm, "Frames cached but not processed: %u",
+ n_frames_pending);
+ vlib_cli_output (vm, "Frames inflight: %u", n_frames_inflight);
+ vlib_cli_output (vm, "Frames processed: %u", n_frames_processed);
+ vlib_cli_output (vm, "Elements total: %u", elts_total);
+ vlib_cli_output (vm, "Elements inflight: %u", cet->inflight);
+ vlib_cli_output (vm, "Head index: %u", head);
+ vlib_cli_output (vm, "Tail index: %u", tail);
+      vlib_cli_output (vm, "Current frame index being enqueued: %u",
+ enq_head);
+ vlib_cli_output (vm, "Current frame index being dequeued: %u", deq_tail);
+ vlib_cli_output (vm,
+ "Elements in current frame to be enqueued: %u, waiting "
+ "to be enqueued: %u",
+ ring->frames[enq_head].n_elts, elts_to_enq);
+ vlib_cli_output (vm,
+ "Elements in current frame to be dequeued: %u, waiting "
+ "to be dequeued: %u",
+ ring->frames[deq_tail].n_elts, elts_to_deq);
+ vlib_cli_output (vm, "\n\n");
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_cryptodev_sw_rings, static) = {
+ .path = "show cryptodev cache status",
+ .short_help = "show status of all cryptodev cache rings",
+ .function = cryptodev_show_cache_rings_fn,
+};
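Every counter printed by the CLI handler above is a masked ring distance: because CRYPTODEV_CACHE_QUEUE_SIZE is a power of two, (SIZE - tail + head) & MASK yields the occupancy even when head has wrapped around. A standalone illustration with assumed index values:

/* Standalone illustration of the masked ring arithmetic (assumed values). */
#include <stdint.h>
#include <stdio.h>

#define QUEUE_SIZE 64u /* a power of two, like CRYPTODEV_CACHE_QUEUE_SIZE */
#define QUEUE_MASK (QUEUE_SIZE - 1)

int
main (void)
{
  uint16_t tail = 60, head = 5; /* head has wrapped past the ring's end */
  uint16_t n_cached = (QUEUE_SIZE - tail + head) & QUEUE_MASK;
  /* slots 60..63 and 0..4 are occupied */
  printf ("frames cached: %u\n", (unsigned) n_cached); /* prints 9 */
  return 0;
}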
+
+static clib_error_t *
cryptodev_set_assignment_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
@@ -643,6 +847,15 @@ cryptodev_configure (vlib_main_t *vm, u32 cryptodev_id)
rte_cryptodev_info_get (cryptodev_id, &info);
+  /* Starting from DPDK 22.11, VPP no longer allows heterogeneous crypto
+     devices: only devices that have the same driver type as the first
+     initialized device can be initialized.
+   */
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (cmt->drivers_cnt == 1 && cmt->driver_id != info.driver_id)
+ return -1;
+#endif
+
if (!(info.feature_flags & RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO))
return -1;
@@ -656,7 +869,9 @@ cryptodev_configure (vlib_main_t *vm, u32 cryptodev_id)
struct rte_cryptodev_qp_conf qp_cfg;
qp_cfg.mp_session = 0;
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
qp_cfg.mp_session_private = 0;
+#endif
qp_cfg.nb_descriptors = CRYPTODEV_NB_CRYPTO_OPS;
ret = rte_cryptodev_queue_pair_setup (cryptodev_id, i, &qp_cfg,
@@ -675,16 +890,30 @@ cryptodev_configure (vlib_main_t *vm, u32 cryptodev_id)
/* start the device */
rte_cryptodev_start (cryptodev_id);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (cmt->drivers_cnt == 0)
+ {
+ cmt->drivers_cnt = 1;
+ cmt->driver_id = info.driver_id;
+ cmt->sess_sz = rte_cryptodev_sym_get_private_session_size (cryptodev_id);
+ }
+#endif
+
for (i = 0; i < info.max_nb_queue_pairs; i++)
{
cryptodev_inst_t *cdev_inst;
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ const char *dev_name = rte_dev_name (info.device);
+#else
+ const char *dev_name = info.device->name;
+#endif
vec_add2(cmt->cryptodev_inst, cdev_inst, 1);
- cdev_inst->desc = vec_new (char, strlen (info.device->name) + 10);
+ cdev_inst->desc = vec_new (char, strlen (dev_name) + 10);
cdev_inst->dev_id = cryptodev_id;
cdev_inst->q_id = i;
- snprintf (cdev_inst->desc, strlen (info.device->name) + 9,
- "%s_q%u", info.device->name, i);
+      snprintf (cdev_inst->desc, strlen (dev_name) + 9, "%s_q%u", dev_name,
+		i);
}
return 0;
@@ -1016,46 +1245,26 @@ cryptodev_probe (vlib_main_t *vm, u32 n_workers)
return 0;
}
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
static void
-cryptodev_get_max_sz (u32 *max_sess_sz, u32 *max_dp_sz)
+is_drv_unique (u32 driver_id, u32 **unique_drivers)
{
- cryptodev_main_t *cmt = &cryptodev_main;
- cryptodev_inst_t *cinst;
- u32 max_sess = 0, max_dp = 0;
+ u32 *unique_elt;
+ u8 found = 0;
- vec_foreach (cinst, cmt->cryptodev_inst)
+ vec_foreach (unique_elt, *unique_drivers)
{
- u32 sess_sz = rte_cryptodev_sym_get_private_session_size (cinst->dev_id);
- u32 dp_sz = rte_cryptodev_get_raw_dp_ctx_size (cinst->dev_id);
-
- max_sess = clib_max (sess_sz, max_sess);
- max_dp = clib_max (dp_sz, max_dp);
+ if (*unique_elt == driver_id)
+ {
+ found = 1;
+ break;
+ }
}
- *max_sess_sz = max_sess;
- *max_dp_sz = max_dp;
-}
-
-static void
-dpdk_disable_cryptodev_engine (vlib_main_t *vm)
-{
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- cryptodev_main_t *cmt = &cryptodev_main;
- u32 i;
-
- for (i = (vlib_num_workers () > 0); i < tm->n_vlib_mains; i++)
- {
- u32 numa = vlib_get_main_by_index (i)->numa_node;
- cryptodev_numa_data_t *numa_data;
-
- vec_validate (cmt->per_numa_data, numa);
- numa_data = cmt->per_numa_data + numa;
- if (numa_data->sess_pool)
- rte_mempool_free (numa_data->sess_pool);
- if (numa_data->sess_priv_pool)
- rte_mempool_free (numa_data->sess_priv_pool);
- }
+ if (!found)
+ vec_add1 (*unique_drivers, driver_id);
}
+#endif
clib_error_t *
dpdk_cryptodev_init (vlib_main_t * vm)
@@ -1064,30 +1273,53 @@ dpdk_cryptodev_init (vlib_main_t * vm)
vlib_thread_main_t *tm = vlib_get_thread_main ();
cryptodev_engine_thread_t *cet;
cryptodev_numa_data_t *numa_data;
- struct rte_mempool *mp;
+ u32 node;
+ u8 nodes = 0;
u32 skip_master = vlib_num_workers () > 0;
u32 n_workers = tm->n_vlib_mains - skip_master;
- u32 numa = vm->numa_node;
- u32 sess_sz, dp_sz;
u32 eidx;
u32 i;
- u8 *name = 0;
clib_error_t *error;
cmt->iova_mode = rte_eal_iova_mode ();
- vec_validate (cmt->per_numa_data, vm->numa_node);
+ clib_bitmap_foreach (node, tm->cpu_socket_bitmap)
+ {
+ if (node >= nodes)
+ nodes = node;
+ }
+
+ vec_validate (cmt->per_numa_data, nodes);
+ vec_foreach (numa_data, cmt->per_numa_data)
+ {
+ vec_validate (numa_data->sess_pools, 0);
+ }
/* probe all cryptodev devices and get queue info */
if (cryptodev_probe (vm, n_workers) < 0)
+ return 0;
+
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ struct rte_cryptodev_info dev_info;
+ cryptodev_inst_t *dev_inst;
+ u32 *unique_drivers = 0;
+ vec_foreach (dev_inst, cmt->cryptodev_inst)
{
- error = clib_error_return (0, "Failed to configure cryptodev");
- goto err_handling;
+ u32 dev_id = dev_inst->dev_id;
+ rte_cryptodev_info_get (dev_id, &dev_info);
+ u32 driver_id = dev_info.driver_id;
+ is_drv_unique (driver_id, &unique_drivers);
+
+ u32 sess_sz =
+ rte_cryptodev_sym_get_private_session_size (dev_inst->dev_id);
+ cmt->sess_sz = clib_max (cmt->sess_sz, sess_sz);
}
- cryptodev_get_max_sz (&sess_sz, &dp_sz);
+ cmt->drivers_cnt = vec_len (unique_drivers);
+ vec_free (unique_drivers);
+#endif
- clib_bitmap_vec_validate (cmt->active_cdev_inst_mask, tm->n_vlib_mains);
+ clib_bitmap_vec_validate (cmt->active_cdev_inst_mask, n_workers);
clib_spinlock_init (&cmt->tlock);
vec_validate_aligned(cmt->per_thread_data, tm->n_vlib_mains - 1,
@@ -1095,46 +1327,13 @@ dpdk_cryptodev_init (vlib_main_t * vm)
for (i = skip_master; i < tm->n_vlib_mains; i++)
{
cet = cmt->per_thread_data + i;
- numa = vlib_get_main_by_index (i)->numa_node;
- vec_validate (cmt->per_numa_data, numa);
- numa_data = vec_elt_at_index (cmt->per_numa_data, numa);
-
- if (!numa_data->sess_pool)
+ if (cryptodev_assign_resource (cet, 0, CRYPTODEV_RESOURCE_ASSIGN_AUTO) <
+ 0)
{
- /* create session pool for the numa node */
- name = format (0, "vcryptodev_sess_pool_%u%c", numa, 0);
- mp = rte_cryptodev_sym_session_pool_create (
- (char *) name, CRYPTODEV_NB_SESSION, 0, 0, 0, numa);
- if (!mp)
- {
- error =
- clib_error_return (0, "Not enough memory for mp %s", name);
- goto err_handling;
- }
- vec_free (name);
-
- numa_data->sess_pool = mp;
-
- /* create session private pool for the numa node */
- name = format (0, "cryptodev_sess_pool_%u%c", numa, 0);
- mp =
- rte_mempool_create ((char *) name, CRYPTODEV_NB_SESSION, sess_sz,
- 0, 0, NULL, NULL, NULL, NULL, numa, 0);
- if (!mp)
- {
- error =
- clib_error_return (0, "Not enough memory for mp %s", name);
- vec_free (name);
- goto err_handling;
- }
-
- vec_free (name);
-
- numa_data->sess_priv_pool = mp;
+ error = clib_error_return (0, "Failed to configure cryptodev");
+ goto err_handling;
}
-
- cryptodev_assign_resource (cet, 0, CRYPTODEV_RESOURCE_ASSIGN_AUTO);
}
/* register handler */
@@ -1154,13 +1353,10 @@ dpdk_cryptodev_init (vlib_main_t * vm)
/* this engine is only enabled when cryptodev device(s) are present in
 * startup.conf. Assume the user wants to use it, so turn on async mode here.
 */
- vnet_crypto_request_async_mode (1);
ipsec_set_async_mode (1);
return 0;
err_handling:
- dpdk_disable_cryptodev_engine (vm);
-
return error;
}
diff --git a/src/plugins/dpdk/cryptodev/cryptodev.h b/src/plugins/dpdk/cryptodev/cryptodev.h
index 3b47b43f538..7cd525dac56 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev.h
+++ b/src/plugins/dpdk/cryptodev/cryptodev.h
@@ -26,11 +26,13 @@
#define CRYPTODEV_CACHE_QUEUE_MASK (VNET_CRYPTO_FRAME_POOL_SIZE - 1)
#define CRYPTODEV_MAX_INFLIGHT (CRYPTODEV_NB_CRYPTO_OPS - 1)
#define CRYPTODEV_AAD_MASK (CRYPTODEV_NB_CRYPTO_OPS - 1)
-#define CRYPTODEV_DEQ_CACHE_SZ 32
-#define CRYPTODEV_NB_SESSION 10240
+#define CRYPTODE_ENQ_MAX 64
+#define CRYPTODE_DEQ_MAX 64
+#define CRYPTODEV_NB_SESSION 4096
#define CRYPTODEV_MAX_IV_SIZE 16
#define CRYPTODEV_MAX_AAD_SIZE 16
#define CRYPTODEV_MAX_N_SGL 8 /**< maximum number of segments */
+#define CRYPTODEV_MAX_PROCESED_IN_CACHE_QUEUE 8
#define CRYPTODEV_IV_OFFSET (offsetof (cryptodev_op_t, iv))
#define CRYPTODEV_AAD_OFFSET (offsetof (cryptodev_op_t, aad))
@@ -43,7 +45,10 @@
_ (AES_192_GCM, AEAD, AES_GCM, 12, 16, 8, 24) \
_ (AES_192_GCM, AEAD, AES_GCM, 12, 16, 12, 24) \
_ (AES_256_GCM, AEAD, AES_GCM, 12, 16, 8, 32) \
- _ (AES_256_GCM, AEAD, AES_GCM, 12, 16, 12, 32)
+ _ (AES_256_GCM, AEAD, AES_GCM, 12, 16, 12, 32) \
+ _ (CHACHA20_POLY1305, AEAD, CHACHA20_POLY1305, 12, 16, 0, 32) \
+ _ (CHACHA20_POLY1305, AEAD, CHACHA20_POLY1305, 12, 16, 8, 32) \
+ _ (CHACHA20_POLY1305, AEAD, CHACHA20_POLY1305, 12, 16, 12, 32)
/**
* crypto (alg, cryptodev_alg, key_size), hash (alg, digest-size)
@@ -66,7 +71,10 @@
_ (AES_256_CBC, AES_CBC, 32, SHA384, 24) \
_ (AES_128_CBC, AES_CBC, 16, SHA512, 32) \
_ (AES_192_CBC, AES_CBC, 24, SHA512, 32) \
- _ (AES_256_CBC, AES_CBC, 32, SHA512, 32)
+ _ (AES_256_CBC, AES_CBC, 32, SHA512, 32) \
+ _ (AES_128_CTR, AES_CTR, 16, SHA1, 12) \
+ _ (AES_192_CTR, AES_CTR, 24, SHA1, 12) \
+ _ (AES_256_CTR, AES_CTR, 32, SHA1, 12)
typedef enum
{
@@ -75,10 +83,16 @@ typedef enum
CRYPTODEV_N_OP_TYPES,
} cryptodev_op_type_t;
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+typedef void cryptodev_session_t;
+#else
+typedef struct rte_cryptodev_sym_session cryptodev_session_t;
+#endif
+
/* Cryptodev session data, one entry per direction per NUMA node */
typedef struct
{
- struct rte_cryptodev_sym_session ***keys;
+ cryptodev_session_t ***keys;
} cryptodev_key_t;
/* Replicate DPDK rte_cryptodev_sym_capability structure with key size ranges
@@ -119,7 +133,14 @@ typedef struct
typedef struct
{
struct rte_mempool *sess_pool;
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
struct rte_mempool *sess_priv_pool;
+#endif
+} cryptodev_session_pool_t;
+
+typedef struct
+{
+ cryptodev_session_pool_t *sess_pools;
} cryptodev_numa_data_t;
typedef struct
@@ -135,26 +156,71 @@ typedef struct
typedef struct
{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- vlib_buffer_t *b[VNET_CRYPTO_FRAME_SIZE];
+ vnet_crypto_async_frame_t *f;
union
{
struct
{
- cryptodev_op_t **cops;
- struct rte_mempool *cop_pool;
- struct rte_ring *ring;
+ /* index of frame elt where enque to
+ * the crypto engine is happening */
+ u8 enq_elts_head;
+ /* index of the frame elt where dequeue
+ * from the crypto engine is happening */
+ u8 deq_elts_tail;
+ u8 elts_inflight;
+
+ u8 op_type;
+ u8 aad_len;
+ u8 n_elts;
+ u16 reserved;
};
+ u64 raw;
+ };
+
+ u64 frame_elts_errs_mask;
+} cryptodev_cache_ring_elt_t;
+
+typedef struct
+{
+ cryptodev_cache_ring_elt_t frames[VNET_CRYPTO_FRAME_POOL_SIZE];
+
+ union
+ {
+ struct
+ {
+ /* head of the cache ring */
+ u16 head;
+ /* tail of the cache ring */
+ u16 tail;
+ /* index of the frame where enqueue
+ * to the crypto engine is happening */
+ u16 enq_head;
+ /* index of the frame where dequeue
+ * from the crypto engine is happening */
+ u16 deq_tail;
+ };
+ u64 raw;
+ };
+} cryptodev_cache_ring_t;
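The four indices partition the ring into three regions that every frame traverses in order; an illustrative view (the exact invariants are asserted in the push/pop helpers below):

/* Illustrative layout, indices advancing to the right and wrapping:
 *
 *   tail ......... deq_tail ......... enq_head ......... head
 *   |- processed --|---- inflight ----|----- pending ----|
 *
 * [tail, deq_tail)      fully dequeued frames, waiting to be popped
 * [deq_tail, enq_head)  frames currently inside the crypto device
 * [enq_head, head)      frames pushed but not yet enqueued to the device
 */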
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vlib_buffer_t *b[VNET_CRYPTO_FRAME_SIZE];
+ union
+ {
+ struct rte_mempool *cop_pool;
struct
{
struct rte_crypto_raw_dp_ctx *ctx;
- struct rte_ring *cached_frame;
u16 aad_index;
u8 *aad_buf;
u64 aad_phy_addr;
- struct rte_cryptodev_sym_session *reset_sess;
+ cryptodev_session_t *reset_sess;
};
};
+
+ cryptodev_cache_ring_t cache_ring;
u16 cryptodev_id;
u16 cryptodev_q;
u16 inflight;
@@ -170,20 +236,122 @@ typedef struct
clib_bitmap_t *active_cdev_inst_mask;
clib_spinlock_t tlock;
cryptodev_capability_t *supported_caps;
+ u32 sess_sz;
+ u32 drivers_cnt;
u8 is_raw_api;
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ u8 driver_id;
+#endif
} cryptodev_main_t;
extern cryptodev_main_t cryptodev_main;
+#define CRYPTODEV_CACHE_RING_GET_FRAME(r, i) \
+ ((r)->frames[(i) &CRYPTODEV_CACHE_QUEUE_MASK].f)
+
+#define CRYPTODEV_CACHE_RING_GET_ERR_MASK(r, i) \
+ ((r)->frames[(i) &CRYPTODEV_CACHE_QUEUE_MASK].frame_elts_errs_mask)
+
+#define CRYPTODEV_CACHE_RING_GET_FRAME_ELTS_INFLIGHT(r, i) \
+ (((r)->frames[(i) &CRYPTODEV_CACHE_QUEUE_MASK].enq_elts_head) - \
+ ((r)->frames[(i) &CRYPTODEV_CACHE_QUEUE_MASK].deq_elts_tail))
+
static_always_inline void
-cryptodev_mark_frame_err_status (vnet_crypto_async_frame_t *f,
- vnet_crypto_op_status_t s)
+cryptodev_cache_ring_update_enq_head (cryptodev_cache_ring_t *r,
+ vnet_crypto_async_frame_t *f)
+{
+ if (r->frames[r->enq_head].enq_elts_head == f->n_elts)
+ {
+ r->enq_head++;
+ r->enq_head &= CRYPTODEV_CACHE_QUEUE_MASK;
+ f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
+ }
+}
+
+static_always_inline bool
+cryptodev_cache_ring_update_deq_tail (cryptodev_cache_ring_t *r,
+ u16 *const deq)
+{
+ if (r->frames[*deq].deq_elts_tail == r->frames[*deq].n_elts)
+ {
+ *deq += 1;
+ *deq &= CRYPTODEV_CACHE_QUEUE_MASK;
+ return 1;
+ }
+
+ return 0;
+}
+static_always_inline u64
+cryptodev_mark_frame_fill_err (vnet_crypto_async_frame_t *f, u64 current_err,
+ u16 index, u16 n, vnet_crypto_op_status_t op_s)
+{
+ u64 err = current_err;
+ u16 i;
+
+ ERROR_ASSERT (index + n <= VNET_CRYPTO_FRAME_SIZE);
+ ERROR_ASSERT (op_s != VNET_CRYPTO_OP_STATUS_COMPLETED);
+
+ for (i = index; i < (index + n); i++)
+ f->elts[i].status = op_s;
+
+ err |= (~(~(0ull) << n) << index);
+
+ return err;
+}
+
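The expression err |= (~(~(0ull) << n) << index) sets n consecutive bits starting at bit index, one bit per failed element. A worked example, assuming n = 3 and index = 4:

/* Worked example (assumed values n = 3, index = 4):
 *   ~(0ull)         = 0xffff...ffff
 *   ~(0ull) << 3    = ...11111000   low three bits cleared
 *   ~(~(0ull) << 3) = 0x7           three low bits set
 *   0x7 << 4        = 0x70          bits 4, 5 and 6 set
 * so elements 4..6 of the frame are recorded as failed. */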
+static_always_inline cryptodev_cache_ring_elt_t *
+cryptodev_cache_ring_push (cryptodev_cache_ring_t *r,
+ vnet_crypto_async_frame_t *f)
+{
+ u16 head = r->head;
+ u16 tail = r->tail;
+
+ cryptodev_cache_ring_elt_t *ring_elt = &r->frames[head];
+ /**
+ * in debug mode we do the ring sanity test when a frame is enqueued to
+ * the ring.
+ **/
+#if CLIB_DEBUG > 0
+  u16 n_cached = (head >= tail) ? (head - tail) :
+				  (CRYPTODEV_CACHE_QUEUE_SIZE - tail + head);
+ ERROR_ASSERT (n_cached < CRYPTODEV_CACHE_QUEUE_SIZE);
+ ERROR_ASSERT (r->raw == 0 && r->frames[head].raw == 0 &&
+ r->frames[head].f == 0);
+#endif
+  /* the ring capacity is CRYPTODEV_CACHE_QUEUE_SIZE - 1 */
+  if (PREDICT_FALSE (((head + 1) & CRYPTODEV_CACHE_QUEUE_MASK) == tail))
+ return 0;
+
+ ring_elt->f = f;
+ ring_elt->n_elts = f->n_elts;
+ /* update head */
+ r->head++;
+ r->head &= CRYPTODEV_CACHE_QUEUE_MASK;
+ return ring_elt;
+}
+
+static_always_inline vnet_crypto_async_frame_t *
+cryptodev_cache_ring_pop (cryptodev_cache_ring_t *r)
{
- u32 n_elts = f->n_elts, i;
+ vnet_crypto_async_frame_t *f;
+ u16 tail = r->tail;
+ cryptodev_cache_ring_elt_t *ring_elt = &r->frames[tail];
+
+ ERROR_ASSERT (r->frames[r->head].raw == 0 ? r->head != tail : 1);
+ ERROR_ASSERT (r->frames[tail].raw != 0);
+ ERROR_ASSERT (ring_elt->deq_elts_tail == ring_elt->enq_elts_head &&
+ ring_elt->deq_elts_tail == ring_elt->n_elts);
+
+ f = CRYPTODEV_CACHE_RING_GET_FRAME (r, tail);
+ f->state = CRYPTODEV_CACHE_RING_GET_ERR_MASK (r, r->tail) == 0 ?
+ VNET_CRYPTO_FRAME_STATE_SUCCESS :
+ VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
+
+ clib_memset (ring_elt, 0, sizeof (*ring_elt));
+ r->tail++;
+ r->tail &= CRYPTODEV_CACHE_QUEUE_MASK;
- for (i = 0; i < n_elts; i++)
- f->elts[i].status = s;
- f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
+ return f;
}
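Push and pop bracket a frame's life in the cache ring; a condensed, hypothetical call sequence (error handling omitted, ring and frame assumed to exist):

/* Hypothetical lifecycle of one frame through the cache ring. */
cryptodev_cache_ring_elt_t *elt = cryptodev_cache_ring_push (ring, frame);
elt->op_type = (u8) CRYPTODEV_OP_TYPE_ENCRYPT;
elt->aad_len = 12;
/* the dequeue handler later drains the device in bursts, advancing
 * elt->enq_elts_head and elt->deq_elts_tail; once deq_elts_tail reaches
 * n_elts the completed frame can be popped: */
vnet_crypto_async_frame_t *done = cryptodev_cache_ring_pop (ring);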
int cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx,
diff --git a/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c b/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c
index 4545e24fc83..8d55e4fbf0f 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c
+++ b/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c
@@ -27,7 +27,6 @@
#include <rte_cryptodev.h>
#include <rte_crypto_sym.h>
#include <rte_crypto.h>
-#include <rte_cryptodev_pmd.h>
#include <rte_ring_peek_zc.h>
#include <rte_config.h>
@@ -68,6 +67,23 @@ cryptodev_get_iova (clib_pmalloc_main_t *pm, enum rte_iova_mode mode,
}
static_always_inline void
+cryptodev_validate_mbuf (struct rte_mbuf *mb, vlib_buffer_t *b)
+{
+  /* on the vnet side, vlib_buffer current_length is updated by cipher
+   * padding and icv_sh; the mbuf needs to be kept in sync with these
+   * changes */
+ u16 data_len = b->current_length +
+ (b->data + b->current_data - rte_pktmbuf_mtod (mb, u8 *));
+
+ /* for input nodes that are not dpdk-input, it is possible the mbuf
+ * was updated before as one of the chained mbufs. Setting nb_segs
+ * to 1 here to prevent the cryptodev PMD to access potentially
+ * invalid m_src->next pointers.
+ */
+ mb->nb_segs = 1;
+ mb->pkt_len = mb->data_len = data_len;
+}
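A numeric illustration of the data_len computation, with assumed offsets:

/* Assume rte_pktmbuf_mtod (mb, u8 *) points at b->data + 128, while the
 * vnet layer moved current_data to 100 and grew current_length to 1000
 * (cipher padding and ICV included). Then
 *   data_len = 1000 + ((b->data + 100) - (b->data + 128)) = 972
 * and both layers agree the payload ends at byte offset 1100. */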
+
+static_always_inline void
cryptodev_validate_mbuf_chain (vlib_main_t *vm, struct rte_mbuf *mb,
vlib_buffer_t *b)
{
@@ -125,39 +141,66 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
cryptodev_op_type_t op_type)
{
cryptodev_main_t *cmt = &cryptodev_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ ERROR_ASSERT (frame != 0);
+ ERROR_ASSERT (frame->n_elts > 0);
+ cryptodev_cache_ring_elt_t *ring_elt =
+ cryptodev_cache_ring_push (ring, frame);
+
+ if (PREDICT_FALSE (ring_elt == NULL))
+ return -1;
+
+ ring_elt->aad_len = 1;
+ ring_elt->op_type = (u8) op_type;
+ return 0;
+}
+
+static_always_inline void
+cryptodev_frame_linked_algs_enqueue_internal (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame,
+ cryptodev_op_type_t op_type)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
clib_pmalloc_main_t *pm = vm->physmem_main.pmalloc_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const enq = &ring->enq_head;
vnet_crypto_async_frame_elt_t *fe;
- struct rte_cryptodev_sym_session *sess = 0;
- cryptodev_op_t **cop;
- u32 *bi;
+ cryptodev_session_t *sess = 0;
+ cryptodev_op_t *cops[CRYPTODE_ENQ_MAX] = {};
+ cryptodev_op_t **cop = cops;
+ u32 *bi = 0;
u32 n_enqueue, n_elts;
u32 last_key_index = ~0;
+ u32 max_to_enq;
if (PREDICT_FALSE (frame == 0 || frame->n_elts == 0))
- return -1;
- n_elts = frame->n_elts;
+ return;
- if (PREDICT_FALSE (CRYPTODEV_NB_CRYPTO_OPS - cet->inflight < n_elts))
- {
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
- }
+ max_to_enq = clib_min (CRYPTODE_ENQ_MAX,
+ frame->n_elts - ring->frames[*enq].enq_elts_head);
+
+ if (cet->inflight + max_to_enq > CRYPTODEV_MAX_INFLIGHT)
+ return;
+
+ n_elts = max_to_enq;
if (PREDICT_FALSE (
- rte_mempool_get_bulk (cet->cop_pool, (void **) cet->cops, n_elts) < 0))
+ rte_mempool_get_bulk (cet->cop_pool, (void **) cops, n_elts) < 0))
{
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ return;
}
- cop = cet->cops;
- fe = frame->elts;
- bi = frame->buffer_indices;
- cop[0]->frame = frame;
- cop[0]->n_elts = n_elts;
+ fe = frame->elts + ring->frames[*enq].enq_elts_head;
+ bi = frame->buffer_indices + ring->frames[*enq].enq_elts_head;
while (n_elts)
{
@@ -169,8 +212,8 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
if (n_elts > 2)
{
- CLIB_PREFETCH (cop[1], CLIB_CACHE_LINE_BYTES * 3, STORE);
- CLIB_PREFETCH (cop[2], CLIB_CACHE_LINE_BYTES * 3, STORE);
+ CLIB_PREFETCH (cop[1], sizeof (*cop[1]), STORE);
+ CLIB_PREFETCH (cop[2], sizeof (*cop[2]), STORE);
clib_prefetch_load (&fe[1]);
clib_prefetch_load (&fe[2]);
}
@@ -184,9 +227,11 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (
cryptodev_session_create (vm, last_key_index, 0) < 0))
{
- cryptodev_mark_frame_err_status (
- frame, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ goto error_exit;
}
}
sess = key->keys[vm->numa_node][op_type];
@@ -216,26 +261,29 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (fe->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS))
cryptodev_validate_mbuf_chain (vm, sop->m_src, b);
else
- /* for input nodes that are not dpdk-input, it is possible the mbuf
- * was updated before as one of the chained mbufs. Setting nb_segs
- * to 1 here to prevent the cryptodev PMD to access potentially
- * invalid m_src->next pointers.
- */
- sop->m_src->nb_segs = 1;
+ cryptodev_validate_mbuf (sop->m_src, b);
+
clib_memcpy_fast (cop[0]->iv, fe->iv, 16);
+ ring->frames[*enq].enq_elts_head++;
cop++;
bi++;
fe++;
n_elts--;
}
- n_enqueue = rte_cryptodev_enqueue_burst (cet->cryptodev_id, cet->cryptodev_q,
- (struct rte_crypto_op **) cet->cops,
- frame->n_elts);
- ASSERT (n_enqueue == frame->n_elts);
- cet->inflight += n_enqueue;
+ n_enqueue =
+ rte_cryptodev_enqueue_burst (cet->cryptodev_id, cet->cryptodev_q,
+ (struct rte_crypto_op **) cops, max_to_enq);
+ ERROR_ASSERT (n_enqueue == max_to_enq);
+ cet->inflight += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ return;
- return 0;
+error_exit:
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ rte_mempool_put_bulk (cet->cop_pool, (void **) cops, max_to_enq);
}
static_always_inline int
@@ -244,39 +292,64 @@ cryptodev_frame_aead_enqueue (vlib_main_t *vm,
cryptodev_op_type_t op_type, u8 aad_len)
{
cryptodev_main_t *cmt = &cryptodev_main;
- clib_pmalloc_main_t *pm = vm->physmem_main.pmalloc_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ ERROR_ASSERT (frame != 0);
+ ERROR_ASSERT (frame->n_elts > 0);
+ cryptodev_cache_ring_elt_t *ring_elt =
+ cryptodev_cache_ring_push (ring, frame);
+
+ if (PREDICT_FALSE (ring_elt == NULL))
+ return -1;
+
+ ring_elt->aad_len = aad_len;
+ ring_elt->op_type = (u8) op_type;
+ return 0;
+}
+
+static_always_inline int
+cryptodev_aead_enqueue_internal (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame,
+ cryptodev_op_type_t op_type, u8 aad_len)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const enq = &ring->enq_head;
+ clib_pmalloc_main_t *pm = vm->physmem_main.pmalloc_main;
vnet_crypto_async_frame_elt_t *fe;
- struct rte_cryptodev_sym_session *sess = 0;
- cryptodev_op_t **cop;
- u32 *bi;
+ cryptodev_session_t *sess = 0;
+ cryptodev_op_t *cops[CRYPTODE_ENQ_MAX] = {};
+ cryptodev_op_t **cop = cops;
+ u32 *bi = 0;
u32 n_enqueue = 0, n_elts;
u32 last_key_index = ~0;
+ u16 left_to_enq = frame->n_elts - ring->frames[*enq].enq_elts_head;
+ const u16 max_to_enq = clib_min (CRYPTODE_ENQ_MAX, left_to_enq);
if (PREDICT_FALSE (frame == 0 || frame->n_elts == 0))
return -1;
- n_elts = frame->n_elts;
- if (PREDICT_FALSE (CRYPTODEV_MAX_INFLIGHT - cet->inflight < n_elts))
- {
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
- }
+ if (cet->inflight + max_to_enq > CRYPTODEV_MAX_INFLIGHT)
+ return -1;
+
+ n_elts = max_to_enq;
if (PREDICT_FALSE (
- rte_mempool_get_bulk (cet->cop_pool, (void **) cet->cops, n_elts) < 0))
+ rte_mempool_get_bulk (cet->cop_pool, (void **) cops, n_elts) < 0))
{
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
return -1;
}
- cop = cet->cops;
- fe = frame->elts;
- bi = frame->buffer_indices;
- cop[0]->frame = frame;
- cop[0]->n_elts = n_elts;
+ fe = frame->elts + ring->frames[*enq].enq_elts_head;
+ bi = frame->buffer_indices + ring->frames[*enq].enq_elts_head;
while (n_elts)
{
@@ -286,8 +359,8 @@ cryptodev_frame_aead_enqueue (vlib_main_t *vm,
if (n_elts > 2)
{
- CLIB_PREFETCH (cop[1], CLIB_CACHE_LINE_BYTES * 3, STORE);
- CLIB_PREFETCH (cop[2], CLIB_CACHE_LINE_BYTES * 3, STORE);
+ CLIB_PREFETCH (cop[1], sizeof (*cop[1]), STORE);
+ CLIB_PREFETCH (cop[2], sizeof (*cop[2]), STORE);
clib_prefetch_load (&fe[1]);
clib_prefetch_load (&fe[2]);
}
@@ -301,23 +374,32 @@ cryptodev_frame_aead_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (cryptodev_session_create (vm, last_key_index,
aad_len) < 0))
{
- cryptodev_mark_frame_err_status (
- frame, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ goto error_exit;
}
}
else if (PREDICT_FALSE (
- key->keys[vm->numa_node][op_type]->opaque_data !=
- aad_len))
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ rte_cryptodev_sym_session_opaque_data_get (
+ key->keys[vm->numa_node][op_type]) != (u64) aad_len
+#else
+ key->keys[vm->numa_node][op_type]->opaque_data != aad_len
+#endif
+ ))
{
cryptodev_sess_handler (vm, VNET_CRYPTO_KEY_OP_DEL,
fe->key_index, aad_len);
if (PREDICT_FALSE (cryptodev_session_create (vm, last_key_index,
aad_len) < 0))
{
- cryptodev_mark_frame_err_status (
- frame, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ goto error_exit;
}
}
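This check works because the AAD length a session was created with is cached in the session's opaque data and compared on every enqueue; a mismatch deletes and re-creates the session. A sketch of the round-trip for both API generations (sess and aad_len assumed in scope):

/* Sketch of the aad_len round-trip through session opaque data. */
#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
rte_cryptodev_sym_session_opaque_data_set (sess, aad_len);
u64 cached_aad_len = rte_cryptodev_sym_session_opaque_data_get (sess);
#else
sess->opaque_data = aad_len; /* sess: struct rte_cryptodev_sym_session * */
u64 cached_aad_len = sess->opaque_data;
#endif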
@@ -348,117 +430,179 @@ cryptodev_frame_aead_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (fe->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS))
cryptodev_validate_mbuf_chain (vm, sop->m_src, b);
else
- /* for input nodes that are not dpdk-input, it is possible the mbuf
- * was updated before as one of the chained mbufs. Setting nb_segs
- * to 1 here to prevent the cryptodev PMD to access potentially
- * invalid m_src->next pointers.
- */
- sop->m_src->nb_segs = 1;
+ cryptodev_validate_mbuf (sop->m_src, b);
+
clib_memcpy_fast (cop[0]->iv, fe->iv, 12);
clib_memcpy_fast (cop[0]->aad, fe->aad, aad_len);
+
cop++;
bi++;
fe++;
n_elts--;
}
- n_enqueue = rte_cryptodev_enqueue_burst (cet->cryptodev_id, cet->cryptodev_q,
- (struct rte_crypto_op **) cet->cops,
- frame->n_elts);
- ASSERT (n_enqueue == frame->n_elts);
- cet->inflight += n_enqueue;
+ n_enqueue =
+ rte_cryptodev_enqueue_burst (cet->cryptodev_id, cet->cryptodev_q,
+ (struct rte_crypto_op **) cops, max_to_enq);
+ ERROR_ASSERT (n_enqueue == max_to_enq);
+ cet->inflight += max_to_enq;
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
return 0;
-}
-
-static_always_inline u16
-cryptodev_ring_deq (struct rte_ring *r, cryptodev_op_t **cops)
-{
- u16 n, n_elts = 0;
- n = rte_ring_dequeue_bulk_start (r, (void **) cops, 1, 0);
- rte_ring_dequeue_finish (r, 0);
- if (!n)
- return 0;
-
- n = cops[0]->n_elts;
- if (rte_ring_count (r) < n)
- return 0;
-
- n_elts = rte_ring_sc_dequeue_bulk (r, (void **) cops, n, 0);
- ASSERT (n_elts == n);
+error_exit:
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ rte_mempool_put_bulk (cet->cop_pool, (void **) cops, max_to_enq);
- return n_elts;
+ return -1;
}
-static_always_inline vnet_crypto_async_frame_t *
-cryptodev_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
- u32 *enqueue_thread_idx)
+static_always_inline u8
+cryptodev_frame_dequeue_internal (vlib_main_t *vm, u32 *enqueue_thread_idx)
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
- cryptodev_op_t **cop = cet->cops;
+ vnet_crypto_async_frame_t *frame = NULL;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const deq = &ring->deq_tail;
+ u16 n_deq, left_to_deq;
+ u16 max_to_deq = 0;
+ u16 inflight = cet->inflight;
+ u8 dequeue_more = 0;
+ cryptodev_op_t *cops[CRYPTODE_DEQ_MAX] = {};
+ cryptodev_op_t **cop = cops;
vnet_crypto_async_frame_elt_t *fe;
- vnet_crypto_async_frame_t *frame;
- u32 n_elts, n_completed_ops = rte_ring_count (cet->ring);
- u32 ss0 = 0, ss1 = 0, ss2 = 0, ss3 = 0; /* sum of status */
-
- if (cet->inflight)
- {
- n_elts = rte_cryptodev_dequeue_burst (
- cet->cryptodev_id, cet->cryptodev_q,
- (struct rte_crypto_op **) cet->cops, VNET_CRYPTO_FRAME_SIZE);
+ u32 n_elts, n;
+ u64 err0 = 0, err1 = 0, err2 = 0, err3 = 0; /* partial errors mask */
- if (n_elts)
- {
- cet->inflight -= n_elts;
- n_completed_ops += n_elts;
+ left_to_deq =
+ ring->frames[*deq].f->n_elts - ring->frames[*deq].deq_elts_tail;
+ max_to_deq = clib_min (left_to_deq, CRYPTODE_DEQ_MAX);
- rte_ring_sp_enqueue_burst (cet->ring, (void **) cet->cops, n_elts,
- NULL);
- }
- }
+  /* the deq field tracks the frame currently being dequeued; from it we
+   * derive how many elements are left to dequeue for that frame */
+ n_deq =
+ rte_cryptodev_dequeue_burst (cet->cryptodev_id, cet->cryptodev_q,
+ (struct rte_crypto_op **) cops, max_to_deq);
- if (PREDICT_FALSE (n_completed_ops == 0))
- return 0;
+ if (n_deq == 0)
+ return dequeue_more;
- n_elts = cryptodev_ring_deq (cet->ring, cop);
- if (!n_elts)
- return 0;
+ frame = ring->frames[*deq].f;
+ fe = frame->elts + ring->frames[*deq].deq_elts_tail;
- frame = cop[0]->frame;
- fe = frame->elts;
+ n_elts = n_deq;
+ n = ring->frames[*deq].deq_elts_tail;
while (n_elts > 4)
{
- ss0 |= fe[0].status = cryptodev_status_conversion[cop[0]->op.status];
- ss1 |= fe[1].status = cryptodev_status_conversion[cop[1]->op.status];
- ss2 |= fe[2].status = cryptodev_status_conversion[cop[2]->op.status];
- ss3 |= fe[3].status = cryptodev_status_conversion[cop[3]->op.status];
+ fe[0].status = cryptodev_status_conversion[cop[0]->op.status];
+ fe[1].status = cryptodev_status_conversion[cop[1]->op.status];
+ fe[2].status = cryptodev_status_conversion[cop[2]->op.status];
+ fe[3].status = cryptodev_status_conversion[cop[3]->op.status];
+
+      err0 |= ((u64) (fe[0].status != VNET_CRYPTO_OP_STATUS_COMPLETED)) << n;
+      err1 |= ((u64) (fe[1].status != VNET_CRYPTO_OP_STATUS_COMPLETED))
+	      << (n + 1);
+      err2 |= ((u64) (fe[2].status != VNET_CRYPTO_OP_STATUS_COMPLETED))
+	      << (n + 2);
+      err3 |= ((u64) (fe[3].status != VNET_CRYPTO_OP_STATUS_COMPLETED))
+	      << (n + 3);
cop += 4;
fe += 4;
n_elts -= 4;
+ n += 4;
}
while (n_elts)
{
- ss0 |= fe[0].status = cryptodev_status_conversion[cop[0]->op.status];
+ fe[0].status = cryptodev_status_conversion[cop[0]->op.status];
+      err0 |= ((u64) (fe[0].status != VNET_CRYPTO_OP_STATUS_COMPLETED)) << n;
+ n++;
fe++;
cop++;
n_elts--;
}
- frame->state = (ss0 | ss1 | ss2 | ss3) == VNET_CRYPTO_OP_STATUS_COMPLETED ?
- VNET_CRYPTO_FRAME_STATE_SUCCESS :
- VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
+ ring->frames[*deq].frame_elts_errs_mask |= (err0 | err1 | err2 | err3);
- rte_mempool_put_bulk (cet->cop_pool, (void **) cet->cops, frame->n_elts);
- *nb_elts_processed = frame->n_elts;
- *enqueue_thread_idx = frame->enqueue_thread_index;
- return frame;
+ rte_mempool_put_bulk (cet->cop_pool, (void **) cops, n_deq);
+
+ inflight -= n_deq;
+ ring->frames[*deq].deq_elts_tail += n_deq;
+ if (cryptodev_cache_ring_update_deq_tail (ring, deq))
+ {
+ u32 fr_processed =
+ (CRYPTODEV_CACHE_QUEUE_SIZE - ring->tail + ring->deq_tail) &
+ CRYPTODEV_CACHE_QUEUE_MASK;
+
+ *enqueue_thread_idx = frame->enqueue_thread_index;
+ dequeue_more = (fr_processed < CRYPTODEV_MAX_PROCESED_IN_CACHE_QUEUE);
+ }
+
+ cet->inflight = inflight;
+ return dequeue_more;
}
+static_always_inline void
+cryptodev_enqueue_frame (vlib_main_t *vm, cryptodev_cache_ring_elt_t *ring_elt)
+{
+ cryptodev_op_type_t op_type = (cryptodev_op_type_t) ring_elt->op_type;
+ u8 linked_or_aad_len = ring_elt->aad_len;
+
+ if (linked_or_aad_len == 1)
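+  /* aad_len == 1 is a sentinel marking linked (cipher + auth) algorithms:
+   * genuine AEAD AAD lengths here are 0, 8 or 12, never 1. */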
+ cryptodev_frame_linked_algs_enqueue_internal (vm, ring_elt->f, op_type);
+ else
+ cryptodev_aead_enqueue_internal (vm, ring_elt->f, op_type,
+ linked_or_aad_len);
+}
+
+static_always_inline vnet_crypto_async_frame_t *
+cryptodev_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
+ u32 *enqueue_thread_idx)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ vnet_crypto_main_t *cm = &crypto_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ cryptodev_cache_ring_elt_t *ring_elt = &ring->frames[ring->tail];
+
+ vnet_crypto_async_frame_t *ret_frame = 0;
+ u8 dequeue_more = 1;
+
+ while (cet->inflight > 0 && dequeue_more)
+ {
+ dequeue_more = cryptodev_frame_dequeue_internal (vm, enqueue_thread_idx);
+ }
+
+ if (PREDICT_TRUE (ring->frames[ring->enq_head].f != 0))
+ cryptodev_enqueue_frame (vm, &ring->frames[ring->enq_head]);
+
+ if (PREDICT_TRUE (ring_elt->f != 0))
+ {
+ if (ring_elt->n_elts == ring_elt->deq_elts_tail)
+ {
+ *nb_elts_processed = ring_elt->n_elts;
+ vlib_node_set_interrupt_pending (
+ vlib_get_main_by_index (vm->thread_index), cm->crypto_node_index);
+ ret_frame = cryptodev_cache_ring_pop (ring);
+ return ret_frame;
+ }
+ }
+
+ return ret_frame;
+}
+static_always_inline int
+cryptodev_enqueue_aead_aad_0_enc (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
+{
+ return cryptodev_frame_aead_enqueue (vm, frame, CRYPTODEV_OP_TYPE_ENCRYPT,
+ 0);
+}
static_always_inline int
cryptodev_enqueue_aead_aad_8_enc (vlib_main_t *vm,
vnet_crypto_async_frame_t *frame)
@@ -475,6 +619,13 @@ cryptodev_enqueue_aead_aad_12_enc (vlib_main_t *vm,
}
static_always_inline int
+cryptodev_enqueue_aead_aad_0_dec (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
+{
+ return cryptodev_frame_aead_enqueue (vm, frame, CRYPTODEV_OP_TYPE_DECRYPT,
+ 0);
+}
+static_always_inline int
cryptodev_enqueue_aead_aad_8_dec (vlib_main_t *vm,
vnet_crypto_async_frame_t *frame)
{
@@ -515,6 +666,7 @@ cryptodev_register_cop_hdl (vlib_main_t *vm, u32 eidx)
struct rte_cryptodev_sym_capability_idx cap_aead_idx;
u8 *name;
clib_error_t *error = 0;
+ u32 ref_cnt = 0;
vec_foreach (cet, cmt->per_thread_data)
{
@@ -525,43 +677,28 @@ cryptodev_register_cop_hdl (vlib_main_t *vm, u32 eidx)
(char *) name, CRYPTODEV_NB_CRYPTO_OPS, sizeof (cryptodev_op_t), 0,
sizeof (struct rte_crypto_op_pool_private), NULL, NULL, crypto_op_init,
NULL, vm->numa_node, 0);
- if (!cet->cop_pool)
- {
- error = clib_error_return (
- 0, "Failed to create cryptodev op pool %s", name);
-
- goto error_exit;
- }
vec_free (name);
-
- name = format (0, "frames_ring_%u_%u", numa, thread_index);
- cet->ring =
- rte_ring_create ((char *) name, CRYPTODEV_NB_CRYPTO_OPS, vm->numa_node,
- RING_F_SP_ENQ | RING_F_SC_DEQ);
- if (!cet->ring)
+ if (!cet->cop_pool)
{
error = clib_error_return (
0, "Failed to create cryptodev op pool %s", name);
goto error_exit;
}
- vec_free (name);
-
- vec_validate (cet->cops, VNET_CRYPTO_FRAME_SIZE - 1);
}
- /** INDENT-OFF **/
#define _(a, b, c, d, e, f, g) \
cap_aead_idx.type = RTE_CRYPTO_SYM_XFORM_AEAD; \
cap_aead_idx.algo.aead = RTE_CRYPTO_##b##_##c; \
if (cryptodev_check_cap_support (&cap_aead_idx, g, e, f)) \
{ \
- vnet_crypto_register_async_handler ( \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_TAG##e##_AAD##f##_ENC, \
- cryptodev_enqueue_aead_aad_##f##_enc, cryptodev_frame_dequeue); \
- vnet_crypto_register_async_handler ( \
+ cryptodev_enqueue_aead_aad_##f##_enc); \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_TAG##e##_AAD##f##_DEC, \
- cryptodev_enqueue_aead_aad_##f##_dec, cryptodev_frame_dequeue); \
+ cryptodev_enqueue_aead_aad_##f##_dec); \
+ ref_cnt++; \
}
foreach_vnet_aead_crypto_conversion
#undef _
@@ -574,25 +711,25 @@ cryptodev_register_cop_hdl (vlib_main_t *vm, u32 eidx)
if (cryptodev_check_cap_support (&cap_cipher_idx, c, -1, -1) && \
cryptodev_check_cap_support (&cap_auth_idx, -1, e, -1)) \
{ \
- vnet_crypto_register_async_handler ( \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_##d##_TAG##e##_ENC, \
- cryptodev_enqueue_linked_alg_enc, cryptodev_frame_dequeue); \
- vnet_crypto_register_async_handler ( \
+ cryptodev_enqueue_linked_alg_enc); \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_##d##_TAG##e##_DEC, \
- cryptodev_enqueue_linked_alg_dec, cryptodev_frame_dequeue); \
+ cryptodev_enqueue_linked_alg_dec); \
+ ref_cnt++; \
}
foreach_cryptodev_link_async_alg
#undef _
- /** INDENT-ON **/
+
+ if (ref_cnt)
+ vnet_crypto_register_dequeue_handler (vm, eidx, cryptodev_frame_dequeue);
return 0;
error_exit:
vec_foreach (cet, cmt->per_thread_data)
{
- if (cet->ring)
- rte_ring_free (cet->ring);
-
if (cet->cop_pool)
rte_mempool_free (cet->cop_pool);
}
diff --git a/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c b/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c
index 41a1e0c2a09..67ab9c89e67 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c
+++ b/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c
@@ -29,7 +29,7 @@
#include <rte_cryptodev.h>
#include <rte_crypto_sym.h>
#include <rte_crypto.h>
-#include <rte_cryptodev_pmd.h>
+#include <rte_malloc.h>
#include <rte_config.h>
#include "cryptodev.h"
@@ -96,7 +96,7 @@ cryptodev_reset_ctx (cryptodev_engine_thread_t *cet)
{
union rte_cryptodev_session_ctx sess_ctx;
- ASSERT (cet->reset_sess != 0);
+ ERROR_ASSERT (cet->reset_sess != 0);
sess_ctx.crypto_sess = cet->reset_sess;
@@ -112,30 +112,51 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ ERROR_ASSERT (frame != 0);
+ ERROR_ASSERT (frame->n_elts > 0);
+ cryptodev_cache_ring_elt_t *ring_elt =
+ cryptodev_cache_ring_push (ring, frame);
+
+ if (PREDICT_FALSE (ring_elt == NULL))
+ return -1;
+
+ ring_elt->aad_len = 1;
+ ring_elt->op_type = (u8) op_type;
+ return 0;
+}
+
+static_always_inline void
+cryptodev_frame_linked_algs_enqueue_internal (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame,
+ cryptodev_op_type_t op_type)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
vnet_crypto_async_frame_elt_t *fe;
vlib_buffer_t **b;
struct rte_crypto_vec vec[CRYPTODEV_MAX_N_SGL];
struct rte_crypto_va_iova_ptr iv_vec, digest_vec;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const enq = &ring->enq_head;
u32 n_elts;
u32 last_key_index = ~0;
i16 min_ofs;
u32 max_end;
+ u32 max_to_enq = clib_min (CRYPTODE_ENQ_MAX,
+ frame->n_elts - ring->frames[*enq].enq_elts_head);
u8 is_update = 0;
int status;
- n_elts = frame->n_elts;
+ if (cet->inflight + max_to_enq > CRYPTODEV_MAX_INFLIGHT)
+ return;
- if (PREDICT_FALSE (CRYPTODEV_MAX_INFLIGHT - cet->inflight < n_elts))
- {
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
- }
+ n_elts = max_to_enq;
vlib_get_buffers (vm, frame->buffer_indices, cet->b, frame->n_elts);
- b = cet->b;
- fe = frame->elts;
+ b = cet->b + ring->frames[*enq].enq_elts_head;
+ fe = frame->elts + ring->frames[*enq].enq_elts_head;
while (n_elts)
{
@@ -215,26 +236,31 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (status < 0))
goto error_exit;
+ ring->frames[*enq].enq_elts_head += 1;
b++;
fe++;
n_elts--;
}
- status = rte_cryptodev_raw_enqueue_done (cet->ctx, frame->n_elts);
+ status = rte_cryptodev_raw_enqueue_done (cet->ctx, max_to_enq);
if (PREDICT_FALSE (status < 0))
- {
- cryptodev_reset_ctx (cet);
- return -1;
- }
+ goto error_exit;
- cet->inflight += frame->n_elts;
- return 0;
+ cet->inflight += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ return;
error_exit:
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ cryptodev_mark_frame_fill_err (frame,
+ ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
cryptodev_reset_ctx (cet);
- return -1;
+
+ return;
}
static_always_inline int
@@ -243,6 +269,28 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ ERROR_ASSERT (frame != 0);
+ ERROR_ASSERT (frame->n_elts > 0);
+ cryptodev_cache_ring_elt_t *ring_elt =
+ cryptodev_cache_ring_push (ring, frame);
+
+ if (PREDICT_FALSE (ring_elt == NULL))
+ return -1;
+
+ ring_elt->aad_len = aad_len;
+ ring_elt->op_type = (u8) op_type;
+ return 0;
+}
+
+static_always_inline void
+cryptodev_raw_aead_enqueue_internal (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame,
+ cryptodev_op_type_t op_type, u8 aad_len)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
vnet_crypto_async_frame_elt_t *fe;
vlib_buffer_t **b;
u32 n_elts;
@@ -250,22 +298,23 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
struct rte_crypto_vec vec[CRYPTODEV_MAX_N_SGL];
struct rte_crypto_va_iova_ptr iv_vec, digest_vec, aad_vec;
u32 last_key_index = ~0;
+ u16 *const enq = &ring->enq_head;
+ u16 left_to_enq = frame->n_elts - ring->frames[*enq].enq_elts_head;
+ u16 max_to_enq = clib_min (CRYPTODE_ENQ_MAX, left_to_enq);
u8 is_update = 0;
int status;
- n_elts = frame->n_elts;
-
- if (PREDICT_FALSE (CRYPTODEV_MAX_INFLIGHT - cet->inflight < n_elts))
+ if (cet->inflight + max_to_enq > CRYPTODEV_MAX_INFLIGHT)
{
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ return;
}
+ n_elts = max_to_enq;
+
vlib_get_buffers (vm, frame->buffer_indices, cet->b, frame->n_elts);
- fe = frame->elts;
- b = cet->b;
+ fe = frame->elts + ring->frames[*enq].enq_elts_head;
+ b = cet->b + ring->frames[*enq].enq_elts_head;
cofs.raw = 0;
while (n_elts)
@@ -292,8 +341,13 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
}
if (PREDICT_FALSE (
- (u8) key->keys[vm->numa_node][op_type]->opaque_data !=
- aad_len))
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ rte_cryptodev_sym_session_opaque_data_get (
+ key->keys[vm->numa_node][op_type]) != (u64) aad_len
+#else
+ (u8) key->keys[vm->numa_node][op_type]->opaque_data != aad_len
+#endif
+ ))
{
cryptodev_sess_handler (vm, VNET_CRYPTO_KEY_OP_DEL,
fe->key_index, aad_len);
@@ -349,7 +403,7 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
if (aad_len == 8)
*(u64 *) (cet->aad_buf + aad_offset) = *(u64 *) fe->aad;
- else
+ else if (aad_len != 0)
{
/* aad_len == 12 */
*(u64 *) (cet->aad_buf + aad_offset) = *(u64 *) fe->aad;
@@ -373,31 +427,30 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
if (PREDICT_FALSE (status < 0))
goto error_exit;
+ ring->frames[*enq].enq_elts_head += 1;
fe++;
b++;
n_elts--;
}
- status = rte_cryptodev_raw_enqueue_done (cet->ctx, frame->n_elts);
+ status = rte_cryptodev_raw_enqueue_done (cet->ctx, max_to_enq);
if (PREDICT_FALSE (status < 0))
goto error_exit;
- cet->inflight += frame->n_elts;
-
- return 0;
+ cet->inflight += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ return;
error_exit:
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ cryptodev_mark_frame_fill_err (frame,
+ ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
cryptodev_reset_ctx (cet);
- return -1;
-}
-
-static_always_inline u32
-cryptodev_get_frame_n_elts (void *frame)
-{
- vnet_crypto_async_frame_t *f = (vnet_crypto_async_frame_t *) frame;
- return f->n_elts;
+ return;
}
static_always_inline void
@@ -409,180 +462,114 @@ cryptodev_post_dequeue (void *frame, u32 index, u8 is_op_success)
VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
}
-#define GET_RING_OBJ(r, pos, f) \
- do \
- { \
- vnet_crypto_async_frame_t **ring = (void *) &r[1]; \
- f = ring[(r->cons.head + pos) & r->mask]; \
- } \
- while (0)
-
-static_always_inline vnet_crypto_async_frame_t *
-cryptodev_raw_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
- u32 *enqueue_thread_idx)
+static_always_inline u8
+cryptodev_raw_dequeue_internal (vlib_main_t *vm, u32 *enqueue_thread_idx)
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
- vnet_crypto_async_frame_t *frame, *frame_ret = 0;
- u32 n_deq, n_success;
- u32 n_cached_frame = rte_ring_count (cet->cached_frame), n_room_left;
- u8 no_job_to_deq = 0;
+ vnet_crypto_async_frame_t *frame;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const deq = &ring->deq_tail;
+ u32 n_success;
+ u16 n_deq, i, left_to_deq;
+ u16 max_to_deq = 0;
u16 inflight = cet->inflight;
+ u8 dequeue_more = 0;
int dequeue_status;
- n_room_left = CRYPTODEV_DEQ_CACHE_SZ - n_cached_frame - 1;
+ left_to_deq = ring->frames[*deq].n_elts - ring->frames[*deq].deq_elts_tail;
+ max_to_deq = clib_min (left_to_deq, CRYPTODE_DEQ_MAX);
- if (n_cached_frame)
- {
- u32 i;
- for (i = 0; i < n_cached_frame; i++)
- {
- vnet_crypto_async_frame_t *f;
- void *f_ret;
- enum rte_crypto_op_status op_status;
- u8 n_left, err, j;
+  /* the deq field tracks the frame currently being dequeued; from it the
+   * amount of elements to dequeue for that frame can be derived */
- GET_RING_OBJ (cet->cached_frame, i, f);
-
- if (i < n_cached_frame - 2)
- {
- vnet_crypto_async_frame_t *f1, *f2;
- GET_RING_OBJ (cet->cached_frame, i + 1, f1);
- GET_RING_OBJ (cet->cached_frame, i + 2, f2);
- clib_prefetch_load (f1);
- clib_prefetch_load (f2);
- }
-
- n_left = f->state & 0x7f;
- err = f->state & 0x80;
-
- for (j = f->n_elts - n_left; j < f->n_elts && inflight; j++)
- {
- int ret;
- f_ret = rte_cryptodev_raw_dequeue (cet->ctx, &ret, &op_status);
-
- if (!f_ret)
- break;
-
- switch (op_status)
- {
- case RTE_CRYPTO_OP_STATUS_SUCCESS:
- f->elts[j].status = VNET_CRYPTO_OP_STATUS_COMPLETED;
- break;
- default:
- f->elts[j].status = VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR;
- err |= 1 << 7;
- }
-
- inflight--;
- }
+ n_deq = rte_cryptodev_raw_dequeue_burst (
+ cet->ctx, NULL, max_to_deq, cryptodev_post_dequeue, (void **) &frame, 0,
+ &n_success, &dequeue_status);
- if (j == f->n_elts)
- {
- if (i == 0)
- {
- frame_ret = f;
- f->state = err ? VNET_CRYPTO_FRAME_STATE_ELT_ERROR :
- VNET_CRYPTO_FRAME_STATE_SUCCESS;
- }
- else
- {
- f->state = f->n_elts - j;
- f->state |= err;
- }
- if (inflight)
- continue;
- }
+ if (n_deq == 0)
+ return dequeue_more;
- /* to here f is not completed dequeued and no more job can be
- * dequeued
- */
- f->state = f->n_elts - j;
- f->state |= err;
- no_job_to_deq = 1;
- break;
- }
+ inflight -= n_deq;
+ if (PREDICT_FALSE (n_success < n_deq))
+ {
+ u16 idx = ring->frames[*deq].deq_elts_tail;
- if (frame_ret)
+ for (i = 0; i < n_deq; i++)
{
- rte_ring_sc_dequeue (cet->cached_frame, (void **) &frame_ret);
- n_room_left++;
+ if (frame->elts[idx + i].status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ ring->frames[*deq].frame_elts_errs_mask |= 1 << (idx + i);
}
}
+ ring->frames[*deq].deq_elts_tail += n_deq;
- /* no point to dequeue further */
- if (!inflight || no_job_to_deq || !n_room_left)
- goto end_deq;
+ if (cryptodev_cache_ring_update_deq_tail (ring, deq))
+ {
+ u32 fr_processed =
+ (CRYPTODEV_CACHE_QUEUE_SIZE - ring->tail + ring->deq_tail) &
+ CRYPTODEV_CACHE_QUEUE_MASK;
-#if RTE_VERSION >= RTE_VERSION_NUM(21, 5, 0, 0)
- n_deq = rte_cryptodev_raw_dequeue_burst (
- cet->ctx, cryptodev_get_frame_n_elts, 0, cryptodev_post_dequeue,
- (void **) &frame, 0, &n_success, &dequeue_status);
-#else
- n_deq = rte_cryptodev_raw_dequeue_burst (
- cet->ctx, cryptodev_get_frame_n_elts, cryptodev_post_dequeue,
- (void **) &frame, 0, &n_success, &dequeue_status);
-#endif
+ *enqueue_thread_idx = frame->enqueue_thread_index;
+ dequeue_more = (fr_processed < CRYPTODEV_MAX_PROCESED_IN_CACHE_QUEUE);
+ }
- if (!n_deq)
- goto end_deq;
+ int res =
+ rte_cryptodev_raw_dequeue_done (cet->ctx, cet->inflight - inflight);
+ ERROR_ASSERT (res == 0);
+ cet->inflight = inflight;
+ return dequeue_more;
+}
- inflight -= n_deq;
- no_job_to_deq = n_deq < frame->n_elts;
- /* we have to cache the frame */
- if (frame_ret || n_cached_frame || no_job_to_deq)
- {
- frame->state = frame->n_elts - n_deq;
- frame->state |= ((n_success < n_deq) << 7);
- rte_ring_sp_enqueue (cet->cached_frame, (void *) frame);
- n_room_left--;
- }
+static_always_inline void
+cryptodev_enqueue_frame_to_qat (vlib_main_t *vm,
+ cryptodev_cache_ring_elt_t *ring_elt)
+{
+ cryptodev_op_type_t op_type = (cryptodev_op_type_t) ring_elt->op_type;
+ u8 linked_or_aad_len = ring_elt->aad_len;
+
+ if (linked_or_aad_len == 1)
+ cryptodev_frame_linked_algs_enqueue_internal (vm, ring_elt->f, op_type);
else
- {
- frame->state = n_success == frame->n_elts ?
- VNET_CRYPTO_FRAME_STATE_SUCCESS :
- VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
- frame_ret = frame;
- }
+ cryptodev_raw_aead_enqueue_internal (vm, ring_elt->f, op_type,
+ linked_or_aad_len);
+}
- /* see if we can dequeue more */
- while (inflight && n_room_left && !no_job_to_deq)
- {
-#if RTE_VERSION >= RTE_VERSION_NUM(21, 5, 0, 0)
- n_deq = rte_cryptodev_raw_dequeue_burst (
- cet->ctx, cryptodev_get_frame_n_elts, 0, cryptodev_post_dequeue,
- (void **) &frame, 0, &n_success, &dequeue_status);
-#else
- n_deq = rte_cryptodev_raw_dequeue_burst (
- cet->ctx, cryptodev_get_frame_n_elts, cryptodev_post_dequeue,
- (void **) &frame, 0, &n_success, &dequeue_status);
-#endif
- if (!n_deq)
- break;
- inflight -= n_deq;
- no_job_to_deq = n_deq < frame->n_elts;
- frame->state = frame->n_elts - n_deq;
- frame->state |= ((n_success < n_deq) << 7);
- rte_ring_sp_enqueue (cet->cached_frame, (void *) frame);
- n_room_left--;
- }
+static_always_inline vnet_crypto_async_frame_t *
+cryptodev_raw_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
+ u32 *enqueue_thread_idx)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ vnet_crypto_main_t *cm = &crypto_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ cryptodev_cache_ring_elt_t *ring_elt = &ring->frames[ring->tail];
+ vnet_crypto_async_frame_t *ret_frame = 0;
+ u8 dequeue_more = 1;
-end_deq:
- if (inflight < cet->inflight)
- {
- int res =
- rte_cryptodev_raw_dequeue_done (cet->ctx, cet->inflight - inflight);
- ASSERT (res == 0);
- cet->inflight = inflight;
- }
+ while (cet->inflight > 0 && dequeue_more)
+ dequeue_more = cryptodev_raw_dequeue_internal (vm, enqueue_thread_idx);
+
+ if (PREDICT_TRUE (ring->frames[ring->enq_head].f != 0))
+ cryptodev_enqueue_frame_to_qat (vm, &ring->frames[ring->enq_head]);
- if (frame_ret)
+ if (PREDICT_TRUE (ring_elt->f != 0) &&
+ (ring_elt->n_elts == ring_elt->deq_elts_tail))
{
- *nb_elts_processed = frame_ret->n_elts;
- *enqueue_thread_idx = frame_ret->enqueue_thread_index;
+ *nb_elts_processed = ring_elt->n_elts;
+ vlib_node_set_interrupt_pending (
+ vlib_get_main_by_index (vm->thread_index), cm->crypto_node_index);
+ ret_frame = cryptodev_cache_ring_pop (ring);
}
- return frame_ret;
+ return ret_frame;
+}
+
+static_always_inline int
+cryptodev_raw_enq_aead_aad_0_enc (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
+{
+ return cryptodev_raw_aead_enqueue (vm, frame, CRYPTODEV_OP_TYPE_ENCRYPT, 0);
}
static_always_inline int
@@ -599,6 +586,13 @@ cryptodev_raw_enq_aead_aad_12_enc (vlib_main_t *vm,
}
static_always_inline int
+cryptodev_raw_enq_aead_aad_0_dec (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
+{
+ return cryptodev_raw_aead_enqueue (vm, frame, CRYPTODEV_OP_TYPE_DECRYPT, 0);
+}
+
+static_always_inline int
cryptodev_raw_enq_aead_aad_8_dec (vlib_main_t *vm,
vnet_crypto_async_frame_t *frame)
{
@@ -639,6 +633,7 @@ cryptodev_register_raw_hdl (vlib_main_t *vm, u32 eidx)
struct rte_cryptodev_sym_capability_idx cap_aead_idx;
u32 support_raw_api = 1, max_ctx_size = 0;
clib_error_t *error = 0;
+ u8 ref_cnt = 0;
vec_foreach (cinst, cmt->cryptodev_inst)
{
@@ -661,11 +656,7 @@ cryptodev_register_raw_hdl (vlib_main_t *vm, u32 eidx)
{
u32 thread_id = cet - cmt->per_thread_data;
u32 numa = vlib_get_main_by_index (thread_id)->numa_node;
- u8 *name = format (0, "cache_frame_ring_%u_%u", numa, thread_id);
-
- cet->cached_frame =
- rte_ring_create ((char *) name, CRYPTODEV_DEQ_CACHE_SZ, numa,
- RING_F_SC_DEQ | RING_F_SP_ENQ);
+ u8 *name = format (0, "cache_cache_ring_%u_%u", numa, thread_id);
cet->aad_buf = rte_zmalloc_socket (
0, CRYPTODEV_NB_CRYPTO_OPS * CRYPTODEV_MAX_AAD_SIZE,
@@ -684,28 +675,21 @@ cryptodev_register_raw_hdl (vlib_main_t *vm, u32 eidx)
error = clib_error_return (0, "Failed to alloc raw dp ctx");
goto err_handling;
}
-
- if (cet->cached_frame == 0)
- {
- error = clib_error_return (0, "Failed to alloc frame ring %s", name);
- goto err_handling;
- }
-
vec_free (name);
}
-/** INDENT-OFF **/
#define _(a, b, c, d, e, f, g) \
cap_aead_idx.type = RTE_CRYPTO_SYM_XFORM_AEAD; \
cap_aead_idx.algo.aead = RTE_CRYPTO_##b##_##c; \
if (cryptodev_check_cap_support (&cap_aead_idx, g, e, f)) \
{ \
- vnet_crypto_register_async_handler ( \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_TAG##e##_AAD##f##_ENC, \
- cryptodev_raw_enq_aead_aad_##f##_enc, cryptodev_raw_dequeue); \
- vnet_crypto_register_async_handler ( \
+ cryptodev_raw_enq_aead_aad_##f##_enc); \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_TAG##e##_AAD##f##_DEC, \
- cryptodev_raw_enq_aead_aad_##f##_dec, cryptodev_raw_dequeue); \
+ cryptodev_raw_enq_aead_aad_##f##_dec); \
+ ref_cnt++; \
}
foreach_vnet_aead_crypto_conversion
#undef _
@@ -718,26 +702,24 @@ cryptodev_register_raw_hdl (vlib_main_t *vm, u32 eidx)
if (cryptodev_check_cap_support (&cap_cipher_idx, c, -1, -1) && \
cryptodev_check_cap_support (&cap_auth_idx, -1, e, -1)) \
{ \
- vnet_crypto_register_async_handler ( \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_##d##_TAG##e##_ENC, \
- cryptodev_raw_enq_linked_alg_enc, cryptodev_raw_dequeue); \
- vnet_crypto_register_async_handler ( \
+ cryptodev_raw_enq_linked_alg_enc); \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_##d##_TAG##e##_DEC, \
- cryptodev_raw_enq_linked_alg_dec, cryptodev_raw_dequeue); \
+ cryptodev_raw_enq_linked_alg_dec); \
+ ref_cnt++; \
}
foreach_cryptodev_link_async_alg
#undef _
- cmt->is_raw_api = 1;
+ if (ref_cnt)
+ vnet_crypto_register_dequeue_handler (vm, eidx, cryptodev_raw_dequeue);
+
+ cmt->is_raw_api = 1;
return 0;
err_handling:
- vec_foreach (cet, cmt->per_thread_data)
- {
- if (cet->cached_frame)
- rte_ring_free (cet->cached_frame);
- }
-
return error;
}
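
Note: the raw data path now funnels frames through a per-thread cache ring instead of an rte_ring of cached frames: the public enqueue functions only push the frame (recording op_type, with an aad_len of 1 doubling as the linked-alg marker), while cryptodev_raw_dequeue drives the actual device enqueue and dequeue in chunks bounded by CRYPTODEV_MAX_INFLIGHT. An illustrative model of one ring slot, with field names taken from the diff (the real cryptodev_cache_ring_t lives in cryptodev.h and is not shown here):

typedef struct
{
  vnet_crypto_async_frame_t *f; /* frame owned by this slot */
  u16 n_elts;                   /* total elements in the frame */
  u16 enq_elts_head;            /* elements already submitted to the device */
  u16 deq_elts_tail;            /* elements already completed */
  u64 frame_elts_errs_mask;     /* per-element error bits set on dequeue */
  u8 op_type;                   /* CRYPTODEV_OP_TYPE_ENCRYPT / _DECRYPT */
  u8 aad_len;                   /* 1 == linked-alg frame, else AEAD AAD size */
} example_cache_ring_elt_t;

A frame is popped and handed back to vnet only once deq_elts_tail reaches n_elts, at which point the crypto node is also marked pending via vlib_node_set_interrupt_pending.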
diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c
index 0f771c6ba77..77f9a27f97b 100644
--- a/src/plugins/dpdk/device/cli.c
+++ b/src/plugins/dpdk/device/cli.c
@@ -77,26 +77,30 @@ show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
* name="mbuf_pool_socket0" available = 15104 allocated = 1280 total = 16384
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_show_dpdk_buffer,static) = {
.path = "show dpdk buffer",
.short_help = "show dpdk buffer",
.function = show_dpdk_buffer,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
clib_error_t *err = 0;
- u32 pipe_max_size;
int fds[2];
u8 *s = 0;
int n, n_try;
FILE *f;
+ /*
+   * XXX: Pipes on FreeBSD grow dynamically up to 64KB (as of FreeBSD 15),
+   * so don't manually tweak this value on FreeBSD at the moment.
+ */
+#ifdef __linux__
+ u32 pipe_max_size;
+
err = clib_sysfs_read ("/proc/sys/fs/pipe-max-size", "%u", &pipe_max_size);
if (err)
@@ -114,6 +118,7 @@ show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input,
err = clib_error_return_unix (0, "fcntl(F_SETPIPE_SZ)");
goto error;
}
+#endif /* __linux__ */
if (fcntl (fds[0], F_SETFL, O_NONBLOCK) == -1)
{
@@ -142,7 +147,7 @@ show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input,
err = clib_error_return_unix (0, "read");
goto error;
}
- _vec_len (s) = len + (n < 0 ? 0 : n);
+ vec_set_len (s, len + (n < 0 ? 0 : n));
}
vlib_cli_output (vm, "%v", s);
@@ -162,14 +167,12 @@ error:
* @cliexstart{show dpdk physmem}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_show_dpdk_physmem,static) = {
.path = "show dpdk physmem",
.short_help = "show dpdk physmem",
.function = show_dpdk_physmem,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
@@ -198,7 +201,7 @@ test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
first = vec_len (allocated_buffers) - n_free;
vlib_buffer_free (vm, allocated_buffers + first, n_free);
- _vec_len (allocated_buffers) = first;
+ vec_set_len (allocated_buffers, first);
}
if (n_alloc)
{
@@ -208,7 +211,7 @@ test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first,
n_alloc);
- _vec_len (allocated_buffers) = first + actual_alloc;
+ vec_set_len (allocated_buffers, first + actual_alloc);
if (actual_alloc < n_alloc)
vlib_cli_output (vm, "WARNING: only allocated %d buffers",
@@ -250,14 +253,12 @@ test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = {
.path = "test dpdk buffer",
.short_help = "test dpdk buffer [allocate <nn>] [free <nn>]",
.function = test_dpdk_buffer,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
@@ -265,6 +266,7 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
{
unformat_input_t _line_input, *line_input = &_line_input;
dpdk_main_t *dm = &dpdk_main;
+ vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hw;
dpdk_device_t *xd;
u32 hw_if_index = (u32) ~ 0;
@@ -277,9 +279,8 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat
- (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
- &hw_if_index))
+ if (unformat (line_input, "%U", unformat_vnet_hw_interface, vnm,
+ &hw_if_index))
;
else if (unformat (line_input, "tx %d", &nb_tx_desc))
;
@@ -299,30 +300,21 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
goto done;
}
- hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ hw = vnet_get_hw_interface (vnm, hw_if_index);
xd = vec_elt_at_index (dm->devices, hw->dev_instance);
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- {
- error =
- clib_error_return (0,
- "number of descriptors can be set only for "
- "physical devices");
- goto done;
- }
-
- if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) &&
- (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc))
+ if ((nb_rx_desc == (u32) ~0 || nb_rx_desc == xd->conf.n_rx_desc) &&
+ (nb_tx_desc == (u32) ~0 || nb_tx_desc == xd->conf.n_tx_desc))
{
error = clib_error_return (0, "nothing changed");
goto done;
}
if (nb_rx_desc != (u32) ~ 0)
- xd->nb_rx_desc = nb_rx_desc;
+ xd->conf.n_rx_desc = nb_rx_desc;
if (nb_tx_desc != (u32) ~ 0)
- xd->nb_tx_desc = nb_tx_desc;
+ xd->conf.n_tx_desc = nb_tx_desc;
dpdk_device_setup (xd);
@@ -345,13 +337,11 @@ done:
* Example of how to set the DPDK interface descriptors:
* @cliexcmd{set dpdk interface descriptors GigabitEthernet0/8/0 rx 512 tx 512}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = {
.path = "set dpdk interface descriptors",
.short_help = "set dpdk interface descriptors <interface> [rx <nn>] [tx <nn>]",
.function = set_dpdk_if_desc,
};
-/* *INDENT-ON* */
static clib_error_t *
show_dpdk_version_command_fn (vlib_main_t * vm,
@@ -373,16 +363,15 @@ show_dpdk_version_command_fn (vlib_main_t * vm,
* Example of how to display how many DPDK buffer test command has allocated:
* @cliexstart{show dpdk version}
* DPDK Version: DPDK 16.11.0
- * DPDK EAL init args: -c 1 -n 4 --huge-dir /run/vpp/hugepages --file-prefix vpp -w 0000:00:08.0 -w 0000:00:09.0 --master-lcore 0 --socket-mem 256
+ * DPDK EAL init args: --in-memory --no-telemetry --file-prefix vpp
+ * -w 0000:00:08.0 -w 0000:00:09.0
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_vpe_version_command, static) = {
.path = "show dpdk version",
.short_help = "show dpdk version",
.function = show_dpdk_version_command_fn,
};
-/* *INDENT-ON* */
/* Dummy function to get us linked in. */
void
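
Note: show_dpdk_physmem now guards the pipe resizing behind __linux__, since F_SETPIPE_SZ is Linux-specific and FreeBSD pipes grow on demand. A condensed sketch of the Linux branch, with error handling trimmed and the choice of the write-end fd an assumption:

#ifdef __linux__
  u32 pipe_max_size;

  /* read the kernel cap, then grow the pipe's buffer to match it */
  clib_sysfs_read ("/proc/sys/fs/pipe-max-size", "%u", &pipe_max_size);
  if (fcntl (fds[1], F_SETPIPE_SZ, pipe_max_size) == -1)
    return clib_error_return_unix (0, "fcntl(F_SETPIPE_SZ)");
#endif /* __linux__ */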
diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c
index 89046d1a8c0..7a49c5aaef2 100644
--- a/src/plugins/dpdk/device/common.c
+++ b/src/plugins/dpdk/device/common.c
@@ -29,11 +29,26 @@
#include <dpdk/device/dpdk_priv.h>
#include <vppinfra/error.h>
+/* DPDK TX offload to vnet hw interface caps mappings */
+static struct
+{
+ u64 offload;
+ vnet_hw_if_caps_t caps;
+} tx_off_caps_map[] = {
+ { RTE_ETH_TX_OFFLOAD_IPV4_CKSUM, VNET_HW_IF_CAP_TX_IP4_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_TCP_CKSUM, VNET_HW_IF_CAP_TX_TCP_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_UDP_CKSUM, VNET_HW_IF_CAP_TX_UDP_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM, VNET_HW_IF_CAP_TX_IP4_OUTER_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM, VNET_HW_IF_CAP_TX_UDP_OUTER_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_TCP_TSO, VNET_HW_IF_CAP_TCP_GSO },
+ { RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO, VNET_HW_IF_CAP_VXLAN_TNL_GSO }
+};
+
void
dpdk_device_error (dpdk_device_t * xd, char *str, int rv)
{
- dpdk_log_err ("Interface %U error %d: %s",
- format_dpdk_device_name, xd->port_id, rv, rte_strerror (rv));
+ dpdk_log_err ("Interface %U error %d: %s", format_dpdk_device_name,
+ xd->device_index, rv, rte_strerror (rv));
xd->errors = clib_error_return (xd->errors, "%s[port:%d, errno:%d]: %s",
str, xd->port_id, rv, rte_strerror (rv));
}
@@ -41,14 +56,16 @@ dpdk_device_error (dpdk_device_t * xd, char *str, int rv)
void
dpdk_device_setup (dpdk_device_t * xd)
{
- dpdk_main_t *dm = &dpdk_main;
vlib_main_t *vm = vlib_get_main ();
vnet_main_t *vnm = vnet_get_main ();
- vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index);
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+ u16 buf_sz = vlib_buffer_get_default_data_size (vm);
+ vnet_hw_if_caps_change_t caps = {};
struct rte_eth_dev_info dev_info;
- u64 bitmap;
+ struct rte_eth_conf conf = {};
+ u64 rxo, txo;
+ u32 max_frame_size;
int rv;
int j;
@@ -59,70 +76,152 @@ dpdk_device_setup (dpdk_device_t * xd)
if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
{
- vnet_hw_interface_set_flags (dm->vnet_main, xd->hw_if_index, 0);
+ vnet_hw_interface_set_flags (vnm, xd->hw_if_index, 0);
dpdk_device_stop (xd);
}
- /* Enable flow director when flows exist */
- if (xd->pmd == VNET_DPDK_PMD_I40E)
+ rte_eth_dev_info_get (xd->port_id, &dev_info);
+
+ dpdk_log_debug ("[%u] configuring device %U", xd->port_id,
+ format_dpdk_rte_device, dev_info.device);
+
+ /* create rx and tx offload wishlist */
+ rxo = RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
+ txo = 0;
+
+ if (xd->conf.enable_tcp_udp_checksum)
+ rxo |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
+
+ if (xd->conf.disable_tx_checksum_offload == 0 &&
+ xd->conf.enable_outer_checksum_offload)
+ txo |=
+ RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
+
+ if (xd->conf.disable_tx_checksum_offload == 0)
+ txo |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
+ RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
+
+ if (xd->conf.disable_multi_seg == 0)
{
- if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) != 0)
- xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_PERFECT;
- else
- xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_NONE;
+ txo |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
+ rxo |= RTE_ETH_RX_OFFLOAD_SCATTER;
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+ rxo |= DEV_RX_OFFLOAD_JUMBO_FRAME;
+#endif
}
- rte_eth_dev_info_get (xd->port_id, &dev_info);
-
- bitmap = xd->port_conf.txmode.offloads & ~dev_info.tx_offload_capa;
- if (bitmap)
+ if (xd->conf.enable_lro)
+ rxo |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
+
+ /* per-device offload config */
+ if (xd->conf.enable_tso)
+ txo |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_TSO |
+ RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
+
+ if (xd->conf.disable_rx_scatter)
+ rxo &= ~RTE_ETH_RX_OFFLOAD_SCATTER;
+
+ /* mask unsupported offloads */
+ rxo &= dev_info.rx_offload_capa;
+ txo &= dev_info.tx_offload_capa;
+
+ dpdk_log_debug ("[%u] Supported RX offloads: %U", xd->port_id,
+ format_dpdk_rx_offload_caps, dev_info.rx_offload_capa);
+ dpdk_log_debug ("[%u] Configured RX offloads: %U", xd->port_id,
+ format_dpdk_rx_offload_caps, rxo);
+ dpdk_log_debug ("[%u] Supported TX offloads: %U", xd->port_id,
+ format_dpdk_tx_offload_caps, dev_info.tx_offload_capa);
+ dpdk_log_debug ("[%u] Configured TX offloads: %U", xd->port_id,
+ format_dpdk_tx_offload_caps, txo);
+
+ /* finalize configuration */
+ conf.rxmode.offloads = rxo;
+ conf.txmode.offloads = txo;
+ if (rxo & RTE_ETH_RX_OFFLOAD_TCP_LRO)
+ conf.rxmode.max_lro_pkt_size = xd->conf.max_lro_pkt_size;
+
+ if (xd->conf.enable_lsc_int)
+ conf.intr_conf.lsc = 1;
+ if (xd->conf.enable_rxq_int)
+ conf.intr_conf.rxq = 1;
+
+ conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
+ if (xd->conf.n_rx_queues > 1)
{
- dpdk_log_warn ("unsupported tx offloads requested on port %u: %U",
- xd->port_id, format_dpdk_tx_offload_caps, bitmap);
- xd->port_conf.txmode.offloads ^= bitmap;
+ if (xd->conf.disable_rss == 0)
+ {
+ conf.rxmode.mq_mode = RTE_ETH_MQ_RX_RSS;
+ conf.rx_adv_conf.rss_conf.rss_hf = xd->conf.rss_hf;
+ }
}
- bitmap = xd->port_conf.rxmode.offloads & ~dev_info.rx_offload_capa;
- if (bitmap)
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+ if (rxo & DEV_RX_OFFLOAD_JUMBO_FRAME)
+ {
+ conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
+ xd->max_supported_frame_size = dev_info.max_rx_pktlen;
+ }
+ else
{
- dpdk_log_warn ("unsupported rx offloads requested on port %u: %U",
- xd->port_id, format_dpdk_rx_offload_caps, bitmap);
- xd->port_conf.rxmode.offloads ^= bitmap;
+ xd->max_supported_frame_size =
+ clib_min (1500 + xd->driver_frame_overhead, buf_sz);
}
+#else
+ if (xd->conf.disable_multi_seg)
+ xd->max_supported_frame_size = clib_min (dev_info.max_rx_pktlen, buf_sz);
+ else
+ xd->max_supported_frame_size = dev_info.max_rx_pktlen;
+#endif
+
+ max_frame_size = clib_min (xd->max_supported_frame_size,
+ ethernet_main.default_mtu + hi->frame_overhead);
- rv = rte_eth_dev_configure (xd->port_id, xd->rx_q_used,
- xd->tx_q_used, &xd->port_conf);
+#if RTE_VERSION >= RTE_VERSION_NUM(21, 11, 0, 0)
+ conf.rxmode.mtu = max_frame_size - xd->driver_frame_overhead;
+#endif
- if (rv < 0)
+retry:
+ rv = rte_eth_dev_configure (xd->port_id, xd->conf.n_rx_queues,
+ xd->conf.n_tx_queues, &conf);
+ if (rv < 0 && conf.intr_conf.rxq)
{
- dpdk_device_error (xd, "rte_eth_dev_configure", rv);
- goto error;
+ conf.intr_conf.rxq = 0;
+ goto retry;
}
- vec_validate_aligned (xd->tx_queues, xd->tx_q_used - 1,
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+ rte_eth_dev_set_mtu (xd->port_id,
+ max_frame_size - xd->driver_frame_overhead);
+#endif
+
+ hi->max_frame_size = 0;
+ vnet_hw_interface_set_max_frame_size (vnm, xd->hw_if_index, max_frame_size);
+ dpdk_log_debug ("[%u] max_frame_size %u max max_frame_size %u "
+ "driver_frame_overhead %u",
+ xd->port_id, hi->max_frame_size,
+ xd->max_supported_frame_size, xd->driver_frame_overhead);
+
+ vec_validate_aligned (xd->tx_queues, xd->conf.n_tx_queues - 1,
CLIB_CACHE_LINE_BYTES);
- for (j = 0; j < xd->tx_q_used; j++)
+ for (j = 0; j < xd->conf.n_tx_queues; j++)
{
- rv =
- rte_eth_tx_queue_setup (xd->port_id, j, xd->nb_tx_desc,
- xd->cpu_socket, &xd->tx_conf);
+ rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->conf.n_tx_desc,
+ xd->cpu_socket, 0);
/* retry with any other CPU socket */
if (rv < 0)
- rv =
- rte_eth_tx_queue_setup (xd->port_id, j,
- xd->nb_tx_desc, SOCKET_ID_ANY,
- &xd->tx_conf);
+ rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->conf.n_tx_desc,
+ SOCKET_ID_ANY, 0);
if (rv < 0)
dpdk_device_error (xd, "rte_eth_tx_queue_setup", rv);
- if (xd->tx_q_used < tm->n_vlib_mains)
- clib_spinlock_init (&vec_elt (xd->tx_queues, j).lock);
+ clib_spinlock_init (&vec_elt (xd->tx_queues, j).lock);
}
- vec_validate_aligned (xd->rx_queues, xd->rx_q_used - 1,
+ vec_validate_aligned (xd->rx_queues, xd->conf.n_rx_queues - 1,
CLIB_CACHE_LINE_BYTES);
- for (j = 0; j < xd->rx_q_used; j++)
+
+ for (j = 0; j < xd->conf.n_rx_queues; j++)
{
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, j);
u8 bpidx = vlib_buffer_pool_get_default_for_numa (
@@ -130,12 +229,12 @@ dpdk_device_setup (dpdk_device_t * xd)
vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, bpidx);
struct rte_mempool *mp = dpdk_mempool_by_buffer_pool_index[bpidx];
- rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc,
+ rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->conf.n_rx_desc,
xd->cpu_socket, 0, mp);
/* retry with any other CPU socket */
if (rv < 0)
- rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc,
+ rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->conf.n_rx_desc,
SOCKET_ID_ANY, 0, mp);
rxq->buffer_pool_index = bp->index;
@@ -147,7 +246,40 @@ dpdk_device_setup (dpdk_device_t * xd)
if (vec_len (xd->errors))
goto error;
- rte_eth_dev_set_mtu (xd->port_id, hi->max_packet_bytes);
+ xd->buffer_flags =
+ (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID);
+
+ if ((rxo & (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM)) ==
+ (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM))
+ xd->buffer_flags |=
+ (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_RX_IP4_CKSUM,
+ rxo & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM);
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_MAYBE_MULTISEG,
+ rxo & RTE_ETH_RX_OFFLOAD_SCATTER);
+ dpdk_device_flag_set (
+ xd, DPDK_DEVICE_FLAG_TX_OFFLOAD,
+ (txo & (RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) ==
+ (RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM));
+
+ /* unconditionally set mac filtering cap */
+ caps.val = caps.mask = VNET_HW_IF_CAP_MAC_FILTER;
+
+ ethernet_set_flags (vnm, xd->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
+
+ for (int i = 0; i < ARRAY_LEN (tx_off_caps_map); i++)
+ {
+ __typeof__ (tx_off_caps_map[0]) *v = tx_off_caps_map + i;
+ caps.mask |= v->caps;
+ if ((v->offload & txo) == v->offload)
+ caps.val |= v->caps;
+ }
+
+ vnet_hw_if_change_caps (vnm, xd->hw_if_index, &caps);
+ xd->enabled_rx_off = rxo;
+ xd->enabled_tx_off = txo;
if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
dpdk_device_start (xd);
@@ -187,17 +319,18 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+ int int_mode = 0;
if (!hi)
return;
- if (!xd->port_conf.intr_conf.rxq)
+ if (!xd->conf.enable_rxq_int)
return;
/* Probe for interrupt support */
if (rte_eth_dev_rx_intr_enable (xd->port_id, 0))
{
dpdk_log_info ("probe for interrupt mode for device %U. Failed.\n",
- format_dpdk_device_name, xd->port_id);
+ format_dpdk_device_name, xd->device_index);
}
else
{
@@ -205,13 +338,13 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
if (!(xd->flags & DPDK_DEVICE_FLAG_INT_UNMASKABLE))
rte_eth_dev_rx_intr_disable (xd->port_id, 0);
dpdk_log_info ("Probe for interrupt mode for device %U. Success.\n",
- format_dpdk_device_name, xd->port_id);
+ format_dpdk_device_name, xd->device_index);
}
if (xd->flags & DPDK_DEVICE_FLAG_INT_SUPPORTED)
{
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
- for (int q = 0; q < xd->rx_q_used; q++)
+ int_mode = 1;
+ for (int q = 0; q < xd->conf.n_rx_queues; q++)
{
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q);
clib_file_t f = { 0 };
@@ -219,15 +352,15 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
if (rxq->efd < 0)
{
xd->flags &= ~DPDK_DEVICE_FLAG_INT_SUPPORTED;
- hi->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ int_mode = 0;
break;
}
f.read_function = dpdk_rx_read_ready;
f.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
f.file_descriptor = rxq->efd;
f.private_data = rxq->queue_index;
- f.description =
- format (0, "%U queue %u", format_dpdk_device_name, xd->port_id, q);
+ f.description = format (0, "%U queue %u", format_dpdk_device_name,
+ xd->device_index, q);
rxq->clib_file_index = clib_file_add (&file_main, &f);
vnet_hw_if_set_rx_queue_file_index (vnm, rxq->queue_index,
rxq->clib_file_index);
@@ -240,6 +373,11 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
}
}
}
+
+ if (int_mode)
+ vnet_hw_if_set_caps (vnm, hi->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
+ else
+ vnet_hw_if_unset_caps (vnm, hi->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
vnet_hw_if_update_runtime_data (vnm, xd->hw_if_index);
}
@@ -259,6 +397,11 @@ dpdk_device_start (dpdk_device_t * xd)
return;
}
+ dpdk_log_debug ("[%u] RX burst function: %U", xd->port_id,
+ format_dpdk_burst_fn, xd, VLIB_RX);
+ dpdk_log_debug ("[%u] TX burst function: %U", xd->port_id,
+ format_dpdk_burst_fn, xd, VLIB_TX);
+
dpdk_setup_interrupts (xd);
if (xd->default_mac_address)
@@ -275,8 +418,8 @@ dpdk_device_start (dpdk_device_t * xd)
rte_eth_allmulticast_enable (xd->port_id);
- dpdk_log_info ("Interface %U started",
- format_dpdk_device_name, xd->port_id);
+ dpdk_log_info ("Interface %U started", format_dpdk_device_name,
+ xd->device_index);
}
void
@@ -289,8 +432,8 @@ dpdk_device_stop (dpdk_device_t * xd)
rte_eth_dev_stop (xd->port_id);
clib_memset (&xd->link, 0, sizeof (struct rte_eth_link));
- dpdk_log_info ("Interface %U stopped",
- format_dpdk_device_name, xd->port_id);
+ dpdk_log_info ("Interface %U stopped", format_dpdk_device_name,
+ xd->device_index);
}
void vl_api_force_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
@@ -311,10 +454,11 @@ dpdk_port_state_callback_inline (dpdk_portid_t port_id,
rte_eth_link_get_nowait (port_id, &link);
u8 link_up = link.link_status;
if (link_up)
- dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s",
- port_id, (unsigned) link.link_speed,
- (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
- "full-duplex" : "half-duplex");
+ dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s", port_id,
+ (unsigned) link.link_speed,
+ (link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ?
+ "full-duplex" :
+ "half-duplex");
else
dpdk_log_info ("Port %d Link Down\n\n", port_id);
@@ -337,12 +481,17 @@ dpdk_get_pci_device (const struct rte_eth_dev_info *info)
const struct rte_bus *bus;
bus = rte_bus_find_by_device (info->device);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (bus && !strcmp (rte_bus_name (bus), "pci"))
+#else
if (bus && !strcmp (bus->name, "pci"))
+#endif
return RTE_DEV_TO_PCI (info->device);
else
return NULL;
}
+#ifdef __linux__
/* If this device is VMBUS return pointer to info, otherwise NULL */
struct rte_vmbus_device *
dpdk_get_vmbus_device (const struct rte_eth_dev_info *info)
@@ -350,11 +499,16 @@ dpdk_get_vmbus_device (const struct rte_eth_dev_info *info)
const struct rte_bus *bus;
bus = rte_bus_find_by_device (info->device);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (bus && !strcmp (rte_bus_name (bus), "vmbus"))
+#else
if (bus && !strcmp (bus->name, "vmbus"))
+#endif
return container_of (info->device, struct rte_vmbus_device, device);
else
return NULL;
}
+#endif /* __linux__ */
/*
* fd.io coding-style-patch-verification: ON
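
Note: dpdk_device_setup above follows a wishlist-and-mask pattern: build the desired RX/TX offload sets from dpdk_port_conf_t, clear anything absent from dev_info.*_offload_capa, then translate the surviving TX offloads into vnet interface capabilities through tx_off_caps_map. Condensed from the hunks above:

  u64 txo = RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM;
  vnet_hw_if_caps_change_t caps = {};

  txo &= dev_info.tx_offload_capa; /* keep only what the PMD supports */

  for (int i = 0; i < ARRAY_LEN (tx_off_caps_map); i++)
    {
      caps.mask |= tx_off_caps_map[i].caps;
      if ((tx_off_caps_map[i].offload & txo) == tx_off_caps_map[i].offload)
	caps.val |= tx_off_caps_map[i].caps;
    }
  vnet_hw_if_change_caps (vnm, xd->hw_if_index, &caps);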
diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c
index 7c083e1dcf4..0ba59562838 100644
--- a/src/plugins/dpdk/device/device.c
+++ b/src/plugins/dpdk/device/device.c
@@ -25,7 +25,6 @@
#include <vlib/unix/unix.h>
#define foreach_dpdk_tx_func_error \
- _(BAD_RETVAL, "DPDK tx function returned an error") \
_(PKT_DROP, "Tx packet drops (dpdk tx failure)")
typedef enum
@@ -153,52 +152,30 @@ dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b,
* support multiple queues. It returns the number of packets untransmitted
* If all packets are transmitted (the normal case), the function returns 0.
*/
-static_always_inline
- u32 tx_burst_vector_internal (vlib_main_t * vm,
- dpdk_device_t * xd,
- struct rte_mbuf **mb, u32 n_left)
+static_always_inline u32
+tx_burst_vector_internal (vlib_main_t *vm, dpdk_device_t *xd,
+ struct rte_mbuf **mb, u32 n_left, int queue_id,
+ u8 is_shared)
{
- dpdk_main_t *dm = &dpdk_main;
dpdk_tx_queue_t *txq;
u32 n_retry;
int n_sent = 0;
- int queue_id;
n_retry = 16;
- queue_id = vm->thread_index % xd->tx_q_used;
txq = vec_elt_at_index (xd->tx_queues, queue_id);
do
{
- clib_spinlock_lock_if_init (&txq->lock);
+ if (is_shared)
+ clib_spinlock_lock (&txq->lock);
- if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD))
- {
- /* no wrap, transmit in one burst */
- n_sent = rte_eth_tx_burst (xd->port_id, queue_id, mb, n_left);
- n_retry--;
- }
- else
- {
- ASSERT (0);
- n_sent = 0;
- }
-
- clib_spinlock_unlock_if_init (&txq->lock);
-
- if (PREDICT_FALSE (n_sent < 0))
- {
- // emit non-fatal message, bump counter
- vnet_main_t *vnm = dm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- u32 node_index;
+ /* no wrap, transmit in one burst */
+ n_sent = rte_eth_tx_burst (xd->port_id, queue_id, mb, n_left);
- node_index = vec_elt_at_index (im->hw_interfaces,
- xd->hw_if_index)->tx_node_index;
+ if (is_shared)
+ clib_spinlock_unlock (&txq->lock);
- vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1);
- return n_left; // untransmitted packets
- }
+ n_retry--;
n_left -= n_sent;
mb += n_sent;
}
@@ -221,7 +198,8 @@ dpdk_buffer_tx_offload (dpdk_device_t * xd, vlib_buffer_t * b,
{
int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
u32 tso = b->flags & VNET_BUFFER_F_GSO, max_pkt_len;
- u32 ip_cksum, tcp_cksum, udp_cksum;
+ u32 ip_cksum, tcp_cksum, udp_cksum, outer_hdr_len = 0;
+ u32 outer_ip_cksum, vxlan_tunnel;
u64 ol_flags;
vnet_buffer_oflags_t oflags = 0;
@@ -233,25 +211,49 @@ dpdk_buffer_tx_offload (dpdk_device_t * xd, vlib_buffer_t * b,
ip_cksum = oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
tcp_cksum = oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
udp_cksum = oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
+ outer_ip_cksum = oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM;
+ vxlan_tunnel = oflags & VNET_BUFFER_OFFLOAD_F_TNL_VXLAN;
- mb->l2_len = vnet_buffer (b)->l3_hdr_offset - b->current_data;
- mb->l3_len = vnet_buffer (b)->l4_hdr_offset -
- vnet_buffer (b)->l3_hdr_offset;
- mb->outer_l3_len = 0;
- mb->outer_l2_len = 0;
- ol_flags = is_ip4 ? PKT_TX_IPV4 : PKT_TX_IPV6;
- ol_flags |= ip_cksum ? PKT_TX_IP_CKSUM : 0;
- ol_flags |= tcp_cksum ? PKT_TX_TCP_CKSUM : 0;
- ol_flags |= udp_cksum ? PKT_TX_UDP_CKSUM : 0;
+ ol_flags = is_ip4 ? RTE_MBUF_F_TX_IPV4 : RTE_MBUF_F_TX_IPV6;
+ ol_flags |= ip_cksum ? RTE_MBUF_F_TX_IP_CKSUM : 0;
+ ol_flags |= tcp_cksum ? RTE_MBUF_F_TX_TCP_CKSUM : 0;
+ ol_flags |= udp_cksum ? RTE_MBUF_F_TX_UDP_CKSUM : 0;
+
+ if (vxlan_tunnel)
+ {
+ ol_flags |= outer_ip_cksum ?
+ RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IP_CKSUM :
+ RTE_MBUF_F_TX_OUTER_IPV6;
+ ol_flags |= RTE_MBUF_F_TX_TUNNEL_VXLAN;
+ mb->l2_len =
+ vnet_buffer (b)->l3_hdr_offset - vnet_buffer2 (b)->outer_l4_hdr_offset;
+ mb->l3_len =
+ vnet_buffer (b)->l4_hdr_offset - vnet_buffer (b)->l3_hdr_offset;
+ mb->outer_l2_len =
+ vnet_buffer2 (b)->outer_l3_hdr_offset - b->current_data;
+ mb->outer_l3_len = vnet_buffer2 (b)->outer_l4_hdr_offset -
+ vnet_buffer2 (b)->outer_l3_hdr_offset;
+ outer_hdr_len = mb->outer_l2_len + mb->outer_l3_len;
+ }
+ else
+ {
+ mb->l2_len = vnet_buffer (b)->l3_hdr_offset - b->current_data;
+ mb->l3_len =
+ vnet_buffer (b)->l4_hdr_offset - vnet_buffer (b)->l3_hdr_offset;
+ mb->outer_l2_len = 0;
+ mb->outer_l3_len = 0;
+ }
if (tso)
{
mb->l4_len = vnet_buffer2 (b)->gso_l4_hdr_sz;
mb->tso_segsz = vnet_buffer2 (b)->gso_size;
/* ensure packet is large enough to require tso */
- max_pkt_len = mb->l2_len + mb->l3_len + mb->l4_len + mb->tso_segsz;
+ max_pkt_len =
+ outer_hdr_len + mb->l2_len + mb->l3_len + mb->l4_len + mb->tso_segsz;
if (mb->tso_segsz != 0 && mb->pkt_len > max_pkt_len)
- ol_flags |= (tcp_cksum ? PKT_TX_TCP_SEG : PKT_TX_UDP_SEG);
+ ol_flags |=
+ (tcp_cksum ? RTE_MBUF_F_TX_TCP_SEG : RTE_MBUF_F_TX_UDP_SEG);
}
mb->ol_flags |= ol_flags;
@@ -274,11 +276,13 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
dpdk_main_t *dm = &dpdk_main;
vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance);
+ vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (f);
u32 n_packets = f->n_vectors;
u32 n_left;
u32 thread_index = vm->thread_index;
- int queue_id = thread_index;
- u32 tx_pkts = 0, all_or_flags = 0;
+ int queue_id = tf->queue_id;
+ u8 is_shared = tf->shared_queue;
+ u32 tx_pkts = 0;
dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data,
thread_index);
struct rte_mbuf **mb;
@@ -310,7 +314,6 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
b[3] = vlib_buffer_from_rte_mbuf (mb[3]);
or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
- all_or_flags |= or_flags;
if (or_flags & VLIB_BUFFER_NEXT_PRESENT)
{
@@ -368,7 +371,6 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
b[1] = vlib_buffer_from_rte_mbuf (mb[1]);
or_flags = b[0]->flags | b[1]->flags;
- all_or_flags |= or_flags;
if (or_flags & VLIB_BUFFER_NEXT_PRESENT)
{
@@ -404,7 +406,6 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
while (n_left > 0)
{
b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
- all_or_flags |= b[0]->flags;
dpdk_validate_rte_mbuf (vm, b[0], 1);
dpdk_buffer_tx_offload (xd, b[0], mb[0]);
@@ -419,7 +420,8 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
/* transmit as many packets as possible */
tx_pkts = n_packets = mb - ptd->mbufs;
- n_left = tx_burst_vector_internal (vm, xd, ptd->mbufs, n_packets);
+ n_left = tx_burst_vector_internal (vm, xd, ptd->mbufs, n_packets, queue_id,
+ is_shared);
{
/* If there is no callback then drop any non-transmitted packets */
@@ -475,7 +477,7 @@ dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
if (vec_len (xd->errors))
return clib_error_create ("Interface start failed");
xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP;
- f64 now = vlib_time_now (dm->vlib_main);
+ f64 now = vlib_time_now (vlib_get_main ());
dpdk_update_counters (xd, now);
dpdk_update_link_state (xd, now);
}
@@ -511,7 +513,7 @@ dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
}
xd->per_interface_next_index =
- vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index);
+ vlib_node_add_next (vlib_get_main (), dpdk_input_node.index, node_index);
}
@@ -533,11 +535,8 @@ dpdk_subif_add_del_function (vnet_main_t * vnm,
else if (xd->num_subifs)
xd->num_subifs--;
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- goto done;
-
/* currently we program VLANS only for IXGBE VF */
- if (xd->pmd != VNET_DPDK_PMD_IXGBEVF)
+ if (xd->driver->program_vlans == 0)
goto done;
if (t->sub.eth.flags.no_tags == 1)
@@ -551,7 +550,7 @@ dpdk_subif_add_del_function (vnet_main_t * vnm,
}
vlan_offload = rte_eth_dev_get_vlan_offload (xd->port_id);
- vlan_offload |= ETH_VLAN_FILTER_OFFLOAD;
+ vlan_offload |= RTE_ETH_VLAN_FILTER_OFFLOAD;
if ((r = rte_eth_dev_set_vlan_offload (xd->port_id, vlan_offload)))
{
@@ -625,7 +624,6 @@ dpdk_interface_set_rss_queues (struct vnet_main_t *vnm,
clib_memset (reta, 0, dev_info.reta_size * sizeof (*reta));
valid_queue_count = 0;
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, bitmap) {
if (i >= dev_info.nb_rx_queues)
{
@@ -634,7 +632,6 @@ dpdk_interface_set_rss_queues (struct vnet_main_t *vnm,
}
reta[valid_queue_count++] = i;
}
- /* *INDENT-ON* */
/* check valid_queue_count not zero, make coverity happy */
if (valid_queue_count == 0)
@@ -651,10 +648,8 @@ dpdk_interface_set_rss_queues (struct vnet_main_t *vnm,
}
/* update reta table */
- reta_conf =
- (struct rte_eth_rss_reta_entry64 *) clib_mem_alloc (dev_info.reta_size /
- RTE_RETA_GROUP_SIZE *
- sizeof (*reta_conf));
+ reta_conf = (struct rte_eth_rss_reta_entry64 *) clib_mem_alloc (
+ dev_info.reta_size / RTE_ETH_RETA_GROUP_SIZE * sizeof (*reta_conf));
if (reta_conf == NULL)
{
err = clib_error_return (0, "clib_mem_alloc failed");
@@ -662,13 +657,13 @@ dpdk_interface_set_rss_queues (struct vnet_main_t *vnm,
}
clib_memset (reta_conf, 0,
- dev_info.reta_size / RTE_RETA_GROUP_SIZE *
- sizeof (*reta_conf));
+ dev_info.reta_size / RTE_ETH_RETA_GROUP_SIZE *
+ sizeof (*reta_conf));
for (i = 0; i < dev_info.reta_size; i++)
{
- uint32_t reta_id = i / RTE_RETA_GROUP_SIZE;
- uint32_t reta_pos = i % RTE_RETA_GROUP_SIZE;
+ uint32_t reta_id = i / RTE_ETH_RETA_GROUP_SIZE;
+ uint32_t reta_pos = i % RTE_ETH_RETA_GROUP_SIZE;
reta_conf[reta_id].mask = UINT64_MAX;
reta_conf[reta_id].reta[reta_pos] = reta[i];
@@ -726,7 +721,6 @@ dpdk_interface_rx_mode_change (vnet_main_t *vnm, u32 hw_if_index, u32 qid,
return 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (dpdk_device_class) = {
.name = "dpdk",
.tx_function_n_errors = DPDK_TX_FUNC_N_ERROR,
@@ -745,7 +739,6 @@ VNET_DEVICE_CLASS (dpdk_device_class) = {
.set_rss_queues_function = dpdk_interface_set_rss_queues,
.rx_mode_change_function = dpdk_interface_rx_mode_change,
};
-/* *INDENT-ON* */
#define UP_DOWN_FLAG_EVENT 1
@@ -792,14 +785,12 @@ admin_up_down_process (vlib_main_t * vm,
return 0; /* or not */
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (admin_up_down_process_node) = {
.function = admin_up_down_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "admin-up-down-process",
.process_log2_n_stack_bytes = 17, // 256KB
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
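
Note: the TX path change above moves queue selection from vm->thread_index to the per-frame scalar args, and takes the queue spinlock only when the queue is marked as shared between threads. The core of the new fast path, condensed from the diff:

  vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (f);
  dpdk_tx_queue_t *txq = vec_elt_at_index (xd->tx_queues, tf->queue_id);

  if (tf->shared_queue)
    clib_spinlock_lock (&txq->lock);

  /* no wrap, transmit in one burst */
  n_sent = rte_eth_tx_burst (xd->port_id, tf->queue_id, mb, n_left);

  if (tf->shared_queue)
    clib_spinlock_unlock (&txq->lock);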
diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h
index 66794a4e67c..88a4d9ff618 100644
--- a/src/plugins/dpdk/device/dpdk.h
+++ b/src/plugins/dpdk/device/dpdk.h
@@ -22,27 +22,25 @@
#include <rte_config.h>
-#include <rte_common.h>
-#include <rte_dev.h>
-#include <rte_memory.h>
#include <rte_eal.h>
-#include <rte_per_lcore.h>
-#include <rte_cycles.h>
-#include <rte_lcore.h>
-#include <rte_per_lcore.h>
-#include <rte_interrupts.h>
-#include <rte_pci.h>
+#include <rte_bus_pci.h>
+#ifdef __linux__
#include <rte_bus_vmbus.h>
-#include <rte_ether.h>
+#endif /* __linux__ */
#include <rte_ethdev.h>
-#include <rte_ring.h>
-#include <rte_mempool.h>
-#include <rte_mbuf.h>
#include <rte_version.h>
-#include <rte_sched.h>
#include <rte_net.h>
-#include <rte_bus_pci.h>
-#include <rte_flow.h>
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+#include <rte_bus.h>
+#include <rte_pci.h>
+#include <ctype.h>
+
+#include <bus_driver.h>
+#include <bus_pci_driver.h>
+#ifdef __linux__
+#include <bus_vmbus_driver.h>
+#endif /* __linux__ */
+#endif
#include <vnet/devices/devices.h>
@@ -60,79 +58,11 @@ extern vnet_device_class_t dpdk_device_class;
extern vlib_node_registration_t dpdk_input_node;
extern vlib_node_registration_t admin_up_down_process_node;
-#if RTE_VERSION < RTE_VERSION_NUM(20, 8, 0, 0)
-#define DPDK_MLX5_PMD_NAME "net_mlx5"
-#else
-#define DPDK_MLX5_PMD_NAME "mlx5_pci"
-#endif
-
-#define foreach_dpdk_pmd \
- _ ("net_thunderx", THUNDERX) \
- _ ("net_e1000_em", E1000EM) \
- _ ("net_e1000_igb", IGB) \
- _ ("net_e1000_igb_vf", IGBVF) \
- _ ("net_ixgbe", IXGBE) \
- _ ("net_ixgbe_vf", IXGBEVF) \
- _ ("net_i40e", I40E) \
- _ ("net_i40e_vf", I40EVF) \
- _ ("net_ice", ICE) \
- _ ("net_iavf", IAVF) \
- _ ("net_igc", IGC) \
- _ ("net_virtio", VIRTIO) \
- _ ("net_enic", ENIC) \
- _ ("net_vmxnet3", VMXNET3) \
- _ ("AF_PACKET PMD", AF_PACKET) \
- _ ("net_fm10k", FM10K) \
- _ ("net_cxgbe", CXGBE) \
- _ ("net_mlx4", MLX4) \
- _ (DPDK_MLX5_PMD_NAME, MLX5) \
- _ ("net_dpaa2", DPAA2) \
- _ ("net_virtio_user", VIRTIO_USER) \
- _ ("net_vhost", VHOST_ETHER) \
- _ ("net_ena", ENA) \
- _ ("net_failsafe", FAILSAFE) \
- _ ("net_liovf", LIOVF_ETHER) \
- _ ("net_qede", QEDE) \
- _ ("net_netvsc", NETVSC) \
- _ ("net_bnxt", BNXT)
-
-typedef enum
-{
- VNET_DPDK_PMD_NONE,
-#define _(s,f) VNET_DPDK_PMD_##f,
- foreach_dpdk_pmd
-#undef _
- VNET_DPDK_PMD_UNKNOWN, /* must be last */
-} dpdk_pmd_t;
-
-typedef enum
-{
- VNET_DPDK_PORT_TYPE_ETH_1G,
- VNET_DPDK_PORT_TYPE_ETH_2_5G,
- VNET_DPDK_PORT_TYPE_ETH_5G,
- VNET_DPDK_PORT_TYPE_ETH_10G,
- VNET_DPDK_PORT_TYPE_ETH_20G,
- VNET_DPDK_PORT_TYPE_ETH_25G,
- VNET_DPDK_PORT_TYPE_ETH_40G,
- VNET_DPDK_PORT_TYPE_ETH_50G,
- VNET_DPDK_PORT_TYPE_ETH_56G,
- VNET_DPDK_PORT_TYPE_ETH_100G,
- VNET_DPDK_PORT_TYPE_ETH_SWITCH,
- VNET_DPDK_PORT_TYPE_AF_PACKET,
- VNET_DPDK_PORT_TYPE_ETH_VF,
- VNET_DPDK_PORT_TYPE_VIRTIO_USER,
- VNET_DPDK_PORT_TYPE_VHOST_ETHER,
- VNET_DPDK_PORT_TYPE_FAILSAFE,
- VNET_DPDK_PORT_TYPE_NETVSC,
- VNET_DPDK_PORT_TYPE_UNKNOWN,
-} dpdk_port_type_t;
-
typedef uint16_t dpdk_portid_t;
#define foreach_dpdk_device_flags \
_ (0, ADMIN_UP, "admin-up") \
_ (1, PROMISC, "promisc") \
- _ (2, PMD, "pmd") \
_ (3, PMD_INIT_FAIL, "pmd-init-fail") \
_ (4, MAYBE_MULTISEG, "maybe-multiseg") \
_ (5, HAVE_SUBIF, "subif") \
@@ -143,12 +73,12 @@ typedef uint16_t dpdk_portid_t;
_ (13, INT_SUPPORTED, "int-supported") \
_ (14, INT_UNMASKABLE, "int-unmaskable")
-enum
+typedef enum
{
#define _(a, b, c) DPDK_DEVICE_FLAG_##b = (1 << a),
foreach_dpdk_device_flags
#undef _
-};
+} dpdk_device_flag_t;
typedef struct
{
@@ -177,10 +107,62 @@ typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
clib_spinlock_t lock;
+ u32 queue_index;
} dpdk_tx_queue_t;
typedef struct
{
+ const char *name;
+ const char *desc;
+} dpdk_driver_name_t;
+
+typedef struct
+{
+ dpdk_driver_name_t *drivers;
+ const char *interface_name_prefix;
+ u16 n_rx_desc;
+ u16 n_tx_desc;
+ u32 supported_flow_actions;
+ u32 enable_lsc_int : 1;
+ u32 enable_rxq_int : 1;
+ u32 disable_rx_scatter : 1;
+ u32 program_vlans : 1;
+ u32 mq_mode_none : 1;
+ u32 interface_number_from_port_id : 1;
+ u32 use_intel_phdr_cksum : 1;
+ u32 int_unmaskable : 1;
+} dpdk_driver_t;
+
+dpdk_driver_t *dpdk_driver_find (const char *name, const char **desc);
+
+typedef union
+{
+ struct
+ {
+ u16 disable_multi_seg : 1;
+ u16 enable_lro : 1;
+ u16 enable_tso : 1;
+ u16 enable_tcp_udp_checksum : 1;
+ u16 enable_outer_checksum_offload : 1;
+ u16 enable_lsc_int : 1;
+ u16 enable_rxq_int : 1;
+ u16 disable_tx_checksum_offload : 1;
+ u16 disable_rss : 1;
+ u16 disable_rx_scatter : 1;
+ u16 n_rx_queues;
+ u16 n_tx_queues;
+ u16 n_rx_desc;
+ u16 n_tx_desc;
+ u32 max_lro_pkt_size;
+ u64 rss_hf;
+ };
+ u64 as_u64[3];
+} dpdk_port_conf_t;
+
+STATIC_ASSERT_SIZEOF (dpdk_port_conf_t, 24);
+
+typedef struct
+{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
dpdk_rx_queue_t *rx_queues;
@@ -191,33 +173,28 @@ typedef struct
u32 hw_if_index;
u32 sw_if_index;
+ u32 buffer_flags;
/* next node index if we decide to steal the rx graph arc */
u32 per_interface_next_index;
- u16 rx_q_used;
- u16 tx_q_used;
u16 flags;
/* DPDK device port number */
dpdk_portid_t port_id;
- dpdk_pmd_t pmd:8;
i8 cpu_socket;
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
- u16 nb_tx_desc;
- u16 nb_rx_desc;
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+ u64 enabled_tx_off;
+ u64 enabled_rx_off;
+ dpdk_driver_t *driver;
u8 *name;
- u8 *interface_name_suffix;
+ const char *if_desc;
/* number of sub-interfaces */
u16 num_subifs;
- /* PMD related */
- struct rte_eth_conf port_conf;
- struct rte_eth_txconf tx_conf;
-
/* flow related */
u32 supported_flow_actions;
dpdk_flow_entry_t *flow_entries; /* pool */
@@ -226,9 +203,6 @@ typedef struct
u32 parked_loop_count;
struct rte_flow_error last_flow_error;
- /* af_packet instance number */
- u16 af_packet_instance_num;
-
struct rte_eth_link link;
f64 time_last_link_update;
@@ -236,26 +210,36 @@ typedef struct
struct rte_eth_stats last_stats;
struct rte_eth_xstat *xstats;
f64 time_last_stats_update;
- dpdk_port_type_t port_type;
/* mac address */
u8 *default_mac_address;
+ /* maximum supported max frame size */
+ u32 max_supported_frame_size;
+
+ /* due to lack of API to get ethernet max_frame_size we store information
+ * deducted from device info */
+ u8 driver_frame_overhead;
+
/* error string */
clib_error_t *errors;
+ dpdk_port_conf_t conf;
} dpdk_device_t;
+#define DPDK_MIN_POLL_INTERVAL (0.001) /* 1msec */
+
#define DPDK_STATS_POLL_INTERVAL (10.0)
-#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */
+#define DPDK_MIN_STATS_POLL_INTERVAL DPDK_MIN_POLL_INTERVAL
#define DPDK_LINK_POLL_INTERVAL (3.0)
-#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */
-
-#define foreach_dpdk_device_config_item \
- _ (num_rx_queues) \
- _ (num_tx_queues) \
- _ (num_rx_desc) \
- _ (num_tx_desc) \
+#define DPDK_MIN_LINK_POLL_INTERVAL DPDK_MIN_POLL_INTERVAL
+
+#define foreach_dpdk_device_config_item \
+ _ (num_rx_queues) \
+ _ (num_tx_queues) \
+ _ (num_rx_desc) \
+ _ (num_tx_desc) \
+ _ (max_lro_pkt_size) \
_ (rss_fn)
typedef enum
@@ -274,11 +258,8 @@ typedef struct
};
dpdk_device_addr_type_t dev_addr_type;
u8 *name;
+ u8 *tag;
u8 is_blacklisted;
- u8 vlan_strip_offload;
-#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0
-#define DPDK_DEVICE_VLAN_STRIP_OFF 1
-#define DPDK_DEVICE_VLAN_STRIP_ON 2
#define _(x) uword x;
foreach_dpdk_device_config_item
@@ -300,9 +281,7 @@ typedef struct
u8 **eal_init_args;
u8 *eal_init_args_str;
u8 *uio_driver_name;
- u8 no_multi_seg;
- u8 enable_tcp_udp_checksum;
- u8 no_tx_checksum_offload;
+ u8 uio_bind_force;
u8 enable_telemetry;
u16 max_simd_bitwidth;
@@ -310,13 +289,6 @@ typedef struct
#define DPDK_MAX_SIMD_BITWIDTH_256 256
#define DPDK_MAX_SIMD_BITWIDTH_512 512
- /* Required config parameters */
- u8 coremask_set_manually;
- u8 nchannels_set_manually;
- u32 coremask;
- u32 nchannels;
- u32 num_crypto_mbufs;
-
/*
* format interface names ala xxxEthernet%d/%d/%d instead of
* xxxEthernet%x/%x/%x.
@@ -347,20 +319,16 @@ typedef struct
u32 buffers[DPDK_RX_BURST_SZ];
u16 next[DPDK_RX_BURST_SZ];
u16 etype[DPDK_RX_BURST_SZ];
- u16 flags[DPDK_RX_BURST_SZ];
+ u32 flags[DPDK_RX_BURST_SZ];
vlib_buffer_t buffer_template;
} dpdk_per_thread_data_t;
typedef struct
{
-
/* Devices */
dpdk_device_t *devices;
dpdk_per_thread_data_t *per_thread_data;
- /* buffer flags template, configurable to enable/disable tcp / udp cksum */
- u32 buffer_flags_template;
-
/*
* flag indicating that a posted admin up/down
* (via post_sw_interface_set_flags) is in progress
@@ -371,10 +339,8 @@ typedef struct
f64 link_state_poll_interval;
f64 stat_poll_interval;
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
dpdk_config_main_t *conf;
+ dpdk_port_conf_t default_port_conf;
/* API message ID base */
u16 msg_id_base;
@@ -382,7 +348,6 @@ typedef struct
/* logging */
vlib_log_class_t log_default;
vlib_log_class_t log_cryptodev;
- vlib_log_class_t log_ipsec;
} dpdk_main_t;
extern dpdk_main_t dpdk_main;
@@ -440,35 +405,39 @@ typedef enum
vlib_log(VLIB_LOG_LEVEL_NOTICE, dpdk_main.log_default, __VA_ARGS__)
#define dpdk_log_info(...) \
vlib_log(VLIB_LOG_LEVEL_INFO, dpdk_main.log_default, __VA_ARGS__)
+#define dpdk_log_debug(...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, dpdk_main.log_default, __VA_ARGS__)
void dpdk_update_link_state (dpdk_device_t * xd, f64 now);
-#define foreach_dpdk_rss_hf \
- _(0, ETH_RSS_FRAG_IPV4, "ipv4-frag") \
- _(1, ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \
- _(2, ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \
- _(3, ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \
- _(4, ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \
- _(5, ETH_RSS_IPV4, "ipv4") \
- _(6, ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \
- _(7, ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \
- _(8, ETH_RSS_FRAG_IPV6, "ipv6-frag") \
- _(9, ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \
- _(10, ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \
- _(11, ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \
- _(12, ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \
- _(13, ETH_RSS_IPV6_EX, "ipv6-ex") \
- _(14, ETH_RSS_IPV6, "ipv6") \
- _(15, ETH_RSS_L2_PAYLOAD, "l2-payload") \
- _(16, ETH_RSS_PORT, "port") \
- _(17, ETH_RSS_VXLAN, "vxlan") \
- _(18, ETH_RSS_GENEVE, "geneve") \
- _(19, ETH_RSS_NVGRE, "nvgre") \
- _(20, ETH_RSS_GTPU, "gtpu") \
- _(60, ETH_RSS_L4_DST_ONLY, "l4-dst-only") \
- _(61, ETH_RSS_L4_SRC_ONLY, "l4-src-only") \
- _(62, ETH_RSS_L3_DST_ONLY, "l3-dst-only") \
- _(63, ETH_RSS_L3_SRC_ONLY, "l3-src-only")
+#define foreach_dpdk_rss_hf \
+ _ (0, RTE_ETH_RSS_FRAG_IPV4, "ipv4-frag") \
+ _ (1, RTE_ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \
+ _ (2, RTE_ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \
+ _ (3, RTE_ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \
+ _ (4, RTE_ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \
+ _ (5, RTE_ETH_RSS_IPV4, "ipv4") \
+ _ (6, RTE_ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \
+ _ (7, RTE_ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \
+ _ (8, RTE_ETH_RSS_FRAG_IPV6, "ipv6-frag") \
+ _ (9, RTE_ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \
+ _ (10, RTE_ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \
+ _ (11, RTE_ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \
+ _ (12, RTE_ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \
+ _ (13, RTE_ETH_RSS_IPV6_EX, "ipv6-ex") \
+ _ (14, RTE_ETH_RSS_IPV6, "ipv6") \
+ _ (15, RTE_ETH_RSS_L2_PAYLOAD, "l2-payload") \
+ _ (16, RTE_ETH_RSS_PORT, "port") \
+ _ (17, RTE_ETH_RSS_VXLAN, "vxlan") \
+ _ (18, RTE_ETH_RSS_GENEVE, "geneve") \
+ _ (19, RTE_ETH_RSS_NVGRE, "nvgre") \
+ _ (20, RTE_ETH_RSS_GTPU, "gtpu") \
+ _ (21, RTE_ETH_RSS_ESP, "esp") \
+ _ (22, RTE_ETH_RSS_L2TPV3, "l2tpv3") \
+ _ (60, RTE_ETH_RSS_L4_DST_ONLY, "l4-dst-only") \
+ _ (61, RTE_ETH_RSS_L4_SRC_ONLY, "l4-src-only") \
+ _ (62, RTE_ETH_RSS_L3_DST_ONLY, "l3-dst-only") \
+ _ (63, RTE_ETH_RSS_L3_SRC_ONLY, "l3-src-only")
format_function_t format_dpdk_device_name;
format_function_t format_dpdk_device;
@@ -481,6 +450,8 @@ format_function_t format_dpdk_flow;
format_function_t format_dpdk_rss_hf_name;
format_function_t format_dpdk_rx_offload_caps;
format_function_t format_dpdk_tx_offload_caps;
+format_function_t format_dpdk_burst_fn;
+format_function_t format_dpdk_rte_device;
vnet_flow_dev_ops_function_t dpdk_flow_ops_fn;
clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn);
diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h
index a5a8a2ad57d..cb7b185c112 100644
--- a/src/plugins/dpdk/device/dpdk_priv.h
+++ b/src/plugins/dpdk/device/dpdk_priv.h
@@ -15,15 +15,7 @@
#define DPDK_NB_RX_DESC_DEFAULT 1024
#define DPDK_NB_TX_DESC_DEFAULT 1024
-#define DPDK_NB_RX_DESC_VIRTIO 256
-#define DPDK_NB_TX_DESC_VIRTIO 256
-
-#define I40E_DEV_ID_SFP_XL710 0x1572
-#define I40E_DEV_ID_QSFP_A 0x1583
-#define I40E_DEV_ID_QSFP_B 0x1584
-#define I40E_DEV_ID_QSFP_C 0x1585
-#define I40E_DEV_ID_10G_BASE_T 0x1586
-#define I40E_DEV_ID_VF 0x154C
+#define DPDK_MAX_LRO_SIZE_DEFAULT 65536
/* These args appear by themselves */
#define foreach_eal_double_hyphen_predicate_arg \
@@ -32,10 +24,6 @@ _(no-hpet) \
_(no-huge) \
_(vmware-tsc-map)
-#define foreach_eal_single_hyphen_mandatory_arg \
-_(coremask, c) \
-_(nchannels, n) \
-
#define foreach_eal_single_hyphen_arg \
_(mem-alloc-request, m) \
_(force-ranks, r)
@@ -48,10 +36,17 @@ _(proc-type) \
_(file-prefix) \
_(vdev) \
_(log-level) \
+_(block) \
_(iova-mode) \
_(base-virtaddr)
/* clang-format on */
+static_always_inline void
+dpdk_device_flag_set (dpdk_device_t *xd, __typeof__ (xd->flags) flag, int val)
+{
+ xd->flags = val ? xd->flags | flag : xd->flags & ~flag;
+}
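dpdk_device_flag_set folds the usual |= / &= ~ pair into a single call; the
promiscuous-mode handling later in this patch is the canonical caller
(sketch, flag name taken from this patch):

    /* set or clear DPDK_DEVICE_FLAG_PROMISC in one statement */
    dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_PROMISC, 1); /* set */
    dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_PROMISC, 0); /* clear */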
+
static inline void
dpdk_get_xstats (dpdk_device_t * xd)
{
@@ -69,11 +64,11 @@ dpdk_get_xstats (dpdk_device_t * xd)
ret = rte_eth_xstats_get (xd->port_id, xd->xstats, len);
if (ret < 0 || ret > len)
{
- _vec_len (xd->xstats) = 0;
+ vec_set_len (xd->xstats, 0);
return;
}
- _vec_len (xd->xstats) = len;
+ vec_set_len (xd->xstats, len);
}
#define DPDK_UPDATE_COUNTER(vnm, tidx, xd, stat, cnt) \
@@ -100,10 +95,6 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now)
vnet_main_t *vnm = vnet_get_main ();
u32 thread_index = vlib_get_thread_index ();
- /* only update counters for PMD interfaces */
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- return;
-
xd->time_last_stats_update = now ? now : xd->time_last_stats_update;
clib_memcpy_fast (&xd->last_stats, &xd->stats, sizeof (xd->last_stats));
rte_eth_stats_get (xd->port_id, &xd->stats);
@@ -119,6 +110,119 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now)
dpdk_get_xstats (xd);
}
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+#define RTE_MBUF_F_RX_FDIR PKT_RX_FDIR
+#define RTE_MBUF_F_RX_FDIR_FLX PKT_RX_FDIR_FLX
+#define RTE_MBUF_F_RX_FDIR_ID PKT_RX_FDIR_ID
+#define RTE_MBUF_F_RX_IEEE1588_PTP PKT_RX_IEEE1588_PTP
+#define RTE_MBUF_F_RX_IEEE1588_TMST PKT_RX_IEEE1588_TMST
+#define RTE_MBUF_F_RX_IP_CKSUM_BAD PKT_RX_IP_CKSUM_BAD
+#define RTE_MBUF_F_RX_IP_CKSUM_GOOD PKT_RX_IP_CKSUM_GOOD
+#define RTE_MBUF_F_RX_IP_CKSUM_NONE PKT_RX_IP_CKSUM_GOOD
+#define RTE_MBUF_F_RX_L4_CKSUM_BAD PKT_RX_L4_CKSUM_BAD
+#define RTE_MBUF_F_RX_L4_CKSUM_GOOD PKT_RX_L4_CKSUM_GOOD
+#define RTE_MBUF_F_RX_L4_CKSUM_NONE PKT_RX_L4_CKSUM_GOOD
+#define RTE_MBUF_F_RX_LRO PKT_RX_LRO
+#define RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD PKT_RX_OUTER_IP_CKSUM_BAD
+#define RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD  PKT_RX_OUTER_L4_CKSUM_BAD
+#define RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD PKT_RX_OUTER_L4_CKSUM_GOOD
+#define RTE_MBUF_F_RX_OUTER_L4_CKSUM_NONE PKT_RX_OUTER_L4_CKSUM_GOOD
+#define RTE_MBUF_F_RX_QINQ PKT_RX_QINQ
+#define RTE_MBUF_F_RX_QINQ_STRIPPED PKT_RX_QINQ_STRIPPED
+#define RTE_MBUF_F_RX_RSS_HASH PKT_RX_RSS_HASH
+#define RTE_MBUF_F_RX_SEC_OFFLOAD PKT_RX_SEC_OFFLOAD
+#define RTE_MBUF_F_RX_SEC_OFFLOAD_FAILED PKT_RX_SEC_OFFLOAD_FAILED
+#define RTE_MBUF_F_RX_VLAN PKT_RX_VLAN
+#define RTE_MBUF_F_RX_VLAN_STRIPPED PKT_RX_VLAN_STRIPPED
+#define RTE_MBUF_F_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
+#define RTE_MBUF_F_TX_IPV4 PKT_TX_IPV4
+#define RTE_MBUF_F_TX_IPV6 PKT_TX_IPV6
+#define RTE_MBUF_F_TX_IP_CKSUM PKT_TX_IP_CKSUM
+#define RTE_MBUF_F_TX_MACSEC PKT_TX_MACSEC
+#define RTE_MBUF_F_TX_OUTER_IPV4 PKT_TX_OUTER_IPV4
+#define RTE_MBUF_F_TX_OUTER_IPV6 PKT_TX_OUTER_IPV6
+#define RTE_MBUF_F_TX_OUTER_IP_CKSUM PKT_TX_OUTER_IP_CKSUM
+#define RTE_MBUF_F_TX_OUTER_UDP_CKSUM PKT_TX_OUTER_UDP_CKSUM
+#define RTE_MBUF_F_TX_QINQ PKT_TX_QINQ
+#define RTE_MBUF_F_TX_SCTP_CKSUM PKT_TX_SCTP_CKSUM
+#define RTE_MBUF_F_TX_SEC_OFFLOAD PKT_TX_SEC_OFFLOAD
+#define RTE_MBUF_F_TX_TCP_CKSUM PKT_TX_TCP_CKSUM
+#define RTE_MBUF_F_TX_TCP_SEG PKT_TX_TCP_SEG
+#define RTE_MBUF_F_TX_TUNNEL_GENEVE PKT_TX_TUNNEL_GENEVE
+#define RTE_MBUF_F_TX_TUNNEL_GRE PKT_TX_TUNNEL_GRE
+#define RTE_MBUF_F_TX_TUNNEL_GTP PKT_TX_TUNNEL_GTP
+#define RTE_MBUF_F_TX_TUNNEL_IP PKT_TX_TUNNEL_IP
+#define RTE_MBUF_F_TX_TUNNEL_IPIP PKT_TX_TUNNEL_IPIP
+#define RTE_MBUF_F_TX_TUNNEL_MPLSINUDP PKT_TX_TUNNEL_MPLSINUDP
+#define RTE_MBUF_F_TX_TUNNEL_UDP PKT_TX_TUNNEL_UDP
+#define RTE_MBUF_F_TX_TUNNEL_VXLAN PKT_TX_TUNNEL_VXLAN
+#define RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE PKT_TX_TUNNEL_VXLAN_GPE
+#define RTE_MBUF_F_TX_UDP_CKSUM PKT_TX_UDP_CKSUM
+#define RTE_MBUF_F_TX_UDP_SEG PKT_TX_UDP_SEG
+#define RTE_MBUF_F_TX_VLAN PKT_TX_VLAN
+#define RTE_ETH_RSS_FRAG_IPV4 ETH_RSS_FRAG_IPV4
+#define RTE_ETH_RSS_NONFRAG_IPV4_TCP ETH_RSS_NONFRAG_IPV4_TCP
+#define RTE_ETH_RSS_NONFRAG_IPV4_UDP ETH_RSS_NONFRAG_IPV4_UDP
+#define RTE_ETH_RSS_NONFRAG_IPV4_SCTP ETH_RSS_NONFRAG_IPV4_SCTP
+#define RTE_ETH_RSS_NONFRAG_IPV4_OTHER ETH_RSS_NONFRAG_IPV4_OTHER
+#define RTE_ETH_RSS_IPV4 ETH_RSS_IPV4
+#define RTE_ETH_RSS_IPV6_TCP_EX ETH_RSS_IPV6_TCP_EX
+#define RTE_ETH_RSS_IPV6_UDP_EX ETH_RSS_IPV6_UDP_EX
+#define RTE_ETH_RSS_FRAG_IPV6 ETH_RSS_FRAG_IPV6
+#define RTE_ETH_RSS_NONFRAG_IPV6_TCP ETH_RSS_NONFRAG_IPV6_TCP
+#define RTE_ETH_RSS_NONFRAG_IPV6_UDP ETH_RSS_NONFRAG_IPV6_UDP
+#define RTE_ETH_RSS_NONFRAG_IPV6_SCTP ETH_RSS_NONFRAG_IPV6_SCTP
+#define RTE_ETH_RSS_NONFRAG_IPV6_OTHER ETH_RSS_NONFRAG_IPV6_OTHER
+#define RTE_ETH_RSS_IPV6_EX ETH_RSS_IPV6_EX
+#define RTE_ETH_RSS_IPV6 ETH_RSS_IPV6
+#define RTE_ETH_RSS_L2_PAYLOAD ETH_RSS_L2_PAYLOAD
+#define RTE_ETH_RSS_PORT ETH_RSS_PORT
+#define RTE_ETH_RSS_VXLAN ETH_RSS_VXLAN
+#define RTE_ETH_RSS_GENEVE ETH_RSS_GENEVE
+#define RTE_ETH_RSS_NVGRE ETH_RSS_NVGRE
+#define RTE_ETH_RSS_GTPU ETH_RSS_GTPU
+#define RTE_ETH_RSS_ESP ETH_RSS_ESP
+#define RTE_ETH_RSS_L4_DST_ONLY ETH_RSS_L4_DST_ONLY
+#define RTE_ETH_RSS_L4_SRC_ONLY ETH_RSS_L4_SRC_ONLY
+#define RTE_ETH_RSS_L3_DST_ONLY ETH_RSS_L3_DST_ONLY
+#define RTE_ETH_RSS_L3_SRC_ONLY ETH_RSS_L3_SRC_ONLY
+#define RTE_ETH_RETA_GROUP_SIZE RTE_RETA_GROUP_SIZE
+#define RTE_ETH_TX_OFFLOAD_IPV4_CKSUM DEV_TX_OFFLOAD_IPV4_CKSUM
+#define RTE_ETH_TX_OFFLOAD_TCP_CKSUM DEV_TX_OFFLOAD_TCP_CKSUM
+#define RTE_ETH_TX_OFFLOAD_UDP_CKSUM DEV_TX_OFFLOAD_UDP_CKSUM
+#define RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM
+#define RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM DEV_TX_OFFLOAD_OUTER_UDP_CKSUM
+#define RTE_ETH_TX_OFFLOAD_TCP_TSO DEV_TX_OFFLOAD_TCP_TSO
+#define RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO DEV_TX_OFFLOAD_VXLAN_TNL_TSO
+#define RTE_ETH_TX_OFFLOAD_MULTI_SEGS DEV_TX_OFFLOAD_MULTI_SEGS
+#define RTE_ETH_RX_OFFLOAD_IPV4_CKSUM DEV_RX_OFFLOAD_IPV4_CKSUM
+#define RTE_ETH_RX_OFFLOAD_SCATTER DEV_RX_OFFLOAD_SCATTER
+#define RTE_ETH_RX_OFFLOAD_TCP_LRO DEV_RX_OFFLOAD_TCP_LRO
+#define RTE_ETH_MQ_RX_RSS ETH_MQ_RX_RSS
+#define RTE_ETH_RX_OFFLOAD_TCP_CKSUM DEV_RX_OFFLOAD_TCP_CKSUM
+#define RTE_ETH_RX_OFFLOAD_UDP_CKSUM DEV_RX_OFFLOAD_UDP_CKSUM
+#define RTE_ETH_MQ_RX_NONE ETH_MQ_RX_NONE
+#define RTE_ETH_LINK_FULL_DUPLEX ETH_LINK_FULL_DUPLEX
+#define RTE_ETH_LINK_HALF_DUPLEX ETH_LINK_HALF_DUPLEX
+#define RTE_ETH_VLAN_STRIP_OFFLOAD ETH_VLAN_STRIP_OFFLOAD
+#define RTE_ETH_VLAN_FILTER_OFFLOAD ETH_VLAN_FILTER_OFFLOAD
+#define RTE_ETH_VLAN_EXTEND_OFFLOAD ETH_VLAN_EXTEND_OFFLOAD
+#define RTE_ETH_LINK_SPEED_200G ETH_LINK_SPEED_200G
+#define RTE_ETH_LINK_SPEED_100G ETH_LINK_SPEED_100G
+#define RTE_ETH_LINK_SPEED_56G ETH_LINK_SPEED_56G
+#define RTE_ETH_LINK_SPEED_50G ETH_LINK_SPEED_50G
+#define RTE_ETH_LINK_SPEED_40G ETH_LINK_SPEED_40G
+#define RTE_ETH_LINK_SPEED_25G ETH_LINK_SPEED_25G
+#define RTE_ETH_LINK_SPEED_20G ETH_LINK_SPEED_20G
+#define RTE_ETH_LINK_SPEED_10G ETH_LINK_SPEED_10G
+#define RTE_ETH_LINK_SPEED_5G ETH_LINK_SPEED_5G
+#define RTE_ETH_LINK_SPEED_2_5G ETH_LINK_SPEED_2_5G
+#define RTE_ETH_LINK_SPEED_1G ETH_LINK_SPEED_1G
+#define RTE_ETH_RSS_IP ETH_RSS_IP
+#define RTE_ETH_RSS_UDP ETH_RSS_UDP
+#define RTE_ETH_RSS_TCP ETH_RSS_TCP
+#endif
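The block above back-maps every RTE_-prefixed name introduced by DPDK 21.11
to its legacy PKT_* / ETH_* / DEV_* equivalent, so the rest of the plugin can
be written against the new names only. A minimal sketch of the effect (not
part of the patch):

    /* builds against both DPDK < 21.11 (macro expands to PKT_RX_VLAN)
       and >= 21.11 (native definition from rte_mbuf_core.h) */
    u16 vlan_tci;
    if (mb->ol_flags & RTE_MBUF_F_RX_VLAN)
      vlan_tci = mb->vlan_tci;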
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/dpdk/device/driver.c b/src/plugins/dpdk/device/driver.c
new file mode 100644
index 00000000000..9c368dd9038
--- /dev/null
+++ b/src/plugins/dpdk/device/driver.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+
+#include <dpdk/device/dpdk.h>
+
+static const u32 supported_flow_actions_intel =
+ (VNET_FLOW_ACTION_MARK | VNET_FLOW_ACTION_REDIRECT_TO_NODE |
+ VNET_FLOW_ACTION_REDIRECT_TO_QUEUE | VNET_FLOW_ACTION_BUFFER_ADVANCE |
+ VNET_FLOW_ACTION_COUNT | VNET_FLOW_ACTION_DROP | VNET_FLOW_ACTION_RSS);
+
+#define DPDK_DRIVERS(...) \
+ (dpdk_driver_name_t[]) \
+ { \
+ __VA_ARGS__, {} \
+ }
+
+static dpdk_driver_t dpdk_drivers[] = {
+ {
+ .drivers = DPDK_DRIVERS ({ "net_ice", "Intel E810 Family" },
+ { "net_igc", "Intel I225 2.5G Family" },
+ { "net_e1000_igb", "Intel e1000" },
+ { "net_e1000_em", "Intel 82540EM (e1000)" }),
+ .enable_rxq_int = 1,
+ .supported_flow_actions = supported_flow_actions_intel,
+ .use_intel_phdr_cksum = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_ixgbe", "Intel 82599" }),
+ .enable_rxq_int = 1,
+ .supported_flow_actions = supported_flow_actions_intel,
+ .use_intel_phdr_cksum = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_i40e", "Intel X710/XL710 Family" }),
+ .enable_rxq_int = 1,
+ .supported_flow_actions = supported_flow_actions_intel,
+ .use_intel_phdr_cksum = 1,
+ .int_unmaskable = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_liovf", "Cavium Lio VF" },
+ { "net_thunderx", "Cavium ThunderX" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_iavf", "Intel iAVF" },
+ { "net_i40e_vf", "Intel X710/XL710 Family VF" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ .supported_flow_actions = supported_flow_actions_intel,
+ .use_intel_phdr_cksum = 1,
+ .int_unmaskable = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_e1000_igb_vf", "Intel e1000 VF" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ .use_intel_phdr_cksum = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_ixgbe_vf", "Intel 82599 VF" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ .use_intel_phdr_cksum = 1,
+ .program_vlans = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_dpaa2", "NXP DPAA2 Mac" }),
+ .interface_name_prefix = "TenGigabitEthernet",
+ },
+ {
+ .drivers =
+ DPDK_DRIVERS ({ "net_fm10k", "Intel FM10000 Family Ethernet Switch" }),
+ .interface_name_prefix = "EthernetSwitch",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_cxgbe", "Chelsio T4/T5" }),
+ .interface_number_from_port_id = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_mlx4", "Mellanox ConnectX-3 Family" },
+ { "net_qede", "Cavium QLogic FastLinQ QL4xxxx" },
+ { "net_bnxt", "Broadcom NetXtreme E/S-Series" }),
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_virtio_user", "Virtio User" }),
+ .interface_name_prefix = "VirtioUser",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_vhost", "VhostEthernet" }),
+ .interface_name_prefix = "VhostEthernet",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "mlx5_pci", "Mellanox ConnectX-4/5/6 Family" },
+ { "net_enic", "Cisco VIC" }),
+ .use_intel_phdr_cksum = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_failsafe", "FailsafeEthernet" }),
+ .interface_name_prefix = "FailsafeEthernet",
+ .enable_lsc_int = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "AF_PACKET PMD", "af_packet" }),
+ .interface_name_prefix = "af_packet",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_netvsc", "Microsoft Hyper-V Netvsc" }),
+ .interface_name_prefix = "NetVSC",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_ena", "AWS ENA VF" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ .enable_rxq_int = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_vmxnet3", "VMware VMXNET3" }),
+ .interface_name_prefix = "GigabitEthernet",
+ .enable_rxq_int = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_virtio", "Red Hat Virtio" }),
+ .interface_name_prefix = "GigabitEthernet",
+ .n_rx_desc = 256,
+ .n_tx_desc = 256,
+ .mq_mode_none = 1,
+ .enable_rxq_int = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_gve", "Google vNIC" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ }
+};
+
+dpdk_driver_t *
+dpdk_driver_find (const char *name, const char **desc)
+{
+ for (int i = 0; i < ARRAY_LEN (dpdk_drivers); i++)
+ {
+ dpdk_driver_t *dr = dpdk_drivers + i;
+ dpdk_driver_name_t *dn = dr->drivers;
+
+ while (dn->name)
+ {
+ if (name && !strcmp (name, dn->name))
+ {
+ *desc = dn->desc;
+ return dr;
+ }
+ dn++;
+ }
+ }
+ return 0;
+}
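dpdk_driver_find is keyed on the PMD name reported in rte_eth_dev_info; a
hedged sketch of the intended lookup (the consuming call site lives in
init.c, not shown in full here):

    struct rte_eth_dev_info di;
    const char *desc = 0;
    dpdk_driver_t *dr;

    rte_eth_dev_info_get (port_id, &di);
    dr = dpdk_driver_find (di.driver_name, &desc);
    if (dr)
      {
        /* known PMD: apply per-driver quirks (enable_rxq_int,
           use_intel_phdr_cksum, ...); desc feeds the device-type display */
      }

A null return simply means an unknown PMD; callers fall back to generic
defaults.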
diff --git a/src/plugins/dpdk/device/flow.c b/src/plugins/dpdk/device/flow.c
index a090ec0e930..635f6f37ebf 100644
--- a/src/plugins/dpdk/device/flow.c
+++ b/src/plugins/dpdk/device/flow.c
@@ -21,7 +21,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ethernet/arp_packet.h>
-#include <vnet/vxlan/vxlan.h>
+#include <vxlan/vxlan.h>
#include <dpdk/device/dpdk.h>
#include <dpdk/device/dpdk_priv.h>
#include <vppinfra/error.h>
@@ -29,22 +29,30 @@
#define FLOW_IS_ETHERNET_CLASS(f) \
(f->type == VNET_FLOW_TYPE_ETHERNET)
-#define FLOW_IS_IPV4_CLASS(f) \
- ((f->type == VNET_FLOW_TYPE_IP4) || \
- (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \
- (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
- (f->type == VNET_FLOW_TYPE_IP4_VXLAN) || \
- (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
- (f->type == VNET_FLOW_TYPE_IP4_GTPU) || \
- (f->type == VNET_FLOW_TYPE_IP4_L2TPV3OIP) || \
- (f->type == VNET_FLOW_TYPE_IP4_IPSEC_ESP) || \
- (f->type == VNET_FLOW_TYPE_IP4_IPSEC_AH))
-
-#define FLOW_IS_IPV6_CLASS(f) \
- ((f->type == VNET_FLOW_TYPE_IP6) || \
- (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \
- (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED) || \
- (f->type == VNET_FLOW_TYPE_IP6_VXLAN))
+#define FLOW_IS_IPV4_CLASS(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4) || \
+ (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
+ (f->type == VNET_FLOW_TYPE_IP4_VXLAN) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPU) || \
+ (f->type == VNET_FLOW_TYPE_IP4_L2TPV3OIP) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IPSEC_ESP) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IPSEC_AH) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP4) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP6) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP6_N_TUPLE))
+
+#define FLOW_IS_IPV6_CLASS(f) \
+ ((f->type == VNET_FLOW_TYPE_IP6) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED) || \
+ (f->type == VNET_FLOW_TYPE_IP6_VXLAN) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP4) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP6) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP6_N_TUPLE))
/* check if flow is VLAN sensitive */
#define FLOW_HAS_VLAN_TAG(f) \
@@ -70,6 +78,13 @@
(f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
(f->type == VNET_FLOW_TYPE_IP4_GTPU))
+/* check if flow has an inner TCP/UDP header */
+#define FLOW_HAS_INNER_N_TUPLE(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP6_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP6_N_TUPLE))
+
/* constant structs */
static const struct rte_flow_attr ingress = {.ingress = 1 };
@@ -103,6 +118,25 @@ dpdk_flow_convert_rss_types (u64 type, u64 * dpdk_rss_type)
return;
}
+/** Maximum number of queue indices in struct rte_flow_action_rss. */
+#define ACTION_RSS_QUEUE_NUM 128
+
+static inline void
+dpdk_flow_convert_rss_queues (u32 queue_index, u32 queue_num,
+ struct rte_flow_action_rss *rss)
+{
+ u16 *queues = clib_mem_alloc (sizeof (*queues) * ACTION_RSS_QUEUE_NUM);
+ int i;
+
+ for (i = 0; i < queue_num; i++)
+ queues[i] = queue_index++;
+
+ rss->queue_num = queue_num;
+ rss->queue = queues;
+
+ return;
+}
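dpdk_flow_convert_rss_queues expands a contiguous queue range into the
explicit array rte_flow_action_rss expects; for example (illustrative
values):

    /* queue_index = 2, queue_num = 4  =>  rss.queue = {2, 3, 4, 5} */
    dpdk_flow_convert_rss_queues (2, 4, &rss);

Note the 128-entry array is allocated with clib_mem_alloc at the fixed
ACTION_RSS_QUEUE_NUM size and handed to rss->queue.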
+
static inline enum rte_eth_hash_function
dpdk_flow_convert_rss_func (vnet_rss_function_t func)
{
@@ -134,14 +168,15 @@ static int
dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
{
struct rte_flow_item_eth eth[2] = { };
- struct rte_flow_item_ipv4 ip4[2] = { };
- struct rte_flow_item_ipv6 ip6[2] = { };
- struct rte_flow_item_udp udp[2] = { };
- struct rte_flow_item_tcp tcp[2] = { };
+ struct rte_flow_item_ipv4 ip4[2] = {}, in_ip4[2] = {};
+ struct rte_flow_item_ipv6 ip6[2] = {}, in_ip6[2] = {};
+ struct rte_flow_item_udp udp[2] = {}, in_UDP[2] = {};
+ struct rte_flow_item_tcp tcp[2] = {}, in_TCP[2] = {};
struct rte_flow_item_gtp gtp[2] = { };
struct rte_flow_item_l2tpv3oip l2tp[2] = { };
struct rte_flow_item_esp esp[2] = { };
struct rte_flow_item_ah ah[2] = { };
+ struct rte_flow_item_raw generic[2] = {};
struct rte_flow_action_mark mark = { 0 };
struct rte_flow_action_queue queue = { 0 };
struct rte_flow_action_rss rss = { 0 };
@@ -165,6 +200,20 @@ dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
u8 protocol = IP_PROTOCOL_RESERVED;
int rv = 0;
+ /* Handle generic flow first */
+ if (f->type == VNET_FLOW_TYPE_GENERIC)
+ {
+ generic[0].pattern = f->generic.pattern.spec;
+ generic[1].pattern = f->generic.pattern.mask;
+
+ vec_add2 (items, item, 1);
+ item->type = RTE_FLOW_ITEM_TYPE_RAW;
+ item->spec = generic;
+ item->mask = generic + 1;
+
+ goto pattern_end;
+ }
+
enum
{
FLOW_UNKNOWN_CLASS,
@@ -285,7 +334,8 @@ dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
if ((ip6_ptr->src_addr.mask.as_u64[0] == 0) &&
(ip6_ptr->src_addr.mask.as_u64[1] == 0) &&
- (!ip6_ptr->protocol.mask))
+ (ip6_ptr->dst_addr.mask.as_u64[0] == 0) &&
+ (ip6_ptr->dst_addr.mask.as_u64[1] == 0) && (!ip6_ptr->protocol.mask))
{
item->spec = NULL;
item->mask = NULL;
@@ -437,13 +487,127 @@ dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
item->mask = raw + 1;
}
break;
+ case IP_PROTOCOL_IPV6:
+ item->type = RTE_FLOW_ITEM_TYPE_IPV6;
+#define fill_inner_ip6_with_outer_ipv(OUTER_IP_VER) \
+ if (f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP6 || \
+ f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP6_N_TUPLE) \
+ { \
+ vnet_flow_ip##OUTER_IP_VER##_ip6_t *ptr = &f->ip##OUTER_IP_VER##_ip6; \
+ if ((ptr->in_src_addr.mask.as_u64[0] == 0) && \
+ (ptr->in_src_addr.mask.as_u64[1] == 0) && \
+ (ptr->in_dst_addr.mask.as_u64[0] == 0) && \
+ (ptr->in_dst_addr.mask.as_u64[1] == 0) && (!ptr->in_protocol.mask)) \
+ { \
+ item->spec = NULL; \
+ item->mask = NULL; \
+ } \
+ else \
+ { \
+ clib_memcpy (in_ip6[0].hdr.src_addr, &ptr->in_src_addr.addr, \
+ ARRAY_LEN (ptr->in_src_addr.addr.as_u8)); \
+ clib_memcpy (in_ip6[1].hdr.src_addr, &ptr->in_src_addr.mask, \
+ ARRAY_LEN (ptr->in_src_addr.mask.as_u8)); \
+ clib_memcpy (in_ip6[0].hdr.dst_addr, &ptr->in_dst_addr.addr, \
+ ARRAY_LEN (ptr->in_dst_addr.addr.as_u8)); \
+ clib_memcpy (in_ip6[1].hdr.dst_addr, &ptr->in_dst_addr.mask, \
+ ARRAY_LEN (ptr->in_dst_addr.mask.as_u8)); \
+ item->spec = in_ip6; \
+ item->mask = in_ip6 + 1; \
+ } \
+ }
+ fill_inner_ip6_with_outer_ipv (6) fill_inner_ip6_with_outer_ipv (4)
+#undef fill_inner_ip6_with_outer_ipv
+ break;
+ case IP_PROTOCOL_IP_IN_IP:
+ item->type = RTE_FLOW_ITEM_TYPE_IPV4;
+
+#define fill_inner_ip4_with_outer_ipv(OUTER_IP_VER) \
+ if (f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP4 || \
+ f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP4_N_TUPLE) \
+ { \
+ vnet_flow_ip##OUTER_IP_VER##_ip4_t *ptr = &f->ip##OUTER_IP_VER##_ip4; \
+ if ((!ptr->in_src_addr.mask.as_u32) && \
+ (!ptr->in_dst_addr.mask.as_u32) && (!ptr->in_protocol.mask)) \
+ { \
+ item->spec = NULL; \
+ item->mask = NULL; \
+ } \
+ else \
+ { \
+ in_ip4[0].hdr.src_addr = ptr->in_src_addr.addr.as_u32; \
+ in_ip4[1].hdr.src_addr = ptr->in_src_addr.mask.as_u32; \
+ in_ip4[0].hdr.dst_addr = ptr->in_dst_addr.addr.as_u32; \
+ in_ip4[1].hdr.dst_addr = ptr->in_dst_addr.mask.as_u32; \
+ item->spec = in_ip4; \
+ item->mask = in_ip4 + 1; \
+ } \
+ }
+ fill_inner_ip4_with_outer_ipv (6) fill_inner_ip4_with_outer_ipv (4)
+#undef fill_inner_ip4_with_outer_ipv
+ break;
default:
rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
goto done;
}
+ if (FLOW_HAS_INNER_N_TUPLE (f))
+ {
+ vec_add2 (items, item, 1);
+
+#define fill_inner_n_tuple_of(proto) \
+ item->type = RTE_FLOW_ITEM_TYPE_##proto; \
+ if ((ptr->in_src_port.mask == 0) && (ptr->in_dst_port.mask == 0)) \
+ { \
+ item->spec = NULL; \
+ item->mask = NULL; \
+ } \
+ else \
+ { \
+ in_##proto[0].hdr.src_port = \
+ clib_host_to_net_u16 (ptr->in_src_port.port); \
+ in_##proto[1].hdr.src_port = \
+ clib_host_to_net_u16 (ptr->in_src_port.mask); \
+ in_##proto[0].hdr.dst_port = \
+ clib_host_to_net_u16 (ptr->in_dst_port.port); \
+ in_##proto[1].hdr.dst_port = \
+ clib_host_to_net_u16 (ptr->in_dst_port.mask); \
+ item->spec = in_##proto; \
+ item->mask = in_##proto + 1; \
+ }
+
+#define fill_inner_n_tuple(OUTER_IP_VER, INNER_IP_VER) \
+ if (f->type == \
+ VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP##INNER_IP_VER##_N_TUPLE) \
+ { \
+ vnet_flow_ip##OUTER_IP_VER##_ip##INNER_IP_VER##_n_tuple_t *ptr = \
+ &f->ip##OUTER_IP_VER##_ip##INNER_IP_VER##_n_tuple; \
+ switch (ptr->in_protocol.prot) \
+ { \
+ case IP_PROTOCOL_UDP: \
+ fill_inner_n_tuple_of (UDP) break; \
+ case IP_PROTOCOL_TCP: \
+ fill_inner_n_tuple_of (TCP) break; \
+ default: \
+ break; \
+ } \
+ }
+ fill_inner_n_tuple (6, 4) fill_inner_n_tuple (4, 4)
+ fill_inner_n_tuple (6, 6) fill_inner_n_tuple (4, 6)
+#undef fill_inner_n_tuple
+#undef fill_inner_n_tuple_of
+ }
+
pattern_end:
+ if ((f->actions & VNET_FLOW_ACTION_RSS) &&
+ (f->rss_types & (1ULL << VNET_FLOW_RSS_TYPES_ESP)))
+ {
+
+ vec_add2 (items, item, 1);
+ item->type = RTE_FLOW_ITEM_TYPE_ESP;
+ }
+
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_END;
@@ -482,6 +646,10 @@ pattern_end:
/* convert types to DPDK rss bitmask */
dpdk_flow_convert_rss_types (f->rss_types, &rss_type);
+ if (f->queue_num)
+ /* convert rss queues to array */
+ dpdk_flow_convert_rss_queues (f->queue_index, f->queue_num, &rss);
+
rss.types = rss_type;
if ((rss.func = dpdk_flow_convert_rss_func (f->rss_fun)) ==
RTE_ETH_HASH_FUNCTION_MAX)
@@ -547,6 +715,7 @@ int
dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
u32 flow_index, uword * private_data)
{
+ vlib_main_t *vm = vlib_get_main ();
dpdk_main_t *dm = &dpdk_main;
vnet_flow_t *flow = vnet_get_flow (flow_index);
dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
@@ -557,7 +726,7 @@ dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
/* recycle old flow lookup entries only after the main loop counter
increases - i.e. previously DMA'ed packets were handled */
if (vec_len (xd->parked_lookup_indexes) > 0 &&
- xd->parked_loop_count != dm->vlib_main->main_loop_count)
+ xd->parked_loop_count != vm->main_loop_count)
{
u32 *fl_index;
@@ -580,7 +749,7 @@ dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
fle = pool_elt_at_index (xd->flow_lookup_entries, fe->mark);
clib_memset (fle, -1, sizeof (*fle));
vec_add1 (xd->parked_lookup_indexes, fe->mark);
- xd->parked_loop_count = dm->vlib_main->main_loop_count;
+ xd->parked_loop_count = vm->main_loop_count;
}
clib_memset (fe, 0, sizeof (*fe));
@@ -644,6 +813,15 @@ dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
case VNET_FLOW_TYPE_IP4_L2TPV3OIP:
case VNET_FLOW_TYPE_IP4_IPSEC_ESP:
case VNET_FLOW_TYPE_IP4_IPSEC_AH:
+ case VNET_FLOW_TYPE_IP4_IP4:
+ case VNET_FLOW_TYPE_IP4_IP4_N_TUPLE:
+ case VNET_FLOW_TYPE_IP4_IP6:
+ case VNET_FLOW_TYPE_IP4_IP6_N_TUPLE:
+ case VNET_FLOW_TYPE_IP6_IP4:
+ case VNET_FLOW_TYPE_IP6_IP4_N_TUPLE:
+ case VNET_FLOW_TYPE_IP6_IP6:
+ case VNET_FLOW_TYPE_IP6_IP6_N_TUPLE:
+ case VNET_FLOW_TYPE_GENERIC:
if ((rv = dpdk_flow_add (xd, flow, fe)))
goto done;
break;
diff --git a/src/plugins/dpdk/device/format.c b/src/plugins/dpdk/device/format.c
index 24994aa9426..c4170c20329 100644
--- a/src/plugins/dpdk/device/format.c
+++ b/src/plugins/dpdk/device/format.c
@@ -17,9 +17,6 @@
#include <vppinfra/format.h>
#include <assert.h>
-#define __USE_GNU
-#include <dlfcn.h>
-
#include <vnet/ethernet/ethernet.h>
#include <vnet/ethernet/sfp.h>
#include <dpdk/device/dpdk.h>
@@ -49,18 +46,28 @@
#endif
#define foreach_dpdk_pkt_rx_offload_flag \
- _ (PKT_RX_VLAN, "RX packet is a 802.1q VLAN packet") \
- _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \
- _ (PKT_RX_FDIR, "RX packet with FDIR infos") \
- _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \
- _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \
- _ (PKT_RX_OUTER_IP_CKSUM_BAD, "External IP header checksum error") \
- _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \
- _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \
- _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. is valid") \
- _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \
- _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \
- _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped")
+ _ (RX_FDIR, "RX packet with FDIR infos") \
+ _ (RX_FDIR_FLX, "RX packet with FDIR_FLX info") \
+ _ (RX_FDIR_ID, "RX packet with FDIR_ID info") \
+ _ (RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \
+ _ (RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \
+ _ (RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \
+ _ (RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \
+ _ (RX_IP_CKSUM_NONE, "no IP cksum of RX pkt.") \
+ _ (RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \
+ _ (RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. is valid") \
+ _ (RX_L4_CKSUM_NONE, "no L4 cksum of RX pkt.") \
+ _ (RX_LRO, "LRO packet") \
+ _ (RX_OUTER_IP_CKSUM_BAD, "External IP header checksum error") \
+ _ (RX_OUTER_L4_CKSUM_BAD, "External L4 header checksum error") \
+ _ (RX_OUTER_L4_CKSUM_GOOD, "External L4 header checksum OK") \
+ _ (RX_QINQ, "RX packet with QinQ tags") \
+ _ (RX_QINQ_STRIPPED, "RX packet QinQ tags stripped") \
+ _ (RX_RSS_HASH, "RX packet with RSS hash result") \
+ _ (RX_SEC_OFFLOAD, "RX packet with security offload") \
+ _ (RX_SEC_OFFLOAD_FAILED, "RX packet with security offload failed") \
+ _ (RX_VLAN, "RX packet is a 802.1q VLAN packet") \
+ _ (RX_VLAN_STRIPPED, "RX packet VLAN tag stripped")
#define foreach_dpdk_pkt_type \
_ (L2, ETHER, "Ethernet packet") \
@@ -103,14 +110,32 @@
_ (INNER_L4, NONFRAG, "Inner non-fragmented IP packet")
#define foreach_dpdk_pkt_tx_offload_flag \
- _ (PKT_TX_VLAN_PKT, "TX packet is a 802.1q VLAN packet") \
- _ (PKT_TX_TUNNEL_VXLAN, "TX packet is a VXLAN packet") \
- _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \
- _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \
- _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt. computed by NIC") \
- _ (PKT_TX_OUTER_IP_CKSUM, "Outer IP cksum of Tx pkt. computed by NIC") \
- _ (PKT_TX_TCP_SEG, "TSO of TX pkt. done by NIC") \
- _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp")
+ _ (TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp") \
+ _ (TX_IPV4, "TX IPV4") \
+ _ (TX_IPV6, "TX IPV6") \
+ _ (TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \
+ _ (TX_MACSEC, "TX MACSEC") \
+ _ (TX_OUTER_IPV4, "TX outer IPV4") \
+ _ (TX_OUTER_IPV6, "TX outer IPV6") \
+ _ (TX_OUTER_IP_CKSUM, "Outer IP cksum of Tx pkt. computed by NIC") \
+ _ (TX_OUTER_UDP_CKSUM, "TX outer UDP cksum") \
+ _ (TX_QINQ, "TX QINQ") \
+ _ (TX_SCTP_CKSUM, "SCTP cksum of TX pkt. computed by NIC") \
+ _ (TX_SEC_OFFLOAD, "TX SEC OFFLOAD") \
+ _ (TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \
+ _ (TX_TCP_SEG, "TSO of TX pkt. done by NIC") \
+ _ (TX_TUNNEL_GENEVE, "TX tunnel GENEVE") \
+ _ (TX_TUNNEL_GRE, "TX tunnel GRE") \
+ _ (TX_TUNNEL_GTP, "TX tunnel GTP") \
+ _ (TX_TUNNEL_IP, "TX tunnel IP") \
+ _ (TX_TUNNEL_IPIP, "TX tunnel IPIP") \
+ _ (TX_TUNNEL_MPLSINUDP, "TX tunnel MPLSinUDP") \
+ _ (TX_TUNNEL_UDP, "TX tunnel UDP") \
+ _ (TX_TUNNEL_VXLAN, "TX packet is a VXLAN packet") \
+ _ (TX_TUNNEL_VXLAN_GPE, "TX tunnel VXLAN GPE") \
+ _ (TX_UDP_CKSUM, "TX UDP cksum") \
+ _ (TX_UDP_SEG, "TX UDP SEG") \
+ _ (TX_VLAN, "TX packet is a 802.1q VLAN packet")
#define foreach_dpdk_pkt_offload_flag \
foreach_dpdk_pkt_rx_offload_flag \
@@ -123,105 +148,10 @@ u8 *
format_dpdk_device_name (u8 * s, va_list * args)
{
dpdk_main_t *dm = &dpdk_main;
- char *devname_format;
- char *device_name;
u32 i = va_arg (*args, u32);
dpdk_device_t *xd = vec_elt_at_index (dm->devices, i);
- struct rte_eth_dev_info dev_info;
- struct rte_pci_device *pci_dev;
- u8 *ret;
-
- if (xd->name)
- return format (s, "%s", xd->name);
-
- if (dm->conf->interface_name_format_decimal)
- devname_format = "%s%d/%d/%d";
- else
- devname_format = "%s%x/%x/%x";
-
- switch (xd->port_type)
- {
- case VNET_DPDK_PORT_TYPE_ETH_1G:
- device_name = "GigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_2_5G:
- device_name = "Two_FiveGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_5G:
- device_name = "FiveGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_10G:
- device_name = "TenGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_20G:
- device_name = "TwentyGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_25G:
- device_name = "TwentyFiveGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_40G:
- device_name = "FortyGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_50G:
- device_name = "FiftyGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_56G:
- device_name = "FiftySixGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_100G:
- device_name = "HundredGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_SWITCH:
- device_name = "EthernetSwitch";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_VF:
- device_name = "VirtualFunctionEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_AF_PACKET:
- return format (s, "af_packet%d", xd->af_packet_instance_num);
-
- case VNET_DPDK_PORT_TYPE_VIRTIO_USER:
- device_name = "VirtioUser";
- break;
-
- case VNET_DPDK_PORT_TYPE_VHOST_ETHER:
- device_name = "VhostEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_FAILSAFE:
- device_name = "FailsafeEthernet";
- break;
-
- default:
- case VNET_DPDK_PORT_TYPE_UNKNOWN:
- device_name = "UnknownEthernet";
- break;
- }
-
- rte_eth_dev_info_get (xd->port_id, &dev_info);
- pci_dev = dpdk_get_pci_device (&dev_info);
-
- if (pci_dev && xd->port_type != VNET_DPDK_PORT_TYPE_FAILSAFE)
- ret = format (s, devname_format, device_name, pci_dev->addr.bus,
- pci_dev->addr.devid, pci_dev->addr.function);
- else
- ret = format (s, "%s%d", device_name, xd->port_id);
- if (xd->interface_name_suffix)
- return format (ret, "/%s", xd->interface_name_suffix);
- return ret;
+ return format (s, "%v", xd->name);
}
u8 *
@@ -243,126 +173,12 @@ static u8 *
format_dpdk_device_type (u8 * s, va_list * args)
{
dpdk_main_t *dm = &dpdk_main;
- char *dev_type;
u32 i = va_arg (*args, u32);
- switch (dm->devices[i].pmd)
- {
- case VNET_DPDK_PMD_E1000EM:
- dev_type = "Intel 82540EM (e1000)";
- break;
-
- case VNET_DPDK_PMD_IGB:
- dev_type = "Intel e1000";
- break;
-
- case VNET_DPDK_PMD_I40E:
- dev_type = "Intel X710/XL710 Family";
- break;
-
- case VNET_DPDK_PMD_I40EVF:
- dev_type = "Intel X710/XL710 Family VF";
- break;
-
- case VNET_DPDK_PMD_ICE:
- dev_type = "Intel E810 Family";
- break;
-
- case VNET_DPDK_PMD_IAVF:
- dev_type = "Intel iAVF";
- break;
-
- case VNET_DPDK_PMD_FM10K:
- dev_type = "Intel FM10000 Family Ethernet Switch";
- break;
-
- case VNET_DPDK_PMD_IGBVF:
- dev_type = "Intel e1000 VF";
- break;
-
- case VNET_DPDK_PMD_VIRTIO:
- dev_type = "Red Hat Virtio";
- break;
-
- case VNET_DPDK_PMD_IXGBEVF:
- dev_type = "Intel 82599 VF";
- break;
-
- case VNET_DPDK_PMD_IXGBE:
- dev_type = "Intel 82599";
- break;
-
- case VNET_DPDK_PMD_ENIC:
- dev_type = "Cisco VIC";
- break;
-
- case VNET_DPDK_PMD_CXGBE:
- dev_type = "Chelsio T4/T5";
- break;
-
- case VNET_DPDK_PMD_MLX4:
- dev_type = "Mellanox ConnectX-3 Family";
- break;
-
- case VNET_DPDK_PMD_MLX5:
- dev_type = "Mellanox ConnectX-4 Family";
- break;
-
- case VNET_DPDK_PMD_VMXNET3:
- dev_type = "VMware VMXNET3";
- break;
-
- case VNET_DPDK_PMD_AF_PACKET:
- dev_type = "af_packet";
- break;
-
- case VNET_DPDK_PMD_DPAA2:
- dev_type = "NXP DPAA2 Mac";
- break;
-
- case VNET_DPDK_PMD_VIRTIO_USER:
- dev_type = "Virtio User";
- break;
-
- case VNET_DPDK_PMD_THUNDERX:
- dev_type = "Cavium ThunderX";
- break;
-
- case VNET_DPDK_PMD_VHOST_ETHER:
- dev_type = "VhostEthernet";
- break;
-
- case VNET_DPDK_PMD_ENA:
- dev_type = "AWS ENA VF";
- break;
-
- case VNET_DPDK_PMD_FAILSAFE:
- dev_type = "FailsafeEthernet";
- break;
-
- case VNET_DPDK_PMD_LIOVF_ETHER:
- dev_type = "Cavium Lio VF";
- break;
-
- case VNET_DPDK_PMD_QEDE:
- dev_type = "Cavium QLogic FastLinQ QL4xxxx";
- break;
-
- case VNET_DPDK_PMD_NETVSC:
- dev_type = "Microsoft Hyper-V Netvsc";
- break;
-
- case VNET_DPDK_PMD_BNXT:
- dev_type = "Broadcom NetXtreme E/S-Series";
- break;
-
- default:
- case VNET_DPDK_PMD_UNKNOWN:
- dev_type = "### UNKNOWN ###";
- break;
- }
-
- return format (s, dev_type);
+ if (dm->devices[i].if_desc)
+ return format (s, dm->devices[i].if_desc);
+ else
+ return format (s, "### UNKNOWN ###");
}
static u8 *
@@ -378,10 +194,11 @@ format_dpdk_link_status (u8 * s, va_list * args)
{
u32 promisc = rte_eth_promiscuous_get (xd->port_id);
- s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ?
- "full" : "half");
- s = format (s, "mtu %d %s\n", hi->max_packet_bytes, promisc ?
- " promisc" : "");
+ s = format (s, "%s duplex ",
+ (l->link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ? "full" :
+ "half");
+ s = format (s, "max-frame-size %d %s\n", hi->max_frame_size,
+ promisc ? " promisc" : "");
}
else
s = format (s, "\n");
@@ -419,8 +236,6 @@ format_offload (u8 * s, va_list * va)
uword i, l;
l = ~0;
- if (clib_mem_is_vec (id))
- l = vec_len (id);
if (id)
for (i = 0; id[i] != 0 && i < l; i++)
@@ -523,15 +338,29 @@ format_dpdk_device_module_info (u8 * s, va_list * args)
return s;
}
-static const char *
-ptr2sname (void *p)
+u8 *
+format_dpdk_burst_fn (u8 *s, va_list *args)
{
- Dl_info info = { 0 };
+ dpdk_device_t *xd = va_arg (*args, dpdk_device_t *);
+ vlib_rx_or_tx_t dir = va_arg (*args, vlib_rx_or_tx_t);
+ void *p;
+ clib_elf_symbol_t sym;
- if (dladdr (p, &info) == 0)
- return 0;
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+#define rte_eth_fp_ops rte_eth_devices
+#endif
+
+ p = (dir == VLIB_TX) ? rte_eth_fp_ops[xd->port_id].tx_pkt_burst :
+ rte_eth_fp_ops[xd->port_id].rx_pkt_burst;
- return info.dli_sname;
+ if (clib_elf_symbol_by_address (pointer_to_uword (p), &sym))
+ {
+ return format (s, "%s", clib_elf_symbol_name (&sym));
+ }
+ else
+ {
+ return format (s, "(not available)");
+ }
}
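format_dpdk_burst_fn replaces the old dladdr-based ptr2sname: it resolves
the PMD's rx/tx burst function pointer through the ELF symbol table instead.
Usage mirrors any other format_function_t (sketch, matching the call sites
later in this patch):

    u8 *s = format (0, "rx burst: %U", format_dpdk_burst_fn, xd, VLIB_RX);

The pre-21.11 fallback maps rte_eth_fp_ops to rte_eth_devices, since the
separate fast-path ops table only appeared in DPDK 21.11.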
static u8 *
@@ -549,16 +378,51 @@ format_switch_info (u8 * s, va_list * args)
}
u8 *
+format_dpdk_rte_device (u8 *s, va_list *args)
+{
+ struct rte_device *d = va_arg (*args, struct rte_device *);
+
+ if (!d)
+ return format (s, "not available");
+
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ s =
+ format (s, "name: %s, numa: %d", rte_dev_name (d), rte_dev_numa_node (d));
+
+ if (rte_dev_driver (d))
+ s = format (s, ", driver: %s", rte_driver_name (rte_dev_driver (d)));
+
+ if (rte_dev_bus (d))
+ s = format (s, ", bus: %s", rte_bus_name (rte_dev_bus (d)));
+#else
+ s = format (s, "name: %s, numa: %d", d->name, d->numa_node);
+
+ if (d->driver)
+ s = format (s, ", driver: %s", d->driver->name);
+
+ if (d->bus)
+ s = format (s, ", bus: %s", d->bus->name);
+#endif
+
+ return s;
+}
+
+u8 *
format_dpdk_device (u8 * s, va_list * args)
{
u32 dev_instance = va_arg (*args, u32);
int verbose = va_arg (*args, int);
dpdk_main_t *dm = &dpdk_main;
+ vlib_main_t *vm = vlib_get_main ();
dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
u32 indent = format_get_indent (s);
- f64 now = vlib_time_now (dm->vlib_main);
+ f64 now = vlib_time_now (vm);
struct rte_eth_dev_info di;
struct rte_eth_burst_mode mode;
+ struct rte_pci_device *pci;
+ struct rte_eth_rss_conf rss_conf;
+ int vlan_off;
+ int retval;
dpdk_update_counters (xd, now);
dpdk_update_link_state (xd, now);
@@ -569,126 +433,114 @@ format_dpdk_device (u8 * s, va_list * args)
format_white_space, indent + 2, format_dpdk_link_status, xd);
s = format (s, "%Uflags: %U\n",
format_white_space, indent + 2, format_dpdk_device_flags, xd);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (rte_dev_devargs (di.device) && rte_dev_devargs (di.device)->args)
+ s = format (s, "%UDevargs: %s\n", format_white_space, indent + 2,
+ rte_dev_devargs (di.device)->args);
+#else
if (di.device->devargs && di.device->devargs->args)
s = format (s, "%UDevargs: %s\n",
format_white_space, indent + 2, di.device->devargs->args);
- s = format (s, "%Urx: queues %d (max %d), desc %d "
+#endif
+ s = format (s,
+ "%Urx: queues %d (max %d), desc %d "
"(min %d max %d align %d)\n",
- format_white_space, indent + 2, xd->rx_q_used, di.max_rx_queues,
- xd->nb_rx_desc, di.rx_desc_lim.nb_min, di.rx_desc_lim.nb_max,
- di.rx_desc_lim.nb_align);
- s = format (s, "%Utx: queues %d (max %d), desc %d "
+ format_white_space, indent + 2, xd->conf.n_rx_queues,
+ di.max_rx_queues, xd->conf.n_rx_desc, di.rx_desc_lim.nb_min,
+ di.rx_desc_lim.nb_max, di.rx_desc_lim.nb_align);
+ s = format (s,
+ "%Utx: queues %d (max %d), desc %d "
"(min %d max %d align %d)\n",
- format_white_space, indent + 2, xd->tx_q_used, di.max_tx_queues,
- xd->nb_tx_desc, di.tx_desc_lim.nb_min, di.tx_desc_lim.nb_max,
- di.tx_desc_lim.nb_align);
+ format_white_space, indent + 2, xd->conf.n_tx_queues,
+ di.max_tx_queues, xd->conf.n_tx_desc, di.tx_desc_lim.nb_min,
+ di.tx_desc_lim.nb_max, di.tx_desc_lim.nb_align);
- if (xd->flags & DPDK_DEVICE_FLAG_PMD)
- {
- struct rte_pci_device *pci;
- struct rte_eth_rss_conf rss_conf;
- int vlan_off;
- int retval;
+ rss_conf.rss_key = 0;
+ rss_conf.rss_hf = 0;
+ retval = rte_eth_dev_rss_hash_conf_get (xd->port_id, &rss_conf);
+ if (retval < 0)
+ clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval);
- rss_conf.rss_key = 0;
- rss_conf.rss_hf = 0;
- retval = rte_eth_dev_rss_hash_conf_get (xd->port_id, &rss_conf);
- if (retval < 0)
- clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval);
+ pci = dpdk_get_pci_device (&di);
- pci = dpdk_get_pci_device (&di);
+ if (pci)
+ {
+ u8 *s2;
+ if (xd->cpu_socket > -1)
+ s2 = format (0, "%d", xd->cpu_socket);
+ else
+ s2 = format (0, "unknown");
+ s = format (s,
+ "%Upci: device %04x:%04x subsystem %04x:%04x "
+ "address %04x:%02x:%02x.%02x numa %v\n",
+ format_white_space, indent + 2, pci->id.vendor_id,
+ pci->id.device_id, pci->id.subsystem_vendor_id,
+ pci->id.subsystem_device_id, pci->addr.domain, pci->addr.bus,
+ pci->addr.devid, pci->addr.function, s2);
+ vec_free (s2);
+ }
- if (pci)
- {
- u8 *s2;
- if (xd->cpu_socket > -1)
- s2 = format (0, "%d", xd->cpu_socket);
- else
- s2 = format (0, "unknown");
- s = format (s, "%Upci: device %04x:%04x subsystem %04x:%04x "
- "address %04x:%02x:%02x.%02x numa %v\n",
- format_white_space, indent + 2, pci->id.vendor_id,
- pci->id.device_id, pci->id.subsystem_vendor_id,
- pci->id.subsystem_device_id, pci->addr.domain,
- pci->addr.bus, pci->addr.devid, pci->addr.function, s2);
- vec_free (s2);
- }
+ if (di.switch_info.domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
+ {
+ s = format (s, "%Uswitch info: %U\n", format_white_space, indent + 2,
+ format_switch_info, &di.switch_info);
+ }
- if (di.switch_info.domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
- {
- s =
- format (s, "%Uswitch info: %U\n", format_white_space, indent + 2,
- format_switch_info, &di.switch_info);
- }
+ if (1 < verbose)
+ {
+ s = format (s, "%Umodule: %U\n", format_white_space, indent + 2,
+ format_dpdk_device_module_info, xd);
+ }
- if (1 < verbose)
- {
- s = format (s, "%Umodule: %U\n", format_white_space, indent + 2,
- format_dpdk_device_module_info, xd);
- }
+ s = format (s, "%Umax rx packet len: %d\n", format_white_space, indent + 2,
+ di.max_rx_pktlen);
+ s = format (s, "%Upromiscuous: unicast %s all-multicast %s\n",
+ format_white_space, indent + 2,
+ rte_eth_promiscuous_get (xd->port_id) ? "on" : "off",
+ rte_eth_allmulticast_get (xd->port_id) ? "on" : "off");
+ vlan_off = rte_eth_dev_get_vlan_offload (xd->port_id);
+ s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n",
+ format_white_space, indent + 2,
+ vlan_off & RTE_ETH_VLAN_STRIP_OFFLOAD ? "on" : "off",
+ vlan_off & RTE_ETH_VLAN_FILTER_OFFLOAD ? "on" : "off",
+ vlan_off & RTE_ETH_VLAN_EXTEND_OFFLOAD ? "on" : "off");
+ s = format (s, "%Urx offload avail: %U\n", format_white_space, indent + 2,
+ format_dpdk_rx_offload_caps, di.rx_offload_capa);
+ s = format (s, "%Urx offload active: %U\n", format_white_space, indent + 2,
+ format_dpdk_rx_offload_caps, xd->enabled_rx_off);
+ s = format (s, "%Utx offload avail: %U\n", format_white_space, indent + 2,
+ format_dpdk_tx_offload_caps, di.tx_offload_capa);
+ s = format (s, "%Utx offload active: %U\n", format_white_space, indent + 2,
+ format_dpdk_tx_offload_caps, xd->enabled_tx_off);
+ s = format (s,
+ "%Urss avail: %U\n"
+ "%Urss active: %U\n",
+ format_white_space, indent + 2, format_dpdk_rss_hf_name,
+ di.flow_type_rss_offloads, format_white_space, indent + 2,
+ format_dpdk_rss_hf_name, rss_conf.rss_hf);
+
+ if (rte_eth_tx_burst_mode_get (xd->port_id, 0, &mode) == 0)
+ {
+ s = format (s, "%Utx burst mode: %s%s\n", format_white_space, indent + 2,
+ mode.info,
+ mode.flags & RTE_ETH_BURST_FLAG_PER_QUEUE ? " (per queue)" :
+ "");
+ }
- s = format (s, "%Umax rx packet len: %d\n", format_white_space,
- indent + 2, di.max_rx_pktlen);
- s = format (s, "%Upromiscuous: unicast %s all-multicast %s\n",
- format_white_space, indent + 2,
- rte_eth_promiscuous_get (xd->port_id) ? "on" : "off",
- rte_eth_allmulticast_get (xd->port_id) ? "on" : "off");
- vlan_off = rte_eth_dev_get_vlan_offload (xd->port_id);
- s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n",
- format_white_space, indent + 2,
- vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off",
- vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off",
- vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? "on" : "off");
- s = format (s, "%Urx offload avail: %U\n",
- format_white_space, indent + 2,
- format_dpdk_rx_offload_caps, di.rx_offload_capa);
- s = format (s, "%Urx offload active: %U\n",
- format_white_space, indent + 2,
- format_dpdk_rx_offload_caps, xd->port_conf.rxmode.offloads);
- s = format (s, "%Utx offload avail: %U\n",
- format_white_space, indent + 2,
- format_dpdk_tx_offload_caps, di.tx_offload_capa);
- s = format (s, "%Utx offload active: %U\n",
- format_white_space, indent + 2,
- format_dpdk_tx_offload_caps, xd->port_conf.txmode.offloads);
- s = format (s, "%Urss avail: %U\n"
- "%Urss active: %U\n",
- format_white_space, indent + 2,
- format_dpdk_rss_hf_name, di.flow_type_rss_offloads,
- format_white_space, indent + 2,
- format_dpdk_rss_hf_name, rss_conf.rss_hf);
-
- if (rte_eth_tx_burst_mode_get (xd->port_id, 0, &mode) == 0)
- {
- s = format (s, "%Utx burst mode: %s%s\n",
- format_white_space, indent + 2,
- mode.info,
- mode.flags & RTE_ETH_BURST_FLAG_PER_QUEUE ?
- " (per queue)" : "");
- }
- else
- {
- s = format (s, "%Utx burst function: %s\n",
- format_white_space, indent + 2,
- ptr2sname (rte_eth_devices[xd->port_id].tx_pkt_burst));
- }
+ s = format (s, "%Utx burst function: %U\n", format_white_space, indent + 2,
+ format_dpdk_burst_fn, xd, VLIB_TX);
- if (rte_eth_rx_burst_mode_get (xd->port_id, 0, &mode) == 0)
- {
- s = format (s, "%Urx burst mode: %s%s\n",
- format_white_space, indent + 2,
- mode.info,
- mode.flags & RTE_ETH_BURST_FLAG_PER_QUEUE ?
- " (per queue)" : "");
- }
- else
- {
- s = format (s, "%Urx burst function: %s\n",
- format_white_space, indent + 2,
- ptr2sname (rte_eth_devices[xd->port_id].rx_pkt_burst));
- }
+ if (rte_eth_rx_burst_mode_get (xd->port_id, 0, &mode) == 0)
+ {
+ s = format (s, "%Urx burst mode: %s%s\n", format_white_space, indent + 2,
+ mode.info,
+ mode.flags & RTE_ETH_BURST_FLAG_PER_QUEUE ? " (per queue)" :
+ "");
}
+ s = format (s, "%Urx burst function: %U\n", format_white_space, indent + 2,
+ format_dpdk_burst_fn, xd, VLIB_RX);
+
/* $$$ MIB counters */
{
#define _(N, V) \
@@ -713,7 +565,6 @@ format_dpdk_device (u8 * s, va_list * args)
if (ret >= 0 && ret <= len)
{
- /* *INDENT-OFF* */
vec_foreach_index(i, xd->xstats)
{
xstat = vec_elt_at_index(xd->xstats, i);
@@ -725,7 +576,6 @@ format_dpdk_device (u8 * s, va_list * args)
xstat->value);
}
}
- /* *INDENT-ON* */
vec_free (xstat_names);
}
@@ -756,14 +606,12 @@ format_dpdk_tx_trace (u8 * s, va_list * va)
dpdk_main_t *dm = &dpdk_main;
dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index);
u32 indent = format_get_indent (s);
- vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index);
- s = format (s, "%U tx queue %d",
- format_vnet_sw_interface_name, vnm, sw, t->queue_index);
+ s = format (s, "%U tx queue %d", format_vnet_sw_if_index_name, vnm,
+ xd->sw_if_index, t->queue_index);
- s = format (s, "\n%Ubuffer 0x%x: %U",
- format_white_space, indent,
- t->buffer_index, format_vnet_buffer, &t->buffer);
+ s = format (s, "\n%Ubuffer 0x%x: %U", format_white_space, indent,
+ t->buffer_index, format_vnet_buffer_no_chain, &t->buffer);
s = format (s, "\n%U%U",
format_white_space, indent,
@@ -787,14 +635,12 @@ format_dpdk_rx_trace (u8 * s, va_list * va)
dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index);
format_function_t *f;
u32 indent = format_get_indent (s);
- vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index);
- s = format (s, "%U rx queue %d",
- format_vnet_sw_interface_name, vnm, sw, t->queue_index);
+ s = format (s, "%U rx queue %d", format_vnet_sw_if_index_name, vnm,
+ xd->sw_if_index, t->queue_index);
- s = format (s, "\n%Ubuffer 0x%x: %U",
- format_white_space, indent,
- t->buffer_index, format_vnet_buffer, &t->buffer);
+ s = format (s, "\n%Ubuffer 0x%x: %U", format_white_space, indent,
+ t->buffer_index, format_vnet_buffer_no_chain, &t->buffer);
s = format (s, "\n%U%U",
format_white_space, indent,
@@ -855,11 +701,11 @@ format_dpdk_pkt_offload_flags (u8 * s, va_list * va)
s = format (s, "Packet Offload Flags");
-#define _(F, S) \
- if (*ol_flags & F) \
- { \
- s = format (s, "\n%U%s (0x%04x) %s", \
- format_white_space, indent, #F, F, S); \
+#define _(F, S) \
+ if ((*ol_flags & RTE_MBUF_F_##F) == RTE_MBUF_F_##F) \
+ { \
+ s = format (s, "\n%U%s (0x%04x) %s", format_white_space, indent, \
+ "PKT_" #F, RTE_MBUF_F_##F, S); \
}
foreach_dpdk_pkt_offload_flag
@@ -887,7 +733,7 @@ u8 *
format_dpdk_rte_mbuf_tso (u8 *s, va_list *va)
{
struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *);
- if (mb->ol_flags & PKT_TX_TCP_SEG)
+ if (mb->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
{
s = format (s, "l4_len %u tso_segsz %u", mb->l4_len, mb->tso_segsz);
}
@@ -940,8 +786,9 @@ format_dpdk_rte_mbuf (u8 * s, va_list * va)
s = format (s, "\n%U%U", format_white_space, indent,
format_dpdk_pkt_offload_flags, &mb->ol_flags);
- if ((mb->ol_flags & PKT_RX_VLAN) &&
- ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0))
+ if ((mb->ol_flags & RTE_MBUF_F_RX_VLAN) &&
+ ((mb->ol_flags &
+ (RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_QINQ_STRIPPED)) == 0))
{
ethernet_vlan_header_tv_t *vlan_hdr =
((ethernet_vlan_header_tv_t *) & (eth_hdr->type));
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index f923da6c09e..e416efe2e4d 100644
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -21,8 +21,10 @@
#include <vlib/unix/unix.h>
#include <vlib/log.h>
+#include <vnet/vnet.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
#include <dpdk/buffer.h>
#include <dpdk/device/dpdk.h>
#include <dpdk/cryptodev/cryptodev.h>
@@ -43,71 +45,69 @@
#include <dpdk/device/dpdk_priv.h>
-#define ETHER_MAX_LEN 1518 /**< Maximum frame len, including CRC. */
-
dpdk_main_t dpdk_main;
dpdk_config_main_t dpdk_config_main;
#define LINK_STATE_ELOGS 0
-/* Port configuration, mildly modified Intel app values */
+/* dev_info.speed_capa -> interface name mappings */
+const struct
+{
+ u32 link_speed;
+ const char *pfx;
+} if_name_prefixes[] = {
+ /* sorted, higher speed first */
+ { RTE_ETH_LINK_SPEED_200G, "TwoHundredGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_100G, "HundredGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_56G, "FiftySixGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_50G, "FiftyGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_40G, "FortyGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_25G, "TwentyFiveGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_20G, "TwentyGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_10G, "TenGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_5G, "FiveGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_2_5G, "TwoDotFiveGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_1G, "GigabitEthernet" },
+};
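The scan that consumes this table is expected to walk it in order and take
the first prefix whose speed bit is advertised; a sketch under that
assumption (the actual consumer is in a later hunk of init.c, not shown
here):

    const char *pfx = "Ethernet"; /* hypothetical fallback name */
    for (int i = 0; i < ARRAY_LEN (if_name_prefixes); i++)
      if (di.speed_capa & if_name_prefixes[i].link_speed)
        {
          pfx = if_name_prefixes[i].pfx;
          break;
        }

Because the entries are sorted highest speed first, a multi-speed NIC is
named after its fastest supported rate.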
-static dpdk_port_type_t
-port_type_from_speed_capa (struct rte_eth_dev_info *dev_info)
+static clib_error_t *
+dpdk_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+ u32 frame_size)
{
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);
+ int rv;
+ u32 mtu;
- if (dev_info->speed_capa & ETH_LINK_SPEED_100G)
- return VNET_DPDK_PORT_TYPE_ETH_100G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_56G)
- return VNET_DPDK_PORT_TYPE_ETH_56G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_50G)
- return VNET_DPDK_PORT_TYPE_ETH_50G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_40G)
- return VNET_DPDK_PORT_TYPE_ETH_40G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_25G)
- return VNET_DPDK_PORT_TYPE_ETH_25G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_20G)
- return VNET_DPDK_PORT_TYPE_ETH_20G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_10G)
- return VNET_DPDK_PORT_TYPE_ETH_10G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_5G)
- return VNET_DPDK_PORT_TYPE_ETH_5G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_2_5G)
- return VNET_DPDK_PORT_TYPE_ETH_2_5G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_1G)
- return VNET_DPDK_PORT_TYPE_ETH_1G;
-
- return VNET_DPDK_PORT_TYPE_UNKNOWN;
-}
+ mtu = frame_size - xd->driver_frame_overhead;
-static dpdk_port_type_t
-port_type_from_link_speed (u32 link_speed)
-{
- switch (link_speed)
+ rv = rte_eth_dev_set_mtu (xd->port_id, mtu);
+
+ if (rv < 0)
{
- case ETH_SPEED_NUM_1G:
- return VNET_DPDK_PORT_TYPE_ETH_1G;
- case ETH_SPEED_NUM_2_5G:
- return VNET_DPDK_PORT_TYPE_ETH_2_5G;
- case ETH_SPEED_NUM_5G:
- return VNET_DPDK_PORT_TYPE_ETH_5G;
- case ETH_SPEED_NUM_10G:
- return VNET_DPDK_PORT_TYPE_ETH_10G;
- case ETH_SPEED_NUM_20G:
- return VNET_DPDK_PORT_TYPE_ETH_20G;
- case ETH_SPEED_NUM_25G:
- return VNET_DPDK_PORT_TYPE_ETH_25G;
- case ETH_SPEED_NUM_40G:
- return VNET_DPDK_PORT_TYPE_ETH_40G;
- case ETH_SPEED_NUM_50G:
- return VNET_DPDK_PORT_TYPE_ETH_50G;
- case ETH_SPEED_NUM_56G:
- return VNET_DPDK_PORT_TYPE_ETH_56G;
- case ETH_SPEED_NUM_100G:
- return VNET_DPDK_PORT_TYPE_ETH_100G;
- default:
- return VNET_DPDK_PORT_TYPE_UNKNOWN;
+ dpdk_log_err ("[%u] rte_eth_dev_set_mtu failed (mtu %u, rv %d)",
+ xd->port_id, mtu, rv);
+ switch (rv)
+ {
+ case -ENOTSUP:
+ return vnet_error (VNET_ERR_UNSUPPORTED,
+ "dpdk driver doesn't support MTU change");
+ case -EBUSY:
+ return vnet_error (VNET_ERR_BUSY, "port is running");
+ case -EINVAL:
+ return vnet_error (VNET_ERR_INVALID_VALUE, "invalid MTU");
+ default:
+ return vnet_error (VNET_ERR_BUG,
+ "unexpected return value %d returned from "
+ "rte_eth_dev_set_mtu(...)",
+ rv);
+ }
}
+ else
+ dpdk_log_debug ("[%u] max_frame_size set to %u by setting MTU to %u",
+ xd->port_id, frame_size, mtu);
+
+ return 0;
}
static u32
@@ -121,15 +121,11 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
{
case ETHERNET_INTERFACE_FLAG_DEFAULT_L3:
/* set to L3/non-promisc mode */
- xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC;
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_PROMISC, 0);
break;
case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
- xd->flags |= DPDK_DEVICE_FLAG_PROMISC;
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_PROMISC, 1);
break;
- case ETHERNET_INTERFACE_FLAG_MTU:
- xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes;
- dpdk_device_setup (xd);
- return 0;
default:
return ~0;
}
@@ -145,12 +141,6 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
return old;
}
-static int
-dpdk_port_crc_strip_enabled (dpdk_device_t * xd)
-{
- return !(xd->port_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC);
-}
-
/* The function check_l3cache checks whether a Level 3 cache exists on the current CPUs.
   return value 1: exists.
   return value 0: does not exist.
@@ -192,701 +182,360 @@ check_l3cache ()
return 0;
}
-static void
-dpdk_enable_l4_csum_offload (dpdk_device_t * xd)
+static dpdk_device_config_t *
+dpdk_find_startup_config (struct rte_eth_dev_info *di)
{
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |= DPDK_DEVICE_FLAG_TX_OFFLOAD |
- DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
+ dpdk_main_t *dm = &dpdk_main;
+ struct rte_pci_device *pci_dev;
+ vlib_pci_addr_t pci_addr;
+#ifdef __linux__
+ struct rte_vmbus_device *vmbus_dev;
+ vlib_vmbus_addr_t vmbus_addr;
+#endif /* __linux__ */
+ uword *p = 0;
+
+ if ((pci_dev = dpdk_get_pci_device (di)))
+ {
+ pci_addr.domain = pci_dev->addr.domain;
+ pci_addr.bus = pci_dev->addr.bus;
+ pci_addr.slot = pci_dev->addr.devid;
+ pci_addr.function = pci_dev->addr.function;
+ p =
+ hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+ }
+
+#ifdef __linux__
+ if ((vmbus_dev = dpdk_get_vmbus_device (di)))
+ {
+ unformat_input_t input_vmbus;
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ const char *dev_name = rte_dev_name (di->device);
+#else
+ const char *dev_name = di->device->name;
+#endif
+ unformat_init_string (&input_vmbus, dev_name, strlen (dev_name));
+ if (unformat (&input_vmbus, "%U", unformat_vlib_vmbus_addr, &vmbus_addr))
+ p = mhash_get (&dm->conf->device_config_index_by_vmbus_addr,
+ &vmbus_addr);
+ unformat_free (&input_vmbus);
+ }
+#endif /* __linux__ */
+
+ if (p)
+ return pool_elt_at_index (dm->conf->dev_confs, p[0]);
+ return &dm->conf->default_devconf;
}
static clib_error_t *
dpdk_lib_init (dpdk_main_t * dm)
{
- u32 nports;
- u32 mtu, max_rx_frame;
- int i;
- clib_error_t *error;
- vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ u16 port_id;
vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_device_main_t *vdm = &vnet_device_main;
vnet_sw_interface_t *sw;
vnet_hw_interface_t *hi;
dpdk_device_t *xd;
- vlib_pci_addr_t last_pci_addr;
- u32 last_pci_addr_port = 0;
- u8 af_packet_instance_num = 0;
- last_pci_addr.as_u32 = ~0;
-
- nports = rte_eth_dev_count_avail ();
-
- if (nports < 1)
- {
- dpdk_log_notice ("DPDK drivers found no Ethernet devices...");
- }
-
- if (CLIB_DEBUG > 0)
- dpdk_log_notice ("DPDK drivers found %d ports...", nports);
-
- if (dm->conf->enable_tcp_udp_checksum)
- dm->buffer_flags_template &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT
- | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
+ char *if_num_fmt;
/* vlib_buffer_t template */
vec_validate_aligned (dm->per_thread_data, tm->n_vlib_mains - 1,
CLIB_CACHE_LINE_BYTES);
- for (i = 0; i < tm->n_vlib_mains; i++)
+ for (int i = 0; i < tm->n_vlib_mains; i++)
{
dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data, i);
clib_memset (&ptd->buffer_template, 0, sizeof (vlib_buffer_t));
- ptd->buffer_template.flags = dm->buffer_flags_template;
vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_TX] = (u32) ~ 0;
}
- /* *INDENT-OFF* */
- RTE_ETH_FOREACH_DEV(i)
+ if_num_fmt =
+ dm->conf->interface_name_format_decimal ? "%d/%d/%d" : "%x/%x/%x";
+
+ /* device config defaults */
+ dm->default_port_conf.n_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
+ dm->default_port_conf.n_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
+ dm->default_port_conf.n_rx_queues = 1;
+ dm->default_port_conf.n_tx_queues = tm->n_vlib_mains;
+ dm->default_port_conf.rss_hf =
+ RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP;
+ dm->default_port_conf.max_lro_pkt_size = DPDK_MAX_LRO_SIZE_DEFAULT;
+
+ if ((clib_mem_get_default_hugepage_size () == 2 << 20) &&
+ check_l3cache () == 0)
+ dm->default_port_conf.n_rx_desc = dm->default_port_conf.n_tx_desc = 512;
+
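The descriptor default above drops from 1024 to 512 when the system runs on 2 MiB hugepages (2 << 20 bytes) and has no L3 cache, trading ring depth for fewer TLB misses. The same decision in isolation, with the thresholds used above:

    #include <stdbool.h>
    #include <stdint.h>

    static inline uint16_t
    default_desc_count (uint64_t hugepage_size, bool has_l3_cache)
    {
      /* 2 << 20 == 2 MiB; small pages plus no L3 cache make deep
         descriptor rings expensive, so halve them */
      if (hugepage_size == (2u << 20) && !has_l3_cache)
        return 512;
      return 1024; /* DPDK_NB_RX_DESC_DEFAULT / DPDK_NB_TX_DESC_DEFAULT */
    }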
+ RTE_ETH_FOREACH_DEV (port_id)
{
u8 addr[6];
- int vlan_off;
- struct rte_eth_dev_info dev_info;
- struct rte_pci_device *pci_dev;
- struct rte_vmbus_device *vmbus_dev;
- dpdk_portid_t next_port_id;
+ int rv, q;
+ struct rte_eth_dev_info di;
dpdk_device_config_t *devconf = 0;
- vlib_pci_addr_t pci_addr;
- vlib_vmbus_addr_t vmbus_addr;
- uword *p = 0;
+ vnet_eth_interface_registration_t eir = {};
+ dpdk_driver_t *dr;
+ i8 numa_node;
- if (!rte_eth_dev_is_valid_port(i))
+ if (!rte_eth_dev_is_valid_port (port_id))
continue;
- rte_eth_dev_info_get (i, &dev_info);
-
- if (dev_info.device == 0)
+ if ((rv = rte_eth_dev_info_get (port_id, &di)) != 0)
{
- dpdk_log_notice ("DPDK bug: missing device info. Skipping %s device",
- dev_info.driver_name);
+ dpdk_log_warn ("[%u] failed to get device info. skipping device.",
+ port_id);
continue;
}
- pci_dev = dpdk_get_pci_device (&dev_info);
-
- if (pci_dev)
+ if (di.device == 0)
{
- pci_addr.domain = pci_dev->addr.domain;
- pci_addr.bus = pci_dev->addr.bus;
- pci_addr.slot = pci_dev->addr.devid;
- pci_addr.function = pci_dev->addr.function;
- p = hash_get (dm->conf->device_config_index_by_pci_addr,
- pci_addr.as_u32);
+ dpdk_log_warn ("[%u] missing device info. Skipping '%s' device",
+ port_id, di.driver_name);
+ continue;
}
- vmbus_dev = dpdk_get_vmbus_device (&dev_info);
+ devconf = dpdk_find_startup_config (&di);
- if (vmbus_dev)
+ /* If device is blacklisted, we should skip it */
+ if (devconf->is_blacklisted)
{
- unformat_input_t input_vmbus;
-
- unformat_init_vector (&input_vmbus, (u8 *) dev_info.device->name);
- if (unformat (&input_vmbus, "%U", unformat_vlib_vmbus_addr,
- &vmbus_addr))
- {
- p = mhash_get (&dm->conf->device_config_index_by_vmbus_addr,
- &vmbus_addr);
- }
+ dpdk_log_notice ("[%d] Device %s blacklisted. Skipping...", port_id,
+ di.driver_name);
+ continue;
}
- if (p)
+ vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES);
+ xd->port_id = port_id;
+ xd->device_index = xd - dm->devices;
+ xd->per_interface_next_index = ~0;
+
+ clib_memcpy (&xd->conf, &dm->default_port_conf,
+ sizeof (dpdk_port_conf_t));
+
+ /* find driver data for this PMD */
+ if ((dr = dpdk_driver_find (di.driver_name, &xd->if_desc)))
{
- devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
- /* If device is blacklisted, we should skip it */
- if (devconf->is_blacklisted)
- {
- continue;
- }
+ xd->driver = dr;
+ xd->supported_flow_actions = dr->supported_flow_actions;
+ xd->conf.disable_rss = dr->mq_mode_none;
+ xd->conf.disable_rx_scatter = dr->disable_rx_scatter;
+ xd->conf.enable_rxq_int = dr->enable_rxq_int;
+ if (dr->use_intel_phdr_cksum)
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM, 1);
+ if (dr->int_unmaskable)
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_INT_UNMASKABLE, 1);
}
else
- devconf = &dm->conf->default_devconf;
+ dpdk_log_warn ("[%u] unknown driver '%s'", port_id, di.driver_name);
- /* Create vnet interface */
- vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES);
- xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
- xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
- xd->cpu_socket = (i8) rte_eth_dev_socket_id (i);
- if (p)
+ if (devconf->name)
{
xd->name = devconf->name;
}
-
- /* Handle representor devices that share the same PCI ID */
- if (dev_info.switch_info.domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
- {
- if (dev_info.switch_info.port_id != (uint16_t)-1)
- xd->interface_name_suffix = format (0, "%d", dev_info.switch_info.port_id);
- }
- /* Handle interface naming for devices with multiple ports sharing same PCI ID */
- else if (pci_dev &&
- ((next_port_id = rte_eth_find_next (i + 1)) != RTE_MAX_ETHPORTS))
+ else
{
- struct rte_eth_dev_info di = { 0 };
- struct rte_pci_device *next_pci_dev;
- rte_eth_dev_info_get (next_port_id, &di);
- next_pci_dev = di.device ? RTE_DEV_TO_PCI (di.device) : 0;
- if (next_pci_dev &&
- pci_addr.as_u32 != last_pci_addr.as_u32 &&
- memcmp (&pci_dev->addr, &next_pci_dev->addr,
- sizeof (struct rte_pci_addr)) == 0)
+ struct rte_pci_device *pci_dev;
+ if (dr && dr->interface_name_prefix)
{
- xd->interface_name_suffix = format (0, "0");
- last_pci_addr.as_u32 = pci_addr.as_u32;
- last_pci_addr_port = i;
- }
- else if (pci_addr.as_u32 == last_pci_addr.as_u32)
- {
- xd->interface_name_suffix =
- format (0, "%u", i - last_pci_addr_port);
+ /* prefix override by driver */
+ xd->name = format (xd->name, "%s", dr->interface_name_prefix);
}
else
{
- last_pci_addr.as_u32 = ~0;
+ /* interface name prefix from speed_capa */
+ u64 mask = ~((if_name_prefixes[0].link_speed << 1) - 1);
+
+ if (di.speed_capa & mask)
+ dpdk_log_warn ("[%u] unknown speed capability 0x%x reported",
+ xd->port_id, di.speed_capa & mask);
+
+ for (int i = 0; i < ARRAY_LEN (if_name_prefixes); i++)
+ if (if_name_prefixes[i].link_speed & di.speed_capa)
+ {
+ xd->name =
+ format (xd->name, "%s", if_name_prefixes[i].pfx);
+ break;
+ }
+ if (xd->name == 0)
+ xd->name = format (xd->name, "Ethernet");
}
- }
- else
- last_pci_addr.as_u32 = ~0;
- clib_memcpy (&xd->tx_conf, &dev_info.default_txconf,
- sizeof (struct rte_eth_txconf));
-
- if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
- {
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM;
- xd->flags |= DPDK_DEVICE_FLAG_RX_IP4_CKSUM;
- }
+ if (dr && dr->interface_number_from_port_id)
+ xd->name = format (xd->name, "%u", port_id);
+ else if ((pci_dev = dpdk_get_pci_device (&di)))
+ xd->name = format (xd->name, if_num_fmt, pci_dev->addr.bus,
+ pci_dev->addr.devid, pci_dev->addr.function);
+ else
+ xd->name = format (xd->name, "%u", port_id);
- if (dm->conf->enable_tcp_udp_checksum)
- {
- if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_UDP_CKSUM;
- if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_CKSUM;
+ /* Handle representor devices that share the same PCI ID */
+ if ((di.switch_info.domain_id !=
+ RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) &&
+ (di.switch_info.port_id != (uint16_t) -1))
+ xd->name = format (xd->name, "/%d", di.switch_info.port_id);
}
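The speed_capa-based prefix selection above assumes if_name_prefixes[] is ordered fastest-first: ~((fastest_bit << 1) - 1) selects every capability bit above the fastest known speed, so genuinely new link speeds get logged instead of silently misnamed. A self-contained sketch of that bit math (the table is an illustrative subset laid out like DPDK's RTE_ETH_LINK_SPEED_* bits, not the plugin's actual table):

    #include <stdint.h>
    #include <stdio.h>

    /* illustrative subset, fastest first, mirroring the ordering assumption */
    static const struct { uint64_t speed_bit; const char *pfx; } prefixes[] = {
      { 1ULL << 14, "HundredGigabitEthernet" },
      { 1ULL << 11, "FortyGigabitEthernet" },
      { 1ULL << 8, "TenGigabitEthernet" },
    };

    static const char *
    pick_prefix (uint64_t speed_capa)
    {
      /* any bit above the fastest known entry is an unknown capability */
      uint64_t unknown = speed_capa & ~((prefixes[0].speed_bit << 1) - 1);
      if (unknown)
        fprintf (stderr, "unknown speed capability 0x%llx\n",
                 (unsigned long long) unknown);
      for (unsigned i = 0; i < sizeof (prefixes) / sizeof (prefixes[0]); i++)
        if (prefixes[i].speed_bit & speed_capa)
          return prefixes[i].pfx; /* first (fastest) match wins */
      return "Ethernet"; /* fallback used above when nothing matches */
    }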
- if (dm->conf->no_multi_seg)
- {
- xd->port_conf.txmode.offloads &= ~DEV_TX_OFFLOAD_MULTI_SEGS;
- xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
- xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_SCATTER;
- }
- else
+ /* number of RX and TX queues */
+ if (devconf->num_tx_queues > 0)
{
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
- xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG;
+ if (di.max_tx_queues < devconf->num_tx_queues)
+ dpdk_log_warn ("[%u] Configured number of TX queues (%u) is "
+ "bigger than maximum supported (%u)",
+ port_id, devconf->num_tx_queues, di.max_tx_queues);
+ xd->conf.n_tx_queues = devconf->num_tx_queues;
}
- xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains);
-
- if (devconf->num_tx_queues > 0
- && devconf->num_tx_queues < xd->tx_q_used)
- xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues);
+ xd->conf.n_tx_queues = clib_min (di.max_tx_queues, xd->conf.n_tx_queues);
- if (devconf->num_rx_queues > 1
- && dev_info.max_rx_queues >= devconf->num_rx_queues)
+ if (devconf->num_rx_queues > 1 &&
+ di.max_rx_queues >= devconf->num_rx_queues)
{
- xd->rx_q_used = devconf->num_rx_queues;
- xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
- if (devconf->rss_fn == 0)
- xd->port_conf.rx_adv_conf.rss_conf.rss_hf =
- ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
- else
+ xd->conf.n_rx_queues = devconf->num_rx_queues;
+ if (devconf->rss_fn)
{
u64 unsupported_bits;
- xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn;
- unsupported_bits = xd->port_conf.rx_adv_conf.rss_conf.rss_hf;
- unsupported_bits &= ~dev_info.flow_type_rss_offloads;
+ xd->conf.rss_hf = devconf->rss_fn;
+ unsupported_bits = xd->conf.rss_hf;
+ unsupported_bits &= ~di.flow_type_rss_offloads;
if (unsupported_bits)
dpdk_log_warn ("Unsupported RSS hash functions: %U",
format_dpdk_rss_hf_name, unsupported_bits);
}
- xd->port_conf.rx_adv_conf.rss_conf.rss_hf &=
- dev_info.flow_type_rss_offloads;
+ xd->conf.rss_hf &= di.flow_type_rss_offloads;
+ dpdk_log_debug ("[%u] rss_hf: %U", port_id, format_dpdk_rss_hf_name,
+ xd->conf.rss_hf);
}
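The RSS setup above intersects the user-requested hash functions with what the PMD reports in flow_type_rss_offloads: unsupported bits produce a warning and are masked out rather than failing the device. Distilled:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t
    apply_rss_request (uint64_t requested_hf, uint64_t device_hf)
    {
      uint64_t unsupported = requested_hf & ~device_hf;

      if (unsupported)
        fprintf (stderr, "unsupported RSS hash functions: 0x%llx\n",
                 (unsigned long long) unsupported);
      /* keep only what the port can actually hash on */
      return requested_hf & device_hf;
    }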
- else
- xd->rx_q_used = 1;
- vec_validate_aligned (xd->rx_queues, xd->rx_q_used - 1,
+#ifndef RTE_VLAN_HLEN
+#define RTE_VLAN_HLEN 4
+#endif
+ xd->driver_frame_overhead =
+ RTE_ETHER_HDR_LEN + 2 * RTE_VLAN_HLEN + RTE_ETHER_CRC_LEN;
+#if RTE_VERSION >= RTE_VERSION_NUM(21, 11, 0, 0)
+ q = di.max_rx_pktlen - di.max_mtu;
+
+ /* attempt to protect against bogus values provided by the PMD */
+ if (q < (2 * xd->driver_frame_overhead) && q > 0 &&
+ di.max_mtu != UINT16_MAX)
+ xd->driver_frame_overhead = q;
+ dpdk_log_debug ("[%u] min_mtu: %u, max_mtu: %u, min_rx_bufsize: %u, "
+ "max_rx_pktlen: %u, max_lro_pkt_size: %u",
+ xd->port_id, di.min_mtu, di.max_mtu, di.min_rx_bufsize,
+ di.max_rx_pktlen, di.max_lro_pkt_size);
+#endif
+ dpdk_log_debug ("[%u] driver frame overhead is %u", port_id,
+ xd->driver_frame_overhead);
+
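The overhead probe above only trusts max_rx_pktlen - max_mtu when it looks plausible: positive, below twice the 26-byte default, and not derived from an unset (UINT16_MAX) max_mtu, since some PMDs report bogus values. A sketch of the same guard:

    #include <stdint.h>

    static uint32_t
    estimate_frame_overhead (uint32_t max_rx_pktlen, uint16_t max_mtu)
    {
      uint32_t overhead = 14 + 2 * 4 + 4; /* eth hdr + 2 VLAN tags + FCS */
      int32_t q = (int32_t) max_rx_pktlen - max_mtu;

      /* only trust the PMD-reported difference when it is plausible */
      if (q > 0 && q < (int32_t) (2 * overhead) && max_mtu != UINT16_MAX)
        overhead = (uint32_t) q;
      return overhead;
    }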
+ /* number of RX and TX descriptors */
+ if (devconf->num_rx_desc)
+ xd->conf.n_rx_desc = devconf->num_rx_desc;
+ else if (dr && dr->n_rx_desc)
+ xd->conf.n_rx_desc = dr->n_rx_desc;
+
+ if (devconf->num_tx_desc)
+ xd->conf.n_tx_desc = devconf->num_tx_desc;
+ else if (dr && dr->n_tx_desc)
+ xd->conf.n_tx_desc = dr->n_tx_desc;
+
+ dpdk_log_debug (
+ "[%u] n_rx_queues: %u n_tx_queues: %u n_rx_desc: %u n_tx_desc: %u",
+ port_id, xd->conf.n_rx_queues, xd->conf.n_tx_queues,
+ xd->conf.n_rx_desc, xd->conf.n_tx_desc);
+
+ vec_validate_aligned (xd->rx_queues, xd->conf.n_rx_queues - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (xd->tx_queues, xd->conf.n_tx_queues - 1,
CLIB_CACHE_LINE_BYTES);
- xd->flags |= DPDK_DEVICE_FLAG_PMD;
-
- /* workaround for drivers not setting driver_name */
- if ((!dev_info.driver_name) && (pci_dev))
- dev_info.driver_name = pci_dev->driver->driver.name;
-
- ASSERT (dev_info.driver_name);
-
- if (!xd->pmd)
- {
-
-
-#define _(s,f) else if (dev_info.driver_name && \
- !strcmp(dev_info.driver_name, s)) \
- xd->pmd = VNET_DPDK_PMD_##f;
- if (0)
- ;
- foreach_dpdk_pmd
-#undef _
- else
- xd->pmd = VNET_DPDK_PMD_UNKNOWN;
-
- xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
- xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
- xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
-
- switch (xd->pmd)
- {
- /* Drivers with valid speed_capa set */
- case VNET_DPDK_PMD_I40E:
- xd->flags |= DPDK_DEVICE_FLAG_INT_UNMASKABLE;
- case VNET_DPDK_PMD_E1000EM:
- case VNET_DPDK_PMD_IGB:
- case VNET_DPDK_PMD_IGC:
- case VNET_DPDK_PMD_IXGBE:
- case VNET_DPDK_PMD_ICE:
- xd->port_type = port_type_from_speed_capa (&dev_info);
- xd->supported_flow_actions = VNET_FLOW_ACTION_MARK |
- VNET_FLOW_ACTION_REDIRECT_TO_NODE |
- VNET_FLOW_ACTION_REDIRECT_TO_QUEUE |
- VNET_FLOW_ACTION_BUFFER_ADVANCE |
- VNET_FLOW_ACTION_COUNT | VNET_FLOW_ACTION_DROP |
- VNET_FLOW_ACTION_RSS;
-
- if (dm->conf->no_tx_checksum_offload == 0)
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |=
- DPDK_DEVICE_FLAG_TX_OFFLOAD |
- DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
- }
-
- xd->port_conf.intr_conf.rxq = 1;
- break;
- case VNET_DPDK_PMD_CXGBE:
- case VNET_DPDK_PMD_MLX4:
- case VNET_DPDK_PMD_MLX5:
- case VNET_DPDK_PMD_QEDE:
- case VNET_DPDK_PMD_BNXT:
- xd->port_type = port_type_from_speed_capa (&dev_info);
- break;
-
- /* SR-IOV VFs */
- case VNET_DPDK_PMD_I40EVF:
- xd->flags |= DPDK_DEVICE_FLAG_INT_UNMASKABLE;
- case VNET_DPDK_PMD_IGBVF:
- case VNET_DPDK_PMD_IXGBEVF:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- if (dm->conf->no_tx_checksum_offload == 0)
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |=
- DPDK_DEVICE_FLAG_TX_OFFLOAD |
- DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
- }
- /* DPDK bug in multiqueue... */
- /* xd->port_conf.intr_conf.rxq = 1; */
- break;
-
- /* iAVF */
- case VNET_DPDK_PMD_IAVF:
- xd->flags |= DPDK_DEVICE_FLAG_INT_UNMASKABLE;
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- xd->supported_flow_actions =
- VNET_FLOW_ACTION_MARK | VNET_FLOW_ACTION_REDIRECT_TO_NODE |
- VNET_FLOW_ACTION_REDIRECT_TO_QUEUE |
- VNET_FLOW_ACTION_BUFFER_ADVANCE | VNET_FLOW_ACTION_COUNT |
- VNET_FLOW_ACTION_DROP | VNET_FLOW_ACTION_RSS;
-
- if (dm->conf->no_tx_checksum_offload == 0)
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |=
- DPDK_DEVICE_FLAG_TX_OFFLOAD |
- DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
- }
- /* DPDK bug in multiqueue... */
- /* xd->port_conf.intr_conf.rxq = 1; */
- break;
-
- case VNET_DPDK_PMD_THUNDERX:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
-
- if (dm->conf->no_tx_checksum_offload == 0)
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |= DPDK_DEVICE_FLAG_TX_OFFLOAD;
- }
- break;
-
- case VNET_DPDK_PMD_ENA:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_SCATTER;
- xd->port_conf.intr_conf.rxq = 1;
- break;
-
- case VNET_DPDK_PMD_DPAA2:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
- break;
-
- /* Cisco VIC */
- case VNET_DPDK_PMD_ENIC:
- {
- struct rte_eth_link l;
- rte_eth_link_get_nowait (i, &l);
- xd->port_type = port_type_from_link_speed (l.link_speed);
- if (dm->conf->enable_tcp_udp_checksum)
- dpdk_enable_l4_csum_offload (xd);
- }
- break;
-
- /* Intel Red Rock Canyon */
- case VNET_DPDK_PMD_FM10K:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH;
- break;
-
- /* virtio */
- case VNET_DPDK_PMD_VIRTIO:
- xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G;
- xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO;
- xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO;
- /*
- * Enable use of RX interrupts if supported.
- *
- * There is no device flag or capability for this, so
- * use the same check that the virtio driver does.
- */
- if (pci_dev && rte_intr_cap_multiple (&pci_dev->intr_handle))
- xd->port_conf.intr_conf.rxq = 1;
- break;
-
- /* vmxnet3 */
- case VNET_DPDK_PMD_VMXNET3:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
- break;
-
- case VNET_DPDK_PMD_AF_PACKET:
- xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET;
- xd->af_packet_instance_num = af_packet_instance_num++;
- break;
-
- case VNET_DPDK_PMD_VIRTIO_USER:
- xd->port_type = VNET_DPDK_PORT_TYPE_VIRTIO_USER;
- break;
-
- case VNET_DPDK_PMD_VHOST_ETHER:
- xd->port_type = VNET_DPDK_PORT_TYPE_VHOST_ETHER;
- break;
-
- case VNET_DPDK_PMD_LIOVF_ETHER:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- break;
-
- case VNET_DPDK_PMD_FAILSAFE:
- xd->port_type = VNET_DPDK_PORT_TYPE_FAILSAFE;
- xd->port_conf.intr_conf.lsc = 1;
- break;
-
- case VNET_DPDK_PMD_NETVSC:
- {
- struct rte_eth_link l;
- rte_eth_link_get_nowait (i, &l);
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- }
- break;
-
- default:
- xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
- }
-
- if (devconf->num_rx_desc)
- xd->nb_rx_desc = devconf->num_rx_desc;
- else {
-
- /* If num_rx_desc is not specified by VPP user, the current CPU is working
- with 2M page and has no L3 cache, default num_rx_desc is changed to 512
- from original 1024 to help reduce TLB misses.
- */
- if ((clib_mem_get_default_hugepage_size () == 2 << 20)
- && check_l3cache() == 0)
- xd->nb_rx_desc = 512;
- }
-
- if (devconf->num_tx_desc)
- xd->nb_tx_desc = devconf->num_tx_desc;
- else {
-
- /* If num_tx_desc is not specified by VPP user, the current CPU is working
- with 2M page and has no L3 cache, default num_tx_desc is changed to 512
- from original 1024 to help reduce TLB misses.
- */
- if ((clib_mem_get_default_hugepage_size () == 2 << 20)
- && check_l3cache() == 0)
- xd->nb_tx_desc = 512;
- }
- }
-
- if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
- {
- f64 now = vlib_time_now (vm);
- u32 rnd;
- rnd = (u32) (now * 1e6);
- rnd = random_u32 (&rnd);
- clib_memcpy (addr + 2, &rnd, sizeof (rnd));
- addr[0] = 2;
- addr[1] = 0xfe;
- }
+ rte_eth_macaddr_get (port_id, (void *) addr);
+
+ /* create interface */
+ eir.dev_class_index = dpdk_device_class.index;
+ eir.dev_instance = xd->device_index;
+ eir.address = addr;
+ eir.cb.flag_change = dpdk_flag_change;
+ eir.cb.set_max_frame_size = dpdk_set_max_frame_size;
+ xd->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+ hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+ numa_node = (i8) rte_eth_dev_socket_id (port_id);
+ if (numa_node == SOCKET_ID_ANY)
+ /* numa_node is not set, default to 0 */
+ hi->numa_node = xd->cpu_socket = 0;
else
- rte_eth_macaddr_get (i, (void *) addr);
-
- xd->port_id = i;
- xd->device_index = xd - dm->devices;
- xd->per_interface_next_index = ~0;
-
- /* assign interface to input thread */
- int q;
-
- error = ethernet_register_interface
- (dm->vnet_main, dpdk_device_class.index, xd->device_index,
- /* ethernet address */ addr,
- &xd->hw_if_index, dpdk_flag_change);
- if (error)
- return error;
-
- /*
- * Ensure default mtu is not > the mtu read from the hardware.
- * Otherwise rte_eth_dev_configure() will fail and the port will
- * not be available.
- * Calculate max_frame_size and mtu supported by NIC
- */
- if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen)
- {
- /*
- * This device does not support the platforms's max frame
- * size. Use it's advertised mru instead.
- */
- max_rx_frame = dev_info.max_rx_pktlen;
- mtu = dev_info.max_rx_pktlen - sizeof (ethernet_header_t);
- }
- else
- {
- /* VPP treats MTU and max_rx_pktlen both equal to
- * ETHERNET_MAX_PACKET_BYTES, if dev_info.max_rx_pktlen >=
- * ETHERNET_MAX_PACKET_BYTES + sizeof(ethernet_header_t)
- */
- if (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES +
- sizeof (ethernet_header_t)))
- {
- mtu = ETHERNET_MAX_PACKET_BYTES;
- max_rx_frame = ETHERNET_MAX_PACKET_BYTES;
-
- /*
- * Some platforms do not account for Ethernet FCS (4 bytes) in
- * MTU calculations. To interop with them increase mru but only
- * if the device's settings can support it.
- */
- if (dpdk_port_crc_strip_enabled (xd) &&
- (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES +
- sizeof (ethernet_header_t) +
- 4)))
- {
- max_rx_frame += 4;
- }
- }
- else
- {
- max_rx_frame = ETHERNET_MAX_PACKET_BYTES;
- mtu = ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t);
-
- if (dpdk_port_crc_strip_enabled (xd) &&
- (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)))
- {
- max_rx_frame += 4;
- }
- }
- }
-
- if (xd->pmd == VNET_DPDK_PMD_FAILSAFE)
- {
- /* failsafe device numerables are reported with active device only,
- * need to query the mtu for current device setup to overwrite
- * reported value.
- */
- uint16_t dev_mtu;
- if (!rte_eth_dev_get_mtu (i, &dev_mtu))
- {
- mtu = dev_mtu;
- max_rx_frame = mtu + sizeof (ethernet_header_t);
-
- if (dpdk_port_crc_strip_enabled (xd))
- {
- max_rx_frame += 4;
- }
- }
- }
+ hi->numa_node = xd->cpu_socket = numa_node;
+ sw = vnet_get_hw_sw_interface (vnm, xd->hw_if_index);
+ xd->sw_if_index = sw->sw_if_index;
+ dpdk_log_debug ("[%u] interface %v created", port_id, hi->name);
- /*Set port rxmode config */
- xd->port_conf.rxmode.max_rx_pkt_len = max_rx_frame;
+ if (devconf->tag)
+ vnet_set_sw_interface_tag (vnm, devconf->tag, sw->sw_if_index);
- sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->hw_if_index);
- xd->sw_if_index = sw->sw_if_index;
- vnet_hw_if_set_input_node (dm->vnet_main, xd->hw_if_index,
- dpdk_input_node.index);
+ ethernet_set_flags (vnm, xd->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
+ /* assign worker threads */
+ vnet_hw_if_set_input_node (vnm, xd->hw_if_index, dpdk_input_node.index);
if (devconf->workers)
{
- int i;
+ int j;
q = 0;
- clib_bitmap_foreach (i, devconf->workers) {
+ clib_bitmap_foreach (j, devconf->workers)
+ {
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q);
rxq->queue_index = vnet_hw_if_register_rx_queue (
- dm->vnet_main, xd->hw_if_index, q++,
- vdm->first_worker_thread_index + i);
- }
+ vnm, xd->hw_if_index, q++, vdm->first_worker_thread_index + j);
+ }
}
else
- for (q = 0; q < xd->rx_q_used; q++)
+ for (q = 0; q < xd->conf.n_rx_queues; q++)
{
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q);
rxq->queue_index = vnet_hw_if_register_rx_queue (
- dm->vnet_main, xd->hw_if_index, q, VNET_HW_IF_RXQ_THREAD_ANY);
+ vnm, xd->hw_if_index, q, VNET_HW_IF_RXQ_THREAD_ANY);
}
- vnet_hw_if_update_runtime_data (dm->vnet_main, xd->hw_if_index);
-
- /*Get vnet hardware interface */
- hi = vnet_get_hw_interface (dm->vnet_main, xd->hw_if_index);
+ for (q = 0; q < xd->conf.n_tx_queues; q++)
+ {
+ dpdk_tx_queue_t *txq = vec_elt_at_index (xd->tx_queues, q);
+ txq->queue_index =
+ vnet_hw_if_register_tx_queue (vnm, xd->hw_if_index, q);
+ }
- /*Override default max_packet_bytes and max_supported_bytes set in
- * ethernet_register_interface() above*/
- if (hi)
+ for (q = 0; q < tm->n_vlib_mains; q++)
{
- hi->max_packet_bytes = mtu;
- hi->max_supported_packet_bytes = max_rx_frame;
- hi->numa_node = xd->cpu_socket;
-
- /* Indicate ability to support L3 DMAC filtering and
- * initialize interface to L3 non-promisc mode */
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER;
- ethernet_set_flags (dm->vnet_main, xd->hw_if_index,
- ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
+ u32 qi = xd->tx_queues[q % xd->conf.n_tx_queues].queue_index;
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, q);
}
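Every vlib thread is assigned a TX queue above; when threads outnumber hardware queues the modulo wraps, so several threads share one queue (a shared queue then needs locking in the TX path). The mapping with hypothetical counts:

    #include <stdio.h>

    int
    main (void)
    {
      int n_threads = 5, n_tx_queues = 2; /* hypothetical counts */

      /* same round-robin as above: thread t transmits on queue t % n */
      for (int t = 0; t < n_threads; t++)
        printf ("thread %d -> txq %d%s\n", t, t % n_tx_queues,
                n_threads > n_tx_queues ? " (shared)" : "");
      return 0;
    }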
- if (dm->conf->no_tx_checksum_offload == 0)
- if (xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD && hi != NULL)
- {
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TX_IP4_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM;
- }
- if (devconf->tso == DPDK_DEVICE_TSO_ON && hi != NULL)
+ if (devconf->tso == DPDK_DEVICE_TSO_ON)
{
/* tcp_udp checksum must be enabled */
- if ((dm->conf->enable_tcp_udp_checksum) &&
- (hi->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TX_CKSUM))
- {
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_UDP_GSO;
- xd->port_conf.txmode.offloads |=
- DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_UDP_TSO;
- }
+ if (xd->conf.enable_tcp_udp_checksum == 0)
+ dpdk_log_warn ("[%u] TCP/UDP checksum offload must be enabled",
+ xd->port_id);
+ else if ((di.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) == 0)
+ dpdk_log_warn ("[%u] TSO not supported by device", xd->port_id);
else
- clib_warning ("%s: TCP/UDP checksum offload must be enabled",
- hi->name);
+ xd->conf.enable_tso = 1;
}
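TSO is only switched on above when TCP/UDP checksum handling is already enabled and the PMD advertises the TCP TSO offload capability; otherwise the request degrades to a warning. The gate, distilled (the capability bit value mirrors DPDK's RTE_ETH_TX_OFFLOAD_TCP_TSO and is defined locally so the sketch is self-contained):

    #include <stdbool.h>
    #include <stdint.h>

    #define TX_OFFLOAD_TCP_TSO (1u << 5) /* RTE_ETH_TX_OFFLOAD_TCP_TSO */

    static bool
    may_enable_tso (bool csum_enabled, uint64_t tx_offload_capa)
    {
      if (!csum_enabled)
        return false; /* checksum offload is a TSO prerequisite */
      if (!(tx_offload_capa & TX_OFFLOAD_TCP_TSO))
        return false; /* device cannot segment in hardware */
      return true;
    }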
+ if (devconf->max_lro_pkt_size)
+ xd->conf.max_lro_pkt_size = devconf->max_lro_pkt_size;
+
dpdk_device_setup (xd);
/* rss queues should be configured after dpdk_device_setup() */
- if ((hi != NULL) && (devconf->rss_queues != NULL))
- {
- if (vnet_hw_interface_set_rss_queues
- (vnet_get_main (), hi, devconf->rss_queues))
- {
- clib_warning ("%s: Failed to set rss queues", hi->name);
- }
- }
+ if (devconf->rss_queues)
+ {
+ if (vnet_hw_interface_set_rss_queues (vnet_get_main (), hi,
+ devconf->rss_queues))
+ dpdk_log_warn ("[%u] Failed to set rss queues", port_id);
+ }
if (vec_len (xd->errors))
- dpdk_log_err ("setup failed for device %U. Errors:\n %U",
- format_dpdk_device_name, i,
- format_dpdk_device_errors, xd);
-
- /*
- * A note on Cisco VIC (PMD_ENIC) and VLAN:
- *
- * With Cisco VIC vNIC, every ingress packet is tagged. On a
- * trunk vNIC (C series "standalone" server), packets on no VLAN
- * are tagged with vlan 0. On an access vNIC (standalone or B
- * series "blade" server), packets on the default/native VLAN
- * are tagged with that vNIC's VLAN. VPP expects these packets
- * to be untagged, and previously enabled VLAN strip on VIC by
- * default. But it also broke vlan sub-interfaces.
- *
- * The VIC adapter has "untag default vlan" ingress VLAN rewrite
- * mode, which removes tags from these packets. VPP now includes
- * a local patch for the enic driver to use this untag mode, so
- * enabling vlan stripping is no longer needed. In future, the
- * driver + dpdk will have an API to set the mode after
- * rte_eal_init. Then, this note and local patch will be
- * removed.
- */
-
- /*
- * VLAN stripping: default to VLAN strip disabled, unless specified
- * otherwise in the startup config.
- */
-
- vlan_off = rte_eth_dev_get_vlan_offload (xd->port_id);
- if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON)
- {
- vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
- if (rte_eth_dev_set_vlan_offload (xd->port_id, vlan_off) >= 0)
- dpdk_log_info ("VLAN strip enabled for interface\n");
- else
- dpdk_log_warn ("VLAN strip cannot be supported by interface\n");
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
- }
- else
- {
- if (vlan_off & ETH_VLAN_STRIP_OFFLOAD)
- {
- vlan_off &= ~ETH_VLAN_STRIP_OFFLOAD;
- if (rte_eth_dev_set_vlan_offload (xd->port_id, vlan_off) >= 0)
- dpdk_log_warn ("set VLAN offload failed\n");
- }
- xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
- }
-
- if (hi)
- hi->max_packet_bytes = xd->port_conf.rxmode.max_rx_pkt_len
- - sizeof (ethernet_header_t);
- else
- dpdk_log_warn ("hi NULL");
-
- if (dm->conf->no_multi_seg)
- mtu = mtu > ETHER_MAX_LEN ? ETHER_MAX_LEN : mtu;
-
- rte_eth_dev_set_mtu (xd->port_id, mtu);
-}
+ dpdk_log_err ("[%u] setup failed Errors:\n %U", port_id,
+ format_dpdk_device_errors, xd);
+ }
- /* *INDENT-ON* */
+ for (int i = 0; i < vec_len (dm->devices); i++)
+ vnet_hw_if_update_runtime_data (vnm, dm->devices[i].hw_if_index);
return 0;
}
@@ -903,7 +552,6 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
int i;
addrs = vlib_pci_get_all_dev_addrs ();
- /* *INDENT-OFF* */
vec_foreach (addr, addrs)
{
dpdk_device_config_t * devconf = 0;
@@ -922,8 +570,18 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
continue;
}
+#ifdef __FreeBSD__
+ /*
+ * The defines for the PCI_CLASS_* types are platform specific and differ
+ * on FreeBSD.
+ */
+ if (d->device_class != PCI_CLASS_NETWORK &&
+ d->device_class != PCI_CLASS_PROCESSOR_CO)
+ continue;
+#else
if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO)
continue;
+#endif /* __FreeBSD__ */
if (num_whitelisted)
{
@@ -991,9 +649,13 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_NETWORK_ETHERNET)
;
/* all Intel QAT devices VFs */
- else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_PROCESSOR_CO &&
- (d->device_id == 0x0443 || d->device_id == 0x18a1 || d->device_id == 0x19e3 ||
- d->device_id == 0x37c9 || d->device_id == 0x6f55))
+ else if (d->vendor_id == 0x8086 &&
+ d->device_class == PCI_CLASS_PROCESSOR_CO &&
+ (d->device_id == 0x0443 || d->device_id == 0x18a1 ||
+ d->device_id == 0x19e3 || d->device_id == 0x37c9 ||
+ d->device_id == 0x6f55 || d->device_id == 0x18ef ||
+ d->device_id == 0x4941 || d->device_id == 0x4943 ||
+ d->device_id == 0x4945))
;
/* Cisco VIC */
else if (d->vendor_id == 0x1137 &&
@@ -1021,10 +683,28 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
{
continue;
}
- /* Mellanox CX6, CX6VF, CX6DX, CX6DXVF */
- else if (d->vendor_id == 0x15b3 && d->device_id >= 0x101b && d->device_id <= 0x101e)
+ /* Mellanox CX6, CX6VF, CX6DX, CX6DXVF, CX6LX */
+ else if (d->vendor_id == 0x15b3 &&
+ (d->device_id >= 0x101b && d->device_id <= 0x101f))
{
- continue;
+ continue;
+ }
+ /* Mellanox CX7 */
+ else if (d->vendor_id == 0x15b3 && d->device_id == 0x1021)
+ {
+ continue;
+ }
+ /* Mellanox BF, BFVF */
+ else if (d->vendor_id == 0x15b3 &&
+ (d->device_id >= 0xa2d2 && d->device_id <= 0xa2d3))
+ {
+ continue;
+ }
+ /* Mellanox BF2, BF3 */
+ else if (d->vendor_id == 0x15b3 &&
+ (d->device_id == 0xa2d6 || d->device_id == 0xa2dc))
+ {
+ continue;
}
/* Broadcom NetXtreme S, and E series only */
else if (d->vendor_id == 0x14e4 &&
@@ -1039,6 +719,9 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
d->device_id == 0x1614 || d->device_id == 0x1606 ||
d->device_id == 0x1609 || d->device_id == 0x1614)))
;
+ /* Google vNIC */
+ else if (d->vendor_id == 0x1ae0 && d->device_id == 0x0042)
+ ;
else
{
dpdk_log_warn ("Unsupported PCI device 0x%04x:0x%04x found "
@@ -1047,7 +730,8 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
continue;
}
- error = vlib_pci_bind_to_uio (vm, addr, (char *) conf->uio_driver_name);
+ error = vlib_pci_bind_to_uio (vm, addr, (char *) conf->uio_driver_name,
+ conf->uio_bind_force);
if (error)
{
@@ -1063,7 +747,6 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
clib_error_report (error);
}
}
- /* *INDENT-ON* */
vec_free (pci_addr);
vlib_pci_free_device_info (d);
}
@@ -1078,7 +761,6 @@ dpdk_bind_vmbus_devices_to_uio (dpdk_config_main_t * conf)
addrs = vlib_vmbus_get_all_dev_addrs ();
- /* *INDENT-OFF* */
vec_foreach (addr, addrs)
{
dpdk_device_config_t *devconf = 0;
@@ -1143,7 +825,6 @@ dpdk_bind_vmbus_devices_to_uio (dpdk_config_main_t * conf)
clib_error_report (error);
}
}
- /* *INDENT-ON* */
}
uword
@@ -1240,7 +921,9 @@ dpdk_device_config (dpdk_config_main_t *conf, void *addr,
;
else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc))
;
- else if (unformat (input, "name %s", &devconf->name))
+ else if (unformat (input, "name %v", &devconf->name))
+ ;
+ else if (unformat (input, "tag %s", &devconf->tag))
;
else if (unformat (input, "workers %U", unformat_bitmap_list,
&devconf->workers))
@@ -1253,10 +936,6 @@ dpdk_device_config (dpdk_config_main_t *conf, void *addr,
if (error)
break;
}
- else if (unformat (input, "vlan-strip-offload off"))
- devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF;
- else if (unformat (input, "vlan-strip-offload on"))
- devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON;
else if (unformat (input, "tso on"))
{
devconf->tso = DPDK_DEVICE_TSO_ON;
@@ -1270,6 +949,9 @@ dpdk_device_config (dpdk_config_main_t *conf, void *addr,
else if (unformat (input, "rss-queues %U",
unformat_bitmap_list, &devconf->rss_queues))
;
+ else if (unformat (input, "max-lro-pkt-size %u",
+ &devconf->max_lro_pkt_size))
+ ;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -1310,14 +992,26 @@ dpdk_log_read_ready (clib_file_t * uf)
n = read (uf->file_descriptor, s + len, n_try);
if (n < 0 && errno != EAGAIN)
return clib_error_return_unix (0, "read");
- _vec_len (s) = len + (n < 0 ? 0 : n);
+ vec_set_len (s, len + (n < 0 ? 0 : n));
}
unformat_init_vector (&input, s);
while (unformat_user (&input, unformat_line, &line))
{
- dpdk_log_notice ("%v", line);
+ int skip = 0;
+ vec_add1 (line, 0);
+
+ /* unfortunately DPDK pollutes the log with these error messages
+ * even when we pass --in-memory, which means there is no secondary process */
+ if (strstr ((char *) line, "WARNING! Base virtual address hint"))
+ skip = 1;
+ else if (strstr ((char *) line, "This may cause issues with mapping "
+ "memory into secondary processes"))
+ skip = 1;
+ vec_pop (line);
+ if (!skip)
+ dpdk_log_notice ("%v", line);
vec_free (line);
}
@@ -1326,16 +1020,39 @@ dpdk_log_read_ready (clib_file_t * uf)
}
static clib_error_t *
+dpdk_set_stat_poll_interval (f64 interval)
+{
+ if (interval < DPDK_MIN_STATS_POLL_INTERVAL)
+ return clib_error_return (0, "wrong stats-poll-interval value");
+
+ dpdk_main.stat_poll_interval = interval;
+ return 0;
+}
+
+static clib_error_t *
+dpdk_set_link_state_poll_interval (f64 interval)
+{
+ if (interval < DPDK_MIN_LINK_POLL_INTERVAL)
+ return clib_error_return (0, "wrong link-state-poll-interval value");
+
+ dpdk_main.link_state_poll_interval = interval;
+ return 0;
+}
+
+static clib_error_t *
dpdk_config (vlib_main_t * vm, unformat_input_t * input)
{
+ dpdk_main_t *dm = &dpdk_main;
clib_error_t *error = 0;
dpdk_config_main_t *conf = &dpdk_config_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
dpdk_device_config_t *devconf;
vlib_pci_addr_t pci_addr = { 0 };
vlib_vmbus_addr_t vmbus_addr = { 0 };
unformat_input_t sub_input;
+#ifdef __linux__
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
uword default_hugepage_sz, x;
+#endif /* __linux__ */
u8 *s, *tmp = 0;
int ret, i;
int num_whitelisted = 0;
@@ -1344,11 +1061,10 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
u8 no_vmbus = 0;
u8 file_prefix = 0;
u8 *socket_mem = 0;
- u8 *huge_dir_path = 0;
u32 vendor, device, domain, bus, func;
-
- huge_dir_path =
- format (0, "%s/hugepages%c", vlib_unix_get_runtime_dir (), 0);
+ void *fmt_func;
+ void *fmt_addr;
+ f64 poll_interval;
conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword));
mhash_init (&conf->device_config_index_by_vmbus_addr, sizeof (uword),
@@ -1366,19 +1082,36 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
conf->enable_telemetry = 1;
else if (unformat (input, "enable-tcp-udp-checksum"))
- conf->enable_tcp_udp_checksum = 1;
-
+ {
+ dm->default_port_conf.enable_tcp_udp_checksum = 1;
+ if (unformat (input, "enable-outer-checksum-offload"))
+ dm->default_port_conf.enable_outer_checksum_offload = 1;
+ }
else if (unformat (input, "no-tx-checksum-offload"))
- conf->no_tx_checksum_offload = 1;
+ dm->default_port_conf.disable_tx_checksum_offload = 1;
else if (unformat (input, "decimal-interface-names"))
conf->interface_name_format_decimal = 1;
else if (unformat (input, "no-multi-seg"))
- conf->no_multi_seg = 1;
+ dm->default_port_conf.disable_multi_seg = 1;
+ else if (unformat (input, "enable-lro"))
+ dm->default_port_conf.enable_lro = 1;
else if (unformat (input, "max-simd-bitwidth %U",
unformat_max_simd_bitwidth, &conf->max_simd_bitwidth))
;
+ else if (unformat (input, "link-state-poll-interval %f", &poll_interval))
+ {
+ error = dpdk_set_link_state_poll_interval (poll_interval);
+ if (error != 0)
+ return error;
+ }
+ else if (unformat (input, "stats-poll-interval %f", &poll_interval))
+ {
+ error = dpdk_set_stat_poll_interval (poll_interval);
+ if (error != 0)
+ return error;
+ }
else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input,
&sub_input))
{
@@ -1433,13 +1166,10 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
num_whitelisted++;
}
- else if (unformat (input, "num-mem-channels %d", &conf->nchannels))
- conf->nchannels_set_manually = 0;
- else if (unformat (input, "num-crypto-mbufs %d",
- &conf->num_crypto_mbufs))
- ;
else if (unformat (input, "uio-driver %s", &conf->uio_driver_name))
;
+ else if (unformat (input, "uio-bind-force"))
+ conf->uio_bind_force = 1;
else if (unformat (input, "socket-mem %s", &socket_mem))
;
else if (unformat (input, "no-pci"))
@@ -1514,28 +1244,13 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
}
foreach_eal_single_hyphen_arg
#undef _
-#define _(a,b) \
- else if (unformat(input, #a " %s", &s)) \
- { \
- tmp = format (0, "-%s%c", #b, 0); \
- vec_add1 (conf->eal_init_args, tmp); \
- vec_add1 (s, 0); \
- vec_add1 (conf->eal_init_args, s); \
- conf->a##_set_manually = 1; \
- }
- foreach_eal_single_hyphen_mandatory_arg
-#undef _
else if (unformat (input, "default"))
;
else if (unformat_skip_white_space (input))
;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
+ else
+ return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
- goto done;
- }
}
if (!conf->uio_driver_name)
@@ -1545,9 +1260,13 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
{
vec_add1 (conf->eal_init_args, (u8 *) "--in-memory");
+#ifdef __linux__
+ /*
+ * FreeBSD performs huge page prealloc through a dedicated kernel mode;
+ * this step is only required on Linux.
+ */
default_hugepage_sz = clib_mem_get_default_hugepage_size ();
- /* *INDENT-OFF* */
clib_bitmap_foreach (x, tm->cpu_socket_bitmap)
{
clib_error_t *e;
@@ -1560,7 +1279,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
if ((e = clib_sysfs_prealloc_hugepages(x, 0, n_pages)))
clib_error_report (e);
}
- /* *INDENT-ON* */
+#endif /* __linux__ */
}
/* on/off dpdk's telemetry thread */
@@ -1569,6 +1288,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
vec_add1 (conf->eal_init_args, (u8 *) "--no-telemetry");
}
+#ifdef __linux__
if (!file_prefix)
{
tmp = format (0, "--file-prefix%c", 0);
@@ -1576,41 +1296,7 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
tmp = format (0, "vpp%c", 0);
vec_add1 (conf->eal_init_args, tmp);
}
-
- if (error)
- return error;
-
- /* I'll bet that -c and -n must be the first and second args... */
- if (!conf->coremask_set_manually)
- {
- vlib_thread_registration_t *tr;
- uword *coremask = 0;
- int i;
-
- /* main thread core */
- coremask = clib_bitmap_set (coremask, tm->main_lcore, 1);
-
- for (i = 0; i < vec_len (tm->registrations); i++)
- {
- tr = tm->registrations[i];
- coremask = clib_bitmap_or (coremask, tr->coremask);
- }
-
- vec_insert (conf->eal_init_args, 2, 1);
- conf->eal_init_args[1] = (u8 *) "-c";
- tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0);
- conf->eal_init_args[2] = tmp;
- clib_bitmap_free (coremask);
- }
-
- if (!conf->nchannels_set_manually)
- {
- vec_insert (conf->eal_init_args, 2, 3);
- conf->eal_init_args[3] = (u8 *) "-n";
- tmp = format (0, "%d", conf->nchannels);
- vec_terminate_c_string (tmp);
- conf->eal_init_args[4] = tmp;
- }
+#endif
if (no_pci == 0 && geteuid () == 0)
dpdk_bind_devices_to_uio (conf);
@@ -1622,15 +1308,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
if (devconf->x == 0 && conf->default_devconf.x > 0) \
devconf->x = conf->default_devconf.x ;
- /* *INDENT-OFF* */
pool_foreach (devconf, conf->dev_confs) {
/* default per-device config items */
foreach_dpdk_device_config_item
- /* copy vlan_strip config from default device */
- _ (vlan_strip_offload)
-
/* copy tso config from default device */
_ (tso)
@@ -1640,56 +1322,57 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
/* copy rss_queues config from default device */
_ (rss_queues)
- /* add DPDK EAL whitelist/blacklist entry */
- if (num_whitelisted > 0 && devconf->is_blacklisted == 0 &&
- devconf->dev_addr_type == VNET_DEV_ADDR_PCI)
- {
- tmp = format (0, "-a%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- if (devconf->devargs)
+ /* assume that default is PCI */
+ fmt_func = format_vlib_pci_addr;
+ fmt_addr = &devconf->pci_addr;
+
+ if (devconf->dev_addr_type == VNET_DEV_ADDR_VMBUS)
+ {
+ fmt_func = format_vlib_vmbus_addr;
+ fmt_addr = &devconf->vmbus_addr;
+ }
+
+ /* add DPDK EAL whitelist/blacklist entry */
+ if (num_whitelisted > 0 && devconf->is_blacklisted == 0)
+ {
+ tmp = format (0, "-a%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ if (devconf->devargs)
{
- tmp = format (0, "%U,%s%c", format_vlib_pci_addr,
- &devconf->pci_addr, devconf->devargs, 0);
+ tmp =
+ format (0, "%U,%s%c", fmt_func, fmt_addr, devconf->devargs, 0);
}
else
{
- tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
+ tmp = format (0, "%U%c", fmt_func, fmt_addr, 0);
}
vec_add1 (conf->eal_init_args, tmp);
- }
- else if (num_whitelisted == 0 && devconf->is_blacklisted != 0 &&
- devconf->dev_addr_type == VNET_DEV_ADDR_PCI)
- {
- tmp = format (0, "-b%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
- vec_add1 (conf->eal_init_args, tmp);
- }
+ }
+ else if (num_whitelisted == 0 && devconf->is_blacklisted != 0)
+ {
+ tmp = format (0, "-b%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ tmp = format (0, "%U%c", fmt_func, fmt_addr, 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ }
}
- /* *INDENT-ON* */
#undef _
- /* set master-lcore */
- tmp = format (0, "--main-lcore%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- tmp = format (0, "%u%c", tm->main_lcore, 0);
- vec_add1 (conf->eal_init_args, tmp);
-
-
if (socket_mem)
clib_warning ("socket-mem argument is deprecated");
/* NULL terminate the "argv" vector, in case of stupidity */
vec_add1 (conf->eal_init_args, 0);
- _vec_len (conf->eal_init_args) -= 1;
+ vec_dec_len (conf->eal_init_args, 1);
/* Set up DPDK eal and packet mbuf pool early. */
int log_fds[2] = { 0 };
if (pipe (log_fds) == 0)
{
- if (fcntl (log_fds[1], F_SETFL, O_NONBLOCK) == 0)
+ if (fcntl (log_fds[0], F_SETFL, O_NONBLOCK) == 0 &&
+ fcntl (log_fds[1], F_SETFL, O_NONBLOCK) == 0)
{
FILE *f = fdopen (log_fds[1], "a");
if (f && rte_openlog_stream (f) == 0)
@@ -1720,6 +1403,8 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
dpdk_log_notice ("EAL init args: %s", conf->eal_init_args_str);
ret = rte_eal_init (vec_len (conf->eal_init_args),
(char **) conf->eal_init_args);
+ if (ret < 0)
+ return clib_error_return (0, "rte_eal_init returned %d", ret);
/* enable the AVX-512 vPMDs in DPDK */
if (clib_cpu_supports_avx512_bitalg () &&
@@ -1731,20 +1416,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
RTE_VECT_SIMD_256 :
RTE_VECT_SIMD_512);
- /* lazy umount hugepages */
- umount2 ((char *) huge_dir_path, MNT_DETACH);
- rmdir ((char *) huge_dir_path);
- vec_free (huge_dir_path);
-
- if (ret < 0)
- return clib_error_return (0, "rte_eal_init returned %d", ret);
-
/* main thread 1st */
if ((error = dpdk_buffer_pools_create (vm)))
return error;
-done:
- return error;
+ return 0;
}
VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk");
@@ -1757,10 +1433,6 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
u32 hw_flags = 0;
u8 hw_flags_chg = 0;
- /* only update link state for PMD interfaces */
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- return;
-
xd->time_last_link_update = now ? now : xd->time_last_link_update;
clib_memset (&xd->link, 0, sizeof (xd->link));
rte_eth_link_get_nowait (xd->port_id, &xd->link);
@@ -1788,35 +1460,32 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
ed->new_link_state = (u8) xd->link.link_status;
}
- if ((xd->link.link_duplex != prev_link.link_duplex))
+ hw_flags_chg = ((xd->link.link_duplex != prev_link.link_duplex) ||
+ (xd->link.link_status != prev_link.link_status));
+
+ if (xd->link.link_speed != prev_link.link_speed)
+ vnet_hw_interface_set_link_speed (vnm, xd->hw_if_index,
+ (xd->link.link_speed == UINT32_MAX) ?
+ UINT32_MAX :
+ xd->link.link_speed * 1000);
+
+ if (hw_flags_chg)
{
- hw_flags_chg = 1;
+ if (xd->link.link_status)
+ hw_flags |= VNET_HW_INTERFACE_FLAG_LINK_UP;
+
switch (xd->link.link_duplex)
{
- case ETH_LINK_HALF_DUPLEX:
+ case RTE_ETH_LINK_HALF_DUPLEX:
hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX;
break;
- case ETH_LINK_FULL_DUPLEX:
+ case RTE_ETH_LINK_FULL_DUPLEX:
hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX;
break;
default:
break;
}
- }
- if (xd->link.link_speed != prev_link.link_speed)
- vnet_hw_interface_set_link_speed (vnm, xd->hw_if_index,
- xd->link.link_speed * 1000);
-
- if (xd->link.link_status != prev_link.link_status)
- {
- hw_flags_chg = 1;
- if (xd->link.link_status)
- hw_flags |= VNET_HW_INTERFACE_FLAG_LINK_UP;
- }
-
- if (hw_flags_chg)
- {
if (LINK_STATE_ELOGS)
{
ELOG_TYPE_DECLARE (e) =
@@ -1846,6 +1515,7 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
dpdk_device_t *xd;
vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_worker_thread_barrier_sync (vm);
error = dpdk_lib_init (dm);
if (error)
@@ -1862,6 +1532,7 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
}
}
+ vlib_worker_thread_barrier_release (vm);
tm->worker_thread_release = 1;
f64 now = vlib_time_now (vm);
@@ -1870,16 +1541,17 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
dpdk_update_link_state (xd, now);
}
+ f64 timeout =
+ clib_min (dm->link_state_poll_interval, dm->stat_poll_interval);
+
while (1)
{
- /*
- * check each time through the loop in case intervals are changed
- */
- f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ?
- dm->link_state_poll_interval : dm->stat_poll_interval;
-
+ f64 min_wait = clib_max (timeout, DPDK_MIN_POLL_INTERVAL);
vlib_process_wait_for_event_or_clock (vm, min_wait);
+ timeout =
+ clib_min (dm->link_state_poll_interval, dm->stat_poll_interval);
+
if (dm->admin_up_down_in_progress)
/* skip the poll if an admin up down is in progress (on any interface) */
continue;
@@ -1893,19 +1565,25 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
dpdk_update_link_state (xd, now);
}
- }
+ now = vlib_time_now (vm);
+ vec_foreach (xd, dm->devices)
+ {
+ timeout = clib_min (timeout, xd->time_last_stats_update +
+ dm->stat_poll_interval - now);
+ timeout = clib_min (timeout, xd->time_last_link_update +
+ dm->link_state_poll_interval - now);
+ }
+ }
return 0;
}
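The process loop now sleeps for exactly as long as the next poll deadline allows: it starts from the smaller of the two configured intervals, shrinks the timeout by how soon each device's stats or link update falls due, and clamps at DPDK_MIN_POLL_INTERVAL so the loop never busy-spins. The timeout math with hypothetical timestamps:

    #include <stdio.h>

    static double
    min_d (double a, double b) { return a < b ? a : b; }

    int
    main (void)
    {
      double stat_ival = 10.0, link_ival = 0.5, now = 100.0; /* seconds */
      /* hypothetical per-device last-update timestamps */
      double last_stats[] = { 95.0, 99.0 }, last_link[] = { 99.9, 99.7 };
      double timeout = min_d (link_ival, stat_ival);

      for (int i = 0; i < 2; i++)
        {
          timeout = min_d (timeout, last_stats[i] + stat_ival - now);
          timeout = min_d (timeout, last_link[i] + link_ival - now);
        }
      printf ("sleep %.2fs before next poll\n", timeout); /* 0.20 here */
      return 0;
    }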
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dpdk_process_node,static) = {
.function = dpdk_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "dpdk-process",
.process_log2_n_stack_bytes = 17,
};
-/* *INDENT-ON* */
static clib_error_t *
dpdk_init (vlib_main_t * vm)
@@ -1924,35 +1602,28 @@ dpdk_init (vlib_main_t * vm)
dpdk_cli_reference ();
- dm->vlib_main = vm;
- dm->vnet_main = vnet_get_main ();
dm->conf = &dpdk_config_main;
- dm->conf->nchannels = 4;
vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet");
- /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */
- dm->buffer_flags_template = (VLIB_BUFFER_TOTAL_LENGTH_VALID |
- VLIB_BUFFER_EXT_HDR_VALID |
- VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
- VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
-
dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL;
dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL;
dm->log_default = vlib_log_register_class ("dpdk", 0);
dm->log_cryptodev = vlib_log_register_class ("dpdk", "cryptodev");
- dm->log_ipsec = vlib_log_register_class ("dpdk", "ipsec");
return error;
}
VLIB_INIT_FUNCTION (dpdk_init);
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+static clib_error_t *
+dpdk_worker_thread_init (vlib_main_t *vm)
+{
+ if (rte_thread_register () < 0)
+ clib_panic ("dpdk: cannot register thread %u - %s", vm->thread_index,
+ rte_strerror (rte_errno));
+ return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (dpdk_worker_thread_init);
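With thread.c and its custom launcher gone, VPP workers are plain pthreads from DPDK's perspective, so each must call rte_thread_register() before using per-lcore DPDK state such as mempool caches. A standalone illustration of that registration, assuming rte_eal_init() has already run on the main thread:

    #include <stdio.h>
    #include <rte_errno.h>
    #include <rte_lcore.h>

    static void *
    worker_entry (void *arg)
    {
      (void) arg;
      /* register this non-EAL pthread so DPDK assigns it an lcore id */
      if (rte_thread_register () < 0)
        {
          fprintf (stderr, "register failed: %s\n", rte_strerror (rte_errno));
          return NULL;
        }
      printf ("registered as lcore %u\n", rte_lcore_id ());
      return NULL;
    }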
diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c
index 25222856912..ca1690b708f 100644
--- a/src/plugins/dpdk/device/node.c
+++ b/src/plugins/dpdk/device/node.c
@@ -23,10 +23,10 @@
#include <dpdk/device/dpdk.h>
#include <vnet/classify/vnet_classify.h>
#include <vnet/mpls/packet.h>
-#include <vnet/handoff.h>
#include <vnet/devices/devices.h>
#include <vnet/interface/rx_queue_funcs.h>
#include <vnet/feature/feature.h>
+#include <vnet/tcp/tcp_packet.h>
#include <dpdk/device/dpdk_priv.h>
@@ -36,9 +36,13 @@ static char *dpdk_error_strings[] = {
#undef _
};
-/* make sure all flags we need are stored in lower 8 bits */
-STATIC_ASSERT ((PKT_RX_IP_CKSUM_BAD | PKT_RX_FDIR) <
- 256, "dpdk flags not un lower byte, fix needed");
+/* make sure all flags we need are stored in lower 32 bits */
+STATIC_ASSERT ((u64) (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD |
+ RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_LRO) < (1ULL << 32),
+ "dpdk flags not in lower word, fix needed");
+
+STATIC_ASSERT (RTE_MBUF_F_RX_L4_CKSUM_BAD == (1ULL << 3),
+ "bit number of RTE_MBUF_F_RX_L4_CKSUM_BAD is no longer 3!");
static_always_inline uword
dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b,
@@ -97,13 +101,13 @@ dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[])
{
vlib_buffer_t *b;
b = vlib_buffer_from_rte_mbuf (mb[0]);
- clib_prefetch_load (b);
+ clib_prefetch_store (b);
b = vlib_buffer_from_rte_mbuf (mb[1]);
- clib_prefetch_load (b);
+ clib_prefetch_store (b);
b = vlib_buffer_from_rte_mbuf (mb[2]);
- clib_prefetch_load (b);
+ clib_prefetch_store (b);
b = vlib_buffer_from_rte_mbuf (mb[3]);
- clib_prefetch_load (b);
+ clib_prefetch_store (b);
}
/** \brief Main DPDK input node
@@ -125,18 +129,18 @@ dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[])
@em Uses:
- <code>struct rte_mbuf mb->ol_flags</code>
- - PKT_RX_IP_CKSUM_BAD
+ - RTE_MBUF_F_RX_IP_CKSUM_BAD
@em Sets:
- <code>b->error</code> if the packet is to be dropped immediately
- <code>b->current_data, b->current_length</code>
- - adjusted as needed to skip the L2 header in direct-dispatch cases
+ - adjusted as needed to skip the L2 header in direct-dispatch cases
- <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
- - rx interface sw_if_index
+ - rx interface sw_if_index
- <code>vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0</code>
- - required by ipX-lookup
+ - required by ipX-lookup
- <code>b->flags</code>
- - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc.
+ - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc.
<em>Next Nodes:</em>
- Static arcs to: error-drop, ethernet-input,
@@ -145,31 +149,30 @@ dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[])
<code>xd->per_interface_next_index</code>
*/
-static_always_inline u16
-dpdk_ol_flags_extract (struct rte_mbuf **mb, u16 * flags, int count)
+static_always_inline u32
+dpdk_ol_flags_extract (struct rte_mbuf **mb, u32 *flags, int count)
{
- u16 rv = 0;
+ u32 rv = 0;
int i;
for (i = 0; i < count; i++)
{
/* all flags we are interested in are in lower 32 bits but
that might change */
- flags[i] = (u16) mb[i]->ol_flags;
+ flags[i] = (u32) mb[i]->ol_flags;
rv |= flags[i];
}
return rv;
}
static_always_inline uword
-dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd,
- uword n_rx_packets, int maybe_multiseg,
- u16 * or_flagsp)
+dpdk_process_rx_burst (vlib_main_t *vm, dpdk_per_thread_data_t *ptd,
+ uword n_rx_packets, int maybe_multiseg, u32 *or_flagsp)
{
u32 n_left = n_rx_packets;
vlib_buffer_t *b[4];
struct rte_mbuf **mb = ptd->mbufs;
uword n_bytes = 0;
- u16 *flags, or_flags = 0;
+ u32 *flags, or_flags = 0;
vlib_buffer_t bt;
mb = ptd->mbufs;
@@ -254,7 +257,7 @@ dpdk_process_flow_offload (dpdk_device_t * xd, dpdk_per_thread_data_t * ptd,
/* TODO prefetch and quad-loop */
for (n = 0; n < n_rx_packets; n++)
{
- if ((ptd->flags[n] & PKT_RX_FDIR_ID) == 0)
+ if ((ptd->flags[n] & RTE_MBUF_F_RX_FDIR_ID) == 0)
continue;
fle = pool_elt_at_index (xd->flow_lookup_entries,
@@ -277,6 +280,65 @@ dpdk_process_flow_offload (dpdk_device_t * xd, dpdk_per_thread_data_t * ptd,
}
}
+static_always_inline u16
+dpdk_lro_find_l4_hdr_sz (vlib_buffer_t *b)
+{
+ u16 l4_hdr_sz = 0;
+ u16 current_offset = 0;
+ ethernet_header_t *e;
+ tcp_header_t *tcp;
+ u8 *data = vlib_buffer_get_current (b);
+ u16 ethertype;
+ e = (void *) data;
+ current_offset += sizeof (e[0]);
+ ethertype = clib_net_to_host_u16 (e->type);
+ if (ethernet_frame_is_tagged (ethertype))
+ {
+ ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (e + 1);
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ current_offset += sizeof (*vlan);
+ if (ethertype == ETHERNET_TYPE_VLAN)
+ {
+ vlan++;
+ current_offset += sizeof (*vlan);
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ }
+ }
+ data += current_offset;
+ if (ethertype == ETHERNET_TYPE_IP4)
+ {
+ data += sizeof (ip4_header_t);
+ tcp = (void *) data;
+ l4_hdr_sz = tcp_header_bytes (tcp);
+ }
+ else
+ {
+ /* FIXME: extension headers...*/
+ data += sizeof (ip6_header_t);
+ tcp = (void *) data;
+ l4_hdr_sz = tcp_header_bytes (tcp);
+ }
+ return l4_hdr_sz;
+}
+
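dpdk_lro_find_l4_hdr_sz() above walks at most two VLAN tags, assumes fixed-size IP headers (IPv4 options and IPv6 extension headers are not handled, per the FIXME), and finally derives the TCP header length from the data-offset field. That last step in isolation:

    #include <stdint.h>

    /* TCP data offset is the high nibble of byte 12, in 32-bit words */
    static inline uint16_t
    tcp_header_bytes_from_wire (const uint8_t *tcp)
    {
      return (uint16_t) ((tcp[12] >> 4) * 4); /* 20..60 bytes when valid */
    }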
+static_always_inline void
+dpdk_process_lro_offload (dpdk_device_t *xd, dpdk_per_thread_data_t *ptd,
+ uword n_rx_packets)
+{
+ uword n;
+ vlib_buffer_t *b0;
+ for (n = 0; n < n_rx_packets; n++)
+ {
+ b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
+ if (ptd->flags[n] & RTE_MBUF_F_RX_LRO)
+ {
+ b0->flags |= VNET_BUFFER_F_GSO;
+ vnet_buffer2 (b0)->gso_size = ptd->mbufs[n]->tso_segsz;
+ vnet_buffer2 (b0)->gso_l4_hdr_sz = dpdk_lro_find_l4_hdr_sz (b0);
+ }
+ }
+}
+
static_always_inline u32
dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
vlib_node_runtime_t * node, u32 thread_index, u16 queue_id)
@@ -289,7 +351,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
struct rte_mbuf **mb;
vlib_buffer_t *b0;
u16 *next;
- u16 or_flags;
+ u32 or_flags;
u32 n;
int single_next = 0;
@@ -303,12 +365,13 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
/* get up to DPDK_RX_BURST_SZ buffers from PMD */
while (n_rx_packets < DPDK_RX_BURST_SZ)
{
- n = rte_eth_rx_burst (xd->port_id, queue_id,
- ptd->mbufs + n_rx_packets,
- DPDK_RX_BURST_SZ - n_rx_packets);
+ u32 n_to_rx = clib_min (DPDK_RX_BURST_SZ - n_rx_packets, 32);
+
+ n = rte_eth_rx_burst (xd->port_id, queue_id, ptd->mbufs + n_rx_packets,
+ n_to_rx);
n_rx_packets += n;
- if (n < 32)
+ if (n < n_to_rx)
break;
}
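The receive loop now requests at most 32 mbufs per rte_eth_rx_burst() call and exits as soon as a call returns short, since a partial burst means the RX ring is drained. The loop shape, with a stubbed burst function standing in for the PMD:

    #include <stdint.h>

    #define RX_BURST_SZ 256 /* stand-in for DPDK_RX_BURST_SZ */

    /* stub with rte_eth_rx_burst()-like semantics: returns <= n_req */
    extern uint16_t rx_burst_stub (void **pkts, uint16_t n_req);

    static uint32_t
    drain_rx_queue (void **pkts)
    {
      uint32_t n_rx = 0;

      while (n_rx < RX_BURST_SZ)
        {
          uint16_t n_req = RX_BURST_SZ - n_rx > 32 ? 32 : RX_BURST_SZ - n_rx;
          uint16_t n = rx_burst_stub (pkts + n_rx, n_req);

          n_rx += n;
          if (n < n_req)
            break; /* short burst: ring drained, stop polling */
        }
      return n_rx;
    }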
@@ -318,6 +381,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
/* Update buffer template */
vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->sw_if_index;
bt->error = node->errors[DPDK_ERROR_NONE];
+ bt->flags = xd->buffer_flags;
/* as DPDK allocates empty buffers from the mempool provided for each queue
before interface start, it is safe to store this in the template */
bt->buffer_pool_index = rxq->buffer_pool_index;
@@ -332,14 +396,34 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
/* as all packets belong to the same interface, the feature arc lookup
can be done once and the result stored in the buffer template */
if (PREDICT_FALSE (vnet_device_input_have_features (xd->sw_if_index)))
- vnet_feature_start_device_input_x1 (xd->sw_if_index, &next_index, bt);
+ vnet_feature_start_device_input (xd->sw_if_index, &next_index, bt);
if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG)
n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 1, &or_flags);
else
n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 0, &or_flags);
- if (PREDICT_FALSE (or_flags & PKT_RX_FDIR))
+ if (PREDICT_FALSE ((or_flags & RTE_MBUF_F_RX_LRO)))
+ dpdk_process_lro_offload (xd, ptd, n_rx_packets);
+
+ if (PREDICT_FALSE ((or_flags & RTE_MBUF_F_RX_L4_CKSUM_BAD) &&
+ (xd->buffer_flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)))
+ {
+ for (n = 0; n < n_rx_packets; n++)
+ {
+ /* Check and reset VNET_BUFFER_F_L4_CHECKSUM_CORRECT flag
+ if RTE_MBUF_F_RX_L4_CKSUM_BAD is set.
+ The magic number 3 is the bit position of RTE_MBUF_F_RX_L4_CKSUM_BAD
+ as defined in DPDK.
+ A STATIC_ASSERT earlier in this file ensures this.
+ */
+ b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
+ b0->flags ^= (ptd->flags[n] & RTE_MBUF_F_RX_L4_CKSUM_BAD)
+ << (VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT - 3);
+ }
+ }
+
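The branch-free fixup above depends on RTE_MBUF_F_RX_L4_CKSUM_BAD sitting at bit 3 (pinned by the STATIC_ASSERT near the top of node.c): shifting the isolated bit left by VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT - 3 lands it exactly on VPP's checksum-correct bit, and XOR then clears that bit only for packets the NIC flagged bad. Verified below with a stand-in bit position (VPP's actual log2 constant differs):

    #include <assert.h>
    #include <stdint.h>

    #define MBUF_L4_CKSUM_BAD (1u << 3) /* the bit 3 the assert pins down */
    #define LOG2_L4_CORRECT 7           /* stand-in for VPP's log2 constant */
    #define BUF_L4_CORRECT (1u << LOG2_L4_CORRECT)

    static uint32_t
    fixup_flags (uint32_t buf_flags, uint32_t mbuf_ol_flags)
    {
      /* template pre-sets BUF_L4_CORRECT; XOR clears it iff NIC said BAD */
      buf_flags ^= (mbuf_ol_flags & MBUF_L4_CKSUM_BAD) << (LOG2_L4_CORRECT - 3);
      return buf_flags;
    }

    int
    main (void)
    {
      assert (fixup_flags (BUF_L4_CORRECT, MBUF_L4_CKSUM_BAD) == 0);
      assert (fixup_flags (BUF_L4_CORRECT, 0) == BUF_L4_CORRECT);
      return 0;
    }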
+ if (PREDICT_FALSE (or_flags & RTE_MBUF_F_RX_FDIR))
{
/* some packets will need to go to different next nodes */
for (n = 0; n < n_rx_packets; n++)
@@ -348,7 +432,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
/* flow offload - process if rx flow offload enabled and at least one
packet is marked */
if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) &&
- (or_flags & PKT_RX_FDIR)))
+ (or_flags & RTE_MBUF_F_RX_FDIR)))
dpdk_process_flow_offload (xd, ptd, n_rx_packets);
/* enqueue buffers to the next node */
@@ -385,7 +469,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
marked as ip4 checksum bad we can notify ethernet input so it
can send packets to the ip4-input-no-checksum node */
if (xd->flags & DPDK_DEVICE_FLAG_RX_IP4_CKSUM &&
- (or_flags & PKT_RX_IP_CKSUM_BAD) == 0)
+ (or_flags & RTE_MBUF_F_RX_IP_CKSUM_BAD) == 0)
f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
vlib_frame_no_append (f);
}
@@ -459,7 +543,7 @@ VLIB_NODE_FN (dpdk_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
dpdk_device_t *xd;
uword n_rx_packets = 0;
vnet_hw_if_rxq_poll_vector_t *pv;
- u32 thread_index = node->thread_index;
+ u32 thread_index = vm->thread_index;
/*
* Poll all devices on this cpu for input/interrupts.
@@ -476,7 +560,6 @@ VLIB_NODE_FN (dpdk_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return n_rx_packets;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dpdk_input_node) = {
.type = VLIB_NODE_TYPE_INPUT,
.name = "dpdk-input",
@@ -492,7 +575,6 @@ VLIB_REGISTER_NODE (dpdk_input_node) = {
.n_errors = DPDK_N_ERROR,
.error_strings = dpdk_error_strings,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c
index 413a0f0df9d..9781d0ed7f0 100644
--- a/src/plugins/dpdk/main.c
+++ b/src/plugins/dpdk/main.c
@@ -13,13 +13,6 @@
* limitations under the License.
*/
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <linux/vfio.h>
-#include <sys/ioctl.h>
-
#include <vnet/vnet.h>
#include <vnet/plugin/plugin.h>
#include <dpdk/device/dpdk.h>
@@ -79,19 +72,14 @@ rte_delay_us_override_cb (unsigned us)
static clib_error_t * dpdk_main_init (vlib_main_t * vm)
{
- dpdk_main_t * dm = &dpdk_main;
clib_error_t * error = 0;
- dm->vlib_main = vm;
- dm->vnet_main = vnet_get_main ();
-
/* register custom delay function */
rte_delay_us_callback_register (rte_delay_us_override_cb);
return error;
}
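
rte_delay_us_callback_register() is public DPDK API that globally replaces the busy-wait rte_delay_us(); the plugin installs rte_delay_us_override_cb (defined above this function) so delays need not spin. A minimal sketch of the registration pattern, with a usleep body as an illustrative stand-in for the plugin's actual callback:

#include <unistd.h>
#include <rte_cycles.h>

/* illustrative callback: yield the core instead of busy-waiting */
static void
sketch_delay_us (unsigned int us)
{
  usleep (us);
}

static void
sketch_install_delay_cb (void)
{
  rte_delay_us_callback_register (sketch_delay_us);
}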
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (dpdk_main_init) =
{
.runs_after = VLIB_INITS("dpdk_init"),
@@ -101,4 +89,3 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Data Plane Development Kit (DPDK)",
};
-/* *INDENT-ON* */
diff --git a/src/plugins/dpdk/thread.c b/src/plugins/dpdk/thread.c
deleted file mode 100644
index 3a3fcc6cea6..00000000000
--- a/src/plugins/dpdk/thread.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2017 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <rte_config.h>
-
-#include <rte_common.h>
-#include <rte_log.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_tailq.h>
-#include <rte_eal.h>
-#include <rte_per_lcore.h>
-#include <rte_launch.h>
-#include <rte_atomic.h>
-#include <rte_cycles.h>
-#include <rte_prefetch.h>
-#include <rte_lcore.h>
-#include <rte_per_lcore.h>
-#include <rte_branch_prediction.h>
-#include <rte_interrupts.h>
-#include <rte_pci.h>
-#include <rte_random.h>
-#include <rte_debug.h>
-#include <rte_ether.h>
-#include <rte_ethdev.h>
-#include <rte_ring.h>
-#include <rte_mempool.h>
-#include <rte_mbuf.h>
-#include <rte_version.h>
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <dpdk/device/dpdk.h>
-#include <dpdk/device/dpdk_priv.h>
-
-static clib_error_t *
-dpdk_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id)
-{
- int r;
- r = rte_eal_remote_launch (fp, (void *) w, lcore_id);
- if (r)
- return clib_error_return (0, "Failed to launch thread %u", lcore_id);
- return 0;
-}
-
-static clib_error_t *
-dpdk_thread_set_lcore (u32 thread, u16 lcore)
-{
- return 0;
-}
-
-static vlib_thread_callbacks_t callbacks = {
- .vlib_launch_thread_cb = &dpdk_launch_thread,
- .vlib_thread_set_lcore_cb = &dpdk_thread_set_lcore,
-};
-
-static clib_error_t *
-dpdk_thread_init (vlib_main_t * vm)
-{
- vlib_thread_cb_register (vm, &callbacks);
- return 0;
-}
-
-VLIB_INIT_FUNCTION (dpdk_thread_init);
-
-/** @endcond */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
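
The deleted thread.c had routed vlib worker launch through DPDK's EAL. For context, rte_eal_remote_launch() runs a function on a specific idle EAL worker lcore; a minimal sketch of that API in plain DPDK (launch loop and worker body are illustrative):

#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_lcore.h>

static int
sketch_worker_fn (void *arg)
{
  (void) arg; /* a per-worker poll loop would go here */
  return 0;
}

static void
sketch_launch_workers (void)
{
  unsigned lcore_id;

  /* start sketch_worker_fn on every EAL worker lcore */
  RTE_LCORE_FOREACH_WORKER (lcore_id)
    rte_eal_remote_launch (sketch_worker_fn, NULL, lcore_id);

  /* block until all workers return */
  rte_eal_mp_wait_lcore ();
}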