aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/rdma
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/rdma')
-rw-r--r--src/plugins/rdma/CMakeLists.txt8
-rw-r--r--src/plugins/rdma/api.c52
-rw-r--r--src/plugins/rdma/cli.c14
-rw-r--r--src/plugins/rdma/device.c71
-rw-r--r--src/plugins/rdma/format.c15
-rw-r--r--src/plugins/rdma/input.c90
-rw-r--r--src/plugins/rdma/plugin.c2
-rw-r--r--src/plugins/rdma/rdma.api54
-rw-r--r--src/plugins/rdma/rdma_doc.md75
-rw-r--r--src/plugins/rdma/rdma_doc.rst102
-rw-r--r--src/plugins/rdma/rdma_mlx5dv.h12
-rw-r--r--src/plugins/rdma/test_api.c53
12 files changed, 364 insertions, 184 deletions
diff --git a/src/plugins/rdma/CMakeLists.txt b/src/plugins/rdma/CMakeLists.txt
index f598ff8c701..ef8bc90c6dd 100644
--- a/src/plugins/rdma/CMakeLists.txt
+++ b/src/plugins/rdma/CMakeLists.txt
@@ -19,17 +19,16 @@ if (NOT IBVERBS_INCLUDE_DIR)
endif()
vpp_plugin_find_library(rdma IBVERBS_LIB libibverbs.a)
-vpp_plugin_find_library(rdma RDMA_UTIL_LIB librdma_util.a)
vpp_plugin_find_library(rdma MLX5_LIB libmlx5.a)
-if (NOT IBVERBS_LIB OR NOT RDMA_UTIL_LIB OR NOT MLX5_LIB)
+if (NOT IBVERBS_LIB OR NOT MLX5_LIB)
message(WARNING "rdma plugin - ibverbs not found - rdma plugin disabled")
return()
endif()
-string_append(RDMA_LINK_FLAGS "-Wl,--whole-archive,${MLX5_LIB},--no-whole-archive")
+string_append(RDMA_LINK_FLAGS "-Wl,--whole-archive,${MLX5_LIB},--no-whole-archive -Wl,--exclude-libs,ALL")
-set(CMAKE_REQUIRED_FLAGS "-fPIC -shared -pthread -Wno-unused-command-line-argument ${RDMA_LINK_FLAGS} ${IBVERBS_LIB} ${RDMA_UTIL_LIB}")
+set(CMAKE_REQUIRED_FLAGS "-fPIC -shared -pthread -Wno-unused-command-line-argument ${RDMA_LINK_FLAGS} ${IBVERBS_LIB}")
set(CMAKE_REQUIRED_INCLUDES "${IBVERBS_INCLUDE_DIR}")
set(CMAKE_REQUIRED_LIBRARIES "c") # force linkage by including libc explicitely
CHECK_C_SOURCE_COMPILES("
@@ -73,5 +72,4 @@ add_vpp_plugin(rdma
LINK_LIBRARIES
${IBVERBS_LIB}
- ${RDMA_UTIL_LIB}
)
diff --git a/src/plugins/rdma/api.c b/src/plugins/rdma/api.c
index 7fe77105596..3fb17ff6ee0 100644
--- a/src/plugins/rdma/api.c
+++ b/src/plugins/rdma/api.c
@@ -27,6 +27,7 @@
#include <rdma/rdma.api_enum.h>
#include <rdma/rdma.api_types.h>
+#define REPLY_MSG_ID_BASE (rm->msg_id_base)
#include <vlibapi/api_helper_macros.h>
static rdma_mode_t
@@ -41,6 +42,8 @@ rdma_api_mode (vl_api_rdma_mode_t mode)
case RDMA_API_MODE_DV:
return RDMA_MODE_DV;
}
+ /* Fail the debug build. Useful for investigating endian issues. */
+ ASSERT (0);
return RDMA_MODE_AUTO;
}
@@ -79,6 +82,35 @@ rdma_api_rss6 (const vl_api_rdma_rss6_t rss6)
}
static void
+vl_api_rdma_create_v4_t_handler (vl_api_rdma_create_v4_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ rdma_main_t *rm = &rdma_main;
+ vl_api_rdma_create_v4_reply_t *rmp;
+ rdma_create_if_args_t args;
+ int rv;
+
+ clib_memset (&args, 0, sizeof (rdma_create_if_args_t));
+
+ args.ifname = mp->host_if;
+ args.name = mp->name;
+ args.rxq_num = mp->rxq_num;
+ args.rxq_size = mp->rxq_size;
+ args.txq_size = mp->txq_size;
+ args.mode = rdma_api_mode (mp->mode);
+ args.disable_striding_rq = 0;
+ args.no_multi_seg = mp->no_multi_seg;
+ args.max_pktlen = mp->max_pktlen;
+ args.rss4 = rdma_api_rss4 (mp->rss4);
+ args.rss6 = rdma_api_rss6 (mp->rss6);
+ rdma_create_if (vm, &args);
+ rv = args.rv;
+
+ REPLY_MACRO2_END (VL_API_RDMA_CREATE_V4_REPLY,
+ ({ rmp->sw_if_index = args.sw_if_index; }));
+}
+
+static void
vl_api_rdma_create_v3_t_handler (vl_api_rdma_create_v3_t *mp)
{
vlib_main_t *vm = vlib_get_main ();
@@ -103,7 +135,7 @@ vl_api_rdma_create_v3_t_handler (vl_api_rdma_create_v3_t *mp)
rdma_create_if (vm, &args);
rv = args.rv;
- REPLY_MACRO2 (VL_API_RDMA_CREATE_V3_REPLY + rm->msg_id_base,
+ REPLY_MACRO2 (VL_API_RDMA_CREATE_V3_REPLY,
({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
@@ -130,12 +162,8 @@ vl_api_rdma_create_v2_t_handler (vl_api_rdma_create_v2_t * mp)
rdma_create_if (vm, &args);
rv = args.rv;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_RDMA_CREATE_V2_REPLY + rm->msg_id_base,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_RDMA_CREATE_V2_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
static void
@@ -162,12 +190,8 @@ vl_api_rdma_create_t_handler (vl_api_rdma_create_t * mp)
rdma_create_if (vm, &args);
rv = args.rv;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_RDMA_CREATE_REPLY + rm->msg_id_base,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_RDMA_CREATE_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
static void
@@ -195,7 +219,7 @@ vl_api_rdma_delete_t_handler (vl_api_rdma_delete_t * mp)
rdma_delete_if (vm, rd);
reply:
- REPLY_MACRO (VL_API_RDMA_DELETE_REPLY + rm->msg_id_base);
+ REPLY_MACRO (VL_API_RDMA_DELETE_REPLY);
}
/* set tup the API message handling tables */
diff --git a/src/plugins/rdma/cli.c b/src/plugins/rdma/cli.c
index 8f191e34b63..bcedd625220 100644
--- a/src/plugins/rdma/cli.c
+++ b/src/plugins/rdma/cli.c
@@ -44,17 +44,15 @@ rdma_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (rdma_create_command, static) = {
.path = "create interface rdma",
.short_help = "create interface rdma <host-if ifname> [name <name>]"
- " [rx-queue-size <size>] [tx-queue-size <size>]"
- " [num-rx-queues <size>] [mode <auto|ibv|dv]"
- " [no-multi-seg] [no-striding]"
- " [max-pktlen <size>]",
+ " [rx-queue-size <size>] [tx-queue-size <size>]"
+ " [num-rx-queues <size>] [mode <auto|ibv|dv>]"
+ " [no-multi-seg] [no-striding]"
+ " [max-pktlen <size>]",
.function = rdma_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
rdma_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -99,14 +97,12 @@ rdma_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (rdma_delete_command, static) = {
.path = "delete interface rdma",
.short_help = "delete interface rdma "
"{<interface> | sw_if_index <sw_idx>}",
.function = rdma_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_rdma_dump_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -162,13 +158,11 @@ test_rdma_dump_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_rdma_mlx5dv_dump_command, static) = {
.path = "test rdma dump",
.short_help = "test rdma dump {<interface> | sw_if_index <sw_idx>}",
.function = test_rdma_dump_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
rdma_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/rdma/device.c b/src/plugins/rdma/device.c
index 1198d99b14e..8aeb586a42d 100644
--- a/src/plugins/rdma/device.c
+++ b/src/plugins/rdma/device.c
@@ -183,11 +183,11 @@ rdma_mac_change (vnet_hw_interface_t * hw, const u8 * old, const u8 * new)
return 0;
}
-static u32
-rdma_dev_change_mtu (rdma_device_t * rd)
+static clib_error_t *
+rdma_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+ u32 frame_size)
{
- rdma_log__ (VLIB_LOG_LEVEL_ERR, rd, "MTU change not supported");
- return ~0;
+ return vnet_error (VNET_ERR_UNSUPPORTED, 0);
}
static u32
@@ -202,8 +202,6 @@ rdma_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags)
return rdma_dev_set_ucast (rd);
case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
return rdma_dev_set_promisc (rd);
- case ETHERNET_INTERFACE_FLAG_MTU:
- return rdma_dev_change_mtu (rd);
}
rdma_log__ (VLIB_LOG_LEVEL_ERR, rd, "unknown flag %x requested", flags);
@@ -355,18 +353,20 @@ rdma_async_event_cleanup (rdma_device_t * rd)
static clib_error_t *
rdma_register_interface (vnet_main_t * vnm, rdma_device_t * rd)
{
- clib_error_t *err =
- ethernet_register_interface (vnm, rdma_device_class.index,
- rd->dev_instance, rd->hwaddr.bytes,
- &rd->hw_if_index, rdma_flag_change);
-
+ vnet_eth_interface_registration_t eir = {};
+
+ eir.dev_class_index = rdma_device_class.index;
+ eir.dev_instance = rd->dev_instance;
+ eir.address = rd->hwaddr.bytes;
+ eir.cb.flag_change = rdma_flag_change;
+ eir.cb.set_max_frame_size = rdma_set_max_frame_size;
+ rd->hw_if_index = vnet_eth_register_interface (vnm, &eir);
/* Indicate ability to support L3 DMAC filtering and
* initialize interface to L3 non-promisc mode */
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rd->hw_if_index);
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER;
+ vnet_hw_if_set_caps (vnm, rd->hw_if_index, VNET_HW_IF_CAP_MAC_FILTER);
ethernet_set_flags (vnm, rd->hw_if_index,
ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
- return err;
+ return 0;
}
static void
@@ -445,9 +445,10 @@ rdma_rxq_init (vlib_main_t * vm, rdma_device_t * rd, u16 qid, u32 n_desc,
if (is_mlx5dv)
{
struct mlx5dv_cq_init_attr dvcq = { };
- dvcq.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
+ dvcq.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE |
+ MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE;
dvcq.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
-
+ dvcq.cqe_size = 64;
if ((cqex = mlx5dv_create_cq (rd->ctx, &cqa, &dvcq)) == 0)
return clib_error_return_unix (0, "Create mlx5dv rx CQ Failed");
}
@@ -717,15 +718,30 @@ rdma_txq_init (vlib_main_t * vm, rdma_device_t * rd, u16 qid, u32 n_desc)
struct ibv_qp_init_attr qpia;
struct ibv_qp_attr qpa;
int qp_flags;
+ int is_mlx5dv = !!(rd->flags & RDMA_DEVICE_F_MLX5DV);
vec_validate_aligned (rd->txqs, qid, CLIB_CACHE_LINE_BYTES);
txq = vec_elt_at_index (rd->txqs, qid);
ASSERT (is_pow2 (n_desc));
txq->bufs_log2sz = min_log2 (n_desc);
vec_validate_aligned (txq->bufs, n_desc - 1, CLIB_CACHE_LINE_BYTES);
-
- if ((txq->cq = ibv_create_cq (rd->ctx, n_desc, NULL, NULL, 0)) == 0)
- return clib_error_return_unix (0, "Create CQ Failed");
+ if (is_mlx5dv)
+ {
+ struct ibv_cq_init_attr_ex cqa = {};
+ struct ibv_cq_ex *cqex;
+ struct mlx5dv_cq_init_attr dvcq = {};
+ dvcq.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE;
+ dvcq.cqe_size = 64;
+ cqa.cqe = n_desc;
+ if ((cqex = mlx5dv_create_cq (rd->ctx, &cqa, &dvcq)) == 0)
+ return clib_error_return_unix (0, "Create mlx5dv tx CQ Failed");
+ txq->cq = ibv_cq_ex_to_cq (cqex);
+ }
+ else
+ {
+ if ((txq->cq = ibv_create_cq (rd->ctx, n_desc, NULL, NULL, 0)) == 0)
+ return clib_error_return_unix (0, "Create CQ Failed");
+ }
memset (&qpia, 0, sizeof (qpia));
qpia.send_cq = txq->cq;
@@ -866,7 +882,7 @@ sysfs_path_to_pci_addr (char *path, vlib_pci_addr_t * addr)
unformat_input_t in;
u8 *s;
- s = clib_sysfs_link_to_name (path);
+ s = clib_file_get_resolved_basename (path);
if (!s)
return 0;
@@ -1022,7 +1038,7 @@ are explicitly disabled, and if the interface supports it.*/
/*
* FIXME: add support for interrupt mode
* vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, rd->hw_if_index);
- * hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ * hw->caps |= VNET_HW_IF_CAP_INT_MODE;
*/
vnet_hw_if_set_input_node (vnm, rd->hw_if_index, rdma_input_node.index);
@@ -1136,15 +1152,4 @@ rdma_init (vlib_main_t * vm)
return 0;
}
-VLIB_INIT_FUNCTION (rdma_init) =
-{
- .runs_after = VLIB_INITS ("pci_bus_init"),
-};
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+VLIB_INIT_FUNCTION (rdma_init);
diff --git a/src/plugins/rdma/format.c b/src/plugins/rdma/format.c
index aada52a1ec3..a999460bd55 100644
--- a/src/plugins/rdma/format.c
+++ b/src/plugins/rdma/format.c
@@ -58,13 +58,13 @@ format_rdma_bit_flag (u8 * s, va_list * args)
while (flags)
{
- if ((flags & (1 << i)))
+ if ((flags & ((u64) 1 << i)))
{
if (i < n_strs && strs[i] != 0)
s = format (s, " %s", strs[i]);
else
s = format (s, " unknown(%u)", i);
- flags ^= 1 << i;
+ flags ^= (u64) 1 << i;
}
i++;
}
@@ -122,8 +122,8 @@ format_rdma_device (u8 * s, va_list * args)
format_vlib_pci_addr, &rd->pci->addr);
if ((d = vlib_pci_get_device_info (vm, &rd->pci->addr, 0)))
{
- s = format (s, "%Uproduct name: %s\n", format_white_space, indent,
- d->product_name ? (char *) d->product_name : "");
+ s = format (s, "%Uproduct name: %v\n", format_white_space, indent,
+ d->product_name);
s = format (s, "%Upart number: %U\n", format_white_space, indent,
format_vlib_pci_vpd, d->vpd_r, "PN");
s = format (s, "%Urevision: %U\n", format_white_space, indent,
@@ -281,7 +281,7 @@ format_rdma_rxq (u8 * s, va_list * args)
if (rd->flags & RDMA_DEVICE_F_MLX5DV)
{
- u32 next_cqe_index = rxq->cq_ci & (rxq->size - 1);
+ u32 next_cqe_index = rxq->cq_ci & ((1 << rxq->log2_cq_size) - 1);
s = format (s, "\n%Uwq: stride %u wqe-cnt %u",
format_white_space, indent + 2, rxq->wq_stride,
rxq->wqe_cnt);
@@ -292,9 +292,8 @@ format_rdma_rxq (u8 * s, va_list * args)
next_cqe_index);
s = format (s, "\n%U%U", format_white_space, indent + 6,
format_mlx5_cqe_rx, rxq->cqes + next_cqe_index);
- s = format (s, "\n%U%U", format_white_space, indent + 6,
- format_hexdump, rxq->cqes + next_cqe_index,
- sizeof (mlx5dv_cqe_t));
+ s = format (s, "\n%U%U", format_white_space, indent + 6, format_hexdump,
+ rxq->cqes + next_cqe_index, (u32) sizeof (mlx5dv_cqe_t));
}
return s;
diff --git a/src/plugins/rdma/input.c b/src/plugins/rdma/input.c
index f1c508affa2..a7d41a1684d 100644
--- a/src/plugins/rdma/input.c
+++ b/src/plugins/rdma/input.c
@@ -228,7 +228,6 @@ rdma_device_input_refill (vlib_main_t * vm, rdma_device_t * rd,
about what RDMA core does (CYCLIC_RQ or LINKED_LIST_RQ). In cyclic
mode, the SRQ header is ignored anyways... */
-/* *INDENT-OFF* */
if (is_striding && !(current_data_seg & (wqe_sz - 1)))
*(mlx5dv_wqe_srq_next_t *) wqe = (mlx5dv_wqe_srq_next_t)
{
@@ -237,7 +236,6 @@ rdma_device_input_refill (vlib_main_t * vm, rdma_device_t * rd,
.signature = 0,
.rsvd1 = {0}
};
-/* *INDENT-ON* */
/* TODO: when log_skip_wqe > 2, hw_prefetcher doesn't work, lots of LLC store
misses occur for wqes, to be fixed... */
@@ -609,6 +607,7 @@ rdma_device_poll_cq_mlx5dv (rdma_device_t * rd, rdma_rxq_t * rxq,
n_rx_packets++;
cq_ci++;
byte_cnt++;
+ cqe_flags++;
continue;
}
@@ -670,46 +669,77 @@ rdma_device_mlx5dv_l3_validate_and_swap_bc (rdma_per_thread_data_t
* ptd, int n_rx_packets, u32 * bc)
{
u16 mask = CQE_FLAG_L3_HDR_TYPE_MASK | CQE_FLAG_L3_OK;
- u16 match = CQE_FLAG_L3_HDR_TYPE_IP4 << CQE_FLAG_L3_HDR_TYPE_SHIFT;
+ u16 match =
+ CQE_FLAG_L3_HDR_TYPE_IP4 << CQE_FLAG_L3_HDR_TYPE_SHIFT | CQE_FLAG_L3_OK;
+
+ /* convert mask/match to big endian for subsequent comparison */
+ mask = clib_host_to_net_u16 (mask);
+ match = clib_host_to_net_u16 (match);
/* verify that all ip4 packets have l3_ok flag set and convert packet
length from network to host byte order */
int skip_ip4_cksum = 1;
+ int n_left = n_rx_packets;
+ u16 *cqe_flags = ptd->cqe_flags;
#if defined CLIB_HAVE_VEC256
- u16x16 mask16 = u16x16_splat (mask);
- u16x16 match16 = u16x16_splat (match);
- u16x16 r = { };
+ if (n_left >= 16)
+ {
+ u16x16 mask16 = u16x16_splat (mask);
+ u16x16 match16 = u16x16_splat (match);
+ u16x16 r16 = {};
+
+ while (n_left >= 16)
+ {
+ r16 |= (*(u16x16 *) cqe_flags & mask16) != match16;
- for (int i = 0; i * 16 < n_rx_packets; i++)
- r |= (ptd->cqe_flags16[i] & mask16) != match16;
+ *(u32x8 *) bc = u32x8_byte_swap (*(u32x8 *) bc);
+ *(u32x8 *) (bc + 8) = u32x8_byte_swap (*(u32x8 *) (bc + 8));
- if (!u16x16_is_all_zero (r))
- skip_ip4_cksum = 0;
+ cqe_flags += 16;
+ bc += 16;
+ n_left -= 16;
+ }
- for (int i = 0; i < n_rx_packets; i += 8)
- *(u32x8 *) (bc + i) = u32x8_byte_swap (*(u32x8 *) (bc + i));
+ if (!u16x16_is_all_zero (r16))
+ skip_ip4_cksum = 0;
+ }
#elif defined CLIB_HAVE_VEC128
- u16x8 mask8 = u16x8_splat (mask);
- u16x8 match8 = u16x8_splat (match);
- u16x8 r = { };
+ if (n_left >= 8)
+ {
+ u16x8 mask8 = u16x8_splat (mask);
+ u16x8 match8 = u16x8_splat (match);
+ u16x8 r8 = {};
- for (int i = 0; i * 8 < n_rx_packets; i++)
- r |= (ptd->cqe_flags8[i] & mask8) != match8;
+ while (n_left >= 8)
+ {
+ r8 |= (*(u16x8 *) cqe_flags & mask8) != match8;
- if (!u16x8_is_all_zero (r))
- skip_ip4_cksum = 0;
+ *(u32x4 *) bc = u32x4_byte_swap (*(u32x4 *) bc);
+ *(u32x4 *) (bc + 4) = u32x4_byte_swap (*(u32x4 *) (bc + 4));
- for (int i = 0; i < n_rx_packets; i += 4)
- *(u32x4 *) (bc + i) = u32x4_byte_swap (*(u32x4 *) (bc + i));
-#else
- for (int i = 0; i < n_rx_packets; i++)
- if ((ptd->cqe_flags[i] & mask) != match)
- skip_ip4_cksum = 0;
+ cqe_flags += 8;
+ bc += 8;
+ n_left -= 8;
+ }
- for (int i = 0; i < n_rx_packets; i++)
- bc[i] = clib_net_to_host_u32 (bc[i]);
+ if (!u16x8_is_all_zero (r8))
+ skip_ip4_cksum = 0;
+ }
#endif
+
+ while (n_left >= 1)
+ {
+ if ((cqe_flags[0] & mask) != match)
+ skip_ip4_cksum = 0;
+
+ bc[0] = clib_net_to_host_u32 (bc[0]);
+
+ cqe_flags += 1;
+ bc += 1;
+ n_left -= 1;
+ }
+
return skip_ip4_cksum;
}
@@ -945,7 +975,7 @@ rdma_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* update buffer template for input feature arcs if any */
next_index = rd->per_interface_next_index;
if (PREDICT_FALSE (vnet_device_input_have_features (rd->sw_if_index)))
- vnet_feature_start_device_input_x1 (rd->sw_if_index, &next_index, &bt);
+ vnet_feature_start_device_input (rd->sw_if_index, &next_index, &bt);
vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -1028,7 +1058,7 @@ VLIB_NODE_FN (rdma_input_node) (vlib_main_t * vm,
if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_ADMIN_UP) == 0)
continue;
- if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_ERROR))
+ if (PREDICT_FALSE (rd->flags & RDMA_DEVICE_F_ERROR))
continue;
if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
@@ -1041,7 +1071,6 @@ VLIB_NODE_FN (rdma_input_node) (vlib_main_t * vm,
return n_rx;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (rdma_input_node) = {
.name = "rdma-input",
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -1053,7 +1082,6 @@ VLIB_REGISTER_NODE (rdma_input_node) = {
.error_strings = rdma_input_error_strings,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/rdma/plugin.c b/src/plugins/rdma/plugin.c
index b0dddee42b6..0d2cccc96f8 100644
--- a/src/plugins/rdma/plugin.c
+++ b/src/plugins/rdma/plugin.c
@@ -19,12 +19,10 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "RDMA IBverbs Device Driver",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/rdma/rdma.api b/src/plugins/rdma/rdma.api
index f2c70c7e514..4c06d8c6658 100644
--- a/src/plugins/rdma/rdma.api
+++ b/src/plugins/rdma/rdma.api
@@ -98,6 +98,8 @@ enum rdma_rss6
};
/** \brief
+ Same as v4, but not autoendian (expect buggy handling of flag values).
+
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param host_if - Linux netdev interface name
@@ -114,6 +116,9 @@ enum rdma_rss6
define rdma_create_v3
{
+ option deprecated;
+ option replaced_by="rdma_create_v4";
+
u32 client_index;
u32 context;
@@ -130,6 +135,38 @@ define rdma_create_v3
option vat_help = "<host-if ifname> [name <name>] [rx-queue-size <size>] [tx-queue-size <size>] [num-rx-queues <size>] [mode <auto|ibv|dv>] [no-multi-seg] [max-pktlen <size>] [rss <ipv4|ipv4-udp|ipv4-tcp>] [rss <ipv6|ipv6-udp|ipv6-tcp>]";
};
+/** \brief
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param host_if - Linux netdev interface name
+ @param name - new rdma interface name
+ @param rxq_num - number of receive queues (optional)
+ @param rxq_size - receive queue size (optional)
+ @param txq_size - transmit queue size (optional)
+ @param mode - operation mode (optional)
+ @param no_multi_seg (optional) - disable chained buffer RX
+ @param max_pktlen (optional) - maximal RX packet size.
+ @param rss4 (optional) - IPv4 RSS
+ @param rss6 (optional) - IPv6 RSS
+*/
+
+autoendian define rdma_create_v4
+{
+ u32 client_index;
+ u32 context;
+
+ string host_if[64];
+ string name[64];
+ u16 rxq_num [default=1];
+ u16 rxq_size [default=1024];
+ u16 txq_size [default=1024];
+ vl_api_rdma_mode_t mode [default=0];
+ bool no_multi_seg [default=0];
+ u16 max_pktlen [default=0];
+ vl_api_rdma_rss4_t rss4 [default=0];
+ vl_api_rdma_rss6_t rss6 [default=0];
+ option vat_help = "<host-if ifname> [name <name>] [rx-queue-size <size>] [tx-queue-size <size>] [num-rx-queues <size>] [mode <auto|ibv|dv>] [no-multi-seg] [max-pktlen <size>] [rss <ipv4|ipv4-udp|ipv4-tcp>] [rss <ipv6|ipv6-udp|ipv6-tcp>]";
+};
/** \brief
@param context - sender context, to match reply w/ request
@@ -139,6 +176,8 @@ define rdma_create_v3
define rdma_create_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
@@ -152,6 +191,8 @@ define rdma_create_reply
define rdma_create_v2_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
@@ -176,6 +217,19 @@ define rdma_create_v3_reply
@param sw_if_index - interface index
*/
+autoendian define rdma_create_v4_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index
+*/
+
autoreply define rdma_delete
{
u32 client_index;
diff --git a/src/plugins/rdma/rdma_doc.md b/src/plugins/rdma/rdma_doc.md
deleted file mode 100644
index 3fed5b6fc49..00000000000
--- a/src/plugins/rdma/rdma_doc.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# RDMA (ibverb) Ethernet driver {#rdma_doc}
-
-This driver relies on Linux rdma-core (libibverb) userspace poll-mode driver
-to rx/tx Ethernet packets. Despite using the RDMA APIs, this is **not** about
-RDMA (no Infiniband, no RoCE, no iWARP), just pure traditional Ethernet
-packets.
-
-## Maturity level
-Under development: it should work, but has not been thoroughly tested.
-
-## Supported Hardware
- - Mellanox ConnectX-4
- - Mellanox ConnectX-5
-
-## Features
- - bifurcation: MAC based flow steering for transparent sharing of a single
-physical port between multiple virtual interfaces including Linux netdev
- - multiqueue
-
-## Security considerations
-When creating a rdma interface, it will receive all packets to the MAC address
-attributed to the interface plus a copy of all broadcast and multicast
-traffic.
-The MAC address is under the control of VPP: **the user controlling VPP can
-divert all traffic of any MAC address to the VPP process, including the Linux
-netdev MAC address as long as it can create a rdma interface**.
-The rights to create a rdma interface are controlled by the access rights of
-the `/dev/infiniband/uverbs[0-9]+`device nodes.
-
-## Quickstart
-1. Make sure the `ib_uverbs` module is loaded:
-```
-~# modprobe ib_uverbs
-```
-2. In VPP, create a new rdma virtual interface tied to the Linux netdev of the
-physical port you want to use (`enp94s0f0` in this example):
-```
-vpp# create int rdma host-if enp94s0f0 name rdma-0
-```
-3. Use the interface as usual, eg.:
-```
-vpp# set int ip addr rdma-0 1.1.1.1/24
-vpp# set int st rdma-0 up
-vpp# ping 1.1.1.100`
-```
-
-## Containers support
-It should work in containers as long as:
- - the `ib_uverbs` module is loaded
- - the device nodes `/dev/infiniband/uverbs[0-9]+` are usable from the
- container (but see [security considerations](#Security considerations))
-
-## SR-IOV VFs support
-It should work on SR-IOV VFs the same way it does with PFs. Because of VFs
-security containment features, make sure the MAC address of the rdma VPP
-interface matches the MAC address assigned to the underlying VF.
-For example:
-```
-host# echo 1 > /sys/class/infiniband/mlx5_0/device/sriov_numvfs
-host# ip l set dev enp94s0f0 vf 0 mac 92:5d:f5:df:b1:6f spoof on trust off
-host# ip l set dev enp94s0f2 up
-vpp# create int rdma host-if enp94s0f2 name rdma-0
-vpp# set int mac address rdma-0 92:5d:f5:df:b1:6f
-```
-If you plan to use L2 features such as switching, make sure the underlying
-VF is configured in trusted mode and spoof-checking is disabled (of course, be
-aware of the [security considerations](#Security considerations)):
-```
-host# ip l set dev enp94s0f0 vf 0 spoof off trust on
-```
-
-## Direct Verb mode
-Direct Verb allows the driver to access the NIC HW RX/TX rings directly
-instead of having to go through libibverb and suffering associated overhead.
-It will be automatically selected if the adapter supports it.
diff --git a/src/plugins/rdma/rdma_doc.rst b/src/plugins/rdma/rdma_doc.rst
new file mode 100644
index 00000000000..c22ea550a75
--- /dev/null
+++ b/src/plugins/rdma/rdma_doc.rst
@@ -0,0 +1,102 @@
+RDMA (ibverb) device driver
+===========================
+
+This driver relies on Linux rdma-core (libibverbs) userspace poll-mode
+driver to rx/tx Ethernet packets. Despite using the RDMA APIs, this is
+**not** about RDMA (no Infiniband, no RoCE, no iWARP), just pure
+traditional Ethernet packets.
+
+Maturity level
+--------------
+
+Under development: it should work, but has not been thoroughly tested.
+
+Supported Hardware
+------------------
+
+- Mellanox ConnectX-4
+- Mellanox ConnectX-5
+
+Features
+--------
+
+- bifurcation: MAC based flow steering for transparent sharing of a
+ single physical port between multiple virtual interfaces including
+ Linux netdev
+- multiqueue
+
+Security considerations
+-----------------------
+
+When creating a rdma interface, it will receive all packets to the MAC
+address attributed to the interface plus a copy of all broadcast and
+multicast traffic. The MAC address is under the control of VPP: **the
+user controlling VPP can divert all traffic of any MAC address to the
+VPP process, including the Linux netdev MAC address as long as it can
+create a rdma interface**. The rights to create a rdma interface are
+controlled by the access rights of the
+``/dev/infiniband/uverbs[0-9]+`` device nodes.
+
+Quickstart
+----------
+
+1. Make sure the ``ib_uverbs`` module is loaded:
+
+::
+
+ ~# modprobe ib_uverbs
+
+2. In VPP, create a new rdma virtual interface tied to the Linux netdev
+ of the physical port you want to use (``enp94s0f0`` in this example):
+
+::
+
+ vpp# create int rdma host-if enp94s0f0 name rdma-0
+
+3. Use the interface as usual, e.g.:
+
+::
+
+ vpp# set int ip addr rdma-0 1.1.1.1/24
+ vpp# set int st rdma-0 up
+ vpp# ping 1.1.1.100
+
+Containers support
+------------------
+
+It should work in containers as long as:
+
+- the ``ib_uverbs`` module is loaded
+- the device nodes ``/dev/infiniband/uverbs[0-9]+`` are usable from the container (but see `Security considerations`_)
+
+SR-IOV VFs support
+------------------
+
+It should work on SR-IOV VFs the same way it does with PFs. Because of
+VFs security containment features, make sure the MAC address of the rdma
+VPP interface matches the MAC address assigned to the underlying VF. For
+example:
+
+::
+
+ host# echo 1 > /sys/class/infiniband/mlx5_0/device/sriov_numvfs
+ host# ip l set dev enp94s0f0 vf 0 mac 92:5d:f5:df:b1:6f spoof on trust off
+ host# ip l set dev enp94s0f2 up
+ vpp# create int rdma host-if enp94s0f2 name rdma-0
+ vpp# set int mac address rdma-0 92:5d:f5:df:b1:6f
+
+If you plan to use L2 features such as switching, make sure the
+underlying VF is configured in trusted mode and spoof-checking is
+disabled (of course, be aware of the
+`Security considerations`_):
+
+::
+
+ host# ip l set dev enp94s0f0 vf 0 spoof off trust on
+
+Direct Verb mode
+----------------
+
+Direct Verb allows the driver to access the NIC HW RX/TX rings directly
+instead of having to go through libibverbs and suffering associated
+overhead. It will be automatically selected if the adapter supports it.
diff --git a/src/plugins/rdma/rdma_mlx5dv.h b/src/plugins/rdma/rdma_mlx5dv.h
index efcefe7fbf7..bf01a3a37d6 100644
--- a/src/plugins/rdma/rdma_mlx5dv.h
+++ b/src/plugins/rdma/rdma_mlx5dv.h
@@ -24,16 +24,16 @@
#include <vppinfra/types.h>
#include <vppinfra/error.h>
/* CQE flags - bits 16-31 of qword at offset 0x1c */
-#define CQE_FLAG_L4_OK 10
-#define CQE_FLAG_L3_OK 9
-#define CQE_FLAG_L2_OK 8
-#define CQE_FLAG_IP_FRAG 7
+#define CQE_FLAG_L4_OK (1 << 10)
+#define CQE_FLAG_L3_OK (1 << 9)
+#define CQE_FLAG_L2_OK (1 << 8)
+#define CQE_FLAG_IP_FRAG (1 << 7)
#define CQE_FLAG_L4_HDR_TYPE(f) (((f) >> 4) & 7)
#define CQE_FLAG_L3_HDR_TYPE_SHIFT (2)
#define CQE_FLAG_L3_HDR_TYPE_MASK (3 << CQE_FLAG_L3_HDR_TYPE_SHIFT)
#define CQE_FLAG_L3_HDR_TYPE(f) (((f) & CQE_FLAG_L3_HDR_TYPE_MASK) >> CQE_FLAG_L3_HDR_TYPE_SHIFT)
-#define CQE_FLAG_L3_HDR_TYPE_IP4 1
-#define CQE_FLAG_L3_HDR_TYPE_IP6 2
+#define CQE_FLAG_L3_HDR_TYPE_IP4 2
+#define CQE_FLAG_L3_HDR_TYPE_IP6 1
#define CQE_FLAG_IP_EXT_OPTS 1
/* CQE byte count (Striding RQ) */
diff --git a/src/plugins/rdma/test_api.c b/src/plugins/rdma/test_api.c
index e9d5fcaad98..4ec4d3bf345 100644
--- a/src/plugins/rdma/test_api.c
+++ b/src/plugins/rdma/test_api.c
@@ -189,6 +189,41 @@ api_rdma_create_v3 (vat_main_t *vam)
return ret;
}
+static int
+api_rdma_create_v4 (vat_main_t *vam)
+{
+ vl_api_rdma_create_v4_t *mp;
+ rdma_create_if_args_t args;
+ int ret;
+
+ if (!unformat_user (vam->input, unformat_rdma_create_if_args, &args))
+ {
+ clib_warning ("unknown input `%U'", format_unformat_error, vam->input);
+ return -99;
+ }
+
+ M (RDMA_CREATE_V4, mp);
+
+ snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s", args.ifname);
+ if (args.name)
+ snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name);
+ else
+ mp->name[0] = 0;
+ mp->rxq_num = args.rxq_num;
+ mp->rxq_size = args.rxq_size;
+ mp->txq_size = args.txq_size;
+ mp->mode = api_rdma_mode (args.mode);
+ mp->no_multi_seg = args.no_multi_seg;
+ mp->max_pktlen = args.max_pktlen;
+ mp->rss4 = api_rdma_rss4 (args.rss4);
+ mp->rss6 = api_rdma_rss6 (args.rss6);
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
/* rdma-create reply handler */
static void
vl_api_rdma_create_reply_t_handler (vl_api_rdma_create_reply_t * mp)
@@ -243,6 +278,24 @@ vl_api_rdma_create_v3_reply_t_handler (vl_api_rdma_create_v3_reply_t *mp)
vam->regenerate_interface_table = 1;
}
+/* rdma-create reply handler v4 */
+static void
+vl_api_rdma_create_v4_reply_t_handler (vl_api_rdma_create_v4_reply_t *mp)
+{
+ vat_main_t *vam = rdma_test_main.vat_main;
+ i32 retval = mp->retval;
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created rdma with sw_if_index %d\n",
+ ntohl (mp->sw_if_index));
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vam->regenerate_interface_table = 1;
+}
+
/* rdma delete API */
static int
api_rdma_delete (vat_main_t * vam)