diff options
Diffstat (limited to 'src/plugins/rdma/input.c')
-rw-r--r-- | src/plugins/rdma/input.c | 90 |
1 file changed, 59 insertions, 31 deletions
diff --git a/src/plugins/rdma/input.c b/src/plugins/rdma/input.c index f1c508affa2..a7d41a1684d 100644 --- a/src/plugins/rdma/input.c +++ b/src/plugins/rdma/input.c @@ -228,7 +228,6 @@ rdma_device_input_refill (vlib_main_t * vm, rdma_device_t * rd, about what RDMA core does (CYCLIC_RQ or LINKED_LIST_RQ). In cyclic mode, the SRQ header is ignored anyways... */ -/* *INDENT-OFF* */ if (is_striding && !(current_data_seg & (wqe_sz - 1))) *(mlx5dv_wqe_srq_next_t *) wqe = (mlx5dv_wqe_srq_next_t) { @@ -237,7 +236,6 @@ rdma_device_input_refill (vlib_main_t * vm, rdma_device_t * rd, .signature = 0, .rsvd1 = {0} }; -/* *INDENT-ON* */ /* TODO: when log_skip_wqe > 2, hw_prefetcher doesn't work, lots of LLC store misses occur for wqes, to be fixed... */ @@ -609,6 +607,7 @@ rdma_device_poll_cq_mlx5dv (rdma_device_t * rd, rdma_rxq_t * rxq, n_rx_packets++; cq_ci++; byte_cnt++; + cqe_flags++; continue; } @@ -670,46 +669,77 @@ rdma_device_mlx5dv_l3_validate_and_swap_bc (rdma_per_thread_data_t * ptd, int n_rx_packets, u32 * bc) { u16 mask = CQE_FLAG_L3_HDR_TYPE_MASK | CQE_FLAG_L3_OK; - u16 match = CQE_FLAG_L3_HDR_TYPE_IP4 << CQE_FLAG_L3_HDR_TYPE_SHIFT; + u16 match = + CQE_FLAG_L3_HDR_TYPE_IP4 << CQE_FLAG_L3_HDR_TYPE_SHIFT | CQE_FLAG_L3_OK; + + /* convert mask/match to big endian for subsequant comparison */ + mask = clib_host_to_net_u16 (mask); + match = clib_host_to_net_u16 (match); /* verify that all ip4 packets have l3_ok flag set and convert packet length from network to host byte order */ int skip_ip4_cksum = 1; + int n_left = n_rx_packets; + u16 *cqe_flags = ptd->cqe_flags; #if defined CLIB_HAVE_VEC256 - u16x16 mask16 = u16x16_splat (mask); - u16x16 match16 = u16x16_splat (match); - u16x16 r = { }; + if (n_left >= 16) + { + u16x16 mask16 = u16x16_splat (mask); + u16x16 match16 = u16x16_splat (match); + u16x16 r16 = {}; + + while (n_left >= 16) + { + r16 |= (*(u16x16 *) cqe_flags & mask16) != match16; - for (int i = 0; i * 16 < n_rx_packets; i++) - r |= (ptd->cqe_flags16[i] & 
mask16) != match16; + *(u32x8 *) bc = u32x8_byte_swap (*(u32x8 *) bc); + *(u32x8 *) (bc + 8) = u32x8_byte_swap (*(u32x8 *) (bc + 8)); - if (!u16x16_is_all_zero (r)) - skip_ip4_cksum = 0; + cqe_flags += 16; + bc += 16; + n_left -= 16; + } - for (int i = 0; i < n_rx_packets; i += 8) - *(u32x8 *) (bc + i) = u32x8_byte_swap (*(u32x8 *) (bc + i)); + if (!u16x16_is_all_zero (r16)) + skip_ip4_cksum = 0; + } #elif defined CLIB_HAVE_VEC128 - u16x8 mask8 = u16x8_splat (mask); - u16x8 match8 = u16x8_splat (match); - u16x8 r = { }; + if (n_left >= 8) + { + u16x8 mask8 = u16x8_splat (mask); + u16x8 match8 = u16x8_splat (match); + u16x8 r8 = {}; - for (int i = 0; i * 8 < n_rx_packets; i++) - r |= (ptd->cqe_flags8[i] & mask8) != match8; + while (n_left >= 8) + { + r8 |= (*(u16x8 *) cqe_flags & mask8) != match8; - if (!u16x8_is_all_zero (r)) - skip_ip4_cksum = 0; + *(u32x4 *) bc = u32x4_byte_swap (*(u32x4 *) bc); + *(u32x4 *) (bc + 4) = u32x4_byte_swap (*(u32x4 *) (bc + 4)); - for (int i = 0; i < n_rx_packets; i += 4) - *(u32x4 *) (bc + i) = u32x4_byte_swap (*(u32x4 *) (bc + i)); -#else - for (int i = 0; i < n_rx_packets; i++) - if ((ptd->cqe_flags[i] & mask) != match) - skip_ip4_cksum = 0; + cqe_flags += 8; + bc += 8; + n_left -= 8; + } - for (int i = 0; i < n_rx_packets; i++) - bc[i] = clib_net_to_host_u32 (bc[i]); + if (!u16x8_is_all_zero (r8)) + skip_ip4_cksum = 0; + } #endif + + while (n_left >= 1) + { + if ((cqe_flags[0] & mask) != match) + skip_ip4_cksum = 0; + + bc[0] = clib_net_to_host_u32 (bc[0]); + + cqe_flags += 1; + bc += 1; + n_left -= 1; + } + return skip_ip4_cksum; } @@ -945,7 +975,7 @@ rdma_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* update buffer template for input feature arcs if any */ next_index = rd->per_interface_next_index; if (PREDICT_FALSE (vnet_device_input_have_features (rd->sw_if_index))) - vnet_feature_start_device_input_x1 (rd->sw_if_index, &next_index, &bt); + vnet_feature_start_device_input (rd->sw_if_index, &next_index, 
&bt); vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next); @@ -1028,7 +1058,7 @@ VLIB_NODE_FN (rdma_input_node) (vlib_main_t * vm, if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_ADMIN_UP) == 0) continue; - if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_ERROR)) + if (PREDICT_FALSE (rd->flags & RDMA_DEVICE_F_ERROR)) continue; if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV)) @@ -1041,7 +1071,6 @@ VLIB_NODE_FN (rdma_input_node) (vlib_main_t * vm, return n_rx; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (rdma_input_node) = { .name = "rdma-input", .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED, @@ -1053,7 +1082,6 @@ VLIB_REGISTER_NODE (rdma_input_node) = { .error_strings = rdma_input_error_strings, }; -/* *INDENT-ON* */ /* |