summaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/rdma/input.c63
1 files changed, 40 insertions, 23 deletions
diff --git a/src/plugins/rdma/input.c b/src/plugins/rdma/input.c
index 2ff04a6ec46..5f2c0bbf31f 100644
--- a/src/plugins/rdma/input.c
+++ b/src/plugins/rdma/input.c
@@ -609,6 +609,7 @@ rdma_device_poll_cq_mlx5dv (rdma_device_t * rd, rdma_rxq_t * rxq,
n_rx_packets++;
cq_ci++;
byte_cnt++;
+ cqe_flags++;
continue;
}
@@ -670,46 +671,62 @@ rdma_device_mlx5dv_l3_validate_and_swap_bc (rdma_per_thread_data_t
* ptd, int n_rx_packets, u32 * bc)
{
u16 mask = CQE_FLAG_L3_HDR_TYPE_MASK | CQE_FLAG_L3_OK;
- u16 match = CQE_FLAG_L3_HDR_TYPE_IP4 << CQE_FLAG_L3_HDR_TYPE_SHIFT;
+ u16 match =
+ CQE_FLAG_L3_HDR_TYPE_IP4 << CQE_FLAG_L3_HDR_TYPE_SHIFT | CQE_FLAG_L3_OK;
+
+ /* convert mask/match to big endian for subsequant comparison */
+ mask = clib_host_to_net_u16 (mask);
+ match = clib_host_to_net_u16 (match);
/* verify that all ip4 packets have l3_ok flag set and convert packet
length from network to host byte order */
int skip_ip4_cksum = 1;
+ int n_left = 0;
#if defined CLIB_HAVE_VEC256
- u16x16 mask16 = u16x16_splat (mask);
- u16x16 match16 = u16x16_splat (match);
- u16x16 r = { };
+ if (n_rx_packets >= 16)
+ {
+ u16x16 mask16 = u16x16_splat (mask);
+ u16x16 match16 = u16x16_splat (match);
+ u16x16 r16 = {};
- for (int i = 0; i * 16 < n_rx_packets; i++)
- r |= (ptd->cqe_flags16[i] & mask16) != match16;
+ n_left = n_rx_packets % 16;
- if (!u16x16_is_all_zero (r))
- skip_ip4_cksum = 0;
+ for (int i = 0; i < n_rx_packets / 16; i++)
+ r16 |= (ptd->cqe_flags16[i] & mask16) != match16;
- for (int i = 0; i < n_rx_packets; i += 8)
- *(u32x8 *) (bc + i) = u32x8_byte_swap (*(u32x8 *) (bc + i));
+ if (!u16x16_is_all_zero (r16))
+ skip_ip4_cksum = 0;
+
+ for (int i = 0; i < (n_rx_packets - n_left); i += 8)
+ *(u32x8 *) (bc + i) = u32x8_byte_swap (*(u32x8 *) (bc + i));
+ }
#elif defined CLIB_HAVE_VEC128
- u16x8 mask8 = u16x8_splat (mask);
- u16x8 match8 = u16x8_splat (match);
- u16x8 r = { };
+ if (n_rx_packets >= 8)
+ {
+ u16x8 mask8 = u16x8_splat (mask);
+ u16x8 match8 = u16x8_splat (match);
+ u16x8 r8 = {};
- for (int i = 0; i * 8 < n_rx_packets; i++)
- r |= (ptd->cqe_flags8[i] & mask8) != match8;
+ n_left = n_rx_packets % 8;
- if (!u16x8_is_all_zero (r))
- skip_ip4_cksum = 0;
+ for (int i = 0; i < n_rx_packets / 8; i++)
+ r8 |= (ptd->cqe_flags8[i] & mask8) != match8;
- for (int i = 0; i < n_rx_packets; i += 4)
- *(u32x4 *) (bc + i) = u32x4_byte_swap (*(u32x4 *) (bc + i));
-#else
- for (int i = 0; i < n_rx_packets; i++)
+ if (!u16x8_is_all_zero (r8))
+ skip_ip4_cksum = 0;
+
+ for (int i = 0; i < (n_rx_packets - n_left); i += 4)
+ *(u32x4 *) (bc + i) = u32x4_byte_swap (*(u32x4 *) (bc + i));
+ }
+#endif
+ for (int i = (n_rx_packets - n_left); i < n_rx_packets; i++)
if ((ptd->cqe_flags[i] & mask) != match)
skip_ip4_cksum = 0;
- for (int i = 0; i < n_rx_packets; i++)
+ for (int i = (n_rx_packets - n_left); i < n_rx_packets; i++)
bc[i] = clib_net_to_host_u32 (bc[i]);
-#endif
+
return skip_ip4_cksum;
}