Diffstat (limited to 'drivers/net/mlx5/mlx5_rxtx.c')
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx.c | 112
1 file changed, 58 insertions(+), 54 deletions(-)
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 52785946..2d14f8a6 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -38,7 +38,7 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);
static __rte_always_inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
- uint16_t cqe_cnt, uint32_t *rss_hash);
+ uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);
static __rte_always_inline uint32_t
rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);
@@ -495,6 +495,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
unsigned int segs_n = 0;
const unsigned int max_inline = txq->max_inline;
+ uint64_t addr_64;
if (unlikely(!pkts_n))
return 0;
@@ -503,8 +504,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
/* Start processing. */
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
- /* A CQE slot must always be available. */
- assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
if (unlikely(!max_wqe))
return 0;
@@ -711,12 +710,12 @@ pkt_inline:
ds = 3;
use_dseg:
/* Add the remaining packet as a simple ds. */
- addr = rte_cpu_to_be_64(addr);
+ addr_64 = rte_cpu_to_be_64(addr);
*dseg = (rte_v128u32_t){
rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
- addr,
- addr >> 32,
+ addr_64,
+ addr_64 >> 32,
};
++ds;
if (!segs_n)
@@ -750,12 +749,12 @@ next_seg:
total_length += length;
#endif
/* Store segment information. */
- addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
+ addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
*dseg = (rte_v128u32_t){
rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
- addr,
- addr >> 32,
+ addr_64,
+ addr_64 >> 32,
};
(*txq->elts)[++elts_head & elts_m] = buf;
if (--segs_n)
@@ -816,14 +815,13 @@ next_wqe:
/* Check whether completion threshold has been reached. */
comp = txq->elts_comp + i + j + k;
if (comp >= MLX5_TX_COMP_THRESH) {
+ /* A CQE slot must always be available. */
+ assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request completion on last WQE. */
last_wqe->ctrl2 = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
last_wqe->ctrl3 = txq->elts_head;
txq->elts_comp = 0;
-#ifndef NDEBUG
- ++txq->cq_pi;
-#endif
} else {
txq->elts_comp = comp;
}
@@ -942,8 +940,6 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
/* Start processing. */
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
- /* A CQE slot must always be available. */
- assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
if (unlikely(!max_wqe))
return 0;
@@ -1032,14 +1028,13 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (comp >= MLX5_TX_COMP_THRESH) {
volatile struct mlx5_wqe *wqe = mpw.wqe;
+ /* A CQE slot must always be available. */
+ assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request completion on last WQE. */
wqe->ctrl[2] = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
-#ifndef NDEBUG
- ++txq->cq_pi;
-#endif
} else {
txq->elts_comp = comp;
}
@@ -1171,8 +1166,6 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
/* Start processing. */
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
- /* A CQE slot must always be available. */
- assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
do {
struct rte_mbuf *buf = *(pkts++);
uintptr_t addr;
@@ -1329,14 +1322,13 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
if (comp >= MLX5_TX_COMP_THRESH) {
volatile struct mlx5_wqe *wqe = mpw.wqe;
+ /* A CQE slot must always be available. */
+ assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request completion on last WQE. */
wqe->ctrl[2] = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
-#ifndef NDEBUG
- ++txq->cq_pi;
-#endif
} else {
txq->elts_comp = comp;
}
@@ -1450,6 +1442,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
unsigned int mpw_room = 0;
unsigned int inl_pad = 0;
uint32_t inl_hdr;
+ uint64_t addr_64;
struct mlx5_mpw mpw = {
.state = MLX5_MPW_STATE_CLOSED,
};
@@ -1459,8 +1452,6 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
/* Start processing. */
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
- /* A CQE slot must always be available. */
- assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
if (unlikely(!max_wqe))
return 0;
@@ -1586,13 +1577,13 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
((uintptr_t)mpw.data.raw +
inl_pad);
(*txq->elts)[elts_head++ & elts_m] = buf;
- addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
- uintptr_t));
+ addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+ uintptr_t));
*dseg = (rte_v128u32_t) {
rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
- addr,
- addr >> 32,
+ addr_64,
+ addr_64 >> 32,
};
mpw.data.raw = (volatile void *)(dseg + 1);
mpw.total_len += (inl_pad + sizeof(*dseg));
@@ -1616,15 +1607,14 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
(1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) {
volatile struct mlx5_wqe *wqe = mpw.wqe;
+ /* A CQE slot must always be available. */
+ assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request completion on last WQE. */
wqe->ctrl[2] = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
txq->mpw_comp = txq->wqe_ci;
-#ifndef NDEBUG
- ++txq->cq_pi;
-#endif
} else {
txq->elts_comp += j;
}
@@ -1722,8 +1712,9 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
* Pointer to RX queue.
* @param cqe
* CQE to process.
- * @param[out] rss_hash
- * Packet RSS Hash result.
+ * @param[out] mcqe
+ * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
+ * written.
*
* @return
* Packet size in bytes (0 if there is none), -1 in case of completion
@@ -1731,7 +1722,7 @@ rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
*/
static inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
- uint16_t cqe_cnt, uint32_t *rss_hash)
+ uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
{
struct rxq_zip *zip = &rxq->zip;
uint16_t cqe_n = cqe_cnt + 1;
@@ -1745,7 +1736,7 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
(uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info);
len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt);
- *rss_hash = rte_be_to_cpu_32((*mc)[zip->ai & 7].rx_hash_result);
+ *mcqe = &(*mc)[zip->ai & 7];
if ((++zip->ai & 7) == 0) {
/* Invalidate consumed CQEs */
idx = zip->ca;
@@ -1810,7 +1801,7 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
/* Get packet size to return. */
len = rte_be_to_cpu_32((*mc)[0].byte_cnt);
- *rss_hash = rte_be_to_cpu_32((*mc)[0].rx_hash_result);
+ *mcqe = &(*mc)[0];
zip->ai = 1;
/* Prefetch all the entries to be invalidated */
idx = zip->ca;
@@ -1821,7 +1812,6 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
}
} else {
len = rte_be_to_cpu_32(cqe->byte_cnt);
- *rss_hash = rte_be_to_cpu_32(cqe->rx_hash_res);
}
/* Error while receiving packet. */
if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR))
@@ -1934,7 +1924,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
volatile struct mlx5_wqe_data_seg *wqe =
&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
struct rte_mbuf *rep = (*rxq->elts)[idx];
- uint32_t rss_hash_res = 0;
+ volatile struct mlx5_mini_cqe8 *mcqe = NULL;
+ uint32_t rss_hash_res;
if (pkt)
NEXT(seg) = rep;
@@ -1964,8 +1955,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
}
if (!pkt) {
cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
- len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt,
- &rss_hash_res);
+ len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
if (!len) {
rte_mbuf_raw_free(rep);
break;
@@ -1979,6 +1969,10 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
pkt = seg;
assert(len >= (rxq->crc_present << 2));
pkt->ol_flags = 0;
+ /* If compressed, take hash result from mini-CQE. */
+ rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ?
+ cqe->rx_hash_res :
+ mcqe->rx_hash_result);
rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
if (rxq->crc_present)
len -= ETHER_CRC_LEN;
@@ -2104,7 +2098,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
unsigned int i = 0;
uint16_t rq_ci = rxq->rq_ci;
- uint16_t strd_idx = rxq->strd_ci;
+ uint16_t consumed_strd = rxq->consumed_strd;
struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
while (i < pkts_n) {
@@ -2112,12 +2106,14 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
void *addr;
int ret;
unsigned int len;
- uint16_t consumed_strd;
+ uint16_t strd_cnt;
+ uint16_t strd_idx;
uint32_t offset;
uint32_t byte_cnt;
+ volatile struct mlx5_mini_cqe8 *mcqe = NULL;
uint32_t rss_hash_res = 0;
- if (strd_idx == strd_n) {
+ if (consumed_strd == strd_n) {
/* Replace WQE only if the buffer is still in use. */
if (rte_atomic16_read(&buf->refcnt) > 1) {
mprq_buf_replace(rxq, rq_ci & wq_mask);
@@ -2137,12 +2133,12 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
rxq->mprq_repl = rep;
}
/* Advance to the next WQE. */
- strd_idx = 0;
+ consumed_strd = 0;
++rq_ci;
buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
}
cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
- ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &rss_hash_res);
+ ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
if (!ret)
break;
if (unlikely(ret == -1)) {
@@ -2151,14 +2147,21 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
continue;
}
byte_cnt = ret;
- consumed_strd = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
- MLX5_MPRQ_STRIDE_NUM_SHIFT;
- assert(consumed_strd);
- /* Calculate offset before adding up stride index. */
- offset = strd_idx * strd_sz + strd_shift;
- strd_idx += consumed_strd;
+ strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
+ MLX5_MPRQ_STRIDE_NUM_SHIFT;
+ assert(strd_cnt);
+ consumed_strd += strd_cnt;
if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
continue;
+ if (mcqe == NULL) {
+ rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
+ strd_idx = rte_be_to_cpu_16(cqe->wqe_counter);
+ } else {
+ /* mini-CQE for MPRQ doesn't have hash result. */
+ strd_idx = rte_be_to_cpu_16(mcqe->stride_idx);
+ }
+ assert(strd_idx < strd_n);
+ assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask));
/*
* Currently configured to receive a packet per a stride. But if
* MTU is adjusted through kernel interface, device could
@@ -2166,7 +2169,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
* case, the packet should be dropped because it is bigger than
* the max_rx_pkt_len.
*/
- if (unlikely(consumed_strd > 1)) {
+ if (unlikely(strd_cnt > 1)) {
++rxq->stats.idropped;
continue;
}
@@ -2179,6 +2182,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
assert((int)len >= (rxq->crc_present << 2));
if (rxq->crc_present)
len -= ETHER_CRC_LEN;
+ offset = strd_idx * strd_sz + strd_shift;
addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf), offset);
/* Initialize the offload flag. */
pkt->ol_flags = 0;
@@ -2201,7 +2205,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
} else {
rte_iova_t buf_iova;
struct rte_mbuf_ext_shared_info *shinfo;
- uint16_t buf_len = consumed_strd * strd_sz;
+ uint16_t buf_len = strd_cnt * strd_sz;
/* Increment the refcnt of the whole chunk. */
rte_atomic16_add_return(&buf->refcnt, 1);
@@ -2250,7 +2254,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
++i;
}
/* Update the consumer indexes. */
- rxq->strd_ci = strd_idx;
+ rxq->consumed_strd = consumed_strd;
rte_cio_wmb();
*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
if (rq_ci != rxq->rq_ci) {
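
Outside the diff itself, a minimal C sketch of the lookup pattern the Rx changes adopt: mlx5_rx_poll_len now reports the mini-CQE through an out-parameter, and each caller falls back to the regular CQE when the completion was not compressed. The types and names below are simplified, hypothetical stand-ins, not the driver's real descriptors.

/*
 * Sketch only -- hypothetical structs, not the mlx5 hardware layout.
 * When CQE compression is on, the hash result lives in the mini-CQE;
 * otherwise it is read from the regular CQE.
 */
#include <stddef.h>
#include <stdint.h>

struct cqe      { uint32_t rx_hash_res;    };   /* regular completion  */
struct mini_cqe { uint32_t rx_hash_result; };   /* compressed entry    */

static uint32_t
rx_hash(const struct cqe *cqe, const struct mini_cqe *mcqe)
{
	/* NULL mcqe means the completion was not compressed. */
	return (mcqe == NULL) ? cqe->rx_hash_res : mcqe->rx_hash_result;
}

This is why mlx5_rx_burst initializes mcqe to NULL before polling: per the updated doc comment, a regular completion leaves the pointer unwritten, which is exactly the fallback condition tested above.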