summaryrefslogtreecommitdiffstats
path: root/drivers/net/mlx4/mlx4.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/mlx4/mlx4.c')
-rw-r--r--drivers/net/mlx4/mlx4.c218
1 files changed, 131 insertions, 87 deletions
diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c
index 4f21dbe2..f8ed42b8 100644
--- a/drivers/net/mlx4/mlx4.c
+++ b/drivers/net/mlx4/mlx4.c
@@ -54,7 +54,6 @@
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
-#include <linux/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <fcntl.h>
@@ -197,7 +196,6 @@ struct rxq {
unsigned int sp:1; /* Use scattered RX elements. */
unsigned int csum:1; /* Enable checksum offloading. */
unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
- uint32_t mb_len; /* Length of a mp-issued mbuf. */
struct mlx4_rxq_stats stats; /* RX queue counters. */
unsigned int socket; /* CPU socket ID for allocations. */
struct ibv_exp_res_domain *rd; /* Resource Domain. */
@@ -659,7 +657,15 @@ priv_get_mtu(struct priv *priv, uint16_t *mtu)
static int
priv_set_mtu(struct priv *priv, uint16_t mtu)
{
- return priv_set_sysfs_ulong(priv, "mtu", mtu);
+ uint16_t new_mtu;
+
+ if (priv_set_sysfs_ulong(priv, "mtu", mtu) ||
+ priv_get_mtu(priv, &new_mtu))
+ return -1;
+ if (new_mtu == mtu)
+ return 0;
+ errno = EINVAL;
+ return -1;
}
/**
@@ -998,7 +1004,7 @@ txq_alloc_elts(struct txq *txq, unsigned int elts_n)
}
mr_linear =
ibv_reg_mr(txq->priv->pd, elts_linear, sizeof(*elts_linear),
- (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));
+ IBV_ACCESS_LOCAL_WRITE);
if (mr_linear == NULL) {
ERROR("%p: unable to configure MR, ibv_reg_mr() failed",
(void *)txq);
@@ -1198,8 +1204,71 @@ txq_complete(struct txq *txq)
return 0;
}
+struct mlx4_check_mempool_data {
+ int ret;
+ char *start;
+ char *end;
+};
+
+/* Called by mlx4_check_mempool() when iterating the memory chunks. */
+static void mlx4_check_mempool_cb(struct rte_mempool *mp,
+ void *opaque, struct rte_mempool_memhdr *memhdr,
+ unsigned mem_idx)
+{
+ struct mlx4_check_mempool_data *data = opaque;
+
+ (void)mp;
+ (void)mem_idx;
+
+ /* It already failed, skip the next chunks. */
+ if (data->ret != 0)
+ return;
+ /* It is the first chunk. */
+ if (data->start == NULL && data->end == NULL) {
+ data->start = memhdr->addr;
+ data->end = data->start + memhdr->len;
+ return;
+ }
+ if (data->end == memhdr->addr) {
+ data->end += memhdr->len;
+ return;
+ }
+ if (data->start == (char *)memhdr->addr + memhdr->len) {
+ data->start -= memhdr->len;
+ return;
+ }
+ /* Error, mempool is not virtually contigous. */
+ data->ret = -1;
+}
+
+/**
+ * Check if a mempool can be used: it must be virtually contiguous.
+ *
+ * @param[in] mp
+ * Pointer to memory pool.
+ * @param[out] start
+ * Pointer to the start address of the mempool virtual memory area
+ * @param[out] end
+ * Pointer to the end address of the mempool virtual memory area
+ *
+ * @return
+ * 0 on success (mempool is virtually contiguous), -1 on error.
+ */
+static int mlx4_check_mempool(struct rte_mempool *mp, uintptr_t *start,
+ uintptr_t *end)
+{
+ struct mlx4_check_mempool_data data;
+
+ memset(&data, 0, sizeof(data));
+ rte_mempool_mem_iter(mp, mlx4_check_mempool_cb, &data);
+ *start = (uintptr_t)data.start;
+ *end = (uintptr_t)data.end;
+
+ return data.ret;
+}
+
/* For best performance, this function should not be inlined. */
-static struct ibv_mr *mlx4_mp2mr(struct ibv_pd *, const struct rte_mempool *)
+static struct ibv_mr *mlx4_mp2mr(struct ibv_pd *, struct rte_mempool *)
__attribute__((noinline));
/**
@@ -1214,15 +1283,21 @@ static struct ibv_mr *mlx4_mp2mr(struct ibv_pd *, const struct rte_mempool *)
* Memory region pointer, NULL in case of error.
*/
static struct ibv_mr *
-mlx4_mp2mr(struct ibv_pd *pd, const struct rte_mempool *mp)
+mlx4_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
{
const struct rte_memseg *ms = rte_eal_get_physmem_layout();
- uintptr_t start = mp->elt_va_start;
- uintptr_t end = mp->elt_va_end;
+ uintptr_t start;
+ uintptr_t end;
unsigned int i;
+ if (mlx4_check_mempool(mp, &start, &end) != 0) {
+ ERROR("mempool %p: not virtually contiguous",
+ (void *)mp);
+ return NULL;
+ }
+
DEBUG("mempool %p area start=%p end=%p size=%zu",
- (const void *)mp, (void *)start, (void *)end,
+ (void *)mp, (void *)start, (void *)end,
(size_t)(end - start));
/* Round start and end to page boundary if found in memory segments. */
for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
@@ -1236,12 +1311,12 @@ mlx4_mp2mr(struct ibv_pd *pd, const struct rte_mempool *mp)
end = RTE_ALIGN_CEIL(end, align);
}
DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
- (const void *)mp, (void *)start, (void *)end,
+ (void *)mp, (void *)start, (void *)end,
(size_t)(end - start));
return ibv_reg_mr(pd,
(void *)start,
end - start,
- IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
+ IBV_ACCESS_LOCAL_WRITE);
}
/**
@@ -1276,7 +1351,7 @@ txq_mb2mp(struct rte_mbuf *buf)
* mr->lkey on success, (uint32_t)-1 on failure.
*/
static uint32_t
-txq_mp2mr(struct txq *txq, const struct rte_mempool *mp)
+txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
{
unsigned int i;
struct ibv_mr *mr;
@@ -1294,7 +1369,7 @@ txq_mp2mr(struct txq *txq, const struct rte_mempool *mp)
}
/* Add a new entry, register MR first. */
DEBUG("%p: discovered new memory pool \"%s\" (%p)",
- (void *)txq, mp->name, (const void *)mp);
+ (void *)txq, mp->name, (void *)mp);
mr = mlx4_mp2mr(txq->priv->pd, mp);
if (unlikely(mr == NULL)) {
DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
@@ -1315,12 +1390,11 @@ txq_mp2mr(struct txq *txq, const struct rte_mempool *mp)
txq->mp2mr[i].mr = mr;
txq->mp2mr[i].lkey = mr->lkey;
DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
- (void *)txq, mp->name, (const void *)mp, txq->mp2mr[i].lkey);
+ (void *)txq, mp->name, (void *)mp, txq->mp2mr[i].lkey);
return txq->mp2mr[i].lkey;
}
struct txq_mp2mr_mbuf_check_data {
- const struct rte_mempool *mp;
int ret;
};
@@ -1328,34 +1402,26 @@ struct txq_mp2mr_mbuf_check_data {
* Callback function for rte_mempool_obj_iter() to check whether a given
* mempool object looks like a mbuf.
*
- * @param[in, out] arg
- * Context data (struct txq_mp2mr_mbuf_check_data). Contains mempool pointer
- * and return value.
- * @param[in] start
- * Object start address.
- * @param[in] end
- * Object end address.
+ * @param[in] mp
+ * The mempool pointer
+ * @param[in] arg
+ * Context data (struct txq_mp2mr_mbuf_check_data). Contains the
+ * return value.
+ * @param[in] obj
+ * Object address.
* @param index
- * Unused.
- *
- * @return
- * Nonzero value when object is not a mbuf.
+ * Object index, unused.
*/
static void
-txq_mp2mr_mbuf_check(void *arg, void *start, void *end,
- uint32_t index __rte_unused)
+txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
+ uint32_t index __rte_unused)
{
struct txq_mp2mr_mbuf_check_data *data = arg;
- struct rte_mbuf *buf =
- (void *)((uintptr_t)start + data->mp->header_size);
+ struct rte_mbuf *buf = obj;
- (void)index;
/* Check whether mbuf structure fits element size and whether mempool
* pointer is valid. */
- if (((uintptr_t)end >= (uintptr_t)(buf + 1)) &&
- (buf->pool == data->mp))
- data->ret = 0;
- else
+ if (sizeof(*buf) > mp->elt_size || buf->pool != mp)
data->ret = -1;
}
@@ -1369,28 +1435,16 @@ txq_mp2mr_mbuf_check(void *arg, void *start, void *end,
* Pointer to TX queue structure.
*/
static void
-txq_mp2mr_iter(const struct rte_mempool *mp, void *arg)
+txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
{
struct txq *txq = arg;
struct txq_mp2mr_mbuf_check_data data = {
- .mp = mp,
- .ret = -1,
+ .ret = 0,
};
- /* Discard empty mempools. */
- if (mp->size == 0)
- return;
/* Register mempool only if the first element looks like a mbuf. */
- rte_mempool_obj_iter((void *)mp->elt_va_start,
- 1,
- mp->header_size + mp->elt_size + mp->trailer_size,
- 1,
- mp->elt_pa,
- mp->pg_num,
- mp->pg_shift,
- txq_mp2mr_mbuf_check,
- &data);
- if (data.ret)
+ if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
+ data.ret == -1)
return;
txq_mp2mr(txq, mp);
}
@@ -3075,7 +3129,7 @@ mlx4_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
* cacheline while allocating rep.
*/
rte_prefetch0(seg);
- rep = __rte_mbuf_raw_alloc(rxq->mp);
+ rep = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(rep == NULL)) {
/*
* Unable to allocate a replacement mbuf,
@@ -3104,7 +3158,6 @@ mlx4_rx_burst_sp(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
rep->ol_flags = -1;
#endif
assert(rep->buf_len == seg->buf_len);
- assert(rep->buf_len == rxq->mb_len);
/* Reconfigure sge to use rep instead of seg. */
assert(sge->lkey == rxq->mr->lkey);
sge->addr = ((uintptr_t)rep->buf_addr + seg_headroom);
@@ -3235,8 +3288,8 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
* Fetch initial bytes of packet descriptor into a
* cacheline while allocating rep.
*/
- rte_prefetch0(seg);
- rte_prefetch0(&seg->cacheline1);
+ rte_mbuf_prefetch_part1(seg);
+ rte_mbuf_prefetch_part2(seg);
ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,
&flags);
if (unlikely(ret < 0)) {
@@ -3274,7 +3327,7 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (ret == 0)
break;
len = ret;
- rep = __rte_mbuf_raw_alloc(rxq->mp);
+ rep = rte_mbuf_raw_alloc(rxq->mp);
if (unlikely(rep == NULL)) {
/*
* Unable to allocate a replacement mbuf,
@@ -3525,6 +3578,7 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
unsigned int i, k;
struct ibv_exp_qp_attr mod;
struct ibv_recv_wr *bad_wr;
+ unsigned int mb_len;
int err;
int parent = (rxq == &priv->rxq_parent);
@@ -3533,6 +3587,7 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
(void *)dev, (void *)rxq);
return EINVAL;
}
+ mb_len = rte_pktmbuf_data_room_size(rxq->mp);
DEBUG("%p: rehashing queue %p", (void *)dev, (void *)rxq);
/* Number of descriptors and mbufs currently allocated. */
desc_n = (tmpl.elts_n * (tmpl.sp ? MLX4_PMD_SGE_WR_N : 1));
@@ -3547,9 +3602,10 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq *rxq)
rxq->csum_l2tun = tmpl.csum_l2tun;
}
/* Enable scattered packets support for this queue if necessary. */
+ assert(mb_len >= RTE_PKTMBUF_HEADROOM);
if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
(dev->data->dev_conf.rxmode.max_rx_pkt_len >
- (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
+ (mb_len - RTE_PKTMBUF_HEADROOM))) {
tmpl.sp = 1;
desc_n /= MLX4_PMD_SGE_WR_N;
} else
@@ -3740,7 +3796,7 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
} attr;
enum ibv_exp_query_intf_status status;
struct ibv_recv_wr *bad_wr;
- struct rte_mbuf *buf;
+ unsigned int mb_len;
int ret = 0;
int parent = (rxq == &priv->rxq_parent);
@@ -3756,31 +3812,22 @@ rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,
desc = 1;
goto skip_mr;
}
+ mb_len = rte_pktmbuf_data_room_size(mp);
if ((desc == 0) || (desc % MLX4_PMD_SGE_WR_N)) {
ERROR("%p: invalid number of RX descriptors (must be a"
" multiple of %d)", (void *)dev, MLX4_PMD_SGE_WR_N);
return EINVAL;
}
- /* Get mbuf length. */
- buf = rte_pktmbuf_alloc(mp);
- if (buf == NULL) {
- ERROR("%p: unable to allocate mbuf", (void *)dev);
- return ENOMEM;
- }
- tmpl.mb_len = buf->buf_len;
- assert((rte_pktmbuf_headroom(buf) +
- rte_pktmbuf_tailroom(buf)) == tmpl.mb_len);
- assert(rte_pktmbuf_headroom(buf) == RTE_PKTMBUF_HEADROOM);
- rte_pktmbuf_free(buf);
/* Toggle RX checksum offload if hardware supports it. */
if (priv->hw_csum)
tmpl.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
if (priv->hw_csum_l2tun)
tmpl.csum_l2tun = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
/* Enable scattered packets support for this queue if necessary. */
+ assert(mb_len >= RTE_PKTMBUF_HEADROOM);
if ((dev->data->dev_conf.rxmode.jumbo_frame) &&
(dev->data->dev_conf.rxmode.max_rx_pkt_len >
- (tmpl.mb_len - RTE_PKTMBUF_HEADROOM))) {
+ (mb_len - RTE_PKTMBUF_HEADROOM))) {
tmpl.sp = 1;
desc /= MLX4_PMD_SGE_WR_N;
}
@@ -4723,7 +4770,7 @@ mlx4_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete)
memset(&dev_link, 0, sizeof(dev_link));
dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
(ifr.ifr_flags & IFF_RUNNING));
- ifr.ifr_data = &edata;
+ ifr.ifr_data = (void *)&edata;
if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
strerror(errno));
@@ -4817,6 +4864,7 @@ mlx4_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
/* Reconfigure each RX queue. */
for (i = 0; (i != priv->rxqs_n); ++i) {
struct rxq *rxq = (*priv->rxqs)[i];
+ unsigned int mb_len;
unsigned int max_frame_len;
int sp;
@@ -4826,7 +4874,9 @@ mlx4_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
* toggle scattered support (sp) if necessary. */
max_frame_len = (priv->mtu + ETHER_HDR_LEN +
(ETHER_MAX_VLAN_FRAME_LEN - ETHER_MAX_LEN));
- sp = (max_frame_len > (rxq->mb_len - RTE_PKTMBUF_HEADROOM));
+ mb_len = rte_pktmbuf_data_room_size(rxq->mp);
+ assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+ sp = (max_frame_len > (mb_len - RTE_PKTMBUF_HEADROOM));
/* Provide new values to rxq_setup(). */
dev->data->dev_conf.rxmode.jumbo_frame = sp;
dev->data->dev_conf.rxmode.max_rx_pkt_len = max_frame_len;
@@ -4882,7 +4932,7 @@ mlx4_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
if (mlx4_is_secondary())
return -E_RTE_SECONDARY;
- ifr.ifr_data = &ethpause;
+ ifr.ifr_data = (void *)&ethpause;
priv_lock(priv);
if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
ret = errno;
@@ -4932,7 +4982,7 @@ mlx4_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
if (mlx4_is_secondary())
return -E_RTE_SECONDARY;
- ifr.ifr_data = &ethpause;
+ ifr.ifr_data = (void *)&ethpause;
ethpause.autoneg = fc_conf->autoneg;
if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||
(fc_conf->mode & RTE_FC_RX_PAUSE))
@@ -5358,7 +5408,7 @@ priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
rte_eal_alarm_cancel(mlx4_dev_link_status_handler, dev);
priv->pending_alarm = 0;
priv->intr_handle.fd = 0;
- priv->intr_handle.type = 0;
+ priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
}
/**
@@ -5757,22 +5807,16 @@ error:
static const struct rte_pci_id mlx4_pci_id_map[] = {
{
- .vendor_id = PCI_VENDOR_ID_MELLANOX,
- .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX3,
- .subsystem_vendor_id = PCI_ANY_ID,
- .subsystem_device_id = PCI_ANY_ID
+ RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+ PCI_DEVICE_ID_MELLANOX_CONNECTX3)
},
{
- .vendor_id = PCI_VENDOR_ID_MELLANOX,
- .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO,
- .subsystem_vendor_id = PCI_ANY_ID,
- .subsystem_device_id = PCI_ANY_ID
+ RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+ PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO)
},
{
- .vendor_id = PCI_VENDOR_ID_MELLANOX,
- .device_id = PCI_DEVICE_ID_MELLANOX_CONNECTX3VF,
- .subsystem_vendor_id = PCI_ANY_ID,
- .subsystem_device_id = PCI_ANY_ID
+ RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+ PCI_DEVICE_ID_MELLANOX_CONNECTX3VF)
},
{
.vendor_id = 0