aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
authorBenoît Ganne <bganne@cisco.com>2020-04-24 15:52:24 +0200
committerDave Barach <openvpp@barachs.net>2020-04-27 12:19:39 +0000
commit2d25467d130485d804ad91a4df9870d684f70dd6 (patch)
tree2c8a777ef20e342eea1f8d3e36bf6ffb5bf24b86 /src/plugins
parent81284163a293759bc5c2d6a124639c6796589d15 (diff)
rdma: tx: interleave prefetches
Type: improvement Change-Id: Ic2d9b17cf5e524f3ad2a3c5343fe1230aa360e73 Signed-off-by: Benoît Ganne <bganne@cisco.com>
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/rdma/output.c37
1 files changed, 13 insertions, 24 deletions
diff --git a/src/plugins/rdma/output.c b/src/plugins/rdma/output.c
index 2e7fb5978a4..5b181485e49 100644
--- a/src/plugins/rdma/output.c
+++ b/src/plugins/rdma/output.c
@@ -294,7 +294,7 @@ rdma_device_output_tx_mlx5 (vlib_main_t * vm,
wrap_around:
wqe = txq->dv_sq_wqes + (tail & sq_mask);
- while (n >= 4)
+ while (n >= 8)
{
u32 flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
if (PREDICT_FALSE (flags & VLIB_BUFFER_NEXT_PRESENT))
@@ -302,18 +302,16 @@ wrap_around:
n_left_from, n, bi, b, wqe,
tail);
- if (PREDICT_TRUE (n >= 8))
- {
- vlib_prefetch_buffer_header (b[4], LOAD);
- vlib_prefetch_buffer_header (b[5], LOAD);
- vlib_prefetch_buffer_header (b[6], LOAD);
- vlib_prefetch_buffer_header (b[7], LOAD);
- CLIB_PREFETCH (wqe + 4, 4 * sizeof (wqe[0]), STORE);
- }
-
+ vlib_prefetch_buffer_header (b[4], LOAD);
rdma_mlx5_wqe_init (wqe + 0, txq->dv_wqe_tmpl, b[0], tail + 0);
+
+ vlib_prefetch_buffer_header (b[5], LOAD);
rdma_mlx5_wqe_init (wqe + 1, txq->dv_wqe_tmpl, b[1], tail + 1);
+
+ vlib_prefetch_buffer_header (b[6], LOAD);
rdma_mlx5_wqe_init (wqe + 2, txq->dv_wqe_tmpl, b[2], tail + 2);
+
+ vlib_prefetch_buffer_header (b[7], LOAD);
rdma_mlx5_wqe_init (wqe + 3, txq->dv_wqe_tmpl, b[3], tail + 3);
b += 4;
@@ -395,33 +393,24 @@ rdma_device_output_tx_ibverb (vlib_main_t * vm,
struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge;
u32 n = n_left_from;
- while (n >= 4)
+ while (n >= 8)
{
- if (PREDICT_TRUE (n >= 8))
- {
- vlib_prefetch_buffer_header (b[4 + 0], LOAD);
- vlib_prefetch_buffer_header (b[4 + 1], LOAD);
- vlib_prefetch_buffer_header (b[4 + 2], LOAD);
- vlib_prefetch_buffer_header (b[4 + 3], LOAD);
- CLIB_PREFETCH (&s[4 + 0], 4 * sizeof (s[0]), STORE);
- clib_prefetch_store (&w[4 + 0]);
- clib_prefetch_store (&w[4 + 1]);
- clib_prefetch_store (&w[4 + 2]);
- clib_prefetch_store (&w[4 + 3]);
- }
-
+ vlib_prefetch_buffer_header (b[4], LOAD);
s[0].addr = vlib_buffer_get_current_va (b[0]);
s[0].length = b[0]->current_length;
s[0].lkey = rd->lkey;
+ vlib_prefetch_buffer_header (b[5], LOAD);
s[1].addr = vlib_buffer_get_current_va (b[1]);
s[1].length = b[1]->current_length;
s[1].lkey = rd->lkey;
+ vlib_prefetch_buffer_header (b[6], LOAD);
s[2].addr = vlib_buffer_get_current_va (b[2]);
s[2].length = b[2]->current_length;
s[2].lkey = rd->lkey;
+ vlib_prefetch_buffer_header (b[7], LOAD);
s[3].addr = vlib_buffer_get_current_va (b[3]);
s[3].length = b[3]->current_length;
s[3].lkey = rd->lkey;
bold } /* Name.Constant */ .highlight .nd { color: #555555 } /* Name.Decorator */ .highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */ .highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */ .highlight .nl { color: #336699; font-style: italic } /* Name.Label */ .highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */ .highlight .py { color: #336699; font-weight: bold } /* Name.Property */ .highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */ .highlight .nv { color: #336699 } /* Name.Variable */ .highlight .ow { color: #008800 } /* Operator.Word */ .highlight .w { color: #bbbbbb } /* Text.Whitespace */ .highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */ .highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */ .highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */ .highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */ .highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */ .highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */ .highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */ .highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */ .highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */ .highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */ .highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */ .highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */ .highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */ .highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */ .highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */ .highlight .sr { color: #008800; 
background-color: #fff0ff } /* Literal.String.Regex */ .highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */ .highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */ .highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm { color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */ }
# AF_XDP Ethernet driver {#af_xdp_doc}

This driver relies on Linux AF_XDP socket to rx/tx Ethernet packets.

## Maturity level
Under development: it should work, but has not been thoroughly tested.

## Features
 - copy and zero-copy mode
 - multiqueue
 - API
 - custom eBPF program
 - polling, interrupt and adaptive mode

## Limitations
Because of AF_XDP restrictions, the MTU is limited to below PAGE_SIZE
(4096-bytes on most systems) minus 256-bytes, and there are additional
limitations depending upon specific Linux device drivers.
As a rule of thumb, a MTU of 3000-bytes or less should be safe.

## Requirements
The Linux kernel interface must be up and have enough queues before
creating the VPP AF_XDP interface, otherwise Linux will deny creating
the AF_XDP socket.
The AF_XDP interface will claim NIC RX queue starting from 0, up to the
requested number of RX queues (only 1 by default). It means all packets
destined to NIC RX queue `[0, num_rx_queues[` will be received by the
AF_XDP interface, and only them. Depending on your configuration, there
will usually be several RX queues (typically 1 per core) and packets are
spread across queues by RSS. In order to receive consistent traffic,
you **must** program the NIC dispatching accordingly. The simplest way
to get all the packets is to reconfigure the Linux kernel driver to use
only `num_rx_queues` RX queues (i.e. all NIC queues will be associated
with the AF_XDP socket):
```
~# ethtool -L <iface> combined <num_rx_queues>
```
Additionally, the VPP AF_XDP interface will use a MAC address generated at
creation time instead of the Linux kernel interface MAC. As Linux kernel
interfaces are not in promiscuous mode by default (see below), this will
result in a useless configuration where the VPP AF_XDP interface only
receives packets destined to the Linux kernel interface MAC just to drop
them because the destination MAC does not match VPP AF_XDP interface MAC.
If you want to use the Linux interface MAC for the VPP AF_XDP interface,
you can change it afterwards in VPP:
```
~# vppctl set int mac address <iface> <mac>
```
Finally, if you wish to receive all packets and not only the packets
destined to the Linux kernel interface MAC you need to set the Linux
kernel interface in promiscuous mode:
```
~# ip link set dev <iface> promisc on
```

## Security considerations
When creating an AF_XDP interface, it will receive all packets arriving
to the NIC RX queue #0. You need to configure the Linux kernel NIC
driver properly to ensure that only intended packets will arrive in
this queue. There is no way to filter the packets after-the-fact using
e.g. netfilter or eBPF.

## Quickstart
1. Setup the Linux kernel interface (enp216s0f0 here) to use 4 queues:
```
~# ethtool -L enp216s0f0 combined 4
```
2. Put the Linux kernel interface up and in promiscuous mode:
```
~# ip l set dev enp216s0f0 promisc on up
```
3. Create the AF_XDP interface:
```
~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues 4
```
4. Use the interface as usual, e.g.:
```
~# vppctl set int ip addr enp216s0f0/0 1.1.1.1/24
~# vppctl set int st enp216s0f0/0 up
~# vppctl ping 1.1.1.100
```

## Custom eBPF XDP program
This driver relies on libbpf and as such relies on the `xsks_map` eBPF
map.  The default behavior is to use the XDP program already attached
to the interface if any, otherwise load the default one.
You can request to load a custom XDP program with the `prog` option when
creating the interface in VPP:
```
~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues 4 prog extras/bpf/af_xdp.bpf.o
```
In that case it will replace any previously attached program.  A custom
XDP program example is provided in `extras/bpf/`.

## Performance consideration
AF_XDP relies on the Linux kernel NIC driver to rx/tx packets. To reach
high performance (tens of Mpps), the Linux kernel NIC driver must support
zero-copy mode and its RX path must run on a dedicated core in the NUMA
where the NIC is physically connected.