summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenoît Ganne <bganne@cisco.com>2019-05-22 18:09:19 +0200
committerDamjan Marion <dmarion@me.com>2019-05-24 08:24:17 +0000
commitf2d5cdbfa674a2ac9e81fd49d69594f0cdbcffd3 (patch)
treead54d865325c51b4ca29b109c4015885b0fa8f69
parent34716fae918750e4fc7a7da4b06e0dfbdef2d1c5 (diff)
rdma: add support for promiscuous mode
rdma interfaces filter packets per MAC by default, in order to share the physical interface between multiple users (e.g. VPP and Linux). When configured in promiscuous mode, all packets will go to this interface, regardless of the MAC. All other interfaces will no longer receive any packets while it is in promiscuous mode. Promiscuous mode is needed (and automatically turned on) for L2 paths (l2patch, xconnect, bridge...). Change-Id: I4c0eb4421f51d116e635e7828d00f202f4a97ded Signed-off-by: Benoît Ganne <bganne@cisco.com>
-rw-r--r--src/plugins/rdma/device.c205
-rw-r--r--src/plugins/rdma/rdma.h11
2 files changed, 145 insertions, 71 deletions
diff --git a/src/plugins/rdma/device.c b/src/plugins/rdma/device.c
index b6609ca4bdc..0fddc3a3a5d 100644
--- a/src/plugins/rdma/device.c
+++ b/src/plugins/rdma/device.c
@@ -45,18 +45,139 @@ static u8 rdma_rss_hash_key[] = {
rdma_main_t rdma_main;
-#define rdma_log_debug(dev, f, ...) \
-{ \
- vlib_log(VLIB_LOG_LEVEL_DEBUG, rdma_main.log_class, "%U: " f, \
- format_vlib_pci_addr, &rd->pci_addr, ##__VA_ARGS__); \
-};
+#define rdma_log__(lvl, dev, f, ...) \
+ do { \
+ vlib_log((lvl), rdma_main.log_class, "%s: " f, \
+ &(dev)->name, ##__VA_ARGS__); \
+ } while (0)
+
+#define rdma_log(lvl, dev, f, ...) \
+ rdma_log__((lvl), (dev), "%s (%d): " f, strerror(errno), errno, ##__VA_ARGS__)
+
+static struct ibv_flow *
+rdma_rxq_init_flow (const rdma_device_t * rd, struct ibv_qp *qp,
+ const mac_address_t * mac, const mac_address_t * mask,
+ u32 flags)
+{
+ struct ibv_flow *flow;
+ struct raw_eth_flow_attr
+ {
+ struct ibv_flow_attr attr;
+ struct ibv_flow_spec_eth spec_eth;
+ } __attribute__ ((packed)) fa;
+
+ memset (&fa, 0, sizeof (fa));
+ fa.attr.num_of_specs = 1;
+ fa.attr.port = 1;
+ fa.attr.flags = flags;
+ fa.spec_eth.type = IBV_FLOW_SPEC_ETH;
+ fa.spec_eth.size = sizeof (struct ibv_flow_spec_eth);
+
+ memcpy (fa.spec_eth.val.dst_mac, mac, sizeof (fa.spec_eth.val.dst_mac));
+ memcpy (fa.spec_eth.mask.dst_mac, mask, sizeof (fa.spec_eth.mask.dst_mac));
+
+ flow = ibv_create_flow (qp, &fa.attr);
+ if (!flow)
+ rdma_log (VLIB_LOG_LEVEL_ERR, rd, "ibv_create_flow() failed");
+ return flow;
+}
+
+static u32
+rdma_rxq_destroy_flow (const rdma_device_t * rd, struct ibv_flow **flow)
+{
+ if (!*flow)
+ return 0;
+
+ if (ibv_destroy_flow (*flow))
+ {
+ rdma_log (VLIB_LOG_LEVEL_ERR, rd, "ibv_destroy_flow() failed");
+ return ~0;
+ }
+
+ *flow = 0;
+ return 0;
+}
+
+static u32
+rdma_dev_set_promisc (rdma_device_t * rd)
+{
+ const mac_address_t all = {.bytes = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0} };
+ int err;
+
+ err = rdma_rxq_destroy_flow (rd, &rd->flow_mcast);
+ if (err)
+ return ~0;
+
+ err = rdma_rxq_destroy_flow (rd, &rd->flow_ucast);
+ if (err)
+ return ~0;
+
+ rd->flow_ucast = rdma_rxq_init_flow (rd, rd->rx_qp, &all, &all, 0);
+ if (!rd->flow_ucast)
+ return ~0;
+
+ rd->flags |= RDMA_DEVICE_F_PROMISC;
+ return 0;
+}
+
+static u32
+rdma_dev_set_ucast (rdma_device_t * rd)
+{
+ const mac_address_t ucast = {.bytes = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
+ };
+ const mac_address_t mcast = {.bytes = {0x1, 0x0, 0x0, 0x0, 0x0, 0x0} };
+ int err;
+
+ err = rdma_rxq_destroy_flow (rd, &rd->flow_mcast);
+ if (err)
+ return ~0;
+
+ err = rdma_rxq_destroy_flow (rd, &rd->flow_ucast);
+ if (err)
+ return ~0;
+
+ /* receive only packets with dst = our MAC */
+ rd->flow_ucast = rdma_rxq_init_flow (rd, rd->rx_qp, &rd->hwaddr, &ucast, 0);
+ if (!rd->flow_ucast)
+ return ~0;
+
+ /* receive multicast packets */
+ rd->flow_mcast = rdma_rxq_init_flow (rd, rd->rx_qp, &mcast, &mcast,
+ IBV_FLOW_ATTR_FLAGS_DONT_TRAP
+ /* let others receive mcast packets too (e.g. Linux) */
+ );
+ if (!rd->flow_mcast)
+ return ~0;
+
+ rd->flags &= ~RDMA_DEVICE_F_PROMISC;
+ return 0;
+}
+
+static u32
+rdma_dev_change_mtu (rdma_device_t * rd)
+{
+ rdma_log__ (VLIB_LOG_LEVEL_ERR, rd, "MTU change not supported");
+ return ~0;
+}
static u32
rdma_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags)
{
rdma_main_t *rm = &rdma_main;
- vlib_log_warn (rm->log_class, "TODO");
- return 0;
+ rdma_device_t *rd = vec_elt_at_index (rm->devices, hw->dev_instance);
+
+ switch (flags)
+ {
+ case 0:
+ return rdma_dev_set_ucast (rd);
+ case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
+ return rdma_dev_set_promisc (rd);
+ case ETHERNET_INTERFACE_FLAG_MTU:
+ return rdma_dev_change_mtu (rd);
+ }
+
+ rdma_log__ (VLIB_LOG_LEVEL_ERR, rd, "unknown flag %x requested", flags);
+ return ~0;
}
static void
@@ -145,9 +266,7 @@ rdma_async_event_read_ready (clib_file_t * f)
struct ibv_async_event event;
ret = ibv_get_async_event (rd->ctx, &event);
if (ret < 0)
- {
- return clib_error_return_unix (0, "ibv_get_async_event() failed");
- }
+ return clib_error_return_unix (0, "ibv_get_async_event() failed");
switch (event.event_type)
{
@@ -164,9 +283,8 @@ rdma_async_event_read_ready (clib_file_t * f)
format_vlib_pci_addr, &rd->pci_addr);
break;
default:
- vlib_log_warn (rm->log_class,
- "Unhandeld RDMA async event %i for device %U",
- event.event_type, format_vlib_pci_addr, &rd->pci_addr);
+ rdma_log__ (VLIB_LOG_LEVEL_ERR, rd, "unhandeld RDMA async event %i",
+ event.event_type);
break;
}
@@ -183,14 +301,11 @@ rdma_async_event_init (rdma_device_t * rd)
/* make RDMA async event fd non-blocking */
ret = fcntl (rd->ctx->async_fd, F_GETFL);
if (ret < 0)
- {
- return clib_error_return_unix (0, "fcntl(F_GETFL) failed");
- }
+ return clib_error_return_unix (0, "fcntl(F_GETFL) failed");
+
ret = fcntl (rd->ctx->async_fd, F_SETFL, ret | O_NONBLOCK);
if (ret < 0)
- {
- return clib_error_return_unix (0, "fcntl(F_SETFL, O_NONBLOCK) failed");
- }
+ return clib_error_return_unix (0, "fcntl(F_SETFL, O_NONBLOCK) failed");
/* register RDMA async event fd */
t.read_function = rdma_async_event_read_ready;
@@ -201,7 +316,6 @@ rdma_async_event_init (rdma_device_t * rd)
format (0, "RMDA %U async event", format_vlib_pci_addr, &rd->pci_addr);
rd->async_event_clib_file_index = clib_file_add (&file_main, &t);
-
return 0;
}
@@ -238,7 +352,7 @@ rdma_dev_cleanup (rdma_device_t * rd)
{ \
int rv; \
if ((rv = fn (arg))) \
- rdma_log_debug (rd, #fn "() failed (rv = %d)", rv); \
+ rdma_log (VLIB_LOG_LEVEL_DEBUG, rd, #fn "() failed (rv = %d)", rv); \
}
_(ibv_destroy_flow, rd->flow_mcast);
@@ -269,33 +383,6 @@ rdma_dev_cleanup (rdma_device_t * rd)
}
static clib_error_t *
-rdma_rxq_init_flow (struct ibv_flow **flow, struct ibv_qp *qp,
- const mac_address_t * mac, const mac_address_t * mask,
- u32 flags)
-{
- struct raw_eth_flow_attr
- {
- struct ibv_flow_attr attr;
- struct ibv_flow_spec_eth spec_eth;
- } __attribute__ ((packed)) fa;
-
- memset (&fa, 0, sizeof (fa));
- fa.attr.num_of_specs = 1;
- fa.attr.port = 1;
- fa.attr.flags = flags;
- fa.spec_eth.type = IBV_FLOW_SPEC_ETH;
- fa.spec_eth.size = sizeof (struct ibv_flow_spec_eth);
-
- memcpy (fa.spec_eth.val.dst_mac, mac, sizeof (fa.spec_eth.val.dst_mac));
- memcpy (fa.spec_eth.mask.dst_mac, mask, sizeof (fa.spec_eth.mask.dst_mac));
-
- if ((*flow = ibv_create_flow (qp, &fa.attr)) == 0)
- return clib_error_return_unix (0, "create Flow Failed");
-
- return 0;
-}
-
-static clib_error_t *
rdma_rxq_init (vlib_main_t * vm, rdma_device_t * rd, u16 qid, u32 n_desc)
{
rdma_rxq_t *rxq;
@@ -332,11 +419,7 @@ rdma_rxq_finalize (vlib_main_t * vm, rdma_device_t * rd)
{
struct ibv_rwq_ind_table_init_attr rwqia;
struct ibv_qp_init_attr_ex qpia;
- const mac_address_t ucast = {.bytes = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
- };
- const mac_address_t mcast = {.bytes = {0x1, 0x0, 0x0, 0x0, 0x0, 0x0} };
struct ibv_wq **ind_tbl;
- clib_error_t *err;
u32 i;
ASSERT (is_pow2 (vec_len (rd->rxqs))
@@ -368,16 +451,10 @@ rdma_rxq_finalize (vlib_main_t * vm, rdma_device_t * rd)
if ((rd->rx_qp = ibv_create_qp_ex (rd->ctx, &qpia)) == 0)
return clib_error_return_unix (0, "Queue Pair create failed");
- /* receive only packets with src = our MAC */
- if ((err =
- rdma_rxq_init_flow (&rd->flow_ucast, rd->rx_qp, &rd->hwaddr, &ucast,
- 0)) != 0)
- return err;
- /* receive multicast packets */
- return rdma_rxq_init_flow (&rd->flow_mcast, rd->rx_qp, &mcast, &mcast,
- IBV_FLOW_ATTR_FLAGS_DONT_TRAP
- /* let others receive mcast packet too (eg. Linux) */
- );
+ if (rdma_dev_set_ucast (rd))
+ return clib_error_return_unix (0, "Set unicast mode failed");
+
+ return 0;
}
static clib_error_t *
@@ -541,7 +618,7 @@ rdma_create_if (vlib_main_t * vm, rdma_create_if_args_t * args)
clib_error_return_unix (0,
"no RDMA devices available, errno = %d. "
"Is the ib_uverbs module loaded?", errno);
- goto err1;
+ goto err0;
}
for (int i = 0; i < n_devs; i++)
@@ -563,7 +640,7 @@ rdma_create_if (vlib_main_t * vm, rdma_create_if_args_t * args)
if ((args->error =
rdma_dev_init (vm, rd, args->rxq_size, args->txq_size, args->rxq_num)))
- goto err2;
+ goto err1;
if ((args->error = rdma_register_interface (vnm, rd)))
goto err2;
diff --git a/src/plugins/rdma/rdma.h b/src/plugins/rdma/rdma.h
index 82b62b34ed5..c7df6f72fdf 100644
--- a/src/plugins/rdma/rdma.h
+++ b/src/plugins/rdma/rdma.h
@@ -22,13 +22,10 @@
#include <vlib/log.h>
#define foreach_rdma_device_flags \
- _(0, INITIALIZED, "initialized") \
- _(1, ERROR, "error") \
- _(2, ADMIN_UP, "admin-up") \
- _(3, VA_DMA, "vaddr-dma") \
- _(4, LINK_UP, "link-up") \
- _(5, SHARED_TXQ_LOCK, "shared-txq-lock") \
- _(6, ELOG, "elog") \
+ _(0, ERROR, "error") \
+ _(1, ADMIN_UP, "admin-up") \
+ _(2, LINK_UP, "link-up") \
+ _(3, PROMISC, "promiscuous")
enum
{
class="k">for i in self.pg_interfaces: i.admin_up() if table_id != 0: tbl = VppIpTable(self, table_id) tbl.add_vpp_config() tbl = VppIpTable(self, table_id, is_ip6=1) tbl.add_vpp_config() i.set_table_ip4(table_id) i.set_table_ip6(table_id) i.config_ip4() i.resolve_arp() i.config_ip6() i.resolve_ndp() table_id += 1 def tearDown(self): for i in self.pg_interfaces: i.unconfig_ip4() i.unconfig_ip6() i.ip6_disable() i.set_table_ip4(0) i.set_table_ip6(0) i.admin_down() super(TestSVS, self).tearDown() def test_svs4(self): """ Source VRF Select IP4 """ # # packets destined out of the 3 non-default table interfaces # pkts_0 = [(Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / IP(src="1.1.1.1", dst=self.pg1.remote_ip4) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100)), (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / IP(src="2.2.2.2", dst=self.pg2.remote_ip4) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100)), (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / IP(src="3.3.3.3", dst=self.pg3.remote_ip4) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100))] pkts_1 = [(Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / IP(src="1.1.1.1", dst=self.pg1.remote_ip4) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100)), (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / IP(src="2.2.2.2", dst=self.pg2.remote_ip4) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100)), (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / IP(src="3.3.3.3", dst=self.pg3.remote_ip4) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100))] # # before adding the SVS config all these packets are dropped when # ingressing on pg0 since pg0 is in the default table # for p in pkts_0: self.send_and_assert_no_replies(self.pg0, p * 1) # # Add table 1001 & 1002 into which we'll add the routes # determining the source VRF selection # table_ids = [101, 102] for table_id in table_ids: self.vapi.svs_table_add_del( VppEnum.vl_api_address_family_t.ADDRESS_IP4, table_id) # # map X.0.0.0/8 to 
each SVS table for lookup in table X # for i in range(1, 4): self.vapi.svs_route_add_del( table_id, "%d.0.0.0/8" % i, i) # # Enable SVS on pg0/pg1 using table 1001/1002 # self.vapi.svs_enable_disable( VppEnum.vl_api_address_family_t.ADDRESS_IP4, table_ids[0], self.pg0.sw_if_index) self.vapi.svs_enable_disable( VppEnum.vl_api_address_family_t.ADDRESS_IP4, table_ids[1], self.pg1.sw_if_index) # # now all the packets should be delivered out the respective interface # self.send_and_expect(self.pg0, pkts_0[0] * 65, self.pg1) self.send_and_expect(self.pg0, pkts_0[1] * 65, self.pg2) self.send_and_expect(self.pg0, pkts_0[2] * 65, self.pg3) self.send_and_expect(self.pg1, pkts_1[0] * 65, self.pg1) self.send_and_expect(self.pg1, pkts_1[1] * 65, self.pg2) self.send_and_expect(self.pg1, pkts_1[2] * 65, self.pg3) # # check that if the SVS lookup does not match a route the packet # is forwarded using the interface's routing table # p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / IP(src=self.pg0.remote_ip4, dst=self.pg0.remote_ip4) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100)) self.send_and_expect(self.pg0, p * 65, self.pg0) p = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / IP(src=self.pg1.remote_ip4, dst=self.pg1.remote_ip4) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100)) self.send_and_expect(self.pg1, p * 65, self.pg1) # # dump the SVS configs # ss = self.vapi.svs_dump() self.assertEqual(ss[0].table_id, table_ids[0]) self.assertEqual(ss[0].sw_if_index, self.pg0.sw_if_index) self.assertEqual(ss[0].af, VppEnum.vl_api_address_family_t.ADDRESS_IP4) self.assertEqual(ss[1].table_id, table_ids[1]) self.assertEqual(ss[1].sw_if_index, self.pg1.sw_if_index) self.assertEqual(ss[1].af, VppEnum.vl_api_address_family_t.ADDRESS_IP4) # # cleanup # self.vapi.svs_enable_disable( VppEnum.vl_api_address_family_t.ADDRESS_IP4, table_ids[0], self.pg0.sw_if_index, is_enable=0) self.vapi.svs_enable_disable( VppEnum.vl_api_address_family_t.ADDRESS_IP4, table_ids[1], 
self.pg1.sw_if_index, is_enable=0) for table_id in table_ids: for i in range(1, 4): self.vapi.svs_route_add_del( table_id, "%d.0.0.0/8" % i, 0, is_add=0) self.vapi.svs_table_add_del( VppEnum.vl_api_address_family_t.ADDRESS_IP4, table_id, is_add=0) def test_svs6(self): """ Source VRF Select IP6 """ # # packets destined out of the 3 non-default table interfaces # pkts_0 = [(Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / IPv6(src="2001:1::1", dst=self.pg1.remote_ip6) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100)), (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / IPv6(src="2001:2::1", dst=self.pg2.remote_ip6) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100)), (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / IPv6(src="2001:3::1", dst=self.pg3.remote_ip6) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100))] pkts_1 = [(Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / IPv6(src="2001:1::1", dst=self.pg1.remote_ip6) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100)), (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / IPv6(src="2001:2::1", dst=self.pg2.remote_ip6) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100)), (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / IPv6(src="2001:3::1", dst=self.pg3.remote_ip6) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100))] # # before adding the SVS config all these packets are dropped when # ingressing on pg0 since pg0 is in the default table # for p in pkts_0: self.send_and_assert_no_replies(self.pg0, p * 1) # # Add table 1001 & 1002 into which we'll add the routes # determining the source VRF selection # table_ids = [101, 102] for table_id in table_ids: self.vapi.svs_table_add_del( VppEnum.vl_api_address_family_t.ADDRESS_IP6, table_id) # # map X.0.0.0/8 to each SVS table for lookup in table X # for i in range(1, 4): self.vapi.svs_route_add_del( table_id, "2001:%d::/32" % i, i) # # Enable SVS on pg0/pg1 using table 1001/1002 # self.vapi.svs_enable_disable( 
VppEnum.vl_api_address_family_t.ADDRESS_IP6, table_ids[0], self.pg0.sw_if_index) self.vapi.svs_enable_disable( VppEnum.vl_api_address_family_t.ADDRESS_IP6, table_ids[1], self.pg1.sw_if_index) # # now all the packets should be delivered out the respective interface # self.send_and_expect(self.pg0, pkts_0[0] * 65, self.pg1) self.send_and_expect(self.pg0, pkts_0[1] * 65, self.pg2) self.send_and_expect(self.pg0, pkts_0[2] * 65, self.pg3) self.send_and_expect(self.pg1, pkts_1[0] * 65, self.pg1) self.send_and_expect(self.pg1, pkts_1[1] * 65, self.pg2) self.send_and_expect(self.pg1, pkts_1[2] * 65, self.pg3) # # check that if the SVS lookup does not match a route the packet # is forwarded using the interface's routing table # p = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / IPv6(src=self.pg0.remote_ip6, dst=self.pg0.remote_ip6) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100)) self.send_and_expect(self.pg0, p * 65, self.pg0) p = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / IPv6(src=self.pg1.remote_ip6, dst=self.pg1.remote_ip6) / UDP(sport=1234, dport=1234) / Raw('\xa5' * 100)) self.send_and_expect(self.pg1, p * 65, self.pg1) # # dump the SVS configs # ss = self.vapi.svs_dump() self.assertEqual(ss[0].table_id, table_ids[0]) self.assertEqual(ss[0].sw_if_index, self.pg0.sw_if_index) self.assertEqual(ss[0].af, VppEnum.vl_api_address_family_t.ADDRESS_IP6) self.assertEqual(ss[1].table_id, table_ids[1]) self.assertEqual(ss[1].sw_if_index, self.pg1.sw_if_index) self.assertEqual(ss[1].af, VppEnum.vl_api_address_family_t.ADDRESS_IP6) # # cleanup # self.vapi.svs_enable_disable( VppEnum.vl_api_address_family_t.ADDRESS_IP6, table_ids[0], self.pg0.sw_if_index, is_enable=0) self.vapi.svs_enable_disable( VppEnum.vl_api_address_family_t.ADDRESS_IP6, table_ids[1], self.pg1.sw_if_index, is_enable=0) for table_id in table_ids: for i in range(1, 4): self.vapi.svs_route_add_del( table_id, "2001:%d::/32" % i, 0, is_add=0) self.vapi.svs_table_add_del( 
VppEnum.vl_api_address_family_t.ADDRESS_IP6, table_id, is_add=0) if __name__ == '__main__': unittest.main(testRunner=VppTestRunner)