diff options
Diffstat (limited to 'drivers/net/mlx4/mlx4_flow.c')
-rw-r--r-- | drivers/net/mlx4/mlx4_flow.c | 2062 |
1 files changed, 1153 insertions, 909 deletions
diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c index 925c89c5..8b87b298 100644 --- a/drivers/net/mlx4/mlx4_flow.c +++ b/drivers/net/mlx4/mlx4_flow.c @@ -2,7 +2,7 @@ * BSD LICENSE * * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox. + * Copyright 2017 Mellanox * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -31,197 +31,328 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +/** + * @file + * Flow API operations for mlx4 driver. + */ + +#include <arpa/inet.h> #include <assert.h> +#include <errno.h> +#include <stdalign.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <sys/queue.h> + +/* Verbs headers do not support -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-Wpedantic" +#endif +#include <infiniband/verbs.h> +#ifdef PEDANTIC +#pragma GCC diagnostic error "-Wpedantic" +#endif +#include <rte_byteorder.h> +#include <rte_errno.h> +#include <rte_eth_ctrl.h> +#include <rte_ethdev.h> +#include <rte_ether.h> #include <rte_flow.h> #include <rte_flow_driver.h> #include <rte_malloc.h> -/* Generated configuration header. */ -#include "mlx4_autoconf.h" - /* PMD headers. */ #include "mlx4.h" #include "mlx4_flow.h" +#include "mlx4_rxtx.h" +#include "mlx4_utils.h" -/** Static initializer for items. */ -#define ITEMS(...) \ +/** Static initializer for a list of subsequent item types. */ +#define NEXT_ITEM(...) \ (const enum rte_flow_item_type []){ \ __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \ } -/** Structure to generate a simple graph of layers supported by the NIC. */ -struct mlx4_flow_items { - /** List of possible actions for these items. */ - const enum rte_flow_action_type *const actions; - /** Bit-masks corresponding to the possibilities for the item. */ - const void *mask; - /** - * Default bit-masks to use when item->mask is not provided. When - * \default_mask is also NULL, the full supported bit-mask (\mask) is - * used instead. - */ - const void *default_mask; - /** Bit-masks size in bytes. */ +/** Processor structure associated with a flow item. */ +struct mlx4_flow_proc_item { + /** Bit-mask for fields supported by this PMD. */ + const void *mask_support; + /** Bit-mask to use when @p item->mask is not provided. */ + const void *mask_default; + /** Size in bytes for @p mask_support and @p mask_default. */ const unsigned int mask_sz; - /** - * Check support for a given item. - * - * @param item[in] - * Item specification. - * @param mask[in] - * Bit-masks covering supported fields to compare with spec, - * last and mask in - * \item. - * @param size - * Bit-Mask size in bytes. - * - * @return - * 0 on success, negative value otherwise. - */ - int (*validate)(const struct rte_flow_item *item, - const uint8_t *mask, unsigned int size); - /** - * Conversion function from rte_flow to NIC specific flow. - * - * @param item - * rte_flow item to convert. - * @param default_mask - * Default bit-masks to use when item->mask is not provided. - * @param data - * Internal structure to store the conversion. - * - * @return - * 0 on success, negative value otherwise. - */ - int (*convert)(const struct rte_flow_item *item, - const void *default_mask, - void *data); + /** Merge a pattern item into a flow rule handle. */ + int (*merge)(struct rte_flow *flow, + const struct rte_flow_item *item, + const struct mlx4_flow_proc_item *proc, + struct rte_flow_error *error); /** Size in bytes of the destination structure. */ const unsigned int dst_sz; - /** List of possible following items. */ - const enum rte_flow_item_type *const items; + /** List of possible subsequent items. */ + const enum rte_flow_item_type *const next_item; }; -struct rte_flow_drop { - struct ibv_qp *qp; /**< Verbs queue pair. */ - struct ibv_cq *cq; /**< Verbs completion queue. */ +/** Shared resources for drop flow rules. */ +struct mlx4_drop { + struct ibv_qp *qp; /**< QP target. */ + struct ibv_cq *cq; /**< CQ associated with above QP. */ + struct priv *priv; /**< Back pointer to private data. */ + uint32_t refcnt; /**< Reference count. */ }; -/** Valid action for this PMD. */ -static const enum rte_flow_action_type valid_actions[] = { - RTE_FLOW_ACTION_TYPE_DROP, - RTE_FLOW_ACTION_TYPE_QUEUE, - RTE_FLOW_ACTION_TYPE_RSS, - RTE_FLOW_ACTION_TYPE_END, -}; +/** + * Convert DPDK RSS hash fields to their Verbs equivalent. + * + * @param rss_hf + * Hash fields in DPDK format (see struct rte_eth_rss_conf). + * + * @return + * A valid Verbs RSS hash fields mask for mlx4 on success, (uint64_t)-1 + * otherwise and rte_errno is set. + */ +static uint64_t +mlx4_conv_rss_hf(uint64_t rss_hf) +{ + enum { IPV4, IPV6, TCP, UDP, }; + const uint64_t in[] = { + [IPV4] = (ETH_RSS_IPV4 | + ETH_RSS_FRAG_IPV4 | + ETH_RSS_NONFRAG_IPV4_TCP | + ETH_RSS_NONFRAG_IPV4_UDP | + ETH_RSS_NONFRAG_IPV4_OTHER), + [IPV6] = (ETH_RSS_IPV6 | + ETH_RSS_FRAG_IPV6 | + ETH_RSS_NONFRAG_IPV6_TCP | + ETH_RSS_NONFRAG_IPV6_UDP | + ETH_RSS_NONFRAG_IPV6_OTHER | + ETH_RSS_IPV6_EX | + ETH_RSS_IPV6_TCP_EX | + ETH_RSS_IPV6_UDP_EX), + [TCP] = (ETH_RSS_NONFRAG_IPV4_TCP | + ETH_RSS_NONFRAG_IPV6_TCP | + ETH_RSS_IPV6_TCP_EX), + /* + * UDP support is temporarily disabled due to an + * implementation issue in the kernel. + */ + [UDP] = 0, + }; + const uint64_t out[RTE_DIM(in)] = { + [IPV4] = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4, + [IPV6] = IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6, + [TCP] = IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP, + [UDP] = IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP, + }; + uint64_t seen = 0; + uint64_t conv = 0; + unsigned int i; + + for (i = 0; i != RTE_DIM(in); ++i) + if (rss_hf & in[i]) { + seen |= rss_hf & in[i]; + conv |= out[i]; + } + if (!(rss_hf & ~seen)) + return conv; + rte_errno = ENOTSUP; + return (uint64_t)-1; +} /** - * Convert Ethernet item to Verbs specification. + * Merge Ethernet pattern item into flow rule handle. * - * @param item[in] - * Item specification. - * @param default_mask[in] - * Default bit-masks to use when item->mask is not provided. - * @param data[in, out] - * User structure. + * Additional mlx4-specific constraints on supported fields: + * + * - No support for partial masks, except in the specific case of matching + * all multicast traffic (@p spec->dst and @p mask->dst equal to + * 01:00:00:00:00:00). + * - Not providing @p item->spec or providing an empty @p mask->dst is + * *only* supported if the rule doesn't specify additional matching + * criteria (i.e. rule is promiscuous-like). + * + * @param[in, out] flow + * Flow rule handle to update. + * @param[in] item + * Pattern item to merge. + * @param[in] proc + * Associated item-processing object. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx4_flow_create_eth(const struct rte_flow_item *item, - const void *default_mask, - void *data) +mlx4_flow_merge_eth(struct rte_flow *flow, + const struct rte_flow_item *item, + const struct mlx4_flow_proc_item *proc, + struct rte_flow_error *error) { const struct rte_flow_item_eth *spec = item->spec; - const struct rte_flow_item_eth *mask = item->mask; - struct mlx4_flow *flow = (struct mlx4_flow *)data; + const struct rte_flow_item_eth *mask = + spec ? (item->mask ? item->mask : proc->mask_default) : NULL; struct ibv_flow_spec_eth *eth; - const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth); + const char *msg; unsigned int i; + if (!mask) { + flow->promisc = 1; + } else { + uint32_t sum_dst = 0; + uint32_t sum_src = 0; + + for (i = 0; i != sizeof(mask->dst.addr_bytes); ++i) { + sum_dst += mask->dst.addr_bytes[i]; + sum_src += mask->src.addr_bytes[i]; + } + if (sum_src) { + msg = "mlx4 does not support source MAC matching"; + goto error; + } else if (!sum_dst) { + flow->promisc = 1; + } else if (sum_dst == 1 && mask->dst.addr_bytes[0] == 1) { + if (!(spec->dst.addr_bytes[0] & 1)) { + msg = "mlx4 does not support the explicit" + " exclusion of all multicast traffic"; + goto error; + } + flow->allmulti = 1; + } else if (sum_dst != (UINT8_C(0xff) * ETHER_ADDR_LEN)) { + msg = "mlx4 does not support matching partial" + " Ethernet fields"; + goto error; + } + } + if (!flow->ibv_attr) + return 0; + if (flow->promisc) { + flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT; + return 0; + } + if (flow->allmulti) { + flow->ibv_attr->type = IBV_FLOW_ATTR_MC_DEFAULT; + return 0; + } ++flow->ibv_attr->num_of_specs; - flow->ibv_attr->priority = 2; - eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size); *eth = (struct ibv_flow_spec_eth) { .type = IBV_FLOW_SPEC_ETH, - .size = eth_size, + .size = sizeof(*eth), }; - if (!spec) { - flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT; - return 0; - } - if (!mask) - mask = default_mask; memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN); - memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN); memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN); - memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN); /* Remove unwanted bits from values. */ for (i = 0; i < ETHER_ADDR_LEN; ++i) { eth->val.dst_mac[i] &= eth->mask.dst_mac[i]; - eth->val.src_mac[i] &= eth->mask.src_mac[i]; } return 0; +error: + return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, + item, msg); } /** - * Convert VLAN item to Verbs specification. + * Merge VLAN pattern item into flow rule handle. * - * @param item[in] - * Item specification. - * @param default_mask[in] - * Default bit-masks to use when item->mask is not provided. - * @param data[in, out] - * User structure. + * Additional mlx4-specific constraints on supported fields: + * + * - Matching *all* VLAN traffic by omitting @p item->spec or providing an + * empty @p item->mask would also include non-VLAN traffic. Doing so is + * therefore unsupported. + * - No support for partial masks. + * + * @param[in, out] flow + * Flow rule handle to update. + * @param[in] item + * Pattern item to merge. + * @param[in] proc + * Associated item-processing object. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx4_flow_create_vlan(const struct rte_flow_item *item, - const void *default_mask, - void *data) +mlx4_flow_merge_vlan(struct rte_flow *flow, + const struct rte_flow_item *item, + const struct mlx4_flow_proc_item *proc, + struct rte_flow_error *error) { const struct rte_flow_item_vlan *spec = item->spec; - const struct rte_flow_item_vlan *mask = item->mask; - struct mlx4_flow *flow = (struct mlx4_flow *)data; + const struct rte_flow_item_vlan *mask = + spec ? (item->mask ? item->mask : proc->mask_default) : NULL; struct ibv_flow_spec_eth *eth; - const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth); + const char *msg; - eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size); - if (!spec) + if (!mask || !mask->tci) { + msg = "mlx4 cannot match all VLAN traffic while excluding" + " non-VLAN traffic, TCI VID must be specified"; + goto error; + } + if (mask->tci != RTE_BE16(0x0fff)) { + msg = "mlx4 does not support partial TCI VID matching"; + goto error; + } + if (!flow->ibv_attr) return 0; - if (!mask) - mask = default_mask; + eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size - + sizeof(*eth)); eth->val.vlan_tag = spec->tci; eth->mask.vlan_tag = mask->tci; eth->val.vlan_tag &= eth->mask.vlan_tag; return 0; +error: + return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, + item, msg); } /** - * Convert IPv4 item to Verbs specification. + * Merge IPv4 pattern item into flow rule handle. * - * @param item[in] - * Item specification. - * @param default_mask[in] - * Default bit-masks to use when item->mask is not provided. - * @param data[in, out] - * User structure. + * Additional mlx4-specific constraints on supported fields: + * + * - No support for partial masks. + * + * @param[in, out] flow + * Flow rule handle to update. + * @param[in] item + * Pattern item to merge. + * @param[in] proc + * Associated item-processing object. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx4_flow_create_ipv4(const struct rte_flow_item *item, - const void *default_mask, - void *data) +mlx4_flow_merge_ipv4(struct rte_flow *flow, + const struct rte_flow_item *item, + const struct mlx4_flow_proc_item *proc, + struct rte_flow_error *error) { const struct rte_flow_item_ipv4 *spec = item->spec; - const struct rte_flow_item_ipv4 *mask = item->mask; - struct mlx4_flow *flow = (struct mlx4_flow *)data; + const struct rte_flow_item_ipv4 *mask = + spec ? (item->mask ? item->mask : proc->mask_default) : NULL; struct ibv_flow_spec_ipv4 *ipv4; - unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4); + const char *msg; + if (mask && + ((uint32_t)(mask->hdr.src_addr + 1) > UINT32_C(1) || + (uint32_t)(mask->hdr.dst_addr + 1) > UINT32_C(1))) { + msg = "mlx4 does not support matching partial IPv4 fields"; + goto error; + } + if (!flow->ibv_attr) + return 0; ++flow->ibv_attr->num_of_specs; - flow->ibv_attr->priority = 1; - ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size); *ipv4 = (struct ibv_flow_spec_ipv4) { .type = IBV_FLOW_SPEC_IPV4, - .size = ipv4_size, + .size = sizeof(*ipv4), }; if (!spec) return 0; @@ -229,8 +360,6 @@ mlx4_flow_create_ipv4(const struct rte_flow_item *item, .src_ip = spec->hdr.src_addr, .dst_ip = spec->hdr.dst_addr, }; - if (!mask) - mask = default_mask; ipv4->mask = (struct ibv_flow_ipv4_filter) { .src_ip = mask->hdr.src_addr, .dst_ip = mask->hdr.dst_addr, @@ -239,528 +368,504 @@ mlx4_flow_create_ipv4(const struct rte_flow_item *item, ipv4->val.src_ip &= ipv4->mask.src_ip; ipv4->val.dst_ip &= ipv4->mask.dst_ip; return 0; +error: + return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, + item, msg); } /** - * Convert UDP item to Verbs specification. + * Merge UDP pattern item into flow rule handle. * - * @param item[in] - * Item specification. - * @param default_mask[in] - * Default bit-masks to use when item->mask is not provided. - * @param data[in, out] - * User structure. + * Additional mlx4-specific constraints on supported fields: + * + * - No support for partial masks. + * + * @param[in, out] flow + * Flow rule handle to update. + * @param[in] item + * Pattern item to merge. + * @param[in] proc + * Associated item-processing object. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx4_flow_create_udp(const struct rte_flow_item *item, - const void *default_mask, - void *data) +mlx4_flow_merge_udp(struct rte_flow *flow, + const struct rte_flow_item *item, + const struct mlx4_flow_proc_item *proc, + struct rte_flow_error *error) { const struct rte_flow_item_udp *spec = item->spec; - const struct rte_flow_item_udp *mask = item->mask; - struct mlx4_flow *flow = (struct mlx4_flow *)data; + const struct rte_flow_item_udp *mask = + spec ? (item->mask ? item->mask : proc->mask_default) : NULL; struct ibv_flow_spec_tcp_udp *udp; - unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp); + const char *msg; + if (mask && + ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) || + (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) { + msg = "mlx4 does not support matching partial UDP fields"; + goto error; + } + if (!flow->ibv_attr) + return 0; ++flow->ibv_attr->num_of_specs; - flow->ibv_attr->priority = 0; - udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size); *udp = (struct ibv_flow_spec_tcp_udp) { .type = IBV_FLOW_SPEC_UDP, - .size = udp_size, + .size = sizeof(*udp), }; if (!spec) return 0; udp->val.dst_port = spec->hdr.dst_port; udp->val.src_port = spec->hdr.src_port; - if (!mask) - mask = default_mask; udp->mask.dst_port = mask->hdr.dst_port; udp->mask.src_port = mask->hdr.src_port; /* Remove unwanted bits from values. */ udp->val.src_port &= udp->mask.src_port; udp->val.dst_port &= udp->mask.dst_port; return 0; +error: + return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, + item, msg); } /** - * Convert TCP item to Verbs specification. + * Merge TCP pattern item into flow rule handle. * - * @param item[in] - * Item specification. - * @param default_mask[in] - * Default bit-masks to use when item->mask is not provided. - * @param data[in, out] - * User structure. + * Additional mlx4-specific constraints on supported fields: + * + * - No support for partial masks. + * + * @param[in, out] flow + * Flow rule handle to update. + * @param[in] item + * Pattern item to merge. + * @param[in] proc + * Associated item-processing object. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx4_flow_create_tcp(const struct rte_flow_item *item, - const void *default_mask, - void *data) +mlx4_flow_merge_tcp(struct rte_flow *flow, + const struct rte_flow_item *item, + const struct mlx4_flow_proc_item *proc, + struct rte_flow_error *error) { const struct rte_flow_item_tcp *spec = item->spec; - const struct rte_flow_item_tcp *mask = item->mask; - struct mlx4_flow *flow = (struct mlx4_flow *)data; + const struct rte_flow_item_tcp *mask = + spec ? (item->mask ? item->mask : proc->mask_default) : NULL; struct ibv_flow_spec_tcp_udp *tcp; - unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp); + const char *msg; + if (mask && + ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) || + (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) { + msg = "mlx4 does not support matching partial TCP fields"; + goto error; + } + if (!flow->ibv_attr) + return 0; ++flow->ibv_attr->num_of_specs; - flow->ibv_attr->priority = 0; - tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size); *tcp = (struct ibv_flow_spec_tcp_udp) { .type = IBV_FLOW_SPEC_TCP, - .size = tcp_size, + .size = sizeof(*tcp), }; if (!spec) return 0; tcp->val.dst_port = spec->hdr.dst_port; tcp->val.src_port = spec->hdr.src_port; - if (!mask) - mask = default_mask; tcp->mask.dst_port = mask->hdr.dst_port; tcp->mask.src_port = mask->hdr.src_port; /* Remove unwanted bits from values. */ tcp->val.src_port &= tcp->mask.src_port; tcp->val.dst_port &= tcp->mask.dst_port; return 0; +error: + return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, + item, msg); } /** - * Check support for a given item. + * Perform basic sanity checks on a pattern item. * - * @param item[in] + * @param[in] item * Item specification. - * @param mask[in] - * Bit-masks covering supported fields to compare with spec, last and mask in - * \item. - * @param size - * Bit-Mask size in bytes. + * @param[in] proc + * Associated item-processing object. + * @param[out] error + * Perform verbose error reporting if not NULL. * * @return - * 0 on success, negative value otherwise. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx4_flow_item_validate(const struct rte_flow_item *item, - const uint8_t *mask, unsigned int size) +mlx4_flow_item_check(const struct rte_flow_item *item, + const struct mlx4_flow_proc_item *proc, + struct rte_flow_error *error) { - int ret = 0; + const uint8_t *mask; + unsigned int i; + /* item->last and item->mask cannot exist without item->spec. */ if (!item->spec && (item->mask || item->last)) - return -1; - if (item->spec && !item->mask) { - unsigned int i; - const uint8_t *spec = item->spec; - - for (i = 0; i < size; ++i) - if ((spec[i] | mask[i]) != mask[i]) - return -1; - } - if (item->last && !item->mask) { - unsigned int i; - const uint8_t *spec = item->last; - - for (i = 0; i < size; ++i) - if ((spec[i] | mask[i]) != mask[i]) - return -1; - } - if (item->spec && item->last) { - uint8_t spec[size]; - uint8_t last[size]; - const uint8_t *apply = mask; - unsigned int i; - - if (item->mask) - apply = item->mask; - for (i = 0; i < size; ++i) { - spec[i] = ((const uint8_t *)item->spec)[i] & apply[i]; - last[i] = ((const uint8_t *)item->last)[i] & apply[i]; - } - ret = memcmp(spec, last, size); - } - return ret; -} - -static int -mlx4_flow_validate_eth(const struct rte_flow_item *item, - const uint8_t *mask, unsigned int size) -{ - if (item->mask) { - const struct rte_flow_item_eth *mask = item->mask; - - if (mask->dst.addr_bytes[0] != 0xff || - mask->dst.addr_bytes[1] != 0xff || - mask->dst.addr_bytes[2] != 0xff || - mask->dst.addr_bytes[3] != 0xff || - mask->dst.addr_bytes[4] != 0xff || - mask->dst.addr_bytes[5] != 0xff) - return -1; - } - return mlx4_flow_item_validate(item, mask, size); -} - -static int -mlx4_flow_validate_vlan(const struct rte_flow_item *item, - const uint8_t *mask, unsigned int size) -{ - if (item->mask) { - const struct rte_flow_item_vlan *mask = item->mask; - - if (mask->tci != 0 && - ntohs(mask->tci) != 0x0fff) - return -1; - } - return mlx4_flow_item_validate(item, mask, size); -} - -static int -mlx4_flow_validate_ipv4(const struct rte_flow_item *item, - const uint8_t *mask, unsigned int size) -{ - if (item->mask) { - const struct rte_flow_item_ipv4 *mask = item->mask; - - if (mask->hdr.src_addr != 0 && - mask->hdr.src_addr != 0xffffffff) - return -1; - if (mask->hdr.dst_addr != 0 && - mask->hdr.dst_addr != 0xffffffff) - return -1; - } - return mlx4_flow_item_validate(item, mask, size); -} - -static int -mlx4_flow_validate_udp(const struct rte_flow_item *item, - const uint8_t *mask, unsigned int size) -{ - if (item->mask) { - const struct rte_flow_item_udp *mask = item->mask; - - if (mask->hdr.src_port != 0 && - mask->hdr.src_port != 0xffff) - return -1; - if (mask->hdr.dst_port != 0 && - mask->hdr.dst_port != 0xffff) - return -1; - } - return mlx4_flow_item_validate(item, mask, size); -} - -static int -mlx4_flow_validate_tcp(const struct rte_flow_item *item, - const uint8_t *mask, unsigned int size) -{ - if (item->mask) { - const struct rte_flow_item_tcp *mask = item->mask; - - if (mask->hdr.src_port != 0 && - mask->hdr.src_port != 0xffff) - return -1; - if (mask->hdr.dst_port != 0 && - mask->hdr.dst_port != 0xffff) - return -1; + return rte_flow_error_set + (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, + "\"mask\" or \"last\" field provided without a" + " corresponding \"spec\""); + /* No spec, no mask, no problem. */ + if (!item->spec) + return 0; + mask = item->mask ? + (const uint8_t *)item->mask : + (const uint8_t *)proc->mask_default; + assert(mask); + /* + * Single-pass check to make sure that: + * - Mask is supported, no bits are set outside proc->mask_support. + * - Both item->spec and item->last are included in mask. + */ + for (i = 0; i != proc->mask_sz; ++i) { + if (!mask[i]) + continue; + if ((mask[i] | ((const uint8_t *)proc->mask_support)[i]) != + ((const uint8_t *)proc->mask_support)[i]) + return rte_flow_error_set + (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, + item, "unsupported field found in \"mask\""); + if (item->last && + (((const uint8_t *)item->spec)[i] & mask[i]) != + (((const uint8_t *)item->last)[i] & mask[i])) + return rte_flow_error_set + (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, + item, + "range between \"spec\" and \"last\"" + " is larger than \"mask\""); } - return mlx4_flow_item_validate(item, mask, size); + return 0; } /** Graph of supported items and associated actions. */ -static const struct mlx4_flow_items mlx4_flow_items[] = { +static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = { [RTE_FLOW_ITEM_TYPE_END] = { - .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH), + .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH), }, [RTE_FLOW_ITEM_TYPE_ETH] = { - .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN, - RTE_FLOW_ITEM_TYPE_IPV4), - .actions = valid_actions, - .mask = &(const struct rte_flow_item_eth){ + .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN, + RTE_FLOW_ITEM_TYPE_IPV4), + .mask_support = &(const struct rte_flow_item_eth){ + /* Only destination MAC can be matched. */ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", - .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", }, - .default_mask = &rte_flow_item_eth_mask, + .mask_default = &rte_flow_item_eth_mask, .mask_sz = sizeof(struct rte_flow_item_eth), - .validate = mlx4_flow_validate_eth, - .convert = mlx4_flow_create_eth, + .merge = mlx4_flow_merge_eth, .dst_sz = sizeof(struct ibv_flow_spec_eth), }, [RTE_FLOW_ITEM_TYPE_VLAN] = { - .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4), - .actions = valid_actions, - .mask = &(const struct rte_flow_item_vlan){ - /* rte_flow_item_vlan_mask is invalid for mlx4. */ -#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN - .tci = 0x0fff, -#else - .tci = 0xff0f, -#endif + .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4), + .mask_support = &(const struct rte_flow_item_vlan){ + /* Only TCI VID matching is supported. */ + .tci = RTE_BE16(0x0fff), }, + .mask_default = &rte_flow_item_vlan_mask, .mask_sz = sizeof(struct rte_flow_item_vlan), - .validate = mlx4_flow_validate_vlan, - .convert = mlx4_flow_create_vlan, + .merge = mlx4_flow_merge_vlan, .dst_sz = 0, }, [RTE_FLOW_ITEM_TYPE_IPV4] = { - .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP, - RTE_FLOW_ITEM_TYPE_TCP), - .actions = valid_actions, - .mask = &(const struct rte_flow_item_ipv4){ + .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_TCP), + .mask_support = &(const struct rte_flow_item_ipv4){ .hdr = { - .src_addr = -1, - .dst_addr = -1, + .src_addr = RTE_BE32(0xffffffff), + .dst_addr = RTE_BE32(0xffffffff), }, }, - .default_mask = &rte_flow_item_ipv4_mask, + .mask_default = &rte_flow_item_ipv4_mask, .mask_sz = sizeof(struct rte_flow_item_ipv4), - .validate = mlx4_flow_validate_ipv4, - .convert = mlx4_flow_create_ipv4, + .merge = mlx4_flow_merge_ipv4, .dst_sz = sizeof(struct ibv_flow_spec_ipv4), }, [RTE_FLOW_ITEM_TYPE_UDP] = { - .actions = valid_actions, - .mask = &(const struct rte_flow_item_udp){ + .mask_support = &(const struct rte_flow_item_udp){ .hdr = { - .src_port = -1, - .dst_port = -1, + .src_port = RTE_BE16(0xffff), + .dst_port = RTE_BE16(0xffff), }, }, - .default_mask = &rte_flow_item_udp_mask, + .mask_default = &rte_flow_item_udp_mask, .mask_sz = sizeof(struct rte_flow_item_udp), - .validate = mlx4_flow_validate_udp, - .convert = mlx4_flow_create_udp, + .merge = mlx4_flow_merge_udp, .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp), }, [RTE_FLOW_ITEM_TYPE_TCP] = { - .actions = valid_actions, - .mask = &(const struct rte_flow_item_tcp){ + .mask_support = &(const struct rte_flow_item_tcp){ .hdr = { - .src_port = -1, - .dst_port = -1, + .src_port = RTE_BE16(0xffff), + .dst_port = RTE_BE16(0xffff), }, }, - .default_mask = &rte_flow_item_tcp_mask, + .mask_default = &rte_flow_item_tcp_mask, .mask_sz = sizeof(struct rte_flow_item_tcp), - .validate = mlx4_flow_validate_tcp, - .convert = mlx4_flow_create_tcp, + .merge = mlx4_flow_merge_tcp, .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp), }, }; /** - * Validate a flow supported by the NIC. + * Make sure a flow rule is supported and initialize associated structure. * * @param priv * Pointer to private structure. * @param[in] attr * Flow rule attributes. - * @param[in] items + * @param[in] pattern * Pattern specification (list terminated by the END pattern item). * @param[in] actions * Associated actions (list terminated by the END action). * @param[out] error * Perform verbose error reporting if not NULL. - * @param[in, out] flow - * Flow structure to update. + * @param[in, out] addr + * Buffer where the resulting flow rule handle pointer must be stored. + * If NULL, stop processing after validation stage. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -priv_flow_validate(struct priv *priv, - const struct rte_flow_attr *attr, - const struct rte_flow_item items[], - const struct rte_flow_action actions[], - struct rte_flow_error *error, - struct mlx4_flow *flow) +mlx4_flow_prepare(struct priv *priv, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + struct rte_flow **addr) { - const struct mlx4_flow_items *cur_item = mlx4_flow_items; - struct mlx4_flow_action action = { - .queue = 0, - .drop = 0, - }; - - (void)priv; - if (attr->group) { - rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ATTR_GROUP, - NULL, - "groups are not supported"); - return -rte_errno; - } - if (attr->priority) { - rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, - NULL, - "priorities are not supported"); - return -rte_errno; - } - if (attr->egress) { - rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, - NULL, - "egress is not supported"); - return -rte_errno; - } - if (!attr->ingress) { - rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, - NULL, - "only ingress is supported"); - return -rte_errno; - } - /* Go over items list. */ - for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) { - const struct mlx4_flow_items *token = NULL; + const struct rte_flow_item *item; + const struct rte_flow_action *action; + const struct mlx4_flow_proc_item *proc; + struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) }; + struct rte_flow *flow = &temp; + const char *msg = NULL; + + if (attr->group) + return rte_flow_error_set + (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP, + NULL, "groups are not supported"); + if (attr->priority > MLX4_FLOW_PRIORITY_LAST) + return rte_flow_error_set + (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + NULL, "maximum priority level is " + MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST)); + if (attr->egress) + return rte_flow_error_set + (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + NULL, "egress is not supported"); + if (!attr->ingress) + return rte_flow_error_set + (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + NULL, "only ingress is supported"); +fill: + proc = mlx4_flow_proc_item_list; + /* Go over pattern. */ + for (item = pattern; item->type; ++item) { + const struct mlx4_flow_proc_item *next = NULL; unsigned int i; int err; - if (items->type == RTE_FLOW_ITEM_TYPE_VOID) + if (item->type == RTE_FLOW_ITEM_TYPE_VOID) + continue; + if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) { + flow->internal = 1; continue; - /* - * The nic can support patterns with NULL eth spec only - * if eth is a single item in a rule. - */ - if (!items->spec && - items->type == RTE_FLOW_ITEM_TYPE_ETH) { - const struct rte_flow_item *next = items + 1; - - if (next->type != RTE_FLOW_ITEM_TYPE_END) { - rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ITEM, - items, - "the rule requires" - " an Ethernet spec"); - return -rte_errno; - } } - for (i = 0; - cur_item->items && - cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END; - ++i) { - if (cur_item->items[i] == items->type) { - token = &mlx4_flow_items[items->type]; + if (flow->promisc || flow->allmulti) { + msg = "mlx4 does not support additional matching" + " criteria combined with indiscriminate" + " matching on Ethernet headers"; + goto exit_item_not_supported; + } + for (i = 0; proc->next_item && proc->next_item[i]; ++i) { + if (proc->next_item[i] == item->type) { + next = &mlx4_flow_proc_item_list[item->type]; break; } } - if (!token) - goto exit_item_not_supported; - cur_item = token; - err = cur_item->validate(items, - (const uint8_t *)cur_item->mask, - cur_item->mask_sz); - if (err) + if (!next) goto exit_item_not_supported; - if (flow->ibv_attr && cur_item->convert) { - err = cur_item->convert(items, - (cur_item->default_mask ? - cur_item->default_mask : - cur_item->mask), - flow); + proc = next; + /* + * Perform basic sanity checks only once, while handle is + * not allocated. + */ + if (flow == &temp) { + err = mlx4_flow_item_check(item, proc, error); if (err) - goto exit_item_not_supported; + return err; } - flow->offset += cur_item->dst_sz; + if (proc->merge) { + err = proc->merge(flow, item, proc, error); + if (err) + return err; + } + flow->ibv_attr_size += proc->dst_sz; } - /* Go over actions list */ - for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) { - if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) { - continue; - } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) { - action.drop = 1; - } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) { - const struct rte_flow_action_queue *queue = - (const struct rte_flow_action_queue *) - actions->conf; + /* Go over actions list. */ + for (action = actions; action->type; ++action) { + switch (action->type) { + const struct rte_flow_action_queue *queue; + const struct rte_flow_action_rss *rss; + const struct rte_eth_rss_conf *rss_conf; + unsigned int i; - if (!queue || (queue->index > (priv->rxqs_n - 1))) + case RTE_FLOW_ACTION_TYPE_VOID: + continue; + case RTE_FLOW_ACTION_TYPE_DROP: + flow->drop = 1; + break; + case RTE_FLOW_ACTION_TYPE_QUEUE: + if (flow->rss) + break; + queue = action->conf; + if (queue->index >= priv->dev->data->nb_rx_queues) { + msg = "queue target index beyond number of" + " configured Rx queues"; goto exit_action_not_supported; - action.queue = 1; - action.queues_n = 1; - action.queues[0] = queue->index; - } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) { - int i; - int ierr; - const struct rte_flow_action_rss *rss = - (const struct rte_flow_action_rss *) - actions->conf; - - if (!priv->hw_rss) { - rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ACTION, - actions, - "RSS cannot be used with " - "the current configuration"); - return -rte_errno; } - if (!priv->isolated) { - rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ACTION, - actions, - "RSS cannot be used without " - "isolated mode"); - return -rte_errno; + flow->rss = mlx4_rss_get + (priv, 0, mlx4_rss_hash_key_default, 1, + &queue->index); + if (!flow->rss) { + msg = "not enough resources for additional" + " single-queue RSS context"; + goto exit_action_not_supported; + } + break; + case RTE_FLOW_ACTION_TYPE_RSS: + if (flow->rss) + break; + rss = action->conf; + /* Default RSS configuration if none is provided. */ + rss_conf = + rss->rss_conf ? + rss->rss_conf : + &(struct rte_eth_rss_conf){ + .rss_key = mlx4_rss_hash_key_default, + .rss_key_len = MLX4_RSS_HASH_KEY_SIZE, + .rss_hf = (ETH_RSS_IPV4 | + ETH_RSS_NONFRAG_IPV4_TCP | + ETH_RSS_IPV6 | + ETH_RSS_NONFRAG_IPV6_TCP), + }; + /* Sanity checks. */ + for (i = 0; i < rss->num; ++i) + if (rss->queue[i] >= + priv->dev->data->nb_rx_queues) + break; + if (i != rss->num) { + msg = "queue index target beyond number of" + " configured Rx queues"; + goto exit_action_not_supported; } if (!rte_is_power_of_2(rss->num)) { - rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ACTION, - actions, - "the number of queues " - "should be power of two"); - return -rte_errno; + msg = "for RSS, mlx4 requires the number of" + " queues to be a power of two"; + goto exit_action_not_supported; } - if (priv->max_rss_tbl_sz < rss->num) { - rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ACTION, - actions, - "the number of queues " - "is too large"); - return -rte_errno; + if (rss_conf->rss_key_len != + sizeof(flow->rss->key)) { + msg = "mlx4 supports exactly one RSS hash key" + " length: " + MLX4_STR_EXPAND(MLX4_RSS_HASH_KEY_SIZE); + goto exit_action_not_supported; } - /* checking indexes array */ - ierr = 0; - for (i = 0; i < rss->num; ++i) { - int j; - if (rss->queue[i] >= priv->rxqs_n) - ierr = 1; - /* - * Prevent the user from specifying - * the same queue twice in the RSS array. - */ - for (j = i + 1; j < rss->num && !ierr; ++j) - if (rss->queue[j] == rss->queue[i]) - ierr = 1; - if (ierr) { - rte_flow_error_set( - error, - ENOTSUP, - RTE_FLOW_ERROR_TYPE_HANDLE, - NULL, - "RSS action only supports " - "unique queue indices " - "in a list"); - return -rte_errno; - } + for (i = 1; i < rss->num; ++i) + if (rss->queue[i] - rss->queue[i - 1] != 1) + break; + if (i != rss->num) { + msg = "mlx4 requires RSS contexts to use" + " consecutive queue indices only"; + goto exit_action_not_supported; } - action.queue = 1; - action.queues_n = rss->num; - for (i = 0; i < rss->num; ++i) - action.queues[i] = rss->queue[i]; - } else { + if (rss->queue[0] % rss->num) { + msg = "mlx4 requires the first queue of a RSS" + " context to be aligned on a multiple" + " of the context size"; + goto exit_action_not_supported; + } + flow->rss = mlx4_rss_get + (priv, mlx4_conv_rss_hf(rss_conf->rss_hf), + rss_conf->rss_key, rss->num, rss->queue); + if (!flow->rss) { + msg = "either invalid parameters or not enough" + " resources for additional multi-queue" + " RSS context"; + goto exit_action_not_supported; + } + break; + default: goto exit_action_not_supported; } } - if (!action.queue && !action.drop) { - rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, - NULL, "no valid action"); - return -rte_errno; + if (!flow->rss && !flow->drop) + return rte_flow_error_set + (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "no valid action"); + /* Validation ends here. */ + if (!addr) { + if (flow->rss) + mlx4_rss_put(flow->rss); + return 0; } + if (flow == &temp) { + /* Allocate proper handle based on collected data. */ + const struct mlx4_malloc_vec vec[] = { + { + .align = alignof(struct rte_flow), + .size = sizeof(*flow), + .addr = (void **)&flow, + }, + { + .align = alignof(struct ibv_flow_attr), + .size = temp.ibv_attr_size, + .addr = (void **)&temp.ibv_attr, + }, + }; + + if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec))) + return rte_flow_error_set + (error, -rte_errno, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "flow rule handle allocation failure"); + /* Most fields will be updated by second pass. */ + *flow = (struct rte_flow){ + .ibv_attr = temp.ibv_attr, + .ibv_attr_size = sizeof(*flow->ibv_attr), + .rss = temp.rss, + }; + *flow->ibv_attr = (struct ibv_flow_attr){ + .type = IBV_FLOW_ATTR_NORMAL, + .size = sizeof(*flow->ibv_attr), + .priority = attr->priority, + .port = priv->port, + }; + goto fill; + } + *addr = flow; return 0; exit_item_not_supported: - rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, - items, "item not supported"); - return -rte_errno; + return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, + item, msg ? msg : "item not supported"); exit_action_not_supported: - rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, - actions, "action not supported"); - return -rte_errno; + return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, + action, msg ? msg : "action not supported"); } /** @@ -769,552 +874,691 @@ exit_action_not_supported: * @see rte_flow_validate() * @see rte_flow_ops */ -int +static int mlx4_flow_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, - const struct rte_flow_item items[], + const struct rte_flow_item pattern[], const struct rte_flow_action actions[], struct rte_flow_error *error) { struct priv *priv = dev->data->dev_private; - int ret; - struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr) }; - - priv_lock(priv); - ret = priv_flow_validate(priv, attr, items, actions, error, &flow); - priv_unlock(priv); - return ret; -} - -/** - * Destroy a drop queue. - * - * @param priv - * Pointer to private structure. - */ -static void -mlx4_flow_destroy_drop_queue(struct priv *priv) -{ - if (priv->flow_drop_queue) { - struct rte_flow_drop *fdq = priv->flow_drop_queue; - priv->flow_drop_queue = NULL; - claim_zero(ibv_destroy_qp(fdq->qp)); - claim_zero(ibv_destroy_cq(fdq->cq)); - rte_free(fdq); - } + return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL); } /** - * Create a single drop queue for all drop flows. + * Get a drop flow rule resources instance. * * @param priv * Pointer to private structure. * * @return - * 0 on success, negative value otherwise. + * Pointer to drop flow resources on success, NULL otherwise and rte_errno + * is set. */ -static int -mlx4_flow_create_drop_queue(struct priv *priv) +static struct mlx4_drop * +mlx4_drop_get(struct priv *priv) { - struct ibv_qp *qp; - struct ibv_cq *cq; - struct rte_flow_drop *fdq; + struct mlx4_drop *drop = priv->drop; - fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0); - if (!fdq) { - ERROR("Cannot allocate memory for drop struct"); - goto err; - } - cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0, - &(struct ibv_exp_cq_init_attr){ - .comp_mask = 0, - }); - if (!cq) { - ERROR("Cannot create drop CQ"); - goto err_create_cq; - } - qp = ibv_exp_create_qp(priv->ctx, - &(struct ibv_exp_qp_init_attr){ - .send_cq = cq, - .recv_cq = cq, - .cap = { - .max_recv_wr = 1, - .max_recv_sge = 1, - }, - .qp_type = IBV_QPT_RAW_PACKET, - .comp_mask = - IBV_EXP_QP_INIT_ATTR_PD | - IBV_EXP_QP_INIT_ATTR_PORT, - .pd = priv->pd, - .port_num = priv->port, - }); - if (!qp) { - ERROR("Cannot create drop QP"); - goto err_create_qp; + if (drop) { + assert(drop->refcnt); + assert(drop->priv == priv); + ++drop->refcnt; + return drop; } - *fdq = (struct rte_flow_drop){ - .qp = qp, - .cq = cq, + drop = rte_malloc(__func__, sizeof(*drop), 0); + if (!drop) + goto error; + *drop = (struct mlx4_drop){ + .priv = priv, + .refcnt = 1, }; - priv->flow_drop_queue = fdq; - return 0; -err_create_qp: - claim_zero(ibv_destroy_cq(cq)); -err_create_cq: - rte_free(fdq); -err: - return -1; + drop->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0); + if (!drop->cq) + goto error; + drop->qp = ibv_create_qp(priv->pd, + &(struct ibv_qp_init_attr){ + .send_cq = drop->cq, + .recv_cq = drop->cq, + .qp_type = IBV_QPT_RAW_PACKET, + }); + if (!drop->qp) + goto error; + priv->drop = drop; + return drop; +error: + if (drop->qp) + claim_zero(ibv_destroy_qp(drop->qp)); + if (drop->cq) + claim_zero(ibv_destroy_cq(drop->cq)); + if (drop) + rte_free(drop); + rte_errno = ENOMEM; + return NULL; } /** - * Get RSS parent rxq structure for given queues. + * Give back a drop flow rule resources instance. * - * Creates a new or returns an existed one. - * - * @param priv - * Pointer to private structure. - * @param queues - * queues indices array, NULL in default RSS case. - * @param children_n - * the size of queues array. - * - * @return - * Pointer to a parent rxq structure, NULL on failure. + * @param drop + * Pointer to drop flow rule resources. */ -static struct rxq * -priv_parent_get(struct priv *priv, - uint16_t queues[], - uint16_t children_n, - struct rte_flow_error *error) +static void +mlx4_drop_put(struct mlx4_drop *drop) { - unsigned int i; - struct rxq *parent; - - for (parent = LIST_FIRST(&priv->parents); - parent; - parent = LIST_NEXT(parent, next)) { - unsigned int same = 0; - unsigned int overlap = 0; - - /* - * Find out whether an appropriate parent queue already exists - * and can be reused, otherwise make sure there are no overlaps. - */ - for (i = 0; i < children_n; ++i) { - unsigned int j; - - for (j = 0; j < parent->rss.queues_n; ++j) { - if (parent->rss.queues[j] != queues[i]) - continue; - ++overlap; - if (i == j) - ++same; - } - } - if (same == children_n && - children_n == parent->rss.queues_n) - return parent; - else if (overlap) - goto error; - } - /* Exclude the cases when some QPs were created without RSS */ - for (i = 0; i < children_n; ++i) { - struct rxq *rxq = (*priv->rxqs)[queues[i]]; - if (rxq->qp) - goto error; - } - parent = priv_parent_create(priv, queues, children_n); - if (!parent) { - rte_flow_error_set(error, - ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, "flow rule creation failure"); - return NULL; - } - return parent; - -error: - rte_flow_error_set(error, - EEXIST, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, - "sharing a queue between several" - " RSS groups is not supported"); - return NULL; + assert(drop->refcnt); + if (--drop->refcnt) + return; + drop->priv->drop = NULL; + claim_zero(ibv_destroy_qp(drop->qp)); + claim_zero(ibv_destroy_cq(drop->cq)); + rte_free(drop); } /** - * Complete flow rule creation. + * Toggle a configured flow rule. * * @param priv * Pointer to private structure. - * @param ibv_attr - * Verbs flow attributes. - * @param action - * Target action structure. + * @param flow + * Flow rule handle to toggle. + * @param enable + * Whether associated Verbs flow must be created or removed. * @param[out] error * Perform verbose error reporting if not NULL. * * @return - * A flow if the rule could be created. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ -static struct rte_flow * -priv_flow_create_action_queue(struct priv *priv, - struct ibv_flow_attr *ibv_attr, - struct mlx4_flow_action *action, - struct rte_flow_error *error) +static int +mlx4_flow_toggle(struct priv *priv, + struct rte_flow *flow, + int enable, + struct rte_flow_error *error) { - struct ibv_qp *qp; - struct rte_flow *rte_flow; - struct rxq *rxq_parent = NULL; + struct ibv_qp *qp = NULL; + const char *msg; + int err; - assert(priv->pd); - assert(priv->ctx); - rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0); - if (!rte_flow) { - rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, - NULL, "cannot allocate flow memory"); - return NULL; + if (!enable) { + if (!flow->ibv_flow) + return 0; + claim_zero(ibv_destroy_flow(flow->ibv_flow)); + flow->ibv_flow = NULL; + if (flow->drop) + mlx4_drop_put(priv->drop); + else if (flow->rss) + mlx4_rss_detach(flow->rss); + return 0; } - if (action->drop) { - qp = priv->flow_drop_queue ? priv->flow_drop_queue->qp : NULL; - } else { - int ret; + assert(flow->ibv_attr); + if (!flow->internal && + !priv->isolated && + flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) { + if (flow->ibv_flow) { + claim_zero(ibv_destroy_flow(flow->ibv_flow)); + flow->ibv_flow = NULL; + if (flow->drop) + mlx4_drop_put(priv->drop); + else if (flow->rss) + mlx4_rss_detach(flow->rss); + } + err = EACCES; + msg = ("priority level " + MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST) + " is reserved when not in isolated mode"); + goto error; + } + if (flow->rss) { + struct mlx4_rss *rss = flow->rss; + int missing = 0; unsigned int i; - struct rxq *rxq = NULL; - if (action->queues_n > 1) { - rxq_parent = priv_parent_get(priv, action->queues, - action->queues_n, error); - if (!rxq_parent) - goto error; + /* Stop at the first nonexistent target queue. */ + for (i = 0; i != rss->queues; ++i) + if (rss->queue_id[i] >= + priv->dev->data->nb_rx_queues || + !priv->dev->data->rx_queues[rss->queue_id[i]]) { + missing = 1; + break; + } + if (flow->ibv_flow) { + if (missing ^ !flow->drop) + return 0; + /* Verbs flow needs updating. */ + claim_zero(ibv_destroy_flow(flow->ibv_flow)); + flow->ibv_flow = NULL; + if (flow->drop) + mlx4_drop_put(priv->drop); + else + mlx4_rss_detach(rss); } - for (i = 0; i < action->queues_n; ++i) { - rxq = (*priv->rxqs)[action->queues[i]]; - /* - * In case of isolated mode we postpone - * ibv receive queue creation till the first - * rte_flow rule will be applied on that queue. - */ - if (!rxq->qp) { - assert(priv->isolated); - ret = rxq_create_qp(rxq, rxq->elts_n, - 0, 0, rxq_parent); - if (ret) { - rte_flow_error_set( - error, - ENOMEM, - RTE_FLOW_ERROR_TYPE_HANDLE, - NULL, - "flow rule creation failure"); - goto error; - } + if (!missing) { + err = mlx4_rss_attach(rss); + if (err) { + err = -err; + msg = "cannot create indirection table or hash" + " QP to associate flow rule with"; + goto error; } + qp = rss->qp; } - qp = action->queues_n > 1 ? rxq_parent->qp : rxq->qp; - rte_flow->qp = qp; + /* A missing target queue drops traffic implicitly. */ + flow->drop = missing; } - rte_flow->ibv_attr = ibv_attr; - if (!priv->started) - return rte_flow; - rte_flow->ibv_flow = ibv_create_flow(qp, rte_flow->ibv_attr); - if (!rte_flow->ibv_flow) { - rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, - NULL, "flow rule creation failure"); - goto error; + if (flow->drop) { + mlx4_drop_get(priv); + if (!priv->drop) { + err = rte_errno; + msg = "resources for drop flow rule cannot be created"; + goto error; + } + qp = priv->drop->qp; } - return rte_flow; - + assert(qp); + if (flow->ibv_flow) + return 0; + flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr); + if (flow->ibv_flow) + return 0; + if (flow->drop) + mlx4_drop_put(priv->drop); + else if (flow->rss) + mlx4_rss_detach(flow->rss); + err = errno; + msg = "flow rule rejected by device"; error: - if (rxq_parent) - rxq_parent_cleanup(rxq_parent); - rte_free(rte_flow); - return NULL; + return rte_flow_error_set + (error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg); } /** - * Convert a flow. - * - * @param priv - * Pointer to private structure. - * @param[in] attr - * Flow rule attributes. - * @param[in] items - * Pattern specification (list terminated by the END pattern item). - * @param[in] actions - * Associated actions (list terminated by the END action). - * @param[out] error - * Perform verbose error reporting if not NULL. + * Create a flow. * - * @return - * A flow on success, NULL otherwise. + * @see rte_flow_create() + * @see rte_flow_ops */ static struct rte_flow * -priv_flow_create(struct priv *priv, +mlx4_flow_create(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, - const struct rte_flow_item items[], + const struct rte_flow_item pattern[], const struct rte_flow_action actions[], struct rte_flow_error *error) { - struct rte_flow *rte_flow; - struct mlx4_flow_action action; - struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr), }; + struct priv *priv = dev->data->dev_private; + struct rte_flow *flow; int err; - err = priv_flow_validate(priv, attr, items, actions, error, &flow); + err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow); if (err) return NULL; - flow.ibv_attr = rte_malloc(__func__, flow.offset, 0); - if (!flow.ibv_attr) { - rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, - NULL, "cannot allocate ibv_attr memory"); - return NULL; - } - flow.offset = sizeof(struct ibv_flow_attr); - *flow.ibv_attr = (struct ibv_flow_attr){ - .comp_mask = 0, - .type = IBV_FLOW_ATTR_NORMAL, - .size = sizeof(struct ibv_flow_attr), - .priority = attr->priority, - .num_of_specs = 0, - .port = priv->port, - .flags = 0, - }; - claim_zero(priv_flow_validate(priv, attr, items, actions, - error, &flow)); - action = (struct mlx4_flow_action){ - .queue = 0, - .drop = 0, - }; - for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) { - if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) { - continue; - } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) { - action.queue = 1; - action.queues_n = 1; - action.queues[0] = - ((const struct rte_flow_action_queue *) - actions->conf)->index; - } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) { - action.drop = 1; - } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) { - unsigned int i; - const struct rte_flow_action_rss *rss = - (const struct rte_flow_action_rss *) - actions->conf; + err = mlx4_flow_toggle(priv, flow, priv->started, error); + if (!err) { + struct rte_flow *curr = LIST_FIRST(&priv->flows); - action.queue = 1; - action.queues_n = rss->num; - for (i = 0; i < rss->num; ++i) - action.queues[i] = rss->queue[i]; + /* New rules are inserted after internal ones. */ + if (!curr || !curr->internal) { + LIST_INSERT_HEAD(&priv->flows, flow, next); } else { - rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ACTION, - actions, "unsupported action"); - goto exit; + while (LIST_NEXT(curr, next) && + LIST_NEXT(curr, next)->internal) + curr = LIST_NEXT(curr, next); + LIST_INSERT_AFTER(curr, flow, next); } + return flow; } - rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr, - &action, error); - if (rte_flow) - return rte_flow; -exit: - rte_free(flow.ibv_attr); + if (flow->rss) + mlx4_rss_put(flow->rss); + rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + error->message); + rte_free(flow); return NULL; } /** - * Create a flow. + * Configure isolated mode. * - * @see rte_flow_create() - * @see rte_flow_ops - */ -struct rte_flow * -mlx4_flow_create(struct rte_eth_dev *dev, - const struct rte_flow_attr *attr, - const struct rte_flow_item items[], - const struct rte_flow_action actions[], - struct rte_flow_error *error) -{ - struct priv *priv = dev->data->dev_private; - struct rte_flow *flow; - - priv_lock(priv); - flow = priv_flow_create(priv, attr, items, actions, error); - if (flow) { - LIST_INSERT_HEAD(&priv->flows, flow, next); - DEBUG("Flow created %p", (void *)flow); - } - priv_unlock(priv); - return flow; -} - -/** * @see rte_flow_isolate() - * - * Must be done before calling dev_configure(). - * - * @param dev - * Pointer to the ethernet device structure. - * @param enable - * Nonzero to enter isolated mode, attempt to leave it otherwise. - * @param[out] error - * Perform verbose error reporting if not NULL. PMDs initialize this - * structure in case of error only. - * - * @return - * 0 on success, a negative value on error. + * @see rte_flow_ops */ -int +static int mlx4_flow_isolate(struct rte_eth_dev *dev, int enable, struct rte_flow_error *error) { struct priv *priv = dev->data->dev_private; - priv_lock(priv); - if (priv->rxqs) { - rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, "isolated mode must be set" - " before configuring the device"); - priv_unlock(priv); + if (!!enable == !!priv->isolated) + return 0; + priv->isolated = !!enable; + if (mlx4_flow_sync(priv, error)) { + priv->isolated = !enable; return -rte_errno; } - priv->isolated = !!enable; - priv_unlock(priv); return 0; } /** - * Destroy a flow. + * Destroy a flow rule. * - * @param priv - * Pointer to private structure. - * @param[in] flow - * Flow to destroy. + * @see rte_flow_destroy() + * @see rte_flow_ops */ -static void -priv_flow_destroy(struct priv *priv, struct rte_flow *flow) +static int +mlx4_flow_destroy(struct rte_eth_dev *dev, + struct rte_flow *flow, + struct rte_flow_error *error) { - (void)priv; + struct priv *priv = dev->data->dev_private; + int err = mlx4_flow_toggle(priv, flow, 0, error); + + if (err) + return err; LIST_REMOVE(flow, next); - if (flow->ibv_flow) - claim_zero(ibv_destroy_flow(flow->ibv_flow)); - rte_free(flow->ibv_attr); - DEBUG("Flow destroyed %p", (void *)flow); + if (flow->rss) + mlx4_rss_put(flow->rss); rte_free(flow); + return 0; } /** - * Destroy a flow. + * Destroy user-configured flow rules. * - * @see rte_flow_destroy() + * This function skips internal flows rules. + * + * @see rte_flow_flush() * @see rte_flow_ops */ -int -mlx4_flow_destroy(struct rte_eth_dev *dev, - struct rte_flow *flow, - struct rte_flow_error *error) +static int +mlx4_flow_flush(struct rte_eth_dev *dev, + struct rte_flow_error *error) { struct priv *priv = dev->data->dev_private; + struct rte_flow *flow = LIST_FIRST(&priv->flows); + + while (flow) { + struct rte_flow *next = LIST_NEXT(flow, next); - (void)error; - priv_lock(priv); - priv_flow_destroy(priv, flow); - priv_unlock(priv); + if (!flow->internal) + mlx4_flow_destroy(dev, flow, error); + flow = next; + } return 0; } /** - * Destroy all flows. + * Helper function to determine the next configured VLAN filter. * * @param priv * Pointer to private structure. + * @param vlan + * VLAN ID to use as a starting point. + * + * @return + * Next configured VLAN ID or a high value (>= 4096) if there is none. */ -static void -priv_flow_flush(struct priv *priv) +static uint16_t +mlx4_flow_internal_next_vlan(struct priv *priv, uint16_t vlan) +{ + while (vlan < 4096) { + if (priv->dev->data->vlan_filter_conf.ids[vlan / 64] & + (UINT64_C(1) << (vlan % 64))) + return vlan; + ++vlan; + } + return vlan; +} + +/** + * Generate internal flow rules. + * + * Various flow rules are created depending on the mode the device is in: + * + * 1. Promiscuous: port MAC + catch-all (VLAN filtering is ignored). + * 2. All multicast: port MAC/VLAN + catch-all multicast. + * 3. Otherwise: port MAC/VLAN + broadcast MAC/VLAN. + * + * About MAC flow rules: + * + * - MAC flow rules are generated from @p dev->data->mac_addrs + * (@p priv->mac array). + * - An additional flow rule for Ethernet broadcasts is also generated. + * - All these are per-VLAN if @p dev->data->dev_conf.rxmode.hw_vlan_filter + * is enabled and VLAN filters are configured. + * + * @param priv + * Pointer to private structure. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error) { - while (!LIST_EMPTY(&priv->flows)) { - struct rte_flow *flow; + struct rte_flow_attr attr = { + .priority = MLX4_FLOW_PRIORITY_LAST, + .ingress = 1, + }; + struct rte_flow_item_eth eth_spec; + const struct rte_flow_item_eth eth_mask = { + .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", + }; + const struct rte_flow_item_eth eth_allmulti = { + .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00", + }; + struct rte_flow_item_vlan vlan_spec; + const struct rte_flow_item_vlan vlan_mask = { + .tci = RTE_BE16(0x0fff), + }; + struct rte_flow_item pattern[] = { + { + .type = MLX4_FLOW_ITEM_TYPE_INTERNAL, + }, + { + .type = RTE_FLOW_ITEM_TYPE_ETH, + .spec = ð_spec, + .mask = ð_mask, + }, + { + /* Replaced with VLAN if filtering is enabled. */ + .type = RTE_FLOW_ITEM_TYPE_END, + }, + { + .type = RTE_FLOW_ITEM_TYPE_END, + }, + }; + /* + * Round number of queues down to their previous power of 2 to + * comply with RSS context limitations. Extra queues silently do not + * get RSS by default. + */ + uint32_t queues = + rte_align32pow2(priv->dev->data->nb_rx_queues + 1) >> 1; + alignas(struct rte_flow_action_rss) uint8_t rss_conf_data + [offsetof(struct rte_flow_action_rss, queue) + + sizeof(((struct rte_flow_action_rss *)0)->queue[0]) * queues]; + struct rte_flow_action_rss *rss_conf = (void *)rss_conf_data; + struct rte_flow_action actions[] = { + { + .type = RTE_FLOW_ACTION_TYPE_RSS, + .conf = rss_conf, + }, + { + .type = RTE_FLOW_ACTION_TYPE_END, + }, + }; + struct ether_addr *rule_mac = ð_spec.dst; + rte_be16_t *rule_vlan = + priv->dev->data->dev_conf.rxmode.hw_vlan_filter && + !priv->dev->data->promiscuous ? + &vlan_spec.tci : + NULL; + int broadcast = + !priv->dev->data->promiscuous && + !priv->dev->data->all_multicast; + uint16_t vlan = 0; + struct rte_flow *flow; + unsigned int i; + int err = 0; - flow = LIST_FIRST(&priv->flows); - priv_flow_destroy(priv, flow); + /* Nothing to be done if there are no Rx queues. */ + if (!queues) + goto error; + /* Prepare default RSS configuration. */ + *rss_conf = (struct rte_flow_action_rss){ + .rss_conf = NULL, /* Rely on default fallback settings. */ + .num = queues, + }; + for (i = 0; i != queues; ++i) + rss_conf->queue[i] = i; + /* + * Set up VLAN item if filtering is enabled and at least one VLAN + * filter is configured. + */ + if (rule_vlan) { + vlan = mlx4_flow_internal_next_vlan(priv, 0); + if (vlan < 4096) { + pattern[2] = (struct rte_flow_item){ + .type = RTE_FLOW_ITEM_TYPE_VLAN, + .spec = &vlan_spec, + .mask = &vlan_mask, + }; +next_vlan: + *rule_vlan = rte_cpu_to_be_16(vlan); + } else { + rule_vlan = NULL; + } } + for (i = 0; i != RTE_DIM(priv->mac) + broadcast; ++i) { + const struct ether_addr *mac; + + /* Broadcasts are handled by an extra iteration. */ + if (i < RTE_DIM(priv->mac)) + mac = &priv->mac[i]; + else + mac = ð_mask.dst; + if (is_zero_ether_addr(mac)) + continue; + /* Check if MAC flow rule is already present. */ + for (flow = LIST_FIRST(&priv->flows); + flow && flow->internal; + flow = LIST_NEXT(flow, next)) { + const struct ibv_flow_spec_eth *eth = + (const void *)((uintptr_t)flow->ibv_attr + + sizeof(*flow->ibv_attr)); + unsigned int j; + + if (!flow->mac) + continue; + assert(flow->ibv_attr->type == IBV_FLOW_ATTR_NORMAL); + assert(flow->ibv_attr->num_of_specs == 1); + assert(eth->type == IBV_FLOW_SPEC_ETH); + assert(flow->rss); + if (rule_vlan && + (eth->val.vlan_tag != *rule_vlan || + eth->mask.vlan_tag != RTE_BE16(0x0fff))) + continue; + if (!rule_vlan && eth->mask.vlan_tag) + continue; + for (j = 0; j != sizeof(mac->addr_bytes); ++j) + if (eth->val.dst_mac[j] != mac->addr_bytes[j] || + eth->mask.dst_mac[j] != UINT8_C(0xff) || + eth->val.src_mac[j] != UINT8_C(0x00) || + eth->mask.src_mac[j] != UINT8_C(0x00)) + break; + if (j != sizeof(mac->addr_bytes)) + continue; + if (flow->rss->queues != queues || + memcmp(flow->rss->queue_id, rss_conf->queue, + queues * sizeof(flow->rss->queue_id[0]))) + continue; + break; + } + if (!flow || !flow->internal) { + /* Not found, create a new flow rule. */ + memcpy(rule_mac, mac, sizeof(*mac)); + flow = mlx4_flow_create(priv->dev, &attr, pattern, + actions, error); + if (!flow) { + err = -rte_errno; + goto error; + } + } + flow->select = 1; + flow->mac = 1; + } + if (rule_vlan) { + vlan = mlx4_flow_internal_next_vlan(priv, vlan + 1); + if (vlan < 4096) + goto next_vlan; + } + /* Take care of promiscuous and all multicast flow rules. */ + if (!broadcast) { + for (flow = LIST_FIRST(&priv->flows); + flow && flow->internal; + flow = LIST_NEXT(flow, next)) { + if (priv->dev->data->promiscuous) { + if (flow->promisc) + break; + } else { + assert(priv->dev->data->all_multicast); + if (flow->allmulti) + break; + } + } + if (flow && flow->internal) { + assert(flow->rss); + if (flow->rss->queues != queues || + memcmp(flow->rss->queue_id, rss_conf->queue, + queues * sizeof(flow->rss->queue_id[0]))) + flow = NULL; + } + if (!flow || !flow->internal) { + /* Not found, create a new flow rule. */ + if (priv->dev->data->promiscuous) { + pattern[1].spec = NULL; + pattern[1].mask = NULL; + } else { + assert(priv->dev->data->all_multicast); + pattern[1].spec = ð_allmulti; + pattern[1].mask = ð_allmulti; + } + pattern[2] = pattern[3]; + flow = mlx4_flow_create(priv->dev, &attr, pattern, + actions, error); + if (!flow) { + err = -rte_errno; + goto error; + } + } + assert(flow->promisc || flow->allmulti); + flow->select = 1; + } +error: + /* Clear selection and clean up stale internal flow rules. */ + flow = LIST_FIRST(&priv->flows); + while (flow && flow->internal) { + struct rte_flow *next = LIST_NEXT(flow, next); + + if (!flow->select) + claim_zero(mlx4_flow_destroy(priv->dev, flow, error)); + else + flow->select = 0; + flow = next; + } + return err; } /** - * Destroy all flows. + * Synchronize flow rules. * - * @see rte_flow_flush() - * @see rte_flow_ops + * This function synchronizes flow rules with the state of the device by + * taking into account isolated mode and whether target queues are + * configured. + * + * @param priv + * Pointer to private structure. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -mlx4_flow_flush(struct rte_eth_dev *dev, - struct rte_flow_error *error) +mlx4_flow_sync(struct priv *priv, struct rte_flow_error *error) { - struct priv *priv = dev->data->dev_private; + struct rte_flow *flow; + int ret; - (void)error; - priv_lock(priv); - priv_flow_flush(priv); - priv_unlock(priv); + /* Internal flow rules are guaranteed to come first in the list. */ + if (priv->isolated) { + /* + * Get rid of them in isolated mode, stop at the first + * non-internal rule found. + */ + for (flow = LIST_FIRST(&priv->flows); + flow && flow->internal; + flow = LIST_FIRST(&priv->flows)) + claim_zero(mlx4_flow_destroy(priv->dev, flow, error)); + } else { + /* Refresh internal rules. */ + ret = mlx4_flow_internal(priv, error); + if (ret) + return ret; + } + /* Toggle the remaining flow rules . */ + LIST_FOREACH(flow, &priv->flows, next) { + ret = mlx4_flow_toggle(priv, flow, priv->started, error); + if (ret) + return ret; + } + if (!priv->started) + assert(!priv->drop); return 0; } /** - * Remove all flows. + * Clean up all flow rules. * - * Called by dev_stop() to remove all flows. + * Unlike mlx4_flow_flush(), this function takes care of all remaining flow + * rules regardless of whether they are internal or user-configured. * * @param priv * Pointer to private structure. */ void -mlx4_priv_flow_stop(struct priv *priv) +mlx4_flow_clean(struct priv *priv) { struct rte_flow *flow; - for (flow = LIST_FIRST(&priv->flows); - flow; - flow = LIST_NEXT(flow, next)) { - claim_zero(ibv_destroy_flow(flow->ibv_flow)); - flow->ibv_flow = NULL; - DEBUG("Flow %p removed", (void *)flow); - } - mlx4_flow_destroy_drop_queue(priv); + while ((flow = LIST_FIRST(&priv->flows))) + mlx4_flow_destroy(priv->dev, flow, NULL); + assert(LIST_EMPTY(&priv->rss)); } +static const struct rte_flow_ops mlx4_flow_ops = { + .validate = mlx4_flow_validate, + .create = mlx4_flow_create, + .destroy = mlx4_flow_destroy, + .flush = mlx4_flow_flush, + .isolate = mlx4_flow_isolate, +}; + /** - * Add all flows. + * Manage filter operations. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device structure. + * @param filter_type + * Filter type. + * @param filter_op + * Operation to perform. + * @param arg + * Pointer to operation-specific structure. * * @return - * 0 on success, a errno value otherwise and rte_errno is set. + * 0 on success, negative errno value otherwise and rte_errno is set. */ int -mlx4_priv_flow_start(struct priv *priv) +mlx4_filter_ctrl(struct rte_eth_dev *dev, + enum rte_filter_type filter_type, + enum rte_filter_op filter_op, + void *arg) { - int ret; - struct ibv_qp *qp; - struct rte_flow *flow; - - ret = mlx4_flow_create_drop_queue(priv); - if (ret) - return -1; - for (flow = LIST_FIRST(&priv->flows); - flow; - flow = LIST_NEXT(flow, next)) { - qp = flow->qp ? flow->qp : priv->flow_drop_queue->qp; - flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr); - if (!flow->ibv_flow) { - DEBUG("Flow %p cannot be applied", (void *)flow); - rte_errno = EINVAL; - return rte_errno; - } - DEBUG("Flow %p applied", (void *)flow); + switch (filter_type) { + case RTE_ETH_FILTER_GENERIC: + if (filter_op != RTE_ETH_FILTER_GET) + break; + *(const void **)arg = &mlx4_flow_ops; + return 0; + default: + ERROR("%p: filter type (%d) not supported", + (void *)dev, filter_type); + break; } - return 0; + rte_errno = ENOTSUP; + return -rte_errno; } |