author      Luca Boccassi <luca.boccassi@gmail.com>    2018-11-19 12:59:01 +0000
committer   Luca Boccassi <luca.boccassi@gmail.com>    2018-11-19 12:59:24 +0000
commit      29058550643267a554e0368806dece63b047c5cb
tree        a6f573fe3fbc14585529b83cfcc65da2dceefbf5 /drivers/net/mlx5
parent      8a853e3f0275efc8b05cb195085d45946942744a
New upstream version 18.11-rc4 (tag: upstream/18.11-rc4)
Change-Id: I861e1a2f7df210f57f44f1ad56b9ef789a4675e3
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'drivers/net/mlx5')
-rw-r--r--  drivers/net/mlx5/mlx5_flow.c           |  29
-rw-r--r--  drivers/net/mlx5/mlx5_flow.h           |   9
-rw-r--r--  drivers/net/mlx5/mlx5_flow_dv.c        | 183
-rw-r--r--  drivers/net/mlx5/mlx5_flow_tcf.c       | 321
-rw-r--r--  drivers/net/mlx5/mlx5_flow_verbs.c     |  47
-rw-r--r--  drivers/net/mlx5/mlx5_mr.c             |  39
-rw-r--r--  drivers/net/mlx5/mlx5_rxq.c            |   5
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx.h           |  38
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec_neon.h  |   2
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec_sse.h   |   2
-rw-r--r--  drivers/net/mlx5/mlx5_stats.c          |   3
11 files changed, 491 insertions, 187 deletions
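
The most visible functional addition in this drop is MPLS-over-UDP/GRE pattern matching in the mlx5 PMD (see the mlx5_flow.c, mlx5_flow_dv.c and mlx5_flow_verbs.c hunks below). As a rough application-side sketch of the kind of rule the new validation path is meant to accept (not code from the patch; the port id, queue index and MPLS label are illustrative placeholders):

/*
 * Illustrative only: create an ingress rule steering MPLS-in-UDP traffic
 * to a given Rx queue. The PMD pins the outer UDP dport to 6635
 * (MLX5_UDP_PORT_MPLS) in the hardware matcher, so the application only
 * supplies the MPLS item.
 */
#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_flow.h>

static int
create_mpls_in_udp_rule(uint16_t port_id, uint16_t rx_queue)
{
        struct rte_flow_attr attr = { .ingress = 1 };
        /* Label 0x123; the label occupies the upper 20 bits of label_tc_s. */
        struct rte_flow_item_mpls mpls_spec = {
                .label_tc_s = { 0x00, 0x12, 0x30 },
        };
        struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH },
                { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
                { .type = RTE_FLOW_ITEM_TYPE_UDP },
                { .type = RTE_FLOW_ITEM_TYPE_MPLS,
                  .spec = &mpls_spec,
                  .mask = &rte_flow_item_mpls_mask },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        struct rte_flow_action_queue queue = { .index = rx_queue };
        struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error err;

        if (rte_flow_validate(port_id, &attr, pattern, actions, &err))
                return -rte_errno;
        return rte_flow_create(port_id, &attr, pattern, actions, &err) ?
               0 : -rte_errno;
}
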
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index 5ad3a11a..97dc3e1f 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -294,7 +294,7 @@ static struct mlx5_flow_tunnel_info tunnels_info[] = { }, { .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP, - .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP, + .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP, }, { .tunnel = MLX5_FLOW_LAYER_MPLS, @@ -1593,12 +1593,14 @@ mlx5_flow_validate_item_gre(const struct rte_flow_item *item, /** * Validate MPLS item. * + * @param[in] dev + * Pointer to the rte_eth_dev structure. * @param[in] item * Item specification. * @param[in] item_flags * Bit-fields that holds the items detected until now. - * @param[in] target_protocol - * The next protocol in the previous item. + * @param[in] prev_layer + * The protocol layer indicated in previous item. * @param[out] error * Pointer to error structure. * @@ -1606,16 +1608,27 @@ mlx5_flow_validate_item_gre(const struct rte_flow_item *item, * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -mlx5_flow_validate_item_mpls(const struct rte_flow_item *item __rte_unused, +mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused, + const struct rte_flow_item *item __rte_unused, uint64_t item_flags __rte_unused, - uint8_t target_protocol __rte_unused, + uint64_t prev_layer __rte_unused, struct rte_flow_error *error) { #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT const struct rte_flow_item_mpls *mask = item->mask; + struct priv *priv = dev->data->dev_private; int ret; - if (target_protocol != 0xff && target_protocol != IPPROTO_MPLS) + if (!priv->config.mpls_en) + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "MPLS not supported or" + " disabled in firmware" + " configuration."); + /* MPLS over IP, UDP, GRE is allowed */ + if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 | + MLX5_FLOW_LAYER_OUTER_L4_UDP | + MLX5_FLOW_LAYER_GRE))) return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, "protocol filtering not compatible" @@ -2127,14 +2140,14 @@ static void flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list, struct rte_flow *flow) { - flow_drv_destroy(dev, flow); - TAILQ_REMOVE(list, flow, next); /* * Update RX queue flags only if port is started, otherwise it is * already clean. */ if (dev->data->dev_started) flow_rxq_flags_trim(dev, flow); + flow_drv_destroy(dev, flow); + TAILQ_REMOVE(list, flow, next); rte_free(flow->fdir); rte_free(flow); } diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h index 51ab47fe..4a7c0529 100644 --- a/drivers/net/mlx5/mlx5_flow.h +++ b/drivers/net/mlx5/mlx5_flow.h @@ -114,6 +114,9 @@ #define IPPROTO_MPLS 137 #endif +/* UDP port number for MPLS */ +#define MLX5_UDP_PORT_MPLS 6635 + /* UDP port numbers for VxLAN. */ #define MLX5_UDP_PORT_VXLAN 4789 #define MLX5_UDP_PORT_VXLAN_GPE 4790 @@ -219,6 +222,7 @@ struct mlx5_flow_dv { struct mlx5_flow_tcf { struct nlmsghdr *nlh; struct tcmsg *tcm; + uint32_t *ptc_flags; /**< tc rule applied flags. */ union { /**< Tunnel encap/decap descriptor. 
*/ struct flow_tcf_tunnel_hdr *tunnel; struct flow_tcf_vxlan_decap *vxlan_decap; @@ -381,9 +385,10 @@ int mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, int mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, uint64_t item_flags, struct rte_flow_error *error); -int mlx5_flow_validate_item_mpls(const struct rte_flow_item *item, +int mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev, + const struct rte_flow_item *item, uint64_t item_flags, - uint8_t target_protocol, + uint64_t prev_layer, struct rte_flow_error *error); int mlx5_flow_validate_item_tcp(const struct rte_flow_item *item, uint64_t item_flags, diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index a2edd168..1f318748 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -775,6 +775,7 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, int ret; uint64_t action_flags = 0; uint64_t item_flags = 0; + uint64_t last_item = 0; int tunnel = 0; uint8_t next_protocol = 0xff; int actions_n = 0; @@ -794,24 +795,24 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, error); if (ret < 0) return ret; - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 : - MLX5_FLOW_LAYER_OUTER_L2; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : + MLX5_FLOW_LAYER_OUTER_L2; break; case RTE_FLOW_ITEM_TYPE_VLAN: ret = mlx5_flow_validate_item_vlan(items, item_flags, error); if (ret < 0) return ret; - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : - MLX5_FLOW_LAYER_OUTER_VLAN; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN : + MLX5_FLOW_LAYER_OUTER_VLAN; break; case RTE_FLOW_ITEM_TYPE_IPV4: ret = mlx5_flow_validate_item_ipv4(items, item_flags, error); if (ret < 0) return ret; - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : - MLX5_FLOW_LAYER_OUTER_L3_IPV4; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : + MLX5_FLOW_LAYER_OUTER_L3_IPV4; if (items->mask != NULL && ((const struct rte_flow_item_ipv4 *) items->mask)->hdr.next_proto_id) { @@ -831,8 +832,8 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, error); if (ret < 0) return ret; - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : - MLX5_FLOW_LAYER_OUTER_L3_IPV6; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : + MLX5_FLOW_LAYER_OUTER_L3_IPV6; if (items->mask != NULL && ((const struct rte_flow_item_ipv6 *) items->mask)->hdr.proto) { @@ -855,8 +856,8 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, error); if (ret < 0) return ret; - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : - MLX5_FLOW_LAYER_OUTER_L4_TCP; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : + MLX5_FLOW_LAYER_OUTER_L4_TCP; break; case RTE_FLOW_ITEM_TYPE_UDP: ret = mlx5_flow_validate_item_udp(items, item_flags, @@ -864,8 +865,8 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, error); if (ret < 0) return ret; - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP : - MLX5_FLOW_LAYER_OUTER_L4_UDP; + last_item = tunnel ? 
MLX5_FLOW_LAYER_INNER_L4_UDP : + MLX5_FLOW_LAYER_OUTER_L4_UDP; break; case RTE_FLOW_ITEM_TYPE_GRE: case RTE_FLOW_ITEM_TYPE_NVGRE: @@ -873,14 +874,14 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, next_protocol, error); if (ret < 0) return ret; - item_flags |= MLX5_FLOW_LAYER_GRE; + last_item = MLX5_FLOW_LAYER_GRE; break; case RTE_FLOW_ITEM_TYPE_VXLAN: ret = mlx5_flow_validate_item_vxlan(items, item_flags, error); if (ret < 0) return ret; - item_flags |= MLX5_FLOW_LAYER_VXLAN; + last_item = MLX5_FLOW_LAYER_VXLAN; break; case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: ret = mlx5_flow_validate_item_vxlan_gpe(items, @@ -888,20 +889,29 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, error); if (ret < 0) return ret; - item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE; + last_item = MLX5_FLOW_LAYER_VXLAN_GPE; + break; + case RTE_FLOW_ITEM_TYPE_MPLS: + ret = mlx5_flow_validate_item_mpls(dev, items, + item_flags, + last_item, error); + if (ret < 0) + return ret; + last_item = MLX5_FLOW_LAYER_MPLS; break; case RTE_FLOW_ITEM_TYPE_META: ret = flow_dv_validate_item_meta(dev, items, attr, error); if (ret < 0) return ret; - item_flags |= MLX5_FLOW_ITEM_METADATA; + last_item = MLX5_FLOW_ITEM_METADATA; break; default: return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, NULL, "item not supported"); } + item_flags |= last_item; } for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { if (actions_n == MLX5_DV_MAX_NUMBER_OF_ACTIONS) @@ -1609,6 +1619,96 @@ flow_dv_translate_item_vxlan(void *matcher, void *key, } /** + * Add MPLS item to matcher and to the value. + * + * @param[in, out] matcher + * Flow matcher. + * @param[in, out] key + * Flow matcher value. + * @param[in] item + * Flow pattern to translate. + * @param[in] prev_layer + * The protocol layer indicated in previous item. + * @param[in] inner + * Item is inner pattern. 
+ */ +static void +flow_dv_translate_item_mpls(void *matcher, void *key, + const struct rte_flow_item *item, + uint64_t prev_layer, + int inner) +{ + const uint32_t *in_mpls_m = item->mask; + const uint32_t *in_mpls_v = item->spec; + uint32_t *out_mpls_m = 0; + uint32_t *out_mpls_v = 0; + void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters); + void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters); + void *misc2_m = MLX5_ADDR_OF(fte_match_param, matcher, + misc_parameters_2); + void *misc2_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters_2); + void *headers_m = MLX5_ADDR_OF(fte_match_param, matcher, outer_headers); + void *headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers); + + switch (prev_layer) { + case MLX5_FLOW_LAYER_OUTER_L4_UDP: + MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, + MLX5_UDP_PORT_MPLS); + break; + case MLX5_FLOW_LAYER_GRE: + MLX5_SET(fte_match_set_misc, misc_m, gre_protocol, 0xffff); + MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, + ETHER_TYPE_MPLS); + break; + default: + MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, + IPPROTO_MPLS); + break; + } + if (!in_mpls_v) + return; + if (!in_mpls_m) + in_mpls_m = (const uint32_t *)&rte_flow_item_mpls_mask; + switch (prev_layer) { + case MLX5_FLOW_LAYER_OUTER_L4_UDP: + out_mpls_m = + (uint32_t *)MLX5_ADDR_OF(fte_match_set_misc2, misc2_m, + outer_first_mpls_over_udp); + out_mpls_v = + (uint32_t *)MLX5_ADDR_OF(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_udp); + break; + case MLX5_FLOW_LAYER_GRE: + out_mpls_m = + (uint32_t *)MLX5_ADDR_OF(fte_match_set_misc2, misc2_m, + outer_first_mpls_over_gre); + out_mpls_v = + (uint32_t *)MLX5_ADDR_OF(fte_match_set_misc2, misc2_v, + outer_first_mpls_over_gre); + break; + default: + /* Inner MPLS not over GRE is not supported. */ + if (!inner) { + out_mpls_m = + (uint32_t *)MLX5_ADDR_OF(fte_match_set_misc2, + misc2_m, + outer_first_mpls); + out_mpls_v = + (uint32_t *)MLX5_ADDR_OF(fte_match_set_misc2, + misc2_v, + outer_first_mpls); + } + break; + } + if (out_mpls_m && out_mpls_v) { + *out_mpls_m = *in_mpls_m; + *out_mpls_v = *in_mpls_v & *in_mpls_m; + } +} + +/** * Add META item to matcher * * @param[in, out] matcher @@ -1786,6 +1886,7 @@ flow_dv_translate(struct rte_eth_dev *dev, struct priv *priv = dev->data->dev_private; struct rte_flow *flow = dev_flow->flow; uint64_t item_flags = 0; + uint64_t last_item = 0; uint64_t action_flags = 0; uint64_t priority = attr->priority; struct mlx5_flow_dv_matcher matcher = { @@ -1940,17 +2041,17 @@ flow_dv_translate(struct rte_eth_dev *dev, flow_dv_translate_item_eth(match_mask, match_value, items, tunnel); matcher.priority = MLX5_PRIORITY_MAP_L2; - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 : - MLX5_FLOW_LAYER_OUTER_L2; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : + MLX5_FLOW_LAYER_OUTER_L2; break; case RTE_FLOW_ITEM_TYPE_VLAN: flow_dv_translate_item_vlan(match_mask, match_value, items, tunnel); matcher.priority = MLX5_PRIORITY_MAP_L2; - item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 | - MLX5_FLOW_LAYER_INNER_VLAN) : - (MLX5_FLOW_LAYER_OUTER_L2 | - MLX5_FLOW_LAYER_OUTER_VLAN); + last_item = tunnel ? 
(MLX5_FLOW_LAYER_INNER_L2 | + MLX5_FLOW_LAYER_INNER_VLAN) : + (MLX5_FLOW_LAYER_OUTER_L2 | + MLX5_FLOW_LAYER_OUTER_VLAN); break; case RTE_FLOW_ITEM_TYPE_IPV4: flow_dv_translate_item_ipv4(match_mask, match_value, @@ -1961,8 +2062,8 @@ flow_dv_translate(struct rte_eth_dev *dev, (dev_flow, tunnel, MLX5_IPV4_LAYER_TYPES, MLX5_IPV4_IBV_RX_HASH); - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : - MLX5_FLOW_LAYER_OUTER_L3_IPV4; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : + MLX5_FLOW_LAYER_OUTER_L3_IPV4; break; case RTE_FLOW_ITEM_TYPE_IPV6: flow_dv_translate_item_ipv6(match_mask, match_value, @@ -1973,8 +2074,8 @@ flow_dv_translate(struct rte_eth_dev *dev, (dev_flow, tunnel, MLX5_IPV6_LAYER_TYPES, MLX5_IPV6_IBV_RX_HASH); - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : - MLX5_FLOW_LAYER_OUTER_L3_IPV6; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : + MLX5_FLOW_LAYER_OUTER_L3_IPV6; break; case RTE_FLOW_ITEM_TYPE_TCP: flow_dv_translate_item_tcp(match_mask, match_value, @@ -1985,8 +2086,8 @@ flow_dv_translate(struct rte_eth_dev *dev, (dev_flow, tunnel, ETH_RSS_TCP, IBV_RX_HASH_SRC_PORT_TCP | IBV_RX_HASH_DST_PORT_TCP); - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : - MLX5_FLOW_LAYER_OUTER_L4_TCP; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : + MLX5_FLOW_LAYER_OUTER_L4_TCP; break; case RTE_FLOW_ITEM_TYPE_UDP: flow_dv_translate_item_udp(match_mask, match_value, @@ -1997,37 +2098,43 @@ flow_dv_translate(struct rte_eth_dev *dev, (dev_flow, tunnel, ETH_RSS_UDP, IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP); - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP : - MLX5_FLOW_LAYER_OUTER_L4_UDP; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP : + MLX5_FLOW_LAYER_OUTER_L4_UDP; break; case RTE_FLOW_ITEM_TYPE_GRE: flow_dv_translate_item_gre(match_mask, match_value, items, tunnel); - item_flags |= MLX5_FLOW_LAYER_GRE; + last_item = MLX5_FLOW_LAYER_GRE; break; case RTE_FLOW_ITEM_TYPE_NVGRE: flow_dv_translate_item_nvgre(match_mask, match_value, items, tunnel); - item_flags |= MLX5_FLOW_LAYER_GRE; + last_item = MLX5_FLOW_LAYER_GRE; break; case RTE_FLOW_ITEM_TYPE_VXLAN: flow_dv_translate_item_vxlan(match_mask, match_value, items, tunnel); - item_flags |= MLX5_FLOW_LAYER_VXLAN; + last_item = MLX5_FLOW_LAYER_VXLAN; break; case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: flow_dv_translate_item_vxlan(match_mask, match_value, items, tunnel); - item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE; + last_item = MLX5_FLOW_LAYER_VXLAN_GPE; + break; + case RTE_FLOW_ITEM_TYPE_MPLS: + flow_dv_translate_item_mpls(match_mask, match_value, + items, last_item, tunnel); + last_item = MLX5_FLOW_LAYER_MPLS; break; case RTE_FLOW_ITEM_TYPE_META: flow_dv_translate_item_meta(match_mask, match_value, items); - item_flags |= MLX5_FLOW_ITEM_METADATA; + last_item = MLX5_FLOW_ITEM_METADATA; break; default: break; } + item_flags |= last_item; } assert(!flow_dv_check_valid_spec(matcher.mask.buf, dev_flow->dv.value.buf)); @@ -2275,8 +2382,10 @@ flow_dv_query(struct rte_eth_dev *dev __rte_unused, void *data __rte_unused, struct rte_flow_error *error __rte_unused) { - rte_errno = ENOTSUP; - return -rte_errno; + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, + "flow query with DV is not supported"); } diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c index 97d2a54c..2f6b7d63 100644 --- a/drivers/net/mlx5/mlx5_flow_tcf.c +++ b/drivers/net/mlx5/mlx5_flow_tcf.c @@ -160,6 +160,9 @@ struct tc_tunnel_key { #ifndef TCA_CLS_FLAGS_SKIP_SW #define 
TCA_CLS_FLAGS_SKIP_SW (1 << 1) #endif +#ifndef TCA_CLS_FLAGS_IN_HW +#define TCA_CLS_FLAGS_IN_HW (1 << 2) +#endif #ifndef HAVE_TCA_CHAIN #define TCA_CHAIN 11 #endif @@ -3699,6 +3702,8 @@ override_na_vlan_priority: assert(na_flower); assert(na_flower_act); mnl_attr_nest_end(nlh, na_flower_act); + dev_flow->tcf.ptc_flags = mnl_attr_get_payload + (mnl_nlmsg_get_payload_tail(nlh)); mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, decap.vxlan ? 0 : TCA_CLS_FLAGS_SKIP_SW); mnl_attr_nest_end(nlh, na_flower); @@ -3717,10 +3722,6 @@ override_na_vlan_priority: * @param nlh * Message to send. This function always raises the NLM_F_ACK flag before * sending. - * @param[in] msglen - * Message length. Message buffer may contain multiple commands and - * nlmsg_len field not always corresponds to actual message length. - * If 0 specified the nlmsg_len field in header is used as message length. * @param[in] cb * Callback handler for received message. * @param[in] arg @@ -3732,52 +3733,64 @@ override_na_vlan_priority: static int flow_tcf_nl_ack(struct mlx5_flow_tcf_context *tcf, struct nlmsghdr *nlh, - uint32_t msglen, mnl_cb_t cb, void *arg) { unsigned int portid = mnl_socket_get_portid(tcf->nl); uint32_t seq = tcf->seq++; - int err, ret; + int ret, err = 0; assert(tcf->nl); assert(tcf->buf); - if (!seq) + if (!seq) { /* seq 0 is reserved for kernel event-driven notifications. */ seq = tcf->seq++; + } nlh->nlmsg_seq = seq; - if (!msglen) { - msglen = nlh->nlmsg_len; - nlh->nlmsg_flags |= NLM_F_ACK; + nlh->nlmsg_flags |= NLM_F_ACK; + ret = mnl_socket_sendto(tcf->nl, nlh, nlh->nlmsg_len); + if (ret <= 0) { + /* Message send error occurres. */ + rte_errno = errno; + return -rte_errno; } - ret = mnl_socket_sendto(tcf->nl, nlh, msglen); - err = (ret <= 0) ? errno : 0; nlh = (struct nlmsghdr *)(tcf->buf); /* * The following loop postpones non-fatal errors until multipart * messages are complete. */ - if (ret > 0) - while (true) { - ret = mnl_socket_recvfrom(tcf->nl, tcf->buf, - tcf->buf_size); + while (true) { + ret = mnl_socket_recvfrom(tcf->nl, tcf->buf, tcf->buf_size); + if (ret < 0) { + err = errno; + /* + * In case of overflow Will receive till + * end of multipart message. We may lost part + * of reply messages but mark and return an error. + */ + if (err != ENOSPC || + !(nlh->nlmsg_flags & NLM_F_MULTI) || + nlh->nlmsg_type == NLMSG_DONE) + break; + } else { + ret = mnl_cb_run(nlh, ret, seq, portid, cb, arg); + if (!ret) { + /* + * libmnl returns 0 if DONE or + * success ACK message found. + */ + break; + } if (ret < 0) { + /* + * ACK message with error found + * or some error occurred. + */ err = errno; - if (err != ENOSPC) - break; - } - if (!err) { - ret = mnl_cb_run(nlh, ret, seq, portid, - cb, arg); - if (ret < 0) { - err = errno; - break; - } - } - /* Will receive till end of multipart message */ - if (!(nlh->nlmsg_flags & NLM_F_MULTI) || - nlh->nlmsg_type == NLMSG_DONE) break; + } + /* We should continue receiving. 
*/ } + } if (!err) return 0; rte_errno = err; @@ -3886,7 +3899,7 @@ flow_tcf_send_nlcmd(struct mlx5_flow_tcf_context *tcf, nlh = (struct nlmsghdr *)&bc->msg[msg]; assert((bc->size - msg) >= nlh->nlmsg_len); msg += nlh->nlmsg_len; - rc = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL); + rc = flow_tcf_nl_ack(tcf, nlh, NULL, NULL); if (rc) { DRV_LOG(WARNING, "netlink: cleanup error %d", rc); @@ -4019,7 +4032,7 @@ flow_tcf_encap_local_cleanup(struct mlx5_flow_tcf_context *tcf, ifa->ifa_family = AF_UNSPEC; ifa->ifa_index = ifindex; ifa->ifa_scope = RT_SCOPE_LINK; - ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_local_cb, &ctx); + ret = flow_tcf_nl_ack(tcf, nlh, flow_tcf_collect_local_cb, &ctx); if (ret) DRV_LOG(WARNING, "netlink: query device list error %d", ret); ret = flow_tcf_send_nlcmd(tcf, &ctx); @@ -4140,7 +4153,7 @@ flow_tcf_encap_neigh_cleanup(struct mlx5_flow_tcf_context *tcf, ndm->ndm_family = AF_UNSPEC; ndm->ndm_ifindex = ifindex; ndm->ndm_state = NUD_PERMANENT; - ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_neigh_cb, &ctx); + ret = flow_tcf_nl_ack(tcf, nlh, flow_tcf_collect_neigh_cb, &ctx); if (ret) DRV_LOG(WARNING, "netlink: query device list error %d", ret); ret = flow_tcf_send_nlcmd(tcf, &ctx); @@ -4269,7 +4282,7 @@ flow_tcf_encap_iface_cleanup(struct mlx5_flow_tcf_context *tcf, nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm)); ifm->ifi_family = AF_UNSPEC; - ret = flow_tcf_nl_ack(tcf, nlh, 0, flow_tcf_collect_vxlan_cb, &ctx); + ret = flow_tcf_nl_ack(tcf, nlh, flow_tcf_collect_vxlan_cb, &ctx); if (ret) DRV_LOG(WARNING, "netlink: query device list error %d", ret); ret = flow_tcf_send_nlcmd(tcf, &ctx); @@ -4341,7 +4354,7 @@ flow_tcf_rule_local(struct mlx5_flow_tcf_context *tcf, sizeof(encap->ipv6.dst), &encap->ipv6.dst); } - if (!flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL)) + if (!flow_tcf_nl_ack(tcf, nlh, NULL, NULL)) return 0; return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -4404,7 +4417,7 @@ flow_tcf_rule_neigh(struct mlx5_flow_tcf_context *tcf, if (encap->mask & FLOW_TCF_ENCAP_ETH_DST) mnl_attr_put(nlh, NDA_LLADDR, sizeof(encap->eth.dst), &encap->eth.dst); - if (!flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL)) + if (!flow_tcf_nl_ack(tcf, nlh, NULL, NULL)) return 0; return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -4679,7 +4692,7 @@ flow_tcf_vtep_delete(struct mlx5_flow_tcf_context *tcf, ifm->ifi_family = AF_UNSPEC; ifm->ifi_index = vtep->ifindex; assert(sizeof(buf) >= nlh->nlmsg_len); - ret = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL); + ret = flow_tcf_nl_ack(tcf, nlh, NULL, NULL); if (ret) DRV_LOG(WARNING, "netlink: error deleting vxlan" " encap/decap ifindex %u", @@ -4769,7 +4782,7 @@ flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf, mnl_attr_nest_end(nlh, na_vxlan); mnl_attr_nest_end(nlh, na_info); assert(sizeof(buf) >= nlh->nlmsg_len); - ret = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL); + ret = flow_tcf_nl_ack(tcf, nlh, NULL, NULL); if (ret) { DRV_LOG(WARNING, "netlink: VTEP %s create failure (%d)", @@ -4811,7 +4824,7 @@ flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf, ifm->ifi_index = vtep->ifindex; ifm->ifi_flags = IFF_UP; ifm->ifi_change = IFF_UP; - ret = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL); + ret = flow_tcf_nl_ack(tcf, nlh, NULL, NULL); if (ret) { rte_flow_error_set(error, -errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -5069,6 +5082,172 @@ flow_tcf_vtep_release(struct mlx5_flow_tcf_context *tcf, pthread_mutex_unlock(&vtep_list_mutex); 
} +struct tcf_nlcb_query { + uint32_t handle; + uint32_t tc_flags; + uint32_t flags_valid:1; +}; + +/** + * Collect queried rule attributes. This is callback routine called by + * libmnl mnl_cb_run() in loop for every message in received packet. + * Current implementation collects the flower flags only. + * + * @param[in] nlh + * Pointer to reply header. + * @param[in, out] arg + * Context pointer for this callback. + * + * @return + * A positive, nonzero value on success (required by libmnl + * to continue messages processing). + */ +static int +flow_tcf_collect_query_cb(const struct nlmsghdr *nlh, void *arg) +{ + struct tcf_nlcb_query *query = arg; + struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh); + struct nlattr *na, *na_opt; + bool flower = false; + + if (nlh->nlmsg_type != RTM_NEWTFILTER || + tcm->tcm_handle != query->handle) + return 1; + mnl_attr_for_each(na, nlh, sizeof(*tcm)) { + switch (mnl_attr_get_type(na)) { + case TCA_KIND: + if (strcmp(mnl_attr_get_payload(na), "flower")) { + /* Not flower filter, drop entire message. */ + return 1; + } + flower = true; + break; + case TCA_OPTIONS: + if (!flower) { + /* Not flower options, drop entire message. */ + return 1; + } + /* Check nested flower options. */ + mnl_attr_for_each_nested(na_opt, na) { + switch (mnl_attr_get_type(na_opt)) { + case TCA_FLOWER_FLAGS: + query->flags_valid = 1; + query->tc_flags = + mnl_attr_get_u32(na_opt); + break; + } + } + break; + } + } + return 1; +} + +/** + * Query a TC flower rule flags via netlink. + * + * @param[in] tcf + * Context object initialized by mlx5_flow_tcf_context_create(). + * @param[in] dev_flow + * Pointer to the flow. + * @param[out] pflags + * pointer to the data retrieved by the query. + * + * @return + * 0 on success, a negative errno value otherwise. + */ +static int +flow_tcf_query_flags(struct mlx5_flow_tcf_context *tcf, + struct mlx5_flow *dev_flow, + uint32_t *pflags) +{ + struct nlmsghdr *nlh; + struct tcmsg *tcm; + struct tcf_nlcb_query query = { + .handle = dev_flow->tcf.tcm->tcm_handle, + }; + + nlh = mnl_nlmsg_put_header(tcf->buf); + nlh->nlmsg_type = RTM_GETTFILTER; + nlh->nlmsg_flags = NLM_F_REQUEST; + tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm)); + memcpy(tcm, dev_flow->tcf.tcm, sizeof(*tcm)); + /* + * Ignore Netlink error for filter query operations. + * The reply length is sent by kernel as errno. + * Just check we got the flags option. + */ + flow_tcf_nl_ack(tcf, nlh, flow_tcf_collect_query_cb, &query); + if (!query.flags_valid) { + *pflags = 0; + return -ENOENT; + } + *pflags = query.tc_flags; + return 0; +} + +/** + * Query and check the in_hw set for specified rule. + * + * @param[in] tcf + * Context object initialized by mlx5_flow_tcf_context_create(). + * @param[in] dev_flow + * Pointer to the flow to check. + * + * @return + * 0 on success, a negative errno value otherwise. + */ +static int +flow_tcf_check_inhw(struct mlx5_flow_tcf_context *tcf, + struct mlx5_flow *dev_flow) +{ + uint32_t flags; + int ret; + + ret = flow_tcf_query_flags(tcf, dev_flow, &flags); + if (ret) + return ret; + return (flags & TCA_CLS_FLAGS_IN_HW) ? 0 : -ENOENT; +} + +/** + * Remove flow from E-Switch by sending Netlink message. + * + * @param[in] dev + * Pointer to Ethernet device. + * @param[in, out] flow + * Pointer to the sub flow. 
+ */ +static void +flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow) +{ + struct priv *priv = dev->data->dev_private; + struct mlx5_flow_tcf_context *ctx = priv->tcf_context; + struct mlx5_flow *dev_flow; + struct nlmsghdr *nlh; + + if (!flow) + return; + dev_flow = LIST_FIRST(&flow->dev_flows); + if (!dev_flow) + return; + /* E-Switch flow can't be expanded. */ + assert(!LIST_NEXT(dev_flow, next)); + if (dev_flow->tcf.applied) { + nlh = dev_flow->tcf.nlh; + nlh->nlmsg_type = RTM_DELTFILTER; + nlh->nlmsg_flags = NLM_F_REQUEST; + flow_tcf_nl_ack(ctx, nlh, NULL, NULL); + if (dev_flow->tcf.tunnel) { + assert(dev_flow->tcf.tunnel->vtep); + flow_tcf_vtep_release(ctx, + dev_flow->tcf.tunnel->vtep, + dev_flow); + dev_flow->tcf.tunnel->vtep = NULL; + } + dev_flow->tcf.applied = 0; + } +} /** * Apply flow to E-Switch by sending Netlink message. @@ -5120,8 +5299,22 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow, *dev_flow->tcf.tunnel->ifindex_ptr = dev_flow->tcf.tunnel->vtep->ifindex; } - if (!flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL)) { + if (!flow_tcf_nl_ack(ctx, nlh, NULL, NULL)) { dev_flow->tcf.applied = 1; + if (*dev_flow->tcf.ptc_flags & TCA_CLS_FLAGS_SKIP_SW) + return 0; + /* + * Rule was applied without skip_sw flag set. + * We should check whether the rule was acctually + * accepted by hardware (have look at in_hw flag). + */ + if (flow_tcf_check_inhw(ctx, dev_flow)) { + flow_tcf_remove(dev, flow); + return rte_flow_error_set + (error, ENOENT, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "netlink: rule has no in_hw flag set"); + } return 0; } if (dev_flow->tcf.tunnel) { @@ -5137,45 +5330,6 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow, } /** - * Remove flow from E-Switch by sending Netlink message. - * - * @param[in] dev - * Pointer to Ethernet device. - * @param[in, out] flow - * Pointer to the sub flow. - */ -static void -flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow) -{ - struct priv *priv = dev->data->dev_private; - struct mlx5_flow_tcf_context *ctx = priv->tcf_context; - struct mlx5_flow *dev_flow; - struct nlmsghdr *nlh; - - if (!flow) - return; - dev_flow = LIST_FIRST(&flow->dev_flows); - if (!dev_flow) - return; - /* E-Switch flow can't be expanded. */ - assert(!LIST_NEXT(dev_flow, next)); - if (dev_flow->tcf.applied) { - nlh = dev_flow->tcf.nlh; - nlh->nlmsg_type = RTM_DELTFILTER; - nlh->nlmsg_flags = NLM_F_REQUEST; - flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL); - if (dev_flow->tcf.tunnel) { - assert(dev_flow->tcf.tunnel->vtep); - flow_tcf_vtep_release(ctx, - dev_flow->tcf.tunnel->vtep, - dev_flow); - dev_flow->tcf.tunnel->vtep = NULL; - } - dev_flow->tcf.applied = 0; - } -} - -/** * Remove flow from E-Switch and release resources of the device flow. * * @param[in] dev @@ -5494,7 +5648,7 @@ flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh, * Message received from Netlink. * @param[out] data * Pointer to data area to be filled by the parsing routine. - * assumed to be a pinter to struct flow_tcf_stats_basic. + * assumed to be a pointer to struct flow_tcf_stats_basic. * * @return * MNL_CB_OK value. 
@@ -5542,7 +5696,7 @@ flow_tcf_query_count(struct rte_eth_dev *dev, void *data, struct rte_flow_error *error) { - struct flow_tcf_stats_basic sb_data = { 0 }; + struct flow_tcf_stats_basic sb_data; struct rte_flow_query_count *qc = data; struct priv *priv = dev->data->dev_private; struct mlx5_flow_tcf_context *ctx = priv->tcf_context; @@ -5553,6 +5707,7 @@ flow_tcf_query_count(struct rte_eth_dev *dev, ssize_t ret; assert(qc); + memset(&sb_data, 0, sizeof(sb_data)); dev_flow = LIST_FIRST(&flow->dev_flows); /* E-Switch flow can't be expanded. */ assert(!LIST_NEXT(dev_flow, next)); @@ -5714,7 +5869,7 @@ mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx, tcm->tcm_parent = TC_H_INGRESS; assert(sizeof(buf) >= nlh->nlmsg_len); /* Ignore errors when qdisc is already absent. */ - if (flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL) && + if (flow_tcf_nl_ack(ctx, nlh, NULL, NULL) && rte_errno != EINVAL && rte_errno != ENOENT) return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -5731,7 +5886,7 @@ mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx, tcm->tcm_parent = TC_H_INGRESS; mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress"); assert(sizeof(buf) >= nlh->nlmsg_len); - if (flow_tcf_nl_ack(ctx, nlh, 0, NULL, NULL)) + if (flow_tcf_nl_ack(ctx, nlh, NULL, NULL)) return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "netlink: failed to create ingress" diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c index d6d95db5..81ec59d7 100644 --- a/drivers/net/mlx5/mlx5_flow_verbs.c +++ b/drivers/net/mlx5/mlx5_flow_verbs.c @@ -68,9 +68,10 @@ flow_verbs_counter_create(struct rte_eth_dev *dev, #elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45) struct priv *priv = dev->data->dev_private; struct ibv_counters_init_attr init = {0}; - struct ibv_counter_attach_attr attach = {0}; + struct ibv_counter_attach_attr attach; int ret; + memset(&attach, 0, sizeof(attach)); counter->cs = mlx5_glue->create_counters(priv->ctx, &init); if (!counter->cs) { rte_errno = ENOTSUP; @@ -1017,6 +1018,7 @@ flow_verbs_validate(struct rte_eth_dev *dev, int ret; uint64_t action_flags = 0; uint64_t item_flags = 0; + uint64_t last_item = 0; uint8_t next_protocol = 0xff; if (items == NULL) @@ -1036,26 +1038,26 @@ flow_verbs_validate(struct rte_eth_dev *dev, error); if (ret < 0) return ret; - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 : - MLX5_FLOW_LAYER_OUTER_L2; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L2 : + MLX5_FLOW_LAYER_OUTER_L2; break; case RTE_FLOW_ITEM_TYPE_VLAN: ret = mlx5_flow_validate_item_vlan(items, item_flags, error); if (ret < 0) return ret; - item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 | - MLX5_FLOW_LAYER_INNER_VLAN) : - (MLX5_FLOW_LAYER_OUTER_L2 | - MLX5_FLOW_LAYER_OUTER_VLAN); + last_item = tunnel ? (MLX5_FLOW_LAYER_INNER_L2 | + MLX5_FLOW_LAYER_INNER_VLAN) : + (MLX5_FLOW_LAYER_OUTER_L2 | + MLX5_FLOW_LAYER_OUTER_VLAN); break; case RTE_FLOW_ITEM_TYPE_IPV4: ret = mlx5_flow_validate_item_ipv4(items, item_flags, error); if (ret < 0) return ret; - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : - MLX5_FLOW_LAYER_OUTER_L3_IPV4; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : + MLX5_FLOW_LAYER_OUTER_L3_IPV4; if (items->mask != NULL && ((const struct rte_flow_item_ipv4 *) items->mask)->hdr.next_proto_id) { @@ -1075,8 +1077,8 @@ flow_verbs_validate(struct rte_eth_dev *dev, error); if (ret < 0) return ret; - item_flags |= tunnel ? 
MLX5_FLOW_LAYER_INNER_L3_IPV6 : - MLX5_FLOW_LAYER_OUTER_L3_IPV6; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : + MLX5_FLOW_LAYER_OUTER_L3_IPV6; if (items->mask != NULL && ((const struct rte_flow_item_ipv6 *) items->mask)->hdr.proto) { @@ -1097,8 +1099,8 @@ flow_verbs_validate(struct rte_eth_dev *dev, error); if (ret < 0) return ret; - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP : - MLX5_FLOW_LAYER_OUTER_L4_UDP; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP : + MLX5_FLOW_LAYER_OUTER_L4_UDP; break; case RTE_FLOW_ITEM_TYPE_TCP: ret = mlx5_flow_validate_item_tcp @@ -1108,15 +1110,15 @@ flow_verbs_validate(struct rte_eth_dev *dev, error); if (ret < 0) return ret; - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : - MLX5_FLOW_LAYER_OUTER_L4_TCP; + last_item = tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : + MLX5_FLOW_LAYER_OUTER_L4_TCP; break; case RTE_FLOW_ITEM_TYPE_VXLAN: ret = mlx5_flow_validate_item_vxlan(items, item_flags, error); if (ret < 0) return ret; - item_flags |= MLX5_FLOW_LAYER_VXLAN; + last_item = MLX5_FLOW_LAYER_VXLAN; break; case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: ret = mlx5_flow_validate_item_vxlan_gpe(items, @@ -1124,28 +1126,29 @@ flow_verbs_validate(struct rte_eth_dev *dev, dev, error); if (ret < 0) return ret; - item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE; + last_item = MLX5_FLOW_LAYER_VXLAN_GPE; break; case RTE_FLOW_ITEM_TYPE_GRE: ret = mlx5_flow_validate_item_gre(items, item_flags, next_protocol, error); if (ret < 0) return ret; - item_flags |= MLX5_FLOW_LAYER_GRE; + last_item = MLX5_FLOW_LAYER_GRE; break; case RTE_FLOW_ITEM_TYPE_MPLS: - ret = mlx5_flow_validate_item_mpls(items, item_flags, - next_protocol, - error); + ret = mlx5_flow_validate_item_mpls(dev, items, + item_flags, + last_item, error); if (ret < 0) return ret; - item_flags |= MLX5_FLOW_LAYER_MPLS; + last_item = MLX5_FLOW_LAYER_MPLS; break; default: return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, NULL, "item not supported"); } + item_flags |= last_item; } for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { switch (actions->type) { diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c index f4b15d3f..442b2d23 100644 --- a/drivers/net/mlx5/mlx5_mr.c +++ b/drivers/net/mlx5/mlx5_mr.c @@ -342,8 +342,9 @@ mr_insert_dev_cache(struct rte_eth_dev *dev, struct mlx5_mr *mr) DRV_LOG(DEBUG, "port %u inserting MR(%p) to global cache", dev->data->port_id, (void *)mr); for (n = 0; n < mr->ms_bmp_n; ) { - struct mlx5_mr_cache entry = { 0, }; + struct mlx5_mr_cache entry; + memset(&entry, 0, sizeof(entry)); /* Find a contiguous chunk and advance the index. */ n = mr_find_next_chunk(mr, &entry, n); if (!entry.end) @@ -386,8 +387,9 @@ mr_lookup_dev_list(struct rte_eth_dev *dev, struct mlx5_mr_cache *entry, if (mr->ms_n == 0) continue; for (n = 0; n < mr->ms_bmp_n; ) { - struct mlx5_mr_cache ret = { 0, }; + struct mlx5_mr_cache ret; + memset(&ret, 0, sizeof(ret)); n = mr_find_next_chunk(mr, &ret, n); if (addr >= ret.start && addr < ret.end) { /* Found. */ @@ -570,7 +572,7 @@ mlx5_mr_create(struct rte_eth_dev *dev, struct mlx5_mr_cache *entry, * Find out a contiguous virtual address chunk in use, to which the * given address belongs, in order to register maximum range. 
In the * best case where mempools are not dynamically recreated and - * '--socket-mem' is speicified as an EAL option, it is very likely to + * '--socket-mem' is specified as an EAL option, it is very likely to * have only one MR(LKey) per a socket and per a hugepage-size even * though the system memory is highly fragmented. */ @@ -688,8 +690,9 @@ alloc_resources: */ for (n = 0; n < ms_n; ++n) { uintptr_t start; - struct mlx5_mr_cache ret = { 0, }; + struct mlx5_mr_cache ret; + memset(&ret, 0, sizeof(ret)); start = data_re.start + n * msl->page_sz; /* Exclude memsegs already registered by other MRs. */ if (mr_lookup_dev(dev, &ret, start) == UINT32_MAX) { @@ -1042,7 +1045,7 @@ mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr) * @return * Searched LKey on success, UINT32_MAX on no match. */ -uint32_t +static uint32_t mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr) { struct mlx5_txq_ctrl *txq_ctrl = @@ -1057,6 +1060,32 @@ mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr) } /** + * Bottom-half of LKey search on Tx. If it can't be searched in the memseg + * list, register the mempool of the mbuf as externally allocated memory. + * + * @param txq + * Pointer to Tx queue structure. + * @param mb + * Pointer to mbuf. + * + * @return + * Searched LKey on success, UINT32_MAX on no match. + */ +uint32_t +mlx5_tx_mb2mr_bh(struct mlx5_txq_data *txq, struct rte_mbuf *mb) +{ + uintptr_t addr = (uintptr_t)mb->buf_addr; + uint32_t lkey; + + lkey = mlx5_tx_addr2mr_bh(txq, addr); + if (lkey == UINT32_MAX && rte_errno == ENXIO) { + /* Mempool may have externally allocated memory. */ + return mlx5_tx_update_ext_mp(txq, addr, mlx5_mb2mp(mb)); + } + return lkey; +} + +/** * Flush all of the local cache entries. * * @param mr_ctrl diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index eef48502..183da0e2 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -1468,6 +1468,8 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, tmpl->rxq.mp = mp; tmpl->rxq.stats.idx = idx; tmpl->rxq.elts_n = log2above(desc); + tmpl->rxq.rq_repl_thresh = + MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n); tmpl->rxq.elts = (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1); #ifndef RTE_ARCH_64 @@ -1782,7 +1784,7 @@ mlx5_hrxq_new(struct rte_eth_dev *dev, struct mlx5_ind_table_ibv *ind_tbl; struct ibv_qp *qp; #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT - struct mlx5dv_qp_init_attr qp_init_attr = {0}; + struct mlx5dv_qp_init_attr qp_init_attr; #endif int err; @@ -1795,6 +1797,7 @@ mlx5_hrxq_new(struct rte_eth_dev *dev, return NULL; } #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT + memset(&qp_init_attr, 0, sizeof(qp_init_attr)); if (tunnel) { qp_init_attr.comp_mask = MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS; diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index 1b6200f6..f47d327c 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -101,6 +101,7 @@ struct mlx5_rxq_data { uint16_t consumed_strd; /* Number of consumed strides in WQE. */ uint32_t rq_pi; uint32_t cq_ci; + uint16_t rq_repl_thresh; /* Threshold for buffer replenishment. */ struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ uint16_t mprq_max_memcpy_len; /* Maximum size of packet to memcpy. 
*/ volatile void *wqes; @@ -363,7 +364,7 @@ uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl); uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr); -uint32_t mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr); +uint32_t mlx5_tx_mb2mr_bh(struct mlx5_txq_data *txq, struct rte_mbuf *mb); uint32_t mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr, struct rte_mempool *mp); @@ -379,17 +380,16 @@ uint32_t mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr, * Address of the lock to use for that UAR access. */ static __rte_always_inline void -__mlx5_uar_write64_relaxed(uint64_t val, volatile void *addr, +__mlx5_uar_write64_relaxed(uint64_t val, void *addr, rte_spinlock_t *lock __rte_unused) { #ifdef RTE_ARCH_64 - rte_write64_relaxed(val, addr); + *(uint64_t *)addr = val; #else /* !RTE_ARCH_64 */ rte_spinlock_lock(lock); - rte_write32_relaxed(val, addr); + *(uint32_t *)addr = val; rte_io_wmb(); - rte_write32_relaxed(val >> 32, - (volatile void *)((volatile char *)addr + 4)); + *((uint32_t *)addr + 1) = val >> 32; rte_spinlock_unlock(lock); #endif } @@ -407,7 +407,7 @@ __mlx5_uar_write64_relaxed(uint64_t val, volatile void *addr, * Address of the lock to use for that UAR access. */ static __rte_always_inline void -__mlx5_uar_write64(uint64_t val, volatile void *addr, rte_spinlock_t *lock) +__mlx5_uar_write64(uint64_t val, void *addr, rte_spinlock_t *lock) { rte_io_wmb(); __mlx5_uar_write64_relaxed(val, addr, lock); @@ -619,7 +619,7 @@ mlx5_tx_complete(struct mlx5_txq_data *txq) * @return * Memory pool where data is located for given mbuf. */ -static struct rte_mempool * +static inline struct rte_mempool * mlx5_mb2mp(struct rte_mbuf *buf) { if (unlikely(RTE_MBUF_INDIRECT(buf))) @@ -668,9 +668,10 @@ mlx5_rx_addr2mr(struct mlx5_rxq_data *rxq, uintptr_t addr) * Searched LKey on success, UINT32_MAX on no match. */ static __rte_always_inline uint32_t -mlx5_tx_addr2mr(struct mlx5_txq_data *txq, uintptr_t addr) +mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb) { struct mlx5_mr_ctrl *mr_ctrl = &txq->mr_ctrl; + uintptr_t addr = (uintptr_t)mb->buf_addr; uint32_t lkey; /* Check generation bit to see if there's any change on existing MRs. */ @@ -681,23 +682,8 @@ mlx5_tx_addr2mr(struct mlx5_txq_data *txq, uintptr_t addr) MLX5_MR_CACHE_N, addr); if (likely(lkey != UINT32_MAX)) return lkey; - /* Take slower bottom-half (binary search) on miss. */ - return mlx5_tx_addr2mr_bh(txq, addr); -} - -static __rte_always_inline uint32_t -mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb) -{ - uintptr_t addr = (uintptr_t)mb->buf_addr; - uint32_t lkey = mlx5_tx_addr2mr(txq, addr); - - if (likely(lkey != UINT32_MAX)) - return lkey; - if (rte_errno == ENXIO) { - /* Mempool may have externally allocated memory. */ - lkey = mlx5_tx_update_ext_mp(txq, addr, mlx5_mb2mp(mb)); - } - return lkey; + /* Take slower bottom-half on miss. */ + return mlx5_tx_mb2mr_bh(txq, mb); } /** diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h index 0b729f18..883fe1bf 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h @@ -732,7 +732,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, * N - (rq_ci - rq_pi) := # of buffers consumed (to be replenished). 
*/ repl_n = q_n - (rxq->rq_ci - rxq->rq_pi); - if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n)) + if (repl_n >= rxq->rq_repl_thresh) mlx5_rx_replenish_bulk_mbuf(rxq, repl_n); /* See if there're unreturned mbufs from compressed CQE. */ rcvd_pkt = rxq->cq_ci - rxq->rq_pi; diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h index e0f95f92..14117c4b 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h @@ -716,7 +716,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, * N - (rq_ci - rq_pi) := # of buffers consumed (to be replenished). */ repl_n = q_n - (rxq->rq_ci - rxq->rq_pi); - if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n)) + if (repl_n >= rxq->rq_repl_thresh) mlx5_rx_replenish_bulk_mbuf(rxq, repl_n); /* See if there're unreturned mbufs from compressed CQE. */ rcvd_pkt = rxq->cq_ci - rxq->rq_pi; diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c index a14d1e49..fccb9af0 100644 --- a/drivers/net/mlx5/mlx5_stats.c +++ b/drivers/net/mlx5/mlx5_stats.c @@ -354,10 +354,11 @@ int mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { struct priv *priv = dev->data->dev_private; - struct rte_eth_stats tmp = {0}; + struct rte_eth_stats tmp; unsigned int i; unsigned int idx; + memset(&tmp, 0, sizeof(tmp)); /* Add software counters. */ for (i = 0; (i != priv->rxqs_n); ++i) { struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; |
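
A recurring refactor in the mlx5_rxtx.h and mlx5_mr.c hunks above is the split of the Tx LKey lookup into an inline fast path (mlx5_tx_mb2mr) and an out-of-line bottom half (mlx5_tx_mb2mr_bh) that handles cache misses and externally allocated mempool memory. A self-contained sketch of that idiom, with made-up names and a stubbed slow path rather than the driver's real MR logic:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define LKEY_MISS UINT32_MAX

struct lkey_cache {
        uintptr_t start[4];
        uintptr_t end[4];
        uint32_t lkey[4];
        unsigned int n;
};

/*
 * Bottom half: in the driver this would binary-search all registered MRs
 * and, failing that, register the mbuf's mempool as external memory.
 * Stubbed here to keep the sketch self-contained.
 */
static uint32_t
lkey_lookup_bh(struct lkey_cache *c, uintptr_t addr)
{
        (void)c;
        (void)addr;
        return LKEY_MISS;
}

/* Fast path: meant to be inlined into the per-packet Tx burst loop. */
static inline uint32_t
lkey_lookup(struct lkey_cache *c, uintptr_t addr)
{
        unsigned int i;

        for (i = 0; i < c->n; ++i)
                if (addr >= c->start[i] && addr < c->end[i])
                        return c->lkey[i];      /* hit: no function call */
        return lkey_lookup_bh(c, addr);         /* miss: take the slow path */
}

int
main(void)
{
        struct lkey_cache c = { { 0x1000 }, { 0x2000 }, { 42 }, 1 };

        printf("%" PRIu32 " %" PRIu32 "\n",
               lkey_lookup(&c, 0x1800), lkey_lookup(&c, 0x3000));
        return 0;
}
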