From 055c52583a2794da8ba1e85a48cce3832372b12f Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Wed, 8 Nov 2017 14:15:11 +0000 Subject: New upstream version 17.11-rc3 Change-Id: I6a5baa40612fe0c20f30b5fa773a6cbbac63a685 Signed-off-by: Luca Boccassi --- drivers/net/sfc/Makefile | 8 +- drivers/net/sfc/base/ef10_filter.c | 288 +++++++++++++++++++++++++++++-------- drivers/net/sfc/base/ef10_impl.h | 25 ++++ drivers/net/sfc/base/ef10_nic.c | 10 ++ drivers/net/sfc/base/ef10_rx.c | 96 ++++++++++--- drivers/net/sfc/base/efx.h | 105 +++++++++++--- drivers/net/sfc/base/efx_filter.c | 103 ++++++++++++- drivers/net/sfc/base/efx_impl.h | 20 ++- drivers/net/sfc/base/efx_rx.c | 142 +++++++++++++++--- drivers/net/sfc/base/hunt_nic.c | 7 + drivers/net/sfc/base/medford_nic.c | 7 + drivers/net/sfc/base/siena_nic.c | 3 + drivers/net/sfc/efsys.h | 2 +- drivers/net/sfc/sfc.c | 18 ++- drivers/net/sfc/sfc.h | 7 +- drivers/net/sfc/sfc_dp_rx.h | 5 + drivers/net/sfc/sfc_dp_tx.h | 9 ++ drivers/net/sfc/sfc_ef10_rx.c | 13 +- drivers/net/sfc/sfc_ef10_tx.c | 81 +++++++++-- drivers/net/sfc/sfc_ethdev.c | 69 +++++++-- drivers/net/sfc/sfc_flow.c | 188 +++++++++++++++++++++++- drivers/net/sfc/sfc_flow.h | 15 ++ drivers/net/sfc/sfc_rx.c | 74 ++++++++-- drivers/net/sfc/sfc_tso.c | 4 +- drivers/net/sfc/sfc_tweak.h | 3 + drivers/net/sfc/sfc_tx.c | 58 +++++++- 26 files changed, 1162 insertions(+), 198 deletions(-) (limited to 'drivers/net/sfc') diff --git a/drivers/net/sfc/Makefile b/drivers/net/sfc/Makefile index 57aa963b..2cfd62a2 100644 --- a/drivers/net/sfc/Makefile +++ b/drivers/net/sfc/Makefile @@ -65,13 +65,19 @@ CFLAGS += -Wbad-function-cast CFLAGS_BASE_DRIVER += -Wno-empty-body else ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y) CFLAGS_BASE_DRIVER += -Wno-unused-but-set-variable +# Suppress ICC false positive warning on 'bulk' may be used before its +# value is set +CFLAGS_sfc_ef10_tx.o += -wd3656 endif +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring +LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs +LDLIBS += -lrte_bus_pci # # List of base driver object files for which # special CFLAGS above should be applied # -BASE_DRIVER_OBJS=$(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c))) +BASE_DRIVER_OBJS=$(sort $(patsubst %.c,%.o,$(notdir $(wildcard $(SRCDIR)/base/*.c)))) $(foreach obj, $(BASE_DRIVER_OBJS), \ $(eval CFLAGS_$(obj)+=$(CFLAGS_BASE_DRIVER))) diff --git a/drivers/net/sfc/base/ef10_filter.c b/drivers/net/sfc/base/ef10_filter.c index 695bb847..e1faf1dd 100644 --- a/drivers/net/sfc/base/ef10_filter.c +++ b/drivers/net/sfc/base/ef10_filter.c @@ -123,29 +123,33 @@ ef10_filter_init( #define MATCH_MASK(match) (EFX_MASK32(match) << EFX_LOW_BIT(match)) EFX_STATIC_ASSERT(EFX_FILTER_MATCH_REM_HOST == - MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_SRC_IP)); + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_SRC_IP)); EFX_STATIC_ASSERT(EFX_FILTER_MATCH_LOC_HOST == - MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_DST_IP)); + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_DST_IP)); EFX_STATIC_ASSERT(EFX_FILTER_MATCH_REM_MAC == - MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_SRC_MAC)); + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_SRC_MAC)); EFX_STATIC_ASSERT(EFX_FILTER_MATCH_REM_PORT == - MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_SRC_PORT)); + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_SRC_PORT)); EFX_STATIC_ASSERT(EFX_FILTER_MATCH_LOC_MAC == - MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_DST_MAC)); + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_DST_MAC)); EFX_STATIC_ASSERT(EFX_FILTER_MATCH_LOC_PORT == - MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_DST_PORT)); + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_DST_PORT)); EFX_STATIC_ASSERT(EFX_FILTER_MATCH_ETHER_TYPE == - MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_ETHER_TYPE)); + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_ETHER_TYPE)); EFX_STATIC_ASSERT(EFX_FILTER_MATCH_INNER_VID == - MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_INNER_VLAN)); + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_INNER_VLAN)); EFX_STATIC_ASSERT(EFX_FILTER_MATCH_OUTER_VID == - MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_OUTER_VLAN)); + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_OUTER_VLAN)); EFX_STATIC_ASSERT(EFX_FILTER_MATCH_IP_PROTO == - MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_IP_PROTO)); + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_IP_PROTO)); + EFX_STATIC_ASSERT(EFX_FILTER_MATCH_IFRM_UNKNOWN_MCAST_DST == + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_MCAST_DST)); + EFX_STATIC_ASSERT(EFX_FILTER_MATCH_IFRM_UNKNOWN_UCAST_DST == + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_IFRM_UNKNOWN_UCAST_DST)); EFX_STATIC_ASSERT(EFX_FILTER_MATCH_UNKNOWN_MCAST_DST == - MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_UNKNOWN_MCAST_DST)); + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_MCAST_DST)); EFX_STATIC_ASSERT((uint32_t)EFX_FILTER_MATCH_UNKNOWN_UCAST_DST == - MATCH_MASK(MC_CMD_FILTER_OP_IN_MATCH_UNKNOWN_UCAST_DST)); + MATCH_MASK(MC_CMD_FILTER_OP_EXT_IN_MATCH_UNKNOWN_UCAST_DST)); #undef MATCH_MASK EFSYS_KMEM_ALLOC(enp->en_esip, sizeof (ef10_filter_table_t), eftp); @@ -186,27 +190,27 @@ efx_mcdi_filter_op_add( __inout ef10_filter_handle_t *handle) { efx_mcdi_req_t req; - uint8_t payload[MAX(MC_CMD_FILTER_OP_IN_LEN, - MC_CMD_FILTER_OP_OUT_LEN)]; + uint8_t payload[MAX(MC_CMD_FILTER_OP_EXT_IN_LEN, + MC_CMD_FILTER_OP_EXT_OUT_LEN)]; efx_rc_t rc; memset(payload, 0, sizeof (payload)); req.emr_cmd = MC_CMD_FILTER_OP; req.emr_in_buf = payload; - req.emr_in_length = MC_CMD_FILTER_OP_IN_LEN; + req.emr_in_length = MC_CMD_FILTER_OP_EXT_IN_LEN; req.emr_out_buf = payload; - req.emr_out_length = MC_CMD_FILTER_OP_OUT_LEN; + req.emr_out_length = MC_CMD_FILTER_OP_EXT_OUT_LEN; switch (filter_op) { case MC_CMD_FILTER_OP_IN_OP_REPLACE: - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_HANDLE_LO, + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_HANDLE_LO, handle->efh_lo); - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_HANDLE_HI, + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_HANDLE_HI, handle->efh_hi); /* Fall through */ case MC_CMD_FILTER_OP_IN_OP_INSERT: case MC_CMD_FILTER_OP_IN_OP_SUBSCRIBE: - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_OP, filter_op); + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_OP, filter_op); break; default: EFSYS_ASSERT(0); @@ -214,82 +218,123 @@ efx_mcdi_filter_op_add( goto fail1; } - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_PORT_ID, + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_PORT_ID, EVB_PORT_ID_ASSIGNED); - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_MATCH_FIELDS, + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_MATCH_FIELDS, spec->efs_match_flags); - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_RX_DEST, - MC_CMD_FILTER_OP_IN_RX_DEST_HOST); - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_RX_QUEUE, + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_RX_DEST, + MC_CMD_FILTER_OP_EXT_IN_RX_DEST_HOST); + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_RX_QUEUE, spec->efs_dmaq_id); + +#if EFSYS_OPT_RX_SCALE if (spec->efs_flags & EFX_FILTER_FLAG_RX_RSS) { - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_RX_CONTEXT, - spec->efs_rss_context); + uint32_t rss_context; + + if (spec->efs_rss_context == EFX_RSS_CONTEXT_DEFAULT) + rss_context = enp->en_rss_context; + else + rss_context = spec->efs_rss_context; + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_RX_CONTEXT, + rss_context); } - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_RX_MODE, +#endif + + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_RX_MODE, spec->efs_flags & EFX_FILTER_FLAG_RX_RSS ? - MC_CMD_FILTER_OP_IN_RX_MODE_RSS : - MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE); - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_TX_DEST, - MC_CMD_FILTER_OP_IN_TX_DEST_DEFAULT); + MC_CMD_FILTER_OP_EXT_IN_RX_MODE_RSS : + MC_CMD_FILTER_OP_EXT_IN_RX_MODE_SIMPLE); + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_TX_DEST, + MC_CMD_FILTER_OP_EXT_IN_TX_DEST_DEFAULT); if (filter_op != MC_CMD_FILTER_OP_IN_OP_REPLACE) { /* * NOTE: Unlike most MCDI requests, the filter fields * are presented in network (big endian) byte order. */ - memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_IN_SRC_MAC), + memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_EXT_IN_SRC_MAC), spec->efs_rem_mac, EFX_MAC_ADDR_LEN); - memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_IN_DST_MAC), + memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_EXT_IN_DST_MAC), spec->efs_loc_mac, EFX_MAC_ADDR_LEN); - MCDI_IN_SET_WORD(req, FILTER_OP_IN_SRC_PORT, + MCDI_IN_SET_WORD(req, FILTER_OP_EXT_IN_SRC_PORT, __CPU_TO_BE_16(spec->efs_rem_port)); - MCDI_IN_SET_WORD(req, FILTER_OP_IN_DST_PORT, + MCDI_IN_SET_WORD(req, FILTER_OP_EXT_IN_DST_PORT, __CPU_TO_BE_16(spec->efs_loc_port)); - MCDI_IN_SET_WORD(req, FILTER_OP_IN_ETHER_TYPE, + MCDI_IN_SET_WORD(req, FILTER_OP_EXT_IN_ETHER_TYPE, __CPU_TO_BE_16(spec->efs_ether_type)); - MCDI_IN_SET_WORD(req, FILTER_OP_IN_INNER_VLAN, + MCDI_IN_SET_WORD(req, FILTER_OP_EXT_IN_INNER_VLAN, __CPU_TO_BE_16(spec->efs_inner_vid)); - MCDI_IN_SET_WORD(req, FILTER_OP_IN_OUTER_VLAN, + MCDI_IN_SET_WORD(req, FILTER_OP_EXT_IN_OUTER_VLAN, __CPU_TO_BE_16(spec->efs_outer_vid)); /* IP protocol (in low byte, high byte is zero) */ - MCDI_IN_SET_BYTE(req, FILTER_OP_IN_IP_PROTO, + MCDI_IN_SET_BYTE(req, FILTER_OP_EXT_IN_IP_PROTO, spec->efs_ip_proto); EFX_STATIC_ASSERT(sizeof (spec->efs_rem_host) == - MC_CMD_FILTER_OP_IN_SRC_IP_LEN); + MC_CMD_FILTER_OP_EXT_IN_SRC_IP_LEN); EFX_STATIC_ASSERT(sizeof (spec->efs_loc_host) == - MC_CMD_FILTER_OP_IN_DST_IP_LEN); + MC_CMD_FILTER_OP_EXT_IN_DST_IP_LEN); - memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_IN_SRC_IP), + memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_EXT_IN_SRC_IP), &spec->efs_rem_host.eo_byte[0], - MC_CMD_FILTER_OP_IN_SRC_IP_LEN); - memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_IN_DST_IP), + MC_CMD_FILTER_OP_EXT_IN_SRC_IP_LEN); + memcpy(MCDI_IN2(req, uint8_t, FILTER_OP_EXT_IN_DST_IP), &spec->efs_loc_host.eo_byte[0], - MC_CMD_FILTER_OP_IN_DST_IP_LEN); + MC_CMD_FILTER_OP_EXT_IN_DST_IP_LEN); + + /* + * On Medford, filters for encapsulated packets match based on + * the ether type and IP protocol in the outer frame. In + * addition we need to fill in the VNI or VSID type field. + */ + switch (spec->efs_encap_type) { + case EFX_TUNNEL_PROTOCOL_NONE: + break; + case EFX_TUNNEL_PROTOCOL_VXLAN: + case EFX_TUNNEL_PROTOCOL_GENEVE: + MCDI_IN_POPULATE_DWORD_1(req, + FILTER_OP_EXT_IN_VNI_OR_VSID, + FILTER_OP_EXT_IN_VNI_TYPE, + spec->efs_encap_type == EFX_TUNNEL_PROTOCOL_VXLAN ? + MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_VXLAN : + MC_CMD_FILTER_OP_EXT_IN_VNI_TYPE_GENEVE); + break; + case EFX_TUNNEL_PROTOCOL_NVGRE: + MCDI_IN_POPULATE_DWORD_1(req, + FILTER_OP_EXT_IN_VNI_OR_VSID, + FILTER_OP_EXT_IN_VSID_TYPE, + MC_CMD_FILTER_OP_EXT_IN_VSID_TYPE_NVGRE); + break; + default: + EFSYS_ASSERT(0); + rc = EINVAL; + goto fail2; + } } efx_mcdi_execute(enp, &req); if (req.emr_rc != 0) { rc = req.emr_rc; - goto fail2; + goto fail3; } - if (req.emr_out_length_used < MC_CMD_FILTER_OP_OUT_LEN) { + if (req.emr_out_length_used < MC_CMD_FILTER_OP_EXT_OUT_LEN) { rc = EMSGSIZE; - goto fail3; + goto fail4; } - handle->efh_lo = MCDI_OUT_DWORD(req, FILTER_OP_OUT_HANDLE_LO); - handle->efh_hi = MCDI_OUT_DWORD(req, FILTER_OP_OUT_HANDLE_HI); + handle->efh_lo = MCDI_OUT_DWORD(req, FILTER_OP_EXT_OUT_HANDLE_LO); + handle->efh_hi = MCDI_OUT_DWORD(req, FILTER_OP_EXT_OUT_HANDLE_HI); return (0); +fail4: + EFSYS_PROBE(fail4); fail3: EFSYS_PROBE(fail3); fail2: @@ -308,24 +353,24 @@ efx_mcdi_filter_op_delete( __inout ef10_filter_handle_t *handle) { efx_mcdi_req_t req; - uint8_t payload[MAX(MC_CMD_FILTER_OP_IN_LEN, - MC_CMD_FILTER_OP_OUT_LEN)]; + uint8_t payload[MAX(MC_CMD_FILTER_OP_EXT_IN_LEN, + MC_CMD_FILTER_OP_EXT_OUT_LEN)]; efx_rc_t rc; memset(payload, 0, sizeof (payload)); req.emr_cmd = MC_CMD_FILTER_OP; req.emr_in_buf = payload; - req.emr_in_length = MC_CMD_FILTER_OP_IN_LEN; + req.emr_in_length = MC_CMD_FILTER_OP_EXT_IN_LEN; req.emr_out_buf = payload; - req.emr_out_length = MC_CMD_FILTER_OP_OUT_LEN; + req.emr_out_length = MC_CMD_FILTER_OP_EXT_OUT_LEN; switch (filter_op) { case MC_CMD_FILTER_OP_IN_OP_REMOVE: - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_OP, + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_OP, MC_CMD_FILTER_OP_IN_OP_REMOVE); break; case MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE: - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_OP, + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_OP, MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); break; default: @@ -334,8 +379,8 @@ efx_mcdi_filter_op_delete( goto fail1; } - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_HANDLE_LO, handle->efh_lo); - MCDI_IN_SET_DWORD(req, FILTER_OP_IN_HANDLE_HI, handle->efh_hi); + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_HANDLE_LO, handle->efh_lo); + MCDI_IN_SET_DWORD(req, FILTER_OP_EXT_IN_HANDLE_HI, handle->efh_hi); efx_mcdi_execute_quiet(enp, &req); @@ -344,7 +389,7 @@ efx_mcdi_filter_op_delete( goto fail2; } - if (req.emr_out_length_used < MC_CMD_FILTER_OP_OUT_LEN) { + if (req.emr_out_length_used < MC_CMD_FILTER_OP_EXT_OUT_LEN) { rc = EMSGSIZE; goto fail3; } @@ -390,6 +435,8 @@ ef10_filter_equal( return (B_FALSE); if (left->efs_ip_proto != right->efs_ip_proto) return (B_FALSE); + if (left->efs_encap_type != right->efs_encap_type) + return (B_FALSE); return (B_TRUE); @@ -549,10 +596,6 @@ ef10_filter_add_internal( EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON || enp->en_family == EFX_FAMILY_MEDFORD); -#if EFSYS_OPT_RX_SCALE - spec->efs_rss_context = enp->en_rss_context; -#endif - hash = ef10_filter_hash(spec); /* @@ -1194,6 +1237,108 @@ fail1: return (rc); } +typedef struct ef10_filter_encap_entry_s { + uint16_t ether_type; + efx_tunnel_protocol_t encap_type; + uint32_t inner_frame_match; +} ef10_filter_encap_entry_t; + +#define EF10_ENCAP_FILTER_ENTRY(ipv, encap_type, inner_frame_match) \ + { EFX_ETHER_TYPE_##ipv, EFX_TUNNEL_PROTOCOL_##encap_type, \ + EFX_FILTER_INNER_FRAME_MATCH_UNKNOWN_##inner_frame_match } + +static ef10_filter_encap_entry_t ef10_filter_encap_list[] = { + EF10_ENCAP_FILTER_ENTRY(IPV4, VXLAN, UCAST_DST), + EF10_ENCAP_FILTER_ENTRY(IPV4, VXLAN, MCAST_DST), + EF10_ENCAP_FILTER_ENTRY(IPV6, VXLAN, UCAST_DST), + EF10_ENCAP_FILTER_ENTRY(IPV6, VXLAN, MCAST_DST), + + EF10_ENCAP_FILTER_ENTRY(IPV4, GENEVE, UCAST_DST), + EF10_ENCAP_FILTER_ENTRY(IPV4, GENEVE, MCAST_DST), + EF10_ENCAP_FILTER_ENTRY(IPV6, GENEVE, UCAST_DST), + EF10_ENCAP_FILTER_ENTRY(IPV6, GENEVE, MCAST_DST), + + EF10_ENCAP_FILTER_ENTRY(IPV4, NVGRE, UCAST_DST), + EF10_ENCAP_FILTER_ENTRY(IPV4, NVGRE, MCAST_DST), + EF10_ENCAP_FILTER_ENTRY(IPV6, NVGRE, UCAST_DST), + EF10_ENCAP_FILTER_ENTRY(IPV6, NVGRE, MCAST_DST), +}; + +#undef EF10_ENCAP_FILTER_ENTRY + +static __checkReturn efx_rc_t +ef10_filter_insert_encap_filters( + __in efx_nic_t *enp, + __in boolean_t mulcst, + __in efx_filter_flags_t filter_flags) +{ + ef10_filter_table_t *table = enp->en_filter.ef_ef10_filter_table; + uint32_t i; + efx_rc_t rc; + + EFX_STATIC_ASSERT(EFX_ARRAY_SIZE(ef10_filter_encap_list) <= + EFX_ARRAY_SIZE(table->eft_encap_filter_indexes)); + + /* + * On Medford, full-featured firmware can identify packets as being + * tunnel encapsulated, even if no encapsulated packet offloads are in + * use. When packets are identified as such, ordinary filters are not + * applied, only ones specific to encapsulated packets. Hence we need to + * insert filters for encapsulated packets in order to receive them. + * + * Separate filters need to be inserted for each ether type, + * encapsulation type, and inner frame type (unicast or multicast). To + * keep things simple and reduce the number of filters needed, catch-all + * filters for all combinations of types are inserted, even if + * all_unicst or all_mulcst have not been set. (These catch-all filters + * may well, however, fail to insert on unprivileged functions.) + */ + table->eft_encap_filter_count = 0; + for (i = 0; i < EFX_ARRAY_SIZE(ef10_filter_encap_list); i++) { + efx_filter_spec_t spec; + ef10_filter_encap_entry_t *encap_filter = + &ef10_filter_encap_list[i]; + + /* + * Skip multicast filters if we've not been asked for + * any multicast traffic. + */ + if ((mulcst == B_FALSE) && + (encap_filter->inner_frame_match == + EFX_FILTER_INNER_FRAME_MATCH_UNKNOWN_MCAST_DST)) + continue; + + efx_filter_spec_init_rx(&spec, EFX_FILTER_PRI_AUTO, + filter_flags, + table->eft_default_rxq); + efx_filter_spec_set_ether_type(&spec, encap_filter->ether_type); + rc = efx_filter_spec_set_encap_type(&spec, + encap_filter->encap_type, + encap_filter->inner_frame_match); + if (rc != 0) + goto fail1; + + rc = ef10_filter_add_internal(enp, &spec, B_TRUE, + &table->eft_encap_filter_indexes[ + table->eft_encap_filter_count]); + if (rc != 0) { + if (rc != EACCES) + goto fail2; + } else { + table->eft_encap_filter_count++; + } + } + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + static void ef10_filter_remove_old( __in efx_nic_t *enp) @@ -1289,6 +1434,12 @@ ef10_filter_reconfigure( } table->eft_mulcst_filter_count = 0; + for (i = 0; i < table->eft_encap_filter_count; i++) { + (void) ef10_filter_delete_internal(enp, + table->eft_encap_filter_indexes[i]); + } + table->eft_encap_filter_count = 0; + return (0); } @@ -1306,6 +1457,10 @@ ef10_filter_reconfigure( ef10_filter_set_entry_auto_old(table, table->eft_mulcst_filter_indexes[i]); } + for (i = 0; i < table->eft_encap_filter_count; i++) { + ef10_filter_set_entry_auto_old(table, + table->eft_encap_filter_indexes[i]); + } /* * Insert or renew unicast filters. @@ -1423,6 +1578,13 @@ ef10_filter_reconfigure( } } + if (encp->enc_tunnel_encapsulations_supported != 0) { + /* Try to insert filters for encapsulated packets. */ + (void) ef10_filter_insert_encap_filters(enp, + mulcst || all_mulcst || brdcst, + filter_flags); + } + /* Remove old filters which were not renewed */ ef10_filter_remove_old(enp); diff --git a/drivers/net/sfc/base/ef10_impl.h b/drivers/net/sfc/base/ef10_impl.h index 8c3dffee..8f9eb7a3 100644 --- a/drivers/net/sfc/base/ef10_impl.h +++ b/drivers/net/sfc/base/ef10_impl.h @@ -897,9 +897,22 @@ ef10_rx_scatter_enable( #if EFSYS_OPT_RX_SCALE +extern __checkReturn efx_rc_t +ef10_rx_scale_context_alloc( + __in efx_nic_t *enp, + __in efx_rx_scale_context_type_t type, + __in uint32_t num_queues, + __out uint32_t *rss_contextp); + +extern __checkReturn efx_rc_t +ef10_rx_scale_context_free( + __in efx_nic_t *enp, + __in uint32_t rss_context); + extern __checkReturn efx_rc_t ef10_rx_scale_mode_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in efx_rx_hash_alg_t alg, __in efx_rx_hash_type_t type, __in boolean_t insert); @@ -907,12 +920,14 @@ ef10_rx_scale_mode_set( extern __checkReturn efx_rc_t ef10_rx_scale_key_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in_ecount(n) uint8_t *key, __in size_t n); extern __checkReturn efx_rc_t ef10_rx_scale_tbl_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in_ecount(n) unsigned int *table, __in size_t n); @@ -1005,6 +1020,13 @@ typedef struct ef10_filter_entry_s { /* Allow for the broadcast address to be added to the multicast list */ #define EFX_EF10_FILTER_MULTICAST_FILTERS_MAX (EFX_MAC_MULTICAST_LIST_MAX + 1) +/* + * For encapsulated packets, there is one filter each for each combination of + * IPv4 or IPv6 outer frame, VXLAN, GENEVE or NVGRE packet type, and unicast or + * multicast inner frames. + */ +#define EFX_EF10_FILTER_ENCAP_FILTERS_MAX 12 + typedef struct ef10_filter_table_s { ef10_filter_entry_t eft_entry[EFX_EF10_FILTER_TBL_ROWS]; efx_rxq_t *eft_default_rxq; @@ -1016,6 +1038,9 @@ typedef struct ef10_filter_table_s { EFX_EF10_FILTER_MULTICAST_FILTERS_MAX]; uint32_t eft_mulcst_filter_count; boolean_t eft_using_all_mulcst; + uint32_t eft_encap_filter_indexes[ + EFX_EF10_FILTER_ENCAP_FILTERS_MAX]; + uint32_t eft_encap_filter_count; } ef10_filter_table_t; __checkReturn efx_rc_t diff --git a/drivers/net/sfc/base/ef10_nic.c b/drivers/net/sfc/base/ef10_nic.c index aac2679c..58d1b0af 100644 --- a/drivers/net/sfc/base/ef10_nic.c +++ b/drivers/net/sfc/base/ef10_nic.c @@ -1072,6 +1072,16 @@ ef10_get_datapath_caps( encp->enc_mac_stats_40g_tx_size_bins = CAP_FLAG2(flags2, MAC_STATS_40G_TX_SIZE_BINS) ? B_TRUE : B_FALSE; + /* + * Check if firmware supports VXLAN and NVGRE tunnels. + * The capability indicates Geneve protocol support as well. + */ + if (CAP_FLAG(flags, VXLAN_NVGRE)) + encp->enc_tunnel_encapsulations_supported = + (1u << EFX_TUNNEL_PROTOCOL_VXLAN) | + (1u << EFX_TUNNEL_PROTOCOL_GENEVE) | + (1u << EFX_TUNNEL_PROTOCOL_NVGRE); + #undef CAP_FLAG #undef CAP_FLAG2 diff --git a/drivers/net/sfc/base/ef10_rx.c b/drivers/net/sfc/base/ef10_rx.c index 661caa88..849f674c 100644 --- a/drivers/net/sfc/base/ef10_rx.c +++ b/drivers/net/sfc/base/ef10_rx.c @@ -159,7 +159,7 @@ fail1: static __checkReturn efx_rc_t efx_mcdi_rss_context_alloc( __in efx_nic_t *enp, - __in efx_rx_scale_support_t scale_support, + __in efx_rx_scale_context_type_t type, __in uint32_t num_queues, __out uint32_t *rss_contextp) { @@ -175,7 +175,7 @@ efx_mcdi_rss_context_alloc( goto fail1; } - switch (scale_support) { + switch (type) { case EFX_RX_SCALE_EXCLUSIVE: context_type = MC_CMD_RSS_CONTEXT_ALLOC_IN_TYPE_EXCLUSIVE; break; @@ -461,7 +461,7 @@ ef10_rx_init( * Allocated an exclusive RSS context, which allows both the * indirection table and key to be modified. */ - enp->en_rss_support = EFX_RX_SCALE_EXCLUSIVE; + enp->en_rss_context_type = EFX_RX_SCALE_EXCLUSIVE; enp->en_hash_support = EFX_RX_HASH_AVAILABLE; } else { /* @@ -469,7 +469,7 @@ ef10_rx_init( * operation without support for RSS. The pseudo-header in * received packets will not contain a Toeplitz hash value. */ - enp->en_rss_support = EFX_RX_SCALE_UNAVAILABLE; + enp->en_rss_context_type = EFX_RX_SCALE_UNAVAILABLE; enp->en_hash_support = EFX_RX_HASH_UNAVAILABLE; } @@ -489,10 +489,53 @@ ef10_rx_scatter_enable( } #endif /* EFSYS_OPT_RX_SCATTER */ +#if EFSYS_OPT_RX_SCALE + __checkReturn efx_rc_t +ef10_rx_scale_context_alloc( + __in efx_nic_t *enp, + __in efx_rx_scale_context_type_t type, + __in uint32_t num_queues, + __out uint32_t *rss_contextp) +{ + efx_rc_t rc; + + rc = efx_mcdi_rss_context_alloc(enp, type, num_queues, rss_contextp); + if (rc != 0) + goto fail1; + + return (0); + +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + return (rc); +} +#endif /* EFSYS_OPT_RX_SCALE */ + +#if EFSYS_OPT_RX_SCALE + __checkReturn efx_rc_t +ef10_rx_scale_context_free( + __in efx_nic_t *enp, + __in uint32_t rss_context) +{ + efx_rc_t rc; + + rc = efx_mcdi_rss_context_free(enp, rss_context); + if (rc != 0) + goto fail1; + + return (0); + +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + return (rc); +} +#endif /* EFSYS_OPT_RX_SCALE */ + #if EFSYS_OPT_RX_SCALE __checkReturn efx_rc_t ef10_rx_scale_mode_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in efx_rx_hash_alg_t alg, __in efx_rx_hash_type_t type, __in boolean_t insert) @@ -507,13 +550,16 @@ ef10_rx_scale_mode_set( goto fail1; } - if (enp->en_rss_support == EFX_RX_SCALE_UNAVAILABLE) { - rc = ENOTSUP; - goto fail2; + if (rss_context == EFX_RSS_CONTEXT_DEFAULT) { + if (enp->en_rss_context_type == EFX_RX_SCALE_UNAVAILABLE) { + rc = ENOTSUP; + goto fail2; + } + rss_context = enp->en_rss_context; } if ((rc = efx_mcdi_rss_context_set_flags(enp, - enp->en_rss_context, type)) != 0) + rss_context, type)) != 0) goto fail3; return (0); @@ -533,18 +579,24 @@ fail1: __checkReturn efx_rc_t ef10_rx_scale_key_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in_ecount(n) uint8_t *key, __in size_t n) { efx_rc_t rc; - if (enp->en_rss_support == EFX_RX_SCALE_UNAVAILABLE) { - rc = ENOTSUP; - goto fail1; + EFX_STATIC_ASSERT(EFX_RSS_KEY_SIZE == + MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN); + + if (rss_context == EFX_RSS_CONTEXT_DEFAULT) { + if (enp->en_rss_context_type == EFX_RX_SCALE_UNAVAILABLE) { + rc = ENOTSUP; + goto fail1; + } + rss_context = enp->en_rss_context; } - if ((rc = efx_mcdi_rss_context_set_key(enp, - enp->en_rss_context, key, n)) != 0) + if ((rc = efx_mcdi_rss_context_set_key(enp, rss_context, key, n)) != 0) goto fail2; return (0); @@ -562,18 +614,23 @@ fail1: __checkReturn efx_rc_t ef10_rx_scale_tbl_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in_ecount(n) unsigned int *table, __in size_t n) { efx_rc_t rc; - if (enp->en_rss_support == EFX_RX_SCALE_UNAVAILABLE) { - rc = ENOTSUP; - goto fail1; + + if (rss_context == EFX_RSS_CONTEXT_DEFAULT) { + if (enp->en_rss_context_type == EFX_RX_SCALE_UNAVAILABLE) { + rc = ENOTSUP; + goto fail1; + } + rss_context = enp->en_rss_context; } if ((rc = efx_mcdi_rss_context_set_table(enp, - enp->en_rss_context, table, n)) != 0) + rss_context, table, n)) != 0) goto fail2; return (0); @@ -964,11 +1021,10 @@ ef10_rx_fini( __in efx_nic_t *enp) { #if EFSYS_OPT_RX_SCALE - if (enp->en_rss_support != EFX_RX_SCALE_UNAVAILABLE) { + if (enp->en_rss_context_type != EFX_RX_SCALE_UNAVAILABLE) (void) efx_mcdi_rss_context_free(enp, enp->en_rss_context); - } enp->en_rss_context = 0; - enp->en_rss_support = EFX_RX_SCALE_UNAVAILABLE; + enp->en_rss_context_type = EFX_RX_SCALE_UNAVAILABLE; #else _NOTE(ARGUNUSED(enp)) #endif /* EFSYS_OPT_RX_SCALE */ diff --git a/drivers/net/sfc/base/efx.h b/drivers/net/sfc/base/efx.h index 7eabc370..57fba052 100644 --- a/drivers/net/sfc/base/efx.h +++ b/drivers/net/sfc/base/efx.h @@ -1088,6 +1088,14 @@ efx_bist_stop( #define EFX_FEATURE_FW_ASSISTED_TSO_V2 0x00002000 #define EFX_FEATURE_PACKED_STREAM 0x00004000 +typedef enum efx_tunnel_protocol_e { + EFX_TUNNEL_PROTOCOL_NONE = 0, + EFX_TUNNEL_PROTOCOL_VXLAN, + EFX_TUNNEL_PROTOCOL_GENEVE, + EFX_TUNNEL_PROTOCOL_NVGRE, + EFX_TUNNEL_NPROTOS +} efx_tunnel_protocol_t; + typedef struct efx_nic_cfg_s { uint32_t enc_board_type; uint32_t enc_phy_type; @@ -1119,6 +1127,7 @@ typedef struct efx_nic_cfg_s { uint32_t enc_rx_prefix_size; uint32_t enc_rx_buf_align_start; uint32_t enc_rx_buf_align_end; + uint32_t enc_rx_scale_max_exclusive_contexts; #if EFSYS_OPT_LOOPBACK efx_qword_t enc_loopback_types[EFX_LINK_NMODES]; #endif /* EFSYS_OPT_LOOPBACK */ @@ -1187,6 +1196,7 @@ typedef struct efx_nic_cfg_s { boolean_t enc_rx_var_packed_stream_supported; boolean_t enc_pm_and_rxdp_counters; boolean_t enc_mac_stats_40g_tx_size_bins; + uint32_t enc_tunnel_encapsulations_supported; /* External port identifier */ uint8_t enc_external_port; uint32_t enc_mcdi_max_payload_length; @@ -1873,6 +1883,9 @@ efx_rx_scatter_enable( __in unsigned int buf_size); #endif /* EFSYS_OPT_RX_SCATTER */ +/* Handle to represent use of the default RSS context. */ +#define EFX_RSS_CONTEXT_DEFAULT 0xffffffff + #if EFSYS_OPT_RX_SCALE typedef enum efx_rx_hash_alg_e { @@ -1892,30 +1905,44 @@ typedef enum efx_rx_hash_support_e { EFX_RX_HASH_AVAILABLE /* Insert hash with/without RSS */ } efx_rx_hash_support_t; +#define EFX_RSS_KEY_SIZE 40 /* RSS key size (bytes) */ #define EFX_RSS_TBL_SIZE 128 /* Rows in RX indirection table */ #define EFX_MAXRSS 64 /* RX indirection entry range */ #define EFX_MAXRSS_LEGACY 16 /* See bug16611 and bug17213 */ -typedef enum efx_rx_scale_support_e { - EFX_RX_SCALE_UNAVAILABLE = 0, /* Not supported */ +typedef enum efx_rx_scale_context_type_e { + EFX_RX_SCALE_UNAVAILABLE = 0, /* No RX scale context */ EFX_RX_SCALE_EXCLUSIVE, /* Writable key/indirection table */ EFX_RX_SCALE_SHARED /* Read-only key/indirection table */ -} efx_rx_scale_support_t; +} efx_rx_scale_context_type_t; extern __checkReturn efx_rc_t -efx_rx_hash_support_get( +efx_rx_hash_default_support_get( __in efx_nic_t *enp, __out efx_rx_hash_support_t *supportp); extern __checkReturn efx_rc_t -efx_rx_scale_support_get( +efx_rx_scale_default_support_get( + __in efx_nic_t *enp, + __out efx_rx_scale_context_type_t *typep); + +extern __checkReturn efx_rc_t +efx_rx_scale_context_alloc( __in efx_nic_t *enp, - __out efx_rx_scale_support_t *supportp); + __in efx_rx_scale_context_type_t type, + __in uint32_t num_queues, + __out uint32_t *rss_contextp); + +extern __checkReturn efx_rc_t +efx_rx_scale_context_free( + __in efx_nic_t *enp, + __in uint32_t rss_context); extern __checkReturn efx_rc_t efx_rx_scale_mode_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in efx_rx_hash_alg_t alg, __in efx_rx_hash_type_t type, __in boolean_t insert); @@ -1923,12 +1950,14 @@ efx_rx_scale_mode_set( extern __checkReturn efx_rc_t efx_rx_scale_tbl_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in_ecount(n) unsigned int *table, __in size_t n); extern __checkReturn efx_rc_t efx_rx_scale_key_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in_ecount(n) uint8_t *key, __in size_t n); @@ -2214,6 +2243,7 @@ efx_tx_qdestroy( #define EFX_IPPROTO_TCP 6 #define EFX_IPPROTO_UDP 17 +#define EFX_IPPROTO_GRE 47 /* Use RSS to spread across multiple queues */ #define EFX_FILTER_FLAG_RX_RSS 0x01 @@ -2232,6 +2262,10 @@ efx_tx_qdestroy( typedef unsigned int efx_filter_flags_t; +/* + * Flags which specify the fields to match on. The values are the same as in the + * MC_CMD_FILTER_OP/MC_CMD_FILTER_OP_EXT commands. + */ typedef enum efx_filter_match_flags_e { EFX_FILTER_MATCH_REM_HOST = 0x0001, /* Match by remote IP host * address */ @@ -2246,6 +2280,10 @@ typedef enum efx_filter_match_flags_e { EFX_FILTER_MATCH_OUTER_VID = 0x0100, /* Match by outer VLAN ID */ EFX_FILTER_MATCH_IP_PROTO = 0x0200, /* Match by IP transport * protocol */ + /* For encapsulated packets, match all multicast inner frames */ + EFX_FILTER_MATCH_IFRM_UNKNOWN_MCAST_DST = 0x01000000, + /* For encapsulated packets, match all unicast inner frames */ + EFX_FILTER_MATCH_IFRM_UNKNOWN_UCAST_DST = 0x02000000, /* Match otherwise-unmatched multicast and broadcast packets */ EFX_FILTER_MATCH_UNKNOWN_MCAST_DST = 0x40000000, /* Match otherwise-unmatched unicast packets */ @@ -2271,26 +2309,26 @@ typedef enum efx_filter_priority_s { */ typedef struct efx_filter_spec_s { - uint32_t efs_match_flags; - uint32_t efs_priority:2; - uint32_t efs_flags:6; - uint32_t efs_dmaq_id:12; - uint32_t efs_rss_context; - uint16_t efs_outer_vid; - uint16_t efs_inner_vid; - uint8_t efs_loc_mac[EFX_MAC_ADDR_LEN]; - uint8_t efs_rem_mac[EFX_MAC_ADDR_LEN]; - uint16_t efs_ether_type; - uint8_t efs_ip_proto; - uint16_t efs_loc_port; - uint16_t efs_rem_port; - efx_oword_t efs_rem_host; - efx_oword_t efs_loc_host; + uint32_t efs_match_flags; + uint32_t efs_priority:2; + uint32_t efs_flags:6; + uint32_t efs_dmaq_id:12; + uint32_t efs_rss_context; + uint16_t efs_outer_vid; + uint16_t efs_inner_vid; + uint8_t efs_loc_mac[EFX_MAC_ADDR_LEN]; + uint8_t efs_rem_mac[EFX_MAC_ADDR_LEN]; + uint16_t efs_ether_type; + uint8_t efs_ip_proto; + efx_tunnel_protocol_t efs_encap_type; + uint16_t efs_loc_port; + uint16_t efs_rem_port; + efx_oword_t efs_rem_host; + efx_oword_t efs_loc_host; } efx_filter_spec_t; /* Default values for use in filter specifications */ -#define EFX_FILTER_SPEC_RSS_CONTEXT_DEFAULT 0xffffffff #define EFX_FILTER_SPEC_RX_DMAQ_ID_DROP 0xfff #define EFX_FILTER_SPEC_VID_UNSPEC 0xffff @@ -2357,6 +2395,11 @@ efx_filter_spec_set_eth_local( __in uint16_t vid, __in const uint8_t *addr); +extern void +efx_filter_spec_set_ether_type( + __inout efx_filter_spec_t *spec, + __in uint16_t ether_type); + extern __checkReturn efx_rc_t efx_filter_spec_set_uc_def( __inout efx_filter_spec_t *spec); @@ -2365,6 +2408,24 @@ extern __checkReturn efx_rc_t efx_filter_spec_set_mc_def( __inout efx_filter_spec_t *spec); +typedef enum efx_filter_inner_frame_match_e { + EFX_FILTER_INNER_FRAME_MATCH_OTHER = 0, + EFX_FILTER_INNER_FRAME_MATCH_UNKNOWN_MCAST_DST, + EFX_FILTER_INNER_FRAME_MATCH_UNKNOWN_UCAST_DST +} efx_filter_inner_frame_match_t; + +extern __checkReturn efx_rc_t +efx_filter_spec_set_encap_type( + __inout efx_filter_spec_t *spec, + __in efx_tunnel_protocol_t encap_type, + __in efx_filter_inner_frame_match_t inner_frame_match); + +#if EFSYS_OPT_RX_SCALE +extern __checkReturn efx_rc_t +efx_filter_spec_set_rss_context( + __inout efx_filter_spec_t *spec, + __in uint32_t rss_context); +#endif #endif /* EFSYS_OPT_FILTER */ /* HASH */ diff --git a/drivers/net/sfc/base/efx_filter.c b/drivers/net/sfc/base/efx_filter.c index ba310260..5cab7d87 100644 --- a/drivers/net/sfc/base/efx_filter.c +++ b/drivers/net/sfc/base/efx_filter.c @@ -117,10 +117,6 @@ efx_filter_remove( EFSYS_ASSERT3P(spec, !=, NULL); EFSYS_ASSERT3U(spec->efs_flags, &, EFX_FILTER_FLAG_RX); -#if EFSYS_OPT_RX_SCALE - spec->efs_rss_context = enp->en_rss_context; -#endif - return (efop->efo_delete(enp, spec)); } @@ -302,7 +298,7 @@ efx_filter_spec_init_rx( memset(spec, 0, sizeof (*spec)); spec->efs_priority = priority; spec->efs_flags = EFX_FILTER_FLAG_RX | flags; - spec->efs_rss_context = EFX_FILTER_SPEC_RSS_CONTEXT_DEFAULT; + spec->efs_rss_context = EFX_RSS_CONTEXT_DEFAULT; spec->efs_dmaq_id = (uint16_t)erp->er_index; } @@ -396,6 +392,17 @@ efx_filter_spec_set_eth_local( return (0); } + void +efx_filter_spec_set_ether_type( + __inout efx_filter_spec_t *spec, + __in uint16_t ether_type) +{ + EFSYS_ASSERT3P(spec, !=, NULL); + + spec->efs_ether_type = ether_type; + spec->efs_match_flags |= EFX_FILTER_MATCH_ETHER_TYPE; +} + /* * Specify matching otherwise-unmatched unicast in a filter specification */ @@ -423,6 +430,88 @@ efx_filter_spec_set_mc_def( } +__checkReturn efx_rc_t +efx_filter_spec_set_encap_type( + __inout efx_filter_spec_t *spec, + __in efx_tunnel_protocol_t encap_type, + __in efx_filter_inner_frame_match_t inner_frame_match) +{ + uint32_t match_flags = 0; + uint8_t ip_proto; + efx_rc_t rc; + + EFSYS_ASSERT3P(spec, !=, NULL); + + switch (encap_type) { + case EFX_TUNNEL_PROTOCOL_VXLAN: + case EFX_TUNNEL_PROTOCOL_GENEVE: + ip_proto = EFX_IPPROTO_UDP; + break; + case EFX_TUNNEL_PROTOCOL_NVGRE: + ip_proto = EFX_IPPROTO_GRE; + break; + default: + EFSYS_ASSERT(0); + rc = EINVAL; + goto fail1; + } + + switch (inner_frame_match) { + case EFX_FILTER_INNER_FRAME_MATCH_UNKNOWN_MCAST_DST: + match_flags |= EFX_FILTER_MATCH_IFRM_UNKNOWN_MCAST_DST; + break; + case EFX_FILTER_INNER_FRAME_MATCH_UNKNOWN_UCAST_DST: + match_flags |= EFX_FILTER_MATCH_IFRM_UNKNOWN_UCAST_DST; + break; + case EFX_FILTER_INNER_FRAME_MATCH_OTHER: + /* This is for when specific inner frames are to be matched. */ + break; + default: + EFSYS_ASSERT(0); + rc = EINVAL; + goto fail2; + } + + spec->efs_encap_type = encap_type; + spec->efs_ip_proto = ip_proto; + spec->efs_match_flags |= (match_flags | EFX_FILTER_MATCH_IP_PROTO); + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} + +#if EFSYS_OPT_RX_SCALE + __checkReturn efx_rc_t +efx_filter_spec_set_rss_context( + __inout efx_filter_spec_t *spec, + __in uint32_t rss_context) +{ + efx_rc_t rc; + + EFSYS_ASSERT3P(spec, !=, NULL); + + /* The filter must have been created with EFX_FILTER_FLAG_RX_RSS. */ + if ((spec->efs_flags & EFX_FILTER_FLAG_RX_RSS) == 0) { + rc = EINVAL; + goto fail1; + } + + spec->efs_rss_context = rss_context; + + return (0); + +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + + return (rc); +} +#endif #if EFSYS_OPT_SIENA @@ -454,9 +543,9 @@ siena_filter_spec_from_gen_spec( else EFSYS_ASSERT3U(gen_spec->efs_flags, &, EFX_FILTER_FLAG_RX); - /* Falconsiena only has one RSS context */ + /* Siena only has one RSS context */ if ((gen_spec->efs_flags & EFX_FILTER_FLAG_RX_RSS) && - gen_spec->efs_rss_context != 0) { + gen_spec->efs_rss_context != EFX_RSS_CONTEXT_DEFAULT) { rc = EINVAL; goto fail1; } diff --git a/drivers/net/sfc/base/efx_impl.h b/drivers/net/sfc/base/efx_impl.h index 43add6d9..53fa37ac 100644 --- a/drivers/net/sfc/base/efx_impl.h +++ b/drivers/net/sfc/base/efx_impl.h @@ -152,11 +152,17 @@ typedef struct efx_rx_ops_s { efx_rc_t (*erxo_scatter_enable)(efx_nic_t *, unsigned int); #endif #if EFSYS_OPT_RX_SCALE - efx_rc_t (*erxo_scale_mode_set)(efx_nic_t *, efx_rx_hash_alg_t, + efx_rc_t (*erxo_scale_context_alloc)(efx_nic_t *, + efx_rx_scale_context_type_t, + uint32_t, uint32_t *); + efx_rc_t (*erxo_scale_context_free)(efx_nic_t *, uint32_t); + efx_rc_t (*erxo_scale_mode_set)(efx_nic_t *, uint32_t, + efx_rx_hash_alg_t, efx_rx_hash_type_t, boolean_t); - efx_rc_t (*erxo_scale_key_set)(efx_nic_t *, uint8_t *, size_t); - efx_rc_t (*erxo_scale_tbl_set)(efx_nic_t *, unsigned int *, - size_t); + efx_rc_t (*erxo_scale_key_set)(efx_nic_t *, uint32_t, + uint8_t *, size_t); + efx_rc_t (*erxo_scale_tbl_set)(efx_nic_t *, uint32_t, + unsigned int *, size_t); uint32_t (*erxo_prefix_hash)(efx_nic_t *, efx_rx_hash_alg_t, uint8_t *); #endif /* EFSYS_OPT_RX_SCALE */ @@ -648,9 +654,9 @@ struct efx_nic_s { const efx_vpd_ops_t *en_evpdop; #endif /* EFSYS_OPT_VPD */ #if EFSYS_OPT_RX_SCALE - efx_rx_hash_support_t en_hash_support; - efx_rx_scale_support_t en_rss_support; - uint32_t en_rss_context; + efx_rx_hash_support_t en_hash_support; + efx_rx_scale_context_type_t en_rss_context_type; + uint32_t en_rss_context; #endif /* EFSYS_OPT_RX_SCALE */ uint32_t en_vport_id; #if EFSYS_OPT_LICENSING diff --git a/drivers/net/sfc/base/efx_rx.c b/drivers/net/sfc/base/efx_rx.c index c8156341..785365d3 100644 --- a/drivers/net/sfc/base/efx_rx.c +++ b/drivers/net/sfc/base/efx_rx.c @@ -53,6 +53,7 @@ siena_rx_scatter_enable( static __checkReturn efx_rc_t siena_rx_scale_mode_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in efx_rx_hash_alg_t alg, __in efx_rx_hash_type_t type, __in boolean_t insert); @@ -60,12 +61,14 @@ siena_rx_scale_mode_set( static __checkReturn efx_rc_t siena_rx_scale_key_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in_ecount(n) uint8_t *key, __in size_t n); static __checkReturn efx_rc_t siena_rx_scale_tbl_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in_ecount(n) unsigned int *table, __in size_t n); @@ -149,6 +152,8 @@ static const efx_rx_ops_t __efx_rx_siena_ops = { siena_rx_scatter_enable, /* erxo_scatter_enable */ #endif #if EFSYS_OPT_RX_SCALE + NULL, /* erxo_scale_context_alloc */ + NULL, /* erxo_scale_context_free */ siena_rx_scale_mode_set, /* erxo_scale_mode_set */ siena_rx_scale_key_set, /* erxo_scale_key_set */ siena_rx_scale_tbl_set, /* erxo_scale_tbl_set */ @@ -176,6 +181,8 @@ static const efx_rx_ops_t __efx_rx_ef10_ops = { ef10_rx_scatter_enable, /* erxo_scatter_enable */ #endif #if EFSYS_OPT_RX_SCALE + ef10_rx_scale_context_alloc, /* erxo_scale_context_alloc */ + ef10_rx_scale_context_free, /* erxo_scale_context_free */ ef10_rx_scale_mode_set, /* erxo_scale_mode_set */ ef10_rx_scale_key_set, /* erxo_scale_key_set */ ef10_rx_scale_tbl_set, /* erxo_scale_tbl_set */ @@ -304,7 +311,7 @@ fail1: #if EFSYS_OPT_RX_SCALE __checkReturn efx_rc_t -efx_rx_hash_support_get( +efx_rx_hash_default_support_get( __in efx_nic_t *enp, __out efx_rx_hash_support_t *supportp) { @@ -318,7 +325,10 @@ efx_rx_hash_support_get( goto fail1; } - /* Report if resources are available to insert RX hash value */ + /* + * Report the hashing support the client gets by default if it + * does not allocate an RSS context itself. + */ *supportp = enp->en_hash_support; return (0); @@ -330,22 +340,25 @@ fail1: } __checkReturn efx_rc_t -efx_rx_scale_support_get( +efx_rx_scale_default_support_get( __in efx_nic_t *enp, - __out efx_rx_scale_support_t *supportp) + __out efx_rx_scale_context_type_t *typep) { efx_rc_t rc; EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX); - if (supportp == NULL) { + if (typep == NULL) { rc = EINVAL; goto fail1; } - /* Report if resources are available to support RSS */ - *supportp = enp->en_rss_support; + /* + * Report the RSS support the client gets by default if it + * does not allocate an RSS context itself. + */ + *typep = enp->en_rss_context_type; return (0); @@ -354,10 +367,75 @@ fail1: return (rc); } +#endif /* EFSYS_OPT_RX_SCALE */ +#if EFSYS_OPT_RX_SCALE + __checkReturn efx_rc_t +efx_rx_scale_context_alloc( + __in efx_nic_t *enp, + __in efx_rx_scale_context_type_t type, + __in uint32_t num_queues, + __out uint32_t *rss_contextp) +{ + const efx_rx_ops_t *erxop = enp->en_erxop; + efx_rc_t rc; + + EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); + EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX); + + if (erxop->erxo_scale_context_alloc == NULL) { + rc = ENOTSUP; + goto fail1; + } + if ((rc = erxop->erxo_scale_context_alloc(enp, type, + num_queues, rss_contextp)) != 0) { + goto fail2; + } + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + return (rc); +} +#endif /* EFSYS_OPT_RX_SCALE */ + +#if EFSYS_OPT_RX_SCALE + __checkReturn efx_rc_t +efx_rx_scale_context_free( + __in efx_nic_t *enp, + __in uint32_t rss_context) +{ + const efx_rx_ops_t *erxop = enp->en_erxop; + efx_rc_t rc; + + EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); + EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX); + + if (erxop->erxo_scale_context_free == NULL) { + rc = ENOTSUP; + goto fail1; + } + if ((rc = erxop->erxo_scale_context_free(enp, rss_context)) != 0) + goto fail2; + + return (0); + +fail2: + EFSYS_PROBE(fail2); +fail1: + EFSYS_PROBE1(fail1, efx_rc_t, rc); + return (rc); +} +#endif /* EFSYS_OPT_RX_SCALE */ + +#if EFSYS_OPT_RX_SCALE __checkReturn efx_rc_t efx_rx_scale_mode_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in efx_rx_hash_alg_t alg, __in efx_rx_hash_type_t type, __in boolean_t insert) @@ -369,7 +447,7 @@ efx_rx_scale_mode_set( EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX); if (erxop->erxo_scale_mode_set != NULL) { - if ((rc = erxop->erxo_scale_mode_set(enp, alg, + if ((rc = erxop->erxo_scale_mode_set(enp, rss_context, alg, type, insert)) != 0) goto fail1; } @@ -386,6 +464,7 @@ fail1: __checkReturn efx_rc_t efx_rx_scale_key_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in_ecount(n) uint8_t *key, __in size_t n) { @@ -395,7 +474,7 @@ efx_rx_scale_key_set( EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX); - if ((rc = erxop->erxo_scale_key_set(enp, key, n)) != 0) + if ((rc = erxop->erxo_scale_key_set(enp, rss_context, key, n)) != 0) goto fail1; return (0); @@ -411,6 +490,7 @@ fail1: __checkReturn efx_rc_t efx_rx_scale_tbl_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in_ecount(n) unsigned int *table, __in size_t n) { @@ -420,7 +500,7 @@ efx_rx_scale_tbl_set( EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC); EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_RX); - if ((rc = erxop->erxo_scale_tbl_set(enp, table, n)) != 0) + if ((rc = erxop->erxo_scale_tbl_set(enp, rss_context, table, n)) != 0) goto fail1; return (0); @@ -654,7 +734,7 @@ siena_rx_init( #if EFSYS_OPT_RX_SCALE /* The RSS key and indirection table are writable. */ - enp->en_rss_support = EFX_RX_SCALE_EXCLUSIVE; + enp->en_rss_context_type = EFX_RX_SCALE_EXCLUSIVE; /* Hardware can insert RX hash with/without RSS */ enp->en_hash_support = EFX_RX_HASH_AVAILABLE; @@ -773,12 +853,18 @@ fail1: static __checkReturn efx_rc_t siena_rx_scale_mode_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in efx_rx_hash_alg_t alg, __in efx_rx_hash_type_t type, __in boolean_t insert) { efx_rc_t rc; + if (rss_context != EFX_RSS_CONTEXT_DEFAULT) { + rc = EINVAL; + goto fail1; + } + switch (alg) { case EFX_RX_HASHALG_LFSR: EFX_RX_LFSR_HASH(enp, insert); @@ -794,17 +880,19 @@ siena_rx_scale_mode_set( type & EFX_RX_HASH_TCPIPV6, rc); if (rc != 0) - goto fail1; + goto fail2; break; default: rc = EINVAL; - goto fail2; + goto fail3; } return (0); +fail3: + EFSYS_PROBE(fail3); fail2: EFSYS_PROBE(fail2); fail1: @@ -820,6 +908,7 @@ fail1: static __checkReturn efx_rc_t siena_rx_scale_key_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in_ecount(n) uint8_t *key, __in size_t n) { @@ -828,6 +917,11 @@ siena_rx_scale_key_set( unsigned int offset; efx_rc_t rc; + if (rss_context != EFX_RSS_CONTEXT_DEFAULT) { + rc = EINVAL; + goto fail1; + } + byte = 0; /* Write Toeplitz IPv4 hash key */ @@ -848,7 +942,7 @@ siena_rx_scale_key_set( --offset) { if (oword.eo_u8[offset - 1] != key[byte++]) { rc = EFAULT; - goto fail1; + goto fail2; } } @@ -897,7 +991,7 @@ siena_rx_scale_key_set( --offset) { if (oword.eo_u8[offset - 1] != key[byte++]) { rc = EFAULT; - goto fail2; + goto fail3; } } @@ -909,7 +1003,7 @@ siena_rx_scale_key_set( --offset) { if (oword.eo_u8[offset - 1] != key[byte++]) { rc = EFAULT; - goto fail3; + goto fail4; } } @@ -921,13 +1015,15 @@ siena_rx_scale_key_set( --offset) { if (oword.eo_u8[offset - 1] != key[byte++]) { rc = EFAULT; - goto fail4; + goto fail5; } } done: return (0); +fail5: + EFSYS_PROBE(fail5); fail4: EFSYS_PROBE(fail4); fail3: @@ -945,6 +1041,7 @@ fail1: static __checkReturn efx_rc_t siena_rx_scale_tbl_set( __in efx_nic_t *enp, + __in uint32_t rss_context, __in_ecount(n) unsigned int *table, __in size_t n) { @@ -955,11 +1052,16 @@ siena_rx_scale_tbl_set( EFX_STATIC_ASSERT(EFX_RSS_TBL_SIZE == FR_BZ_RX_INDIRECTION_TBL_ROWS); EFX_STATIC_ASSERT(EFX_MAXRSS == (1 << FRF_BZ_IT_QUEUE_WIDTH)); - if (n > FR_BZ_RX_INDIRECTION_TBL_ROWS) { + if (rss_context != EFX_RSS_CONTEXT_DEFAULT) { rc = EINVAL; goto fail1; } + if (n > FR_BZ_RX_INDIRECTION_TBL_ROWS) { + rc = EINVAL; + goto fail2; + } + for (index = 0; index < FR_BZ_RX_INDIRECTION_TBL_ROWS; index++) { uint32_t byte; @@ -988,12 +1090,14 @@ siena_rx_scale_tbl_set( /* Verify the entry */ if (EFX_OWORD_FIELD(oword, FRF_BZ_IT_QUEUE) != byte) { rc = EFAULT; - goto fail2; + goto fail3; } } return (0); +fail3: + EFSYS_PROBE(fail3); fail2: EFSYS_PROBE(fail2); fail1: diff --git a/drivers/net/sfc/base/hunt_nic.c b/drivers/net/sfc/base/hunt_nic.c index addbf1c5..19fb7cfb 100644 --- a/drivers/net/sfc/base/hunt_nic.c +++ b/drivers/net/sfc/base/hunt_nic.c @@ -301,6 +301,13 @@ hunt_board_cfg( /* Alignment for WPTR updates */ encp->enc_rx_push_align = EF10_RX_WPTR_ALIGN; + /* + * Maximum number of exclusive RSS contexts which can be allocated. The + * hardware supports 64, but 6 are reserved for shared contexts. They + * are a global resource so not all may be available. + */ + encp->enc_rx_scale_max_exclusive_contexts = 58; + encp->enc_tx_dma_desc_size_max = EFX_MASK32(ESF_DZ_RX_KER_BYTE_CNT); /* No boundary crossing limits */ encp->enc_tx_dma_desc_boundary = 0; diff --git a/drivers/net/sfc/base/medford_nic.c b/drivers/net/sfc/base/medford_nic.c index 07afac1e..d361d654 100644 --- a/drivers/net/sfc/base/medford_nic.c +++ b/drivers/net/sfc/base/medford_nic.c @@ -298,6 +298,13 @@ medford_board_cfg( /* Alignment for WPTR updates */ encp->enc_rx_push_align = EF10_RX_WPTR_ALIGN; + /* + * Maximum number of exclusive RSS contexts which can be allocated. The + * hardware supports 64, but 6 are reserved for shared contexts. They + * are a global resource so not all may be available. + */ + encp->enc_rx_scale_max_exclusive_contexts = 58; + encp->enc_tx_dma_desc_size_max = EFX_MASK32(ESF_DZ_RX_KER_BYTE_CNT); /* No boundary crossing limits */ encp->enc_tx_dma_desc_boundary = 0; diff --git a/drivers/net/sfc/base/siena_nic.c b/drivers/net/sfc/base/siena_nic.c index 129b854b..fcc8f151 100644 --- a/drivers/net/sfc/base/siena_nic.c +++ b/drivers/net/sfc/base/siena_nic.c @@ -135,6 +135,9 @@ siena_board_cfg( /* Alignment for WPTR updates */ encp->enc_rx_push_align = 1; + /* There is one RSS context per function */ + encp->enc_rx_scale_max_exclusive_contexts = 1; + encp->enc_tx_dma_desc_size_max = EFX_MASK32(FSF_AZ_TX_KER_BYTE_COUNT); /* Fragments must not span 4k boundaries. */ encp->enc_tx_dma_desc_boundary = 4096; diff --git a/drivers/net/sfc/efsys.h b/drivers/net/sfc/efsys.h index 0405d02b..f428b624 100644 --- a/drivers/net/sfc/efsys.h +++ b/drivers/net/sfc/efsys.h @@ -253,7 +253,7 @@ typedef struct __efsys_identifier_s efsys_identifier_t; /* DMA */ -typedef phys_addr_t efsys_dma_addr_t; +typedef rte_iova_t efsys_dma_addr_t; typedef struct efsys_mem_s { const struct rte_memzone *esm_mz; diff --git a/drivers/net/sfc/sfc.c b/drivers/net/sfc/sfc.c index 6cecfc00..49d7e937 100644 --- a/drivers/net/sfc/sfc.c +++ b/drivers/net/sfc/sfc.c @@ -61,8 +61,8 @@ sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id, return ENOMEM; } - esmp->esm_addr = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr); - if (esmp->esm_addr == RTE_BAD_PHYS_ADDR) { + esmp->esm_addr = mz->iova; + if (esmp->esm_addr == RTE_BAD_IOVA) { (void)rte_memzone_free(mz); return EFAULT; } @@ -501,7 +501,7 @@ sfc_mem_bar_fini(struct sfc_adapter *sa) * and also known to give a uniform distribution * (a good distribution of traffic between different CPUs) */ -static const uint8_t default_rss_key[SFC_RSS_KEY_SIZE] = { +static const uint8_t default_rss_key[EFX_RSS_KEY_SIZE] = { 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, 0x6d, 0x5a, @@ -510,10 +510,10 @@ static const uint8_t default_rss_key[SFC_RSS_KEY_SIZE] = { }; #endif +#if EFSYS_OPT_RX_SCALE static int sfc_set_rss_defaults(struct sfc_adapter *sa) { -#if EFSYS_OPT_RX_SCALE int rc; rc = efx_intr_init(sa->nic, sa->intr.type, NULL); @@ -528,11 +528,11 @@ sfc_set_rss_defaults(struct sfc_adapter *sa) if (rc != 0) goto fail_rx_init; - rc = efx_rx_scale_support_get(sa->nic, &sa->rss_support); + rc = efx_rx_scale_default_support_get(sa->nic, &sa->rss_support); if (rc != 0) goto fail_scale_support_get; - rc = efx_rx_hash_support_get(sa->nic, &sa->hash_support); + rc = efx_rx_hash_default_support_get(sa->nic, &sa->hash_support); if (rc != 0) goto fail_hash_support_get; @@ -556,10 +556,14 @@ fail_ev_init: fail_intr_init: return rc; +} #else +static int +sfc_set_rss_defaults(__rte_unused struct sfc_adapter *sa) +{ return 0; -#endif } +#endif int sfc_attach(struct sfc_adapter *sa) diff --git a/drivers/net/sfc/sfc.h b/drivers/net/sfc/sfc.h index 286d1ac1..7f11bf22 100644 --- a/drivers/net/sfc/sfc.h +++ b/drivers/net/sfc/sfc.h @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -48,8 +49,6 @@ extern "C" { #endif #if EFSYS_OPT_RX_SCALE -/** RSS key length (bytes) */ -#define SFC_RSS_KEY_SIZE 40 /** RSS hash offloads mask */ #define SFC_RSS_OFFLOADS (ETH_RSS_IP | ETH_RSS_TCP) #endif @@ -225,11 +224,11 @@ struct sfc_adapter { unsigned int rss_channels; #if EFSYS_OPT_RX_SCALE - efx_rx_scale_support_t rss_support; + efx_rx_scale_context_type_t rss_support; efx_rx_hash_support_t hash_support; efx_rx_hash_type_t rss_hash_types; unsigned int rss_tbl[EFX_RSS_TBL_SIZE]; - uint8_t rss_key[SFC_RSS_KEY_SIZE]; + uint8_t rss_key[EFX_RSS_KEY_SIZE]; #endif /* diff --git a/drivers/net/sfc/sfc_dp_rx.h b/drivers/net/sfc/sfc_dp_rx.h index a7b82784..3f6a604b 100644 --- a/drivers/net/sfc/sfc_dp_rx.h +++ b/drivers/net/sfc/sfc_dp_rx.h @@ -155,6 +155,10 @@ typedef const uint32_t * (sfc_dp_rx_supported_ptypes_get_t)(void); /** Get number of pending Rx descriptors */ typedef unsigned int (sfc_dp_rx_qdesc_npending_t)(struct sfc_dp_rxq *dp_rxq); +/** Check Rx descriptor status */ +typedef int (sfc_dp_rx_qdesc_status_t)(struct sfc_dp_rxq *dp_rxq, + uint16_t offset); + /** Receive datapath definition */ struct sfc_dp_rx { struct sfc_dp dp; @@ -170,6 +174,7 @@ struct sfc_dp_rx { sfc_dp_rx_qpurge_t *qpurge; sfc_dp_rx_supported_ptypes_get_t *supported_ptypes_get; sfc_dp_rx_qdesc_npending_t *qdesc_npending; + sfc_dp_rx_qdesc_status_t *qdesc_status; eth_rx_burst_t pkt_burst; }; diff --git a/drivers/net/sfc/sfc_dp_tx.h b/drivers/net/sfc/sfc_dp_tx.h index c1c34191..94d1b108 100644 --- a/drivers/net/sfc/sfc_dp_tx.h +++ b/drivers/net/sfc/sfc_dp_tx.h @@ -127,6 +127,12 @@ typedef bool (sfc_dp_tx_qtx_ev_t)(struct sfc_dp_txq *dp_txq, unsigned int id); */ typedef void (sfc_dp_tx_qreap_t)(struct sfc_dp_txq *dp_txq); +/** + * Check Tx descriptor status + */ +typedef int (sfc_dp_tx_qdesc_status_t)(struct sfc_dp_txq *dp_txq, + uint16_t offset); + /** Transmit datapath definition */ struct sfc_dp_tx { struct sfc_dp dp; @@ -136,12 +142,15 @@ struct sfc_dp_tx { #define SFC_DP_TX_FEAT_TSO 0x2 #define SFC_DP_TX_FEAT_MULTI_SEG 0x4 #define SFC_DP_TX_FEAT_MULTI_PROCESS 0x8 +#define SFC_DP_TX_FEAT_MULTI_POOL 0x10 +#define SFC_DP_TX_FEAT_REFCNT 0x20 sfc_dp_tx_qcreate_t *qcreate; sfc_dp_tx_qdestroy_t *qdestroy; sfc_dp_tx_qstart_t *qstart; sfc_dp_tx_qstop_t *qstop; sfc_dp_tx_qtx_ev_t *qtx_ev; sfc_dp_tx_qreap_t *qreap; + sfc_dp_tx_qdesc_status_t *qdesc_status; eth_tx_burst_t pkt_burst; }; diff --git a/drivers/net/sfc/sfc_ef10_rx.c b/drivers/net/sfc/sfc_ef10_rx.c index 60812cbe..18d60c69 100644 --- a/drivers/net/sfc/sfc_ef10_rx.c +++ b/drivers/net/sfc/sfc_ef10_rx.c @@ -177,7 +177,7 @@ sfc_ef10_rx_qrefill(struct sfc_ef10_rxq *rxq) ++i, ++id) { struct rte_mbuf *m = objs[i]; struct sfc_ef10_rx_sw_desc *rxd; - phys_addr_t phys_addr; + rte_iova_t phys_addr; SFC_ASSERT((id & ~ptr_mask) == 0); rxd = &rxq->sw_ring[id]; @@ -189,7 +189,7 @@ sfc_ef10_rx_qrefill(struct sfc_ef10_rxq *rxq) * structure members. */ - phys_addr = rte_mbuf_data_dma_addr_default(m); + phys_addr = rte_mbuf_data_iova_default(m); EFX_POPULATE_QWORD_2(rxq->rxq_hw_ring[id], ESF_DZ_RX_KER_BYTE_CNT, buf_size, ESF_DZ_RX_KER_BUF_ADDR, phys_addr); @@ -544,6 +544,14 @@ sfc_ef10_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq) return -ENOTSUP; } +static sfc_dp_rx_qdesc_status_t sfc_ef10_rx_qdesc_status; +static int +sfc_ef10_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq, + __rte_unused uint16_t offset) +{ + return -ENOTSUP; +} + static uint64_t sfc_ef10_mk_mbuf_rearm_data(uint16_t port_id, uint16_t prefix_size) @@ -708,5 +716,6 @@ struct sfc_dp_rx sfc_ef10_rx = { .qpurge = sfc_ef10_rx_qpurge, .supported_ptypes_get = sfc_ef10_supported_ptypes_get, .qdesc_npending = sfc_ef10_rx_qdesc_npending, + .qdesc_status = sfc_ef10_rx_qdesc_status, .pkt_burst = sfc_ef10_recv_pkts, }; diff --git a/drivers/net/sfc/sfc_ef10_tx.c b/drivers/net/sfc/sfc_ef10_tx.c index da2b5edb..0454e79a 100644 --- a/drivers/net/sfc/sfc_ef10_tx.c +++ b/drivers/net/sfc/sfc_ef10_tx.c @@ -158,17 +158,35 @@ sfc_ef10_tx_reap(struct sfc_ef10_txq *txq) pending += sfc_ef10_tx_process_events(txq); if (pending != completed) { + struct rte_mbuf *bulk[SFC_TX_REAP_BULK_SIZE]; + unsigned int nb = 0; + do { struct sfc_ef10_tx_sw_desc *txd; + struct rte_mbuf *m; txd = &txq->sw_ring[completed & ptr_mask]; + if (txd->mbuf == NULL) + continue; - if (txd->mbuf != NULL) { - rte_pktmbuf_free(txd->mbuf); - txd->mbuf = NULL; + m = rte_pktmbuf_prefree_seg(txd->mbuf); + txd->mbuf = NULL; + if (m == NULL) + continue; + + if ((nb == RTE_DIM(bulk)) || + ((nb != 0) && (m->pool != bulk[0]->pool))) { + rte_mempool_put_bulk(bulk[0]->pool, + (void *)bulk, nb); + nb = 0; } + + bulk[nb++] = m; } while (++completed != pending); + if (nb != 0) + rte_mempool_put_bulk(bulk[0]->pool, (void *)bulk, nb); + txq->completed = completed; } @@ -177,7 +195,7 @@ sfc_ef10_tx_reap(struct sfc_ef10_txq *txq) } static void -sfc_ef10_tx_qdesc_dma_create(phys_addr_t addr, uint16_t size, bool eop, +sfc_ef10_tx_qdesc_dma_create(rte_iova_t addr, uint16_t size, bool eop, efx_qword_t *edp) { EFX_POPULATE_QWORD_4(*edp, @@ -323,8 +341,9 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) pkt_len = m_seg->pkt_len; do { - phys_addr_t seg_addr = rte_mbuf_data_dma_addr(m_seg); + rte_iova_t seg_addr = rte_mbuf_data_iova(m_seg); unsigned int seg_len = rte_pktmbuf_data_len(m_seg); + unsigned int id = added & ptr_mask; SFC_ASSERT(seg_len <= SFC_EF10_TX_DMA_DESC_LEN_MAX); @@ -332,15 +351,30 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) sfc_ef10_tx_qdesc_dma_create(seg_addr, seg_len, (pkt_len == 0), - &txq->txq_hw_ring[added & ptr_mask]); + &txq->txq_hw_ring[id]); + + /* + * rte_pktmbuf_free() is commonly used in DPDK for + * recycling packets - the function checks every + * segment's reference counter and returns the + * buffer to its pool whenever possible; + * nevertheless, freeing mbuf segments one by one + * may entail some performance decline; + * from this point, sfc_efx_tx_reap() does the same job + * on its own and frees buffers in bulks (all mbufs + * within a bulk belong to the same pool); + * from this perspective, individual segment pointers + * must be associated with the corresponding SW + * descriptors independently so that only one loop + * is sufficient on reap to inspect all the buffers + */ + txq->sw_ring[id].mbuf = m_seg; + ++added; } while ((m_seg = m_seg->next) != 0); dma_desc_space -= (added - pkt_start); - - /* Assign mbuf to the last used desc */ - txq->sw_ring[(added - 1) & ptr_mask].mbuf = *pktp; } if (likely(added != txq->added)) { @@ -367,14 +401,25 @@ sfc_ef10_simple_tx_reap(struct sfc_ef10_txq *txq) pending += sfc_ef10_tx_process_events(txq); if (pending != completed) { + struct rte_mbuf *bulk[SFC_TX_REAP_BULK_SIZE]; + unsigned int nb = 0; + do { struct sfc_ef10_tx_sw_desc *txd; txd = &txq->sw_ring[completed & ptr_mask]; - rte_pktmbuf_free_seg(txd->mbuf); + if (nb == RTE_DIM(bulk)) { + rte_mempool_put_bulk(bulk[0]->pool, + (void *)bulk, nb); + nb = 0; + } + + bulk[nb++] = txd->mbuf; } while (++completed != pending); + rte_mempool_put_bulk(bulk[0]->pool, (void *)bulk, nb); + txq->completed = completed; } @@ -419,7 +464,7 @@ sfc_ef10_simple_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, SFC_ASSERT(rte_pktmbuf_data_len(pkt) <= SFC_EF10_TX_DMA_DESC_LEN_MAX); - sfc_ef10_tx_qdesc_dma_create(rte_mbuf_data_dma_addr(pkt), + sfc_ef10_tx_qdesc_dma_create(rte_mbuf_data_iova(pkt), rte_pktmbuf_data_len(pkt), true, &txq->txq_hw_ring[id]); @@ -557,7 +602,7 @@ sfc_ef10_tx_qreap(struct sfc_dp_txq *dp_txq) txd = &txq->sw_ring[completed & txq->ptr_mask]; if (txd->mbuf != NULL) { - rte_pktmbuf_free(txd->mbuf); + rte_pktmbuf_free_seg(txd->mbuf); txd->mbuf = NULL; } } @@ -565,6 +610,14 @@ sfc_ef10_tx_qreap(struct sfc_dp_txq *dp_txq) txq->flags &= ~SFC_EF10_TXQ_STARTED; } +static sfc_dp_tx_qdesc_status_t sfc_ef10_tx_qdesc_status; +static int +sfc_ef10_tx_qdesc_status(__rte_unused struct sfc_dp_txq *dp_txq, + __rte_unused uint16_t offset) +{ + return -ENOTSUP; +} + struct sfc_dp_tx sfc_ef10_tx = { .dp = { .name = SFC_KVARG_DATAPATH_EF10, @@ -572,6 +625,8 @@ struct sfc_dp_tx sfc_ef10_tx = { .hw_fw_caps = SFC_DP_HW_FW_CAP_EF10, }, .features = SFC_DP_TX_FEAT_MULTI_SEG | + SFC_DP_TX_FEAT_MULTI_POOL | + SFC_DP_TX_FEAT_REFCNT | SFC_DP_TX_FEAT_MULTI_PROCESS, .qcreate = sfc_ef10_tx_qcreate, .qdestroy = sfc_ef10_tx_qdestroy, @@ -579,6 +634,7 @@ struct sfc_dp_tx sfc_ef10_tx = { .qtx_ev = sfc_ef10_tx_qtx_ev, .qstop = sfc_ef10_tx_qstop, .qreap = sfc_ef10_tx_qreap, + .qdesc_status = sfc_ef10_tx_qdesc_status, .pkt_burst = sfc_ef10_xmit_pkts, }; @@ -594,5 +650,6 @@ struct sfc_dp_tx sfc_ef10_simple_tx = { .qtx_ev = sfc_ef10_tx_qtx_ev, .qstop = sfc_ef10_tx_qstop, .qreap = sfc_ef10_tx_qreap, + .qdesc_status = sfc_ef10_tx_qdesc_status, .pkt_burst = sfc_ef10_simple_xmit_pkts, }; diff --git a/drivers/net/sfc/sfc_ethdev.c b/drivers/net/sfc/sfc_ethdev.c index 12bcd6fa..2f5f86f8 100644 --- a/drivers/net/sfc/sfc_ethdev.c +++ b/drivers/net/sfc/sfc_ethdev.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "efx.h" @@ -145,10 +146,16 @@ sfc_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) if (~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_SEG) dev_info->default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + if (~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_POOL) + dev_info->default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOMULTMEMP; + + if (~sa->dp_tx->features & SFC_DP_TX_FEAT_REFCNT) + dev_info->default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOREFCOUNT; + #if EFSYS_OPT_RX_SCALE if (sa->rss_support != EFX_RX_SCALE_UNAVAILABLE) { dev_info->reta_size = EFX_RSS_TBL_SIZE; - dev_info->hash_key_size = SFC_RSS_KEY_SIZE; + dev_info->hash_key_size = EFX_RSS_KEY_SIZE; dev_info->flow_type_rss_offloads = SFC_RSS_OFFLOADS; } #endif @@ -515,16 +522,18 @@ sfc_tx_queue_release(void *queue) sfc_adapter_unlock(sa); } -static void +static int sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { struct sfc_adapter *sa = dev->data->dev_private; struct sfc_port *port = &sa->port; uint64_t *mac_stats; + int ret; rte_spinlock_lock(&port->mac_stats_lock); - if (sfc_port_update_mac_stats(sa) != 0) + ret = sfc_port_update_mac_stats(sa); + if (ret != 0) goto unlock; mac_stats = port->mac_stats_buf; @@ -581,6 +590,8 @@ sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) unlock: rte_spinlock_unlock(&port->mac_stats_lock); + SFC_ASSERT(ret >= 0); + return -ret; } static void @@ -991,7 +1002,7 @@ sfc_set_mc_addr_list(struct rte_eth_dev *dev, struct ether_addr *mc_addr_set, } for (i = 0; i < nb_mc_addr; ++i) { - (void)rte_memcpy(mc_addrs, mc_addr_set[i].addr_bytes, + rte_memcpy(mc_addrs, mc_addr_set[i].addr_bytes, EFX_MAC_ADDR_LEN); mc_addrs += EFX_MAC_ADDR_LEN; } @@ -1086,6 +1097,24 @@ sfc_rx_descriptor_done(void *queue, uint16_t offset) return sfc_rx_qdesc_done(dp_rxq, offset); } +static int +sfc_rx_descriptor_status(void *queue, uint16_t offset) +{ + struct sfc_dp_rxq *dp_rxq = queue; + struct sfc_rxq *rxq = sfc_rxq_by_dp_rxq(dp_rxq); + + return rxq->evq->sa->dp_rx->qdesc_status(dp_rxq, offset); +} + +static int +sfc_tx_descriptor_status(void *queue, uint16_t offset) +{ + struct sfc_dp_txq *dp_txq = queue; + struct sfc_txq *txq = sfc_txq_by_dp_txq(dp_txq); + + return txq->evq->sa->dp_tx->qdesc_status(dp_txq, offset); +} + static int sfc_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id) { @@ -1205,9 +1234,9 @@ sfc_dev_rss_hash_conf_get(struct rte_eth_dev *dev, * locally in 'sfc_adapter' and kept up-to-date */ rss_conf->rss_hf = sfc_efx_to_rte_hash_type(sa->rss_hash_types); - rss_conf->rss_key_len = SFC_RSS_KEY_SIZE; + rss_conf->rss_key_len = EFX_RSS_KEY_SIZE; if (rss_conf->rss_key != NULL) - rte_memcpy(rss_conf->rss_key, sa->rss_key, SFC_RSS_KEY_SIZE); + rte_memcpy(rss_conf->rss_key, sa->rss_key, EFX_RSS_KEY_SIZE); sfc_adapter_unlock(sa); @@ -1252,14 +1281,17 @@ sfc_dev_rss_hash_update(struct rte_eth_dev *dev, efx_hash_types = sfc_rte_to_efx_hash_type(rss_conf->rss_hf); - rc = efx_rx_scale_mode_set(sa->nic, EFX_RX_HASHALG_TOEPLITZ, + rc = efx_rx_scale_mode_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT, + EFX_RX_HASHALG_TOEPLITZ, efx_hash_types, B_TRUE); if (rc != 0) goto fail_scale_mode_set; if (rss_conf->rss_key != NULL) { if (sa->state == SFC_ADAPTER_STARTED) { - rc = efx_rx_scale_key_set(sa->nic, rss_conf->rss_key, + rc = efx_rx_scale_key_set(sa->nic, + EFX_RSS_CONTEXT_DEFAULT, + rss_conf->rss_key, sizeof(sa->rss_key)); if (rc != 0) goto fail_scale_key_set; @@ -1275,7 +1307,8 @@ sfc_dev_rss_hash_update(struct rte_eth_dev *dev, return 0; fail_scale_key_set: - if (efx_rx_scale_mode_set(sa->nic, EFX_RX_HASHALG_TOEPLITZ, + if (efx_rx_scale_mode_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT, + EFX_RX_HASHALG_TOEPLITZ, sa->rss_hash_types, B_TRUE) != 0) sfc_err(sa, "failed to restore RSS mode"); @@ -1326,7 +1359,7 @@ sfc_dev_rss_reta_update(struct rte_eth_dev *dev, struct sfc_port *port = &sa->port; unsigned int *rss_tbl_new; uint16_t entry; - int rc; + int rc = 0; if (port->isolated) @@ -1371,10 +1404,16 @@ sfc_dev_rss_reta_update(struct rte_eth_dev *dev, } } - rc = efx_rx_scale_tbl_set(sa->nic, rss_tbl_new, EFX_RSS_TBL_SIZE); - if (rc == 0) - rte_memcpy(sa->rss_tbl, rss_tbl_new, sizeof(sa->rss_tbl)); + if (sa->state == SFC_ADAPTER_STARTED) { + rc = efx_rx_scale_tbl_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT, + rss_tbl_new, EFX_RSS_TBL_SIZE); + if (rc != 0) + goto fail_scale_tbl_set; + } + + rte_memcpy(sa->rss_tbl, rss_tbl_new, sizeof(sa->rss_tbl)); +fail_scale_tbl_set: bad_reta_entry: sfc_adapter_unlock(sa); @@ -1469,6 +1508,8 @@ static const struct eth_dev_ops sfc_eth_dev_ops = { .rx_queue_release = sfc_rx_queue_release, .rx_queue_count = sfc_rx_queue_count, .rx_descriptor_done = sfc_rx_descriptor_done, + .rx_descriptor_status = sfc_rx_descriptor_status, + .tx_descriptor_status = sfc_tx_descriptor_status, .tx_queue_setup = sfc_tx_queue_setup, .tx_queue_release = sfc_tx_queue_release, .flow_ctrl_get = sfc_flow_ctrl_get, @@ -1751,8 +1792,6 @@ sfc_eth_dev_init(struct rte_eth_dev *dev) /* Copy PCI device info to the dev->data */ rte_eth_copy_pci_info(dev, pci_dev); - dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE; - rc = sfc_kvargs_parse(sa); if (rc != 0) goto fail_kvargs_parse; diff --git a/drivers/net/sfc/sfc_flow.c b/drivers/net/sfc/sfc_flow.c index 110dfb89..f2050f65 100644 --- a/drivers/net/sfc/sfc_flow.c +++ b/drivers/net/sfc/sfc_flow.c @@ -803,7 +803,7 @@ sfc_flow_parse_attr(const struct rte_flow_attr *attr, } flow->spec.efs_flags |= EFX_FILTER_FLAG_RX; - flow->spec.efs_rss_context = EFX_FILTER_SPEC_RSS_CONTEXT_DEFAULT; + flow->spec.efs_rss_context = EFX_RSS_CONTEXT_DEFAULT; return 0; } @@ -886,6 +886,170 @@ sfc_flow_parse_queue(struct sfc_adapter *sa, return 0; } +#if EFSYS_OPT_RX_SCALE +static int +sfc_flow_parse_rss(struct sfc_adapter *sa, + const struct rte_flow_action_rss *rss, + struct rte_flow *flow) +{ + unsigned int rxq_sw_index; + struct sfc_rxq *rxq; + unsigned int rxq_hw_index_min; + unsigned int rxq_hw_index_max; + const struct rte_eth_rss_conf *rss_conf = rss->rss_conf; + uint64_t rss_hf; + uint8_t *rss_key = NULL; + struct sfc_flow_rss *sfc_rss_conf = &flow->rss_conf; + unsigned int i; + + if (rss->num == 0) + return -EINVAL; + + rxq_sw_index = sa->rxq_count - 1; + rxq = sa->rxq_info[rxq_sw_index].rxq; + rxq_hw_index_min = rxq->hw_index; + rxq_hw_index_max = 0; + + for (i = 0; i < rss->num; ++i) { + rxq_sw_index = rss->queue[i]; + + if (rxq_sw_index >= sa->rxq_count) + return -EINVAL; + + rxq = sa->rxq_info[rxq_sw_index].rxq; + + if (rxq->hw_index < rxq_hw_index_min) + rxq_hw_index_min = rxq->hw_index; + + if (rxq->hw_index > rxq_hw_index_max) + rxq_hw_index_max = rxq->hw_index; + } + + rss_hf = (rss_conf != NULL) ? rss_conf->rss_hf : SFC_RSS_OFFLOADS; + if ((rss_hf & ~SFC_RSS_OFFLOADS) != 0) + return -EINVAL; + + if (rss_conf != NULL) { + if (rss_conf->rss_key_len != sizeof(sa->rss_key)) + return -EINVAL; + + rss_key = rss_conf->rss_key; + } else { + rss_key = sa->rss_key; + } + + flow->rss = B_TRUE; + + sfc_rss_conf->rxq_hw_index_min = rxq_hw_index_min; + sfc_rss_conf->rxq_hw_index_max = rxq_hw_index_max; + sfc_rss_conf->rss_hash_types = sfc_rte_to_efx_hash_type(rss_hf); + rte_memcpy(sfc_rss_conf->rss_key, rss_key, sizeof(sa->rss_key)); + + for (i = 0; i < RTE_DIM(sfc_rss_conf->rss_tbl); ++i) { + unsigned int rxq_sw_index = rss->queue[i % rss->num]; + struct sfc_rxq *rxq = sa->rxq_info[rxq_sw_index].rxq; + + sfc_rss_conf->rss_tbl[i] = rxq->hw_index - rxq_hw_index_min; + } + + return 0; +} +#endif /* EFSYS_OPT_RX_SCALE */ + +static int +sfc_flow_filter_insert(struct sfc_adapter *sa, + struct rte_flow *flow) +{ + efx_filter_spec_t *spec = &flow->spec; + +#if EFSYS_OPT_RX_SCALE + struct sfc_flow_rss *rss = &flow->rss_conf; + int rc = 0; + + if (flow->rss) { + unsigned int rss_spread = MIN(rss->rxq_hw_index_max - + rss->rxq_hw_index_min + 1, + EFX_MAXRSS); + + rc = efx_rx_scale_context_alloc(sa->nic, + EFX_RX_SCALE_EXCLUSIVE, + rss_spread, + &spec->efs_rss_context); + if (rc != 0) + goto fail_scale_context_alloc; + + rc = efx_rx_scale_mode_set(sa->nic, spec->efs_rss_context, + EFX_RX_HASHALG_TOEPLITZ, + rss->rss_hash_types, B_TRUE); + if (rc != 0) + goto fail_scale_mode_set; + + rc = efx_rx_scale_key_set(sa->nic, spec->efs_rss_context, + rss->rss_key, + sizeof(sa->rss_key)); + if (rc != 0) + goto fail_scale_key_set; + + spec->efs_dmaq_id = rss->rxq_hw_index_min; + spec->efs_flags |= EFX_FILTER_FLAG_RX_RSS; + } + + rc = efx_filter_insert(sa->nic, spec); + if (rc != 0) + goto fail_filter_insert; + + if (flow->rss) { + /* + * Scale table is set after filter insertion because + * the table entries are relative to the base RxQ ID + * and the latter is submitted to the HW by means of + * inserting a filter, so by the time of the request + * the HW knows all the information needed to verify + * the table entries, and the operation will succeed + */ + rc = efx_rx_scale_tbl_set(sa->nic, spec->efs_rss_context, + rss->rss_tbl, RTE_DIM(rss->rss_tbl)); + if (rc != 0) + goto fail_scale_tbl_set; + } + + return 0; + +fail_scale_tbl_set: + efx_filter_remove(sa->nic, spec); + +fail_filter_insert: +fail_scale_key_set: +fail_scale_mode_set: + if (rss != NULL) + efx_rx_scale_context_free(sa->nic, spec->efs_rss_context); + +fail_scale_context_alloc: + return rc; +#else /* !EFSYS_OPT_RX_SCALE */ + return efx_filter_insert(sa->nic, spec); +#endif /* EFSYS_OPT_RX_SCALE */ +} + +static int +sfc_flow_filter_remove(struct sfc_adapter *sa, + struct rte_flow *flow) +{ + efx_filter_spec_t *spec = &flow->spec; + int rc = 0; + + rc = efx_filter_remove(sa->nic, spec); + if (rc != 0) + return rc; + +#if EFSYS_OPT_RX_SCALE + if (flow->rss) + rc = efx_rx_scale_context_free(sa->nic, spec->efs_rss_context); +#endif /* EFSYS_OPT_RX_SCALE */ + + return rc; +} + static int sfc_flow_parse_actions(struct sfc_adapter *sa, const struct rte_flow_action actions[], @@ -919,6 +1083,20 @@ sfc_flow_parse_actions(struct sfc_adapter *sa, is_specified = B_TRUE; break; +#if EFSYS_OPT_RX_SCALE + case RTE_FLOW_ACTION_TYPE_RSS: + rc = sfc_flow_parse_rss(sa, actions->conf, flow); + if (rc != 0) { + rte_flow_error_set(error, rc, + RTE_FLOW_ERROR_TYPE_ACTION, actions, + "Bad RSS action"); + return -rte_errno; + } + + is_specified = B_TRUE; + break; +#endif /* EFSYS_OPT_RX_SCALE */ + default: rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, actions, @@ -1013,7 +1191,7 @@ sfc_flow_create(struct rte_eth_dev *dev, sfc_adapter_lock(sa); if (sa->state == SFC_ADAPTER_STARTED) { - rc = efx_filter_insert(sa->nic, &flow->spec); + rc = sfc_flow_filter_insert(sa, flow); if (rc != 0) { rte_flow_error_set(error, rc, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -1047,7 +1225,7 @@ sfc_flow_remove(struct sfc_adapter *sa, SFC_ASSERT(sfc_adapter_is_locked(sa)); if (sa->state == SFC_ADAPTER_STARTED) { - rc = efx_filter_remove(sa->nic, &flow->spec); + rc = sfc_flow_filter_remove(sa, flow); if (rc != 0) rte_flow_error_set(error, rc, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, @@ -1172,7 +1350,7 @@ sfc_flow_stop(struct sfc_adapter *sa) SFC_ASSERT(sfc_adapter_is_locked(sa)); TAILQ_FOREACH(flow, &sa->filter.flow_list, entries) - efx_filter_remove(sa->nic, &flow->spec); + sfc_flow_filter_remove(sa, flow); } int @@ -1186,7 +1364,7 @@ sfc_flow_start(struct sfc_adapter *sa) SFC_ASSERT(sfc_adapter_is_locked(sa)); TAILQ_FOREACH(flow, &sa->filter.flow_list, entries) { - rc = efx_filter_insert(sa->nic, &flow->spec); + rc = sfc_flow_filter_insert(sa, flow); if (rc != 0) goto fail_bad_flow; } diff --git a/drivers/net/sfc/sfc_flow.h b/drivers/net/sfc/sfc_flow.h index bfc34364..aa740d7d 100644 --- a/drivers/net/sfc/sfc_flow.h +++ b/drivers/net/sfc/sfc_flow.h @@ -41,9 +41,24 @@ extern "C" { #endif +#if EFSYS_OPT_RX_SCALE +/* RSS configuration storage */ +struct sfc_flow_rss { + unsigned int rxq_hw_index_min; + unsigned int rxq_hw_index_max; + unsigned int rss_hash_types; + uint8_t rss_key[EFX_RSS_KEY_SIZE]; + unsigned int rss_tbl[EFX_RSS_TBL_SIZE]; +}; +#endif /* EFSYS_OPT_RX_SCALE */ + /* PMD-specific definition of the opaque type from rte_flow.h */ struct rte_flow { efx_filter_spec_t spec; /* filter specification */ +#if EFSYS_OPT_RX_SCALE + boolean_t rss; /* RSS toggle */ + struct sfc_flow_rss rss_conf; /* RSS configuration */ +#endif /* EFSYS_OPT_RX_SCALE */ TAILQ_ENTRY(rte_flow) entries; /* flow list entries */ }; diff --git a/drivers/net/sfc/sfc_rx.c b/drivers/net/sfc/sfc_rx.c index 1bf86445..2ae095b2 100644 --- a/drivers/net/sfc/sfc_rx.c +++ b/drivers/net/sfc/sfc_rx.c @@ -128,7 +128,7 @@ sfc_efx_rx_qrefill(struct sfc_efx_rxq *rxq) SFC_ASSERT(m->nb_segs == 1); m->port = port_id; - addr[i] = rte_pktmbuf_mtophys(m); + addr[i] = rte_pktmbuf_iova(m); } efx_rx_qpost(rxq->common, addr, rxq->buf_size, @@ -207,11 +207,11 @@ sfc_efx_supported_ptypes_get(void) return ptypes; } +#if EFSYS_OPT_RX_SCALE static void sfc_efx_rx_set_rss_hash(struct sfc_efx_rxq *rxq, unsigned int flags, struct rte_mbuf *m) { -#if EFSYS_OPT_RX_SCALE uint8_t *mbuf_data; @@ -227,8 +227,15 @@ sfc_efx_rx_set_rss_hash(struct sfc_efx_rxq *rxq, unsigned int flags, m->ol_flags |= PKT_RX_RSS_HASH; } -#endif } +#else +static void +sfc_efx_rx_set_rss_hash(__rte_unused struct sfc_efx_rxq *rxq, + __rte_unused unsigned int flags, + __rte_unused struct rte_mbuf *m) +{ +} +#endif static uint16_t sfc_efx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) @@ -349,6 +356,43 @@ sfc_efx_rx_qdesc_npending(struct sfc_dp_rxq *dp_rxq) return rxq->pending - rxq->completed; } +static sfc_dp_rx_qdesc_status_t sfc_efx_rx_qdesc_status; +static int +sfc_efx_rx_qdesc_status(struct sfc_dp_rxq *dp_rxq, uint16_t offset) +{ + struct sfc_efx_rxq *rxq = sfc_efx_rxq_by_dp_rxq(dp_rxq); + + if (unlikely(offset > rxq->ptr_mask)) + return -EINVAL; + + /* + * Poll EvQ to derive up-to-date 'rxq->pending' figure; + * it is required for the queue to be running, but the + * check is omitted because API design assumes that it + * is the duty of the caller to satisfy all conditions + */ + SFC_ASSERT((rxq->flags & SFC_EFX_RXQ_FLAG_RUNNING) == + SFC_EFX_RXQ_FLAG_RUNNING); + sfc_ev_qpoll(rxq->evq); + + /* + * There is a handful of reserved entries in the ring, + * but an explicit check whether the offset points to + * a reserved entry is neglected since the two checks + * below rely on the figures which take the HW limits + * into account and thus if an entry is reserved, the + * checks will fail and UNAVAIL code will be returned + */ + + if (offset < (rxq->pending - rxq->completed)) + return RTE_ETH_RX_DESC_DONE; + + if (offset < (rxq->added - rxq->completed)) + return RTE_ETH_RX_DESC_AVAIL; + + return RTE_ETH_RX_DESC_UNAVAIL; +} + struct sfc_rxq * sfc_rxq_by_dp_rxq(const struct sfc_dp_rxq *dp_rxq) { @@ -498,6 +542,7 @@ struct sfc_dp_rx sfc_efx_rx = { .qpurge = sfc_efx_rx_qpurge, .supported_ptypes_get = sfc_efx_supported_ptypes_get, .qdesc_npending = sfc_efx_rx_qdesc_npending, + .qdesc_status = sfc_efx_rx_qdesc_status, .pkt_burst = sfc_efx_recv_pkts, }; @@ -1050,31 +1095,39 @@ sfc_efx_to_rte_hash_type(efx_rx_hash_type_t efx_hash_types) } #endif +#if EFSYS_OPT_RX_SCALE static int sfc_rx_rss_config(struct sfc_adapter *sa) { int rc = 0; -#if EFSYS_OPT_RX_SCALE if (sa->rss_channels > 0) { - rc = efx_rx_scale_mode_set(sa->nic, EFX_RX_HASHALG_TOEPLITZ, + rc = efx_rx_scale_mode_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT, + EFX_RX_HASHALG_TOEPLITZ, sa->rss_hash_types, B_TRUE); if (rc != 0) goto finish; - rc = efx_rx_scale_key_set(sa->nic, sa->rss_key, + rc = efx_rx_scale_key_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT, + sa->rss_key, sizeof(sa->rss_key)); if (rc != 0) goto finish; - rc = efx_rx_scale_tbl_set(sa->nic, sa->rss_tbl, - sizeof(sa->rss_tbl)); + rc = efx_rx_scale_tbl_set(sa->nic, EFX_RSS_CONTEXT_DEFAULT, + sa->rss_tbl, RTE_DIM(sa->rss_tbl)); } finish: -#endif return rc; } +#else +static int +sfc_rx_rss_config(__rte_unused struct sfc_adapter *sa) +{ + return 0; +} +#endif int sfc_rx_start(struct sfc_adapter *sa) @@ -1243,7 +1296,6 @@ sfc_rx_configure(struct sfc_adapter *sa) { struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf; const unsigned int nb_rx_queues = sa->eth_dev->data->nb_rx_queues; - unsigned int sw_index; int rc; sfc_log_init(sa, "nb_rx_queues=%u (old %u)", @@ -1296,6 +1348,8 @@ sfc_rx_configure(struct sfc_adapter *sa) MIN(sa->rxq_count, EFX_MAXRSS) : 0; if (sa->rss_channels > 0) { + unsigned int sw_index; + for (sw_index = 0; sw_index < EFX_RSS_TBL_SIZE; ++sw_index) sa->rss_tbl[sw_index] = sw_index % sa->rss_channels; } diff --git a/drivers/net/sfc/sfc_tso.c b/drivers/net/sfc/sfc_tso.c index fb79d749..2e7b595b 100644 --- a/drivers/net/sfc/sfc_tso.c +++ b/drivers/net/sfc/sfc_tso.c @@ -141,7 +141,7 @@ sfc_efx_tso_do(struct sfc_efx_txq *txq, unsigned int idx, if (unlikely(tcph_off > encp->enc_tx_tso_tcp_header_offset_limit)) return EMSGSIZE; - header_paddr = rte_pktmbuf_mtophys(m); + header_paddr = rte_pktmbuf_iova(m); /* * Sometimes headers may be split across multiple mbufs. In such cases @@ -155,7 +155,7 @@ sfc_efx_tso_do(struct sfc_efx_txq *txq, unsigned int idx, header_len); tsoh = txq->sw_ring[idx & txq->ptr_mask].tsoh; - header_paddr = rte_malloc_virt2phy((void *)tsoh); + header_paddr = rte_malloc_virt2iova((void *)tsoh); } else { if (m->data_len == header_len) { *in_off = 0; diff --git a/drivers/net/sfc/sfc_tweak.h b/drivers/net/sfc/sfc_tweak.h index 4ef7fc8b..fd2f75c3 100644 --- a/drivers/net/sfc/sfc_tweak.h +++ b/drivers/net/sfc/sfc_tweak.h @@ -53,4 +53,7 @@ /** Default free threshold follows recommendations from DPDK documentation */ #define SFC_TX_DEFAULT_FREE_THRESH 32 +/** Number of mbufs to be freed in bulk in a single call */ +#define SFC_TX_REAP_BULK_SIZE 32 + #endif /* _SFC_TWEAK_H_ */ diff --git a/drivers/net/sfc/sfc_tx.c b/drivers/net/sfc/sfc_tx.c index fc439cb6..127d59e6 100644 --- a/drivers/net/sfc/sfc_tx.c +++ b/drivers/net/sfc/sfc_tx.c @@ -91,6 +91,21 @@ sfc_tx_qcheck_conf(struct sfc_adapter *sa, uint16_t nb_tx_desc, rc = EINVAL; } + if (((flags & ETH_TXQ_FLAGS_NOMULTMEMP) == 0) && + (~sa->dp_tx->features & SFC_DP_TX_FEAT_MULTI_POOL)) { + sfc_err(sa, "multi-mempool is not supported by %s datapath", + sa->dp_tx->dp.name); + rc = EINVAL; + } + + if (((flags & ETH_TXQ_FLAGS_NOREFCOUNT) == 0) && + (~sa->dp_tx->features & SFC_DP_TX_FEAT_REFCNT)) { + sfc_err(sa, + "mbuf reference counters are neglected by %s datapath", + sa->dp_tx->dp.name); + rc = EINVAL; + } + if ((flags & ETH_TXQ_FLAGS_NOVLANOFFL) == 0) { if (!encp->enc_hw_tx_insert_vlan_enabled) { sfc_err(sa, "VLAN offload is not supported"); @@ -750,7 +765,7 @@ sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) size_t seg_len; seg_len = m_seg->data_len; - next_frag = rte_mbuf_data_dma_addr(m_seg); + next_frag = rte_mbuf_data_iova(m_seg); /* * If we've started TSO transaction few steps earlier, @@ -977,6 +992,44 @@ sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq) txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED; } +static sfc_dp_tx_qdesc_status_t sfc_efx_tx_qdesc_status; +static int +sfc_efx_tx_qdesc_status(struct sfc_dp_txq *dp_txq, uint16_t offset) +{ + struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq); + + if (unlikely(offset > txq->ptr_mask)) + return -EINVAL; + + if (unlikely(offset >= EFX_TXQ_LIMIT(txq->ptr_mask + 1))) + return RTE_ETH_TX_DESC_UNAVAIL; + + /* + * Poll EvQ to derive up-to-date 'txq->pending' figure; + * it is required for the queue to be running, but the + * check is omitted because API design assumes that it + * is the duty of the caller to satisfy all conditions + */ + SFC_ASSERT((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == + SFC_EFX_TXQ_FLAG_RUNNING); + sfc_ev_qpoll(txq->evq); + + /* + * Ring tail is 'txq->pending', and although descriptors + * between 'txq->completed' and 'txq->pending' are still + * in use by the driver, they should be reported as DONE + */ + if (unlikely(offset < (txq->added - txq->pending))) + return RTE_ETH_TX_DESC_FULL; + + /* + * There is no separate return value for unused descriptors; + * the latter will be reported as DONE because genuine DONE + * descriptors will be freed anyway in SW on the next burst + */ + return RTE_ETH_TX_DESC_DONE; +} + struct sfc_dp_tx sfc_efx_tx = { .dp = { .name = SFC_KVARG_DATAPATH_EFX, @@ -985,11 +1038,14 @@ struct sfc_dp_tx sfc_efx_tx = { }, .features = SFC_DP_TX_FEAT_VLAN_INSERT | SFC_DP_TX_FEAT_TSO | + SFC_DP_TX_FEAT_MULTI_POOL | + SFC_DP_TX_FEAT_REFCNT | SFC_DP_TX_FEAT_MULTI_SEG, .qcreate = sfc_efx_tx_qcreate, .qdestroy = sfc_efx_tx_qdestroy, .qstart = sfc_efx_tx_qstart, .qstop = sfc_efx_tx_qstop, .qreap = sfc_efx_tx_qreap, + .qdesc_status = sfc_efx_tx_qdesc_status, .pkt_burst = sfc_efx_xmit_pkts, }; -- cgit 1.2.3-korg