From 8a853e3f0275efc8b05cb195085d45946942744a Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Wed, 14 Nov 2018 11:13:11 +0000 Subject: New upstream version 18.11-rc3 Change-Id: I958b9d019027ef049bd992b3968a667f3ae382ae Signed-off-by: Luca Boccassi --- app/test-pmd/cmdline.c | 5 +- app/test-pmd/cmdline_flow.c | 15 +- app/test-pmd/cmdline_mtr.c | 24 +- app/test-pmd/csumonly.c | 8 +- app/test-pmd/softnicfwd.c | 2 + buildtools/check-experimental-syms.sh | 6 + buildtools/gen-build-mk.sh | 5 +- doc/guides/nics/mlx5.rst | 6 + drivers/bus/vdev/vdev.c | 6 +- drivers/bus/vmbus/linux/vmbus_uio.c | 12 +- drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c | 8 +- drivers/crypto/ccp/ccp_pci.c | 5 +- drivers/crypto/octeontx/otx_cryptodev.c | 2 +- drivers/crypto/octeontx/otx_cryptodev_hw_access.c | 5 +- drivers/crypto/octeontx/otx_cryptodev_ops.c | 2 +- drivers/net/bnx2x/bnx2x.c | 25 +- drivers/net/bnx2x/bnx2x_logs.h | 35 +- drivers/net/bonding/rte_eth_bond_pmd.c | 11 +- drivers/net/cxgbe/cxgbe_filter.c | 7 +- drivers/net/cxgbe/cxgbe_flow.c | 16 +- drivers/net/cxgbe/cxgbe_flow.h | 5 +- drivers/net/cxgbe/cxgbe_main.c | 18 +- drivers/net/cxgbe/cxgbevf_ethdev.c | 12 +- drivers/net/cxgbe/cxgbevf_main.c | 6 + drivers/net/cxgbe/mps_tcam.c | 4 +- drivers/net/e1000/base/e1000_i210.c | 1 + drivers/net/ena/ena_ethdev.c | 11 +- drivers/net/enic/enic_rxtx.c | 19 +- drivers/net/fm10k/fm10k_ethdev.c | 11 +- drivers/net/i40e/i40e_ethdev.c | 5 +- drivers/net/ixgbe/ixgbe_ethdev.c | 37 +- drivers/net/mlx5/Makefile | 4 +- drivers/net/mlx5/mlx5.c | 7 +- drivers/net/mlx5/mlx5_flow.c | 19 +- drivers/net/mlx5/mlx5_flow_dv.c | 270 +++++++----- drivers/net/mlx5/mlx5_flow_tcf.c | 100 ++--- drivers/net/mlx5/mlx5_flow_verbs.c | 25 +- drivers/net/mlx5/mlx5_utils.h | 10 + drivers/net/octeontx/base/octeontx_pki_var.h | 13 +- drivers/net/octeontx/octeontx_ethdev.c | 3 +- drivers/net/qede/qede_rxtx.c | 30 +- drivers/net/qede/qede_rxtx.h | 5 +- drivers/net/sfc/base/efx.h | 3 +- drivers/net/tap/rte_eth_tap.c | 3 +- drivers/net/tap/tap_netlink.c | 3 + drivers/net/thunderx/nicvf_rxtx.c | 10 +- drivers/net/virtio/virtio_pci.c | 10 +- drivers/net/virtio/virtio_user_ethdev.c | 2 +- drivers/net/vmxnet3/vmxnet3_ethdev.c | 9 +- examples/fips_validation/fips_validation_tdes.c | 2 +- examples/fips_validation/main.c | 6 +- examples/flow_filtering/main.c | 2 +- examples/ip_pipeline/cli.c | 28 +- examples/l3fwd-power/main.c | 2 +- lib/librte_bpf/bpf_jit_x86.c | 28 +- lib/librte_eal/common/arch/x86/rte_memcpy.c | 29 -- lib/librte_eal/common/eal_common_dev.c | 3 +- lib/librte_eal/common/eal_common_devargs.c | 36 +- lib/librte_eal/common/eal_common_memory.c | 7 +- lib/librte_eal/common/eal_common_proc.c | 31 +- lib/librte_eal/common/include/arch/x86/rte_rtm.h | 19 +- .../common/include/arch/x86/rte_spinlock.h | 21 +- lib/librte_eal/common/include/rte_common.h | 19 + lib/librte_eal/common/include/rte_devargs.h | 4 +- lib/librte_eal/common/include/rte_version.h | 2 +- lib/librte_eal/common/rte_reciprocal.c | 17 +- lib/librte_eal/linuxapp/eal/eal_alarm.c | 2 + lib/librte_eal/linuxapp/eal/eal_memory.c | 1 + lib/librte_ethdev/rte_ethdev.c | 93 +++-- lib/librte_hash/rte_cmp_x86.h | 2 + lib/librte_hash/rte_cuckoo_hash.c | 307 +++++++++++++- lib/librte_pci/rte_pci.c | 4 + lib/librte_pipeline/rte_table_action.c | 5 +- lib/librte_ring/rte_ring_c11_mem.h | 14 +- lib/librte_vhost/vhost_crypto.c | 460 ++++++++++++++++----- lib/librte_vhost/vhost_user.c | 2 +- lib/librte_vhost/virtio_net.c | 4 +- meson.build | 2 +- mk/rte.app.mk | 2 + mk/rte.sdkconfig.mk | 3 +- 
test/bpf/t1.c | 14 +- test/bpf/t3.c | 13 +- test/test/test.c | 6 +- test/test/test_bpf.c | 108 +++++ test/test/test_common.c | 32 ++ test/test/test_hash_readwrite.c | 20 +- test/test/test_kni.c | 2 +- test/test/test_power_acpi_cpufreq.c | 2 +- test/test/test_reorder.c | 2 +- 89 files changed, 1565 insertions(+), 651 deletions(-) delete mode 100644 lib/librte_eal/common/arch/x86/rte_memcpy.c diff --git a/app/test-pmd/cmdline.c b/app/test-pmd/cmdline.c index 5e08a1b9..12750741 100644 --- a/app/test-pmd/cmdline.c +++ b/app/test-pmd/cmdline.c @@ -17805,10 +17805,7 @@ search_rx_offload(const char *name) if (!strcasecmp(single_name, name)) { found = 1; break; - } else if (!strcasecmp(single_name, "UNKNOWN")) - break; - else if (single_name == NULL) - break; + } single_offload <<= 1; } diff --git a/app/test-pmd/cmdline_flow.c b/app/test-pmd/cmdline_flow.c index 91e2e350..23ea7cc8 100644 --- a/app/test-pmd/cmdline_flow.c +++ b/app/test-pmd/cmdline_flow.c @@ -3248,15 +3248,26 @@ parse_vc_action_rss(struct context *ctx, const struct token *token, .func = RTE_ETH_HASH_FUNCTION_DEFAULT, .level = 0, .types = rss_hf, - .key_len = 0, + .key_len = sizeof(action_rss_data->key), .queue_num = RTE_MIN(nb_rxq, ACTION_RSS_QUEUE_NUM), - .key = NULL, + .key = action_rss_data->key, .queue = action_rss_data->queue, }, + .key = "testpmd's default RSS hash key, " + "override it for better balancing", .queue = { 0 }, }; for (i = 0; i < action_rss_data->conf.queue_num; ++i) action_rss_data->queue[i] = i; + if (!port_id_is_invalid(ctx->port, DISABLED_WARN) && + ctx->port != (portid_t)RTE_PORT_ALL) { + struct rte_eth_dev_info info; + + rte_eth_dev_info_get(ctx->port, &info); + action_rss_data->conf.key_len = + RTE_MIN(sizeof(action_rss_data->key), + info.hash_key_size); + } action->conf = &action_rss_data->conf; return ret; } diff --git a/app/test-pmd/cmdline_mtr.c b/app/test-pmd/cmdline_mtr.c index 63f32828..846de88d 100644 --- a/app/test-pmd/cmdline_mtr.c +++ b/app/test-pmd/cmdline_mtr.c @@ -74,7 +74,7 @@ parse_uint(uint64_t *value, const char *str) } static int -parse_dscp_table_entries(char *str, enum rte_mtr_color *dscp_table) +parse_dscp_table_entries(char *str, enum rte_mtr_color **dscp_table) { char *token; int i = 0; @@ -84,23 +84,23 @@ parse_dscp_table_entries(char *str, enum rte_mtr_color *dscp_table) return 0; /* Allocate memory for dscp table */ - dscp_table = (enum rte_mtr_color *)malloc(MAX_DSCP_TABLE_ENTRIES * + *dscp_table = (enum rte_mtr_color *)malloc(MAX_DSCP_TABLE_ENTRIES * sizeof(enum rte_mtr_color)); - if (dscp_table == NULL) + if (*dscp_table == NULL) return -1; while (1) { if (strcmp(token, "G") == 0 || strcmp(token, "g") == 0) - dscp_table[i++] = RTE_MTR_GREEN; + *dscp_table[i++] = RTE_MTR_GREEN; else if (strcmp(token, "Y") == 0 || strcmp(token, "y") == 0) - dscp_table[i++] = RTE_MTR_YELLOW; + *dscp_table[i++] = RTE_MTR_YELLOW; else if (strcmp(token, "R") == 0 || strcmp(token, "r") == 0) - dscp_table[i++] = RTE_MTR_RED; + *dscp_table[i++] = RTE_MTR_RED; else { - free(dscp_table); + free(*dscp_table); return -1; } if (i == MAX_DSCP_TABLE_ENTRIES) @@ -108,7 +108,7 @@ parse_dscp_table_entries(char *str, enum rte_mtr_color *dscp_table) token = strtok_r(str, PARSE_DELIMITER, &str); if (token == NULL) { - free(dscp_table); + free(*dscp_table); return -1; } } @@ -117,7 +117,7 @@ parse_dscp_table_entries(char *str, enum rte_mtr_color *dscp_table) static int parse_meter_color_str(char *c_str, uint32_t *use_prev_meter_color, - enum rte_mtr_color *dscp_table) + enum rte_mtr_color **dscp_table) { char 
*token; uint64_t previous_mtr_color = 0; @@ -195,7 +195,7 @@ parse_policer_action_string(char *p_str, uint32_t action_mask, static int parse_multi_token_string(char *t_str, uint16_t *port_id, - uint32_t *mtr_id, enum rte_mtr_color *dscp_table) + uint32_t *mtr_id, enum rte_mtr_color **dscp_table) { char *token; uint64_t val; @@ -794,7 +794,7 @@ static void cmd_create_port_meter_parsed(void *parsed_result, params.meter_profile_id = res->profile_id; /* Parse meter input color string params */ - ret = parse_meter_color_str(c_str, &use_prev_meter_color, dscp_table); + ret = parse_meter_color_str(c_str, &use_prev_meter_color, &dscp_table); if (ret) { printf(" Meter input color params string parse error\n"); return; @@ -1141,7 +1141,7 @@ static void cmd_set_port_meter_dscp_table_parsed(void *parsed_result, int ret; /* Parse string */ - ret = parse_multi_token_string(t_str, &port_id, &mtr_id, dscp_table); + ret = parse_multi_token_string(t_str, &port_id, &mtr_id, &dscp_table); if (ret) { printf(" Multi token string parse error\n"); return; diff --git a/app/test-pmd/csumonly.c b/app/test-pmd/csumonly.c index dce4b9be..ffeee205 100644 --- a/app/test-pmd/csumonly.c +++ b/app/test-pmd/csumonly.c @@ -111,7 +111,9 @@ parse_ipv4(struct ipv4_hdr *ipv4_hdr, struct testpmd_offload_info *info) if (info->l4_proto == IPPROTO_TCP) { tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + info->l3_len); info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2; - } else + } else if (info->l4_proto == IPPROTO_UDP) + info->l4_len = sizeof(struct udp_hdr); + else info->l4_len = 0; } @@ -128,7 +130,9 @@ parse_ipv6(struct ipv6_hdr *ipv6_hdr, struct testpmd_offload_info *info) if (info->l4_proto == IPPROTO_TCP) { tcp_hdr = (struct tcp_hdr *)((char *)ipv6_hdr + info->l3_len); info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2; - } else + } else if (info->l4_proto == IPPROTO_UDP) + info->l4_len = sizeof(struct udp_hdr); + else info->l4_len = 0; } diff --git a/app/test-pmd/softnicfwd.c b/app/test-pmd/softnicfwd.c index 7ff62280..94e6669d 100644 --- a/app/test-pmd/softnicfwd.c +++ b/app/test-pmd/softnicfwd.c @@ -458,6 +458,7 @@ softport_tm_tc_node_add(portid_t port_id, error->message, shaper_profile_id); + free(tnp.shared_shaper_id); return -1; } tnp.shaper_profile_id = shaper_profile_id; @@ -473,6 +474,7 @@ softport_tm_tc_node_add(portid_t port_id, error->message, h->tc_node_id[pos][k]); + free(tnp.shared_shaper_id); return -1; } shaper_profile_id++; diff --git a/buildtools/check-experimental-syms.sh b/buildtools/check-experimental-syms.sh index d0915102..7d1f3a56 100755 --- a/buildtools/check-experimental-syms.sh +++ b/buildtools/check-experimental-syms.sh @@ -5,6 +5,12 @@ MAPFILE=$1 OBJFILE=$2 +# added check for "make -C test/" usage +if [ ! -e $MAPFILE ] || [ ! 
-f $OBJFILE ] +then + exit 0 +fi + if [ -d $MAPFILE ] then exit 0 diff --git a/buildtools/gen-build-mk.sh b/buildtools/gen-build-mk.sh index c18b205e..636920b6 100755 --- a/buildtools/gen-build-mk.sh +++ b/buildtools/gen-build-mk.sh @@ -5,7 +5,6 @@ # Auto-generate a Makefile in build directory # Args: # $1: path of project src root -# $2: path of build dir (can be relative to $1) echo "# Automatically generated by gen-build-mk.sh" echo @@ -18,7 +17,7 @@ echo echo "MAKEFLAGS += --no-print-directory" echo echo "all:" -echo " @\$(MAKE) -C $1 O=$2" +echo " @\$(MAKE) -C $1 O=\$(CURDIR)" echo echo "%::" -echo " @\$(MAKE) -C $1 O=$2 \$@" +echo " @\$(MAKE) -C $1 O=\$(CURDIR) \$@" diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index 7af5ead8..3610e008 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -137,6 +137,11 @@ Limitations enabled (``rxq_cqe_comp_en``) at the same time, RSS hash result is not fully supported. Some Rx packets may not have PKT_RX_RSS_HASH. +- IPv6 Multicast messages are not supported on VM, while promiscuous mode + and allmulticast mode are both set to off. + To receive IPv6 Multicast messages on VM, explicitly set the relevant + MAC address using rte_eth_dev_mac_addr_add() API. + Statistics ---------- @@ -434,6 +439,7 @@ Run-time configuration A nonzero value enables the DV flow steering assuming it is supported by the driver. + The DV flow steering is not supported on switchdev mode. Disabled by default. diff --git a/drivers/bus/vdev/vdev.c b/drivers/bus/vdev/vdev.c index 9c66bdc7..2c03ca41 100644 --- a/drivers/bus/vdev/vdev.c +++ b/drivers/bus/vdev/vdev.c @@ -224,7 +224,6 @@ insert_vdev(const char *name, const char *args, } dev->device.bus = &rte_vdev_bus; - dev->device.devargs = devargs; dev->device.numa_node = SOCKET_ID_ANY; dev->device.name = devargs->name; @@ -238,9 +237,10 @@ insert_vdev(const char *name, const char *args, goto fail; } - TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); if (init) - rte_devargs_insert(devargs); + rte_devargs_insert(&devargs); + dev->device.devargs = devargs; + TAILQ_INSERT_TAIL(&vdev_device_list, dev, next); if (p_dev) *p_dev = dev; diff --git a/drivers/bus/vmbus/linux/vmbus_uio.c b/drivers/bus/vmbus/linux/vmbus_uio.c index 856c6d66..12e97e3a 100644 --- a/drivers/bus/vmbus/linux/vmbus_uio.c +++ b/drivers/bus/vmbus/linux/vmbus_uio.c @@ -329,6 +329,7 @@ int vmbus_uio_get_subchan(struct vmbus_channel *primary, char chan_path[PATH_MAX], subchan_path[PATH_MAX]; struct dirent *ent; DIR *chan_dir; + int err; snprintf(chan_path, sizeof(chan_path), "%s/%s/channels", @@ -344,7 +345,6 @@ int vmbus_uio_get_subchan(struct vmbus_channel *primary, while ((ent = readdir(chan_dir))) { unsigned long relid, subid, monid; char *endp; - int err; if (ent->d_name[0] == '.') continue; @@ -364,8 +364,7 @@ int vmbus_uio_get_subchan(struct vmbus_channel *primary, if (err) { VMBUS_LOG(NOTICE, "invalid subchannel id %lu", subid); - closedir(chan_dir); - return err; + goto fail; } if (subid == 0) @@ -382,17 +381,20 @@ int vmbus_uio_get_subchan(struct vmbus_channel *primary, if (err) { VMBUS_LOG(NOTICE, "invalid monitor id %lu", monid); - return err; + goto fail; } err = vmbus_chan_create(dev, relid, subid, monid, subchan); if (err) { VMBUS_LOG(NOTICE, "subchannel setup failed"); - return err; + goto fail; } break; } closedir(chan_dir); return (ent == NULL) ? 
-ENOENT : 0; +fail: + closedir(chan_dir); + return err; } diff --git a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c index 43f6c26e..f3eff268 100644 --- a/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c +++ b/drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c @@ -535,14 +535,12 @@ aesni_mb_pmd_qp_set_unique_name(struct rte_cryptodev *dev, /** Create a ring to place processed operations on */ static struct rte_ring * aesni_mb_pmd_qp_create_processed_ops_ring(struct aesni_mb_qp *qp, - const char *str, unsigned int ring_size, int socket_id) + unsigned int ring_size, int socket_id) { struct rte_ring *r; char ring_name[RTE_CRYPTODEV_NAME_MAX_LEN]; - unsigned int n = snprintf(ring_name, sizeof(ring_name), - "%s_%s", - qp->name, str); + unsigned int n = snprintf(ring_name, sizeof(ring_name), "%s", qp->name); if (n >= sizeof(ring_name)) return NULL; @@ -600,7 +598,7 @@ aesni_mb_pmd_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id, qp->op_fns = &job_ops[internals->vector_mode]; qp->ingress_queue = aesni_mb_pmd_qp_create_processed_ops_ring(qp, - "ingress", qp_conf->nb_descriptors, socket_id); + qp_conf->nb_descriptors, socket_id); if (qp->ingress_queue == NULL) { ret = -1; goto qp_setup_cleanup; diff --git a/drivers/crypto/ccp/ccp_pci.c b/drivers/crypto/ccp/ccp_pci.c index 59152ca5..1702a09c 100644 --- a/drivers/crypto/ccp/ccp_pci.c +++ b/drivers/crypto/ccp/ccp_pci.c @@ -31,12 +31,15 @@ ccp_check_pci_uio_module(void) while (uio_module_names[i] != NULL) { while (fgets(buf, sizeof(buf), fp) != NULL) { if (!strncmp(buf, uio_module_names[i], - strlen(uio_module_names[i]))) + strlen(uio_module_names[i]))) { + fclose(fp); return i; + } } i++; rewind(fp); } + fclose(fp); printf("Insert igb_uio or uio_pci_generic kernel module(s)"); return -1;/* uio not inserted */ } diff --git a/drivers/crypto/octeontx/otx_cryptodev.c b/drivers/crypto/octeontx/otx_cryptodev.c index 269f0456..b201e0a1 100644 --- a/drivers/crypto/octeontx/otx_cryptodev.c +++ b/drivers/crypto/octeontx/otx_cryptodev.c @@ -100,8 +100,8 @@ otx_cpt_pci_remove(struct rte_pci_device *pci_dev) if (rte_eal_process_type() == RTE_PROC_PRIMARY) rte_free(cryptodev->data->dev_private); - cryptodev->device = NULL; cryptodev->device->driver = NULL; + cryptodev->device = NULL; cryptodev->data = NULL; /* free metapool memory */ diff --git a/drivers/crypto/octeontx/otx_cryptodev_hw_access.c b/drivers/crypto/octeontx/otx_cryptodev_hw_access.c index 5e705a83..18f2e6b1 100644 --- a/drivers/crypto/octeontx/otx_cryptodev_hw_access.c +++ b/drivers/crypto/octeontx/otx_cryptodev_hw_access.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "otx_cryptodev_hw_access.h" #include "otx_cryptodev_mbox.h" @@ -366,7 +367,9 @@ otx_cpt_hw_init(struct cpt_vf *cptvf, void *pdev, void *reg_base, char *name) /* Bar0 base address */ cptvf->reg_base = reg_base; - strncpy(cptvf->dev_name, name, 32); + + /* Save device name */ + strlcpy(cptvf->dev_name, name, (sizeof(cptvf->dev_name))); cptvf->pdev = pdev; diff --git a/drivers/crypto/octeontx/otx_cryptodev_ops.c b/drivers/crypto/octeontx/otx_cryptodev_ops.c index 23f96591..90d0c14b 100644 --- a/drivers/crypto/octeontx/otx_cryptodev_ops.c +++ b/drivers/crypto/octeontx/otx_cryptodev_ops.c @@ -216,7 +216,7 @@ otx_cpt_que_pair_setup(struct rte_cryptodev *dev, } ret = otx_cpt_get_resource(cptvf, 0, &instance); - if (ret != 0) { + if (ret != 0 || instance == NULL) { CPT_LOG_ERR("Error getting instance handle from device %s : " "ret = %d", dev->data->name, ret); return ret; diff --git 
a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c index 27975936..a6d2687a 100644 --- a/drivers/net/bnx2x/bnx2x.c +++ b/drivers/net/bnx2x/bnx2x.c @@ -199,8 +199,12 @@ static int bnx2x_acquire_hw_lock(struct bnx2x_softc *sc, uint32_t resource) uint32_t hw_lock_control_reg; int cnt; +#ifndef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC if (resource) PMD_INIT_FUNC_TRACE(sc); +#else + PMD_INIT_FUNC_TRACE(sc); +#endif /* validate the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { @@ -248,8 +252,12 @@ static int bnx2x_release_hw_lock(struct bnx2x_softc *sc, uint32_t resource) int func = SC_FUNC(sc); uint32_t hw_lock_control_reg; +#ifndef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC if (resource) PMD_INIT_FUNC_TRACE(sc); +#else + PMD_INIT_FUNC_TRACE(sc); +#endif /* validate the resource is within range */ if (resource > HW_LOCK_MAX_RESOURCE_VALUE) { @@ -7041,7 +7049,7 @@ void bnx2x_link_status_update(struct bnx2x_softc *sc) } bnx2x_link_report(sc); } else { - bnx2x_link_report(sc); + bnx2x_link_report_locked(sc); bnx2x_stats_handle(sc, STATS_EVENT_LINK_UP); } } @@ -9388,6 +9396,8 @@ static int bnx2x_prev_unload(struct bnx2x_softc *sc) uint32_t fw, hw_lock_reg, hw_lock_val; uint32_t rc = 0; + PMD_INIT_FUNC_TRACE(sc); + /* * Clear HW from errors which may have resulted from an interrupted * DMAE transaction. @@ -9395,22 +9405,23 @@ static int bnx2x_prev_unload(struct bnx2x_softc *sc) bnx2x_prev_interrupted_dmae(sc); /* Release previously held locks */ - if (SC_FUNC(sc) <= 5) - hw_lock_reg = (MISC_REG_DRIVER_CONTROL_1 + SC_FUNC(sc) * 8); - else - hw_lock_reg = - (MISC_REG_DRIVER_CONTROL_7 + (SC_FUNC(sc) - 6) * 8); + hw_lock_reg = (SC_FUNC(sc) <= 5) ? + (MISC_REG_DRIVER_CONTROL_1 + SC_FUNC(sc) * 8) : + (MISC_REG_DRIVER_CONTROL_7 + (SC_FUNC(sc) - 6) * 8); hw_lock_val = (REG_RD(sc, hw_lock_reg)); if (hw_lock_val) { if (hw_lock_val & HW_LOCK_RESOURCE_NVRAM) { + PMD_DRV_LOG(DEBUG, sc, "Releasing previously held NVRAM lock\n"); REG_WR(sc, MCP_REG_MCPR_NVM_SW_ARB, (MCPR_NVM_SW_ARB_ARB_REQ_CLR1 << SC_PORT(sc))); } + PMD_DRV_LOG(DEBUG, sc, "Releasing previously held HW lock\n"); REG_WR(sc, hw_lock_reg, 0xffffffff); } if (MCPR_ACCESS_LOCK_LOCK & REG_RD(sc, MCP_REG_MCPR_ACCESS_LOCK)) { + PMD_DRV_LOG(DEBUG, sc, "Releasing previously held ALR\n"); REG_WR(sc, MCP_REG_MCPR_ACCESS_LOCK, 0); } @@ -9740,6 +9751,8 @@ int bnx2x_attach(struct bnx2x_softc *sc) sc->fw_seq = (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_mb_header) & DRV_MSG_SEQ_NUMBER_MASK); + PMD_DRV_LOG(DEBUG, sc, "prev unload fw_seq 0x%04x", + sc->fw_seq); bnx2x_prev_unload(sc); } diff --git a/drivers/net/bnx2x/bnx2x_logs.h b/drivers/net/bnx2x/bnx2x_logs.h index 753bccdf..f0cf69c1 100644 --- a/drivers/net/bnx2x/bnx2x_logs.h +++ b/drivers/net/bnx2x/bnx2x_logs.h @@ -10,43 +10,40 @@ extern int bnx2x_logtype_init; #define PMD_INIT_LOG(level, sc, fmt, args...) \ - RTE_LOG(level, PMD, \ + rte_log(RTE_LOG_ ## level, bnx2x_logtype_init, \ "[bnx2x_pmd: %s] %s() " fmt "\n", (sc)->devinfo.name, __func__, ##args) #define PMD_INIT_FUNC_TRACE(sc) PMD_INIT_LOG(DEBUG, sc, " >>") +extern int bnx2x_logtype_driver; +#define PMD_DRV_LOG_RAW(level, sc, fmt, args...) \ + rte_log(RTE_LOG_ ## level, bnx2x_logtype_driver, \ + "[%s:%d(%s)] " fmt, __func__, __LINE__, \ + (sc)->devinfo.name ? (sc)->devinfo.name : "", ## args) + +#define PMD_DRV_LOG(level, sc, fmt, args...) \ + PMD_DRV_LOG_RAW(level, sc, fmt "\n", ## args) + #ifdef RTE_LIBRTE_BNX2X_DEBUG_RX #define PMD_RX_LOG(level, fmt, args...) 
\ - RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) + rte_log(RTE_LOG_ ## level, bnx2x_logtype_driver, \ + "%s(): " fmt "\n", __func__, ## args) #else #define PMD_RX_LOG(level, fmt, args...) do { } while(0) #endif #ifdef RTE_LIBRTE_BNX2X_DEBUG_TX #define PMD_TX_LOG(level, fmt, args...) \ - RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) + rte_log(RTE_LOG_ ## level, bnx2x_logtype_driver, \ + "%s(): " fmt "\n", __func__, ## args) #else #define PMD_TX_LOG(level, fmt, args...) do { } while(0) #endif -#ifdef RTE_LIBRTE_BNX2X_DEBUG_TX_FREE -#define PMD_TX_FREE_LOG(level, fmt, args...) \ - RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) -#else -#define PMD_TX_FREE_LOG(level, fmt, args...) do { } while(0) -#endif - -extern int bnx2x_logtype_driver; -#define PMD_DRV_LOG_RAW(level, sc, fmt, args...) \ - RTE_LOG(level, PMD, "[%s:%d(%s)] " fmt, __func__, __LINE__, \ - (sc)->devinfo.name ? (sc)->devinfo.name : "", ## args) - -#define PMD_DRV_LOG(level, sc, fmt, args...) \ - PMD_DRV_LOG_RAW(level, sc, fmt "\n", ## args) - #ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC #define PMD_DEBUG_PERIODIC_LOG(level, sc, fmt, args...) \ - RTE_LOG(level, PMD, "%s(%s): " fmt "\n", __func__, \ + rte_log(RTE_LOG_ ## level, bnx2x_logtype_driver, \ + "%s(%s): " fmt "\n", __func__, \ (sc)->devinfo.name ? (sc)->devinfo.name : "", ## args) #else #define PMD_DEBUG_PERIODIC_LOG(level, sc, fmt, args...) do { } while (0) diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c index 1a6d8e4d..2661620a 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c @@ -2181,9 +2181,14 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev) internals->link_status_polling_enabled = 0; for (i = 0; i < internals->slave_count; i++) { - internals->slaves[i].last_link_status = 0; - rte_eth_dev_stop(internals->slaves[i].port_id); - deactivate_slave(eth_dev, internals->slaves[i].port_id); + uint16_t slave_id = internals->slaves[i].port_id; + if (find_slave_by_id(internals->active_slaves, + internals->active_slave_count, slave_id) != + internals->active_slave_count) { + internals->slaves[i].last_link_status = 0; + rte_eth_dev_stop(slave_id); + deactivate_slave(eth_dev, slave_id); + } } } diff --git a/drivers/net/cxgbe/cxgbe_filter.c b/drivers/net/cxgbe/cxgbe_filter.c index ef1102be..3a7912e4 100644 --- a/drivers/net/cxgbe/cxgbe_filter.c +++ b/drivers/net/cxgbe/cxgbe_filter.c @@ -263,8 +263,8 @@ static u64 hash_filter_ntuple(const struct filter_entry *f) u64 ntuple = 0; u16 tcp_proto = IPPROTO_TCP; /* TCP Protocol Number */ - if (tp->port_shift >= 0) - ntuple |= (u64)f->fs.mask.iport << tp->port_shift; + if (tp->port_shift >= 0 && f->fs.mask.iport) + ntuple |= (u64)f->fs.val.iport << tp->port_shift; if (tp->protocol_shift >= 0) { if (!f->fs.val.proto) @@ -278,9 +278,6 @@ static u64 hash_filter_ntuple(const struct filter_entry *f) if (tp->macmatch_shift >= 0 && f->fs.mask.macidx) ntuple |= (u64)(f->fs.val.macidx) << tp->macmatch_shift; - if (ntuple != tp->hash_filter_mask) - return 0; - return ntuple; } diff --git a/drivers/net/cxgbe/cxgbe_flow.c b/drivers/net/cxgbe/cxgbe_flow.c index 54ec7e59..4deaff8f 100644 --- a/drivers/net/cxgbe/cxgbe_flow.c +++ b/drivers/net/cxgbe/cxgbe_flow.c @@ -7,14 +7,12 @@ #define __CXGBE_FILL_FS(__v, __m, fs, elem, e) \ do { \ - if (!((fs)->val.elem || (fs)->mask.elem)) { \ - (fs)->val.elem = (__v); \ - (fs)->mask.elem = (__m); \ - } else { \ + if ((fs)->mask.elem && ((fs)->val.elem != (__v))) \ return rte_flow_error_set(e, 
EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, \ - NULL, "a filter can be specified" \ - " only once"); \ - } \ + NULL, "Redefined match item with" \ + " different values found"); \ + (fs)->val.elem = (__v); \ + (fs)->mask.elem = (__m); \ } while (0) #define __CXGBE_FILL_FS_MEMCPY(__v, __m, fs, elem) \ @@ -799,7 +797,7 @@ static int __cxgbe_flow_create(struct rte_eth_dev *dev, struct rte_flow *flow) /* Poll the FW for reply */ err = cxgbe_poll_for_completion(&adap->sge.fw_evtq, - CXGBE_FLOW_POLL_US, + CXGBE_FLOW_POLL_MS, CXGBE_FLOW_POLL_CNT, &ctx.completion); if (err) { @@ -885,7 +883,7 @@ static int __cxgbe_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow) /* Poll the FW for reply */ err = cxgbe_poll_for_completion(&adap->sge.fw_evtq, - CXGBE_FLOW_POLL_US, + CXGBE_FLOW_POLL_MS, CXGBE_FLOW_POLL_CNT, &ctx.completion); if (err) { diff --git a/drivers/net/cxgbe/cxgbe_flow.h b/drivers/net/cxgbe/cxgbe_flow.h index 718bf3d0..ec8e47ae 100644 --- a/drivers/net/cxgbe/cxgbe_flow.h +++ b/drivers/net/cxgbe/cxgbe_flow.h @@ -10,8 +10,9 @@ #include "mps_tcam.h" #include "cxgbe.h" -#define CXGBE_FLOW_POLL_US 10 -#define CXGBE_FLOW_POLL_CNT 10 +/* Max poll time is 100 * 100msec = 10 sec */ +#define CXGBE_FLOW_POLL_MS 100 /* 100 milliseconds */ +#define CXGBE_FLOW_POLL_CNT 100 /* Max number of times to poll */ struct chrte_fparse { int (*fptr)(const void *mask, /* currently supported mask */ diff --git a/drivers/net/cxgbe/cxgbe_main.c b/drivers/net/cxgbe/cxgbe_main.c index 88dc851f..ec080e5d 100644 --- a/drivers/net/cxgbe/cxgbe_main.c +++ b/drivers/net/cxgbe/cxgbe_main.c @@ -157,18 +157,18 @@ out: /** * cxgbe_poll_for_completion: Poll rxq for completion * @q: rxq to poll - * @us: microseconds to delay + * @ms: milliseconds to delay * @cnt: number of times to poll * @c: completion to check for 'done' status * * Polls the rxq for reples until completion is done or the count * expires. */ -int cxgbe_poll_for_completion(struct sge_rspq *q, unsigned int us, +int cxgbe_poll_for_completion(struct sge_rspq *q, unsigned int ms, unsigned int cnt, struct t4_completion *c) { unsigned int i; - unsigned int work_done, budget = 4; + unsigned int work_done, budget = 32; if (!c) return -EINVAL; @@ -181,7 +181,7 @@ int cxgbe_poll_for_completion(struct sge_rspq *q, unsigned int us, return 0; } t4_os_unlock(&c->lock); - udelay(us); + rte_delay_ms(ms); } return -ETIMEDOUT; } @@ -1339,18 +1339,22 @@ inline bool force_linkup(struct adapter *adap) int link_start(struct port_info *pi) { struct adapter *adapter = pi->adapter; - int ret; + u64 conf_offloads; unsigned int mtu; + int ret; mtu = pi->eth_dev->data->dev_conf.rxmode.max_rx_pkt_len - (ETHER_HDR_LEN + ETHER_CRC_LEN); + conf_offloads = pi->eth_dev->data->dev_conf.rxmode.offloads; + /* * We do not set address filters and promiscuity here, the stack does * that step explicitly. 
*/ - ret = t4_set_rxmode(adapter, adapter->mbox, pi->viid, mtu, -1, -1, - -1, 1, true); + ret = t4_set_rxmode(adapter, adapter->mbox, pi->viid, mtu, -1, -1, -1, + !!(conf_offloads & DEV_RX_OFFLOAD_VLAN_STRIP), + true); if (ret == 0) { ret = cxgbe_mpstcam_modify(pi, (int)pi->xact_addr_filt, (u8 *)&pi->eth_dev->data->mac_addrs[0]); diff --git a/drivers/net/cxgbe/cxgbevf_ethdev.c b/drivers/net/cxgbe/cxgbevf_ethdev.c index 3b32ca9d..a6458d53 100644 --- a/drivers/net/cxgbe/cxgbevf_ethdev.c +++ b/drivers/net/cxgbe/cxgbevf_ethdev.c @@ -177,6 +177,16 @@ out_free_adapter: return err; } +static int eth_cxgbevf_dev_uninit(struct rte_eth_dev *eth_dev) +{ + struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private); + struct adapter *adap = pi->adapter; + + /* Free up other ports and all resources */ + cxgbe_close(adap); + return 0; +} + static int eth_cxgbevf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_device *pci_dev) { @@ -186,7 +196,7 @@ static int eth_cxgbevf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, static int eth_cxgbevf_pci_remove(struct rte_pci_device *pci_dev) { - return rte_eth_dev_pci_generic_remove(pci_dev, NULL); + return rte_eth_dev_pci_generic_remove(pci_dev, eth_cxgbevf_dev_uninit); } static struct rte_pci_driver rte_cxgbevf_pmd = { diff --git a/drivers/net/cxgbe/cxgbevf_main.c b/drivers/net/cxgbe/cxgbevf_main.c index 6223e125..61bd8519 100644 --- a/drivers/net/cxgbe/cxgbevf_main.c +++ b/drivers/net/cxgbe/cxgbevf_main.c @@ -11,6 +11,7 @@ #include "t4_regs.h" #include "t4_msg.h" #include "cxgbe.h" +#include "mps_tcam.h" /* * Figure out how many Ports and Queue Sets we can support. This depends on @@ -271,6 +272,11 @@ allocate_mac: print_adapter_info(adapter); print_port_info(adapter); + adapter->mpstcam = t4_init_mpstcam(adapter); + if (!adapter->mpstcam) + dev_warn(adapter, + "VF could not allocate mps tcam table. Continuing\n"); + err = init_rss(adapter); if (err) goto out_free; diff --git a/drivers/net/cxgbe/mps_tcam.c b/drivers/net/cxgbe/mps_tcam.c index 02ec69a9..71c8070b 100644 --- a/drivers/net/cxgbe/mps_tcam.c +++ b/drivers/net/cxgbe/mps_tcam.c @@ -236,8 +236,6 @@ struct mpstcam_table *t4_init_mpstcam(struct adapter *adap) void t4_cleanup_mpstcam(struct adapter *adap) { - if (adap->mpstcam) { - t4_os_free(adap->mpstcam->entry); + if (adap->mpstcam) t4_os_free(adap->mpstcam); - } } diff --git a/drivers/net/e1000/base/e1000_i210.c b/drivers/net/e1000/base/e1000_i210.c index 277331c4..c2abb43f 100644 --- a/drivers/net/e1000/base/e1000_i210.c +++ b/drivers/net/e1000/base/e1000_i210.c @@ -941,6 +941,7 @@ STATIC s32 e1000_pll_workaround_i210(struct e1000_hw *hw) if (ret_val != E1000_SUCCESS) nvm_word = E1000_INVM_DEFAULT_AL; tmp_nvm = nvm_word | E1000_INVM_PLL_WO_VAL; + phy_word = E1000_PHY_PLL_UNCONF; for (i = 0; i < E1000_MAX_PLL_TRIES; i++) { /* check current state directly from internal PHY */ e1000_read_phy_reg_gs40g(hw, (E1000_PHY_PLL_FREQ_PAGE | diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c index abe1e7bd..05a4fbe0 100644 --- a/drivers/net/ena/ena_ethdev.c +++ b/drivers/net/ena/ena_ethdev.c @@ -529,11 +529,6 @@ static void ena_close(struct rte_eth_dev *dev) ena_interrupt_handler_rte, adapter); - /* - * Pass the information to the rte_eth_dev_close() that it should also - * release the private port resources. - */ - dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; /* * MAC is not allocated dynamically. Setting NULL should prevent from * release of the resource in the rte_eth_dev_release_port(). 
@@ -1666,6 +1661,12 @@ static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) ether_addr_copy((struct ether_addr *)get_feat_ctx.dev_attr.mac_addr, (struct ether_addr *)adapter->mac_addr); + /* + * Pass the information to the rte_eth_dev_close() that it should also + * release the private port resources. + */ + eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; + adapter->drv_stats = rte_zmalloc("adapter stats", sizeof(*adapter->drv_stats), RTE_CACHE_LINE_SIZE); diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c index 5189ee63..0aadd342 100644 --- a/drivers/net/enic/enic_rxtx.c +++ b/drivers/net/enic/enic_rxtx.c @@ -393,11 +393,22 @@ uint16_t enic_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, for (i = 0; i != nb_pkts; i++) { m = tx_pkts[i]; - if (unlikely(m->pkt_len > ENIC_TX_MAX_PKT_SIZE)) { - rte_errno = EINVAL; - return i; - } ol_flags = m->ol_flags; + if (!(ol_flags & PKT_TX_TCP_SEG)) { + if (unlikely(m->pkt_len > ENIC_TX_MAX_PKT_SIZE)) { + rte_errno = EINVAL; + return i; + } + } else { + uint16_t header_len; + + header_len = m->l2_len + m->l3_len + m->l4_len; + if (m->tso_segsz + header_len > ENIC_TX_MAX_PKT_SIZE) { + rte_errno = EINVAL; + return i; + } + } + if (ol_flags & wq->tx_offload_notsup_mask) { rte_errno = ENOTSUP; return i; diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index c852022d..85fb6c5c 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -464,11 +464,6 @@ fm10k_dev_configure(struct rte_eth_dev *dev) return 0; } -/* fls = find last set bit = 32 minus the number of leading zeros */ -#ifndef fls -#define fls(x) (((x) == 0) ? 0 : (32 - __builtin_clz((x)))) -#endif - static void fm10k_dev_vmdq_rx_configure(struct rte_eth_dev *dev) { @@ -1030,8 +1025,8 @@ fm10k_dev_dglort_map_configure(struct rte_eth_dev *dev) macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private); nb_queue_pools = macvlan->nb_queue_pools; - pool_len = nb_queue_pools ? fls(nb_queue_pools - 1) : 0; - rss_len = fls(dev->data->nb_rx_queues - 1) - pool_len; + pool_len = nb_queue_pools ? rte_fls_u32(nb_queue_pools - 1) : 0; + rss_len = rte_fls_u32(dev->data->nb_rx_queues - 1) - pool_len; /* GLORT 0x0-0x3F are used by PF and VMDQ, 0x40-0x7F used by FD */ dglortdec = (rss_len << FM10K_DGLORTDEC_RSSLENGTH_SHIFT) | pool_len; @@ -1042,7 +1037,7 @@ fm10k_dev_dglort_map_configure(struct rte_eth_dev *dev) FM10K_WRITE_REG(hw, FM10K_DGLORTDEC(0), dglortdec); /* Flow Director configurations, only queue number is valid. 
*/ - dglortdec = fls(dev->data->nb_rx_queues - 1); + dglortdec = rte_fls_u32(dev->data->nb_rx_queues - 1); dglortmask = (GLORT_FD_MASK << FM10K_DGLORTMAP_MASK_SHIFT) | (hw->mac.dglort_map + GLORT_FD_Q_BASE); FM10K_WRITE_REG(hw, FM10K_DGLORTMAP(1), dglortmask); diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 1c779068..790ecc3c 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -12552,13 +12552,16 @@ i40e_rss_conf_init(struct i40e_rte_flow_rss_conf *out, if (in->key_len > RTE_DIM(out->key) || in->queue_num > RTE_DIM(out->queue)) return -EINVAL; + if (!in->key && in->key_len) + return -EINVAL; + if (in->key) + out->conf.key = memcpy(out->key, in->key, in->key_len); out->conf = (struct rte_flow_action_rss){ .func = in->func, .level = in->level, .types = in->types, .key_len = in->key_len, .queue_num = in->queue_num, - .key = memcpy(out->key, in->key, in->key_len), .queue = memcpy(out->queue, in->queue, sizeof(*in->queue) * in->queue_num), }; diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index c9e82d51..91ba6201 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -2549,6 +2549,9 @@ ixgbe_dev_start(struct rte_eth_dev *dev) return -EINVAL; } + /* Stop the link setup handler before resetting the HW. */ + rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev); + /* disable uio/vfio intr/eventfd mapping */ rte_intr_disable(intr_handle); @@ -2731,8 +2734,6 @@ ixgbe_dev_start(struct rte_eth_dev *dev) if (err) goto error; - ixgbe_dev_link_update(dev, 0); - skip_link_setup: if (rte_intr_allow_others(intr_handle)) { @@ -2768,6 +2769,12 @@ skip_link_setup: "please call hierarchy_commit() " "before starting the port"); + /* + * Update link status right before return, because it may + * start link configuration process in a separate thread. + */ + ixgbe_dev_link_update(dev, 0); + return 0; error: @@ -3873,11 +3880,6 @@ static int ixgbevf_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed, int *link_up, int wait_to_complete) { - /** - * for a quick link status checking, wait_to_compelet == 0, - * skip PF link status checking - */ - bool no_pflink_check = wait_to_complete == 0; struct ixgbe_mbx_info *mbx = &hw->mbx; struct ixgbe_mac_info *mac = &hw->mac; uint32_t links_reg, in_msg; @@ -3938,14 +3940,6 @@ ixgbevf_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed, *speed = IXGBE_LINK_SPEED_UNKNOWN; } - if (no_pflink_check) { - if (*speed == IXGBE_LINK_SPEED_UNKNOWN) - mac->get_link_status = true; - else - mac->get_link_status = false; - - goto out; - } /* if the read failed it could just be a mailbox collision, best wait * until we are called again and don't report an error */ @@ -3955,7 +3949,7 @@ ixgbevf_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed, if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) { /* msg is not CTS and is NACK we must have lost CTS status */ if (in_msg & IXGBE_VT_MSGTYPE_NACK) - ret_val = -1; + mac->get_link_status = false; goto out; } @@ -5061,6 +5055,9 @@ ixgbevf_dev_start(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); + /* Stop the link setup handler before resetting the HW. 
*/ + rte_eal_alarm_cancel(ixgbe_dev_setup_link_alarm_handler, dev); + err = hw->mac.ops.reset_hw(hw); if (err) { PMD_INIT_LOG(ERR, "Unable to reset vf hardware (%d)", err); @@ -5096,8 +5093,6 @@ ixgbevf_dev_start(struct rte_eth_dev *dev) ixgbevf_dev_rxtx_start(dev); - ixgbevf_dev_link_update(dev, 0); - /* check and configure queue intr-vector mapping */ if (rte_intr_cap_multiple(intr_handle) && dev->data->dev_conf.intr_conf.rxq) { @@ -5135,6 +5130,12 @@ ixgbevf_dev_start(struct rte_eth_dev *dev) /* Re-enable interrupt for VF */ ixgbevf_intr_enable(dev); + /* + * Update link status right before return, because it may + * start link configuration process in a separate thread. + */ + ixgbevf_dev_link_update(dev, 0); + return 0; } diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index 7a50bccd..895cdfee 100644 --- a/drivers/net/mlx5/Makefile +++ b/drivers/net/mlx5/Makefile @@ -51,7 +51,7 @@ CFLAGS += -D_DEFAULT_SOURCE CFLAGS += -D_XOPEN_SOURCE=600 CFLAGS += $(WERROR_FLAGS) CFLAGS += -Wno-strict-prototypes -CFLAGS += $(shell pkg-config --cflags libmnl) +CFLAGS += $(shell command -v pkg-config > /dev/null 2>&1 && pkg-config --cflags libmnl) ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y) CFLAGS += -DMLX5_GLUE='"$(LIB_GLUE)"' CFLAGS += -DMLX5_GLUE_VERSION='"$(LIB_GLUE_VERSION)"' @@ -60,7 +60,7 @@ LDLIBS += -ldl else LDLIBS += -libverbs -lmlx5 endif -LDLIBS += $(shell pkg-config --libs libmnl) +LDLIBS += $(shell command -v pkg-config > /dev/null 2>&1 && pkg-config --libs libmnl || echo "-lmnl") LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs LDLIBS += -lrte_bus_pci diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index ed1fcfc7..9e5cab16 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -346,11 +346,6 @@ mlx5_dev_close(struct rte_eth_dev *dev) } memset(priv, 0, sizeof(*priv)); priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; - /* - * flag to rte_eth_dev_close() that it should release the port resources - * (calling rte_eth_dev_release_port()) in addition to closing it. - */ - dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; /* * Reset mac_addrs to NULL such that it is not freed as part of * rte_eth_dev_release_port(). mac_addrs is part of dev_private so @@ -1114,6 +1109,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, err = ENOMEM; goto error; } + /* Flag to call rte_eth_dev_release_port() in rte_eth_dev_close(). 
*/ + eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; if (priv->representor) { eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR; eth_dev->data->representor_id = priv->representor_id; diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index 3c2ac4b3..5ad3a11a 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -1178,6 +1178,12 @@ mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item, "L3 cannot follow an L4 layer."); if (!mask) mask = &rte_flow_item_ipv4_mask; + else if (mask->hdr.next_proto_id != 0 && + mask->hdr.next_proto_id != 0xff) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, + "partial mask is not supported" + " for protocol"); ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, (const uint8_t *)&nic_mask, sizeof(struct rte_flow_item_ipv4), @@ -1234,17 +1240,6 @@ mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item, return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item, "L3 cannot follow an L4 layer."); - /* - * IPv6 is not recognised by the NIC inside a GRE tunnel. - * Such support has to be disabled as the rule will be - * accepted. Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and - * Mellanox OFED 4.4-1.0.0.0. - */ - if (tunnel && item_flags & MLX5_FLOW_LAYER_GRE) - return rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ITEM, item, - "IPv6 inside a GRE tunnel is" - " not recognised."); if (!mask) mask = &rte_flow_item_ipv6_mask; ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask, @@ -2657,7 +2652,7 @@ flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2) FLOW_FDIR_CMP(f1, f2, l3_mask) || FLOW_FDIR_CMP(f1, f2, l4) || FLOW_FDIR_CMP(f1, f2, l4_mask) || - FLOW_FDIR_CMP(f1, f2, actions[0])) + FLOW_FDIR_CMP(f1, f2, actions[0].type)) return 1; if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE && FLOW_FDIR_CMP(f1, f2, queue)) diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c index 79096153..a2edd168 100644 --- a/drivers/net/mlx5/mlx5_flow_dv.c +++ b/drivers/net/mlx5/mlx5_flow_dv.c @@ -814,10 +814,17 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, MLX5_FLOW_LAYER_OUTER_L3_IPV4; if (items->mask != NULL && ((const struct rte_flow_item_ipv4 *) - items->mask)->hdr.next_proto_id) + items->mask)->hdr.next_proto_id) { next_protocol = ((const struct rte_flow_item_ipv4 *) (items->spec))->hdr.next_proto_id; + next_protocol &= + ((const struct rte_flow_item_ipv4 *) + (items->mask))->hdr.next_proto_id; + } else { + /* Reset for inner layer. */ + next_protocol = 0xff; + } break; case RTE_FLOW_ITEM_TYPE_IPV6: ret = mlx5_flow_validate_item_ipv6(items, item_flags, @@ -828,10 +835,17 @@ flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, MLX5_FLOW_LAYER_OUTER_L3_IPV6; if (items->mask != NULL && ((const struct rte_flow_item_ipv6 *) - items->mask)->hdr.proto) + items->mask)->hdr.proto) { next_protocol = ((const struct rte_flow_item_ipv6 *) items->spec)->hdr.proto; + next_protocol &= + ((const struct rte_flow_item_ipv6 *) + items->mask)->hdr.proto; + } else { + /* Reset for inner layer. */ + next_protocol = 0xff; + } break; case RTE_FLOW_ITEM_TYPE_TCP: ret = mlx5_flow_validate_item_tcp @@ -1041,6 +1055,39 @@ flow_dv_prepare(const struct rte_flow_attr *attr __rte_unused, return flow; } +#ifndef NDEBUG +/** + * Sanity check for match mask and value. Similar to check_valid_spec() in + * kernel driver. 
If unmasked bit is present in value, it returns failure. + * + * @param match_mask + * pointer to match mask buffer. + * @param match_value + * pointer to match value buffer. + * + * @return + * 0 if valid, -EINVAL otherwise. + */ +static int +flow_dv_check_valid_spec(void *match_mask, void *match_value) +{ + uint8_t *m = match_mask; + uint8_t *v = match_value; + unsigned int i; + + for (i = 0; i < MLX5_ST_SZ_DB(fte_match_param); ++i) { + if (v[i] & ~m[i]) { + DRV_LOG(ERR, + "match_value differs from match_criteria" + " %p[%u] != %p[%u]", + match_value, i, match_mask, i); + return -EINVAL; + } + } + return 0; +} +#endif + /** * Add Ethernet item to matcher and to the value. * @@ -1750,114 +1797,6 @@ flow_dv_translate(struct rte_eth_dev *dev, if (priority == MLX5_FLOW_PRIO_RSVD) priority = priv->config.flow_prio - 1; - for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { - int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); - void *match_mask = matcher.mask.buf; - void *match_value = dev_flow->dv.value.buf; - - switch (items->type) { - case RTE_FLOW_ITEM_TYPE_ETH: - flow_dv_translate_item_eth(match_mask, match_value, - items, tunnel); - matcher.priority = MLX5_PRIORITY_MAP_L2; - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 : - MLX5_FLOW_LAYER_OUTER_L2; - break; - case RTE_FLOW_ITEM_TYPE_VLAN: - flow_dv_translate_item_vlan(match_mask, match_value, - items, tunnel); - matcher.priority = MLX5_PRIORITY_MAP_L2; - item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 | - MLX5_FLOW_LAYER_INNER_VLAN) : - (MLX5_FLOW_LAYER_OUTER_L2 | - MLX5_FLOW_LAYER_OUTER_VLAN); - break; - case RTE_FLOW_ITEM_TYPE_IPV4: - flow_dv_translate_item_ipv4(match_mask, match_value, - items, tunnel); - matcher.priority = MLX5_PRIORITY_MAP_L3; - dev_flow->dv.hash_fields |= - mlx5_flow_hashfields_adjust - (dev_flow, tunnel, - MLX5_IPV4_LAYER_TYPES, - MLX5_IPV4_IBV_RX_HASH); - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : - MLX5_FLOW_LAYER_OUTER_L3_IPV4; - break; - case RTE_FLOW_ITEM_TYPE_IPV6: - flow_dv_translate_item_ipv6(match_mask, match_value, - items, tunnel); - matcher.priority = MLX5_PRIORITY_MAP_L3; - dev_flow->dv.hash_fields |= - mlx5_flow_hashfields_adjust - (dev_flow, tunnel, - MLX5_IPV6_LAYER_TYPES, - MLX5_IPV6_IBV_RX_HASH); - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : - MLX5_FLOW_LAYER_OUTER_L3_IPV6; - break; - case RTE_FLOW_ITEM_TYPE_TCP: - flow_dv_translate_item_tcp(match_mask, match_value, - items, tunnel); - matcher.priority = MLX5_PRIORITY_MAP_L4; - dev_flow->dv.hash_fields |= - mlx5_flow_hashfields_adjust - (dev_flow, tunnel, ETH_RSS_TCP, - IBV_RX_HASH_SRC_PORT_TCP | - IBV_RX_HASH_DST_PORT_TCP); - item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP : - MLX5_FLOW_LAYER_OUTER_L4_TCP; - break; - case RTE_FLOW_ITEM_TYPE_UDP: - flow_dv_translate_item_udp(match_mask, match_value, - items, tunnel); - matcher.priority = MLX5_PRIORITY_MAP_L4; - dev_flow->verbs.hash_fields |= - mlx5_flow_hashfields_adjust - (dev_flow, tunnel, ETH_RSS_UDP, - IBV_RX_HASH_SRC_PORT_UDP | - IBV_RX_HASH_DST_PORT_UDP); - item_flags |= tunnel ? 
MLX5_FLOW_LAYER_INNER_L4_UDP : - MLX5_FLOW_LAYER_OUTER_L4_UDP; - break; - case RTE_FLOW_ITEM_TYPE_GRE: - flow_dv_translate_item_gre(match_mask, match_value, - items, tunnel); - item_flags |= MLX5_FLOW_LAYER_GRE; - break; - case RTE_FLOW_ITEM_TYPE_NVGRE: - flow_dv_translate_item_nvgre(match_mask, match_value, - items, tunnel); - item_flags |= MLX5_FLOW_LAYER_GRE; - break; - case RTE_FLOW_ITEM_TYPE_VXLAN: - flow_dv_translate_item_vxlan(match_mask, match_value, - items, tunnel); - item_flags |= MLX5_FLOW_LAYER_VXLAN; - break; - case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: - flow_dv_translate_item_vxlan(match_mask, match_value, - items, tunnel); - item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE; - break; - case RTE_FLOW_ITEM_TYPE_META: - flow_dv_translate_item_meta(match_mask, match_value, - items); - item_flags |= MLX5_FLOW_ITEM_METADATA; - break; - default: - break; - } - } - dev_flow->layers = item_flags; - /* Register matcher. */ - matcher.crc = rte_raw_cksum((const void *)matcher.mask.buf, - matcher.mask.size); - matcher.priority = mlx5_flow_adjust_priority(dev, priority, - matcher.priority); - matcher.egress = attr->egress; - if (flow_dv_matcher_register(dev, &matcher, dev_flow, error)) - return -rte_errno; for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { const struct rte_flow_action_queue *queue; const struct rte_flow_action_rss *rss; @@ -1991,6 +1930,116 @@ flow_dv_translate(struct rte_eth_dev *dev, } dev_flow->dv.actions_n = actions_n; flow->actions = action_flags; + for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { + int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL); + void *match_mask = matcher.mask.buf; + void *match_value = dev_flow->dv.value.buf; + + switch (items->type) { + case RTE_FLOW_ITEM_TYPE_ETH: + flow_dv_translate_item_eth(match_mask, match_value, + items, tunnel); + matcher.priority = MLX5_PRIORITY_MAP_L2; + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 : + MLX5_FLOW_LAYER_OUTER_L2; + break; + case RTE_FLOW_ITEM_TYPE_VLAN: + flow_dv_translate_item_vlan(match_mask, match_value, + items, tunnel); + matcher.priority = MLX5_PRIORITY_MAP_L2; + item_flags |= tunnel ? (MLX5_FLOW_LAYER_INNER_L2 | + MLX5_FLOW_LAYER_INNER_VLAN) : + (MLX5_FLOW_LAYER_OUTER_L2 | + MLX5_FLOW_LAYER_OUTER_VLAN); + break; + case RTE_FLOW_ITEM_TYPE_IPV4: + flow_dv_translate_item_ipv4(match_mask, match_value, + items, tunnel); + matcher.priority = MLX5_PRIORITY_MAP_L3; + dev_flow->dv.hash_fields |= + mlx5_flow_hashfields_adjust + (dev_flow, tunnel, + MLX5_IPV4_LAYER_TYPES, + MLX5_IPV4_IBV_RX_HASH); + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 : + MLX5_FLOW_LAYER_OUTER_L3_IPV4; + break; + case RTE_FLOW_ITEM_TYPE_IPV6: + flow_dv_translate_item_ipv6(match_mask, match_value, + items, tunnel); + matcher.priority = MLX5_PRIORITY_MAP_L3; + dev_flow->dv.hash_fields |= + mlx5_flow_hashfields_adjust + (dev_flow, tunnel, + MLX5_IPV6_LAYER_TYPES, + MLX5_IPV6_IBV_RX_HASH); + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 : + MLX5_FLOW_LAYER_OUTER_L3_IPV6; + break; + case RTE_FLOW_ITEM_TYPE_TCP: + flow_dv_translate_item_tcp(match_mask, match_value, + items, tunnel); + matcher.priority = MLX5_PRIORITY_MAP_L4; + dev_flow->dv.hash_fields |= + mlx5_flow_hashfields_adjust + (dev_flow, tunnel, ETH_RSS_TCP, + IBV_RX_HASH_SRC_PORT_TCP | + IBV_RX_HASH_DST_PORT_TCP); + item_flags |= tunnel ? 
MLX5_FLOW_LAYER_INNER_L4_TCP : + MLX5_FLOW_LAYER_OUTER_L4_TCP; + break; + case RTE_FLOW_ITEM_TYPE_UDP: + flow_dv_translate_item_udp(match_mask, match_value, + items, tunnel); + matcher.priority = MLX5_PRIORITY_MAP_L4; + dev_flow->dv.hash_fields |= + mlx5_flow_hashfields_adjust + (dev_flow, tunnel, ETH_RSS_UDP, + IBV_RX_HASH_SRC_PORT_UDP | + IBV_RX_HASH_DST_PORT_UDP); + item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP : + MLX5_FLOW_LAYER_OUTER_L4_UDP; + break; + case RTE_FLOW_ITEM_TYPE_GRE: + flow_dv_translate_item_gre(match_mask, match_value, + items, tunnel); + item_flags |= MLX5_FLOW_LAYER_GRE; + break; + case RTE_FLOW_ITEM_TYPE_NVGRE: + flow_dv_translate_item_nvgre(match_mask, match_value, + items, tunnel); + item_flags |= MLX5_FLOW_LAYER_GRE; + break; + case RTE_FLOW_ITEM_TYPE_VXLAN: + flow_dv_translate_item_vxlan(match_mask, match_value, + items, tunnel); + item_flags |= MLX5_FLOW_LAYER_VXLAN; + break; + case RTE_FLOW_ITEM_TYPE_VXLAN_GPE: + flow_dv_translate_item_vxlan(match_mask, match_value, + items, tunnel); + item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE; + break; + case RTE_FLOW_ITEM_TYPE_META: + flow_dv_translate_item_meta(match_mask, match_value, + items); + item_flags |= MLX5_FLOW_ITEM_METADATA; + break; + default: + break; + } + } + assert(!flow_dv_check_valid_spec(matcher.mask.buf, + dev_flow->dv.value.buf)); + dev_flow->layers = item_flags; + /* Register matcher. */ + matcher.crc = rte_raw_cksum((const void *)matcher.mask.buf, + matcher.mask.size); + matcher.priority = mlx5_flow_adjust_priority(dev, priority, + matcher.priority); + matcher.egress = attr->egress; + if (flow_dv_matcher_register(dev, &matcher, dev_flow, error)) + return -rte_errno; return 0; } @@ -2034,6 +2083,7 @@ flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow, } else if (flow->actions & (MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)) { struct mlx5_hrxq *hrxq; + hrxq = mlx5_hrxq_get(dev, flow->key, MLX5_RSS_HASH_KEY_LEN, dv->hash_fields, diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c index fb817b23..97d2a54c 100644 --- a/drivers/net/mlx5/mlx5_flow_tcf.c +++ b/drivers/net/mlx5/mlx5_flow_tcf.c @@ -3846,30 +3846,6 @@ flow_tcf_alloc_nlcmd(struct tcf_nlcb_context *ctx, uint32_t size) return nlh; } -/** - * Set NLM_F_ACK flags in the last netlink command in buffer. - * Only last command in the buffer will be acked by system. - * - * @param[in, out] buf - * Pointer to buffer with netlink commands. - */ -static void -flow_tcf_setack_nlcmd(struct tcf_nlcb_buf *buf) -{ - struct nlmsghdr *nlh; - uint32_t size = 0; - - assert(buf->size); - do { - nlh = (struct nlmsghdr *)&buf->msg[size]; - size += NLMSG_ALIGN(nlh->nlmsg_len); - if (size >= buf->size) { - nlh->nlmsg_flags |= NLM_F_ACK; - break; - } - } while (true); -} - /** * Send the buffers with prepared netlink commands. Scans the list and * sends all found buffers. 
Buffers are sent and freed anyway in order @@ -3888,21 +3864,35 @@ static int flow_tcf_send_nlcmd(struct mlx5_flow_tcf_context *tcf, struct tcf_nlcb_context *ctx) { - struct tcf_nlcb_buf *bc, *bn; - struct nlmsghdr *nlh; + struct tcf_nlcb_buf *bc = LIST_FIRST(&ctx->nlbuf); int ret = 0; - bc = LIST_FIRST(&ctx->nlbuf); while (bc) { + struct tcf_nlcb_buf *bn = LIST_NEXT(bc, next); + struct nlmsghdr *nlh; + uint32_t msg = 0; int rc; - bn = LIST_NEXT(bc, next); - if (bc->size) { - flow_tcf_setack_nlcmd(bc); - nlh = (struct nlmsghdr *)&bc->msg; - rc = flow_tcf_nl_ack(tcf, nlh, bc->size, NULL, NULL); - if (rc && !ret) - ret = rc; + while (msg < bc->size) { + /* + * Send Netlink commands from buffer in one by one + * fashion. If we send multiple rule deletion commands + * in one Netlink message and some error occurs it may + * cause multiple ACK error messages and break sequence + * numbers of Netlink communication, because we expect + * the only one ACK reply. + */ + assert((bc->size - msg) >= sizeof(struct nlmsghdr)); + nlh = (struct nlmsghdr *)&bc->msg[msg]; + assert((bc->size - msg) >= nlh->nlmsg_len); + msg += nlh->nlmsg_len; + rc = flow_tcf_nl_ack(tcf, nlh, 0, NULL, NULL); + if (rc) { + DRV_LOG(WARNING, + "netlink: cleanup error %d", rc); + if (!ret) + ret = rc; + } } rte_free(bc); bc = bn; @@ -3935,6 +3925,7 @@ flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg) struct nlattr *na_local = NULL; struct nlattr *na_peer = NULL; unsigned char family; + uint32_t size; if (nlh->nlmsg_type != RTM_NEWADDR) { rte_errno = EINVAL; @@ -3962,11 +3953,11 @@ flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg) if (!na_local || !na_peer) return 1; /* Local rule found with scope link, permanent and assigned peer. */ - cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) + - MNL_ALIGN(sizeof(struct ifaddrmsg)) + - (family == AF_INET6 - ? 2 * SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) - : 2 * SZ_NLATTR_TYPE_OF(uint32_t))); + size = MNL_ALIGN(sizeof(struct nlmsghdr)) + + MNL_ALIGN(sizeof(struct ifaddrmsg)) + + (family == AF_INET6 ? 2 * SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) + : 2 * SZ_NLATTR_TYPE_OF(uint32_t)); + cmd = flow_tcf_alloc_nlcmd(ctx, size); if (!cmd) { rte_errno = ENOMEM; return -rte_errno; @@ -3991,6 +3982,7 @@ flow_tcf_collect_local_cb(const struct nlmsghdr *nlh, void *arg) mnl_attr_put(cmd, IFA_ADDRESS, IPV6_ADDR_LEN, mnl_attr_get_payload(na_peer)); } + assert(size == cmd->nlmsg_len); return 1; } @@ -4059,6 +4051,7 @@ flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg) struct nlattr *na_ip = NULL; struct nlattr *na_mac = NULL; unsigned char family; + uint32_t size; if (nlh->nlmsg_type != RTM_NEWNEIGH) { rte_errno = EINVAL; @@ -4085,12 +4078,12 @@ flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg) if (!na_mac || !na_ip) return 1; /* Neigh rule with permenent attribute found. */ - cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) + - MNL_ALIGN(sizeof(struct ndmsg)) + - SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) + - (family == AF_INET6 - ? SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) - : SZ_NLATTR_TYPE_OF(uint32_t))); + size = MNL_ALIGN(sizeof(struct nlmsghdr)) + + MNL_ALIGN(sizeof(struct ndmsg)) + + SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) + + (family == AF_INET6 ? 
SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) + : SZ_NLATTR_TYPE_OF(uint32_t)); + cmd = flow_tcf_alloc_nlcmd(ctx, size); if (!cmd) { rte_errno = ENOMEM; return -rte_errno; @@ -4113,6 +4106,7 @@ flow_tcf_collect_neigh_cb(const struct nlmsghdr *nlh, void *arg) } mnl_attr_put(cmd, NDA_LLADDR, ETHER_ADDR_LEN, mnl_attr_get_payload(na_mac)); + assert(size == cmd->nlmsg_len); return 1; } @@ -4179,6 +4173,7 @@ flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg) struct nlattr *na_vxlan = NULL; bool found = false; unsigned int vxindex; + uint32_t size; if (nlh->nlmsg_type != RTM_NEWLINK) { rte_errno = EINVAL; @@ -4224,9 +4219,10 @@ flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg) return 1; /* Attached VXLAN device found, store the command to delete. */ vxindex = ifm->ifi_index; - cmd = flow_tcf_alloc_nlcmd(ctx, MNL_ALIGN(sizeof(struct nlmsghdr)) + - MNL_ALIGN(sizeof(struct ifinfomsg))); - if (!nlh) { + size = MNL_ALIGN(sizeof(struct nlmsghdr)) + + MNL_ALIGN(sizeof(struct ifinfomsg)); + cmd = flow_tcf_alloc_nlcmd(ctx, size); + if (!cmd) { rte_errno = ENOMEM; return -rte_errno; } @@ -4236,6 +4232,7 @@ flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg) ifm = mnl_nlmsg_put_extra_header(cmd, sizeof(*ifm)); ifm->ifi_family = AF_UNSPEC; ifm->ifi_index = vxindex; + assert(size == cmd->nlmsg_len); return 1; } @@ -5127,6 +5124,13 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow, dev_flow->tcf.applied = 1; return 0; } + if (dev_flow->tcf.tunnel) { + /* Rollback the VTEP configuration if rule apply failed. */ + assert(dev_flow->tcf.tunnel->vtep); + flow_tcf_vtep_release(ctx, dev_flow->tcf.tunnel->vtep, + dev_flow); + dev_flow->tcf.tunnel->vtep = NULL; + } return rte_flow_error_set(error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "netlink: failed to create TC flow rule"); diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c index 699cc88c..d6d95db5 100644 --- a/drivers/net/mlx5/mlx5_flow_verbs.c +++ b/drivers/net/mlx5/mlx5_flow_verbs.c @@ -1058,10 +1058,17 @@ flow_verbs_validate(struct rte_eth_dev *dev, MLX5_FLOW_LAYER_OUTER_L3_IPV4; if (items->mask != NULL && ((const struct rte_flow_item_ipv4 *) - items->mask)->hdr.next_proto_id) + items->mask)->hdr.next_proto_id) { next_protocol = ((const struct rte_flow_item_ipv4 *) (items->spec))->hdr.next_proto_id; + next_protocol &= + ((const struct rte_flow_item_ipv4 *) + (items->mask))->hdr.next_proto_id; + } else { + /* Reset for inner layer. */ + next_protocol = 0xff; + } break; case RTE_FLOW_ITEM_TYPE_IPV6: ret = mlx5_flow_validate_item_ipv6(items, item_flags, @@ -1072,10 +1079,17 @@ flow_verbs_validate(struct rte_eth_dev *dev, MLX5_FLOW_LAYER_OUTER_L3_IPV6; if (items->mask != NULL && ((const struct rte_flow_item_ipv6 *) - items->mask)->hdr.proto) + items->mask)->hdr.proto) { next_protocol = ((const struct rte_flow_item_ipv6 *) items->spec)->hdr.proto; + next_protocol &= + ((const struct rte_flow_item_ipv6 *) + items->mask)->hdr.proto; + } else { + /* Reset for inner layer. 
*/ + next_protocol = 0xff; + } break; case RTE_FLOW_ITEM_TYPE_UDP: ret = mlx5_flow_validate_item_udp(items, item_flags, @@ -1125,13 +1139,6 @@ flow_verbs_validate(struct rte_eth_dev *dev, error); if (ret < 0) return ret; - if (next_protocol != 0xff && - next_protocol != IPPROTO_MPLS) - return rte_flow_error_set - (error, EINVAL, - RTE_FLOW_ERROR_TYPE_ITEM, items, - "protocol filtering not compatible" - " with MPLS layer"); item_flags |= MLX5_FLOW_LAYER_MPLS; break; default: diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h index 886f60e6..97092c74 100644 --- a/drivers/net/mlx5/mlx5_utils.h +++ b/drivers/net/mlx5/mlx5_utils.h @@ -15,6 +15,16 @@ #include "mlx5_defs.h" +/* + * Compilation workaround for PPC64 when AltiVec is fully enabled, e.g. std=c11. + * Otherwise there would be a type conflict between stdbool and altivec. + */ +#if defined(__PPC64__) && !defined(__APPLE_ALTIVEC__) +#undef bool +/* redefine as in stdbool.h */ +#define bool _Bool +#endif + /* Bit-field manipulation. */ #define BITFIELD_DECLARE(bf, type, size) \ type bf[(((size_t)(size) / (sizeof(type) * CHAR_BIT)) + \ diff --git a/drivers/net/octeontx/base/octeontx_pki_var.h b/drivers/net/octeontx/base/octeontx_pki_var.h index c793b655..f4661d24 100644 --- a/drivers/net/octeontx/base/octeontx_pki_var.h +++ b/drivers/net/octeontx/base/octeontx_pki_var.h @@ -7,8 +7,17 @@ #include -#define OCTTX_PACKET_WQE_SKIP 128 -#define OCTTX_PACKET_FIRST_SKIP 240 +#define OCTTX_PACKET_WQE_SKIP 128 +#define OCTTX_PACKET_FIRST_SKIP_MAXREGVAL 496 +#define OCTTX_PACKET_FIRST_SKIP_MAXLEN 512 +#define OCTTX_PACKET_FIRST_SKIP_ADJUST(x) \ + (RTE_MIN(x, OCTTX_PACKET_FIRST_SKIP_MAXREGVAL)) +#define OCTTX_PACKET_FIRST_SKIP_SUM(p) \ + (OCTTX_PACKET_WQE_SKIP \ + + rte_pktmbuf_priv_size(p) \ + + RTE_PKTMBUF_HEADROOM) +#define OCTTX_PACKET_FIRST_SKIP(p) \ + OCTTX_PACKET_FIRST_SKIP_ADJUST(OCTTX_PACKET_FIRST_SKIP_SUM(p)) #define OCTTX_PACKET_LATER_SKIP 128 /* WQE descriptor */ diff --git a/drivers/net/octeontx/octeontx_ethdev.c b/drivers/net/octeontx/octeontx_ethdev.c index 06814862..a3063be4 100644 --- a/drivers/net/octeontx/octeontx_ethdev.c +++ b/drivers/net/octeontx/octeontx_ethdev.c @@ -844,10 +844,11 @@ octeontx_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx, pktbuf_conf.mmask.f_cache_mode = 1; pktbuf_conf.wqe_skip = OCTTX_PACKET_WQE_SKIP; - pktbuf_conf.first_skip = OCTTX_PACKET_FIRST_SKIP; + pktbuf_conf.first_skip = OCTTX_PACKET_FIRST_SKIP(mb_pool); pktbuf_conf.later_skip = OCTTX_PACKET_LATER_SKIP; pktbuf_conf.mbuff_size = (mb_pool->elt_size - RTE_PKTMBUF_HEADROOM - + rte_pktmbuf_priv_size(mb_pool) - sizeof(struct rte_mbuf)); pktbuf_conf.cache_mode = PKI_OPC_MODE_STF2_STT; diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c index 8a4772f4..0e33be1a 100644 --- a/drivers/net/qede/qede_rxtx.c +++ b/drivers/net/qede/qede_rxtx.c @@ -235,12 +235,13 @@ static void qede_rx_queue_release_mbufs(struct qede_rx_queue *rxq) void qede_rx_queue_release(void *rx_queue) { struct qede_rx_queue *rxq = rx_queue; - struct qede_dev *qdev = rxq->qdev; - struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); - - PMD_INIT_FUNC_TRACE(edev); + struct qede_dev *qdev; + struct ecore_dev *edev; if (rxq) { + qdev = rxq->qdev; + edev = QEDE_INIT_EDEV(qdev); + PMD_INIT_FUNC_TRACE(edev); qede_rx_queue_release_mbufs(rxq); qdev->ops->common->chain_free(edev, &rxq->rx_bd_ring); qdev->ops->common->chain_free(edev, &rxq->rx_comp_ring); @@ -399,12 +400,13 @@ static void qede_tx_queue_release_mbufs(struct qede_tx_queue *txq) void 
qede_tx_queue_release(void *tx_queue) { struct qede_tx_queue *txq = tx_queue; - struct qede_dev *qdev = txq->qdev; - struct ecore_dev *edev = QEDE_INIT_EDEV(qdev); - - PMD_INIT_FUNC_TRACE(edev); + struct qede_dev *qdev; + struct ecore_dev *edev; if (txq) { + qdev = txq->qdev; + edev = QEDE_INIT_EDEV(qdev); + PMD_INIT_FUNC_TRACE(edev); qede_tx_queue_release_mbufs(txq); qdev->ops->common->chain_free(edev, &txq->tx_pbl); rte_free(txq->sw_tx_ring); @@ -1759,6 +1761,18 @@ qede_xmit_prep_pkts(__rte_unused void *p_txq, struct rte_mbuf **tx_pkts, } } if (ol_flags & QEDE_TX_OFFLOAD_NOTSUP_MASK) { + /* We support only limited tunnel protocols */ + if (ol_flags & PKT_TX_TUNNEL_MASK) { + uint64_t temp; + + temp = ol_flags & PKT_TX_TUNNEL_MASK; + if (temp == PKT_TX_TUNNEL_VXLAN || + temp == PKT_TX_TUNNEL_GENEVE || + temp == PKT_TX_TUNNEL_MPLSINUDP || + temp == PKT_TX_TUNNEL_GRE) + break; + } + rte_errno = -ENOTSUP; break; } diff --git a/drivers/net/qede/qede_rxtx.h b/drivers/net/qede/qede_rxtx.h index d3a41e92..0afadd8d 100644 --- a/drivers/net/qede/qede_rxtx.h +++ b/drivers/net/qede/qede_rxtx.h @@ -153,10 +153,7 @@ #define QEDE_TX_OFFLOAD_MASK (QEDE_TX_CSUM_OFFLOAD_MASK | \ PKT_TX_VLAN_PKT | \ - PKT_TX_TUNNEL_VXLAN | \ - PKT_TX_TUNNEL_GENEVE | \ - PKT_TX_TUNNEL_MPLSINUDP | \ - PKT_TX_TUNNEL_GRE) + PKT_TX_TUNNEL_MASK) #define QEDE_TX_OFFLOAD_NOTSUP_MASK \ (PKT_TX_OFFLOAD_MASK ^ QEDE_TX_OFFLOAD_MASK) diff --git a/drivers/net/sfc/base/efx.h b/drivers/net/sfc/base/efx.h index 8e10e893..2e847b6c 100644 --- a/drivers/net/sfc/base/efx.h +++ b/drivers/net/sfc/base/efx.h @@ -2878,6 +2878,8 @@ typedef struct efx_filter_spec_s { efx_filter_flags_t efs_flags; uint16_t efs_dmaq_id; uint32_t efs_rss_context; + uint32_t efs_mark; + /* Fields below here are hashed for software filter lookup */ uint16_t efs_outer_vid; uint16_t efs_inner_vid; uint8_t efs_loc_mac[EFX_MAC_ADDR_LEN]; @@ -2891,7 +2893,6 @@ typedef struct efx_filter_spec_s { efx_oword_t efs_loc_host; uint8_t efs_vni_or_vsid[EFX_VNI_OR_VSID_LEN]; uint8_t efs_ifrm_loc_mac[EFX_MAC_ADDR_LEN]; - uint32_t efs_mark; } efx_filter_spec_t; diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c index e7817e89..49afd38d 100644 --- a/drivers/net/tap/rte_eth_tap.c +++ b/drivers/net/tap/rte_eth_tap.c @@ -248,7 +248,7 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive) return fd; error: - if (fd > 0) + if (fd >= 0) close(fd); return -1; } @@ -1848,6 +1848,7 @@ disable_rte_flow: TAP_LOG(ERR, "Remote feature requires flow support."); goto error_exit; } + rte_eth_dev_probing_finish(dev); return 0; error_remote: diff --git a/drivers/net/tap/tap_netlink.c b/drivers/net/tap/tap_netlink.c index 6cb51009..14bbbec7 100644 --- a/drivers/net/tap/tap_netlink.c +++ b/drivers/net/tap/tap_netlink.c @@ -51,14 +51,17 @@ tap_nl_init(uint32_t nl_groups) } if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) { TAP_LOG(ERR, "Unable to set socket buffer send size"); + close(fd); return -1; } if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) { TAP_LOG(ERR, "Unable to set socket buffer receive size"); + close(fd); return -1; } if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) { TAP_LOG(ERR, "Unable to bind to the netlink socket"); + close(fd); return -1; } return fd; diff --git a/drivers/net/thunderx/nicvf_rxtx.c b/drivers/net/thunderx/nicvf_rxtx.c index 247c3568..1c428743 100644 --- a/drivers/net/thunderx/nicvf_rxtx.c +++ b/drivers/net/thunderx/nicvf_rxtx.c @@ -61,6 +61,14 @@ fill_sq_desc_header(union sq_entry_t *entry, 
struct rte_mbuf *pkt) entry->buff[0] = sqe.buff[0]; } +static inline void __hot +fill_sq_desc_header_zero_w1(union sq_entry_t *entry, + struct rte_mbuf *pkt) +{ + fill_sq_desc_header(entry, pkt); + entry->buff[1] = 0ULL; +} + void __hot nicvf_single_pool_free_xmited_buffers(struct nicvf_txq *sq) { @@ -204,7 +212,7 @@ nicvf_xmit_pkts_multiseg(void *tx_queue, struct rte_mbuf **tx_pkts, used_bufs += nb_segs; txbuffs[tail] = NULL; - fill_sq_desc_header(desc_ptr + tail, pkt); + fill_sq_desc_header_zero_w1(desc_ptr + tail, pkt); tail = (tail + 1) & qlen_mask; txbuffs[tail] = pkt; diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c index 21110cd6..c8883c32 100644 --- a/drivers/net/virtio/virtio_pci.c +++ b/drivers/net/virtio/virtio_pci.c @@ -614,9 +614,15 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw) hw->common_cfg = get_cfg_addr(dev, &cap); break; case VIRTIO_PCI_CAP_NOTIFY_CFG: - rte_pci_read_config(dev, &hw->notify_off_multiplier, + ret = rte_pci_read_config(dev, + &hw->notify_off_multiplier, 4, pos + sizeof(cap)); - hw->notify_base = get_cfg_addr(dev, &cap); + if (ret != 4) + PMD_INIT_LOG(DEBUG, + "failed to read notify_off_multiplier, ret %d", + ret); + else + hw->notify_base = get_cfg_addr(dev, &cap); break; case VIRTIO_PCI_CAP_DEVICE_CFG: hw->dev_cfg = get_cfg_addr(dev, &cap); diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c index 61b7c0a3..f8791391 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -484,7 +484,7 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev) } } else { PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user", - VIRTIO_USER_ARG_QUEUE_SIZE); + VIRTIO_USER_ARG_PATH); goto end; } diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c index 84acd9db..93e5de9a 100644 --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -318,6 +318,9 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev) hw->perm_addr[0], hw->perm_addr[1], hw->perm_addr[2], hw->perm_addr[3], hw->perm_addr[4], hw->perm_addr[5]); + /* Flag to call rte_eth_dev_release_port() in rte_eth_dev_close(). */ + eth_dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; + /* Put device in Quiesce Mode */ VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_QUIESCE_DEV); @@ -876,12 +879,6 @@ vmxnet3_dev_close(struct rte_eth_dev *dev) vmxnet3_dev_stop(dev); vmxnet3_free_queues(dev); - - /* - * flag to rte_eth_dev_close() that it should release the port resources - * (calling rte_eth_dev_release_port()) in addition to closing it. 
- */ - dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE; } static void diff --git a/examples/fips_validation/fips_validation_tdes.c b/examples/fips_validation/fips_validation_tdes.c index 5064ff3b..15ee434e 100644 --- a/examples/fips_validation/fips_validation_tdes.c +++ b/examples/fips_validation/fips_validation_tdes.c @@ -202,7 +202,7 @@ parse_test_tdes_writeback(struct fips_val *val) static int writeback_tdes_hex_str(const char *key, char *dst, struct fips_val *val) { - struct fips_val tmp_val; + struct fips_val tmp_val = {0}; tmp_val.len = 8; diff --git a/examples/fips_validation/main.c b/examples/fips_validation/main.c index 85f54cbf..e7559c63 100644 --- a/examples/fips_validation/main.c +++ b/examples/fips_validation/main.c @@ -887,9 +887,9 @@ fips_mct_tdes_test(void) #define TDES_EXTERN_ITER 400 #define TDES_INTERN_ITER 10000 struct fips_val val, val_key; - uint8_t prev_out[TDES_BLOCK_SIZE]; - uint8_t prev_prev_out[TDES_BLOCK_SIZE]; - uint8_t prev_in[TDES_BLOCK_SIZE]; + uint8_t prev_out[TDES_BLOCK_SIZE] = {0}; + uint8_t prev_prev_out[TDES_BLOCK_SIZE] = {0}; + uint8_t prev_in[TDES_BLOCK_SIZE] = {0}; uint32_t i, j, k; int ret; diff --git a/examples/flow_filtering/main.c b/examples/flow_filtering/main.c index 27e287ae..a582ac07 100644 --- a/examples/flow_filtering/main.c +++ b/examples/flow_filtering/main.c @@ -137,7 +137,7 @@ init_port(void) struct rte_eth_dev_info dev_info; rte_eth_dev_info_get(port_id, &dev_info); - port_conf.txmode.offloads &= dev_info.rx_offload_capa; + port_conf.txmode.offloads &= dev_info.tx_offload_capa; printf(":: initializing port: %d\n", port_id); ret = rte_eth_dev_configure(port_id, nr_queues, nr_queues, &port_conf); diff --git a/examples/ip_pipeline/cli.c b/examples/ip_pipeline/cli.c index 3de62068..91038628 100644 --- a/examples/ip_pipeline/cli.c +++ b/examples/ip_pipeline/cli.c @@ -6841,20 +6841,26 @@ cli_rule_file_process(const char *file_name, return 0; cli_rule_file_process_free: - *rule_list = NULL; - *n_rules = rule_id; - *line_number = line_id; + if (rule_list != NULL) + *rule_list = NULL; - for ( ; ; ) { - struct table_rule *rule; + if (n_rules != NULL) + *n_rules = rule_id; - rule = TAILQ_FIRST(list); - if (rule == NULL) - break; + if (line_number != NULL) + *line_number = line_id; - TAILQ_REMOVE(list, rule, node); - free(rule); - } + if (list != NULL) + for ( ; ; ) { + struct table_rule *rule; + + rule = TAILQ_FIRST(list); + if (rule == NULL) + break; + + TAILQ_REMOVE(list, rule, node); + free(rule); + } if (f) fclose(f); diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c index 0b3f8fe6..9c7b3156 100644 --- a/examples/l3fwd-power/main.c +++ b/examples/l3fwd-power/main.c @@ -1957,7 +1957,7 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n"); if (init_power_library()) - rte_exit(EXIT_FAILURE, "init_power_library failed\n"); + RTE_LOG(ERR, L3FWD_POWER, "init_power_library failed\n"); if (update_lcore_params() < 0) rte_exit(EXIT_FAILURE, "update_lcore_params failed\n"); diff --git a/lib/librte_bpf/bpf_jit_x86.c b/lib/librte_bpf/bpf_jit_x86.c index 68ea389f..f70cd6be 100644 --- a/lib/librte_bpf/bpf_jit_x86.c +++ b/lib/librte_bpf/bpf_jit_x86.c @@ -208,6 +208,19 @@ emit_sib(struct bpf_jit_state *st, uint32_t scale, uint32_t idx, uint32_t base) emit_bytes(st, &v, sizeof(v)); } +/* + * emit OPCODE+REGIDX byte + */ +static void +emit_opcode(struct bpf_jit_state *st, uint8_t ops, uint32_t reg) +{ + uint8_t v; + + v = ops | (reg & 7); + emit_bytes(st, &v, sizeof(v)); +} + + /* * emit xchg %, % */ @@ 
-472,19 +485,18 @@ static void emit_ld_imm64(struct bpf_jit_state *st, uint32_t dreg, uint32_t imm0, uint32_t imm1) { + uint32_t op; + const uint8_t ops = 0xB8; - if (imm1 == 0) { - emit_mov_imm(st, EBPF_ALU64 | EBPF_MOV | BPF_K, dreg, imm0); - return; - } + op = (imm1 == 0) ? BPF_ALU : EBPF_ALU64; - emit_rex(st, EBPF_ALU64, 0, dreg); - emit_bytes(st, &ops, sizeof(ops)); - emit_modregrm(st, MOD_DIRECT, 0, dreg); + emit_rex(st, op, 0, dreg); + emit_opcode(st, ops, dreg); emit_imm(st, imm0, sizeof(imm0)); - emit_imm(st, imm1, sizeof(imm1)); + if (imm1 != 0) + emit_imm(st, imm1, sizeof(imm1)); } /* diff --git a/lib/librte_eal/common/arch/x86/rte_memcpy.c b/lib/librte_eal/common/arch/x86/rte_memcpy.c deleted file mode 100644 index 648c8f68..00000000 --- a/lib/librte_eal/common/arch/x86/rte_memcpy.c +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2010-2017 Intel Corporation - */ - -#include -#include -#include - -void *(*rte_memcpy_ptr)(void *dst, const void *src, size_t n) = NULL; - -RTE_INIT(rte_memcpy_init) -{ -#ifdef CC_SUPPORT_AVX512F - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F)) { - rte_memcpy_ptr = rte_memcpy_avx512f; - RTE_LOG(DEBUG, EAL, "AVX512 memcpy is using!\n"); - return; - } -#endif -#ifdef CC_SUPPORT_AVX2 - if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) { - rte_memcpy_ptr = rte_memcpy_avx2; - RTE_LOG(DEBUG, EAL, "AVX2 memcpy is using!\n"); - return; - } -#endif - rte_memcpy_ptr = rte_memcpy_sse; - RTE_LOG(DEBUG, EAL, "Default SSE/AVX memcpy is using!\n"); -} diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c index 5759ec2d..1fdc9ab1 100644 --- a/lib/librte_eal/common/eal_common_dev.c +++ b/lib/librte_eal/common/eal_common_dev.c @@ -150,10 +150,11 @@ local_dev_probe(const char *devargs, struct rte_device **new_dev) goto err_devarg; } - ret = rte_devargs_insert(da); + ret = rte_devargs_insert(&da); if (ret) goto err_devarg; + /* the rte_devargs will be referenced in the matching rte_device */ ret = da->bus->scan(); if (ret) goto err_devarg; diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c index b7b9cb69..1ccf12dc 100644 --- a/lib/librte_eal/common/eal_common_devargs.c +++ b/lib/librte_eal/common/eal_common_devargs.c @@ -263,14 +263,38 @@ rte_devargs_parsef(struct rte_devargs *da, const char *format, ...) 
} int __rte_experimental -rte_devargs_insert(struct rte_devargs *da) +rte_devargs_insert(struct rte_devargs **da) { - int ret; + struct rte_devargs *listed_da; + void *tmp; + + if (*da == NULL || (*da)->bus == NULL) + return -1; - ret = rte_devargs_remove(da); - if (ret < 0) - return ret; - TAILQ_INSERT_TAIL(&devargs_list, da, next); + TAILQ_FOREACH_SAFE(listed_da, &devargs_list, next, tmp) { + if (listed_da == *da) + /* devargs already in the list */ + return 0; + if (strcmp(listed_da->bus->name, (*da)->bus->name) == 0 && + strcmp(listed_da->name, (*da)->name) == 0) { + /* device already in devargs list, must be updated */ + listed_da->type = (*da)->type; + listed_da->policy = (*da)->policy; + free(listed_da->args); + listed_da->args = (*da)->args; + listed_da->bus = (*da)->bus; + listed_da->cls = (*da)->cls; + listed_da->bus_str = (*da)->bus_str; + listed_da->cls_str = (*da)->cls_str; + listed_da->data = (*da)->data; + /* replace provided devargs with found one */ + free(*da); + *da = listed_da; + return 0; + } + } + /* new device in the list */ + TAILQ_INSERT_TAIL(&devargs_list, *da, next); return 0; } diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 87fd9921..d47ea493 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -439,11 +439,7 @@ check_iova(const struct rte_memseg_list *msl __rte_unused, return 1; } -#if defined(RTE_ARCH_64) #define MAX_DMA_MASK_BITS 63 -#else -#define MAX_DMA_MASK_BITS 31 -#endif /* check memseg iovas are within the required range based on dma mask */ static int __rte_experimental @@ -453,7 +449,8 @@ check_dma_mask(uint8_t maskbits, bool thread_unsafe) uint64_t mask; int ret; - /* sanity check */ + /* Sanity check. We only check width can be managed with 64 bits + * variables. Indeed any higher value is likely wrong. */ if (maskbits > MAX_DMA_MASK_BITS) { RTE_LOG(ERR, EAL, "wrong dma mask size %u (Max: %u)\n", maskbits, MAX_DMA_MASK_BITS); diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c index 97663d3b..f65ef56c 100644 --- a/lib/librte_eal/common/eal_common_proc.c +++ b/lib/librte_eal/common/eal_common_proc.c @@ -800,7 +800,7 @@ mp_request_async(const char *dst, struct rte_mp_msg *req, { struct rte_mp_msg *reply_msg; struct pending_request *pending_req, *exist; - int ret; + int ret = -1; pending_req = calloc(1, sizeof(*pending_req)); reply_msg = calloc(1, sizeof(*reply_msg)); @@ -827,6 +827,28 @@ mp_request_async(const char *dst, struct rte_mp_msg *req, goto fail; } + /* + * set the alarm before sending message. there are two possible error + * scenarios to consider here: + * + * - if the alarm set fails, we free the memory right there + * - if the alarm set succeeds but sending message fails, then the alarm + * will trigger and clean up the memory + * + * Even if the alarm triggers too early (i.e. immediately), we're still + * holding the lock to pending requests queue, so the interrupt thread + * will just spin until we release the lock, and either release the + * memory, or doesn't find any pending requests in the queue because we + * never added any due to send message failure. 
+ */ + if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000, + async_reply_handle, pending_req) < 0) { + RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n", + dst, req->name); + ret = -1; + goto fail; + } + ret = send_msg(dst, req, MP_REQ); if (ret < 0) { RTE_LOG(ERR, EAL, "Fail to send request %s:%s\n", @@ -841,13 +863,6 @@ mp_request_async(const char *dst, struct rte_mp_msg *req, param->user_reply.nb_sent++; - if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000, - async_reply_handle, pending_req) < 0) { - RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n", - dst, req->name); - rte_panic("Fix the above shit to properly free all memory\n"); - } - return 0; fail: free(pending_req); diff --git a/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/lib/librte_eal/common/include/arch/x86/rte_rtm.h index ab099952..eb0f8e81 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_rtm.h +++ b/lib/librte_eal/common/include/arch/x86/rte_rtm.h @@ -1,21 +1,10 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2012,2013 Intel Corporation + */ + #ifndef _RTE_RTM_H_ #define _RTE_RTM_H_ 1 -/* - * Copyright (c) 2012,2013 Intel Corporation - * Author: Andi Kleen - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that: (1) source code distributions - * retain the above copyright notice and this paragraph in its entirety, (2) - * distributions including binary code include the above copyright notice and - * this paragraph in its entirety in the documentation or other materials - * provided with the distribution - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - */ /* Official RTM intrinsics interface matching gcc/icc, but works on older gcc compatible compilers and binutils. */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h index 60321da0..e2e2b264 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h +++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h @@ -15,8 +15,9 @@ extern "C" { #include "rte_branch_prediction.h" #include "rte_common.h" #include "rte_pause.h" +#include "rte_cycles.h" -#define RTE_RTM_MAX_RETRIES (10) +#define RTE_RTM_MAX_RETRIES (20) #define RTE_XABORT_LOCK_BUSY (0xff) #ifndef RTE_FORCE_INTRINSICS @@ -76,7 +77,7 @@ static inline int rte_tm_supported(void) static inline int rte_try_tm(volatile int *lock) { - int retries; + int i, retries; if (!rte_rtm_supported) return 0; @@ -96,9 +97,21 @@ rte_try_tm(volatile int *lock) while (*lock) rte_pause(); - if ((status & RTE_XABORT_EXPLICIT) && - (RTE_XABORT_CODE(status) == RTE_XABORT_LOCK_BUSY)) + if ((status & RTE_XABORT_CONFLICT) || + ((status & RTE_XABORT_EXPLICIT) && + (RTE_XABORT_CODE(status) == RTE_XABORT_LOCK_BUSY))) { + /* add a small delay before retrying, basing the + * delay on the number of times we've already tried, + * to give a back-off type of behaviour. 
We + * randomize trycount by taking bits from the tsc count + */ + int try_count = RTE_RTM_MAX_RETRIES - retries; + int pause_count = (rte_rdtsc() & 0x7) | 1; + pause_count <<= try_count; + for (i = 0; i < pause_count; i++) + rte_pause(); continue; + } if ((status & RTE_XABORT_RETRY) == 0) /* do not retry */ break; diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index cba7bbc1..87f0f630 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -473,6 +473,25 @@ rte_log2_u32(uint32_t v) return rte_bsf32(v); } + +/** + * Return the last (most-significant) bit set. + * + * @note The last (most significant) bit is at position 32. + * @note rte_fls_u32(0) = 0, rte_fls_u32(1) = 1, rte_fls_u32(0x80000000) = 32 + * + * @param x + * The input parameter. + * @return + * The last (most-significant) bit set, or 0 if the input is 0. + */ +static inline int +rte_fls_u32(uint32_t x) +{ + return (x == 0) ? 0 : 32 - __builtin_clz(x); +} + + #ifndef offsetof /** Return the offset of a field in a structure. */ #define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h index b1f121f8..29b3fb7c 100644 --- a/lib/librte_eal/common/include/rte_devargs.h +++ b/lib/librte_eal/common/include/rte_devargs.h @@ -146,6 +146,8 @@ __attribute__((format(printf, 2, 0))); * * @param da * The devargs structure to insert. + * If a devargs for the same device is already inserted, + * it will be updated and returned. It means *da pointer can change. * * @return * - 0 on success @@ -153,7 +155,7 @@ __attribute__((format(printf, 2, 0))); */ __rte_experimental int -rte_devargs_insert(struct rte_devargs *da); +rte_devargs_insert(struct rte_devargs **da); /** * Add a device to the user device list diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index 80c516d3..fc26e97a 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -49,7 +49,7 @@ extern "C" { * 0-15 = release candidates * 16 = release */ -#define RTE_VER_RELEASE 2 +#define RTE_VER_RELEASE 3 /** * Macro to compute a version number usable for comparisons diff --git a/lib/librte_eal/common/rte_reciprocal.c b/lib/librte_eal/common/rte_reciprocal.c index d81b11db..f017d0c2 100644 --- a/lib/librte_eal/common/rte_reciprocal.c +++ b/lib/librte_eal/common/rte_reciprocal.c @@ -41,28 +41,13 @@ #include "rte_reciprocal.h" -/* find largest set bit. - * portable and slow but does not matter for this usage. 
- */ -static inline int fls(uint32_t x) -{ - int b; - - for (b = 31; b >= 0; --b) { - if (x & (1u << b)) - return b + 1; - } - - return 0; -} - struct rte_reciprocal rte_reciprocal_value(uint32_t d) { struct rte_reciprocal R; uint64_t m; int l; - l = fls(d - 1); + l = rte_fls_u32(d - 1); m = ((1ULL << 32) * ((1ULL << l) - d)); m /= d; diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c index 391d2a65..840ede78 100644 --- a/lib/librte_eal/linuxapp/eal/eal_alarm.c +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c @@ -30,7 +30,9 @@ #define NS_PER_US 1000 #define US_PER_MS 1000 #define MS_PER_S 1000 +#ifndef US_PER_S #define US_PER_S (US_PER_MS * MS_PER_S) +#endif #ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ #define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index c1b5e079..48b23ce1 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -1617,6 +1617,7 @@ eal_legacy_hugepage_init(void) tmp_hp = NULL; munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file)); + hugepage = NULL; /* we're not going to allocate more pages, so release VA space for * unused memseg lists diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c index 8eaa5fcc..5f858174 100644 --- a/lib/librte_ethdev/rte_ethdev.c +++ b/lib/librte_ethdev/rte_ethdev.c @@ -1092,8 +1092,9 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, { struct rte_eth_dev *dev; struct rte_eth_dev_info dev_info; - struct rte_eth_conf local_conf = *dev_conf; + struct rte_eth_conf orig_conf; int diag; + int ret; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); @@ -1102,6 +1103,22 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP); + if (dev->data->dev_started) { + RTE_ETHDEV_LOG(ERR, + "Port %u must be stopped to allow configuration\n", + port_id); + return -EBUSY; + } + + /* Store original config, as rollback required on failure */ + memcpy(&orig_conf, &dev->data->dev_conf, sizeof(dev->data->dev_conf)); + + /* + * Copy the dev_conf parameter into the dev structure. 
+ * rte_eth_dev_info_get() requires dev_conf, copy it before dev_info get + */ + memcpy(&dev->data->dev_conf, dev_conf, sizeof(dev->data->dev_conf)); + rte_eth_dev_info_get(port_id, &dev_info); /* If number of queues specified by application for both Rx and Tx is @@ -1123,26 +1140,18 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, RTE_ETHDEV_LOG(ERR, "Number of RX queues requested (%u) is greater than max supported(%d)\n", nb_rx_q, RTE_MAX_QUEUES_PER_PORT); - return -EINVAL; + ret = -EINVAL; + goto rollback; } if (nb_tx_q > RTE_MAX_QUEUES_PER_PORT) { RTE_ETHDEV_LOG(ERR, "Number of TX queues requested (%u) is greater than max supported(%d)\n", nb_tx_q, RTE_MAX_QUEUES_PER_PORT); - return -EINVAL; - } - - if (dev->data->dev_started) { - RTE_ETHDEV_LOG(ERR, - "Port %u must be stopped to allow configuration\n", - port_id); - return -EBUSY; + ret = -EINVAL; + goto rollback; } - /* Copy the dev_conf parameter into the dev structure */ - memcpy(&dev->data->dev_conf, &local_conf, sizeof(dev->data->dev_conf)); - /* * Check that the numbers of RX and TX queues are not greater * than the maximum number of RX and TX queues supported by the @@ -1151,13 +1160,15 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, if (nb_rx_q > dev_info.max_rx_queues) { RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u nb_rx_queues=%u > %u\n", port_id, nb_rx_q, dev_info.max_rx_queues); - return -EINVAL; + ret = -EINVAL; + goto rollback; } if (nb_tx_q > dev_info.max_tx_queues) { RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u nb_tx_queues=%u > %u\n", port_id, nb_tx_q, dev_info.max_tx_queues); - return -EINVAL; + ret = -EINVAL; + goto rollback; } /* Check that the device supports requested interrupts */ @@ -1165,32 +1176,36 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) { RTE_ETHDEV_LOG(ERR, "Driver %s does not support lsc\n", dev->device->driver->name); - return -EINVAL; + ret = -EINVAL; + goto rollback; } if ((dev_conf->intr_conf.rmv == 1) && (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) { RTE_ETHDEV_LOG(ERR, "Driver %s does not support rmv\n", dev->device->driver->name); - return -EINVAL; + ret = -EINVAL; + goto rollback; } /* * If jumbo frames are enabled, check that the maximum RX packet * length is supported by the configured device. 
*/ - if (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { + if (dev_conf->rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { if (dev_conf->rxmode.max_rx_pkt_len > dev_info.max_rx_pktlen) { RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u max_rx_pkt_len %u > max valid value %u\n", port_id, dev_conf->rxmode.max_rx_pkt_len, dev_info.max_rx_pktlen); - return -EINVAL; + ret = -EINVAL; + goto rollback; } else if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN) { RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u max_rx_pkt_len %u < min valid value %u\n", port_id, dev_conf->rxmode.max_rx_pkt_len, (unsigned)ETHER_MIN_LEN); - return -EINVAL; + ret = -EINVAL; + goto rollback; } } else { if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN || @@ -1201,25 +1216,27 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, } /* Any requested offloading must be within its device capabilities */ - if ((local_conf.rxmode.offloads & dev_info.rx_offload_capa) != - local_conf.rxmode.offloads) { + if ((dev_conf->rxmode.offloads & dev_info.rx_offload_capa) != + dev_conf->rxmode.offloads) { RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u requested Rx offloads 0x%"PRIx64" doesn't match Rx offloads " "capabilities 0x%"PRIx64" in %s()\n", - port_id, local_conf.rxmode.offloads, + port_id, dev_conf->rxmode.offloads, dev_info.rx_offload_capa, __func__); - return -EINVAL; + ret = -EINVAL; + goto rollback; } - if ((local_conf.txmode.offloads & dev_info.tx_offload_capa) != - local_conf.txmode.offloads) { + if ((dev_conf->txmode.offloads & dev_info.tx_offload_capa) != + dev_conf->txmode.offloads) { RTE_ETHDEV_LOG(ERR, "Ethdev port_id=%u requested Tx offloads 0x%"PRIx64" doesn't match Tx offloads " "capabilities 0x%"PRIx64" in %s()\n", - port_id, local_conf.txmode.offloads, + port_id, dev_conf->txmode.offloads, dev_info.tx_offload_capa, __func__); - return -EINVAL; + ret = -EINVAL; + goto rollback; } /* Check that device supports requested rss hash functions. */ @@ -1230,7 +1247,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, "Ethdev port_id=%u invalid rss_hf: 0x%"PRIx64", valid value: 0x%"PRIx64"\n", port_id, dev_conf->rx_adv_conf.rss_conf.rss_hf, dev_info.flow_type_rss_offloads); - return -EINVAL; + ret = -EINVAL; + goto rollback; } /* @@ -1241,7 +1259,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, RTE_ETHDEV_LOG(ERR, "Port%u rte_eth_dev_rx_queue_config = %d\n", port_id, diag); - return diag; + ret = diag; + goto rollback; } diag = rte_eth_dev_tx_queue_config(dev, nb_tx_q); @@ -1250,7 +1269,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, "Port%u rte_eth_dev_tx_queue_config = %d\n", port_id, diag); rte_eth_dev_rx_queue_config(dev, 0); - return diag; + ret = diag; + goto rollback; } diag = (*dev->dev_ops->dev_configure)(dev); @@ -1259,7 +1279,8 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, port_id, diag); rte_eth_dev_rx_queue_config(dev, 0); rte_eth_dev_tx_queue_config(dev, 0); - return eth_err(port_id, diag); + ret = eth_err(port_id, diag); + goto rollback; } /* Initialize Rx profiling if enabled at compilation time. 
*/ @@ -1269,10 +1290,16 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, port_id, diag); rte_eth_dev_rx_queue_config(dev, 0); rte_eth_dev_tx_queue_config(dev, 0); - return eth_err(port_id, diag); + ret = eth_err(port_id, diag); + goto rollback; } return 0; + +rollback: + memcpy(&dev->data->dev_conf, &orig_conf, sizeof(dev->data->dev_conf)); + + return ret; } void diff --git a/lib/librte_hash/rte_cmp_x86.h b/lib/librte_hash/rte_cmp_x86.h index e82b4c08..13a58363 100644 --- a/lib/librte_hash/rte_cmp_x86.h +++ b/lib/librte_hash/rte_cmp_x86.h @@ -2,6 +2,8 @@ * Copyright(c) 2015 Intel Corporation */ +#include + /* Functions to compare multiple of 16 byte keys (up to 128 bytes) */ static int rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 5ddcccd8..c55a4f26 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -13,7 +13,6 @@ #include #include /* for definition of RTE_CACHE_LINE_SIZE */ #include -#include #include #include #include @@ -982,7 +981,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size); new_idx = (uint32_t)((uintptr_t) slot_id); /* Copy key */ - rte_memcpy(new_k->key, key, h->key_len); + memcpy(new_k->key, key, h->key_len); /* Key can be of arbitrary length, so it is not possible to store * it atomically. Hence the new key element's memory stores * (key as well as data) should be complete before it is referenced. @@ -1129,9 +1128,38 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data) return ret; } +/* Search one bucket to find the match key - uses rw lock */ +static inline int32_t +search_one_bucket_l(const struct rte_hash *h, const void *key, + uint16_t sig, void **data, + const struct rte_hash_bucket *bkt) +{ + int i; + struct rte_hash_key *k, *keys = h->key_store; + + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (bkt->sig_current[i] == sig && + bkt->key_idx[i] != EMPTY_SLOT) { + k = (struct rte_hash_key *) ((char *)keys + + bkt->key_idx[i] * h->key_entry_size); + + if (rte_hash_cmp_eq(key, k->key, h) == 0) { + if (data != NULL) + *data = k->pdata; + /* + * Return index where key is stored, + * subtracting the first dummy index + */ + return bkt->key_idx[i] - 1; + } + } + } + return -1; +} + /* Search one bucket to find the match key */ static inline int32_t -search_one_bucket(const struct rte_hash *h, const void *key, uint16_t sig, +search_one_bucket_lf(const struct rte_hash *h, const void *key, uint16_t sig, void **data, const struct rte_hash_bucket *bkt) { int i; @@ -1163,12 +1191,11 @@ search_one_bucket(const struct rte_hash *h, const void *key, uint16_t sig, } static inline int32_t -__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, - hash_sig_t sig, void **data) +__rte_hash_lookup_with_hash_l(const struct rte_hash *h, const void *key, + hash_sig_t sig, void **data) { uint32_t prim_bucket_idx, sec_bucket_idx; struct rte_hash_bucket *bkt, *cur_bkt; - uint32_t cnt_b, cnt_a; int ret; uint16_t short_sig; @@ -1176,8 +1203,48 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, prim_bucket_idx = get_prim_bucket_index(h, sig); sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig); + bkt = &h->buckets[prim_bucket_idx]; + __hash_rw_reader_lock(h); + /* Check if key is in primary location */ + ret = 
search_one_bucket_l(h, key, short_sig, data, bkt); + if (ret != -1) { + __hash_rw_reader_unlock(h); + return ret; + } + /* Calculate secondary hash */ + bkt = &h->buckets[sec_bucket_idx]; + + /* Check if key is in secondary location */ + FOR_EACH_BUCKET(cur_bkt, bkt) { + ret = search_one_bucket_l(h, key, short_sig, + data, cur_bkt); + if (ret != -1) { + __hash_rw_reader_unlock(h); + return ret; + } + } + + __hash_rw_reader_unlock(h); + + return -ENOENT; +} + +static inline int32_t +__rte_hash_lookup_with_hash_lf(const struct rte_hash *h, const void *key, + hash_sig_t sig, void **data) +{ + uint32_t prim_bucket_idx, sec_bucket_idx; + struct rte_hash_bucket *bkt, *cur_bkt; + uint32_t cnt_b, cnt_a; + int ret; + uint16_t short_sig; + + short_sig = get_short_sig(sig); + prim_bucket_idx = get_prim_bucket_index(h, sig); + sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig); + do { /* Load the table change counter before the lookup * starts. Acquire semantics will make sure that @@ -1188,7 +1255,7 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in primary location */ bkt = &h->buckets[prim_bucket_idx]; - ret = search_one_bucket(h, key, short_sig, data, bkt); + ret = search_one_bucket_lf(h, key, short_sig, data, bkt); if (ret != -1) { __hash_rw_reader_unlock(h); return ret; @@ -1198,7 +1265,7 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, /* Check if key is in secondary location */ FOR_EACH_BUCKET(cur_bkt, bkt) { - ret = search_one_bucket(h, key, short_sig, + ret = search_one_bucket_lf(h, key, short_sig, data, cur_bkt); if (ret != -1) { __hash_rw_reader_unlock(h); @@ -1222,11 +1289,19 @@ __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, __ATOMIC_ACQUIRE); } while (cnt_b != cnt_a); - __hash_rw_reader_unlock(h); - return -ENOENT; } +static inline int32_t +__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, + hash_sig_t sig, void **data) +{ + if (h->readwrite_concur_lf_support) + return __rte_hash_lookup_with_hash_lf(h, key, sig, data); + else + return __rte_hash_lookup_with_hash_l(h, key, sig, data); +} + int32_t rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig) @@ -1528,7 +1603,197 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, #define PREFETCH_OFFSET 4 static inline void -__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, +__rte_hash_lookup_bulk_l(const struct rte_hash *h, const void **keys, + int32_t num_keys, int32_t *positions, + uint64_t *hit_mask, void *data[]) +{ + uint64_t hits = 0; + int32_t i; + int32_t ret; + uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX]; + uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX]; + const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; + const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX]; + uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; + uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0}; + struct rte_hash_bucket *cur_bkt, *next_bkt; + + /* Prefetch first keys */ + for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++) + rte_prefetch0(keys[i]); + + /* + * Prefetch rest of the keys, calculate primary and + * secondary bucket and prefetch them + */ + for (i = 0; i < (num_keys - PREFETCH_OFFSET); i++) { + rte_prefetch0(keys[i + PREFETCH_OFFSET]); + + prim_hash[i] = rte_hash_hash(h, keys[i]); + + sig[i] = 
get_short_sig(prim_hash[i]); + prim_index[i] = get_prim_bucket_index(h, prim_hash[i]); + sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]); + + primary_bkt[i] = &h->buckets[prim_index[i]]; + secondary_bkt[i] = &h->buckets[sec_index[i]]; + + rte_prefetch0(primary_bkt[i]); + rte_prefetch0(secondary_bkt[i]); + } + + /* Calculate and prefetch rest of the buckets */ + for (; i < num_keys; i++) { + prim_hash[i] = rte_hash_hash(h, keys[i]); + + sig[i] = get_short_sig(prim_hash[i]); + prim_index[i] = get_prim_bucket_index(h, prim_hash[i]); + sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]); + + primary_bkt[i] = &h->buckets[prim_index[i]]; + secondary_bkt[i] = &h->buckets[sec_index[i]]; + + rte_prefetch0(primary_bkt[i]); + rte_prefetch0(secondary_bkt[i]); + } + + __hash_rw_reader_lock(h); + + /* Compare signatures and prefetch key slot of first hit */ + for (i = 0; i < num_keys; i++) { + compare_signatures(&prim_hitmask[i], &sec_hitmask[i], + primary_bkt[i], secondary_bkt[i], + sig[i], h->sig_cmp_fn); + + if (prim_hitmask[i]) { + uint32_t first_hit = + __builtin_ctzl(prim_hitmask[i]) + >> 1; + uint32_t key_idx = + primary_bkt[i]->key_idx[first_hit]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + rte_prefetch0(key_slot); + continue; + } + + if (sec_hitmask[i]) { + uint32_t first_hit = + __builtin_ctzl(sec_hitmask[i]) + >> 1; + uint32_t key_idx = + secondary_bkt[i]->key_idx[first_hit]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + rte_prefetch0(key_slot); + } + } + + /* Compare keys, first hits in primary first */ + for (i = 0; i < num_keys; i++) { + positions[i] = -ENOENT; + while (prim_hitmask[i]) { + uint32_t hit_index = + __builtin_ctzl(prim_hitmask[i]) + >> 1; + uint32_t key_idx = + primary_bkt[i]->key_idx[hit_index]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + + /* + * If key index is 0, do not compare key, + * as it is checking the dummy slot + */ + if (!!key_idx & + !rte_hash_cmp_eq( + key_slot->key, keys[i], h)) { + if (data != NULL) + data[i] = key_slot->pdata; + + hits |= 1ULL << i; + positions[i] = key_idx - 1; + goto next_key; + } + prim_hitmask[i] &= ~(3ULL << (hit_index << 1)); + } + + while (sec_hitmask[i]) { + uint32_t hit_index = + __builtin_ctzl(sec_hitmask[i]) + >> 1; + uint32_t key_idx = + secondary_bkt[i]->key_idx[hit_index]; + const struct rte_hash_key *key_slot = + (const struct rte_hash_key *)( + (const char *)h->key_store + + key_idx * h->key_entry_size); + + /* + * If key index is 0, do not compare key, + * as it is checking the dummy slot + */ + + if (!!key_idx & + !rte_hash_cmp_eq( + key_slot->key, keys[i], h)) { + if (data != NULL) + data[i] = key_slot->pdata; + + hits |= 1ULL << i; + positions[i] = key_idx - 1; + goto next_key; + } + sec_hitmask[i] &= ~(3ULL << (hit_index << 1)); + } +next_key: + continue; + } + + /* all found, do not need to go through ext bkt */ + if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) { + if (hit_mask != NULL) + *hit_mask = hits; + __hash_rw_reader_unlock(h); + return; + } + + /* need to check ext buckets for match */ + for (i = 0; i < num_keys; i++) { + if ((hits & (1ULL << i)) != 0) + continue; + next_bkt = secondary_bkt[i]->next; + FOR_EACH_BUCKET(cur_bkt, next_bkt) { + if (data != NULL) + ret = search_one_bucket_l(h, 
keys[i], + sig[i], &data[i], cur_bkt); + else + ret = search_one_bucket_l(h, keys[i], + sig[i], NULL, cur_bkt); + if (ret != -1) { + positions[i] = ret; + hits |= 1ULL << i; + break; + } + } + } + + __hash_rw_reader_unlock(h); + + if (hit_mask != NULL) + *hit_mask = hits; +} + +static inline void +__rte_hash_lookup_bulk_lf(const struct rte_hash *h, const void **keys, int32_t num_keys, int32_t *positions, uint64_t *hit_mask, void *data[]) { @@ -1586,7 +1851,6 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, rte_prefetch0(secondary_bkt[i]); } - __hash_rw_reader_lock(h); do { /* Load the table change counter before the lookup * starts. Acquire semantics will make sure that @@ -1735,10 +1999,10 @@ next_key: next_bkt = secondary_bkt[i]->next; FOR_EACH_BUCKET(cur_bkt, next_bkt) { if (data != NULL) - ret = search_one_bucket(h, keys[i], + ret = search_one_bucket_lf(h, keys[i], sig[i], &data[i], cur_bkt); else - ret = search_one_bucket(h, keys[i], + ret = search_one_bucket_lf(h, keys[i], sig[i], NULL, cur_bkt); if (ret != -1) { positions[i] = ret; @@ -1748,12 +2012,23 @@ next_key: } } - __hash_rw_reader_unlock(h); - if (hit_mask != NULL) *hit_mask = hits; } +static inline void +__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, + int32_t num_keys, int32_t *positions, + uint64_t *hit_mask, void *data[]) +{ + if (h->readwrite_concur_lf_support) + return __rte_hash_lookup_bulk_lf(h, keys, num_keys, + positions, hit_mask, data); + else + return __rte_hash_lookup_bulk_l(h, keys, num_keys, + positions, hit_mask, data); +} + int rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, uint32_t num_keys, int32_t *positions) diff --git a/lib/librte_pci/rte_pci.c b/lib/librte_pci/rte_pci.c index 530738db..f400178b 100644 --- a/lib/librte_pci/rte_pci.c +++ b/lib/librte_pci/rte_pci.c @@ -30,6 +30,10 @@ get_u8_pciaddr_field(const char *in, void *_u8, char dlm) uint8_t *u8 = _u8; char *end; + /* empty string is an error though strtoul() returns 0 */ + if (*in == '\0') + return NULL; + errno = 0; val = strtoul(in, &end, 16); if (errno != 0 || end[0] != dlm || val > UINT8_MAX) { diff --git a/lib/librte_pipeline/rte_table_action.c b/lib/librte_pipeline/rte_table_action.c index 537e6593..7c7c8dd8 100644 --- a/lib/librte_pipeline/rte_table_action.c +++ b/lib/librte_pipeline/rte_table_action.c @@ -1694,10 +1694,9 @@ get_block_size(const struct rte_crypto_sym_xform *xform, uint8_t cdev_id) rte_cryptodev_info_get(cdev_id, &dev_info); - for (i = 0;; i++) { + for (i = 0; dev_info.capabilities[i].op != RTE_CRYPTO_OP_TYPE_UNDEFINED; + i++) { cap = &dev_info.capabilities[i]; - if (!cap) - break; if (cap->sym.xform_type != xform->type) continue; diff --git a/lib/librte_ring/rte_ring_c11_mem.h b/lib/librte_ring/rte_ring_c11_mem.h index 7bc74a4c..0fb73a33 100644 --- a/lib/librte_ring/rte_ring_c11_mem.h +++ b/lib/librte_ring/rte_ring_c11_mem.h @@ -61,11 +61,14 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp, unsigned int max = n; int success; - *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE); + *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED); do { /* Reset n to the initial burst count */ n = max; + /* Ensure the head is read before tail */ + __atomic_thread_fence(__ATOMIC_ACQUIRE); + /* load-acquire synchronize with store-release of ht->tail * in update_tail. 
*/ @@ -94,7 +97,7 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp, /* on failure, *old_head is updated */ success = __atomic_compare_exchange_n(&r->prod.head, old_head, *new_head, - 0, __ATOMIC_ACQUIRE, + 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); } while (unlikely(success == 0)); return n; @@ -134,11 +137,14 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc, int success; /* move cons.head atomically */ - *old_head = __atomic_load_n(&r->cons.head, __ATOMIC_ACQUIRE); + *old_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED); do { /* Restore n as it may change every loop */ n = max; + /* Ensure the head is read before tail */ + __atomic_thread_fence(__ATOMIC_ACQUIRE); + /* this load-acquire synchronize with store-release of ht->tail * in update_tail. */ @@ -166,7 +172,7 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc, /* on failure, *old_head will be updated */ success = __atomic_compare_exchange_n(&r->cons.head, old_head, *new_head, - 0, __ATOMIC_ACQUIRE, + 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); } while (unlikely(success == 0)); return n; diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c index 5472bead..dd01afc0 100644 --- a/lib/librte_vhost/vhost_crypto.c +++ b/lib/librte_vhost/vhost_crypto.c @@ -198,6 +198,7 @@ struct vhost_crypto { struct rte_hash *session_map; struct rte_mempool *mbuf_pool; struct rte_mempool *sess_pool; + struct rte_mempool *wb_pool; /** DPDK cryptodev ID */ uint8_t cid; @@ -215,13 +216,20 @@ struct vhost_crypto { uint8_t option; } __rte_cache_aligned; +struct vhost_crypto_writeback_data { + uint8_t *src; + uint8_t *dst; + uint64_t len; + struct vhost_crypto_writeback_data *next; +}; + struct vhost_crypto_data_req { struct vring_desc *head; struct virtio_net *dev; struct virtio_crypto_inhdr *inhdr; struct vhost_virtqueue *vq; - struct vring_desc *wb_desc; - uint16_t wb_len; + struct vhost_crypto_writeback_data *wb; + struct rte_mempool *wb_pool; uint16_t desc_idx; uint16_t len; uint16_t zero_copy; @@ -506,15 +514,29 @@ move_desc(struct vring_desc *head, struct vring_desc **cur_desc, left -= desc->len; } - if (unlikely(left > 0)) { - VC_LOG_ERR("Incorrect virtio descriptor"); + if (unlikely(left > 0)) return -1; - } *cur_desc = &head[desc->next]; return 0; } +static __rte_always_inline void * +get_data_ptr(struct vhost_crypto_data_req *vc_req, struct vring_desc *cur_desc, + uint8_t perm) +{ + void *data; + uint64_t dlen = cur_desc->len; + + data = IOVA_TO_VVA(void *, vc_req, cur_desc->addr, &dlen, perm); + if (unlikely(!data || dlen != cur_desc->len)) { + VC_LOG_ERR("Failed to map object"); + return NULL; + } + + return data; +} + static int copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req, struct vring_desc **cur_desc, uint32_t size) @@ -531,10 +553,8 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req, dlen = to_copy; src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, VHOST_ACCESS_RO); - if (unlikely(!src || !dlen)) { - VC_LOG_ERR("Failed to map descriptor"); + if (unlikely(!src || !dlen)) return -1; - } rte_memcpy((uint8_t *)data, src, dlen); data += dlen; @@ -609,73 +629,158 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req, return 0; } -static __rte_always_inline void * -get_data_ptr(struct vhost_crypto_data_req *vc_req, struct vring_desc **cur_desc, - uint32_t size, uint8_t perm) +static void +write_back_data(struct vhost_crypto_data_req *vc_req) { - void *data; - uint64_t dlen = (*cur_desc)->len; - - data = IOVA_TO_VVA(void *, vc_req, 
(*cur_desc)->addr, &dlen, perm); - if (unlikely(!data || dlen != (*cur_desc)->len)) { - VC_LOG_ERR("Failed to map object"); - return NULL; + struct vhost_crypto_writeback_data *wb_data = vc_req->wb, *wb_last; + + while (wb_data) { + rte_prefetch0(wb_data->next); + rte_memcpy(wb_data->dst, wb_data->src, wb_data->len); + wb_last = wb_data; + wb_data = wb_data->next; + rte_mempool_put(vc_req->wb_pool, wb_last); } +} - if (unlikely(move_desc(vc_req->head, cur_desc, size) < 0)) - return NULL; +static void +free_wb_data(struct vhost_crypto_writeback_data *wb_data, + struct rte_mempool *mp) +{ + while (wb_data->next != NULL) + free_wb_data(wb_data->next, mp); - return data; + rte_mempool_put(mp, wb_data); } -static int -write_back_data(struct rte_crypto_op *op, struct vhost_crypto_data_req *vc_req) +/** + * The function will allocate a vhost_crypto_writeback_data linked list + * containing the source and destination data pointers for the write back + * operation after dequeued from Cryptodev PMD queues. + * + * @param vc_req + * The vhost crypto data request pointer + * @param cur_desc + * The pointer of the current in use descriptor pointer. The content of + * cur_desc is expected to be updated after the function execution. + * @param end_wb_data + * The last write back data element to be returned. It is used only in cipher + * and hash chain operations. + * @param src + * The source data pointer + * @param offset + * The offset to both source and destination data. For source data the offset + * is the number of bytes between src and start point of cipher operation. For + * destination data the offset is the number of bytes from *cur_desc->addr + * to the point where the src will be written to. + * @param write_back_len + * The size of the write back length. + * @return + * The pointer to the start of the write back data linked list. 
+ */ +static struct vhost_crypto_writeback_data * +prepare_write_back_data(struct vhost_crypto_data_req *vc_req, + struct vring_desc **cur_desc, + struct vhost_crypto_writeback_data **end_wb_data, + uint8_t *src, + uint32_t offset, + uint64_t write_back_len) { - struct rte_mbuf *mbuf = op->sym->m_dst; - struct vring_desc *head = vc_req->head; - struct vring_desc *desc = vc_req->wb_desc; - int left = vc_req->wb_len; - uint32_t to_write; - uint8_t *src_data = mbuf->buf_addr, *dst; + struct vhost_crypto_writeback_data *wb_data, *head; + struct vring_desc *desc = *cur_desc; uint64_t dlen; + uint8_t *dst; + int ret; - rte_prefetch0(&head[desc->next]); - to_write = RTE_MIN(desc->len, (uint32_t)left); - dlen = desc->len; - dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, - VHOST_ACCESS_RW); - if (unlikely(!dst || dlen != desc->len)) { - VC_LOG_ERR("Failed to map descriptor"); - return -1; + ret = rte_mempool_get(vc_req->wb_pool, (void **)&head); + if (unlikely(ret < 0)) { + VC_LOG_ERR("no memory"); + goto error_exit; } - rte_memcpy(dst, src_data, to_write); - left -= to_write; - src_data += to_write; + wb_data = head; - while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { - desc = &head[desc->next]; - rte_prefetch0(&head[desc->next]); - to_write = RTE_MIN(desc->len, (uint32_t)left); + if (likely(desc->len > offset)) { + wb_data->src = src + offset; dlen = desc->len; - dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, - VHOST_ACCESS_RW); + dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, + &dlen, VHOST_ACCESS_RW) + offset; if (unlikely(!dst || dlen != desc->len)) { VC_LOG_ERR("Failed to map descriptor"); - return -1; + goto error_exit; } - rte_memcpy(dst, src_data, to_write); - left -= to_write; - src_data += to_write; - } + wb_data->dst = dst; + wb_data->len = desc->len - offset; + write_back_len -= wb_data->len; + src += offset + wb_data->len; + offset = 0; + + if (unlikely(write_back_len)) { + ret = rte_mempool_get(vc_req->wb_pool, + (void **)&(wb_data->next)); + if (unlikely(ret < 0)) { + VC_LOG_ERR("no memory"); + goto error_exit; + } - if (unlikely(left < 0)) { - VC_LOG_ERR("Incorrect virtio descriptor"); - return -1; + wb_data = wb_data->next; + } else + wb_data->next = NULL; + } else + offset -= desc->len; + + while (write_back_len) { + desc = &vc_req->head[desc->next]; + if (unlikely(!(desc->flags & VRING_DESC_F_WRITE))) { + VC_LOG_ERR("incorrect descriptor"); + goto error_exit; + } + + if (desc->len <= offset) { + offset -= desc->len; + continue; + } + + dlen = desc->len; + dst = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, + VHOST_ACCESS_RW) + offset; + if (unlikely(dst == NULL || dlen != desc->len)) { + VC_LOG_ERR("Failed to map descriptor"); + goto error_exit; + } + + wb_data->src = src; + wb_data->dst = dst; + wb_data->len = RTE_MIN(desc->len - offset, write_back_len); + write_back_len -= wb_data->len; + src += wb_data->len; + offset = 0; + + if (write_back_len) { + ret = rte_mempool_get(vc_req->wb_pool, + (void **)&(wb_data->next)); + if (unlikely(ret < 0)) { + VC_LOG_ERR("no memory"); + goto error_exit; + } + + wb_data = wb_data->next; + } else + wb_data->next = NULL; } - return 0; + *cur_desc = &vc_req->head[desc->next]; + + *end_wb_data = wb_data; + + return head; + +error_exit: + if (head) + free_wb_data(head, vc_req->wb_pool); + + return NULL; } static uint8_t @@ -685,6 +790,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, struct vring_desc *cur_desc) { struct vring_desc *desc = cur_desc; + struct 
vhost_crypto_writeback_data *ewb = NULL; struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst; uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET); uint8_t ret = 0; @@ -703,16 +809,25 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.src_data_len); - m_src->buf_addr = get_data_ptr(vc_req, &desc, - cipher->para.src_data_len, VHOST_ACCESS_RO); + m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO); if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) { VC_LOG_ERR("zero_copy may fail due to cross page data"); ret = VIRTIO_CRYPTO_ERR; goto error_exit; } + + if (unlikely(move_desc(vc_req->head, &desc, + cipher->para.src_data_len) < 0)) { + VC_LOG_ERR("Incorrect descriptor"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + break; case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + vc_req->wb_pool = vcrypto->wb_pool; + if (unlikely(cipher->para.src_data_len > RTE_MBUF_DEFAULT_BUF_SIZE)) { VC_LOG_ERR("Not enough space to do data copy"); @@ -743,24 +858,31 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: m_dst->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr, cipher->para.dst_data_len); - m_dst->buf_addr = get_data_ptr(vc_req, &desc, - cipher->para.dst_data_len, VHOST_ACCESS_RW); + m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW); if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) { VC_LOG_ERR("zero_copy may fail due to cross page data"); ret = VIRTIO_CRYPTO_ERR; goto error_exit; } + if (unlikely(move_desc(vc_req->head, &desc, + cipher->para.dst_data_len) < 0)) { + VC_LOG_ERR("Incorrect descriptor"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + m_dst->data_len = cipher->para.dst_data_len; break; case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: - vc_req->wb_desc = desc; - vc_req->wb_len = cipher->para.dst_data_len; - if (unlikely(move_desc(vc_req->head, &desc, - vc_req->wb_len) < 0)) { + vc_req->wb = prepare_write_back_data(vc_req, &desc, &ewb, + rte_pktmbuf_mtod(m_src, uint8_t *), 0, + cipher->para.dst_data_len); + if (unlikely(vc_req->wb == NULL)) { ret = VIRTIO_CRYPTO_ERR; goto error_exit; } + break; default: ret = VIRTIO_CRYPTO_BADMSG; @@ -774,7 +896,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, op->sym->cipher.data.offset = 0; op->sym->cipher.data.length = cipher->para.src_data_len; - vc_req->inhdr = get_data_ptr(vc_req, &desc, INHDR_LEN, VHOST_ACCESS_WO); + vc_req->inhdr = get_data_ptr(vc_req, desc, VHOST_ACCESS_WO); if (unlikely(vc_req->inhdr == NULL)) { ret = VIRTIO_CRYPTO_BADMSG; goto error_exit; @@ -786,6 +908,9 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, return 0; error_exit: + if (vc_req->wb) + free_wb_data(vc_req->wb, vc_req->wb_pool); + vc_req->len = INHDR_LEN; return ret; } @@ -796,7 +921,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, struct virtio_crypto_alg_chain_data_req *chain, struct vring_desc *cur_desc) { - struct vring_desc *desc = cur_desc; + struct vring_desc *desc = cur_desc, *digest_desc; + struct vhost_crypto_writeback_data *ewb = NULL, *ewb2 = NULL; struct rte_mbuf *m_src = op->sym->m_src, *m_dst = op->sym->m_dst; uint8_t *iv_data = rte_crypto_op_ctod_offset(op, uint8_t *, IV_OFFSET); uint32_t digest_offset; @@ -812,21 +938,30 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct 
rte_crypto_op *op, } m_src->data_len = chain->para.src_data_len; - m_dst->data_len = chain->para.dst_data_len; switch (vcrypto->option) { case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + m_dst->data_len = chain->para.dst_data_len; + m_src->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.src_data_len); - m_src->buf_addr = get_data_ptr(vc_req, &desc, - chain->para.src_data_len, VHOST_ACCESS_RO); + m_src->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO); if (unlikely(m_src->buf_iova == 0 || m_src->buf_addr == NULL)) { VC_LOG_ERR("zero_copy may fail due to cross page data"); ret = VIRTIO_CRYPTO_ERR; goto error_exit; } + + if (unlikely(move_desc(vc_req->head, &desc, + chain->para.src_data_len) < 0)) { + VC_LOG_ERR("Incorrect descriptor"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } break; case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + vc_req->wb_pool = vcrypto->wb_pool; + if (unlikely(chain->para.src_data_len > RTE_MBUF_DEFAULT_BUF_SIZE)) { VC_LOG_ERR("Not enough space to do data copy"); @@ -838,6 +973,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, ret = VIRTIO_CRYPTO_BADMSG; goto error_exit; } + break; default: ret = VIRTIO_CRYPTO_BADMSG; @@ -856,46 +992,70 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: m_dst->buf_iova = gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.dst_data_len); - m_dst->buf_addr = get_data_ptr(vc_req, &desc, - chain->para.dst_data_len, VHOST_ACCESS_RW); + m_dst->buf_addr = get_data_ptr(vc_req, desc, VHOST_ACCESS_RW); if (unlikely(m_dst->buf_iova == 0 || m_dst->buf_addr == NULL)) { VC_LOG_ERR("zero_copy may fail due to cross page data"); ret = VIRTIO_CRYPTO_ERR; goto error_exit; } + if (unlikely(move_desc(vc_req->head, &desc, + chain->para.dst_data_len) < 0)) { + VC_LOG_ERR("Incorrect descriptor"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + op->sym->auth.digest.phys_addr = gpa_to_hpa(vcrypto->dev, desc->addr, chain->para.hash_result_len); - op->sym->auth.digest.data = get_data_ptr(vc_req, &desc, - chain->para.hash_result_len, VHOST_ACCESS_RW); + op->sym->auth.digest.data = get_data_ptr(vc_req, desc, + VHOST_ACCESS_RW); if (unlikely(op->sym->auth.digest.phys_addr == 0)) { VC_LOG_ERR("zero_copy may fail due to cross page data"); ret = VIRTIO_CRYPTO_ERR; goto error_exit; } + + if (unlikely(move_desc(vc_req->head, &desc, + chain->para.hash_result_len) < 0)) { + VC_LOG_ERR("Incorrect descriptor"); + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } + break; case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: - digest_offset = m_dst->data_len; - digest_addr = rte_pktmbuf_mtod_offset(m_dst, void *, - digest_offset); + vc_req->wb = prepare_write_back_data(vc_req, &desc, &ewb, + rte_pktmbuf_mtod(m_src, uint8_t *), + chain->para.cipher_start_src_offset, + chain->para.dst_data_len - + chain->para.cipher_start_src_offset); + if (unlikely(vc_req->wb == NULL)) { + ret = VIRTIO_CRYPTO_ERR; + goto error_exit; + } - vc_req->wb_desc = desc; - vc_req->wb_len = m_dst->data_len + chain->para.hash_result_len; + digest_offset = m_src->data_len; + digest_addr = rte_pktmbuf_mtod_offset(m_src, void *, + digest_offset); + digest_desc = desc; - if (unlikely(move_desc(vc_req->head, &desc, - chain->para.dst_data_len) < 0)) { - ret = VIRTIO_CRYPTO_BADMSG; + /** create a wb_data for digest */ + ewb->next = prepare_write_back_data(vc_req, &desc, &ewb2, + digest_addr, 0, chain->para.hash_result_len); + if (unlikely(ewb->next == NULL)) { + ret = VIRTIO_CRYPTO_ERR; goto error_exit; } - if 
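/*
 * In the copy (non zero-copy) path of the chained cipher+hash request,
 * the digest is kept at the tail of the source mbuf and returned to the
 * guest through its own writeback node. A trimmed sketch of that digest
 * placement follows; only the rte_pktmbuf_* accessors and the digest
 * fields of struct rte_crypto_op are taken from DPDK, the helper name
 * and error handling are illustrative.
 */
static int
attach_digest_sketch(struct rte_crypto_op *op, struct rte_mbuf *m_src,
		uint32_t hash_result_len)
{
	uint32_t digest_offset = m_src->data_len;

	/* the digest must fit in the tailroom left after the packet data */
	if (hash_result_len > rte_pktmbuf_tailroom(m_src))
		return -1;

	op->sym->auth.digest.data = rte_pktmbuf_mtod_offset(m_src, uint8_t *,
			digest_offset);
	op->sym->auth.digest.phys_addr = rte_pktmbuf_iova_offset(m_src,
			digest_offset);

	return 0;
}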
(unlikely(copy_data(digest_addr, vc_req, &desc, + if (unlikely(copy_data(digest_addr, vc_req, &digest_desc, chain->para.hash_result_len)) < 0) { ret = VIRTIO_CRYPTO_BADMSG; goto error_exit; } op->sym->auth.digest.data = digest_addr; - op->sym->auth.digest.phys_addr = rte_pktmbuf_iova_offset(m_dst, + op->sym->auth.digest.phys_addr = rte_pktmbuf_iova_offset(m_src, digest_offset); break; default: @@ -904,7 +1064,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, } /* record inhdr */ - vc_req->inhdr = get_data_ptr(vc_req, &desc, INHDR_LEN, VHOST_ACCESS_WO); + vc_req->inhdr = get_data_ptr(vc_req, desc, VHOST_ACCESS_WO); if (unlikely(vc_req->inhdr == NULL)) { ret = VIRTIO_CRYPTO_BADMSG; goto error_exit; @@ -927,6 +1087,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, return 0; error_exit: + if (vc_req->wb) + free_wb_data(vc_req->wb, vc_req->wb_pool); vc_req->len = INHDR_LEN; return ret; } @@ -967,7 +1129,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, vc_req->head = head; vc_req->zero_copy = vcrypto->option; - req = get_data_ptr(vc_req, &desc, sizeof(*req), VHOST_ACCESS_RO); + req = get_data_ptr(vc_req, desc, VHOST_ACCESS_RO); if (unlikely(req == NULL)) { switch (vcrypto->option) { case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: @@ -988,6 +1150,12 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, VC_LOG_ERR("Invalid option"); goto error_exit; } + } else { + if (unlikely(move_desc(vc_req->head, &desc, + sizeof(*req)) < 0)) { + VC_LOG_ERR("Incorrect descriptor"); + goto error_exit; + } } switch (req->header.opcode) { @@ -1062,7 +1230,6 @@ vhost_crypto_finalize_one_request(struct rte_crypto_op *op, struct rte_mbuf *m_dst = op->sym->m_dst; struct vhost_crypto_data_req *vc_req = rte_mbuf_to_priv(m_src); uint16_t desc_idx; - int ret = 0; if (unlikely(!vc_req)) { VC_LOG_ERR("Failed to retrieve vc_req"); @@ -1077,19 +1244,18 @@ vhost_crypto_finalize_one_request(struct rte_crypto_op *op, if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) vc_req->inhdr->status = VIRTIO_CRYPTO_ERR; else { - if (vc_req->zero_copy == 0) { - ret = write_back_data(op, vc_req); - if (unlikely(ret != 0)) - vc_req->inhdr->status = VIRTIO_CRYPTO_ERR; - } + if (vc_req->zero_copy == 0) + write_back_data(vc_req); } vc_req->vq->used->ring[desc_idx].id = desc_idx; vc_req->vq->used->ring[desc_idx].len = vc_req->len; - rte_mempool_put(m_dst->pool, (void *)m_dst); rte_mempool_put(m_src->pool, (void *)m_src); + if (m_dst) + rte_mempool_put(m_dst->pool, (void *)m_dst); + return vc_req->vq; } @@ -1186,6 +1352,18 @@ rte_vhost_crypto_create(int vid, uint8_t cryptodev_id, goto error_exit; } + snprintf(name, 127, "WB_POOL_VM_%u", (uint32_t)vid); + vcrypto->wb_pool = rte_mempool_create(name, + VHOST_CRYPTO_MBUF_POOL_SIZE, + sizeof(struct vhost_crypto_writeback_data), + 128, 0, NULL, NULL, NULL, NULL, + rte_socket_id(), 0); + if (!vcrypto->wb_pool) { + VC_LOG_ERR("Failed to creath mempool"); + ret = -ENOMEM; + goto error_exit; + } + dev->extern_data = vcrypto; dev->extern_ops.pre_msg_handle = NULL; dev->extern_ops.post_msg_handle = vhost_crypto_msg_post_handler; @@ -1222,6 +1400,7 @@ rte_vhost_crypto_free(int vid) rte_hash_free(vcrypto->session_map); rte_mempool_free(vcrypto->mbuf_pool); + rte_mempool_free(vcrypto->wb_pool); rte_free(vcrypto); dev->extern_data = NULL; @@ -1257,11 +1436,30 @@ rte_vhost_crypto_set_zero_copy(int vid, enum rte_vhost_crypto_zero_copy option) if (vcrypto->option == (uint8_t)option) return 0; - if 
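/*
 * The writeback nodes are backed by a plain fixed-size mempool, created
 * per vhost-crypto device next to the existing mbuf pool. The sketch
 * below mirrors the rte_mempool_create() call used in this patch (same
 * element size, cache size and socket), with error handling trimmed;
 * the helper name is illustrative.
 */
static struct rte_mempool *
create_wb_pool_sketch(int vid)
{
	char name[128];

	snprintf(name, sizeof(name), "WB_POOL_VM_%u", (uint32_t)vid);

	/* no per-object constructor is needed: nodes are fully rewritten by
	 * prepare_write_back_data() each time they are taken from the pool
	 */
	return rte_mempool_create(name,
			VHOST_CRYPTO_MBUF_POOL_SIZE,	/* number of nodes */
			sizeof(struct vhost_crypto_writeback_data),
			128, 0,			/* cache size, no private area */
			NULL, NULL, NULL, NULL,	/* no pool/object init */
			rte_socket_id(), 0);
}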
(!(rte_mempool_full(vcrypto->mbuf_pool))) { + if (!(rte_mempool_full(vcrypto->mbuf_pool)) || + !(rte_mempool_full(vcrypto->wb_pool))) { VC_LOG_ERR("Cannot update zero copy as mempool is not full"); return -EINVAL; } + if (option == RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE) { + char name[128]; + + snprintf(name, 127, "WB_POOL_VM_%u", (uint32_t)vid); + vcrypto->wb_pool = rte_mempool_create(name, + VHOST_CRYPTO_MBUF_POOL_SIZE, + sizeof(struct vhost_crypto_writeback_data), + 128, 0, NULL, NULL, NULL, NULL, + rte_socket_id(), 0); + if (!vcrypto->wb_pool) { + VC_LOG_ERR("Failed to creath mbuf pool"); + return -ENOMEM; + } + } else { + rte_mempool_free(vcrypto->wb_pool); + vcrypto->wb_pool = NULL; + } + vcrypto->option = (uint8_t)option; return 0; @@ -1277,9 +1475,8 @@ rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, struct vhost_virtqueue *vq; uint16_t avail_idx; uint16_t start_idx; - uint16_t required; uint16_t count; - uint16_t i; + uint16_t i = 0; if (unlikely(dev == NULL)) { VC_LOG_ERR("Invalid vid %i", vid); @@ -1311,27 +1508,66 @@ rte_vhost_crypto_fetch_requests(int vid, uint32_t qid, /* for zero copy, we need 2 empty mbufs for src and dst, otherwise * we need only 1 mbuf as src and dst */ - required = count * 2; - if (unlikely(rte_mempool_get_bulk(vcrypto->mbuf_pool, (void **)mbufs, - required) < 0)) { - VC_LOG_ERR("Insufficient memory"); - return -ENOMEM; - } + switch (vcrypto->option) { + case RTE_VHOST_CRYPTO_ZERO_COPY_ENABLE: + if (unlikely(rte_mempool_get_bulk(vcrypto->mbuf_pool, + (void **)mbufs, count * 2) < 0)) { + VC_LOG_ERR("Insufficient memory"); + return -ENOMEM; + } - for (i = 0; i < count; i++) { - uint16_t used_idx = (start_idx + i) & (vq->size - 1); - uint16_t desc_idx = vq->avail->ring[used_idx]; - struct vring_desc *head = &vq->desc[desc_idx]; - struct rte_crypto_op *op = ops[i]; + for (i = 0; i < count; i++) { + uint16_t used_idx = (start_idx + i) & (vq->size - 1); + uint16_t desc_idx = vq->avail->ring[used_idx]; + struct vring_desc *head = &vq->desc[desc_idx]; + struct rte_crypto_op *op = ops[i]; - op->sym->m_src = mbufs[i * 2]; - op->sym->m_dst = mbufs[i * 2 + 1]; - op->sym->m_src->data_off = 0; - op->sym->m_dst->data_off = 0; + op->sym->m_src = mbufs[i * 2]; + op->sym->m_dst = mbufs[i * 2 + 1]; + op->sym->m_src->data_off = 0; + op->sym->m_dst->data_off = 0; + + if (unlikely(vhost_crypto_process_one_req(vcrypto, vq, + op, head, desc_idx)) < 0) + break; + } + + if (unlikely(i < count)) + rte_mempool_put_bulk(vcrypto->mbuf_pool, + (void **)&mbufs[i * 2], + (count - i) * 2); + + break; + + case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: + if (unlikely(rte_mempool_get_bulk(vcrypto->mbuf_pool, + (void **)mbufs, count) < 0)) { + VC_LOG_ERR("Insufficient memory"); + return -ENOMEM; + } + + for (i = 0; i < count; i++) { + uint16_t used_idx = (start_idx + i) & (vq->size - 1); + uint16_t desc_idx = vq->avail->ring[used_idx]; + struct vring_desc *head = &vq->desc[desc_idx]; + struct rte_crypto_op *op = ops[i]; + + op->sym->m_src = mbufs[i]; + op->sym->m_dst = NULL; + op->sym->m_src->data_off = 0; + + if (unlikely(vhost_crypto_process_one_req(vcrypto, vq, + op, head, desc_idx)) < 0) + break; + } + + if (unlikely(i < count)) + rte_mempool_put_bulk(vcrypto->mbuf_pool, + (void **)&mbufs[i], + count - i); + + break; - if (unlikely(vhost_crypto_process_one_req(vcrypto, vq, op, head, - desc_idx)) < 0) - break; } vq->last_used_idx += i; diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index cc154f31..3ea64eba 100644 --- a/lib/librte_vhost/vhost_user.c +++ 
b/lib/librte_vhost/vhost_user.c @@ -1732,7 +1732,7 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg) if (ret <= 0) return ret; - if (msg && msg->size) { + if (msg->size) { if (msg->size > sizeof(msg->payload)) { RTE_LOG(ERR, VHOST_CONFIG, "invalid msg size: %d\n", msg->size); diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 8ad30c94..5e1a1a72 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -598,7 +598,7 @@ reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, avail_idx, &desc_count, buf_vec, &vec_idx, &buf_id, &len, - VHOST_ACCESS_RO) < 0)) + VHOST_ACCESS_RW) < 0)) return -1; len = RTE_MIN(len, size); @@ -1503,7 +1503,7 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, vq->last_avail_idx, &desc_count, buf_vec, &nr_vec, &buf_id, &dummy_len, - VHOST_ACCESS_RW) < 0)) + VHOST_ACCESS_RO) < 0)) break; if (likely(dev->dequeue_zero_copy == 0)) diff --git a/meson.build b/meson.build index 6d25b90f..a72237e1 100644 --- a/meson.build +++ b/meson.build @@ -2,7 +2,7 @@ # Copyright(c) 2017 Intel Corporation project('DPDK', 'C', - version: '18.11.0-rc2', + version: '18.11.0-rc3', license: 'BSD', default_options: ['buildtype=release', 'default_library=static'], meson_version: '>= 0.41' diff --git a/mk/rte.app.mk b/mk/rte.app.mk index 3ebc4e64..5699d979 100644 --- a/mk/rte.app.mk +++ b/mk/rte.app.mk @@ -50,9 +50,11 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_LPM) += -lrte_lpm _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --whole-archive _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += -lrte_acl _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL) += --no-whole-archive +_LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --no-as-needed _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --whole-archive _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += -lrte_telemetry -ljansson _LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --no-whole-archive +_LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += --as-needed _LDLIBS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += -lrte_jobstats _LDLIBS-$(CONFIG_RTE_LIBRTE_METRICS) += -lrte_metrics _LDLIBS-$(CONFIG_RTE_LIBRTE_BITRATE) += -lrte_bitratestats diff --git a/mk/rte.sdkconfig.mk b/mk/rte.sdkconfig.mk index d90d62cc..fa77331c 100644 --- a/mk/rte.sdkconfig.mk +++ b/mk/rte.sdkconfig.mk @@ -114,8 +114,7 @@ SDK_RELPATH=$(shell $(RTE_SDK)/buildtools/relpath.sh $(abspath $(RTE_SRCDIR)) \ OUTPUT_RELPATH=$(shell $(RTE_SDK)/buildtools/relpath.sh $(abspath $(RTE_OUTPUT)) \ $(abspath $(RTE_SRCDIR))) $(RTE_OUTPUT)/Makefile: | $(RTE_OUTPUT) - $(Q)$(RTE_SDK)/buildtools/gen-build-mk.sh $(SDK_RELPATH) $(OUTPUT_RELPATH) \ - > $(RTE_OUTPUT)/Makefile + $(Q)$(RTE_SDK)/buildtools/gen-build-mk.sh $(SDK_RELPATH) > $@ # clean installed files, and generate a new config header file # if NODOTCONF variable is defined, don't try to rebuild .config diff --git a/test/bpf/t1.c b/test/bpf/t1.c index 60f9434a..3364b4f1 100644 --- a/test/bpf/t1.c +++ b/test/bpf/t1.c @@ -20,32 +20,36 @@ * (011) ret #1 * (012) ret #0 * - * To compile: - * clang -O2 -target bpf -c t1.c + * To compile on x86: + * clang -O2 -U __GNUC__ -target bpf -c t1.c + * + * To compile on ARM: + * clang -O2 -I/usr/include/aarch64-linux-gnu/ -target bpf -c t1.c */ #include #include #include #include +#include uint64_t entry(void *pkt) { struct ether_header *ether_header = (void *)pkt; - if (ether_header->ether_type != __builtin_bswap16(0x0800)) + if (ether_header->ether_type != htons(0x0800)) return 0; struct iphdr *iphdr = (void *)(ether_header + 1); if (iphdr->protocol != 17 || (iphdr->frag_off & 0x1ffff) != 0 || - 
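/*
 * On the byte-order changes in test/bpf/t1.c here: __builtin_bswap16()
 * and __builtin_bswap32() swap unconditionally, so comparing packet
 * fields against swapped constants is only correct on little-endian
 * hosts, while htons()/htonl() expand to a no-op on big-endian targets.
 * A minimal illustration, not part of the patch (0x0800 is
 * ETHERTYPE_IP):
 */
static inline int
is_ipv4_frame(const struct ether_header *eh)
{
	/* ether_type is carried in network byte order in the frame */
	return eh->ether_type == htons(0x0800);
}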
iphdr->daddr != __builtin_bswap32(0x1020304)) + iphdr->daddr != htonl(0x1020304)) return 0; int hlen = iphdr->ihl * 4; struct udphdr *udphdr = (void *)iphdr + hlen; - if (udphdr->dest != __builtin_bswap16(5000)) + if (udphdr->dest != htons(5000)) return 0; return 1; diff --git a/test/bpf/t3.c b/test/bpf/t3.c index 531b9cb8..9ba34638 100644 --- a/test/bpf/t3.c +++ b/test/bpf/t3.c @@ -6,9 +6,15 @@ * eBPF program sample. * Accepts pointer to struct rte_mbuf as an input parameter. * Dump the mbuf into stdout if it is an ARP packet (aka tcpdump 'arp'). - * To compile: - * clang -O2 -I${RTE_SDK}/${RTE_TARGET}/include \ + * + * To compile on x86: + * clang -O2 -U __GNUC__ -I${RTE_SDK}/${RTE_TARGET}/include \ * -target bpf -Wno-int-to-void-pointer-cast -c t3.c + * + * To compile on ARM: + * clang -O2 -I/usr/include/aarch64-linux-gnu \ + * -I${RTE_SDK}/${RTE_TARGET}/include -target bpf \ + * -Wno-int-to-void-pointer-cast -c t3.c */ #include @@ -17,6 +23,7 @@ #include #include #include "mbuf.h" +#include extern void rte_pktmbuf_dump(FILE *, const struct rte_mbuf *, unsigned int); @@ -29,7 +36,7 @@ entry(const void *pkt) mb = pkt; eth = rte_pktmbuf_mtod(mb, const struct ether_header *); - if (eth->ether_type == __builtin_bswap16(ETHERTYPE_ARP)) + if (eth->ether_type == htons(ETHERTYPE_ARP)) rte_pktmbuf_dump(stdout, mb, 64); return 1; diff --git a/test/test/test.c b/test/test/test.c index 24df6299..12fabd0b 100644 --- a/test/test/test.c +++ b/test/test/test.c @@ -102,8 +102,10 @@ main(int argc, char **argv) /* merge argc/argv and the environment args */ all_argc = argc + eargc; all_argv = malloc(sizeof(*all_argv) * (all_argc + 1)); - if (all_argv == NULL) - return -1; + if (all_argv == NULL) { + ret = -1; + goto out; + } for (i = 0; i < argc; i++) all_argv[i] = argv[i]; diff --git a/test/test/test_bpf.c b/test/test/test_bpf.c index fa17c4f7..1d50401a 100644 --- a/test/test/test_bpf.c +++ b/test/test/test_bpf.c @@ -48,6 +48,12 @@ struct dummy_vect8 { #define TEST_JCC_3 5678 #define TEST_JCC_4 TEST_FILL_1 +#define TEST_IMM_1 UINT64_MAX +#define TEST_IMM_2 ((uint64_t)INT64_MIN) +#define TEST_IMM_3 ((uint64_t)INT64_MAX + INT32_MAX) +#define TEST_IMM_4 ((uint64_t)UINT32_MAX) +#define TEST_IMM_5 ((uint64_t)UINT32_MAX + 1) + struct bpf_test { const char *name; size_t arg_sz; @@ -268,6 +274,94 @@ test_load1_check(uint64_t rc, const void *arg) return cmp_res(__func__, v, rc, dft, dft, sizeof(*dft)); } +/* load immediate test-cases */ +static const struct ebpf_insn test_ldimm1_prog[] = { + + { + .code = (BPF_LD | BPF_IMM | EBPF_DW), + .dst_reg = EBPF_REG_0, + .imm = (uint32_t)TEST_IMM_1, + }, + { + .imm = TEST_IMM_1 >> 32, + }, + { + .code = (BPF_LD | BPF_IMM | EBPF_DW), + .dst_reg = EBPF_REG_3, + .imm = (uint32_t)TEST_IMM_2, + }, + { + .imm = TEST_IMM_2 >> 32, + }, + { + .code = (BPF_LD | BPF_IMM | EBPF_DW), + .dst_reg = EBPF_REG_5, + .imm = (uint32_t)TEST_IMM_3, + }, + { + .imm = TEST_IMM_3 >> 32, + }, + { + .code = (BPF_LD | BPF_IMM | EBPF_DW), + .dst_reg = EBPF_REG_7, + .imm = (uint32_t)TEST_IMM_4, + }, + { + .imm = TEST_IMM_4 >> 32, + }, + { + .code = (BPF_LD | BPF_IMM | EBPF_DW), + .dst_reg = EBPF_REG_9, + .imm = (uint32_t)TEST_IMM_5, + }, + { + .imm = TEST_IMM_5 >> 32, + }, + /* return sum */ + { + .code = (EBPF_ALU64 | BPF_ADD | BPF_X), + .dst_reg = EBPF_REG_0, + .src_reg = EBPF_REG_3, + }, + { + .code = (EBPF_ALU64 | BPF_ADD | BPF_X), + .dst_reg = EBPF_REG_0, + .src_reg = EBPF_REG_5, + }, + { + .code = (EBPF_ALU64 | BPF_ADD | BPF_X), + .dst_reg = EBPF_REG_0, + .src_reg = EBPF_REG_7, + }, + { + .code = 
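/*
 * The test_ldimm1 case in test_bpf.c exercises the standard eBPF
 * encoding of 64-bit immediates: BPF_LD | BPF_IMM | EBPF_DW takes two
 * instruction slots, the first slot's imm holding the low 32 bits and
 * the second slot's imm holding the high 32 bits. A loader or JIT
 * reassembles the constant roughly as sketched here (helper name is
 * illustrative):
 */
static uint64_t
ebpf_ldimm64_value(const struct ebpf_insn *ins)
{
	/* casts through uint32_t avoid sign-extending the low word */
	return (uint64_t)(uint32_t)ins[0].imm |
			((uint64_t)(uint32_t)ins[1].imm << 32);
}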
(EBPF_ALU64 | BPF_ADD | BPF_X), + .dst_reg = EBPF_REG_0, + .src_reg = EBPF_REG_9, + }, + { + .code = (BPF_JMP | EBPF_EXIT), + }, +}; + +static int +test_ldimm1_check(uint64_t rc, const void *arg) +{ + uint64_t v1, v2; + + v1 = TEST_IMM_1; + v2 = TEST_IMM_2; + v1 += v2; + v2 = TEST_IMM_3; + v1 += v2; + v2 = TEST_IMM_4; + v1 += v2; + v2 = TEST_IMM_5; + v1 += v2; + + return cmp_res(__func__, v1, rc, arg, arg, 0); +} + + /* alu mul test-cases */ static const struct ebpf_insn test_mul1_prog[] = { @@ -1726,6 +1820,20 @@ static const struct bpf_test tests[] = { .prepare = test_load1_prepare, .check_result = test_load1_check, }, + { + .name = "test_ldimm1", + .arg_sz = sizeof(struct dummy_offset), + .prm = { + .ins = test_ldimm1_prog, + .nb_ins = RTE_DIM(test_ldimm1_prog), + .prog_arg = { + .type = RTE_BPF_ARG_PTR, + .size = sizeof(struct dummy_offset), + }, + }, + .prepare = test_store1_prepare, + .check_result = test_ldimm1_check, + }, { .name = "test_mul1", .arg_sz = sizeof(struct dummy_vect8), diff --git a/test/test/test_common.c b/test/test/test_common.c index 7a67e458..c6d17baa 100644 --- a/test/test/test_common.c +++ b/test/test/test_common.c @@ -188,6 +188,37 @@ test_log2(void) return 0; } +static int +test_fls(void) +{ + struct fls_test_vector { + uint32_t arg; + int rc; + }; + int expected, rc; + uint32_t i, arg; + + const struct fls_test_vector test[] = { + {0x0, 0}, + {0x1, 1}, + {0x4000, 15}, + {0x80000000, 32}, + }; + + for (i = 0; i < RTE_DIM(test); i++) { + arg = test[i].arg; + rc = rte_fls_u32(arg); + expected = test[i].rc; + if (rc != expected) { + printf("Wrong rte_fls_u32(0x%x) rc=%d, expected=%d\n", + arg, rc, expected); + return TEST_FAILED; + } + } + + return 0; +} + static int test_common(void) { @@ -196,6 +227,7 @@ test_common(void) ret |= test_macros(0); ret |= test_misc(); ret |= test_log2(); + ret |= test_fls(); return ret; } diff --git a/test/test/test_hash_readwrite.c b/test/test/test_hash_readwrite.c index 01f986cf..6b695ce6 100644 --- a/test/test/test_hash_readwrite.c +++ b/test/test/test_hash_readwrite.c @@ -678,24 +678,26 @@ test_hash_readwrite_main(void) reader_faster) < 0) return -1; + printf("================\n"); printf("Results summary:\n"); + printf("================\n"); printf("single read: %u\n", htm_results.single_read); printf("single write: %u\n", htm_results.single_write); for (i = 0; i < NUM_TEST; i++) { - printf("core_cnt: %u\n", core_cnt[i]); + printf("+++ core_cnt: %u +++\n", core_cnt[i]); printf("HTM:\n"); - printf("read only: %u\n", htm_results.read_only[i]); - printf("write only: %u\n", htm_results.write_only[i]); - printf("read-write read: %u\n", htm_results.read_write_r[i]); - printf("read-write write: %u\n", htm_results.read_write_w[i]); + printf(" read only: %u\n", htm_results.read_only[i]); + printf(" write only: %u\n", htm_results.write_only[i]); + printf(" read-write read: %u\n", htm_results.read_write_r[i]); + printf(" read-write write: %u\n", htm_results.read_write_w[i]); printf("non HTM:\n"); - printf("read only: %u\n", non_htm_results.read_only[i]); - printf("write only: %u\n", non_htm_results.write_only[i]); - printf("read-write read: %u\n", + printf(" read only: %u\n", non_htm_results.read_only[i]); + printf(" write only: %u\n", non_htm_results.write_only[i]); + printf(" read-write read: %u\n", non_htm_results.read_write_r[i]); - printf("read-write write: %u\n", + printf(" read-write write: %u\n", non_htm_results.read_write_w[i]); } diff --git a/test/test/test_kni.c b/test/test/test_kni.c index f3c19b5a..c92c0905 100644 --- 
a/test/test/test_kni.c +++ b/test/test/test_kni.c @@ -549,7 +549,7 @@ test_kni(void) if (!dir) { if (errno == ENOENT) { printf("Cannot run UT due to missing rte_kni module\n"); - return -1; + return TEST_SKIPPED; } printf("opendir: %s", strerror(errno)); return -1; diff --git a/test/test/test_power_acpi_cpufreq.c b/test/test/test_power_acpi_cpufreq.c index 22e541d6..6d637cc7 100644 --- a/test/test/test_power_acpi_cpufreq.c +++ b/test/test/test_power_acpi_cpufreq.c @@ -441,7 +441,7 @@ test_power_acpi_cpufreq(void) "correctly(APCI cpufreq) or operating in another valid " "Power management environment\n", TEST_POWER_LCORE_ID); rte_power_unset_env(); - return -1; + return TEST_SKIPPED; } /** diff --git a/test/test/test_reorder.c b/test/test/test_reorder.c index ccee4d08..58fa9c71 100644 --- a/test/test/test_reorder.c +++ b/test/test/test_reorder.c @@ -269,7 +269,7 @@ test_reorder_drain(void) goto exit; } if (robufs[0] != NULL) - rte_pktmbuf_free(robufs[i]); + rte_pktmbuf_free(robufs[0]); /* Insert more packets * RB[] = {NULL, NULL, NULL, NULL} -- cgit 1.2.3-korg
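/*
 * On the test_fls() cases added in test_common.c above: rte_fls_u32(x)
 * returns the position of the most significant set bit, counting from
 * 1, and 0 when the input is 0. An equivalent reference implementation,
 * consistent with the test vectors {0x0, 0}, {0x1, 1}, {0x4000, 15} and
 * {0x80000000, 32}, is sketched below (the name and the use of
 * __builtin_clz are illustrative, not necessarily the rte_common.h
 * implementation):
 */
static inline uint32_t
fls_u32_sketch(uint32_t x)
{
	/* __builtin_clz() is undefined for 0, so handle that case first */
	return (x == 0) ? 0 : 32 - (uint32_t)__builtin_clz(x);
}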