diff options
Diffstat (limited to 'lib/librte_ethdev')
-rw-r--r-- | lib/librte_ethdev/Makefile | 44 | ||||
-rw-r--r-- | lib/librte_ethdev/ethdev_profile.c | 135 | ||||
-rw-r--r-- | lib/librte_ethdev/ethdev_profile.h | 27 | ||||
-rw-r--r-- | lib/librte_ethdev/meson.build | 27 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_dev_info.h | 49 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_eth_ctrl.h | 828 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_ethdev.c | 4526 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_ethdev.h | 4326 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_ethdev_core.h | 625 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_ethdev_driver.h | 332 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_ethdev_pci.h | 210 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_ethdev_vdev.h | 84 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_ethdev_version.map | 247 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_flow.c | 528 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_flow.h | 2208 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_flow_driver.h | 121 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_mtr.c | 201 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_mtr.h | 730 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_mtr_driver.h | 192 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_tm.c | 409 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_tm.h | 1961 | ||||
-rw-r--r-- | lib/librte_ethdev/rte_tm_driver.h | 337 |
22 files changed, 18147 insertions, 0 deletions
diff --git a/lib/librte_ethdev/Makefile b/lib/librte_ethdev/Makefile new file mode 100644 index 00000000..c2f2f7d8 --- /dev/null +++ b/lib/librte_ethdev/Makefile @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2010-2017 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_ethdev.a + +CFLAGS += -DALLOW_EXPERIMENTAL_API +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_net -lrte_eal -lrte_mempool -lrte_ring +LDLIBS += -lrte_mbuf + +EXPORT_MAP := rte_ethdev_version.map + +LIBABIVER := 9 + +SRCS-y += rte_ethdev.c +SRCS-y += rte_flow.c +SRCS-y += rte_tm.c +SRCS-y += rte_mtr.c +SRCS-y += ethdev_profile.c + +# +# Export include files +# +SYMLINK-y-include += rte_ethdev.h +SYMLINK-y-include += rte_ethdev_driver.h +SYMLINK-y-include += rte_ethdev_core.h +SYMLINK-y-include += rte_ethdev_pci.h +SYMLINK-y-include += rte_ethdev_vdev.h +SYMLINK-y-include += rte_eth_ctrl.h +SYMLINK-y-include += rte_dev_info.h +SYMLINK-y-include += rte_flow.h +SYMLINK-y-include += rte_flow_driver.h +SYMLINK-y-include += rte_tm.h +SYMLINK-y-include += rte_tm_driver.h +SYMLINK-y-include += rte_mtr.h +SYMLINK-y-include += rte_mtr_driver.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ethdev/ethdev_profile.c b/lib/librte_ethdev/ethdev_profile.c new file mode 100644 index 00000000..0d1dcda3 --- /dev/null +++ b/lib/librte_ethdev/ethdev_profile.c @@ -0,0 +1,135 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ + +#include "ethdev_profile.h" + +/** + * This conditional block enables RX queues profiling by tracking wasted + * iterations, i.e. iterations which yielded no RX packets. Profiling is + * performed using the Instrumentation and Tracing Technology (ITT) API, + * employed by the Intel (R) VTune (TM) Amplifier. + */ +#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS + +#include <ittnotify.h> + +#define ITT_MAX_NAME_LEN (100) + +/** + * Auxiliary ITT structure belonging to Ethernet device and using to: + * - track RX queue state to determine whether it is wasting loop iterations + * - begin or end ITT task using task domain and task name (handle) + */ +struct itt_profile_rx_data { + /** + * ITT domains for each queue. + */ + __itt_domain *domains[RTE_MAX_QUEUES_PER_PORT]; + /** + * ITT task names for each queue. + */ + __itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT]; + /** + * Flags indicating the queues state. Possible values: + * 1 - queue is wasting iterations, + * 0 - otherwise. + */ + uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT]; +}; + +/** + * The pool of *itt_profile_rx_data* structures. + */ +struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS]; + + +/** + * This callback function manages ITT tasks collection on given port and queue. + * It must be registered with rte_eth_add_rx_callback() to be called from + * rte_eth_rx_burst(). To find more comments see rte_rx_callback_fn function + * type declaration. + */ +static uint16_t +collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id, + __rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts, + __rte_unused uint16_t max_pkts, __rte_unused void *user_param) +{ + if (unlikely(nb_pkts == 0)) { + if (!itt_rx_data[port_id].queue_state[queue_id]) { + __itt_task_begin( + itt_rx_data[port_id].domains[queue_id], + __itt_null, __itt_null, + itt_rx_data[port_id].handles[queue_id]); + itt_rx_data[port_id].queue_state[queue_id] = 1; + } + } else { + if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) { + __itt_task_end( + itt_rx_data[port_id].domains[queue_id]); + itt_rx_data[port_id].queue_state[queue_id] = 0; + } + } + return nb_pkts; +} + +/** + * Initialization of itt_profile_rx_data for a given Ethernet device. + * This function must be invoked when ethernet device is being configured. + * Result will be stored in the global array *itt_rx_data*. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param port_name + * The name of the Ethernet device. + * @param rx_queue_num + * The number of RX queues on specified port. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +static inline int +itt_profile_rx_init(uint16_t port_id, char *port_name, uint8_t rx_queue_num) +{ + uint16_t q_id; + + for (q_id = 0; q_id < rx_queue_num; ++q_id) { + char domain_name[ITT_MAX_NAME_LEN]; + + snprintf(domain_name, sizeof(domain_name), + "RXBurst.WastedIterations.Port_%s.Queue_%d", + port_name, q_id); + itt_rx_data[port_id].domains[q_id] + = __itt_domain_create(domain_name); + + char task_name[ITT_MAX_NAME_LEN]; + + snprintf(task_name, sizeof(task_name), + "port id: %d; queue id: %d", + port_id, q_id); + itt_rx_data[port_id].handles[q_id] + = __itt_string_handle_create(task_name); + + itt_rx_data[port_id].queue_state[q_id] = 0; + + if (!rte_eth_add_rx_callback( + port_id, q_id, collect_itt_rx_burst_cb, NULL)) { + return -rte_errno; + } + } + + return 0; +} +#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */ + +int +__rte_eth_profile_rx_init(__rte_unused uint16_t port_id, + __rte_unused struct rte_eth_dev *dev) +{ +#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS + return itt_profile_rx_init( + port_id, dev->data->name, dev->data->nb_rx_queues); +#endif + return 0; +} diff --git a/lib/librte_ethdev/ethdev_profile.h b/lib/librte_ethdev/ethdev_profile.h new file mode 100644 index 00000000..e5ea3682 --- /dev/null +++ b/lib/librte_ethdev/ethdev_profile.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ + +#ifndef _RTE_ETHDEV_PROFILE_H_ +#define _RTE_ETHDEV_PROFILE_H_ + +#include "rte_ethdev.h" + +/** + * Initialization of profiling RX queues for the Ethernet device. + * Implementation of this function depends on chosen profiling method, + * defined in configs. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param dev + * Pointer to struct rte_eth_dev corresponding to given port_id. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +__rte_eth_profile_rx_init(uint16_t port_id, struct rte_eth_dev *dev); + +#endif diff --git a/lib/librte_ethdev/meson.build b/lib/librte_ethdev/meson.build new file mode 100644 index 00000000..aed5d226 --- /dev/null +++ b/lib/librte_ethdev/meson.build @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +name = 'ethdev' +version = 9 +allow_experimental_apis = true +sources = files('ethdev_profile.c', + 'rte_ethdev.c', + 'rte_flow.c', + 'rte_mtr.c', + 'rte_tm.c') + +headers = files('rte_ethdev.h', + 'rte_ethdev_driver.h', + 'rte_ethdev_core.h', + 'rte_ethdev_pci.h', + 'rte_ethdev_vdev.h', + 'rte_eth_ctrl.h', + 'rte_dev_info.h', + 'rte_flow.h', + 'rte_flow_driver.h', + 'rte_mtr.h', + 'rte_mtr_driver.h', + 'rte_tm.h', + 'rte_tm_driver.h') + +deps += ['net', 'kvargs'] diff --git a/lib/librte_ethdev/rte_dev_info.h b/lib/librte_ethdev/rte_dev_info.h new file mode 100644 index 00000000..fea5da88 --- /dev/null +++ b/lib/librte_ethdev/rte_dev_info.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2015 Intel Corporation + */ + +#ifndef _RTE_DEV_INFO_H_ +#define _RTE_DEV_INFO_H_ + +#include <stdint.h> + +/* + * Placeholder for accessing device registers + */ +struct rte_dev_reg_info { + void *data; /**< Buffer for return registers */ + uint32_t offset; /**< Start register table location for access */ + uint32_t length; /**< Number of registers to fetch */ + uint32_t width; /**< Size of device register */ + uint32_t version; /**< Device version */ +}; + +/* + * Placeholder for accessing device eeprom + */ +struct rte_dev_eeprom_info { + void *data; /**< Buffer for return eeprom */ + uint32_t offset; /**< Start eeprom address for access*/ + uint32_t length; /**< Length of eeprom region to access */ + uint32_t magic; /**< Device-specific key, such as device-id */ +}; + +/** + * Placeholder for accessing plugin module eeprom + */ +struct rte_eth_dev_module_info { + uint32_t type; /**< Type of plugin module eeprom */ + uint32_t eeprom_len; /**< Length of plugin module eeprom */ +}; + +/* EEPROM Standards for plug in modules */ +#define RTE_ETH_MODULE_SFF_8079 0x1 +#define RTE_ETH_MODULE_SFF_8079_LEN 256 +#define RTE_ETH_MODULE_SFF_8472 0x2 +#define RTE_ETH_MODULE_SFF_8472_LEN 512 +#define RTE_ETH_MODULE_SFF_8636 0x3 +#define RTE_ETH_MODULE_SFF_8636_LEN 256 +#define RTE_ETH_MODULE_SFF_8436 0x4 +#define RTE_ETH_MODULE_SFF_8436_LEN 256 + +#endif /* _RTE_DEV_INFO_H_ */ diff --git a/lib/librte_ethdev/rte_eth_ctrl.h b/lib/librte_ethdev/rte_eth_ctrl.h new file mode 100644 index 00000000..5ea8ae24 --- /dev/null +++ b/lib/librte_ethdev/rte_eth_ctrl.h @@ -0,0 +1,828 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2015 Intel Corporation + */ + +#ifndef _RTE_ETH_CTRL_H_ +#define _RTE_ETH_CTRL_H_ + +#include <stdint.h> +#include <rte_common.h> +#include "rte_ether.h" + +/** + * @file + * + * Ethernet device features and related data structures used + * by control APIs should be defined in this file. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * A packet can be identified by hardware as different flow types. Different + * NIC hardwares may support different flow types. + * Basically, the NIC hardware identifies the flow type as deep protocol as + * possible, and exclusively. For example, if a packet is identified as + * 'RTE_ETH_FLOW_NONFRAG_IPV4_TCP', it will not be any of other flow types, + * though it is an actual IPV4 packet. + * Note that the flow types are used to define RSS offload types in + * rte_ethdev.h. + */ +#define RTE_ETH_FLOW_UNKNOWN 0 +#define RTE_ETH_FLOW_RAW 1 +#define RTE_ETH_FLOW_IPV4 2 +#define RTE_ETH_FLOW_FRAG_IPV4 3 +#define RTE_ETH_FLOW_NONFRAG_IPV4_TCP 4 +#define RTE_ETH_FLOW_NONFRAG_IPV4_UDP 5 +#define RTE_ETH_FLOW_NONFRAG_IPV4_SCTP 6 +#define RTE_ETH_FLOW_NONFRAG_IPV4_OTHER 7 +#define RTE_ETH_FLOW_IPV6 8 +#define RTE_ETH_FLOW_FRAG_IPV6 9 +#define RTE_ETH_FLOW_NONFRAG_IPV6_TCP 10 +#define RTE_ETH_FLOW_NONFRAG_IPV6_UDP 11 +#define RTE_ETH_FLOW_NONFRAG_IPV6_SCTP 12 +#define RTE_ETH_FLOW_NONFRAG_IPV6_OTHER 13 +#define RTE_ETH_FLOW_L2_PAYLOAD 14 +#define RTE_ETH_FLOW_IPV6_EX 15 +#define RTE_ETH_FLOW_IPV6_TCP_EX 16 +#define RTE_ETH_FLOW_IPV6_UDP_EX 17 +#define RTE_ETH_FLOW_PORT 18 + /**< Consider device port number as a flow differentiator */ +#define RTE_ETH_FLOW_VXLAN 19 /**< VXLAN protocol based flow */ +#define RTE_ETH_FLOW_GENEVE 20 /**< GENEVE protocol based flow */ +#define RTE_ETH_FLOW_NVGRE 21 /**< NVGRE protocol based flow */ +#define RTE_ETH_FLOW_VXLAN_GPE 22 /**< VXLAN-GPE protocol based flow */ +#define RTE_ETH_FLOW_MAX 23 + +/** + * Feature filter types + */ +enum rte_filter_type { + RTE_ETH_FILTER_NONE = 0, + RTE_ETH_FILTER_MACVLAN, + RTE_ETH_FILTER_ETHERTYPE, + RTE_ETH_FILTER_FLEXIBLE, + RTE_ETH_FILTER_SYN, + RTE_ETH_FILTER_NTUPLE, + RTE_ETH_FILTER_TUNNEL, + RTE_ETH_FILTER_FDIR, + RTE_ETH_FILTER_HASH, + RTE_ETH_FILTER_L2_TUNNEL, + RTE_ETH_FILTER_GENERIC, + RTE_ETH_FILTER_MAX +}; + +/** + * Generic operations on filters + */ +enum rte_filter_op { + /** used to check whether the type filter is supported */ + RTE_ETH_FILTER_NOP = 0, + RTE_ETH_FILTER_ADD, /**< add filter entry */ + RTE_ETH_FILTER_UPDATE, /**< update filter entry */ + RTE_ETH_FILTER_DELETE, /**< delete filter entry */ + RTE_ETH_FILTER_FLUSH, /**< flush all entries */ + RTE_ETH_FILTER_GET, /**< get filter entry */ + RTE_ETH_FILTER_SET, /**< configurations */ + RTE_ETH_FILTER_INFO, /**< retrieve information */ + RTE_ETH_FILTER_STATS, /**< retrieve statistics */ + RTE_ETH_FILTER_OP_MAX +}; + +/** + * MAC filter type + */ +enum rte_mac_filter_type { + RTE_MAC_PERFECT_MATCH = 1, /**< exact match of MAC addr. */ + RTE_MACVLAN_PERFECT_MATCH, /**< exact match of MAC addr and VLAN ID. */ + RTE_MAC_HASH_MATCH, /**< hash match of MAC addr. */ + /** hash match of MAC addr and exact match of VLAN ID. */ + RTE_MACVLAN_HASH_MATCH, +}; + +/** + * MAC filter info + */ +struct rte_eth_mac_filter { + uint8_t is_vf; /**< 1 for VF, 0 for port dev */ + uint16_t dst_id; /**< VF ID, available when is_vf is 1*/ + enum rte_mac_filter_type filter_type; /**< MAC filter type */ + struct ether_addr mac_addr; +}; + +/** + * Define all structures for Ethertype Filter type. + */ + +#define RTE_ETHTYPE_FLAGS_MAC 0x0001 /**< If set, compare mac */ +#define RTE_ETHTYPE_FLAGS_DROP 0x0002 /**< If set, drop packet when match */ + +/** + * A structure used to define the ethertype filter entry + * to support RTE_ETH_FILTER_ETHERTYPE with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_ethertype_filter { + struct ether_addr mac_addr; /**< Mac address to match. */ + uint16_t ether_type; /**< Ether type to match */ + uint16_t flags; /**< Flags from RTE_ETHTYPE_FLAGS_* */ + uint16_t queue; /**< Queue assigned to when match*/ +}; + +#define RTE_FLEX_FILTER_MAXLEN 128 /**< bytes to use in flex filter. */ +#define RTE_FLEX_FILTER_MASK_SIZE \ + (RTE_ALIGN(RTE_FLEX_FILTER_MAXLEN, CHAR_BIT) / CHAR_BIT) + /**< mask bytes in flex filter. */ + +/** + * A structure used to define the flex filter entry + * to support RTE_ETH_FILTER_FLEXIBLE with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_flex_filter { + uint16_t len; + uint8_t bytes[RTE_FLEX_FILTER_MAXLEN]; /**< flex bytes in big endian.*/ + uint8_t mask[RTE_FLEX_FILTER_MASK_SIZE]; /**< if mask bit is 1b, do + not compare corresponding byte. */ + uint8_t priority; + uint16_t queue; /**< Queue assigned to when match. */ +}; + +/** + * A structure used to define the TCP syn filter entry + * to support RTE_ETH_FILTER_SYN with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_syn_filter { + uint8_t hig_pri; /**< 1 - higher priority than other filters, + 0 - lower priority. */ + uint16_t queue; /**< Queue assigned to when match */ +}; + +/** + * Define all structures for ntuple Filter type. + */ + +#define RTE_NTUPLE_FLAGS_DST_IP 0x0001 /**< If set, dst_ip is part of ntuple */ +#define RTE_NTUPLE_FLAGS_SRC_IP 0x0002 /**< If set, src_ip is part of ntuple */ +#define RTE_NTUPLE_FLAGS_DST_PORT 0x0004 /**< If set, dst_port is part of ntuple */ +#define RTE_NTUPLE_FLAGS_SRC_PORT 0x0008 /**< If set, src_port is part of ntuple */ +#define RTE_NTUPLE_FLAGS_PROTO 0x0010 /**< If set, protocol is part of ntuple */ +#define RTE_NTUPLE_FLAGS_TCP_FLAG 0x0020 /**< If set, tcp flag is involved */ + +#define RTE_5TUPLE_FLAGS ( \ + RTE_NTUPLE_FLAGS_DST_IP | \ + RTE_NTUPLE_FLAGS_SRC_IP | \ + RTE_NTUPLE_FLAGS_DST_PORT | \ + RTE_NTUPLE_FLAGS_SRC_PORT | \ + RTE_NTUPLE_FLAGS_PROTO) + +#define RTE_2TUPLE_FLAGS ( \ + RTE_NTUPLE_FLAGS_DST_PORT | \ + RTE_NTUPLE_FLAGS_PROTO) + +#define TCP_URG_FLAG 0x20 +#define TCP_ACK_FLAG 0x10 +#define TCP_PSH_FLAG 0x08 +#define TCP_RST_FLAG 0x04 +#define TCP_SYN_FLAG 0x02 +#define TCP_FIN_FLAG 0x01 +#define TCP_FLAG_ALL 0x3F + +/** + * A structure used to define the ntuple filter entry + * to support RTE_ETH_FILTER_NTUPLE with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_ntuple_filter { + uint16_t flags; /**< Flags from RTE_NTUPLE_FLAGS_* */ + uint32_t dst_ip; /**< Destination IP address in big endian. */ + uint32_t dst_ip_mask; /**< Mask of destination IP address. */ + uint32_t src_ip; /**< Source IP address in big endian. */ + uint32_t src_ip_mask; /**< Mask of destination IP address. */ + uint16_t dst_port; /**< Destination port in big endian. */ + uint16_t dst_port_mask; /**< Mask of destination port. */ + uint16_t src_port; /**< Source Port in big endian. */ + uint16_t src_port_mask; /**< Mask of source port. */ + uint8_t proto; /**< L4 protocol. */ + uint8_t proto_mask; /**< Mask of L4 protocol. */ + /** tcp_flags only meaningful when the proto is TCP. + The packet matched above ntuple fields and contain + any set bit in tcp_flags will hit this filter. */ + uint8_t tcp_flags; + uint16_t priority; /**< seven levels (001b-111b), 111b is highest, + used when more than one filter matches. */ + uint16_t queue; /**< Queue assigned to when match*/ +}; + +/** + * Tunneled type. + */ +enum rte_eth_tunnel_type { + RTE_TUNNEL_TYPE_NONE = 0, + RTE_TUNNEL_TYPE_VXLAN, + RTE_TUNNEL_TYPE_GENEVE, + RTE_TUNNEL_TYPE_TEREDO, + RTE_TUNNEL_TYPE_NVGRE, + RTE_TUNNEL_TYPE_IP_IN_GRE, + RTE_L2_TUNNEL_TYPE_E_TAG, + RTE_TUNNEL_TYPE_MAX, +}; + +/** + * filter type of tunneling packet + */ +#define ETH_TUNNEL_FILTER_OMAC 0x01 /**< filter by outer MAC addr */ +#define ETH_TUNNEL_FILTER_OIP 0x02 /**< filter by outer IP Addr */ +#define ETH_TUNNEL_FILTER_TENID 0x04 /**< filter by tenant ID */ +#define ETH_TUNNEL_FILTER_IMAC 0x08 /**< filter by inner MAC addr */ +#define ETH_TUNNEL_FILTER_IVLAN 0x10 /**< filter by inner VLAN ID */ +#define ETH_TUNNEL_FILTER_IIP 0x20 /**< filter by inner IP addr */ + +#define RTE_TUNNEL_FILTER_IMAC_IVLAN (ETH_TUNNEL_FILTER_IMAC | \ + ETH_TUNNEL_FILTER_IVLAN) +#define RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID (ETH_TUNNEL_FILTER_IMAC | \ + ETH_TUNNEL_FILTER_IVLAN | \ + ETH_TUNNEL_FILTER_TENID) +#define RTE_TUNNEL_FILTER_IMAC_TENID (ETH_TUNNEL_FILTER_IMAC | \ + ETH_TUNNEL_FILTER_TENID) +#define RTE_TUNNEL_FILTER_OMAC_TENID_IMAC (ETH_TUNNEL_FILTER_OMAC | \ + ETH_TUNNEL_FILTER_TENID | \ + ETH_TUNNEL_FILTER_IMAC) + +/** + * Select IPv4 or IPv6 for tunnel filters. + */ +enum rte_tunnel_iptype { + RTE_TUNNEL_IPTYPE_IPV4 = 0, /**< IPv4. */ + RTE_TUNNEL_IPTYPE_IPV6, /**< IPv6. */ +}; + +/** + * Tunneling Packet filter configuration. + */ +struct rte_eth_tunnel_filter_conf { + struct ether_addr outer_mac; /**< Outer MAC address to match. */ + struct ether_addr inner_mac; /**< Inner MAC address to match. */ + uint16_t inner_vlan; /**< Inner VLAN to match. */ + enum rte_tunnel_iptype ip_type; /**< IP address type. */ + /** Outer destination IP address to match if ETH_TUNNEL_FILTER_OIP + is set in filter_type, or inner destination IP address to match + if ETH_TUNNEL_FILTER_IIP is set in filter_type . */ + union { + uint32_t ipv4_addr; /**< IPv4 address in big endian. */ + uint32_t ipv6_addr[4]; /**< IPv6 address in big endian. */ + } ip_addr; + /** Flags from ETH_TUNNEL_FILTER_XX - see above. */ + uint16_t filter_type; + enum rte_eth_tunnel_type tunnel_type; /**< Tunnel Type. */ + uint32_t tenant_id; /**< Tenant ID to match. VNI, GRE key... */ + uint16_t queue_id; /**< Queue assigned to if match. */ +}; + +/** + * Global eth device configuration type. + */ +enum rte_eth_global_cfg_type { + RTE_ETH_GLOBAL_CFG_TYPE_UNKNOWN = 0, + RTE_ETH_GLOBAL_CFG_TYPE_GRE_KEY_LEN, + RTE_ETH_GLOBAL_CFG_TYPE_MAX, +}; + +/** + * Global eth device configuration. + */ +struct rte_eth_global_cfg { + enum rte_eth_global_cfg_type cfg_type; /**< Global config type. */ + union { + uint8_t gre_key_len; /**< Valid GRE key length in byte. */ + uint64_t reserved; /**< Reserve space for future use. */ + } cfg; +}; + +#define RTE_ETH_FDIR_MAX_FLEXLEN 16 /**< Max length of flexbytes. */ +#define RTE_ETH_INSET_SIZE_MAX 128 /**< Max length of input set. */ + +/** + * Input set fields for Flow Director and Hash filters + */ +enum rte_eth_input_set_field { + RTE_ETH_INPUT_SET_UNKNOWN = 0, + + /* L2 */ + RTE_ETH_INPUT_SET_L2_SRC_MAC = 1, + RTE_ETH_INPUT_SET_L2_DST_MAC, + RTE_ETH_INPUT_SET_L2_OUTER_VLAN, + RTE_ETH_INPUT_SET_L2_INNER_VLAN, + RTE_ETH_INPUT_SET_L2_ETHERTYPE, + + /* L3 */ + RTE_ETH_INPUT_SET_L3_SRC_IP4 = 129, + RTE_ETH_INPUT_SET_L3_DST_IP4, + RTE_ETH_INPUT_SET_L3_SRC_IP6, + RTE_ETH_INPUT_SET_L3_DST_IP6, + RTE_ETH_INPUT_SET_L3_IP4_TOS, + RTE_ETH_INPUT_SET_L3_IP4_PROTO, + RTE_ETH_INPUT_SET_L3_IP6_TC, + RTE_ETH_INPUT_SET_L3_IP6_NEXT_HEADER, + RTE_ETH_INPUT_SET_L3_IP4_TTL, + RTE_ETH_INPUT_SET_L3_IP6_HOP_LIMITS, + + /* L4 */ + RTE_ETH_INPUT_SET_L4_UDP_SRC_PORT = 257, + RTE_ETH_INPUT_SET_L4_UDP_DST_PORT, + RTE_ETH_INPUT_SET_L4_TCP_SRC_PORT, + RTE_ETH_INPUT_SET_L4_TCP_DST_PORT, + RTE_ETH_INPUT_SET_L4_SCTP_SRC_PORT, + RTE_ETH_INPUT_SET_L4_SCTP_DST_PORT, + RTE_ETH_INPUT_SET_L4_SCTP_VERIFICATION_TAG, + + /* Tunnel */ + RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_DST_MAC = 385, + RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_SRC_MAC, + RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_VLAN, + RTE_ETH_INPUT_SET_TUNNEL_L4_UDP_KEY, + RTE_ETH_INPUT_SET_TUNNEL_GRE_KEY, + + /* Flexible Payload */ + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_1ST_WORD = 641, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_2ND_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_3RD_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_4TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_5TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_6TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_7TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_8TH_WORD, + + RTE_ETH_INPUT_SET_DEFAULT = 65533, + RTE_ETH_INPUT_SET_NONE = 65534, + RTE_ETH_INPUT_SET_MAX = 65535, +}; + +/** + * Filters input set operations + */ +enum rte_filter_input_set_op { + RTE_ETH_INPUT_SET_OP_UNKNOWN, + RTE_ETH_INPUT_SET_SELECT, /**< select input set */ + RTE_ETH_INPUT_SET_ADD, /**< add input set entry */ + RTE_ETH_INPUT_SET_OP_MAX +}; + + +/** + * A structure used to define the input set configuration for + * flow director and hash filters + */ +struct rte_eth_input_set_conf { + uint16_t flow_type; + uint16_t inset_size; + enum rte_eth_input_set_field field[RTE_ETH_INSET_SIZE_MAX]; + enum rte_filter_input_set_op op; +}; + +/** + * A structure used to define the input for L2 flow + */ +struct rte_eth_l2_flow { + uint16_t ether_type; /**< Ether type in big endian */ +}; + +/** + * A structure used to define the input for IPV4 flow + */ +struct rte_eth_ipv4_flow { + uint32_t src_ip; /**< IPv4 source address in big endian. */ + uint32_t dst_ip; /**< IPv4 destination address in big endian. */ + uint8_t tos; /**< Type of service to match. */ + uint8_t ttl; /**< Time to live to match. */ + uint8_t proto; /**< Protocol, next header in big endian. */ +}; + +/** + * A structure used to define the input for IPV4 UDP flow + */ +struct rte_eth_udpv4_flow { + struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */ + uint16_t src_port; /**< UDP source port in big endian. */ + uint16_t dst_port; /**< UDP destination port in big endian. */ +}; + +/** + * A structure used to define the input for IPV4 TCP flow + */ +struct rte_eth_tcpv4_flow { + struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */ + uint16_t src_port; /**< TCP source port in big endian. */ + uint16_t dst_port; /**< TCP destination port in big endian. */ +}; + +/** + * A structure used to define the input for IPV4 SCTP flow + */ +struct rte_eth_sctpv4_flow { + struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */ + uint16_t src_port; /**< SCTP source port in big endian. */ + uint16_t dst_port; /**< SCTP destination port in big endian. */ + uint32_t verify_tag; /**< Verify tag in big endian */ +}; + +/** + * A structure used to define the input for IPV6 flow + */ +struct rte_eth_ipv6_flow { + uint32_t src_ip[4]; /**< IPv6 source address in big endian. */ + uint32_t dst_ip[4]; /**< IPv6 destination address in big endian. */ + uint8_t tc; /**< Traffic class to match. */ + uint8_t proto; /**< Protocol, next header to match. */ + uint8_t hop_limits; /**< Hop limits to match. */ +}; + +/** + * A structure used to define the input for IPV6 UDP flow + */ +struct rte_eth_udpv6_flow { + struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ + uint16_t src_port; /**< UDP source port in big endian. */ + uint16_t dst_port; /**< UDP destination port in big endian. */ +}; + +/** + * A structure used to define the input for IPV6 TCP flow + */ +struct rte_eth_tcpv6_flow { + struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ + uint16_t src_port; /**< TCP source port to in big endian. */ + uint16_t dst_port; /**< TCP destination port in big endian. */ +}; + +/** + * A structure used to define the input for IPV6 SCTP flow + */ +struct rte_eth_sctpv6_flow { + struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ + uint16_t src_port; /**< SCTP source port in big endian. */ + uint16_t dst_port; /**< SCTP destination port in big endian. */ + uint32_t verify_tag; /**< Verify tag in big endian. */ +}; + +/** + * A structure used to define the input for MAC VLAN flow + */ +struct rte_eth_mac_vlan_flow { + struct ether_addr mac_addr; /**< Mac address to match. */ +}; + +/** + * Tunnel type for flow director. + */ +enum rte_eth_fdir_tunnel_type { + RTE_FDIR_TUNNEL_TYPE_UNKNOWN = 0, + RTE_FDIR_TUNNEL_TYPE_NVGRE, + RTE_FDIR_TUNNEL_TYPE_VXLAN, +}; + +/** + * A structure used to define the input for tunnel flow, now it's VxLAN or + * NVGRE + */ +struct rte_eth_tunnel_flow { + enum rte_eth_fdir_tunnel_type tunnel_type; /**< Tunnel type to match. */ + /** Tunnel ID to match. TNI, VNI... in big endian. */ + uint32_t tunnel_id; + struct ether_addr mac_addr; /**< Mac address to match. */ +}; + +/** + * An union contains the inputs for all types of flow + * Items in flows need to be in big endian + */ +union rte_eth_fdir_flow { + struct rte_eth_l2_flow l2_flow; + struct rte_eth_udpv4_flow udp4_flow; + struct rte_eth_tcpv4_flow tcp4_flow; + struct rte_eth_sctpv4_flow sctp4_flow; + struct rte_eth_ipv4_flow ip4_flow; + struct rte_eth_udpv6_flow udp6_flow; + struct rte_eth_tcpv6_flow tcp6_flow; + struct rte_eth_sctpv6_flow sctp6_flow; + struct rte_eth_ipv6_flow ipv6_flow; + struct rte_eth_mac_vlan_flow mac_vlan_flow; + struct rte_eth_tunnel_flow tunnel_flow; +}; + +/** + * A structure used to contain extend input of flow + */ +struct rte_eth_fdir_flow_ext { + uint16_t vlan_tci; + uint8_t flexbytes[RTE_ETH_FDIR_MAX_FLEXLEN]; + /**< It is filled by the flexible payload to match. */ + uint8_t is_vf; /**< 1 for VF, 0 for port dev */ + uint16_t dst_id; /**< VF ID, available when is_vf is 1*/ +}; + +/** + * A structure used to define the input for a flow director filter entry + */ +struct rte_eth_fdir_input { + uint16_t flow_type; + union rte_eth_fdir_flow flow; + /**< Flow fields to match, dependent on flow_type */ + struct rte_eth_fdir_flow_ext flow_ext; + /**< Additional fields to match */ +}; + +/** + * Behavior will be taken if FDIR match + */ +enum rte_eth_fdir_behavior { + RTE_ETH_FDIR_ACCEPT = 0, + RTE_ETH_FDIR_REJECT, + RTE_ETH_FDIR_PASSTHRU, +}; + +/** + * Flow director report status + * It defines what will be reported if FDIR entry is matched. + */ +enum rte_eth_fdir_status { + RTE_ETH_FDIR_NO_REPORT_STATUS = 0, /**< Report nothing. */ + RTE_ETH_FDIR_REPORT_ID, /**< Only report FD ID. */ + RTE_ETH_FDIR_REPORT_ID_FLEX_4, /**< Report FD ID and 4 flex bytes. */ + RTE_ETH_FDIR_REPORT_FLEX_8, /**< Report 8 flex bytes. */ +}; + +/** + * A structure used to define an action when match FDIR packet filter. + */ +struct rte_eth_fdir_action { + uint16_t rx_queue; /**< Queue assigned to if FDIR match. */ + enum rte_eth_fdir_behavior behavior; /**< Behavior will be taken */ + enum rte_eth_fdir_status report_status; /**< Status report option */ + uint8_t flex_off; + /**< If report_status is RTE_ETH_FDIR_REPORT_ID_FLEX_4 or + RTE_ETH_FDIR_REPORT_FLEX_8, flex_off specifies where the reported + flex bytes start from in flexible payload. */ +}; + +/** + * A structure used to define the flow director filter entry by filter_ctrl API + * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_ADD and + * RTE_ETH_FILTER_DELETE operations. + */ +struct rte_eth_fdir_filter { + uint32_t soft_id; + /**< ID, an unique value is required when deal with FDIR entry */ + struct rte_eth_fdir_input input; /**< Input set */ + struct rte_eth_fdir_action action; /**< Action taken when match */ +}; + +/** + * A structure used to configure FDIR masks that are used by the device + * to match the various fields of RX packet headers. + */ +struct rte_eth_fdir_masks { + uint16_t vlan_tci_mask; /**< Bit mask for vlan_tci in big endian */ + /** Bit mask for ipv4 flow in big endian. */ + struct rte_eth_ipv4_flow ipv4_mask; + /** Bit maks for ipv6 flow in big endian. */ + struct rte_eth_ipv6_flow ipv6_mask; + /** Bit mask for L4 source port in big endian. */ + uint16_t src_port_mask; + /** Bit mask for L4 destination port in big endian. */ + uint16_t dst_port_mask; + /** 6 bit mask for proper 6 bytes of Mac address, bit 0 matches the + first byte on the wire */ + uint8_t mac_addr_byte_mask; + /** Bit mask for tunnel ID in big endian. */ + uint32_t tunnel_id_mask; + uint8_t tunnel_type_mask; /**< 1 - Match tunnel type, + 0 - Ignore tunnel type. */ +}; + +/** + * Payload type + */ +enum rte_eth_payload_type { + RTE_ETH_PAYLOAD_UNKNOWN = 0, + RTE_ETH_RAW_PAYLOAD, + RTE_ETH_L2_PAYLOAD, + RTE_ETH_L3_PAYLOAD, + RTE_ETH_L4_PAYLOAD, + RTE_ETH_PAYLOAD_MAX = 8, +}; + +/** + * A structure used to select bytes extracted from the protocol layers to + * flexible payload for filter + */ +struct rte_eth_flex_payload_cfg { + enum rte_eth_payload_type type; /**< Payload type */ + uint16_t src_offset[RTE_ETH_FDIR_MAX_FLEXLEN]; + /**< Offset in bytes from the beginning of packet's payload + src_offset[i] indicates the flexbyte i's offset in original + packet payload. This value should be less than + flex_payload_limit in struct rte_eth_fdir_info.*/ +}; + +/** + * A structure used to define FDIR masks for flexible payload + * for each flow type + */ +struct rte_eth_fdir_flex_mask { + uint16_t flow_type; + uint8_t mask[RTE_ETH_FDIR_MAX_FLEXLEN]; + /**< Mask for the whole flexible payload */ +}; + +/** + * A structure used to define all flexible payload related setting + * include flex payload and flex mask + */ +struct rte_eth_fdir_flex_conf { + uint16_t nb_payloads; /**< The number of following payload cfg */ + uint16_t nb_flexmasks; /**< The number of following mask */ + struct rte_eth_flex_payload_cfg flex_set[RTE_ETH_PAYLOAD_MAX]; + /**< Flex payload configuration for each payload type */ + struct rte_eth_fdir_flex_mask flex_mask[RTE_ETH_FLOW_MAX]; + /**< Flex mask configuration for each flow type */ +}; + +/** + * Flow Director setting modes: none, signature or perfect. + */ +enum rte_fdir_mode { + RTE_FDIR_MODE_NONE = 0, /**< Disable FDIR support. */ + RTE_FDIR_MODE_SIGNATURE, /**< Enable FDIR signature filter mode. */ + RTE_FDIR_MODE_PERFECT, /**< Enable FDIR perfect filter mode. */ + RTE_FDIR_MODE_PERFECT_MAC_VLAN, /**< Enable FDIR filter mode - MAC VLAN. */ + RTE_FDIR_MODE_PERFECT_TUNNEL, /**< Enable FDIR filter mode - tunnel. */ +}; + +#define UINT64_BIT (CHAR_BIT * sizeof(uint64_t)) +#define RTE_FLOW_MASK_ARRAY_SIZE \ + (RTE_ALIGN(RTE_ETH_FLOW_MAX, UINT64_BIT)/UINT64_BIT) + +/** + * A structure used to get the information of flow director filter. + * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_INFO operation. + * It includes the mode, flexible payload configuration information, + * capabilities and supported flow types, flexible payload characters. + * It can be gotten to help taking specific configurations per device. + */ +struct rte_eth_fdir_info { + enum rte_fdir_mode mode; /**< Flow director mode */ + struct rte_eth_fdir_masks mask; + /** Flex payload configuration information */ + struct rte_eth_fdir_flex_conf flex_conf; + uint32_t guarant_spc; /**< Guaranteed spaces.*/ + uint32_t best_spc; /**< Best effort spaces.*/ + /** Bit mask for every supported flow type. */ + uint64_t flow_types_mask[RTE_FLOW_MASK_ARRAY_SIZE]; + uint32_t max_flexpayload; /**< Total flex payload in bytes. */ + /** Flexible payload unit in bytes. Size and alignments of all flex + payload segments should be multiplies of this value. */ + uint32_t flex_payload_unit; + /** Max number of flexible payload continuous segments. + Each segment should be a multiple of flex_payload_unit.*/ + uint32_t max_flex_payload_segment_num; + /** Maximum src_offset in bytes allowed. It indicates that + src_offset[i] in struct rte_eth_flex_payload_cfg should be less + than this value. */ + uint16_t flex_payload_limit; + /** Flex bitmask unit in bytes. Size of flex bitmasks should be a + multiply of this value. */ + uint32_t flex_bitmask_unit; + /** Max supported size of flex bitmasks in flex_bitmask_unit */ + uint32_t max_flex_bitmask_num; +}; + +/** + * A structure used to define the statistics of flow director. + * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_STATS operation. + */ +struct rte_eth_fdir_stats { + uint32_t collision; /**< Number of filters with collision. */ + uint32_t free; /**< Number of free filters. */ + uint32_t maxhash; + /**< The lookup hash value of the added filter that updated the value + of the MAXLEN field */ + uint32_t maxlen; /**< Longest linked list of filters. */ + uint64_t add; /**< Number of added filters. */ + uint64_t remove; /**< Number of removed filters. */ + uint64_t f_add; /**< Number of failed added filters. */ + uint64_t f_remove; /**< Number of failed removed filters. */ + uint32_t guarant_cnt; /**< Number of filters in guaranteed spaces. */ + uint32_t best_cnt; /**< Number of filters in best effort spaces. */ +}; + +/** + * Flow Director filter information types. + */ +enum rte_eth_fdir_filter_info_type { + RTE_ETH_FDIR_FILTER_INFO_TYPE_UNKNOWN = 0, + /** Flow Director filter input set configuration */ + RTE_ETH_FDIR_FILTER_INPUT_SET_SELECT, + RTE_ETH_FDIR_FILTER_INFO_TYPE_MAX, +}; + +/** + * A structure used to set FDIR filter information, to support filter type + * of 'RTE_ETH_FILTER_FDIR' RTE_ETH_FDIR_FILTER_INPUT_SET_SELECT operation. + */ +struct rte_eth_fdir_filter_info { + enum rte_eth_fdir_filter_info_type info_type; /**< Information type */ + /** Details of fdir filter information */ + union { + /** Flow Director input set configuration per port */ + struct rte_eth_input_set_conf input_set_conf; + } info; +}; + +/** + * Hash filter information types. + * - RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT is for getting/setting the + * information/configuration of 'symmetric hash enable' per port. + * - RTE_ETH_HASH_FILTER_GLOBAL_CONFIG is for getting/setting the global + * configurations of hash filters. Those global configurations are valid + * for all ports of the same NIC. + * - RTE_ETH_HASH_FILTER_INPUT_SET_SELECT is for setting the global + * hash input set fields + */ +enum rte_eth_hash_filter_info_type { + RTE_ETH_HASH_FILTER_INFO_TYPE_UNKNOWN = 0, + /** Symmetric hash enable per port */ + RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT, + /** Configure globally for hash filter */ + RTE_ETH_HASH_FILTER_GLOBAL_CONFIG, + /** Global Hash filter input set configuration */ + RTE_ETH_HASH_FILTER_INPUT_SET_SELECT, + RTE_ETH_HASH_FILTER_INFO_TYPE_MAX, +}; + +/** + * Hash function types. + */ +enum rte_eth_hash_function { + RTE_ETH_HASH_FUNCTION_DEFAULT = 0, + RTE_ETH_HASH_FUNCTION_TOEPLITZ, /**< Toeplitz */ + RTE_ETH_HASH_FUNCTION_SIMPLE_XOR, /**< Simple XOR */ + RTE_ETH_HASH_FUNCTION_MAX, +}; + +#define RTE_SYM_HASH_MASK_ARRAY_SIZE \ + (RTE_ALIGN(RTE_ETH_FLOW_MAX, UINT64_BIT)/UINT64_BIT) +/** + * A structure used to set or get global hash function configurations which + * include symmetric hash enable per flow type and hash function type. + * Each bit in sym_hash_enable_mask[] indicates if the symmetric hash of the + * corresponding flow type is enabled or not. + * Each bit in valid_bit_mask[] indicates if the corresponding bit in + * sym_hash_enable_mask[] is valid or not. For the configurations gotten, it + * also means if the flow type is supported by hardware or not. + */ +struct rte_eth_hash_global_conf { + enum rte_eth_hash_function hash_func; /**< Hash function type */ + /** Bit mask for symmetric hash enable per flow type */ + uint64_t sym_hash_enable_mask[RTE_SYM_HASH_MASK_ARRAY_SIZE]; + /** Bit mask indicates if the corresponding bit is valid */ + uint64_t valid_bit_mask[RTE_SYM_HASH_MASK_ARRAY_SIZE]; +}; + +/** + * A structure used to set or get hash filter information, to support filter + * type of 'RTE_ETH_FILTER_HASH' and its operations. + */ +struct rte_eth_hash_filter_info { + enum rte_eth_hash_filter_info_type info_type; /**< Information type */ + /** Details of hash filter information */ + union { + /** For RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT */ + uint8_t enable; + /** Global configurations of hash filter */ + struct rte_eth_hash_global_conf global_conf; + /** Global configurations of hash filter input set */ + struct rte_eth_input_set_conf input_set_conf; + } info; +}; + +/** + * l2 tunnel configuration. + */ +struct rte_eth_l2_tunnel_conf { + enum rte_eth_tunnel_type l2_tunnel_type; + uint16_t ether_type; /* ether type in l2 header */ + uint32_t tunnel_id; /* port tag id for e-tag */ + uint16_t vf_id; /* VF id for tag insertion */ + uint32_t pool; /* destination pool for tag based forwarding */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ETH_CTRL_H_ */ diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c new file mode 100644 index 00000000..cd4bfd3c --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev.c @@ -0,0 +1,4526 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ + +#include <sys/types.h> +#include <sys/queue.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <errno.h> +#include <stdbool.h> +#include <stdint.h> +#include <inttypes.h> +#include <netinet/in.h> + +#include <rte_byteorder.h> +#include <rte_log.h> +#include <rte_debug.h> +#include <rte_interrupts.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_launch.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_lcore.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_common.h> +#include <rte_mempool.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_errno.h> +#include <rte_spinlock.h> +#include <rte_string_fns.h> +#include <rte_kvargs.h> + +#include "rte_ether.h" +#include "rte_ethdev.h" +#include "rte_ethdev_driver.h" +#include "ethdev_profile.h" + +static int ethdev_logtype; + +#define ethdev_log(level, fmt, ...) \ + rte_log(RTE_LOG_ ## level, ethdev_logtype, fmt "\n", ## __VA_ARGS__) + +static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data"; +struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS]; +static uint16_t eth_dev_last_created_port; + +/* spinlock for eth device callbacks */ +static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER; + +/* spinlock for add/remove rx callbacks */ +static rte_spinlock_t rte_eth_rx_cb_lock = RTE_SPINLOCK_INITIALIZER; + +/* spinlock for add/remove tx callbacks */ +static rte_spinlock_t rte_eth_tx_cb_lock = RTE_SPINLOCK_INITIALIZER; + +/* spinlock for shared data allocation */ +static rte_spinlock_t rte_eth_shared_data_lock = RTE_SPINLOCK_INITIALIZER; + +/* store statistics names and its offset in stats structure */ +struct rte_eth_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + unsigned offset; +}; + +/* Shared memory between primary and secondary processes. */ +static struct { + uint64_t next_owner_id; + rte_spinlock_t ownership_lock; + struct rte_eth_dev_data data[RTE_MAX_ETHPORTS]; +} *rte_eth_dev_shared_data; + +static const struct rte_eth_xstats_name_off rte_stats_strings[] = { + {"rx_good_packets", offsetof(struct rte_eth_stats, ipackets)}, + {"tx_good_packets", offsetof(struct rte_eth_stats, opackets)}, + {"rx_good_bytes", offsetof(struct rte_eth_stats, ibytes)}, + {"tx_good_bytes", offsetof(struct rte_eth_stats, obytes)}, + {"rx_missed_errors", offsetof(struct rte_eth_stats, imissed)}, + {"rx_errors", offsetof(struct rte_eth_stats, ierrors)}, + {"tx_errors", offsetof(struct rte_eth_stats, oerrors)}, + {"rx_mbuf_allocation_errors", offsetof(struct rte_eth_stats, + rx_nombuf)}, +}; + +#define RTE_NB_STATS (sizeof(rte_stats_strings) / sizeof(rte_stats_strings[0])) + +static const struct rte_eth_xstats_name_off rte_rxq_stats_strings[] = { + {"packets", offsetof(struct rte_eth_stats, q_ipackets)}, + {"bytes", offsetof(struct rte_eth_stats, q_ibytes)}, + {"errors", offsetof(struct rte_eth_stats, q_errors)}, +}; + +#define RTE_NB_RXQ_STATS (sizeof(rte_rxq_stats_strings) / \ + sizeof(rte_rxq_stats_strings[0])) + +static const struct rte_eth_xstats_name_off rte_txq_stats_strings[] = { + {"packets", offsetof(struct rte_eth_stats, q_opackets)}, + {"bytes", offsetof(struct rte_eth_stats, q_obytes)}, +}; +#define RTE_NB_TXQ_STATS (sizeof(rte_txq_stats_strings) / \ + sizeof(rte_txq_stats_strings[0])) + +#define RTE_RX_OFFLOAD_BIT2STR(_name) \ + { DEV_RX_OFFLOAD_##_name, #_name } + +static const struct { + uint64_t offload; + const char *name; +} rte_rx_offload_names[] = { + RTE_RX_OFFLOAD_BIT2STR(VLAN_STRIP), + RTE_RX_OFFLOAD_BIT2STR(IPV4_CKSUM), + RTE_RX_OFFLOAD_BIT2STR(UDP_CKSUM), + RTE_RX_OFFLOAD_BIT2STR(TCP_CKSUM), + RTE_RX_OFFLOAD_BIT2STR(TCP_LRO), + RTE_RX_OFFLOAD_BIT2STR(QINQ_STRIP), + RTE_RX_OFFLOAD_BIT2STR(OUTER_IPV4_CKSUM), + RTE_RX_OFFLOAD_BIT2STR(MACSEC_STRIP), + RTE_RX_OFFLOAD_BIT2STR(HEADER_SPLIT), + RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER), + RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND), + RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME), + RTE_RX_OFFLOAD_BIT2STR(CRC_STRIP), + RTE_RX_OFFLOAD_BIT2STR(SCATTER), + RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP), + RTE_RX_OFFLOAD_BIT2STR(SECURITY), +}; + +#undef RTE_RX_OFFLOAD_BIT2STR + +#define RTE_TX_OFFLOAD_BIT2STR(_name) \ + { DEV_TX_OFFLOAD_##_name, #_name } + +static const struct { + uint64_t offload; + const char *name; +} rte_tx_offload_names[] = { + RTE_TX_OFFLOAD_BIT2STR(VLAN_INSERT), + RTE_TX_OFFLOAD_BIT2STR(IPV4_CKSUM), + RTE_TX_OFFLOAD_BIT2STR(UDP_CKSUM), + RTE_TX_OFFLOAD_BIT2STR(TCP_CKSUM), + RTE_TX_OFFLOAD_BIT2STR(SCTP_CKSUM), + RTE_TX_OFFLOAD_BIT2STR(TCP_TSO), + RTE_TX_OFFLOAD_BIT2STR(UDP_TSO), + RTE_TX_OFFLOAD_BIT2STR(OUTER_IPV4_CKSUM), + RTE_TX_OFFLOAD_BIT2STR(QINQ_INSERT), + RTE_TX_OFFLOAD_BIT2STR(VXLAN_TNL_TSO), + RTE_TX_OFFLOAD_BIT2STR(GRE_TNL_TSO), + RTE_TX_OFFLOAD_BIT2STR(IPIP_TNL_TSO), + RTE_TX_OFFLOAD_BIT2STR(GENEVE_TNL_TSO), + RTE_TX_OFFLOAD_BIT2STR(MACSEC_INSERT), + RTE_TX_OFFLOAD_BIT2STR(MT_LOCKFREE), + RTE_TX_OFFLOAD_BIT2STR(MULTI_SEGS), + RTE_TX_OFFLOAD_BIT2STR(MBUF_FAST_FREE), + RTE_TX_OFFLOAD_BIT2STR(SECURITY), +}; + +#undef RTE_TX_OFFLOAD_BIT2STR + +/** + * The user application callback description. + * + * It contains callback address to be registered by user application, + * the pointer to the parameters for callback, and the event type. + */ +struct rte_eth_dev_callback { + TAILQ_ENTRY(rte_eth_dev_callback) next; /**< Callbacks list */ + rte_eth_dev_cb_fn cb_fn; /**< Callback address */ + void *cb_arg; /**< Parameter for callback */ + void *ret_param; /**< Return parameter */ + enum rte_eth_event_type event; /**< Interrupt event type */ + uint32_t active; /**< Callback is executing */ +}; + +enum { + STAT_QMAP_TX = 0, + STAT_QMAP_RX +}; + +uint16_t +rte_eth_find_next(uint16_t port_id) +{ + while (port_id < RTE_MAX_ETHPORTS && + rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED && + rte_eth_devices[port_id].state != RTE_ETH_DEV_REMOVED) + port_id++; + + if (port_id >= RTE_MAX_ETHPORTS) + return RTE_MAX_ETHPORTS; + + return port_id; +} + +static void +rte_eth_dev_shared_data_prepare(void) +{ + const unsigned flags = 0; + const struct rte_memzone *mz; + + rte_spinlock_lock(&rte_eth_shared_data_lock); + + if (rte_eth_dev_shared_data == NULL) { + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* Allocate port data and ownership shared memory. */ + mz = rte_memzone_reserve(MZ_RTE_ETH_DEV_DATA, + sizeof(*rte_eth_dev_shared_data), + rte_socket_id(), flags); + } else + mz = rte_memzone_lookup(MZ_RTE_ETH_DEV_DATA); + if (mz == NULL) + rte_panic("Cannot allocate ethdev shared data\n"); + + rte_eth_dev_shared_data = mz->addr; + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + rte_eth_dev_shared_data->next_owner_id = + RTE_ETH_DEV_NO_OWNER + 1; + rte_spinlock_init(&rte_eth_dev_shared_data->ownership_lock); + memset(rte_eth_dev_shared_data->data, 0, + sizeof(rte_eth_dev_shared_data->data)); + } + } + + rte_spinlock_unlock(&rte_eth_shared_data_lock); +} + +static bool +is_allocated(const struct rte_eth_dev *ethdev) +{ + return ethdev->data->name[0] != '\0'; +} + +static struct rte_eth_dev * +_rte_eth_dev_allocated(const char *name) +{ + unsigned i; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (rte_eth_devices[i].data != NULL && + strcmp(rte_eth_devices[i].data->name, name) == 0) + return &rte_eth_devices[i]; + } + return NULL; +} + +struct rte_eth_dev * +rte_eth_dev_allocated(const char *name) +{ + struct rte_eth_dev *ethdev; + + rte_eth_dev_shared_data_prepare(); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + ethdev = _rte_eth_dev_allocated(name); + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + + return ethdev; +} + +static uint16_t +rte_eth_dev_find_free_port(void) +{ + unsigned i; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + /* Using shared name field to find a free port. */ + if (rte_eth_dev_shared_data->data[i].name[0] == '\0') { + RTE_ASSERT(rte_eth_devices[i].state == + RTE_ETH_DEV_UNUSED); + return i; + } + } + return RTE_MAX_ETHPORTS; +} + +static struct rte_eth_dev * +eth_dev_get(uint16_t port_id) +{ + struct rte_eth_dev *eth_dev = &rte_eth_devices[port_id]; + + eth_dev->data = &rte_eth_dev_shared_data->data[port_id]; + + eth_dev_last_created_port = port_id; + + return eth_dev; +} + +struct rte_eth_dev * +rte_eth_dev_allocate(const char *name) +{ + uint16_t port_id; + struct rte_eth_dev *eth_dev = NULL; + + rte_eth_dev_shared_data_prepare(); + + /* Synchronize port creation between primary and secondary threads. */ + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + if (_rte_eth_dev_allocated(name) != NULL) { + ethdev_log(ERR, "Ethernet device with name %s already allocated", + name); + goto unlock; + } + + port_id = rte_eth_dev_find_free_port(); + if (port_id == RTE_MAX_ETHPORTS) { + ethdev_log(ERR, "Reached maximum number of Ethernet ports"); + goto unlock; + } + + eth_dev = eth_dev_get(port_id); + snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name); + eth_dev->data->port_id = port_id; + eth_dev->data->mtu = ETHER_MTU; + +unlock: + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + + return eth_dev; +} + +/* + * Attach to a port already registered by the primary process, which + * makes sure that the same device would have the same port id both + * in the primary and secondary process. + */ +struct rte_eth_dev * +rte_eth_dev_attach_secondary(const char *name) +{ + uint16_t i; + struct rte_eth_dev *eth_dev = NULL; + + rte_eth_dev_shared_data_prepare(); + + /* Synchronize port attachment to primary port creation and release. */ + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (strcmp(rte_eth_dev_shared_data->data[i].name, name) == 0) + break; + } + if (i == RTE_MAX_ETHPORTS) { + RTE_PMD_DEBUG_TRACE( + "device %s is not driven by the primary process\n", + name); + } else { + eth_dev = eth_dev_get(i); + RTE_ASSERT(eth_dev->data->port_id == i); + } + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + return eth_dev; +} + +int +rte_eth_dev_release_port(struct rte_eth_dev *eth_dev) +{ + if (eth_dev == NULL) + return -EINVAL; + + rte_eth_dev_shared_data_prepare(); + + _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_DESTROY, NULL); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + eth_dev->state = RTE_ETH_DEV_UNUSED; + + memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data)); + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + + return 0; +} + +int +rte_eth_dev_is_valid_port(uint16_t port_id) +{ + if (port_id >= RTE_MAX_ETHPORTS || + (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED)) + return 0; + else + return 1; +} + +static int +rte_eth_is_valid_owner_id(uint64_t owner_id) +{ + if (owner_id == RTE_ETH_DEV_NO_OWNER || + rte_eth_dev_shared_data->next_owner_id <= owner_id) { + RTE_PMD_DEBUG_TRACE("Invalid owner_id=%016"PRIX64".\n", owner_id); + return 0; + } + return 1; +} + +uint64_t +rte_eth_find_next_owned_by(uint16_t port_id, const uint64_t owner_id) +{ + while (port_id < RTE_MAX_ETHPORTS && + ((rte_eth_devices[port_id].state != RTE_ETH_DEV_ATTACHED && + rte_eth_devices[port_id].state != RTE_ETH_DEV_REMOVED) || + rte_eth_devices[port_id].data->owner.id != owner_id)) + port_id++; + + if (port_id >= RTE_MAX_ETHPORTS) + return RTE_MAX_ETHPORTS; + + return port_id; +} + +int __rte_experimental +rte_eth_dev_owner_new(uint64_t *owner_id) +{ + rte_eth_dev_shared_data_prepare(); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + *owner_id = rte_eth_dev_shared_data->next_owner_id++; + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + return 0; +} + +static int +_rte_eth_dev_owner_set(const uint16_t port_id, const uint64_t old_owner_id, + const struct rte_eth_dev_owner *new_owner) +{ + struct rte_eth_dev *ethdev = &rte_eth_devices[port_id]; + struct rte_eth_dev_owner *port_owner; + int sret; + + if (port_id >= RTE_MAX_ETHPORTS || !is_allocated(ethdev)) { + RTE_PMD_DEBUG_TRACE("Port id %"PRIu16" is not allocated.\n", port_id); + return -ENODEV; + } + + if (!rte_eth_is_valid_owner_id(new_owner->id) && + !rte_eth_is_valid_owner_id(old_owner_id)) + return -EINVAL; + + port_owner = &rte_eth_devices[port_id].data->owner; + if (port_owner->id != old_owner_id) { + RTE_PMD_DEBUG_TRACE("Cannot set owner to port %d already owned" + " by %s_%016"PRIX64".\n", port_id, + port_owner->name, port_owner->id); + return -EPERM; + } + + sret = snprintf(port_owner->name, RTE_ETH_MAX_OWNER_NAME_LEN, "%s", + new_owner->name); + if (sret < 0 || sret >= RTE_ETH_MAX_OWNER_NAME_LEN) + RTE_PMD_DEBUG_TRACE("Port %d owner name was truncated.\n", + port_id); + + port_owner->id = new_owner->id; + + RTE_PMD_DEBUG_TRACE("Port %d owner is %s_%016"PRIX64".\n", port_id, + new_owner->name, new_owner->id); + + return 0; +} + +int __rte_experimental +rte_eth_dev_owner_set(const uint16_t port_id, + const struct rte_eth_dev_owner *owner) +{ + int ret; + + rte_eth_dev_shared_data_prepare(); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + ret = _rte_eth_dev_owner_set(port_id, RTE_ETH_DEV_NO_OWNER, owner); + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + return ret; +} + +int __rte_experimental +rte_eth_dev_owner_unset(const uint16_t port_id, const uint64_t owner_id) +{ + const struct rte_eth_dev_owner new_owner = (struct rte_eth_dev_owner) + {.id = RTE_ETH_DEV_NO_OWNER, .name = ""}; + int ret; + + rte_eth_dev_shared_data_prepare(); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + ret = _rte_eth_dev_owner_set(port_id, owner_id, &new_owner); + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + return ret; +} + +void __rte_experimental +rte_eth_dev_owner_delete(const uint64_t owner_id) +{ + uint16_t port_id; + + rte_eth_dev_shared_data_prepare(); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + if (rte_eth_is_valid_owner_id(owner_id)) { + for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) + if (rte_eth_devices[port_id].data->owner.id == owner_id) + memset(&rte_eth_devices[port_id].data->owner, 0, + sizeof(struct rte_eth_dev_owner)); + RTE_PMD_DEBUG_TRACE("All port owners owned by %016"PRIX64 + " identifier have removed.\n", owner_id); + } + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); +} + +int __rte_experimental +rte_eth_dev_owner_get(const uint16_t port_id, struct rte_eth_dev_owner *owner) +{ + int ret = 0; + struct rte_eth_dev *ethdev = &rte_eth_devices[port_id]; + + rte_eth_dev_shared_data_prepare(); + + rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock); + + if (port_id >= RTE_MAX_ETHPORTS || !is_allocated(ethdev)) { + RTE_PMD_DEBUG_TRACE("Port id %"PRIu16" is not allocated.\n", port_id); + ret = -ENODEV; + } else { + rte_memcpy(owner, ðdev->data->owner, sizeof(*owner)); + } + + rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock); + return ret; +} + +int +rte_eth_dev_socket_id(uint16_t port_id) +{ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1); + return rte_eth_devices[port_id].data->numa_node; +} + +void * +rte_eth_dev_get_sec_ctx(uint16_t port_id) +{ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL); + return rte_eth_devices[port_id].security_ctx; +} + +uint16_t +rte_eth_dev_count(void) +{ + return rte_eth_dev_count_avail(); +} + +uint16_t +rte_eth_dev_count_avail(void) +{ + uint16_t p; + uint16_t count; + + count = 0; + + RTE_ETH_FOREACH_DEV(p) + count++; + + return count; +} + +uint16_t __rte_experimental +rte_eth_dev_count_total(void) +{ + uint16_t port, count = 0; + + for (port = 0; port < RTE_MAX_ETHPORTS; port++) + if (rte_eth_devices[port].state != RTE_ETH_DEV_UNUSED) + count++; + + return count; +} + +int +rte_eth_dev_get_name_by_port(uint16_t port_id, char *name) +{ + char *tmp; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + if (name == NULL) { + RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); + return -EINVAL; + } + + /* shouldn't check 'rte_eth_devices[i].data', + * because it might be overwritten by VDEV PMD */ + tmp = rte_eth_dev_shared_data->data[port_id].name; + strcpy(name, tmp); + return 0; +} + +int +rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id) +{ + uint32_t pid; + + if (name == NULL) { + RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); + return -EINVAL; + } + + for (pid = 0; pid < RTE_MAX_ETHPORTS; pid++) { + if (rte_eth_devices[pid].state != RTE_ETH_DEV_UNUSED && + !strcmp(name, rte_eth_dev_shared_data->data[pid].name)) { + *port_id = pid; + return 0; + } + } + + return -ENODEV; +} + +static int +eth_err(uint16_t port_id, int ret) +{ + if (ret == 0) + return 0; + if (rte_eth_dev_is_removed(port_id)) + return -EIO; + return ret; +} + +/* attach the new device, then store port_id of the device */ +int +rte_eth_dev_attach(const char *devargs, uint16_t *port_id) +{ + int current = rte_eth_dev_count_total(); + struct rte_devargs da; + int ret = -1; + + memset(&da, 0, sizeof(da)); + + if ((devargs == NULL) || (port_id == NULL)) { + ret = -EINVAL; + goto err; + } + + /* parse devargs */ + if (rte_devargs_parse(&da, "%s", devargs)) + goto err; + + ret = rte_eal_hotplug_add(da.bus->name, da.name, da.args); + if (ret < 0) + goto err; + + /* no point looking at the port count if no port exists */ + if (!rte_eth_dev_count_total()) { + ethdev_log(ERR, "No port found for device (%s)", da.name); + ret = -1; + goto err; + } + + /* if nothing happened, there is a bug here, since some driver told us + * it did attach a device, but did not create a port. + * FIXME: race condition in case of plug-out of another device + */ + if (current == rte_eth_dev_count_total()) { + ret = -1; + goto err; + } + + *port_id = eth_dev_last_created_port; + ret = 0; + +err: + free(da.args); + return ret; +} + +/* detach the device, then store the name of the device */ +int +rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused) +{ + struct rte_device *dev; + struct rte_bus *bus; + uint32_t dev_flags; + int ret = -1; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev_flags = rte_eth_devices[port_id].data->dev_flags; + if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) { + ethdev_log(ERR, + "Port %" PRIu16 " is bonded, cannot detach", port_id); + return -ENOTSUP; + } + + dev = rte_eth_devices[port_id].device; + if (dev == NULL) + return -EINVAL; + + bus = rte_bus_find_by_device(dev); + if (bus == NULL) + return -ENOENT; + + ret = rte_eal_hotplug_remove(bus->name, dev->name); + if (ret < 0) + return ret; + + rte_eth_dev_release_port(&rte_eth_devices[port_id]); + return 0; +} + +static int +rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) +{ + uint16_t old_nb_queues = dev->data->nb_rx_queues; + void **rxq; + unsigned i; + + if (dev->data->rx_queues == NULL && nb_queues != 0) { /* first time configuration */ + dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues", + sizeof(dev->data->rx_queues[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (dev->data->rx_queues == NULL) { + dev->data->nb_rx_queues = 0; + return -(ENOMEM); + } + } else if (dev->data->rx_queues != NULL && nb_queues != 0) { /* re-configure */ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP); + + rxq = dev->data->rx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->rx_queue_release)(rxq[i]); + rxq = rte_realloc(rxq, sizeof(rxq[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (rxq == NULL) + return -(ENOMEM); + if (nb_queues > old_nb_queues) { + uint16_t new_qs = nb_queues - old_nb_queues; + + memset(rxq + old_nb_queues, 0, + sizeof(rxq[0]) * new_qs); + } + + dev->data->rx_queues = rxq; + + } else if (dev->data->rx_queues != NULL && nb_queues == 0) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP); + + rxq = dev->data->rx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->rx_queue_release)(rxq[i]); + + rte_free(dev->data->rx_queues); + dev->data->rx_queues = NULL; + } + dev->data->nb_rx_queues = nb_queues; + return 0; +} + +int +rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (!dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be started before start any queue\n", port_id); + return -EINVAL; + } + + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP); + + if (dev->data->rx_queue_state[rx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 + " already started\n", + rx_queue_id, port_id); + return 0; + } + + return eth_err(port_id, dev->dev_ops->rx_queue_start(dev, + rx_queue_id)); + +} + +int +rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_stop, -ENOTSUP); + + if (dev->data->rx_queue_state[rx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 + " already stopped\n", + rx_queue_id, port_id); + return 0; + } + + return eth_err(port_id, dev->dev_ops->rx_queue_stop(dev, rx_queue_id)); + +} + +int +rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (!dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be started before start any queue\n", port_id); + return -EINVAL; + } + + if (tx_queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_start, -ENOTSUP); + + if (dev->data->tx_queue_state[tx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 + " already started\n", + tx_queue_id, port_id); + return 0; + } + + return eth_err(port_id, dev->dev_ops->tx_queue_start(dev, + tx_queue_id)); + +} + +int +rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (tx_queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_stop, -ENOTSUP); + + if (dev->data->tx_queue_state[tx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 + " already stopped\n", + tx_queue_id, port_id); + return 0; + } + + return eth_err(port_id, dev->dev_ops->tx_queue_stop(dev, tx_queue_id)); + +} + +static int +rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) +{ + uint16_t old_nb_queues = dev->data->nb_tx_queues; + void **txq; + unsigned i; + + if (dev->data->tx_queues == NULL && nb_queues != 0) { /* first time configuration */ + dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues", + sizeof(dev->data->tx_queues[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (dev->data->tx_queues == NULL) { + dev->data->nb_tx_queues = 0; + return -(ENOMEM); + } + } else if (dev->data->tx_queues != NULL && nb_queues != 0) { /* re-configure */ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP); + + txq = dev->data->tx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->tx_queue_release)(txq[i]); + txq = rte_realloc(txq, sizeof(txq[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (txq == NULL) + return -ENOMEM; + if (nb_queues > old_nb_queues) { + uint16_t new_qs = nb_queues - old_nb_queues; + + memset(txq + old_nb_queues, 0, + sizeof(txq[0]) * new_qs); + } + + dev->data->tx_queues = txq; + + } else if (dev->data->tx_queues != NULL && nb_queues == 0) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP); + + txq = dev->data->tx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->tx_queue_release)(txq[i]); + + rte_free(dev->data->tx_queues); + dev->data->tx_queues = NULL; + } + dev->data->nb_tx_queues = nb_queues; + return 0; +} + +uint32_t +rte_eth_speed_bitflag(uint32_t speed, int duplex) +{ + switch (speed) { + case ETH_SPEED_NUM_10M: + return duplex ? ETH_LINK_SPEED_10M : ETH_LINK_SPEED_10M_HD; + case ETH_SPEED_NUM_100M: + return duplex ? ETH_LINK_SPEED_100M : ETH_LINK_SPEED_100M_HD; + case ETH_SPEED_NUM_1G: + return ETH_LINK_SPEED_1G; + case ETH_SPEED_NUM_2_5G: + return ETH_LINK_SPEED_2_5G; + case ETH_SPEED_NUM_5G: + return ETH_LINK_SPEED_5G; + case ETH_SPEED_NUM_10G: + return ETH_LINK_SPEED_10G; + case ETH_SPEED_NUM_20G: + return ETH_LINK_SPEED_20G; + case ETH_SPEED_NUM_25G: + return ETH_LINK_SPEED_25G; + case ETH_SPEED_NUM_40G: + return ETH_LINK_SPEED_40G; + case ETH_SPEED_NUM_50G: + return ETH_LINK_SPEED_50G; + case ETH_SPEED_NUM_56G: + return ETH_LINK_SPEED_56G; + case ETH_SPEED_NUM_100G: + return ETH_LINK_SPEED_100G; + default: + return 0; + } +} + +/** + * A conversion function from rxmode bitfield API. + */ +static void +rte_eth_convert_rx_offload_bitfield(const struct rte_eth_rxmode *rxmode, + uint64_t *rx_offloads) +{ + uint64_t offloads = 0; + + if (rxmode->header_split == 1) + offloads |= DEV_RX_OFFLOAD_HEADER_SPLIT; + if (rxmode->hw_ip_checksum == 1) + offloads |= DEV_RX_OFFLOAD_CHECKSUM; + if (rxmode->hw_vlan_filter == 1) + offloads |= DEV_RX_OFFLOAD_VLAN_FILTER; + if (rxmode->hw_vlan_strip == 1) + offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; + if (rxmode->hw_vlan_extend == 1) + offloads |= DEV_RX_OFFLOAD_VLAN_EXTEND; + if (rxmode->jumbo_frame == 1) + offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME; + if (rxmode->hw_strip_crc == 1) + offloads |= DEV_RX_OFFLOAD_CRC_STRIP; + if (rxmode->enable_scatter == 1) + offloads |= DEV_RX_OFFLOAD_SCATTER; + if (rxmode->enable_lro == 1) + offloads |= DEV_RX_OFFLOAD_TCP_LRO; + if (rxmode->hw_timestamp == 1) + offloads |= DEV_RX_OFFLOAD_TIMESTAMP; + if (rxmode->security == 1) + offloads |= DEV_RX_OFFLOAD_SECURITY; + + *rx_offloads = offloads; +} + +const char * __rte_experimental +rte_eth_dev_rx_offload_name(uint64_t offload) +{ + const char *name = "UNKNOWN"; + unsigned int i; + + for (i = 0; i < RTE_DIM(rte_rx_offload_names); ++i) { + if (offload == rte_rx_offload_names[i].offload) { + name = rte_rx_offload_names[i].name; + break; + } + } + + return name; +} + +const char * __rte_experimental +rte_eth_dev_tx_offload_name(uint64_t offload) +{ + const char *name = "UNKNOWN"; + unsigned int i; + + for (i = 0; i < RTE_DIM(rte_tx_offload_names); ++i) { + if (offload == rte_tx_offload_names[i].offload) { + name = rte_tx_offload_names[i].name; + break; + } + } + + return name; +} + +int +rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, + const struct rte_eth_conf *dev_conf) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct rte_eth_conf local_conf = *dev_conf; + int diag; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP); + + rte_eth_dev_info_get(port_id, &dev_info); + + /* If number of queues specified by application for both Rx and Tx is + * zero, use driver preferred values. This cannot be done individually + * as it is valid for either Tx or Rx (but not both) to be zero. + * If driver does not provide any preferred valued, fall back on + * EAL defaults. + */ + if (nb_rx_q == 0 && nb_tx_q == 0) { + nb_rx_q = dev_info.default_rxportconf.nb_queues; + if (nb_rx_q == 0) + nb_rx_q = RTE_ETH_DEV_FALLBACK_RX_NBQUEUES; + nb_tx_q = dev_info.default_txportconf.nb_queues; + if (nb_tx_q == 0) + nb_tx_q = RTE_ETH_DEV_FALLBACK_TX_NBQUEUES; + } + + if (nb_rx_q > RTE_MAX_QUEUES_PER_PORT) { + RTE_PMD_DEBUG_TRACE( + "Number of RX queues requested (%u) is greater than max supported(%d)\n", + nb_rx_q, RTE_MAX_QUEUES_PER_PORT); + return -EINVAL; + } + + if (nb_tx_q > RTE_MAX_QUEUES_PER_PORT) { + RTE_PMD_DEBUG_TRACE( + "Number of TX queues requested (%u) is greater than max supported(%d)\n", + nb_tx_q, RTE_MAX_QUEUES_PER_PORT); + return -EINVAL; + } + + if (dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be stopped to allow configuration\n", port_id); + return -EBUSY; + } + + /* + * Convert between the offloads API to enable PMDs to support + * only one of them. + */ + if (dev_conf->rxmode.ignore_offload_bitfield == 0) + rte_eth_convert_rx_offload_bitfield( + &dev_conf->rxmode, &local_conf.rxmode.offloads); + + /* Copy the dev_conf parameter into the dev structure */ + memcpy(&dev->data->dev_conf, &local_conf, sizeof(dev->data->dev_conf)); + + /* + * Check that the numbers of RX and TX queues are not greater + * than the maximum number of RX and TX queues supported by the + * configured device. + */ + if (nb_rx_q > dev_info.max_rx_queues) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_rx_queues=%d > %d\n", + port_id, nb_rx_q, dev_info.max_rx_queues); + return -EINVAL; + } + + if (nb_tx_q > dev_info.max_tx_queues) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_tx_queues=%d > %d\n", + port_id, nb_tx_q, dev_info.max_tx_queues); + return -EINVAL; + } + + /* Check that the device supports requested interrupts */ + if ((dev_conf->intr_conf.lsc == 1) && + (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) { + RTE_PMD_DEBUG_TRACE("driver %s does not support lsc\n", + dev->device->driver->name); + return -EINVAL; + } + if ((dev_conf->intr_conf.rmv == 1) && + (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_RMV))) { + RTE_PMD_DEBUG_TRACE("driver %s does not support rmv\n", + dev->device->driver->name); + return -EINVAL; + } + + /* + * If jumbo frames are enabled, check that the maximum RX packet + * length is supported by the configured device. + */ + if (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) { + if (dev_conf->rxmode.max_rx_pkt_len > + dev_info.max_rx_pktlen) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u" + " > max valid value %u\n", + port_id, + (unsigned)dev_conf->rxmode.max_rx_pkt_len, + (unsigned)dev_info.max_rx_pktlen); + return -EINVAL; + } else if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u" + " < min valid value %u\n", + port_id, + (unsigned)dev_conf->rxmode.max_rx_pkt_len, + (unsigned)ETHER_MIN_LEN); + return -EINVAL; + } + } else { + if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN || + dev_conf->rxmode.max_rx_pkt_len > ETHER_MAX_LEN) + /* Use default value */ + dev->data->dev_conf.rxmode.max_rx_pkt_len = + ETHER_MAX_LEN; + } + + /* Any requested offloading must be within its device capabilities */ + if ((local_conf.rxmode.offloads & dev_info.rx_offload_capa) != + local_conf.rxmode.offloads) { + ethdev_log(ERR, "ethdev port_id=%d requested Rx offloads " + "0x%" PRIx64 " doesn't match Rx offloads " + "capabilities 0x%" PRIx64 " in %s()\n", + port_id, + local_conf.rxmode.offloads, + dev_info.rx_offload_capa, + __func__); + /* Will return -EINVAL in the next release */ + } + if ((local_conf.txmode.offloads & dev_info.tx_offload_capa) != + local_conf.txmode.offloads) { + ethdev_log(ERR, "ethdev port_id=%d requested Tx offloads " + "0x%" PRIx64 " doesn't match Tx offloads " + "capabilities 0x%" PRIx64 " in %s()\n", + port_id, + local_conf.txmode.offloads, + dev_info.tx_offload_capa, + __func__); + /* Will return -EINVAL in the next release */ + } + + /* Check that device supports requested rss hash functions. */ + if ((dev_info.flow_type_rss_offloads | + dev_conf->rx_adv_conf.rss_conf.rss_hf) != + dev_info.flow_type_rss_offloads) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d invalid rss_hf: " + "0x%"PRIx64", valid value: 0x%"PRIx64"\n", + port_id, + dev_conf->rx_adv_conf.rss_conf.rss_hf, + dev_info.flow_type_rss_offloads); + } + + /* + * Setup new number of RX/TX queues and reconfigure device. + */ + diag = rte_eth_dev_rx_queue_config(dev, nb_rx_q); + if (diag != 0) { + RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_rx_queue_config = %d\n", + port_id, diag); + return diag; + } + + diag = rte_eth_dev_tx_queue_config(dev, nb_tx_q); + if (diag != 0) { + RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_tx_queue_config = %d\n", + port_id, diag); + rte_eth_dev_rx_queue_config(dev, 0); + return diag; + } + + diag = (*dev->dev_ops->dev_configure)(dev); + if (diag != 0) { + RTE_PMD_DEBUG_TRACE("port%d dev_configure = %d\n", + port_id, diag); + rte_eth_dev_rx_queue_config(dev, 0); + rte_eth_dev_tx_queue_config(dev, 0); + return eth_err(port_id, diag); + } + + /* Initialize Rx profiling if enabled at compilation time. */ + diag = __rte_eth_profile_rx_init(port_id, dev); + if (diag != 0) { + RTE_PMD_DEBUG_TRACE("port%d __rte_eth_profile_rx_init = %d\n", + port_id, diag); + rte_eth_dev_rx_queue_config(dev, 0); + rte_eth_dev_tx_queue_config(dev, 0); + return eth_err(port_id, diag); + } + + return 0; +} + +void +_rte_eth_dev_reset(struct rte_eth_dev *dev) +{ + if (dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be stopped to allow reset\n", + dev->data->port_id); + return; + } + + rte_eth_dev_rx_queue_config(dev, 0); + rte_eth_dev_tx_queue_config(dev, 0); + + memset(&dev->data->dev_conf, 0, sizeof(dev->data->dev_conf)); +} + +static void +rte_eth_dev_config_restore(uint16_t port_id) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct ether_addr *addr; + uint16_t i; + uint32_t pool = 0; + uint64_t pool_mask; + + dev = &rte_eth_devices[port_id]; + + rte_eth_dev_info_get(port_id, &dev_info); + + /* replay MAC address configuration including default MAC */ + addr = &dev->data->mac_addrs[0]; + if (*dev->dev_ops->mac_addr_set != NULL) + (*dev->dev_ops->mac_addr_set)(dev, addr); + else if (*dev->dev_ops->mac_addr_add != NULL) + (*dev->dev_ops->mac_addr_add)(dev, addr, 0, pool); + + if (*dev->dev_ops->mac_addr_add != NULL) { + for (i = 1; i < dev_info.max_mac_addrs; i++) { + addr = &dev->data->mac_addrs[i]; + + /* skip zero address */ + if (is_zero_ether_addr(addr)) + continue; + + pool = 0; + pool_mask = dev->data->mac_pool_sel[i]; + + do { + if (pool_mask & 1ULL) + (*dev->dev_ops->mac_addr_add)(dev, + addr, i, pool); + pool_mask >>= 1; + pool++; + } while (pool_mask); + } + } + + /* replay promiscuous configuration */ + if (rte_eth_promiscuous_get(port_id) == 1) + rte_eth_promiscuous_enable(port_id); + else if (rte_eth_promiscuous_get(port_id) == 0) + rte_eth_promiscuous_disable(port_id); + + /* replay all multicast configuration */ + if (rte_eth_allmulticast_get(port_id) == 1) + rte_eth_allmulticast_enable(port_id); + else if (rte_eth_allmulticast_get(port_id) == 0) + rte_eth_allmulticast_disable(port_id); +} + +int +rte_eth_dev_start(uint16_t port_id) +{ + struct rte_eth_dev *dev; + int diag; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP); + + if (dev->data->dev_started != 0) { + RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16 + " already started\n", + port_id); + return 0; + } + + diag = (*dev->dev_ops->dev_start)(dev); + if (diag == 0) + dev->data->dev_started = 1; + else + return eth_err(port_id, diag); + + rte_eth_dev_config_restore(port_id); + + if (dev->data->dev_conf.intr_conf.lsc == 0) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->link_update, -ENOTSUP); + (*dev->dev_ops->link_update)(dev, 0); + } + return 0; +} + +void +rte_eth_dev_stop(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop); + + if (dev->data->dev_started == 0) { + RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu16 + " already stopped\n", + port_id); + return; + } + + dev->data->dev_started = 0; + (*dev->dev_ops->dev_stop)(dev); +} + +int +rte_eth_dev_set_link_up(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_set_link_up, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->dev_set_link_up)(dev)); +} + +int +rte_eth_dev_set_link_down(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_set_link_down, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->dev_set_link_down)(dev)); +} + +void +rte_eth_dev_close(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_close); + dev->data->dev_started = 0; + (*dev->dev_ops->dev_close)(dev); + + dev->data->nb_rx_queues = 0; + rte_free(dev->data->rx_queues); + dev->data->rx_queues = NULL; + dev->data->nb_tx_queues = 0; + rte_free(dev->data->tx_queues); + dev->data->tx_queues = NULL; +} + +int +rte_eth_dev_reset(uint16_t port_id) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_reset, -ENOTSUP); + + rte_eth_dev_stop(port_id); + ret = dev->dev_ops->dev_reset(dev); + + return eth_err(port_id, ret); +} + +int __rte_experimental +rte_eth_dev_is_removed(uint16_t port_id) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0); + + dev = &rte_eth_devices[port_id]; + + if (dev->state == RTE_ETH_DEV_REMOVED) + return 1; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->is_removed, 0); + + ret = dev->dev_ops->is_removed(dev); + if (ret != 0) + /* Device is physically removed. */ + dev->state = RTE_ETH_DEV_REMOVED; + + return ret; +} + +int +rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) +{ + int ret; + uint32_t mbp_buf_size; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct rte_eth_rxconf local_conf; + void **rxq; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_setup, -ENOTSUP); + + /* + * Check the size of the mbuf data buffer. + * This value must be provided in the private data of the memory pool. + * First check that the memory pool has a valid private data. + */ + rte_eth_dev_info_get(port_id, &dev_info); + if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) { + RTE_PMD_DEBUG_TRACE("%s private_data_size %d < %d\n", + mp->name, (int) mp->private_data_size, + (int) sizeof(struct rte_pktmbuf_pool_private)); + return -ENOSPC; + } + mbp_buf_size = rte_pktmbuf_data_room_size(mp); + + if ((mbp_buf_size - RTE_PKTMBUF_HEADROOM) < dev_info.min_rx_bufsize) { + RTE_PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d " + "(RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)" + "=%d)\n", + mp->name, + (int)mbp_buf_size, + (int)(RTE_PKTMBUF_HEADROOM + + dev_info.min_rx_bufsize), + (int)RTE_PKTMBUF_HEADROOM, + (int)dev_info.min_rx_bufsize); + return -EINVAL; + } + + /* Use default specified by driver, if nb_rx_desc is zero */ + if (nb_rx_desc == 0) { + nb_rx_desc = dev_info.default_rxportconf.ring_size; + /* If driver default is also zero, fall back on EAL default */ + if (nb_rx_desc == 0) + nb_rx_desc = RTE_ETH_DEV_FALLBACK_RX_RINGSIZE; + } + + if (nb_rx_desc > dev_info.rx_desc_lim.nb_max || + nb_rx_desc < dev_info.rx_desc_lim.nb_min || + nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) { + + RTE_PMD_DEBUG_TRACE("Invalid value for nb_rx_desc(=%hu), " + "should be: <= %hu, = %hu, and a product of %hu\n", + nb_rx_desc, + dev_info.rx_desc_lim.nb_max, + dev_info.rx_desc_lim.nb_min, + dev_info.rx_desc_lim.nb_align); + return -EINVAL; + } + + if (dev->data->dev_started && + !(dev_info.dev_capa & + RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP)) + return -EBUSY; + + if (dev->data->dev_started && + (dev->data->rx_queue_state[rx_queue_id] != + RTE_ETH_QUEUE_STATE_STOPPED)) + return -EBUSY; + + rxq = dev->data->rx_queues; + if (rxq[rx_queue_id]) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, + -ENOTSUP); + (*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]); + rxq[rx_queue_id] = NULL; + } + + if (rx_conf == NULL) + rx_conf = &dev_info.default_rxconf; + + local_conf = *rx_conf; + if (dev->data->dev_conf.rxmode.ignore_offload_bitfield == 0) { + /** + * Reflect port offloads to queue offloads in order for + * offloads to not be discarded. + */ + rte_eth_convert_rx_offload_bitfield(&dev->data->dev_conf.rxmode, + &local_conf.offloads); + } + + /* + * If an offloading has already been enabled in + * rte_eth_dev_configure(), it has been enabled on all queues, + * so there is no need to enable it in this queue again. + * The local_conf.offloads input to underlying PMD only carries + * those offloadings which are only enabled on this queue and + * not enabled on all queues. + */ + local_conf.offloads &= ~dev->data->dev_conf.rxmode.offloads; + + /* + * New added offloadings for this queue are those not enabled in + * rte_eth_dev_configure() and they must be per-queue type. + * A pure per-port offloading can't be enabled on a queue while + * disabled on another queue. A pure per-port offloading can't + * be enabled for any queue as new added one if it hasn't been + * enabled in rte_eth_dev_configure(). + */ + if ((local_conf.offloads & dev_info.rx_queue_offload_capa) != + local_conf.offloads) { + ethdev_log(ERR, "Ethdev port_id=%d rx_queue_id=%d, new " + "added offloads 0x%" PRIx64 " must be " + "within pre-queue offload capabilities 0x%" + PRIx64 " in %s()\n", + port_id, + rx_queue_id, + local_conf.offloads, + dev_info.rx_queue_offload_capa, + __func__); + /* Will return -EINVAL in the next release */ + } + + ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc, + socket_id, &local_conf, mp); + if (!ret) { + if (!dev->data->min_rx_buf_size || + dev->data->min_rx_buf_size > mbp_buf_size) + dev->data->min_rx_buf_size = mbp_buf_size; + } + + return eth_err(port_id, ret); +} + +/** + * Convert from tx offloads to txq_flags. + */ +static void +rte_eth_convert_tx_offload(const uint64_t tx_offloads, uint32_t *txq_flags) +{ + uint32_t flags = 0; + + if (!(tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS)) + flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + if (!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT)) + flags |= ETH_TXQ_FLAGS_NOVLANOFFL; + if (!(tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM)) + flags |= ETH_TXQ_FLAGS_NOXSUMSCTP; + if (!(tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) + flags |= ETH_TXQ_FLAGS_NOXSUMUDP; + if (!(tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM)) + flags |= ETH_TXQ_FLAGS_NOXSUMTCP; + if (tx_offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE) + flags |= ETH_TXQ_FLAGS_NOREFCOUNT | ETH_TXQ_FLAGS_NOMULTMEMP; + + *txq_flags = flags; +} + +/** + * A conversion function from txq_flags API. + */ +static void +rte_eth_convert_txq_flags(const uint32_t txq_flags, uint64_t *tx_offloads) +{ + uint64_t offloads = 0; + + if (!(txq_flags & ETH_TXQ_FLAGS_NOMULTSEGS)) + offloads |= DEV_TX_OFFLOAD_MULTI_SEGS; + if (!(txq_flags & ETH_TXQ_FLAGS_NOVLANOFFL)) + offloads |= DEV_TX_OFFLOAD_VLAN_INSERT; + if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMSCTP)) + offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM; + if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMUDP)) + offloads |= DEV_TX_OFFLOAD_UDP_CKSUM; + if (!(txq_flags & ETH_TXQ_FLAGS_NOXSUMTCP)) + offloads |= DEV_TX_OFFLOAD_TCP_CKSUM; + if ((txq_flags & ETH_TXQ_FLAGS_NOREFCOUNT) && + (txq_flags & ETH_TXQ_FLAGS_NOMULTMEMP)) + offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE; + + *tx_offloads = offloads; +} + +int +rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct rte_eth_txconf local_conf; + void **txq; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (tx_queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_setup, -ENOTSUP); + + rte_eth_dev_info_get(port_id, &dev_info); + + /* Use default specified by driver, if nb_tx_desc is zero */ + if (nb_tx_desc == 0) { + nb_tx_desc = dev_info.default_txportconf.ring_size; + /* If driver default is zero, fall back on EAL default */ + if (nb_tx_desc == 0) + nb_tx_desc = RTE_ETH_DEV_FALLBACK_TX_RINGSIZE; + } + if (nb_tx_desc > dev_info.tx_desc_lim.nb_max || + nb_tx_desc < dev_info.tx_desc_lim.nb_min || + nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) { + RTE_PMD_DEBUG_TRACE("Invalid value for nb_tx_desc(=%hu), " + "should be: <= %hu, = %hu, and a product of %hu\n", + nb_tx_desc, + dev_info.tx_desc_lim.nb_max, + dev_info.tx_desc_lim.nb_min, + dev_info.tx_desc_lim.nb_align); + return -EINVAL; + } + + if (dev->data->dev_started && + !(dev_info.dev_capa & + RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP)) + return -EBUSY; + + if (dev->data->dev_started && + (dev->data->tx_queue_state[tx_queue_id] != + RTE_ETH_QUEUE_STATE_STOPPED)) + return -EBUSY; + + txq = dev->data->tx_queues; + if (txq[tx_queue_id]) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, + -ENOTSUP); + (*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]); + txq[tx_queue_id] = NULL; + } + + if (tx_conf == NULL) + tx_conf = &dev_info.default_txconf; + + /* + * Convert between the offloads API to enable PMDs to support + * only one of them. + */ + local_conf = *tx_conf; + if (!(tx_conf->txq_flags & ETH_TXQ_FLAGS_IGNORE)) { + rte_eth_convert_txq_flags(tx_conf->txq_flags, + &local_conf.offloads); + } + + /* + * If an offloading has already been enabled in + * rte_eth_dev_configure(), it has been enabled on all queues, + * so there is no need to enable it in this queue again. + * The local_conf.offloads input to underlying PMD only carries + * those offloadings which are only enabled on this queue and + * not enabled on all queues. + */ + local_conf.offloads &= ~dev->data->dev_conf.txmode.offloads; + + /* + * New added offloadings for this queue are those not enabled in + * rte_eth_dev_configure() and they must be per-queue type. + * A pure per-port offloading can't be enabled on a queue while + * disabled on another queue. A pure per-port offloading can't + * be enabled for any queue as new added one if it hasn't been + * enabled in rte_eth_dev_configure(). + */ + if ((local_conf.offloads & dev_info.tx_queue_offload_capa) != + local_conf.offloads) { + ethdev_log(ERR, "Ethdev port_id=%d tx_queue_id=%d, new " + "added offloads 0x%" PRIx64 " must be " + "within pre-queue offload capabilities 0x%" + PRIx64 " in %s()\n", + port_id, + tx_queue_id, + local_conf.offloads, + dev_info.tx_queue_offload_capa, + __func__); + /* Will return -EINVAL in the next release */ + } + + return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev, + tx_queue_id, nb_tx_desc, socket_id, &local_conf)); +} + +void +rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent, + void *userdata __rte_unused) +{ + unsigned i; + + for (i = 0; i < unsent; i++) + rte_pktmbuf_free(pkts[i]); +} + +void +rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent, + void *userdata) +{ + uint64_t *count = userdata; + unsigned i; + + for (i = 0; i < unsent; i++) + rte_pktmbuf_free(pkts[i]); + + *count += unsent; +} + +int +rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer, + buffer_tx_error_fn cbfn, void *userdata) +{ + buffer->error_callback = cbfn; + buffer->error_userdata = userdata; + return 0; +} + +int +rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size) +{ + int ret = 0; + + if (buffer == NULL) + return -EINVAL; + + buffer->size = size; + if (buffer->error_callback == NULL) { + ret = rte_eth_tx_buffer_set_err_callback( + buffer, rte_eth_tx_buffer_drop_callback, NULL); + } + + return ret; +} + +int +rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + int ret; + + /* Validate Input Data. Bail if not valid or not supported. */ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_done_cleanup, -ENOTSUP); + + /* Call driver to free pending mbufs. */ + ret = (*dev->dev_ops->tx_done_cleanup)(dev->data->tx_queues[queue_id], + free_cnt); + return eth_err(port_id, ret); +} + +void +rte_eth_promiscuous_enable(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->promiscuous_enable); + (*dev->dev_ops->promiscuous_enable)(dev); + dev->data->promiscuous = 1; +} + +void +rte_eth_promiscuous_disable(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->promiscuous_disable); + dev->data->promiscuous = 0; + (*dev->dev_ops->promiscuous_disable)(dev); +} + +int +rte_eth_promiscuous_get(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + return dev->data->promiscuous; +} + +void +rte_eth_allmulticast_enable(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->allmulticast_enable); + (*dev->dev_ops->allmulticast_enable)(dev); + dev->data->all_multicast = 1; +} + +void +rte_eth_allmulticast_disable(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->allmulticast_disable); + dev->data->all_multicast = 0; + (*dev->dev_ops->allmulticast_disable)(dev); +} + +int +rte_eth_allmulticast_get(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + return dev->data->all_multicast; +} + +void +rte_eth_link_get(uint16_t port_id, struct rte_eth_link *eth_link) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + if (dev->data->dev_conf.intr_conf.lsc && + dev->data->dev_started) + rte_eth_linkstatus_get(dev, eth_link); + else { + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update); + (*dev->dev_ops->link_update)(dev, 1); + *eth_link = dev->data->dev_link; + } +} + +void +rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *eth_link) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + if (dev->data->dev_conf.intr_conf.lsc && + dev->data->dev_started) + rte_eth_linkstatus_get(dev, eth_link); + else { + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update); + (*dev->dev_ops->link_update)(dev, 0); + *eth_link = dev->data->dev_link; + } +} + +int +rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + memset(stats, 0, sizeof(*stats)); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_get, -ENOTSUP); + stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; + return eth_err(port_id, (*dev->dev_ops->stats_get)(dev, stats)); +} + +int +rte_eth_stats_reset(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_reset, -ENOTSUP); + (*dev->dev_ops->stats_reset)(dev); + dev->data->rx_mbuf_alloc_failed = 0; + + return 0; +} + +static inline int +get_xstats_basic_count(struct rte_eth_dev *dev) +{ + uint16_t nb_rxqs, nb_txqs; + int count; + + nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + nb_txqs = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + + count = RTE_NB_STATS; + count += nb_rxqs * RTE_NB_RXQ_STATS; + count += nb_txqs * RTE_NB_TXQ_STATS; + + return count; +} + +static int +get_xstats_count(uint16_t port_id) +{ + struct rte_eth_dev *dev; + int count; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + dev = &rte_eth_devices[port_id]; + if (dev->dev_ops->xstats_get_names_by_id != NULL) { + count = (*dev->dev_ops->xstats_get_names_by_id)(dev, NULL, + NULL, 0); + if (count < 0) + return eth_err(port_id, count); + } + if (dev->dev_ops->xstats_get_names != NULL) { + count = (*dev->dev_ops->xstats_get_names)(dev, NULL, 0); + if (count < 0) + return eth_err(port_id, count); + } else + count = 0; + + + count += get_xstats_basic_count(dev); + + return count; +} + +int +rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name, + uint64_t *id) +{ + int cnt_xstats, idx_xstat; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (!id) { + RTE_PMD_DEBUG_TRACE("Error: id pointer is NULL\n"); + return -ENOMEM; + } + + if (!xstat_name) { + RTE_PMD_DEBUG_TRACE("Error: xstat_name pointer is NULL\n"); + return -ENOMEM; + } + + /* Get count */ + cnt_xstats = rte_eth_xstats_get_names_by_id(port_id, NULL, 0, NULL); + if (cnt_xstats < 0) { + RTE_PMD_DEBUG_TRACE("Error: Cannot get count of xstats\n"); + return -ENODEV; + } + + /* Get id-name lookup table */ + struct rte_eth_xstat_name xstats_names[cnt_xstats]; + + if (cnt_xstats != rte_eth_xstats_get_names_by_id( + port_id, xstats_names, cnt_xstats, NULL)) { + RTE_PMD_DEBUG_TRACE("Error: Cannot get xstats lookup\n"); + return -1; + } + + for (idx_xstat = 0; idx_xstat < cnt_xstats; idx_xstat++) { + if (!strcmp(xstats_names[idx_xstat].name, xstat_name)) { + *id = idx_xstat; + return 0; + }; + } + + return -EINVAL; +} + +/* retrieve basic stats names */ +static int +rte_eth_basic_stats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names) +{ + int cnt_used_entries = 0; + uint32_t idx, id_queue; + uint16_t num_q; + + for (idx = 0; idx < RTE_NB_STATS; idx++) { + snprintf(xstats_names[cnt_used_entries].name, + sizeof(xstats_names[0].name), + "%s", rte_stats_strings[idx].name); + cnt_used_entries++; + } + num_q = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + for (id_queue = 0; id_queue < num_q; id_queue++) { + for (idx = 0; idx < RTE_NB_RXQ_STATS; idx++) { + snprintf(xstats_names[cnt_used_entries].name, + sizeof(xstats_names[0].name), + "rx_q%u%s", + id_queue, rte_rxq_stats_strings[idx].name); + cnt_used_entries++; + } + + } + num_q = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + for (id_queue = 0; id_queue < num_q; id_queue++) { + for (idx = 0; idx < RTE_NB_TXQ_STATS; idx++) { + snprintf(xstats_names[cnt_used_entries].name, + sizeof(xstats_names[0].name), + "tx_q%u%s", + id_queue, rte_txq_stats_strings[idx].name); + cnt_used_entries++; + } + } + return cnt_used_entries; +} + +/* retrieve ethdev extended statistics names */ +int +rte_eth_xstats_get_names_by_id(uint16_t port_id, + struct rte_eth_xstat_name *xstats_names, unsigned int size, + uint64_t *ids) +{ + struct rte_eth_xstat_name *xstats_names_copy; + unsigned int no_basic_stat_requested = 1; + unsigned int no_ext_stat_requested = 1; + unsigned int expected_entries; + unsigned int basic_count; + struct rte_eth_dev *dev; + unsigned int i; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + basic_count = get_xstats_basic_count(dev); + ret = get_xstats_count(port_id); + if (ret < 0) + return ret; + expected_entries = (unsigned int)ret; + + /* Return max number of stats if no ids given */ + if (!ids) { + if (!xstats_names) + return expected_entries; + else if (xstats_names && size < expected_entries) + return expected_entries; + } + + if (ids && !xstats_names) + return -EINVAL; + + if (ids && dev->dev_ops->xstats_get_names_by_id != NULL && size > 0) { + uint64_t ids_copy[size]; + + for (i = 0; i < size; i++) { + if (ids[i] < basic_count) { + no_basic_stat_requested = 0; + break; + } + + /* + * Convert ids to xstats ids that PMD knows. + * ids known by user are basic + extended stats. + */ + ids_copy[i] = ids[i] - basic_count; + } + + if (no_basic_stat_requested) + return (*dev->dev_ops->xstats_get_names_by_id)(dev, + xstats_names, ids_copy, size); + } + + /* Retrieve all stats */ + if (!ids) { + int num_stats = rte_eth_xstats_get_names(port_id, xstats_names, + expected_entries); + if (num_stats < 0 || num_stats > (int)expected_entries) + return num_stats; + else + return expected_entries; + } + + xstats_names_copy = calloc(expected_entries, + sizeof(struct rte_eth_xstat_name)); + + if (!xstats_names_copy) { + RTE_PMD_DEBUG_TRACE("ERROR: can't allocate memory"); + return -ENOMEM; + } + + if (ids) { + for (i = 0; i < size; i++) { + if (ids[i] >= basic_count) { + no_ext_stat_requested = 0; + break; + } + } + } + + /* Fill xstats_names_copy structure */ + if (ids && no_ext_stat_requested) { + rte_eth_basic_stats_get_names(dev, xstats_names_copy); + } else { + ret = rte_eth_xstats_get_names(port_id, xstats_names_copy, + expected_entries); + if (ret < 0) { + free(xstats_names_copy); + return ret; + } + } + + /* Filter stats */ + for (i = 0; i < size; i++) { + if (ids[i] >= expected_entries) { + RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n"); + free(xstats_names_copy); + return -1; + } + xstats_names[i] = xstats_names_copy[ids[i]]; + } + + free(xstats_names_copy); + return size; +} + +int +rte_eth_xstats_get_names(uint16_t port_id, + struct rte_eth_xstat_name *xstats_names, + unsigned int size) +{ + struct rte_eth_dev *dev; + int cnt_used_entries; + int cnt_expected_entries; + int cnt_driver_entries; + + cnt_expected_entries = get_xstats_count(port_id); + if (xstats_names == NULL || cnt_expected_entries < 0 || + (int)size < cnt_expected_entries) + return cnt_expected_entries; + + /* port_id checked in get_xstats_count() */ + dev = &rte_eth_devices[port_id]; + + cnt_used_entries = rte_eth_basic_stats_get_names( + dev, xstats_names); + + if (dev->dev_ops->xstats_get_names != NULL) { + /* If there are any driver-specific xstats, append them + * to end of list. + */ + cnt_driver_entries = (*dev->dev_ops->xstats_get_names)( + dev, + xstats_names + cnt_used_entries, + size - cnt_used_entries); + if (cnt_driver_entries < 0) + return eth_err(port_id, cnt_driver_entries); + cnt_used_entries += cnt_driver_entries; + } + + return cnt_used_entries; +} + + +static int +rte_eth_basic_stats_get(uint16_t port_id, struct rte_eth_xstat *xstats) +{ + struct rte_eth_dev *dev; + struct rte_eth_stats eth_stats; + unsigned int count = 0, i, q; + uint64_t val, *stats_ptr; + uint16_t nb_rxqs, nb_txqs; + int ret; + + ret = rte_eth_stats_get(port_id, ð_stats); + if (ret < 0) + return ret; + + dev = &rte_eth_devices[port_id]; + + nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + nb_txqs = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + + /* global stats */ + for (i = 0; i < RTE_NB_STATS; i++) { + stats_ptr = RTE_PTR_ADD(ð_stats, + rte_stats_strings[i].offset); + val = *stats_ptr; + xstats[count++].value = val; + } + + /* per-rxq stats */ + for (q = 0; q < nb_rxqs; q++) { + for (i = 0; i < RTE_NB_RXQ_STATS; i++) { + stats_ptr = RTE_PTR_ADD(ð_stats, + rte_rxq_stats_strings[i].offset + + q * sizeof(uint64_t)); + val = *stats_ptr; + xstats[count++].value = val; + } + } + + /* per-txq stats */ + for (q = 0; q < nb_txqs; q++) { + for (i = 0; i < RTE_NB_TXQ_STATS; i++) { + stats_ptr = RTE_PTR_ADD(ð_stats, + rte_txq_stats_strings[i].offset + + q * sizeof(uint64_t)); + val = *stats_ptr; + xstats[count++].value = val; + } + } + return count; +} + +/* retrieve ethdev extended statistics */ +int +rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids, + uint64_t *values, unsigned int size) +{ + unsigned int no_basic_stat_requested = 1; + unsigned int no_ext_stat_requested = 1; + unsigned int num_xstats_filled; + unsigned int basic_count; + uint16_t expected_entries; + struct rte_eth_dev *dev; + unsigned int i; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + ret = get_xstats_count(port_id); + if (ret < 0) + return ret; + expected_entries = (uint16_t)ret; + struct rte_eth_xstat xstats[expected_entries]; + dev = &rte_eth_devices[port_id]; + basic_count = get_xstats_basic_count(dev); + + /* Return max number of stats if no ids given */ + if (!ids) { + if (!values) + return expected_entries; + else if (values && size < expected_entries) + return expected_entries; + } + + if (ids && !values) + return -EINVAL; + + if (ids && dev->dev_ops->xstats_get_by_id != NULL && size) { + unsigned int basic_count = get_xstats_basic_count(dev); + uint64_t ids_copy[size]; + + for (i = 0; i < size; i++) { + if (ids[i] < basic_count) { + no_basic_stat_requested = 0; + break; + } + + /* + * Convert ids to xstats ids that PMD knows. + * ids known by user are basic + extended stats. + */ + ids_copy[i] = ids[i] - basic_count; + } + + if (no_basic_stat_requested) + return (*dev->dev_ops->xstats_get_by_id)(dev, ids_copy, + values, size); + } + + if (ids) { + for (i = 0; i < size; i++) { + if (ids[i] >= basic_count) { + no_ext_stat_requested = 0; + break; + } + } + } + + /* Fill the xstats structure */ + if (ids && no_ext_stat_requested) + ret = rte_eth_basic_stats_get(port_id, xstats); + else + ret = rte_eth_xstats_get(port_id, xstats, expected_entries); + + if (ret < 0) + return ret; + num_xstats_filled = (unsigned int)ret; + + /* Return all stats */ + if (!ids) { + for (i = 0; i < num_xstats_filled; i++) + values[i] = xstats[i].value; + return expected_entries; + } + + /* Filter stats */ + for (i = 0; i < size; i++) { + if (ids[i] >= expected_entries) { + RTE_PMD_DEBUG_TRACE("ERROR: id value isn't valid\n"); + return -1; + } + values[i] = xstats[ids[i]].value; + } + return size; +} + +int +rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats, + unsigned int n) +{ + struct rte_eth_dev *dev; + unsigned int count = 0, i; + signed int xcount = 0; + uint16_t nb_rxqs, nb_txqs; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + nb_rxqs = RTE_MIN(dev->data->nb_rx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + nb_txqs = RTE_MIN(dev->data->nb_tx_queues, RTE_ETHDEV_QUEUE_STAT_CNTRS); + + /* Return generic statistics */ + count = RTE_NB_STATS + (nb_rxqs * RTE_NB_RXQ_STATS) + + (nb_txqs * RTE_NB_TXQ_STATS); + + /* implemented by the driver */ + if (dev->dev_ops->xstats_get != NULL) { + /* Retrieve the xstats from the driver at the end of the + * xstats struct. + */ + xcount = (*dev->dev_ops->xstats_get)(dev, + xstats ? xstats + count : NULL, + (n > count) ? n - count : 0); + + if (xcount < 0) + return eth_err(port_id, xcount); + } + + if (n < count + xcount || xstats == NULL) + return count + xcount; + + /* now fill the xstats structure */ + ret = rte_eth_basic_stats_get(port_id, xstats); + if (ret < 0) + return ret; + count = ret; + + for (i = 0; i < count; i++) + xstats[i].id = i; + /* add an offset to driver-specific stats */ + for ( ; i < count + xcount; i++) + xstats[i].id += count; + + return count + xcount; +} + +/* reset ethdev extended statistics */ +void +rte_eth_xstats_reset(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + /* implemented by the driver */ + if (dev->dev_ops->xstats_reset != NULL) { + (*dev->dev_ops->xstats_reset)(dev); + return; + } + + /* fallback to default */ + rte_eth_stats_reset(port_id); +} + +static int +set_queue_stats_mapping(uint16_t port_id, uint16_t queue_id, uint8_t stat_idx, + uint8_t is_rx) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_stats_mapping_set, -ENOTSUP); + return (*dev->dev_ops->queue_stats_mapping_set) + (dev, queue_id, stat_idx, is_rx); +} + + +int +rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, uint16_t tx_queue_id, + uint8_t stat_idx) +{ + return eth_err(port_id, set_queue_stats_mapping(port_id, tx_queue_id, + stat_idx, STAT_QMAP_TX)); +} + + +int +rte_eth_dev_set_rx_queue_stats_mapping(uint16_t port_id, uint16_t rx_queue_id, + uint8_t stat_idx) +{ + return eth_err(port_id, set_queue_stats_mapping(port_id, rx_queue_id, + stat_idx, STAT_QMAP_RX)); +} + +int +rte_eth_dev_fw_version_get(uint16_t port_id, char *fw_version, size_t fw_size) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->fw_version_get, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->fw_version_get)(dev, + fw_version, fw_size)); +} + +void +rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info) +{ + struct rte_eth_dev *dev; + struct rte_eth_txconf *txconf; + const struct rte_eth_desc_lim lim = { + .nb_max = UINT16_MAX, + .nb_min = 0, + .nb_align = 1, + }; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + memset(dev_info, 0, sizeof(struct rte_eth_dev_info)); + dev_info->rx_desc_lim = lim; + dev_info->tx_desc_lim = lim; + dev_info->device = dev->device; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); + (*dev->dev_ops->dev_infos_get)(dev, dev_info); + dev_info->driver_name = dev->device->driver->name; + dev_info->nb_rx_queues = dev->data->nb_rx_queues; + dev_info->nb_tx_queues = dev->data->nb_tx_queues; + + dev_info->dev_flags = &dev->data->dev_flags; + txconf = &dev_info->default_txconf; + /* convert offload to txq_flags to support legacy app */ + rte_eth_convert_tx_offload(txconf->offloads, &txconf->txq_flags); +} + +int +rte_eth_dev_get_supported_ptypes(uint16_t port_id, uint32_t ptype_mask, + uint32_t *ptypes, int num) +{ + int i, j; + struct rte_eth_dev *dev; + const uint32_t *all_ptypes; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_supported_ptypes_get, 0); + all_ptypes = (*dev->dev_ops->dev_supported_ptypes_get)(dev); + + if (!all_ptypes) + return 0; + + for (i = 0, j = 0; all_ptypes[i] != RTE_PTYPE_UNKNOWN; ++i) + if (all_ptypes[i] & ptype_mask) { + if (j < num) + ptypes[j] = all_ptypes[i]; + j++; + } + + return j; +} + +void +rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + ether_addr_copy(&dev->data->mac_addrs[0], mac_addr); +} + + +int +rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + *mtu = dev->data->mtu; + return 0; +} + +int +rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu) +{ + int ret; + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mtu_set, -ENOTSUP); + + ret = (*dev->dev_ops->mtu_set)(dev, mtu); + if (!ret) + dev->data->mtu = mtu; + + return eth_err(port_id, ret); +} + +int +rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + if (!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_FILTER)) { + RTE_PMD_DEBUG_TRACE("port %d: vlan-filtering disabled\n", port_id); + return -ENOSYS; + } + + if (vlan_id > 4095) { + RTE_PMD_DEBUG_TRACE("(port_id=%d) invalid vlan_id=%u > 4095\n", + port_id, (unsigned) vlan_id); + return -EINVAL; + } + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP); + + ret = (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on); + if (ret == 0) { + struct rte_vlan_filter_conf *vfc; + int vidx; + int vbit; + + vfc = &dev->data->vlan_filter_conf; + vidx = vlan_id / 64; + vbit = vlan_id % 64; + + if (on) + vfc->ids[vidx] |= UINT64_C(1) << vbit; + else + vfc->ids[vidx] &= ~(UINT64_C(1) << vbit); + } + + return eth_err(port_id, ret); +} + +int +rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id, + int on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid rx_queue_id=%d\n", port_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_strip_queue_set, -ENOTSUP); + (*dev->dev_ops->vlan_strip_queue_set)(dev, rx_queue_id, on); + + return 0; +} + +int +rte_eth_dev_set_vlan_ether_type(uint16_t port_id, + enum rte_vlan_type vlan_type, + uint16_t tpid) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_tpid_set, -ENOTSUP); + + return eth_err(port_id, (*dev->dev_ops->vlan_tpid_set)(dev, vlan_type, + tpid)); +} + +int +rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask) +{ + struct rte_eth_dev *dev; + int ret = 0; + int mask = 0; + int cur, org = 0; + uint64_t orig_offloads; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + /* save original values in case of failure */ + orig_offloads = dev->data->dev_conf.rxmode.offloads; + + /*check which option changed by application*/ + cur = !!(offload_mask & ETH_VLAN_STRIP_OFFLOAD); + org = !!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_STRIP); + if (cur != org) { + if (cur) + dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_VLAN_STRIP; + else + dev->data->dev_conf.rxmode.offloads &= + ~DEV_RX_OFFLOAD_VLAN_STRIP; + mask |= ETH_VLAN_STRIP_MASK; + } + + cur = !!(offload_mask & ETH_VLAN_FILTER_OFFLOAD); + org = !!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_FILTER); + if (cur != org) { + if (cur) + dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_VLAN_FILTER; + else + dev->data->dev_conf.rxmode.offloads &= + ~DEV_RX_OFFLOAD_VLAN_FILTER; + mask |= ETH_VLAN_FILTER_MASK; + } + + cur = !!(offload_mask & ETH_VLAN_EXTEND_OFFLOAD); + org = !!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_EXTEND); + if (cur != org) { + if (cur) + dev->data->dev_conf.rxmode.offloads |= + DEV_RX_OFFLOAD_VLAN_EXTEND; + else + dev->data->dev_conf.rxmode.offloads &= + ~DEV_RX_OFFLOAD_VLAN_EXTEND; + mask |= ETH_VLAN_EXTEND_MASK; + } + + /*no change*/ + if (mask == 0) + return ret; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_offload_set, -ENOTSUP); + ret = (*dev->dev_ops->vlan_offload_set)(dev, mask); + if (ret) { + /* hit an error restore original values */ + dev->data->dev_conf.rxmode.offloads = orig_offloads; + } + + return eth_err(port_id, ret); +} + +int +rte_eth_dev_get_vlan_offload(uint16_t port_id) +{ + struct rte_eth_dev *dev; + int ret = 0; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + if (dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_STRIP) + ret |= ETH_VLAN_STRIP_OFFLOAD; + + if (dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_FILTER) + ret |= ETH_VLAN_FILTER_OFFLOAD; + + if (dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_EXTEND) + ret |= ETH_VLAN_EXTEND_OFFLOAD; + + return ret; +} + +int +rte_eth_dev_set_vlan_pvid(uint16_t port_id, uint16_t pvid, int on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_pvid_set, -ENOTSUP); + + return eth_err(port_id, (*dev->dev_ops->vlan_pvid_set)(dev, pvid, on)); +} + +int +rte_eth_dev_flow_ctrl_get(uint16_t port_id, struct rte_eth_fc_conf *fc_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_get, -ENOTSUP); + memset(fc_conf, 0, sizeof(*fc_conf)); + return eth_err(port_id, (*dev->dev_ops->flow_ctrl_get)(dev, fc_conf)); +} + +int +rte_eth_dev_flow_ctrl_set(uint16_t port_id, struct rte_eth_fc_conf *fc_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if ((fc_conf->send_xon != 0) && (fc_conf->send_xon != 1)) { + RTE_PMD_DEBUG_TRACE("Invalid send_xon, only 0/1 allowed\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_set, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->flow_ctrl_set)(dev, fc_conf)); +} + +int +rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id, + struct rte_eth_pfc_conf *pfc_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (pfc_conf->priority > (ETH_DCB_NUM_USER_PRIORITIES - 1)) { + RTE_PMD_DEBUG_TRACE("Invalid priority, only 0-7 allowed\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + /* High water, low water validation are device specific */ + if (*dev->dev_ops->priority_flow_ctrl_set) + return eth_err(port_id, (*dev->dev_ops->priority_flow_ctrl_set) + (dev, pfc_conf)); + return -ENOTSUP; +} + +static int +rte_eth_check_reta_mask(struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + uint16_t i, num; + + if (!reta_conf) + return -EINVAL; + + num = (reta_size + RTE_RETA_GROUP_SIZE - 1) / RTE_RETA_GROUP_SIZE; + for (i = 0; i < num; i++) { + if (reta_conf[i].mask) + return 0; + } + + return -EINVAL; +} + +static int +rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size, + uint16_t max_rxq) +{ + uint16_t i, idx, shift; + + if (!reta_conf) + return -EINVAL; + + if (max_rxq == 0) { + RTE_PMD_DEBUG_TRACE("No receive queue is available\n"); + return -EINVAL; + } + + for (i = 0; i < reta_size; i++) { + idx = i / RTE_RETA_GROUP_SIZE; + shift = i % RTE_RETA_GROUP_SIZE; + if ((reta_conf[idx].mask & (1ULL << shift)) && + (reta_conf[idx].reta[shift] >= max_rxq)) { + RTE_PMD_DEBUG_TRACE("reta_conf[%u]->reta[%u]: %u exceeds " + "the maximum rxq index: %u\n", idx, shift, + reta_conf[idx].reta[shift], max_rxq); + return -EINVAL; + } + } + + return 0; +} + +int +rte_eth_dev_rss_reta_update(uint16_t port_id, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + /* Check mask bits */ + ret = rte_eth_check_reta_mask(reta_conf, reta_size); + if (ret < 0) + return ret; + + dev = &rte_eth_devices[port_id]; + + /* Check entry value */ + ret = rte_eth_check_reta_entry(reta_conf, reta_size, + dev->data->nb_rx_queues); + if (ret < 0) + return ret; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->reta_update, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->reta_update)(dev, reta_conf, + reta_size)); +} + +int +rte_eth_dev_rss_reta_query(uint16_t port_id, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + /* Check mask bits */ + ret = rte_eth_check_reta_mask(reta_conf, reta_size); + if (ret < 0) + return ret; + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->reta_query, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->reta_query)(dev, reta_conf, + reta_size)); +} + +int +rte_eth_dev_rss_hash_update(uint16_t port_id, + struct rte_eth_rss_conf *rss_conf) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info = { .flow_type_rss_offloads = 0, }; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + if ((dev_info.flow_type_rss_offloads | rss_conf->rss_hf) != + dev_info.flow_type_rss_offloads) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d invalid rss_hf: " + "0x%"PRIx64", valid value: 0x%"PRIx64"\n", + port_id, + rss_conf->rss_hf, + dev_info.flow_type_rss_offloads); + } + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_update, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->rss_hash_update)(dev, + rss_conf)); +} + +int +rte_eth_dev_rss_hash_conf_get(uint16_t port_id, + struct rte_eth_rss_conf *rss_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_conf_get, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->rss_hash_conf_get)(dev, + rss_conf)); +} + +int +rte_eth_dev_udp_tunnel_port_add(uint16_t port_id, + struct rte_eth_udp_tunnel *udp_tunnel) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (udp_tunnel == NULL) { + RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n"); + return -EINVAL; + } + + if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_port_add, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->udp_tunnel_port_add)(dev, + udp_tunnel)); +} + +int +rte_eth_dev_udp_tunnel_port_delete(uint16_t port_id, + struct rte_eth_udp_tunnel *udp_tunnel) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + if (udp_tunnel == NULL) { + RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n"); + return -EINVAL; + } + + if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n"); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_port_del, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->udp_tunnel_port_del)(dev, + udp_tunnel)); +} + +int +rte_eth_led_on(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_led_on, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->dev_led_on)(dev)); +} + +int +rte_eth_led_off(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_led_off, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->dev_led_off)(dev)); +} + +/* + * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find + * an empty spot. + */ +static int +get_mac_addr_index(uint16_t port_id, const struct ether_addr *addr) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + unsigned i; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + rte_eth_dev_info_get(port_id, &dev_info); + + for (i = 0; i < dev_info.max_mac_addrs; i++) + if (memcmp(addr, &dev->data->mac_addrs[i], ETHER_ADDR_LEN) == 0) + return i; + + return -1; +} + +static const struct ether_addr null_mac_addr; + +int +rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *addr, + uint32_t pool) +{ + struct rte_eth_dev *dev; + int index; + uint64_t pool_mask; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_add, -ENOTSUP); + + if (is_zero_ether_addr(addr)) { + RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n", + port_id); + return -EINVAL; + } + if (pool >= ETH_64_POOLS) { + RTE_PMD_DEBUG_TRACE("pool id must be 0-%d\n", ETH_64_POOLS - 1); + return -EINVAL; + } + + index = get_mac_addr_index(port_id, addr); + if (index < 0) { + index = get_mac_addr_index(port_id, &null_mac_addr); + if (index < 0) { + RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n", + port_id); + return -ENOSPC; + } + } else { + pool_mask = dev->data->mac_pool_sel[index]; + + /* Check if both MAC address and pool is already there, and do nothing */ + if (pool_mask & (1ULL << pool)) + return 0; + } + + /* Update NIC */ + ret = (*dev->dev_ops->mac_addr_add)(dev, addr, index, pool); + + if (ret == 0) { + /* Update address in NIC data structure */ + ether_addr_copy(addr, &dev->data->mac_addrs[index]); + + /* Update pool bitmap in NIC data structure */ + dev->data->mac_pool_sel[index] |= (1ULL << pool); + } + + return eth_err(port_id, ret); +} + +int +rte_eth_dev_mac_addr_remove(uint16_t port_id, struct ether_addr *addr) +{ + struct rte_eth_dev *dev; + int index; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_remove, -ENOTSUP); + + index = get_mac_addr_index(port_id, addr); + if (index == 0) { + RTE_PMD_DEBUG_TRACE("port %d: Cannot remove default MAC address\n", port_id); + return -EADDRINUSE; + } else if (index < 0) + return 0; /* Do nothing if address wasn't found */ + + /* Update NIC */ + (*dev->dev_ops->mac_addr_remove)(dev, index); + + /* Update address in NIC data structure */ + ether_addr_copy(&null_mac_addr, &dev->data->mac_addrs[index]); + + /* reset pool bitmap */ + dev->data->mac_pool_sel[index] = 0; + + return 0; +} + +int +rte_eth_dev_default_mac_addr_set(uint16_t port_id, struct ether_addr *addr) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (!is_valid_assigned_ether_addr(addr)) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_set, -ENOTSUP); + + ret = (*dev->dev_ops->mac_addr_set)(dev, addr); + if (ret < 0) + return ret; + + /* Update default address in NIC data structure */ + ether_addr_copy(addr, &dev->data->mac_addrs[0]); + + return 0; +} + + +/* + * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find + * an empty spot. + */ +static int +get_hash_mac_addr_index(uint16_t port_id, const struct ether_addr *addr) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + unsigned i; + + rte_eth_dev_info_get(port_id, &dev_info); + if (!dev->data->hash_mac_addrs) + return -1; + + for (i = 0; i < dev_info.max_hash_mac_addrs; i++) + if (memcmp(addr, &dev->data->hash_mac_addrs[i], + ETHER_ADDR_LEN) == 0) + return i; + + return -1; +} + +int +rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr, + uint8_t on) +{ + int index; + int ret; + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + if (is_zero_ether_addr(addr)) { + RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n", + port_id); + return -EINVAL; + } + + index = get_hash_mac_addr_index(port_id, addr); + /* Check if it's already there, and do nothing */ + if ((index >= 0) && on) + return 0; + + if (index < 0) { + if (!on) { + RTE_PMD_DEBUG_TRACE("port %d: the MAC address was not " + "set in UTA\n", port_id); + return -EINVAL; + } + + index = get_hash_mac_addr_index(port_id, &null_mac_addr); + if (index < 0) { + RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n", + port_id); + return -ENOSPC; + } + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->uc_hash_table_set, -ENOTSUP); + ret = (*dev->dev_ops->uc_hash_table_set)(dev, addr, on); + if (ret == 0) { + /* Update address in NIC data structure */ + if (on) + ether_addr_copy(addr, + &dev->data->hash_mac_addrs[index]); + else + ether_addr_copy(&null_mac_addr, + &dev->data->hash_mac_addrs[index]); + } + + return eth_err(port_id, ret); +} + +int +rte_eth_dev_uc_all_hash_table_set(uint16_t port_id, uint8_t on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->uc_all_hash_table_set, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->uc_all_hash_table_set)(dev, + on)); +} + +int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx, + uint16_t tx_rate) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct rte_eth_link link; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + link = dev->data->dev_link; + + if (queue_idx > dev_info.max_tx_queues) { + RTE_PMD_DEBUG_TRACE("set queue rate limit:port %d: " + "invalid queue id=%d\n", port_id, queue_idx); + return -EINVAL; + } + + if (tx_rate > link.link_speed) { + RTE_PMD_DEBUG_TRACE("set queue rate limit:invalid tx_rate=%d, " + "bigger than link speed= %d\n", + tx_rate, link.link_speed); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_queue_rate_limit, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->set_queue_rate_limit)(dev, + queue_idx, tx_rate)); +} + +int +rte_eth_mirror_rule_set(uint16_t port_id, + struct rte_eth_mirror_conf *mirror_conf, + uint8_t rule_id, uint8_t on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (mirror_conf->rule_type == 0) { + RTE_PMD_DEBUG_TRACE("mirror rule type can not be 0.\n"); + return -EINVAL; + } + + if (mirror_conf->dst_pool >= ETH_64_POOLS) { + RTE_PMD_DEBUG_TRACE("Invalid dst pool, pool id must be 0-%d\n", + ETH_64_POOLS - 1); + return -EINVAL; + } + + if ((mirror_conf->rule_type & (ETH_MIRROR_VIRTUAL_POOL_UP | + ETH_MIRROR_VIRTUAL_POOL_DOWN)) && + (mirror_conf->pool_mask == 0)) { + RTE_PMD_DEBUG_TRACE("Invalid mirror pool, pool mask can not be 0.\n"); + return -EINVAL; + } + + if ((mirror_conf->rule_type & ETH_MIRROR_VLAN) && + mirror_conf->vlan.vlan_mask == 0) { + RTE_PMD_DEBUG_TRACE("Invalid vlan mask, vlan mask can not be 0.\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mirror_rule_set, -ENOTSUP); + + return eth_err(port_id, (*dev->dev_ops->mirror_rule_set)(dev, + mirror_conf, rule_id, on)); +} + +int +rte_eth_mirror_rule_reset(uint16_t port_id, uint8_t rule_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mirror_rule_reset, -ENOTSUP); + + return eth_err(port_id, (*dev->dev_ops->mirror_rule_reset)(dev, + rule_id)); +} + +RTE_INIT(eth_dev_init_cb_lists) +{ + int i; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) + TAILQ_INIT(&rte_eth_devices[i].link_intr_cbs); +} + +int +rte_eth_dev_callback_register(uint16_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_callback *user_cb; + uint32_t next_port; /* size is 32-bit to prevent loop wrap-around */ + uint16_t last_port; + + if (!cb_fn) + return -EINVAL; + + if (!rte_eth_dev_is_valid_port(port_id) && port_id != RTE_ETH_ALL) { + ethdev_log(ERR, "Invalid port_id=%d", port_id); + return -EINVAL; + } + + if (port_id == RTE_ETH_ALL) { + next_port = 0; + last_port = RTE_MAX_ETHPORTS - 1; + } else { + next_port = last_port = port_id; + } + + rte_spinlock_lock(&rte_eth_dev_cb_lock); + + do { + dev = &rte_eth_devices[next_port]; + + TAILQ_FOREACH(user_cb, &(dev->link_intr_cbs), next) { + if (user_cb->cb_fn == cb_fn && + user_cb->cb_arg == cb_arg && + user_cb->event == event) { + break; + } + } + + /* create a new callback. */ + if (user_cb == NULL) { + user_cb = rte_zmalloc("INTR_USER_CALLBACK", + sizeof(struct rte_eth_dev_callback), 0); + if (user_cb != NULL) { + user_cb->cb_fn = cb_fn; + user_cb->cb_arg = cb_arg; + user_cb->event = event; + TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), + user_cb, next); + } else { + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + rte_eth_dev_callback_unregister(port_id, event, + cb_fn, cb_arg); + return -ENOMEM; + } + + } + } while (++next_port <= last_port); + + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + return 0; +} + +int +rte_eth_dev_callback_unregister(uint16_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg) +{ + int ret; + struct rte_eth_dev *dev; + struct rte_eth_dev_callback *cb, *next; + uint32_t next_port; /* size is 32-bit to prevent loop wrap-around */ + uint16_t last_port; + + if (!cb_fn) + return -EINVAL; + + if (!rte_eth_dev_is_valid_port(port_id) && port_id != RTE_ETH_ALL) { + ethdev_log(ERR, "Invalid port_id=%d", port_id); + return -EINVAL; + } + + if (port_id == RTE_ETH_ALL) { + next_port = 0; + last_port = RTE_MAX_ETHPORTS - 1; + } else { + next_port = last_port = port_id; + } + + rte_spinlock_lock(&rte_eth_dev_cb_lock); + + do { + dev = &rte_eth_devices[next_port]; + ret = 0; + for (cb = TAILQ_FIRST(&dev->link_intr_cbs); cb != NULL; + cb = next) { + + next = TAILQ_NEXT(cb, next); + + if (cb->cb_fn != cb_fn || cb->event != event || + (cb->cb_arg != (void *)-1 && cb->cb_arg != cb_arg)) + continue; + + /* + * if this callback is not executing right now, + * then remove it. + */ + if (cb->active == 0) { + TAILQ_REMOVE(&(dev->link_intr_cbs), cb, next); + rte_free(cb); + } else { + ret = -EAGAIN; + } + } + } while (++next_port <= last_port); + + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + return ret; +} + +int +_rte_eth_dev_callback_process(struct rte_eth_dev *dev, + enum rte_eth_event_type event, void *ret_param) +{ + struct rte_eth_dev_callback *cb_lst; + struct rte_eth_dev_callback dev_cb; + int rc = 0; + + rte_spinlock_lock(&rte_eth_dev_cb_lock); + TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) { + if (cb_lst->cb_fn == NULL || cb_lst->event != event) + continue; + dev_cb = *cb_lst; + cb_lst->active = 1; + if (ret_param != NULL) + dev_cb.ret_param = ret_param; + + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + rc = dev_cb.cb_fn(dev->data->port_id, dev_cb.event, + dev_cb.cb_arg, dev_cb.ret_param); + rte_spinlock_lock(&rte_eth_dev_cb_lock); + cb_lst->active = 0; + } + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + return rc; +} + +void +rte_eth_dev_probing_finish(struct rte_eth_dev *dev) +{ + if (dev == NULL) + return; + + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_NEW, NULL); + + dev->state = RTE_ETH_DEV_ATTACHED; +} + +int +rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data) +{ + uint32_t vec; + struct rte_eth_dev *dev; + struct rte_intr_handle *intr_handle; + uint16_t qid; + int rc; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + if (!dev->intr_handle) { + RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n"); + return -ENOTSUP; + } + + intr_handle = dev->intr_handle; + if (!intr_handle->intr_vec) { + RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); + return -EPERM; + } + + for (qid = 0; qid < dev->data->nb_rx_queues; qid++) { + vec = intr_handle->intr_vec[qid]; + rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data); + if (rc && rc != -EEXIST) { + RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error" + " op %d epfd %d vec %u\n", + port_id, qid, op, epfd, vec); + } + } + + return 0; +} + +const struct rte_memzone * +rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name, + uint16_t queue_id, size_t size, unsigned align, + int socket_id) +{ + char z_name[RTE_MEMZONE_NAMESIZE]; + const struct rte_memzone *mz; + + snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", + dev->device->driver->name, ring_name, + dev->data->port_id, queue_id); + + mz = rte_memzone_lookup(z_name); + if (mz) + return mz; + + return rte_memzone_reserve_aligned(z_name, size, socket_id, + RTE_MEMZONE_IOVA_CONTIG, align); +} + +int __rte_experimental +rte_eth_dev_create(struct rte_device *device, const char *name, + size_t priv_data_size, + ethdev_bus_specific_init ethdev_bus_specific_init, + void *bus_init_params, + ethdev_init_t ethdev_init, void *init_params) +{ + struct rte_eth_dev *ethdev; + int retval; + + RTE_FUNC_PTR_OR_ERR_RET(*ethdev_init, -EINVAL); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + ethdev = rte_eth_dev_allocate(name); + if (!ethdev) { + retval = -ENODEV; + goto probe_failed; + } + + if (priv_data_size) { + ethdev->data->dev_private = rte_zmalloc_socket( + name, priv_data_size, RTE_CACHE_LINE_SIZE, + device->numa_node); + + if (!ethdev->data->dev_private) { + RTE_LOG(ERR, EAL, "failed to allocate private data"); + retval = -ENOMEM; + goto probe_failed; + } + } + } else { + ethdev = rte_eth_dev_attach_secondary(name); + if (!ethdev) { + RTE_LOG(ERR, EAL, "secondary process attach failed, " + "ethdev doesn't exist"); + retval = -ENODEV; + goto probe_failed; + } + } + + ethdev->device = device; + + if (ethdev_bus_specific_init) { + retval = ethdev_bus_specific_init(ethdev, bus_init_params); + if (retval) { + RTE_LOG(ERR, EAL, + "ethdev bus specific initialisation failed"); + goto probe_failed; + } + } + + retval = ethdev_init(ethdev, init_params); + if (retval) { + RTE_LOG(ERR, EAL, "ethdev initialisation failed"); + goto probe_failed; + } + + rte_eth_dev_probing_finish(ethdev); + + return retval; +probe_failed: + /* free ports private data if primary process */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(ethdev->data->dev_private); + + rte_eth_dev_release_port(ethdev); + + return retval; +} + +int __rte_experimental +rte_eth_dev_destroy(struct rte_eth_dev *ethdev, + ethdev_uninit_t ethdev_uninit) +{ + int ret; + + ethdev = rte_eth_dev_allocated(ethdev->data->name); + if (!ethdev) + return -ENODEV; + + RTE_FUNC_PTR_OR_ERR_RET(*ethdev_uninit, -EINVAL); + if (ethdev_uninit) { + ret = ethdev_uninit(ethdev); + if (ret) + return ret; + } + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(ethdev->data->dev_private); + + ethdev->data->dev_private = NULL; + + return rte_eth_dev_release_port(ethdev); +} + +int +rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id, + int epfd, int op, void *data) +{ + uint32_t vec; + struct rte_eth_dev *dev; + struct rte_intr_handle *intr_handle; + int rc; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + if (queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%u\n", queue_id); + return -EINVAL; + } + + if (!dev->intr_handle) { + RTE_PMD_DEBUG_TRACE("RX Intr handle unset\n"); + return -ENOTSUP; + } + + intr_handle = dev->intr_handle; + if (!intr_handle->intr_vec) { + RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); + return -EPERM; + } + + vec = intr_handle->intr_vec[queue_id]; + rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data); + if (rc && rc != -EEXIST) { + RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error" + " op %d epfd %d vec %u\n", + port_id, queue_id, op, epfd, vec); + return rc; + } + + return 0; +} + +int +rte_eth_dev_rx_intr_enable(uint16_t port_id, + uint16_t queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->rx_queue_intr_enable)(dev, + queue_id)); +} + +int +rte_eth_dev_rx_intr_disable(uint16_t port_id, + uint16_t queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->rx_queue_intr_disable)(dev, + queue_id)); +} + + +int +rte_eth_dev_filter_supported(uint16_t port_id, + enum rte_filter_type filter_type) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP); + return (*dev->dev_ops->filter_ctrl)(dev, filter_type, + RTE_ETH_FILTER_NOP, NULL); +} + +int +rte_eth_dev_filter_ctrl(uint16_t port_id, enum rte_filter_type filter_type, + enum rte_filter_op filter_op, void *arg) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->filter_ctrl)(dev, filter_type, + filter_op, arg)); +} + +const struct rte_eth_rxtx_callback * +rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id, + rte_rx_callback_fn fn, void *user_param) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + rte_errno = ENOTSUP; + return NULL; +#endif + /* check input parameters */ + if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) { + rte_errno = EINVAL; + return NULL; + } + struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0); + + if (cb == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + cb->fn.rx = fn; + cb->param = user_param; + + rte_spinlock_lock(&rte_eth_rx_cb_lock); + /* Add the callbacks in fifo order. */ + struct rte_eth_rxtx_callback *tail = + rte_eth_devices[port_id].post_rx_burst_cbs[queue_id]; + + if (!tail) { + rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb; + + } else { + while (tail->next) + tail = tail->next; + tail->next = cb; + } + rte_spinlock_unlock(&rte_eth_rx_cb_lock); + + return cb; +} + +const struct rte_eth_rxtx_callback * +rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id, + rte_rx_callback_fn fn, void *user_param) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + rte_errno = ENOTSUP; + return NULL; +#endif + /* check input parameters */ + if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) { + rte_errno = EINVAL; + return NULL; + } + + struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0); + + if (cb == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + cb->fn.rx = fn; + cb->param = user_param; + + rte_spinlock_lock(&rte_eth_rx_cb_lock); + /* Add the callbacks at fisrt position*/ + cb->next = rte_eth_devices[port_id].post_rx_burst_cbs[queue_id]; + rte_smp_wmb(); + rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb; + rte_spinlock_unlock(&rte_eth_rx_cb_lock); + + return cb; +} + +const struct rte_eth_rxtx_callback * +rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id, + rte_tx_callback_fn fn, void *user_param) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + rte_errno = ENOTSUP; + return NULL; +#endif + /* check input parameters */ + if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) { + rte_errno = EINVAL; + return NULL; + } + + struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0); + + if (cb == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + cb->fn.tx = fn; + cb->param = user_param; + + rte_spinlock_lock(&rte_eth_tx_cb_lock); + /* Add the callbacks in fifo order. */ + struct rte_eth_rxtx_callback *tail = + rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id]; + + if (!tail) { + rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id] = cb; + + } else { + while (tail->next) + tail = tail->next; + tail->next = cb; + } + rte_spinlock_unlock(&rte_eth_tx_cb_lock); + + return cb; +} + +int +rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id, + const struct rte_eth_rxtx_callback *user_cb) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + return -ENOTSUP; +#endif + /* Check input parameters. */ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + if (user_cb == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) + return -EINVAL; + + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_rxtx_callback *cb; + struct rte_eth_rxtx_callback **prev_cb; + int ret = -EINVAL; + + rte_spinlock_lock(&rte_eth_rx_cb_lock); + prev_cb = &dev->post_rx_burst_cbs[queue_id]; + for (; *prev_cb != NULL; prev_cb = &cb->next) { + cb = *prev_cb; + if (cb == user_cb) { + /* Remove the user cb from the callback list. */ + *prev_cb = cb->next; + ret = 0; + break; + } + } + rte_spinlock_unlock(&rte_eth_rx_cb_lock); + + return ret; +} + +int +rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id, + const struct rte_eth_rxtx_callback *user_cb) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + return -ENOTSUP; +#endif + /* Check input parameters. */ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + if (user_cb == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) + return -EINVAL; + + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + int ret = -EINVAL; + struct rte_eth_rxtx_callback *cb; + struct rte_eth_rxtx_callback **prev_cb; + + rte_spinlock_lock(&rte_eth_tx_cb_lock); + prev_cb = &dev->pre_tx_burst_cbs[queue_id]; + for (; *prev_cb != NULL; prev_cb = &cb->next) { + cb = *prev_cb; + if (cb == user_cb) { + /* Remove the user cb from the callback list. */ + *prev_cb = cb->next; + ret = 0; + break; + } + } + rte_spinlock_unlock(&rte_eth_tx_cb_lock); + + return ret; +} + +int +rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id, + struct rte_eth_rxq_info *qinfo) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (qinfo == NULL) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + if (queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rxq_info_get, -ENOTSUP); + + memset(qinfo, 0, sizeof(*qinfo)); + dev->dev_ops->rxq_info_get(dev, queue_id, qinfo); + return 0; +} + +int +rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id, + struct rte_eth_txq_info *qinfo) +{ + struct rte_eth_dev *dev; + struct rte_eth_txconf *txconf = &qinfo->conf; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (qinfo == NULL) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + if (queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->txq_info_get, -ENOTSUP); + + memset(qinfo, 0, sizeof(*qinfo)); + dev->dev_ops->txq_info_get(dev, queue_id, qinfo); + /* convert offload to txq_flags to support legacy app */ + rte_eth_convert_tx_offload(txconf->offloads, &txconf->txq_flags); + + return 0; +} + +int +rte_eth_dev_set_mc_addr_list(uint16_t port_id, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_mc_addr_list, -ENOTSUP); + return eth_err(port_id, dev->dev_ops->set_mc_addr_list(dev, + mc_addr_set, nb_mc_addr)); +} + +int +rte_eth_timesync_enable(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_enable, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_enable)(dev)); +} + +int +rte_eth_timesync_disable(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_disable, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_disable)(dev)); +} + +int +rte_eth_timesync_read_rx_timestamp(uint16_t port_id, struct timespec *timestamp, + uint32_t flags) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_rx_timestamp, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_read_rx_timestamp) + (dev, timestamp, flags)); +} + +int +rte_eth_timesync_read_tx_timestamp(uint16_t port_id, + struct timespec *timestamp) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_tx_timestamp, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_read_tx_timestamp) + (dev, timestamp)); +} + +int +rte_eth_timesync_adjust_time(uint16_t port_id, int64_t delta) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_adjust_time, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_adjust_time)(dev, + delta)); +} + +int +rte_eth_timesync_read_time(uint16_t port_id, struct timespec *timestamp) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_time, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_read_time)(dev, + timestamp)); +} + +int +rte_eth_timesync_write_time(uint16_t port_id, const struct timespec *timestamp) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_write_time, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->timesync_write_time)(dev, + timestamp)); +} + +int +rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_reg, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->get_reg)(dev, info)); +} + +int +rte_eth_dev_get_eeprom_length(uint16_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_eeprom_length, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->get_eeprom_length)(dev)); +} + +int +rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_eeprom, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->get_eeprom)(dev, info)); +} + +int +rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_eeprom, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->set_eeprom)(dev, info)); +} + +int __rte_experimental +rte_eth_dev_get_module_info(uint16_t port_id, + struct rte_eth_dev_module_info *modinfo) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_module_info, -ENOTSUP); + return (*dev->dev_ops->get_module_info)(dev, modinfo); +} + +int __rte_experimental +rte_eth_dev_get_module_eeprom(uint16_t port_id, + struct rte_dev_eeprom_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_module_eeprom, -ENOTSUP); + return (*dev->dev_ops->get_module_eeprom)(dev, info); +} + +int +rte_eth_dev_get_dcb_info(uint16_t port_id, + struct rte_eth_dcb_info *dcb_info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + memset(dcb_info, 0, sizeof(struct rte_eth_dcb_info)); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_dcb_info, -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->get_dcb_info)(dev, dcb_info)); +} + +int +rte_eth_dev_l2_tunnel_eth_type_conf(uint16_t port_id, + struct rte_eth_l2_tunnel_conf *l2_tunnel) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (l2_tunnel == NULL) { + RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n"); + return -EINVAL; + } + + if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->l2_tunnel_eth_type_conf, + -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->l2_tunnel_eth_type_conf)(dev, + l2_tunnel)); +} + +int +rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id, + struct rte_eth_l2_tunnel_conf *l2_tunnel, + uint32_t mask, + uint8_t en) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (l2_tunnel == NULL) { + RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n"); + return -EINVAL; + } + + if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_PMD_DEBUG_TRACE("Invalid tunnel type.\n"); + return -EINVAL; + } + + if (mask == 0) { + RTE_PMD_DEBUG_TRACE("Mask should have a value.\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->l2_tunnel_offload_set, + -ENOTSUP); + return eth_err(port_id, (*dev->dev_ops->l2_tunnel_offload_set)(dev, + l2_tunnel, mask, en)); +} + +static void +rte_eth_dev_adjust_nb_desc(uint16_t *nb_desc, + const struct rte_eth_desc_lim *desc_lim) +{ + if (desc_lim->nb_align != 0) + *nb_desc = RTE_ALIGN_CEIL(*nb_desc, desc_lim->nb_align); + + if (desc_lim->nb_max != 0) + *nb_desc = RTE_MIN(*nb_desc, desc_lim->nb_max); + + *nb_desc = RTE_MAX(*nb_desc, desc_lim->nb_min); +} + +int +rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id, + uint16_t *nb_rx_desc, + uint16_t *nb_tx_desc) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + + rte_eth_dev_info_get(port_id, &dev_info); + + if (nb_rx_desc != NULL) + rte_eth_dev_adjust_nb_desc(nb_rx_desc, &dev_info.rx_desc_lim); + + if (nb_tx_desc != NULL) + rte_eth_dev_adjust_nb_desc(nb_tx_desc, &dev_info.tx_desc_lim); + + return 0; +} + +int +rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (pool == NULL) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + + if (*dev->dev_ops->pool_ops_supported == NULL) + return 1; /* all pools are supported */ + + return (*dev->dev_ops->pool_ops_supported)(dev, pool); +} + +/** + * A set of values to describe the possible states of a switch domain. + */ +enum rte_eth_switch_domain_state { + RTE_ETH_SWITCH_DOMAIN_UNUSED = 0, + RTE_ETH_SWITCH_DOMAIN_ALLOCATED +}; + +/** + * Array of switch domains available for allocation. Array is sized to + * RTE_MAX_ETHPORTS elements as there cannot be more active switch domains than + * ethdev ports in a single process. + */ +struct rte_eth_dev_switch { + enum rte_eth_switch_domain_state state; +} rte_eth_switch_domains[RTE_MAX_ETHPORTS]; + +int __rte_experimental +rte_eth_switch_domain_alloc(uint16_t *domain_id) +{ + unsigned int i; + + *domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID; + + for (i = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID + 1; + i < RTE_MAX_ETHPORTS; i++) { + if (rte_eth_switch_domains[i].state == + RTE_ETH_SWITCH_DOMAIN_UNUSED) { + rte_eth_switch_domains[i].state = + RTE_ETH_SWITCH_DOMAIN_ALLOCATED; + *domain_id = i; + return 0; + } + } + + return -ENOSPC; +} + +int __rte_experimental +rte_eth_switch_domain_free(uint16_t domain_id) +{ + if (domain_id == RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID || + domain_id >= RTE_MAX_ETHPORTS) + return -EINVAL; + + if (rte_eth_switch_domains[domain_id].state != + RTE_ETH_SWITCH_DOMAIN_ALLOCATED) + return -EINVAL; + + rte_eth_switch_domains[domain_id].state = RTE_ETH_SWITCH_DOMAIN_UNUSED; + + return 0; +} + +typedef int (*rte_eth_devargs_callback_t)(char *str, void *data); + +static int +rte_eth_devargs_tokenise(struct rte_kvargs *arglist, const char *str_in) +{ + int state; + struct rte_kvargs_pair *pair; + char *letter; + + arglist->str = strdup(str_in); + if (arglist->str == NULL) + return -ENOMEM; + + letter = arglist->str; + state = 0; + arglist->count = 0; + pair = &arglist->pairs[0]; + while (1) { + switch (state) { + case 0: /* Initial */ + if (*letter == '=') + return -EINVAL; + else if (*letter == '\0') + return 0; + + state = 1; + pair->key = letter; + /* fall-thru */ + + case 1: /* Parsing key */ + if (*letter == '=') { + *letter = '\0'; + pair->value = letter + 1; + state = 2; + } else if (*letter == ',' || *letter == '\0') + return -EINVAL; + break; + + + case 2: /* Parsing value */ + if (*letter == '[') + state = 3; + else if (*letter == ',') { + *letter = '\0'; + arglist->count++; + pair = &arglist->pairs[arglist->count]; + state = 0; + } else if (*letter == '\0') { + letter--; + arglist->count++; + pair = &arglist->pairs[arglist->count]; + state = 0; + } + break; + + case 3: /* Parsing list */ + if (*letter == ']') + state = 2; + else if (*letter == '\0') + return -EINVAL; + break; + } + letter++; + } +} + +static int +rte_eth_devargs_parse_list(char *str, rte_eth_devargs_callback_t callback, + void *data) +{ + char *str_start; + int state; + int result; + + if (*str != '[') + /* Single element, not a list */ + return callback(str, data); + + /* Sanity check, then strip the brackets */ + str_start = &str[strlen(str) - 1]; + if (*str_start != ']') { + RTE_LOG(ERR, EAL, "(%s): List does not end with ']'", str); + return -EINVAL; + } + str++; + *str_start = '\0'; + + /* Process list elements */ + state = 0; + while (1) { + if (state == 0) { + if (*str == '\0') + break; + if (*str != ',') { + str_start = str; + state = 1; + } + } else if (state == 1) { + if (*str == ',' || *str == '\0') { + if (str > str_start) { + /* Non-empty string fragment */ + *str = '\0'; + result = callback(str_start, data); + if (result < 0) + return result; + } + state = 0; + } + } + str++; + } + return 0; +} + +static int +rte_eth_devargs_process_range(char *str, uint16_t *list, uint16_t *len_list, + const uint16_t max_list) +{ + uint16_t lo, hi, val; + int result; + + result = sscanf(str, "%hu-%hu", &lo, &hi); + if (result == 1) { + if (*len_list >= max_list) + return -ENOMEM; + list[(*len_list)++] = lo; + } else if (result == 2) { + if (lo >= hi || lo > RTE_MAX_ETHPORTS || hi > RTE_MAX_ETHPORTS) + return -EINVAL; + for (val = lo; val <= hi; val++) { + if (*len_list >= max_list) + return -ENOMEM; + list[(*len_list)++] = val; + } + } else + return -EINVAL; + return 0; +} + + +static int +rte_eth_devargs_parse_representor_ports(char *str, void *data) +{ + struct rte_eth_devargs *eth_da = data; + + return rte_eth_devargs_process_range(str, eth_da->representor_ports, + ð_da->nb_representor_ports, RTE_MAX_ETHPORTS); +} + +int __rte_experimental +rte_eth_devargs_parse(const char *dargs, struct rte_eth_devargs *eth_da) +{ + struct rte_kvargs args; + struct rte_kvargs_pair *pair; + unsigned int i; + int result = 0; + + memset(eth_da, 0, sizeof(*eth_da)); + + result = rte_eth_devargs_tokenise(&args, dargs); + if (result < 0) + goto parse_cleanup; + + for (i = 0; i < args.count; i++) { + pair = &args.pairs[i]; + if (strcmp("representor", pair->key) == 0) { + result = rte_eth_devargs_parse_list(pair->value, + rte_eth_devargs_parse_representor_ports, + eth_da); + if (result < 0) + goto parse_cleanup; + } + } + +parse_cleanup: + if (args.str) + free(args.str); + + return result; +} + +RTE_INIT(ethdev_init_log); +static void +ethdev_init_log(void) +{ + ethdev_logtype = rte_log_register("lib.ethdev"); + if (ethdev_logtype >= 0) + rte_log_set_level(ethdev_logtype, RTE_LOG_INFO); +} diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h new file mode 100644 index 00000000..36e3984e --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev.h @@ -0,0 +1,4326 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2017 Intel Corporation + */ + +#ifndef _RTE_ETHDEV_H_ +#define _RTE_ETHDEV_H_ + +/** + * @file + * + * RTE Ethernet Device API + * + * The Ethernet Device API is composed of two parts: + * + * - The application-oriented Ethernet API that includes functions to setup + * an Ethernet device (configure it, setup its RX and TX queues and start it), + * to get its MAC address, the speed and the status of its physical link, + * to receive and to transmit packets, and so on. + * + * - The driver-oriented Ethernet API that exports functions allowing + * an Ethernet Poll Mode Driver (PMD) to allocate an Ethernet device instance, + * create memzone for HW rings and process registered callbacks, and so on. + * PMDs should include rte_ethdev_driver.h instead of this header. + * + * By default, all the functions of the Ethernet Device API exported by a PMD + * are lock-free functions which assume to not be invoked in parallel on + * different logical cores to work on the same target object. For instance, + * the receive function of a PMD cannot be invoked in parallel on two logical + * cores to poll the same RX queue [of the same port]. Of course, this function + * can be invoked in parallel by different logical cores on different RX queues. + * It is the responsibility of the upper level application to enforce this rule. + * + * If needed, parallel accesses by multiple logical cores to shared queues + * shall be explicitly protected by dedicated inline lock-aware functions + * built on top of their corresponding lock-free functions of the PMD API. + * + * In all functions of the Ethernet API, the Ethernet device is + * designated by an integer >= 0 named the device port identifier. + * + * At the Ethernet driver level, Ethernet devices are represented by a generic + * data structure of type *rte_eth_dev*. + * + * Ethernet devices are dynamically registered during the PCI probing phase + * performed at EAL initialization time. + * When an Ethernet device is being probed, an *rte_eth_dev* structure and + * a new port identifier are allocated for that device. Then, the eth_dev_init() + * function supplied by the Ethernet driver matching the probed PCI + * device is invoked to properly initialize the device. + * + * The role of the device init function consists of resetting the hardware, + * checking access to Non-volatile Memory (NVM), reading the MAC address + * from NVM etc. + * + * If the device init operation is successful, the correspondence between + * the port identifier assigned to the new device and its associated + * *rte_eth_dev* structure is effectively registered. + * Otherwise, both the *rte_eth_dev* structure and the port identifier are + * freed. + * + * The functions exported by the application Ethernet API to setup a device + * designated by its port identifier must be invoked in the following order: + * - rte_eth_dev_configure() + * - rte_eth_tx_queue_setup() + * - rte_eth_rx_queue_setup() + * - rte_eth_dev_start() + * + * Then, the network application can invoke, in any order, the functions + * exported by the Ethernet API to get the MAC address of a given device, to + * get the speed and the status of a device physical link, to receive/transmit + * [burst of] packets, and so on. + * + * If the application wants to change the configuration (i.e. call + * rte_eth_dev_configure(), rte_eth_tx_queue_setup(), or + * rte_eth_rx_queue_setup()), it must call rte_eth_dev_stop() first to stop the + * device and then do the reconfiguration before calling rte_eth_dev_start() + * again. The transmit and receive functions should not be invoked when the + * device is stopped. + * + * Please note that some configuration is not stored between calls to + * rte_eth_dev_stop()/rte_eth_dev_start(). The following configuration will + * be retained: + * + * - flow control settings + * - receive mode configuration (promiscuous mode, hardware checksum mode, + * RSS/VMDQ settings etc.) + * - VLAN filtering configuration + * - MAC addresses supplied to MAC address array + * - flow director filtering mode (but not filtering rules) + * - NIC queue statistics mappings + * + * Any other configuration will not be stored and will need to be re-entered + * before a call to rte_eth_dev_start(). + * + * Finally, a network application can close an Ethernet device by invoking the + * rte_eth_dev_close() function. + * + * Each function of the application Ethernet API invokes a specific function + * of the PMD that controls the target device designated by its port + * identifier. + * For this purpose, all device-specific functions of an Ethernet driver are + * supplied through a set of pointers contained in a generic structure of type + * *eth_dev_ops*. + * The address of the *eth_dev_ops* structure is stored in the *rte_eth_dev* + * structure by the device init function of the Ethernet driver, which is + * invoked during the PCI probing phase, as explained earlier. + * + * In other words, each function of the Ethernet API simply retrieves the + * *rte_eth_dev* structure associated with the device port identifier and + * performs an indirect invocation of the corresponding driver function + * supplied in the *eth_dev_ops* structure of the *rte_eth_dev* structure. + * + * For performance reasons, the address of the burst-oriented RX and TX + * functions of the Ethernet driver are not contained in the *eth_dev_ops* + * structure. Instead, they are directly stored at the beginning of the + * *rte_eth_dev* structure to avoid an extra indirect memory access during + * their invocation. + * + * RTE ethernet device drivers do not use interrupts for transmitting or + * receiving. Instead, Ethernet drivers export Poll-Mode receive and transmit + * functions to applications. + * Both receive and transmit functions are packet-burst oriented to minimize + * their cost per packet through the following optimizations: + * + * - Sharing among multiple packets the incompressible cost of the + * invocation of receive/transmit functions. + * + * - Enabling receive/transmit functions to take advantage of burst-oriented + * hardware features (L1 cache, prefetch instructions, NIC head/tail + * registers) to minimize the number of CPU cycles per packet, for instance, + * by avoiding useless read memory accesses to ring descriptors, or by + * systematically using arrays of pointers that exactly fit L1 cache line + * boundaries and sizes. + * + * The burst-oriented receive function does not provide any error notification, + * to avoid the corresponding overhead. As a hint, the upper-level application + * might check the status of the device link once being systematically returned + * a 0 value by the receive function of the driver for a given number of tries. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdint.h> + +/* Use this macro to check if LRO API is supported */ +#define RTE_ETHDEV_HAS_LRO_SUPPORT + +#include <rte_compat.h> +#include <rte_log.h> +#include <rte_interrupts.h> +#include <rte_dev.h> +#include <rte_devargs.h> +#include <rte_errno.h> +#include <rte_common.h> +#include <rte_config.h> + +#include "rte_ether.h" +#include "rte_eth_ctrl.h" +#include "rte_dev_info.h" + +struct rte_mbuf; + +/** + * A structure used to retrieve statistics for an Ethernet port. + * Not all statistics fields in struct rte_eth_stats are supported + * by any type of network interface card (NIC). If any statistics + * field is not supported, its value is 0. + */ +struct rte_eth_stats { + uint64_t ipackets; /**< Total number of successfully received packets. */ + uint64_t opackets; /**< Total number of successfully transmitted packets.*/ + uint64_t ibytes; /**< Total number of successfully received bytes. */ + uint64_t obytes; /**< Total number of successfully transmitted bytes. */ + uint64_t imissed; + /**< Total of RX packets dropped by the HW, + * because there are no available buffer (i.e. RX queues are full). + */ + uint64_t ierrors; /**< Total number of erroneous received packets. */ + uint64_t oerrors; /**< Total number of failed transmitted packets. */ + uint64_t rx_nombuf; /**< Total number of RX mbuf allocation failures. */ + uint64_t q_ipackets[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of queue RX packets. */ + uint64_t q_opackets[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of queue TX packets. */ + uint64_t q_ibytes[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of successfully received queue bytes. */ + uint64_t q_obytes[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of successfully transmitted queue bytes. */ + uint64_t q_errors[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of queue packets received that are dropped. */ +}; + +/** + * Device supported speeds bitmap flags + */ +#define ETH_LINK_SPEED_AUTONEG (0 << 0) /**< Autonegotiate (all speeds) */ +#define ETH_LINK_SPEED_FIXED (1 << 0) /**< Disable autoneg (fixed speed) */ +#define ETH_LINK_SPEED_10M_HD (1 << 1) /**< 10 Mbps half-duplex */ +#define ETH_LINK_SPEED_10M (1 << 2) /**< 10 Mbps full-duplex */ +#define ETH_LINK_SPEED_100M_HD (1 << 3) /**< 100 Mbps half-duplex */ +#define ETH_LINK_SPEED_100M (1 << 4) /**< 100 Mbps full-duplex */ +#define ETH_LINK_SPEED_1G (1 << 5) /**< 1 Gbps */ +#define ETH_LINK_SPEED_2_5G (1 << 6) /**< 2.5 Gbps */ +#define ETH_LINK_SPEED_5G (1 << 7) /**< 5 Gbps */ +#define ETH_LINK_SPEED_10G (1 << 8) /**< 10 Gbps */ +#define ETH_LINK_SPEED_20G (1 << 9) /**< 20 Gbps */ +#define ETH_LINK_SPEED_25G (1 << 10) /**< 25 Gbps */ +#define ETH_LINK_SPEED_40G (1 << 11) /**< 40 Gbps */ +#define ETH_LINK_SPEED_50G (1 << 12) /**< 50 Gbps */ +#define ETH_LINK_SPEED_56G (1 << 13) /**< 56 Gbps */ +#define ETH_LINK_SPEED_100G (1 << 14) /**< 100 Gbps */ + +/** + * Ethernet numeric link speeds in Mbps + */ +#define ETH_SPEED_NUM_NONE 0 /**< Not defined */ +#define ETH_SPEED_NUM_10M 10 /**< 10 Mbps */ +#define ETH_SPEED_NUM_100M 100 /**< 100 Mbps */ +#define ETH_SPEED_NUM_1G 1000 /**< 1 Gbps */ +#define ETH_SPEED_NUM_2_5G 2500 /**< 2.5 Gbps */ +#define ETH_SPEED_NUM_5G 5000 /**< 5 Gbps */ +#define ETH_SPEED_NUM_10G 10000 /**< 10 Gbps */ +#define ETH_SPEED_NUM_20G 20000 /**< 20 Gbps */ +#define ETH_SPEED_NUM_25G 25000 /**< 25 Gbps */ +#define ETH_SPEED_NUM_40G 40000 /**< 40 Gbps */ +#define ETH_SPEED_NUM_50G 50000 /**< 50 Gbps */ +#define ETH_SPEED_NUM_56G 56000 /**< 56 Gbps */ +#define ETH_SPEED_NUM_100G 100000 /**< 100 Gbps */ + +/** + * A structure used to retrieve link-level information of an Ethernet port. + */ +__extension__ +struct rte_eth_link { + uint32_t link_speed; /**< ETH_SPEED_NUM_ */ + uint16_t link_duplex : 1; /**< ETH_LINK_[HALF/FULL]_DUPLEX */ + uint16_t link_autoneg : 1; /**< ETH_LINK_[AUTONEG/FIXED] */ + uint16_t link_status : 1; /**< ETH_LINK_[DOWN/UP] */ +} __attribute__((aligned(8))); /**< aligned for atomic64 read/write */ + +/* Utility constants */ +#define ETH_LINK_HALF_DUPLEX 0 /**< Half-duplex connection (see link_duplex). */ +#define ETH_LINK_FULL_DUPLEX 1 /**< Full-duplex connection (see link_duplex). */ +#define ETH_LINK_DOWN 0 /**< Link is down (see link_status). */ +#define ETH_LINK_UP 1 /**< Link is up (see link_status). */ +#define ETH_LINK_FIXED 0 /**< No autonegotiation (see link_autoneg). */ +#define ETH_LINK_AUTONEG 1 /**< Autonegotiated (see link_autoneg). */ + +/** + * A structure used to configure the ring threshold registers of an RX/TX + * queue for an Ethernet port. + */ +struct rte_eth_thresh { + uint8_t pthresh; /**< Ring prefetch threshold. */ + uint8_t hthresh; /**< Ring host threshold. */ + uint8_t wthresh; /**< Ring writeback threshold. */ +}; + +/** + * Simple flags are used for rte_eth_conf.rxmode.mq_mode. + */ +#define ETH_MQ_RX_RSS_FLAG 0x1 +#define ETH_MQ_RX_DCB_FLAG 0x2 +#define ETH_MQ_RX_VMDQ_FLAG 0x4 + +/** + * A set of values to identify what method is to be used to route + * packets to multiple queues. + */ +enum rte_eth_rx_mq_mode { + /** None of DCB,RSS or VMDQ mode */ + ETH_MQ_RX_NONE = 0, + + /** For RX side, only RSS is on */ + ETH_MQ_RX_RSS = ETH_MQ_RX_RSS_FLAG, + /** For RX side,only DCB is on. */ + ETH_MQ_RX_DCB = ETH_MQ_RX_DCB_FLAG, + /** Both DCB and RSS enable */ + ETH_MQ_RX_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG, + + /** Only VMDQ, no RSS nor DCB */ + ETH_MQ_RX_VMDQ_ONLY = ETH_MQ_RX_VMDQ_FLAG, + /** RSS mode with VMDQ */ + ETH_MQ_RX_VMDQ_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_VMDQ_FLAG, + /** Use VMDQ+DCB to route traffic to queues */ + ETH_MQ_RX_VMDQ_DCB = ETH_MQ_RX_VMDQ_FLAG | ETH_MQ_RX_DCB_FLAG, + /** Enable both VMDQ and DCB in VMDq */ + ETH_MQ_RX_VMDQ_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG | + ETH_MQ_RX_VMDQ_FLAG, +}; + +/** + * for rx mq mode backward compatible + */ +#define ETH_RSS ETH_MQ_RX_RSS +#define VMDQ_DCB ETH_MQ_RX_VMDQ_DCB +#define ETH_DCB_RX ETH_MQ_RX_DCB + +/** + * A set of values to identify what method is to be used to transmit + * packets using multi-TCs. + */ +enum rte_eth_tx_mq_mode { + ETH_MQ_TX_NONE = 0, /**< It is in neither DCB nor VT mode. */ + ETH_MQ_TX_DCB, /**< For TX side,only DCB is on. */ + ETH_MQ_TX_VMDQ_DCB, /**< For TX side,both DCB and VT is on. */ + ETH_MQ_TX_VMDQ_ONLY, /**< Only VT on, no DCB */ +}; + +/** + * for tx mq mode backward compatible + */ +#define ETH_DCB_NONE ETH_MQ_TX_NONE +#define ETH_VMDQ_DCB_TX ETH_MQ_TX_VMDQ_DCB +#define ETH_DCB_TX ETH_MQ_TX_DCB + +/** + * A structure used to configure the RX features of an Ethernet port. + */ +struct rte_eth_rxmode { + /** The multi-queue packet distribution mode to be used, e.g. RSS. */ + enum rte_eth_rx_mq_mode mq_mode; + uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. */ + uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/ + /** + * Per-port Rx offloads to be set using DEV_RX_OFFLOAD_* flags. + * Only offloads set on rx_offload_capa field on rte_eth_dev_info + * structure are allowed to be set. + */ + uint64_t offloads; + __extension__ + /** + * Below bitfield API is obsolete. Application should + * enable per-port offloads using the offload field + * above. + */ + uint16_t header_split : 1, /**< Header Split enable. */ + hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */ + hw_vlan_filter : 1, /**< VLAN filter enable. */ + hw_vlan_strip : 1, /**< VLAN strip enable. */ + hw_vlan_extend : 1, /**< Extended VLAN enable. */ + jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */ + hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */ + enable_scatter : 1, /**< Enable scatter packets rx handler */ + enable_lro : 1, /**< Enable LRO */ + hw_timestamp : 1, /**< Enable HW timestamp */ + security : 1, /**< Enable rte_security offloads */ + /** + * When set the offload bitfield should be ignored. + * Instead per-port Rx offloads should be set on offloads + * field above. + * Per-queue offloads shuold be set on rte_eth_rxq_conf + * structure. + * This bit is temporary till rxmode bitfield offloads API will + * be deprecated. + */ + ignore_offload_bitfield : 1; +}; + +/** + * VLAN types to indicate if it is for single VLAN, inner VLAN or outer VLAN. + * Note that single VLAN is treated the same as inner VLAN. + */ +enum rte_vlan_type { + ETH_VLAN_TYPE_UNKNOWN = 0, + ETH_VLAN_TYPE_INNER, /**< Inner VLAN. */ + ETH_VLAN_TYPE_OUTER, /**< Single VLAN, or outer VLAN. */ + ETH_VLAN_TYPE_MAX, +}; + +/** + * A structure used to describe a vlan filter. + * If the bit corresponding to a VID is set, such VID is on. + */ +struct rte_vlan_filter_conf { + uint64_t ids[64]; +}; + +/** + * A structure used to configure the Receive Side Scaling (RSS) feature + * of an Ethernet port. + * If not NULL, the *rss_key* pointer of the *rss_conf* structure points + * to an array holding the RSS key to use for hashing specific header + * fields of received packets. The length of this array should be indicated + * by *rss_key_len* below. Otherwise, a default random hash key is used by + * the device driver. + * + * The *rss_key_len* field of the *rss_conf* structure indicates the length + * in bytes of the array pointed by *rss_key*. To be compatible, this length + * will be checked in i40e only. Others assume 40 bytes to be used as before. + * + * The *rss_hf* field of the *rss_conf* structure indicates the different + * types of IPv4/IPv6 packets to which the RSS hashing must be applied. + * Supplying an *rss_hf* equal to zero disables the RSS feature. + */ +struct rte_eth_rss_conf { + uint8_t *rss_key; /**< If not NULL, 40-byte hash key. */ + uint8_t rss_key_len; /**< hash key length in bytes. */ + uint64_t rss_hf; /**< Hash functions to apply - see below. */ +}; + +/* + * The RSS offload types are defined based on flow types which are defined + * in rte_eth_ctrl.h. Different NIC hardwares may support different RSS offload + * types. The supported flow types or RSS offload types can be queried by + * rte_eth_dev_info_get(). + */ +#define ETH_RSS_IPV4 (1ULL << RTE_ETH_FLOW_IPV4) +#define ETH_RSS_FRAG_IPV4 (1ULL << RTE_ETH_FLOW_FRAG_IPV4) +#define ETH_RSS_NONFRAG_IPV4_TCP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_TCP) +#define ETH_RSS_NONFRAG_IPV4_UDP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_UDP) +#define ETH_RSS_NONFRAG_IPV4_SCTP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_SCTP) +#define ETH_RSS_NONFRAG_IPV4_OTHER (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_OTHER) +#define ETH_RSS_IPV6 (1ULL << RTE_ETH_FLOW_IPV6) +#define ETH_RSS_FRAG_IPV6 (1ULL << RTE_ETH_FLOW_FRAG_IPV6) +#define ETH_RSS_NONFRAG_IPV6_TCP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_TCP) +#define ETH_RSS_NONFRAG_IPV6_UDP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_UDP) +#define ETH_RSS_NONFRAG_IPV6_SCTP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_SCTP) +#define ETH_RSS_NONFRAG_IPV6_OTHER (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_OTHER) +#define ETH_RSS_L2_PAYLOAD (1ULL << RTE_ETH_FLOW_L2_PAYLOAD) +#define ETH_RSS_IPV6_EX (1ULL << RTE_ETH_FLOW_IPV6_EX) +#define ETH_RSS_IPV6_TCP_EX (1ULL << RTE_ETH_FLOW_IPV6_TCP_EX) +#define ETH_RSS_IPV6_UDP_EX (1ULL << RTE_ETH_FLOW_IPV6_UDP_EX) +#define ETH_RSS_PORT (1ULL << RTE_ETH_FLOW_PORT) +#define ETH_RSS_VXLAN (1ULL << RTE_ETH_FLOW_VXLAN) +#define ETH_RSS_GENEVE (1ULL << RTE_ETH_FLOW_GENEVE) +#define ETH_RSS_NVGRE (1ULL << RTE_ETH_FLOW_NVGRE) + +#define ETH_RSS_IP ( \ + ETH_RSS_IPV4 | \ + ETH_RSS_FRAG_IPV4 | \ + ETH_RSS_NONFRAG_IPV4_OTHER | \ + ETH_RSS_IPV6 | \ + ETH_RSS_FRAG_IPV6 | \ + ETH_RSS_NONFRAG_IPV6_OTHER | \ + ETH_RSS_IPV6_EX) + +#define ETH_RSS_UDP ( \ + ETH_RSS_NONFRAG_IPV4_UDP | \ + ETH_RSS_NONFRAG_IPV6_UDP | \ + ETH_RSS_IPV6_UDP_EX) + +#define ETH_RSS_TCP ( \ + ETH_RSS_NONFRAG_IPV4_TCP | \ + ETH_RSS_NONFRAG_IPV6_TCP | \ + ETH_RSS_IPV6_TCP_EX) + +#define ETH_RSS_SCTP ( \ + ETH_RSS_NONFRAG_IPV4_SCTP | \ + ETH_RSS_NONFRAG_IPV6_SCTP) + +#define ETH_RSS_TUNNEL ( \ + ETH_RSS_VXLAN | \ + ETH_RSS_GENEVE | \ + ETH_RSS_NVGRE) + +/**< Mask of valid RSS hash protocols */ +#define ETH_RSS_PROTO_MASK ( \ + ETH_RSS_IPV4 | \ + ETH_RSS_FRAG_IPV4 | \ + ETH_RSS_NONFRAG_IPV4_TCP | \ + ETH_RSS_NONFRAG_IPV4_UDP | \ + ETH_RSS_NONFRAG_IPV4_SCTP | \ + ETH_RSS_NONFRAG_IPV4_OTHER | \ + ETH_RSS_IPV6 | \ + ETH_RSS_FRAG_IPV6 | \ + ETH_RSS_NONFRAG_IPV6_TCP | \ + ETH_RSS_NONFRAG_IPV6_UDP | \ + ETH_RSS_NONFRAG_IPV6_SCTP | \ + ETH_RSS_NONFRAG_IPV6_OTHER | \ + ETH_RSS_L2_PAYLOAD | \ + ETH_RSS_IPV6_EX | \ + ETH_RSS_IPV6_TCP_EX | \ + ETH_RSS_IPV6_UDP_EX | \ + ETH_RSS_PORT | \ + ETH_RSS_VXLAN | \ + ETH_RSS_GENEVE | \ + ETH_RSS_NVGRE) + +/* + * Definitions used for redirection table entry size. + * Some RSS RETA sizes may not be supported by some drivers, check the + * documentation or the description of relevant functions for more details. + */ +#define ETH_RSS_RETA_SIZE_64 64 +#define ETH_RSS_RETA_SIZE_128 128 +#define ETH_RSS_RETA_SIZE_256 256 +#define ETH_RSS_RETA_SIZE_512 512 +#define RTE_RETA_GROUP_SIZE 64 + +/* Definitions used for VMDQ and DCB functionality */ +#define ETH_VMDQ_MAX_VLAN_FILTERS 64 /**< Maximum nb. of VMDQ vlan filters. */ +#define ETH_DCB_NUM_USER_PRIORITIES 8 /**< Maximum nb. of DCB priorities. */ +#define ETH_VMDQ_DCB_NUM_QUEUES 128 /**< Maximum nb. of VMDQ DCB queues. */ +#define ETH_DCB_NUM_QUEUES 128 /**< Maximum nb. of DCB queues. */ + +/* DCB capability defines */ +#define ETH_DCB_PG_SUPPORT 0x00000001 /**< Priority Group(ETS) support. */ +#define ETH_DCB_PFC_SUPPORT 0x00000002 /**< Priority Flow Control support. */ + +/* Definitions used for VLAN Offload functionality */ +#define ETH_VLAN_STRIP_OFFLOAD 0x0001 /**< VLAN Strip On/Off */ +#define ETH_VLAN_FILTER_OFFLOAD 0x0002 /**< VLAN Filter On/Off */ +#define ETH_VLAN_EXTEND_OFFLOAD 0x0004 /**< VLAN Extend On/Off */ + +/* Definitions used for mask VLAN setting */ +#define ETH_VLAN_STRIP_MASK 0x0001 /**< VLAN Strip setting mask */ +#define ETH_VLAN_FILTER_MASK 0x0002 /**< VLAN Filter setting mask*/ +#define ETH_VLAN_EXTEND_MASK 0x0004 /**< VLAN Extend setting mask*/ +#define ETH_VLAN_ID_MAX 0x0FFF /**< VLAN ID is in lower 12 bits*/ + +/* Definitions used for receive MAC address */ +#define ETH_NUM_RECEIVE_MAC_ADDR 128 /**< Maximum nb. of receive mac addr. */ + +/* Definitions used for unicast hash */ +#define ETH_VMDQ_NUM_UC_HASH_ARRAY 128 /**< Maximum nb. of UC hash array. */ + +/* Definitions used for VMDQ pool rx mode setting */ +#define ETH_VMDQ_ACCEPT_UNTAG 0x0001 /**< accept untagged packets. */ +#define ETH_VMDQ_ACCEPT_HASH_MC 0x0002 /**< accept packets in multicast table . */ +#define ETH_VMDQ_ACCEPT_HASH_UC 0x0004 /**< accept packets in unicast table. */ +#define ETH_VMDQ_ACCEPT_BROADCAST 0x0008 /**< accept broadcast packets. */ +#define ETH_VMDQ_ACCEPT_MULTICAST 0x0010 /**< multicast promiscuous. */ + +/** Maximum nb. of vlan per mirror rule */ +#define ETH_MIRROR_MAX_VLANS 64 + +#define ETH_MIRROR_VIRTUAL_POOL_UP 0x01 /**< Virtual Pool uplink Mirroring. */ +#define ETH_MIRROR_UPLINK_PORT 0x02 /**< Uplink Port Mirroring. */ +#define ETH_MIRROR_DOWNLINK_PORT 0x04 /**< Downlink Port Mirroring. */ +#define ETH_MIRROR_VLAN 0x08 /**< VLAN Mirroring. */ +#define ETH_MIRROR_VIRTUAL_POOL_DOWN 0x10 /**< Virtual Pool downlink Mirroring. */ + +/** + * A structure used to configure VLAN traffic mirror of an Ethernet port. + */ +struct rte_eth_vlan_mirror { + uint64_t vlan_mask; /**< mask for valid VLAN ID. */ + /** VLAN ID list for vlan mirroring. */ + uint16_t vlan_id[ETH_MIRROR_MAX_VLANS]; +}; + +/** + * A structure used to configure traffic mirror of an Ethernet port. + */ +struct rte_eth_mirror_conf { + uint8_t rule_type; /**< Mirroring rule type */ + uint8_t dst_pool; /**< Destination pool for this mirror rule. */ + uint64_t pool_mask; /**< Bitmap of pool for pool mirroring */ + /** VLAN ID setting for VLAN mirroring. */ + struct rte_eth_vlan_mirror vlan; +}; + +/** + * A structure used to configure 64 entries of Redirection Table of the + * Receive Side Scaling (RSS) feature of an Ethernet port. To configure + * more than 64 entries supported by hardware, an array of this structure + * is needed. + */ +struct rte_eth_rss_reta_entry64 { + uint64_t mask; + /**< Mask bits indicate which entries need to be updated/queried. */ + uint16_t reta[RTE_RETA_GROUP_SIZE]; + /**< Group of 64 redirection table entries. */ +}; + +/** + * This enum indicates the possible number of traffic classes + * in DCB configurations + */ +enum rte_eth_nb_tcs { + ETH_4_TCS = 4, /**< 4 TCs with DCB. */ + ETH_8_TCS = 8 /**< 8 TCs with DCB. */ +}; + +/** + * This enum indicates the possible number of queue pools + * in VMDQ configurations. + */ +enum rte_eth_nb_pools { + ETH_8_POOLS = 8, /**< 8 VMDq pools. */ + ETH_16_POOLS = 16, /**< 16 VMDq pools. */ + ETH_32_POOLS = 32, /**< 32 VMDq pools. */ + ETH_64_POOLS = 64 /**< 64 VMDq pools. */ +}; + +/* This structure may be extended in future. */ +struct rte_eth_dcb_rx_conf { + enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs */ + /** Traffic class each UP mapped to. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; +}; + +struct rte_eth_vmdq_dcb_tx_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools. */ + /** Traffic class each UP mapped to. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; +}; + +struct rte_eth_dcb_tx_conf { + enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs. */ + /** Traffic class each UP mapped to. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; +}; + +struct rte_eth_vmdq_tx_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< VMDq mode, 64 pools. */ +}; + +/** + * A structure used to configure the VMDQ+DCB feature + * of an Ethernet port. + * + * Using this feature, packets are routed to a pool of queues, based + * on the vlan id in the vlan tag, and then to a specific queue within + * that pool, using the user priority vlan tag field. + * + * A default pool may be used, if desired, to route all traffic which + * does not match the vlan filter rules. + */ +struct rte_eth_vmdq_dcb_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools */ + uint8_t enable_default_pool; /**< If non-zero, use a default pool */ + uint8_t default_pool; /**< The default pool, if applicable */ + uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */ + struct { + uint16_t vlan_id; /**< The vlan id of the received frame */ + uint64_t pools; /**< Bitmask of pools for packet rx */ + } pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; + /**< Selects a queue in a pool */ +}; + +/** + * A structure used to configure the VMDQ feature of an Ethernet port when + * not combined with the DCB feature. + * + * Using this feature, packets are routed to a pool of queues. By default, + * the pool selection is based on the MAC address, the vlan id in the + * vlan tag as specified in the pool_map array. + * Passing the ETH_VMDQ_ACCEPT_UNTAG in the rx_mode field allows pool + * selection using only the MAC address. MAC address to pool mapping is done + * using the rte_eth_dev_mac_addr_add function, with the pool parameter + * corresponding to the pool id. + * + * Queue selection within the selected pool will be done using RSS when + * it is enabled or revert to the first queue of the pool if not. + * + * A default pool may be used, if desired, to route all traffic which + * does not match the vlan filter rules or any pool MAC address. + */ +struct rte_eth_vmdq_rx_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< VMDq only mode, 8 or 64 pools */ + uint8_t enable_default_pool; /**< If non-zero, use a default pool */ + uint8_t default_pool; /**< The default pool, if applicable */ + uint8_t enable_loop_back; /**< Enable VT loop back */ + uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */ + uint32_t rx_mode; /**< Flags from ETH_VMDQ_ACCEPT_* */ + struct { + uint16_t vlan_id; /**< The vlan id of the received frame */ + uint64_t pools; /**< Bitmask of pools for packet rx */ + } pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */ +}; + +/** + * A structure used to configure the TX features of an Ethernet port. + */ +struct rte_eth_txmode { + enum rte_eth_tx_mq_mode mq_mode; /**< TX multi-queues mode. */ + /** + * Per-port Tx offloads to be set using DEV_TX_OFFLOAD_* flags. + * Only offloads set on tx_offload_capa field on rte_eth_dev_info + * structure are allowed to be set. + */ + uint64_t offloads; + + /* For i40e specifically */ + uint16_t pvid; + __extension__ + uint8_t hw_vlan_reject_tagged : 1, + /**< If set, reject sending out tagged pkts */ + hw_vlan_reject_untagged : 1, + /**< If set, reject sending out untagged pkts */ + hw_vlan_insert_pvid : 1; + /**< If set, enable port based VLAN insertion */ +}; + +/** + * A structure used to configure an RX ring of an Ethernet port. + */ +struct rte_eth_rxconf { + struct rte_eth_thresh rx_thresh; /**< RX ring threshold registers. */ + uint16_t rx_free_thresh; /**< Drives the freeing of RX descriptors. */ + uint8_t rx_drop_en; /**< Drop packets if no descriptors are available. */ + uint8_t rx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */ + /** + * Per-queue Rx offloads to be set using DEV_RX_OFFLOAD_* flags. + * Only offloads set on rx_queue_offload_capa or rx_offload_capa + * fields on rte_eth_dev_info structure are allowed to be set. + */ + uint64_t offloads; +}; + +#define ETH_TXQ_FLAGS_NOMULTSEGS 0x0001 /**< nb_segs=1 for all mbufs */ +#define ETH_TXQ_FLAGS_NOREFCOUNT 0x0002 /**< refcnt can be ignored */ +#define ETH_TXQ_FLAGS_NOMULTMEMP 0x0004 /**< all bufs come from same mempool */ +#define ETH_TXQ_FLAGS_NOVLANOFFL 0x0100 /**< disable VLAN offload */ +#define ETH_TXQ_FLAGS_NOXSUMSCTP 0x0200 /**< disable SCTP checksum offload */ +#define ETH_TXQ_FLAGS_NOXSUMUDP 0x0400 /**< disable UDP checksum offload */ +#define ETH_TXQ_FLAGS_NOXSUMTCP 0x0800 /**< disable TCP checksum offload */ +#define ETH_TXQ_FLAGS_NOOFFLOADS \ + (ETH_TXQ_FLAGS_NOVLANOFFL | ETH_TXQ_FLAGS_NOXSUMSCTP | \ + ETH_TXQ_FLAGS_NOXSUMUDP | ETH_TXQ_FLAGS_NOXSUMTCP) +#define ETH_TXQ_FLAGS_NOXSUMS \ + (ETH_TXQ_FLAGS_NOXSUMSCTP | ETH_TXQ_FLAGS_NOXSUMUDP | \ + ETH_TXQ_FLAGS_NOXSUMTCP) +/** + * When set the txq_flags should be ignored, + * instead per-queue Tx offloads will be set on offloads field + * located on rte_eth_txq_conf struct. + * This flag is temporary till the rte_eth_txq_conf.txq_flags + * API will be deprecated. + */ +#define ETH_TXQ_FLAGS_IGNORE 0x8000 + +/** + * A structure used to configure a TX ring of an Ethernet port. + */ +struct rte_eth_txconf { + struct rte_eth_thresh tx_thresh; /**< TX ring threshold registers. */ + uint16_t tx_rs_thresh; /**< Drives the setting of RS bit on TXDs. */ + uint16_t tx_free_thresh; /**< Start freeing TX buffers if there are + less free descriptors than this value. */ + + uint32_t txq_flags; /**< Set flags for the Tx queue */ + uint8_t tx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */ + /** + * Per-queue Tx offloads to be set using DEV_TX_OFFLOAD_* flags. + * Only offloads set on tx_queue_offload_capa or tx_offload_capa + * fields on rte_eth_dev_info structure are allowed to be set. + */ + uint64_t offloads; +}; + +/** + * A structure contains information about HW descriptor ring limitations. + */ +struct rte_eth_desc_lim { + uint16_t nb_max; /**< Max allowed number of descriptors. */ + uint16_t nb_min; /**< Min allowed number of descriptors. */ + uint16_t nb_align; /**< Number of descriptors should be aligned to. */ + + /** + * Max allowed number of segments per whole packet. + * + * - For TSO packet this is the total number of data descriptors allowed + * by device. + * + * @see nb_mtu_seg_max + */ + uint16_t nb_seg_max; + + /** + * Max number of segments per one MTU. + * + * - For non-TSO packet, this is the maximum allowed number of segments + * in a single transmit packet. + * + * - For TSO packet each segment within the TSO may span up to this + * value. + * + * @see nb_seg_max + */ + uint16_t nb_mtu_seg_max; +}; + +/** + * This enum indicates the flow control mode + */ +enum rte_eth_fc_mode { + RTE_FC_NONE = 0, /**< Disable flow control. */ + RTE_FC_RX_PAUSE, /**< RX pause frame, enable flowctrl on TX side. */ + RTE_FC_TX_PAUSE, /**< TX pause frame, enable flowctrl on RX side. */ + RTE_FC_FULL /**< Enable flow control on both side. */ +}; + +/** + * A structure used to configure Ethernet flow control parameter. + * These parameters will be configured into the register of the NIC. + * Please refer to the corresponding data sheet for proper value. + */ +struct rte_eth_fc_conf { + uint32_t high_water; /**< High threshold value to trigger XOFF */ + uint32_t low_water; /**< Low threshold value to trigger XON */ + uint16_t pause_time; /**< Pause quota in the Pause frame */ + uint16_t send_xon; /**< Is XON frame need be sent */ + enum rte_eth_fc_mode mode; /**< Link flow control mode */ + uint8_t mac_ctrl_frame_fwd; /**< Forward MAC control frames */ + uint8_t autoneg; /**< Use Pause autoneg */ +}; + +/** + * A structure used to configure Ethernet priority flow control parameter. + * These parameters will be configured into the register of the NIC. + * Please refer to the corresponding data sheet for proper value. + */ +struct rte_eth_pfc_conf { + struct rte_eth_fc_conf fc; /**< General flow control parameter. */ + uint8_t priority; /**< VLAN User Priority. */ +}; + +/** + * Memory space that can be configured to store Flow Director filters + * in the board memory. + */ +enum rte_fdir_pballoc_type { + RTE_FDIR_PBALLOC_64K = 0, /**< 64k. */ + RTE_FDIR_PBALLOC_128K, /**< 128k. */ + RTE_FDIR_PBALLOC_256K, /**< 256k. */ +}; + +/** + * Select report mode of FDIR hash information in RX descriptors. + */ +enum rte_fdir_status_mode { + RTE_FDIR_NO_REPORT_STATUS = 0, /**< Never report FDIR hash. */ + RTE_FDIR_REPORT_STATUS, /**< Only report FDIR hash for matching pkts. */ + RTE_FDIR_REPORT_STATUS_ALWAYS, /**< Always report FDIR hash. */ +}; + +/** + * A structure used to configure the Flow Director (FDIR) feature + * of an Ethernet port. + * + * If mode is RTE_FDIR_DISABLE, the pballoc value is ignored. + */ +struct rte_fdir_conf { + enum rte_fdir_mode mode; /**< Flow Director mode. */ + enum rte_fdir_pballoc_type pballoc; /**< Space for FDIR filters. */ + enum rte_fdir_status_mode status; /**< How to report FDIR hash. */ + /** RX queue of packets matching a "drop" filter in perfect mode. */ + uint8_t drop_queue; + struct rte_eth_fdir_masks mask; + struct rte_eth_fdir_flex_conf flex_conf; + /**< Flex payload configuration. */ +}; + +/** + * UDP tunneling configuration. + * Used to config the UDP port for a type of tunnel. + * NICs need the UDP port to identify the tunnel type. + * Normally a type of tunnel has a default UDP port, this structure can be used + * in case if the users want to change or support more UDP port. + */ +struct rte_eth_udp_tunnel { + uint16_t udp_port; /**< UDP port used for the tunnel. */ + uint8_t prot_type; /**< Tunnel type. Defined in rte_eth_tunnel_type. */ +}; + +/** + * A structure used to enable/disable specific device interrupts. + */ +struct rte_intr_conf { + /** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */ + uint32_t lsc:1; + /** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */ + uint32_t rxq:1; + /** enable/disable rmv interrupt. 0 (default) - disable, 1 enable */ + uint32_t rmv:1; +}; + +/** + * A structure used to configure an Ethernet port. + * Depending upon the RX multi-queue mode, extra advanced + * configuration settings may be needed. + */ +struct rte_eth_conf { + uint32_t link_speeds; /**< bitmap of ETH_LINK_SPEED_XXX of speeds to be + used. ETH_LINK_SPEED_FIXED disables link + autonegotiation, and a unique speed shall be + set. Otherwise, the bitmap defines the set of + speeds to be advertised. If the special value + ETH_LINK_SPEED_AUTONEG (0) is used, all speeds + supported are advertised. */ + struct rte_eth_rxmode rxmode; /**< Port RX configuration. */ + struct rte_eth_txmode txmode; /**< Port TX configuration. */ + uint32_t lpbk_mode; /**< Loopback operation mode. By default the value + is 0, meaning the loopback mode is disabled. + Read the datasheet of given ethernet controller + for details. The possible values of this field + are defined in implementation of each driver. */ + struct { + struct rte_eth_rss_conf rss_conf; /**< Port RSS configuration */ + struct rte_eth_vmdq_dcb_conf vmdq_dcb_conf; + /**< Port vmdq+dcb configuration. */ + struct rte_eth_dcb_rx_conf dcb_rx_conf; + /**< Port dcb RX configuration. */ + struct rte_eth_vmdq_rx_conf vmdq_rx_conf; + /**< Port vmdq RX configuration. */ + } rx_adv_conf; /**< Port RX filtering configuration. */ + union { + struct rte_eth_vmdq_dcb_tx_conf vmdq_dcb_tx_conf; + /**< Port vmdq+dcb TX configuration. */ + struct rte_eth_dcb_tx_conf dcb_tx_conf; + /**< Port dcb TX configuration. */ + struct rte_eth_vmdq_tx_conf vmdq_tx_conf; + /**< Port vmdq TX configuration. */ + } tx_adv_conf; /**< Port TX DCB configuration (union). */ + /** Currently,Priority Flow Control(PFC) are supported,if DCB with PFC + is needed,and the variable must be set ETH_DCB_PFC_SUPPORT. */ + uint32_t dcb_capability_en; + struct rte_fdir_conf fdir_conf; /**< FDIR configuration. */ + struct rte_intr_conf intr_conf; /**< Interrupt mode configuration. */ +}; + +/** + * A structure used to retrieve the contextual information of + * an Ethernet device, such as the controlling driver of the device, + * its PCI context, etc... + */ + +/** + * RX offload capabilities of a device. + */ +#define DEV_RX_OFFLOAD_VLAN_STRIP 0x00000001 +#define DEV_RX_OFFLOAD_IPV4_CKSUM 0x00000002 +#define DEV_RX_OFFLOAD_UDP_CKSUM 0x00000004 +#define DEV_RX_OFFLOAD_TCP_CKSUM 0x00000008 +#define DEV_RX_OFFLOAD_TCP_LRO 0x00000010 +#define DEV_RX_OFFLOAD_QINQ_STRIP 0x00000020 +#define DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000040 +#define DEV_RX_OFFLOAD_MACSEC_STRIP 0x00000080 +#define DEV_RX_OFFLOAD_HEADER_SPLIT 0x00000100 +#define DEV_RX_OFFLOAD_VLAN_FILTER 0x00000200 +#define DEV_RX_OFFLOAD_VLAN_EXTEND 0x00000400 +#define DEV_RX_OFFLOAD_JUMBO_FRAME 0x00000800 +#define DEV_RX_OFFLOAD_CRC_STRIP 0x00001000 +#define DEV_RX_OFFLOAD_SCATTER 0x00002000 +#define DEV_RX_OFFLOAD_TIMESTAMP 0x00004000 +#define DEV_RX_OFFLOAD_SECURITY 0x00008000 +#define DEV_RX_OFFLOAD_CHECKSUM (DEV_RX_OFFLOAD_IPV4_CKSUM | \ + DEV_RX_OFFLOAD_UDP_CKSUM | \ + DEV_RX_OFFLOAD_TCP_CKSUM) +#define DEV_RX_OFFLOAD_VLAN (DEV_RX_OFFLOAD_VLAN_STRIP | \ + DEV_RX_OFFLOAD_VLAN_FILTER | \ + DEV_RX_OFFLOAD_VLAN_EXTEND) + +/* + * If new Rx offload capabilities are defined, they also must be + * mentioned in rte_rx_offload_names in rte_ethdev.c file. + */ + +/** + * TX offload capabilities of a device. + */ +#define DEV_TX_OFFLOAD_VLAN_INSERT 0x00000001 +#define DEV_TX_OFFLOAD_IPV4_CKSUM 0x00000002 +#define DEV_TX_OFFLOAD_UDP_CKSUM 0x00000004 +#define DEV_TX_OFFLOAD_TCP_CKSUM 0x00000008 +#define DEV_TX_OFFLOAD_SCTP_CKSUM 0x00000010 +#define DEV_TX_OFFLOAD_TCP_TSO 0x00000020 +#define DEV_TX_OFFLOAD_UDP_TSO 0x00000040 +#define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000080 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_QINQ_INSERT 0x00000100 +#define DEV_TX_OFFLOAD_VXLAN_TNL_TSO 0x00000200 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_GRE_TNL_TSO 0x00000400 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_IPIP_TNL_TSO 0x00000800 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_GENEVE_TNL_TSO 0x00001000 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_MACSEC_INSERT 0x00002000 +#define DEV_TX_OFFLOAD_MT_LOCKFREE 0x00004000 +/**< Multiple threads can invoke rte_eth_tx_burst() concurrently on the same + * tx queue without SW lock. + */ +#define DEV_TX_OFFLOAD_MULTI_SEGS 0x00008000 +/**< Device supports multi segment send. */ +#define DEV_TX_OFFLOAD_MBUF_FAST_FREE 0x00010000 +/**< Device supports optimization for fast release of mbufs. + * When set application must guarantee that per-queue all mbufs comes from + * the same mempool and has refcnt = 1. + */ +#define DEV_TX_OFFLOAD_SECURITY 0x00020000 +/** + * Device supports generic UDP tunneled packet TSO. + * Application must set PKT_TX_TUNNEL_UDP and other mbuf fields required + * for tunnel TSO. + */ +#define DEV_TX_OFFLOAD_UDP_TNL_TSO 0x00040000 +/** + * Device supports generic IP tunneled packet TSO. + * Application must set PKT_TX_TUNNEL_IP and other mbuf fields required + * for tunnel TSO. + */ +#define DEV_TX_OFFLOAD_IP_TNL_TSO 0x00080000 + +#define RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP 0x00000001 +/**< Device supports Rx queue setup after device started*/ +#define RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP 0x00000002 +/**< Device supports Tx queue setup after device started*/ + +/* + * If new Tx offload capabilities are defined, they also must be + * mentioned in rte_tx_offload_names in rte_ethdev.c file. + */ + +struct rte_pci_device; + +/* + * Fallback default preferred Rx/Tx port parameters. + * These are used if an application requests default parameters + * but the PMD does not provide preferred values. + */ +#define RTE_ETH_DEV_FALLBACK_RX_RINGSIZE 512 +#define RTE_ETH_DEV_FALLBACK_TX_RINGSIZE 512 +#define RTE_ETH_DEV_FALLBACK_RX_NBQUEUES 1 +#define RTE_ETH_DEV_FALLBACK_TX_NBQUEUES 1 + +/** + * Preferred Rx/Tx port parameters. + * There are separate instances of this structure for transmission + * and reception respectively. + */ +struct rte_eth_dev_portconf { + uint16_t burst_size; /**< Device-preferred burst size */ + uint16_t ring_size; /**< Device-preferred size of queue rings */ + uint16_t nb_queues; /**< Device-preferred number of queues */ +}; + +/** + * Default values for switch domain id when ethdev does not support switch + * domain definitions. + */ +#define RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID (0) + +/** + * Ethernet device associated switch information + */ +struct rte_eth_switch_info { + const char *name; /**< switch name */ + uint16_t domain_id; /**< switch domain id */ + uint16_t port_id; + /**< + * mapping to the devices physical switch port as enumerated from the + * perspective of the embedded interconnect/switch. For SR-IOV enabled + * device this may correspond to the VF_ID of each virtual function, + * but each driver should explicitly define the mapping of switch + * port identifier to that physical interconnect/switch + */ +}; + +/** + * Ethernet device information + */ +struct rte_eth_dev_info { + struct rte_device *device; /** Generic device information */ + const char *driver_name; /**< Device Driver name. */ + unsigned int if_index; /**< Index to bound host interface, or 0 if none. + Use if_indextoname() to translate into an interface name. */ + const uint32_t *dev_flags; /**< Device flags */ + uint32_t min_rx_bufsize; /**< Minimum size of RX buffer. */ + uint32_t max_rx_pktlen; /**< Maximum configurable length of RX pkt. */ + uint16_t max_rx_queues; /**< Maximum number of RX queues. */ + uint16_t max_tx_queues; /**< Maximum number of TX queues. */ + uint32_t max_mac_addrs; /**< Maximum number of MAC addresses. */ + uint32_t max_hash_mac_addrs; + /** Maximum number of hash MAC addresses for MTA and UTA. */ + uint16_t max_vfs; /**< Maximum number of VFs. */ + uint16_t max_vmdq_pools; /**< Maximum number of VMDq pools. */ + uint64_t rx_offload_capa; + /**< All RX offload capabilities including all per-queue ones */ + uint64_t tx_offload_capa; + /**< All TX offload capabilities including all per-queue ones */ + uint64_t rx_queue_offload_capa; + /**< Device per-queue RX offload capabilities. */ + uint64_t tx_queue_offload_capa; + /**< Device per-queue TX offload capabilities. */ + uint16_t reta_size; + /**< Device redirection table size, the total number of entries. */ + uint8_t hash_key_size; /**< Hash key size in bytes */ + /** Bit mask of RSS offloads, the bit offset also means flow type */ + uint64_t flow_type_rss_offloads; + struct rte_eth_rxconf default_rxconf; /**< Default RX configuration */ + struct rte_eth_txconf default_txconf; /**< Default TX configuration */ + uint16_t vmdq_queue_base; /**< First queue ID for VMDQ pools. */ + uint16_t vmdq_queue_num; /**< Queue number for VMDQ pools. */ + uint16_t vmdq_pool_base; /**< First ID of VMDQ pools. */ + struct rte_eth_desc_lim rx_desc_lim; /**< RX descriptors limits */ + struct rte_eth_desc_lim tx_desc_lim; /**< TX descriptors limits */ + uint32_t speed_capa; /**< Supported speeds bitmap (ETH_LINK_SPEED_). */ + /** Configured number of rx/tx queues */ + uint16_t nb_rx_queues; /**< Number of RX queues. */ + uint16_t nb_tx_queues; /**< Number of TX queues. */ + /** Rx parameter recommendations */ + struct rte_eth_dev_portconf default_rxportconf; + /** Tx parameter recommendations */ + struct rte_eth_dev_portconf default_txportconf; + /** Generic device capabilities (RTE_ETH_DEV_CAPA_). */ + uint64_t dev_capa; + /** + * Switching information for ports on a device with a + * embedded managed interconnect/switch. + */ + struct rte_eth_switch_info switch_info; +}; + +/** + * Ethernet device RX queue information structure. + * Used to retieve information about configured queue. + */ +struct rte_eth_rxq_info { + struct rte_mempool *mp; /**< mempool used by that queue. */ + struct rte_eth_rxconf conf; /**< queue config parameters. */ + uint8_t scattered_rx; /**< scattered packets RX supported. */ + uint16_t nb_desc; /**< configured number of RXDs. */ +} __rte_cache_min_aligned; + +/** + * Ethernet device TX queue information structure. + * Used to retrieve information about configured queue. + */ +struct rte_eth_txq_info { + struct rte_eth_txconf conf; /**< queue config parameters. */ + uint16_t nb_desc; /**< configured number of TXDs. */ +} __rte_cache_min_aligned; + +/** Maximum name length for extended statistics counters */ +#define RTE_ETH_XSTATS_NAME_SIZE 64 + +/** + * An Ethernet device extended statistic structure + * + * This structure is used by rte_eth_xstats_get() to provide + * statistics that are not provided in the generic *rte_eth_stats* + * structure. + * It maps a name id, corresponding to an index in the array returned + * by rte_eth_xstats_get_names(), to a statistic value. + */ +struct rte_eth_xstat { + uint64_t id; /**< The index in xstats name array. */ + uint64_t value; /**< The statistic counter value. */ +}; + +/** + * A name element for extended statistics. + * + * An array of this structure is returned by rte_eth_xstats_get_names(). + * It lists the names of extended statistics for a PMD. The *rte_eth_xstat* + * structure references these names by their array index. + */ +struct rte_eth_xstat_name { + char name[RTE_ETH_XSTATS_NAME_SIZE]; /**< The statistic name. */ +}; + +#define ETH_DCB_NUM_TCS 8 +#define ETH_MAX_VMDQ_POOL 64 + +/** + * A structure used to get the information of queue and + * TC mapping on both TX and RX paths. + */ +struct rte_eth_dcb_tc_queue_mapping { + /** rx queues assigned to tc per Pool */ + struct { + uint8_t base; + uint8_t nb_queue; + } tc_rxq[ETH_MAX_VMDQ_POOL][ETH_DCB_NUM_TCS]; + /** rx queues assigned to tc per Pool */ + struct { + uint8_t base; + uint8_t nb_queue; + } tc_txq[ETH_MAX_VMDQ_POOL][ETH_DCB_NUM_TCS]; +}; + +/** + * A structure used to get the information of DCB. + * It includes TC UP mapping and queue TC mapping. + */ +struct rte_eth_dcb_info { + uint8_t nb_tcs; /**< number of TCs */ + uint8_t prio_tc[ETH_DCB_NUM_USER_PRIORITIES]; /**< Priority to tc */ + uint8_t tc_bws[ETH_DCB_NUM_TCS]; /**< TX BW percentage for each TC */ + /** rx queues assigned to tc */ + struct rte_eth_dcb_tc_queue_mapping tc_queue; +}; + +/** + * RX/TX queue states + */ +#define RTE_ETH_QUEUE_STATE_STOPPED 0 +#define RTE_ETH_QUEUE_STATE_STARTED 1 + +#define RTE_ETH_ALL RTE_MAX_ETHPORTS + +/* Macros to check for valid port */ +#define RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, retval) do { \ + if (!rte_eth_dev_is_valid_port(port_id)) { \ + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \ + return retval; \ + } \ +} while (0) + +#define RTE_ETH_VALID_PORTID_OR_RET(port_id) do { \ + if (!rte_eth_dev_is_valid_port(port_id)) { \ + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \ + return; \ + } \ +} while (0) + +/** + * l2 tunnel configuration. + */ + +/**< l2 tunnel enable mask */ +#define ETH_L2_TUNNEL_ENABLE_MASK 0x00000001 +/**< l2 tunnel insertion mask */ +#define ETH_L2_TUNNEL_INSERTION_MASK 0x00000002 +/**< l2 tunnel stripping mask */ +#define ETH_L2_TUNNEL_STRIPPING_MASK 0x00000004 +/**< l2 tunnel forwarding mask */ +#define ETH_L2_TUNNEL_FORWARDING_MASK 0x00000008 + +/** + * Function type used for RX packet processing packet callbacks. + * + * The callback function is called on RX with a burst of packets that have + * been received on the given port and queue. + * + * @param port_id + * The Ethernet port on which RX is being performed. + * @param queue + * The queue on the Ethernet port which is being used to receive the packets. + * @param pkts + * The burst of packets that have just been received. + * @param nb_pkts + * The number of packets in the burst pointed to by "pkts". + * @param max_pkts + * The max number of packets that can be stored in the "pkts" array. + * @param user_param + * The arbitrary user parameter passed in by the application when the callback + * was originally configured. + * @return + * The number of packets returned to the user. + */ +typedef uint16_t (*rte_rx_callback_fn)(uint16_t port_id, uint16_t queue, + struct rte_mbuf *pkts[], uint16_t nb_pkts, uint16_t max_pkts, + void *user_param); + +/** + * Function type used for TX packet processing packet callbacks. + * + * The callback function is called on TX with a burst of packets immediately + * before the packets are put onto the hardware queue for transmission. + * + * @param port_id + * The Ethernet port on which TX is being performed. + * @param queue + * The queue on the Ethernet port which is being used to transmit the packets. + * @param pkts + * The burst of packets that are about to be transmitted. + * @param nb_pkts + * The number of packets in the burst pointed to by "pkts". + * @param user_param + * The arbitrary user parameter passed in by the application when the callback + * was originally configured. + * @return + * The number of packets to be written to the NIC. + */ +typedef uint16_t (*rte_tx_callback_fn)(uint16_t port_id, uint16_t queue, + struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param); + +/** + * Possible states of an ethdev port. + */ +enum rte_eth_dev_state { + /** Device is unused before being probed. */ + RTE_ETH_DEV_UNUSED = 0, + /** Device is attached when allocated in probing. */ + RTE_ETH_DEV_ATTACHED, + /** The deferred state is useless and replaced by ownership. */ + RTE_ETH_DEV_DEFERRED, + /** Device is in removed state when plug-out is detected. */ + RTE_ETH_DEV_REMOVED, +}; + +struct rte_eth_dev_sriov { + uint8_t active; /**< SRIOV is active with 16, 32 or 64 pools */ + uint8_t nb_q_per_pool; /**< rx queue number per pool */ + uint16_t def_vmdq_idx; /**< Default pool num used for PF */ + uint16_t def_pool_q_idx; /**< Default pool queue start reg index */ +}; +#define RTE_ETH_DEV_SRIOV(dev) ((dev)->data->sriov) + +#define RTE_ETH_NAME_MAX_LEN RTE_DEV_NAME_MAX_LEN + +#define RTE_ETH_DEV_NO_OWNER 0 + +#define RTE_ETH_MAX_OWNER_NAME_LEN 64 + +struct rte_eth_dev_owner { + uint64_t id; /**< The owner unique identifier. */ + char name[RTE_ETH_MAX_OWNER_NAME_LEN]; /**< The owner name. */ +}; + +/** Device supports link state interrupt */ +#define RTE_ETH_DEV_INTR_LSC 0x0002 +/** Device is a bonded slave */ +#define RTE_ETH_DEV_BONDED_SLAVE 0x0004 +/** Device supports device removal interrupt */ +#define RTE_ETH_DEV_INTR_RMV 0x0008 +/** Device is port representor */ +#define RTE_ETH_DEV_REPRESENTOR 0x0010 + +/** + * Iterates over valid ethdev ports owned by a specific owner. + * + * @param port_id + * The id of the next possible valid owned port. + * @param owner_id + * The owner identifier. + * RTE_ETH_DEV_NO_OWNER means iterate over all valid ownerless ports. + * @return + * Next valid port id owned by owner_id, RTE_MAX_ETHPORTS if there is none. + */ +uint64_t rte_eth_find_next_owned_by(uint16_t port_id, + const uint64_t owner_id); + +/** + * Macro to iterate over all enabled ethdev ports owned by a specific owner. + */ +#define RTE_ETH_FOREACH_DEV_OWNED_BY(p, o) \ + for (p = rte_eth_find_next_owned_by(0, o); \ + (unsigned int)p < (unsigned int)RTE_MAX_ETHPORTS; \ + p = rte_eth_find_next_owned_by(p + 1, o)) + +/** + * Iterates over valid ethdev ports. + * + * @param port_id + * The id of the next possible valid port. + * @return + * Next valid port id, RTE_MAX_ETHPORTS if there is none. + */ +uint16_t rte_eth_find_next(uint16_t port_id); + +/** + * Macro to iterate over all enabled and ownerless ethdev ports. + */ +#define RTE_ETH_FOREACH_DEV(p) \ + RTE_ETH_FOREACH_DEV_OWNED_BY(p, RTE_ETH_DEV_NO_OWNER) + + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Get a new unique owner identifier. + * An owner identifier is used to owns Ethernet devices by only one DPDK entity + * to avoid multiple management of device by different entities. + * + * @param owner_id + * Owner identifier pointer. + * @return + * Negative errno value on error, 0 on success. + */ +int __rte_experimental rte_eth_dev_owner_new(uint64_t *owner_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Set an Ethernet device owner. + * + * @param port_id + * The identifier of the port to own. + * @param owner + * The owner pointer. + * @return + * Negative errno value on error, 0 on success. + */ +int __rte_experimental rte_eth_dev_owner_set(const uint16_t port_id, + const struct rte_eth_dev_owner *owner); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Unset Ethernet device owner to make the device ownerless. + * + * @param port_id + * The identifier of port to make ownerless. + * @param owner_id + * The owner identifier. + * @return + * 0 on success, negative errno value on error. + */ +int __rte_experimental rte_eth_dev_owner_unset(const uint16_t port_id, + const uint64_t owner_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Remove owner from all Ethernet devices owned by a specific owner. + * + * @param owner_id + * The owner identifier. + */ +void __rte_experimental rte_eth_dev_owner_delete(const uint64_t owner_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Get the owner of an Ethernet device. + * + * @param port_id + * The port identifier. + * @param owner + * The owner structure pointer to fill. + * @return + * 0 on success, negative errno value on error.. + */ +int __rte_experimental rte_eth_dev_owner_get(const uint16_t port_id, + struct rte_eth_dev_owner *owner); + +/** + * Get the total number of Ethernet devices that have been successfully + * initialized by the matching Ethernet driver during the PCI probing phase + * and that are available for applications to use. These devices must be + * accessed by using the ``RTE_ETH_FOREACH_DEV()`` macro to deal with + * non-contiguous ranges of devices. + * These non-contiguous ranges can be created by calls to hotplug functions or + * by some PMDs. + * + * @return + * - The total number of usable Ethernet devices. + */ +__rte_deprecated +uint16_t rte_eth_dev_count(void); + +/** + * Get the number of ports which are usable for the application. + * + * These devices must be iterated by using the macro + * ``RTE_ETH_FOREACH_DEV`` or ``RTE_ETH_FOREACH_DEV_OWNED_BY`` + * to deal with non-contiguous ranges of devices. + * + * @return + * The count of available Ethernet devices. + */ +uint16_t rte_eth_dev_count_avail(void); + +/** + * Get the total number of ports which are allocated. + * + * Some devices may not be available for the application. + * + * @return + * The total count of Ethernet devices. + */ +uint16_t __rte_experimental rte_eth_dev_count_total(void); + +/** + * Attach a new Ethernet device specified by arguments. + * + * @param devargs + * A pointer to a strings array describing the new device + * to be attached. The strings should be a pci address like + * '0000:01:00.0' or virtual device name like 'net_pcap0'. + * @param port_id + * A pointer to a port identifier actually attached. + * @return + * 0 on success and port_id is filled, negative on error + */ +int rte_eth_dev_attach(const char *devargs, uint16_t *port_id); + +/** + * Detach a Ethernet device specified by port identifier. + * This function must be called when the device is in the + * closed state. + * + * @param port_id + * The port identifier of the device to detach. + * @param devname + * A pointer to a buffer that will be filled with the device name. + * This buffer must be at least RTE_DEV_NAME_MAX_LEN long. + * @return + * 0 on success and devname is filled, negative on error + */ +int rte_eth_dev_detach(uint16_t port_id, char *devname); + +/** + * Convert a numerical speed in Mbps to a bitmap flag that can be used in + * the bitmap link_speeds of the struct rte_eth_conf + * + * @param speed + * Numerical speed value in Mbps + * @param duplex + * ETH_LINK_[HALF/FULL]_DUPLEX (only for 10/100M speeds) + * @return + * 0 if the speed cannot be mapped + */ +uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Get DEV_RX_OFFLOAD_* flag name. + * + * @param offload + * Offload flag. + * @return + * Offload name or 'UNKNOWN' if the flag cannot be recognised. + */ +const char * __rte_experimental rte_eth_dev_rx_offload_name(uint64_t offload); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Get DEV_TX_OFFLOAD_* flag name. + * + * @param offload + * Offload flag. + * @return + * Offload name or 'UNKNOWN' if the flag cannot be recognised. + */ +const char * __rte_experimental rte_eth_dev_tx_offload_name(uint64_t offload); + +/** + * Configure an Ethernet device. + * This function must be invoked first before any other function in the + * Ethernet API. This function can also be re-invoked when a device is in the + * stopped state. + * + * @param port_id + * The port identifier of the Ethernet device to configure. + * @param nb_rx_queue + * The number of receive queues to set up for the Ethernet device. + * @param nb_tx_queue + * The number of transmit queues to set up for the Ethernet device. + * @param eth_conf + * The pointer to the configuration data to be used for the Ethernet device. + * The *rte_eth_conf* structure includes: + * - the hardware offload features to activate, with dedicated fields for + * each statically configurable offload hardware feature provided by + * Ethernet devices, such as IP checksum or VLAN tag stripping for + * example. + * The Rx offload bitfield API is obsolete and will be deprecated. + * Applications should set the ignore_bitfield_offloads bit on *rxmode* + * structure and use offloads field to set per-port offloads instead. + * - Any offloading set in eth_conf->[rt]xmode.offloads must be within + * the [rt]x_offload_capa returned from rte_eth_dev_infos_get(). + * Any type of device supported offloading set in the input argument + * eth_conf->[rt]xmode.offloads to rte_eth_dev_configure() is enabled + * on all queues and it can't be disabled in rte_eth_[rt]x_queue_setup(). + * - the Receive Side Scaling (RSS) configuration when using multiple RX + * queues per port. + * + * Embedding all configuration information in a single data structure + * is the more flexible method that allows the addition of new features + * without changing the syntax of the API. + * @return + * - 0: Success, device configured. + * - <0: Error code returned by the driver configuration function. + */ +int rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_queue, + uint16_t nb_tx_queue, const struct rte_eth_conf *eth_conf); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Check if an Ethernet device was physically removed. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * 1 when the Ethernet device is removed, otherwise 0. + */ +int __rte_experimental +rte_eth_dev_is_removed(uint16_t port_id); + +/** + * Allocate and set up a receive queue for an Ethernet device. + * + * The function allocates a contiguous block of memory for *nb_rx_desc* + * receive descriptors from a memory zone associated with *socket_id* + * and initializes each receive descriptor with a network buffer allocated + * from the memory pool *mb_pool*. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rx_queue_id + * The index of the receive queue to set up. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param nb_rx_desc + * The number of receive descriptors to allocate for the receive ring. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of NUMA. + * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for + * the DMA memory allocated for the receive descriptors of the ring. + * @param rx_conf + * The pointer to the configuration data to be used for the receive queue. + * NULL value is allowed, in which case default RX configuration + * will be used. + * The *rx_conf* structure contains an *rx_thresh* structure with the values + * of the Prefetch, Host, and Write-Back threshold registers of the receive + * ring. + * In addition it contains the hardware offloads features to activate using + * the DEV_RX_OFFLOAD_* flags. + * If an offloading set in rx_conf->offloads + * hasn't been set in the input argument eth_conf->rxmode.offloads + * to rte_eth_dev_configure(), it is a new added offloading, it must be + * per-queue type and it is enabled for the queue. + * No need to repeat any bit in rx_conf->offloads which has already been + * enabled in rte_eth_dev_configure() at port level. An offloading enabled + * at port level can't be disabled at queue level. + * @param mb_pool + * The pointer to the memory pool from which to allocate *rte_mbuf* network + * memory buffers to populate each descriptor of the receive ring. + * @return + * - 0: Success, receive queue correctly set up. + * - -EIO: if device is removed. + * - -EINVAL: The size of network buffers which can be allocated from the + * memory pool does not fit the various buffer sizes allowed by the + * device controller. + * - -ENOMEM: Unable to allocate the receive ring descriptors or to + * allocate network memory buffers from the memory pool when + * initializing receive descriptors. + */ +int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); + +/** + * Allocate and set up a transmit queue for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tx_queue_id + * The index of the transmit queue to set up. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param nb_tx_desc + * The number of transmit descriptors to allocate for the transmit ring. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of NUMA. + * Its value can be *SOCKET_ID_ANY* if there is no NUMA constraint for + * the DMA memory allocated for the transmit descriptors of the ring. + * @param tx_conf + * The pointer to the configuration data to be used for the transmit queue. + * NULL value is allowed, in which case default TX configuration + * will be used. + * The *tx_conf* structure contains the following data: + * - The *tx_thresh* structure with the values of the Prefetch, Host, and + * Write-Back threshold registers of the transmit ring. + * When setting Write-Back threshold to the value greater then zero, + * *tx_rs_thresh* value should be explicitly set to one. + * - The *tx_free_thresh* value indicates the [minimum] number of network + * buffers that must be pending in the transmit ring to trigger their + * [implicit] freeing by the driver transmit function. + * - The *tx_rs_thresh* value indicates the [minimum] number of transmit + * descriptors that must be pending in the transmit ring before setting the + * RS bit on a descriptor by the driver transmit function. + * The *tx_rs_thresh* value should be less or equal then + * *tx_free_thresh* value, and both of them should be less then + * *nb_tx_desc* - 3. + * - The *txq_flags* member contains flags to pass to the TX queue setup + * function to configure the behavior of the TX queue. This should be set + * to 0 if no special configuration is required. + * This API is obsolete and will be deprecated. Applications + * should set it to ETH_TXQ_FLAGS_IGNORE and use + * the offloads field below. + * - The *offloads* member contains Tx offloads to be enabled. + * If an offloading set in tx_conf->offloads + * hasn't been set in the input argument eth_conf->txmode.offloads + * to rte_eth_dev_configure(), it is a new added offloading, it must be + * per-queue type and it is enabled for the queue. + * No need to repeat any bit in tx_conf->offloads which has already been + * enabled in rte_eth_dev_configure() at port level. An offloading enabled + * at port level can't be disabled at queue level. + * + * Note that setting *tx_free_thresh* or *tx_rs_thresh* value to 0 forces + * the transmit function to use default values. + * @return + * - 0: Success, the transmit queue is correctly set up. + * - -ENOMEM: Unable to allocate the transmit ring descriptors. + */ +int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); + +/** + * Return the NUMA socket to which an Ethernet device is connected + * + * @param port_id + * The port identifier of the Ethernet device + * @return + * The NUMA socket id to which the Ethernet device is connected or + * a default of zero if the socket could not be determined. + * -1 is returned is the port_id value is out of range. + */ +int rte_eth_dev_socket_id(uint16_t port_id); + +/** + * Check if port_id of device is attached + * + * @param port_id + * The port identifier of the Ethernet device + * @return + * - 0 if port is out of range or not attached + * - 1 if device is attached + */ +int rte_eth_dev_is_valid_port(uint16_t port_id); + +/** + * Start specified RX queue of a port. It is used when rx_deferred_start + * flag of the specified queue is true. + * + * @param port_id + * The port identifier of the Ethernet device + * @param rx_queue_id + * The index of the rx queue to update the ring. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the receive queue is started. + * - -EINVAL: The port_id or the queue_id out of range. + * - -EIO: if device is removed. + * - -ENOTSUP: The function not supported in PMD driver. + */ +int rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id); + +/** + * Stop specified RX queue of a port + * + * @param port_id + * The port identifier of the Ethernet device + * @param rx_queue_id + * The index of the rx queue to update the ring. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the receive queue is stopped. + * - -EINVAL: The port_id or the queue_id out of range. + * - -EIO: if device is removed. + * - -ENOTSUP: The function not supported in PMD driver. + */ +int rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id); + +/** + * Start TX for specified queue of a port. It is used when tx_deferred_start + * flag of the specified queue is true. + * + * @param port_id + * The port identifier of the Ethernet device + * @param tx_queue_id + * The index of the tx queue to update the ring. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the transmit queue is started. + * - -EINVAL: The port_id or the queue_id out of range. + * - -EIO: if device is removed. + * - -ENOTSUP: The function not supported in PMD driver. + */ +int rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id); + +/** + * Stop specified TX queue of a port + * + * @param port_id + * The port identifier of the Ethernet device + * @param tx_queue_id + * The index of the tx queue to update the ring. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the transmit queue is stopped. + * - -EINVAL: The port_id or the queue_id out of range. + * - -EIO: if device is removed. + * - -ENOTSUP: The function not supported in PMD driver. + */ +int rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id); + +/** + * Start an Ethernet device. + * + * The device start step is the last one and consists of setting the configured + * offload features and in starting the transmit and the receive units of the + * device. + * On success, all basic functions exported by the Ethernet API (link status, + * receive/transmit, and so on) can be invoked. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - 0: Success, Ethernet device started. + * - <0: Error code of the driver device start function. + */ +int rte_eth_dev_start(uint16_t port_id); + +/** + * Stop an Ethernet device. The device can be restarted with a call to + * rte_eth_dev_start() + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_dev_stop(uint16_t port_id); + +/** + * Link up an Ethernet device. + * + * Set device link up will re-enable the device rx/tx + * functionality after it is previously set device linked down. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - 0: Success, Ethernet device linked up. + * - <0: Error code of the driver device link up function. + */ +int rte_eth_dev_set_link_up(uint16_t port_id); + +/** + * Link down an Ethernet device. + * The device rx/tx functionality will be disabled if success, + * and it can be re-enabled with a call to + * rte_eth_dev_set_link_up() + * + * @param port_id + * The port identifier of the Ethernet device. + */ +int rte_eth_dev_set_link_down(uint16_t port_id); + +/** + * Close a stopped Ethernet device. The device cannot be restarted! + * The function frees all resources except for needed by the + * closed state. To free these resources, call rte_eth_dev_detach(). + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_dev_close(uint16_t port_id); + +/** + * Reset a Ethernet device and keep its port id. + * + * When a port has to be reset passively, the DPDK application can invoke + * this function. For example when a PF is reset, all its VFs should also + * be reset. Normally a DPDK application can invoke this function when + * RTE_ETH_EVENT_INTR_RESET event is detected, but can also use it to start + * a port reset in other circumstances. + * + * When this function is called, it first stops the port and then calls the + * PMD specific dev_uninit( ) and dev_init( ) to return the port to initial + * state, in which no Tx and Rx queues are setup, as if the port has been + * reset and not started. The port keeps the port id it had before the + * function call. + * + * After calling rte_eth_dev_reset( ), the application should use + * rte_eth_dev_configure( ), rte_eth_rx_queue_setup( ), + * rte_eth_tx_queue_setup( ), and rte_eth_dev_start( ) + * to reconfigure the device as appropriate. + * + * Note: To avoid unexpected behavior, the application should stop calling + * Tx and Rx functions before calling rte_eth_dev_reset( ). For thread + * safety, all these controlling functions should be called from the same + * thread. + * + * @param port_id + * The port identifier of the Ethernet device. + * + * @return + * - (0) if successful. + * - (-EINVAL) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support this function. + * - (-EPERM) if not ran from the primary process. + * - (-EIO) if re-initialisation failed or device is removed. + * - (-ENOMEM) if the reset failed due to OOM. + * - (-EAGAIN) if the reset temporarily failed and should be retried later. + */ +int rte_eth_dev_reset(uint16_t port_id); + +/** + * Enable receipt in promiscuous mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_promiscuous_enable(uint16_t port_id); + +/** + * Disable receipt in promiscuous mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_promiscuous_disable(uint16_t port_id); + +/** + * Return the value of promiscuous mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (1) if promiscuous is enabled + * - (0) if promiscuous is disabled. + * - (-1) on error + */ +int rte_eth_promiscuous_get(uint16_t port_id); + +/** + * Enable the receipt of any multicast frame by an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_allmulticast_enable(uint16_t port_id); + +/** + * Disable the receipt of all multicast frames by an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_allmulticast_disable(uint16_t port_id); + +/** + * Return the value of allmulticast mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (1) if allmulticast is enabled + * - (0) if allmulticast is disabled. + * - (-1) on error + */ +int rte_eth_allmulticast_get(uint16_t port_id); + +/** + * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX + * or FULL-DUPLEX) of the physical link of an Ethernet device. It might need + * to wait up to 9 seconds in it. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param link + * A pointer to an *rte_eth_link* structure to be filled with + * the status, the speed and the mode of the Ethernet device link. + */ +void rte_eth_link_get(uint16_t port_id, struct rte_eth_link *link); + +/** + * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX + * or FULL-DUPLEX) of the physical link of an Ethernet device. It is a no-wait + * version of rte_eth_link_get(). + * + * @param port_id + * The port identifier of the Ethernet device. + * @param link + * A pointer to an *rte_eth_link* structure to be filled with + * the status, the speed and the mode of the Ethernet device link. + */ +void rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *link); + +/** + * Retrieve the general I/O statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param stats + * A pointer to a structure of type *rte_eth_stats* to be filled with + * the values of device counters for the following set of statistics: + * - *ipackets* with the total of successfully received packets. + * - *opackets* with the total of successfully transmitted packets. + * - *ibytes* with the total of successfully received bytes. + * - *obytes* with the total of successfully transmitted bytes. + * - *ierrors* with the total of erroneous received packets. + * - *oerrors* with the total of failed transmitted packets. + * @return + * Zero if successful. Non-zero otherwise. + */ +int rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats); + +/** + * Reset the general I/O statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (0) if device notified to reset stats. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_stats_reset(uint16_t port_id); + +/** + * Retrieve names of extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param xstats_names + * An rte_eth_xstat_name array of at least *size* elements to + * be filled. If set to NULL, the function returns the required number + * of elements. + * @param size + * The size of the xstats_names array (number of elements). + * @return + * - A positive value lower or equal to size: success. The return value + * is the number of entries filled in the stats table. + * - A positive value higher than size: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + * - A negative value on error (invalid port id). + */ +int rte_eth_xstats_get_names(uint16_t port_id, + struct rte_eth_xstat_name *xstats_names, + unsigned int size); + +/** + * Retrieve extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param xstats + * A pointer to a table of structure of type *rte_eth_xstat* + * to be filled with device statistics ids and values: id is the + * index of the name string in xstats_names (see rte_eth_xstats_get_names()), + * and value is the statistic counter. + * This parameter can be set to NULL if n is 0. + * @param n + * The size of the xstats array (number of elements). + * @return + * - A positive value lower or equal to n: success. The return value + * is the number of entries filled in the stats table. + * - A positive value higher than n: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + * - A negative value on error (invalid port id). + */ +int rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats, + unsigned int n); + +/** + * Retrieve names of extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param xstats_names + * An rte_eth_xstat_name array of at least *size* elements to + * be filled. If set to NULL, the function returns the required number + * of elements. + * @param ids + * IDs array given by app to retrieve specific statistics + * @param size + * The size of the xstats_names array (number of elements). + * @return + * - A positive value lower or equal to size: success. The return value + * is the number of entries filled in the stats table. + * - A positive value higher than size: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + * - A negative value on error (invalid port id). + */ +int +rte_eth_xstats_get_names_by_id(uint16_t port_id, + struct rte_eth_xstat_name *xstats_names, unsigned int size, + uint64_t *ids); + +/** + * Retrieve extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param ids + * A pointer to an ids array passed by application. This tells which + * statistics values function should retrieve. This parameter + * can be set to NULL if size is 0. In this case function will retrieve + * all avalible statistics. + * @param values + * A pointer to a table to be filled with device statistics values. + * @param size + * The size of the ids array (number of elements). + * @return + * - A positive value lower or equal to size: success. The return value + * is the number of entries filled in the stats table. + * - A positive value higher than size: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + * - A negative value on error (invalid port id). + */ +int rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids, + uint64_t *values, unsigned int size); + +/** + * Gets the ID of a statistic from its name. + * + * This function searches for the statistics using string compares, and + * as such should not be used on the fast-path. For fast-path retrieval of + * specific statistics, store the ID as provided in *id* from this function, + * and pass the ID to rte_eth_xstats_get() + * + * @param port_id The port to look up statistics from + * @param xstat_name The name of the statistic to return + * @param[out] id A pointer to an app-supplied uint64_t which should be + * set to the ID of the stat if the stat exists. + * @return + * 0 on success + * -ENODEV for invalid port_id, + * -EIO if device is removed, + * -EINVAL if the xstat_name doesn't exist in port_id + */ +int rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name, + uint64_t *id); + +/** + * Reset extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_xstats_reset(uint16_t port_id); + +/** + * Set a mapping for the specified transmit queue to the specified per-queue + * statistics counter. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tx_queue_id + * The index of the transmit queue for which a queue stats mapping is required. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param stat_idx + * The per-queue packet statistics functionality number that the transmit + * queue is to be assigned. + * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1]. + * @return + * Zero if successful. Non-zero otherwise. + */ +int rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, + uint16_t tx_queue_id, uint8_t stat_idx); + +/** + * Set a mapping for the specified receive queue to the specified per-queue + * statistics counter. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rx_queue_id + * The index of the receive queue for which a queue stats mapping is required. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param stat_idx + * The per-queue packet statistics functionality number that the receive + * queue is to be assigned. + * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1]. + * @return + * Zero if successful. Non-zero otherwise. + */ +int rte_eth_dev_set_rx_queue_stats_mapping(uint16_t port_id, + uint16_t rx_queue_id, + uint8_t stat_idx); + +/** + * Retrieve the Ethernet address of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mac_addr + * A pointer to a structure of type *ether_addr* to be filled with + * the Ethernet address of the Ethernet device. + */ +void rte_eth_macaddr_get(uint16_t port_id, struct ether_addr *mac_addr); + +/** + * Retrieve the contextual information of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param dev_info + * A pointer to a structure of type *rte_eth_dev_info* to be filled with + * the contextual information of the Ethernet device. + */ +void rte_eth_dev_info_get(uint16_t port_id, struct rte_eth_dev_info *dev_info); + +/** + * Retrieve the firmware version of a device. + * + * @param port_id + * The port identifier of the device. + * @param fw_version + * A pointer to a string array storing the firmware version of a device, + * the string includes terminating null. This pointer is allocated by caller. + * @param fw_size + * The size of the string array pointed by fw_version, which should be + * large enough to store firmware version of the device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if operation is not supported. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (>0) if *fw_size* is not enough to store firmware version, return + * the size of the non truncated string. + */ +int rte_eth_dev_fw_version_get(uint16_t port_id, + char *fw_version, size_t fw_size); + +/** + * Retrieve the supported packet types of an Ethernet device. + * + * When a packet type is announced as supported, it *must* be recognized by + * the PMD. For instance, if RTE_PTYPE_L2_ETHER, RTE_PTYPE_L2_ETHER_VLAN + * and RTE_PTYPE_L3_IPV4 are announced, the PMD must return the following + * packet types for these packets: + * - Ether/IPv4 -> RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 + * - Ether/Vlan/IPv4 -> RTE_PTYPE_L2_ETHER_VLAN | RTE_PTYPE_L3_IPV4 + * - Ether/[anything else] -> RTE_PTYPE_L2_ETHER + * - Ether/Vlan/[anything else] -> RTE_PTYPE_L2_ETHER_VLAN + * + * When a packet is received by a PMD, the most precise type must be + * returned among the ones supported. However a PMD is allowed to set + * packet type that is not in the supported list, at the condition that it + * is more precise. Therefore, a PMD announcing no supported packet types + * can still set a matching packet type in a received packet. + * + * @note + * Better to invoke this API after the device is already started or rx burst + * function is decided, to obtain correct supported ptypes. + * @note + * if a given PMD does not report what ptypes it supports, then the supported + * ptype count is reported as 0. + * @param port_id + * The port identifier of the Ethernet device. + * @param ptype_mask + * A hint of what kind of packet type which the caller is interested in. + * @param ptypes + * An array pointer to store adequate packet types, allocated by caller. + * @param num + * Size of the array pointed by param ptypes. + * @return + * - (>=0) Number of supported ptypes. If the number of types exceeds num, + * only num entries will be filled into the ptypes array, but the full + * count of supported ptypes will be returned. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_get_supported_ptypes(uint16_t port_id, uint32_t ptype_mask, + uint32_t *ptypes, int num); + +/** + * Retrieve the MTU of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mtu + * A pointer to a uint16_t where the retrieved MTU is to be stored. + * @return + * - (0) if successful. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_get_mtu(uint16_t port_id, uint16_t *mtu); + +/** + * Change the MTU of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mtu + * A uint16_t for the MTU to be applied. + * @return + * - (0) if successful. + * - (-ENOTSUP) if operation is not supported. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-EINVAL) if *mtu* invalid. + * - (-EBUSY) if operation is not allowed when the port is running + */ +int rte_eth_dev_set_mtu(uint16_t port_id, uint16_t mtu); + +/** + * Enable/Disable hardware filtering by an Ethernet device of received + * VLAN packets tagged with a given VLAN Tag Identifier. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param vlan_id + * The VLAN Tag Identifier whose filtering must be enabled or disabled. + * @param on + * If > 0, enable VLAN filtering of VLAN packets tagged with *vlan_id*. + * Otherwise, disable VLAN filtering of VLAN packets tagged with *vlan_id*. + * @return + * - (0) if successful. + * - (-ENOSUP) if hardware-assisted VLAN filtering not configured. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-ENOSYS) if VLAN filtering on *port_id* disabled. + * - (-EINVAL) if *vlan_id* > 4095. + */ +int rte_eth_dev_vlan_filter(uint16_t port_id, uint16_t vlan_id, int on); + +/** + * Enable/Disable hardware VLAN Strip by a rx queue of an Ethernet device. + * 82599/X540/X550 can support VLAN stripping at the rx queue level + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rx_queue_id + * The index of the receive queue for which a queue stats mapping is required. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param on + * If 1, Enable VLAN Stripping of the receive queue of the Ethernet port. + * If 0, Disable VLAN Stripping of the receive queue of the Ethernet port. + * @return + * - (0) if successful. + * - (-ENOSUP) if hardware-assisted VLAN stripping not configured. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if *rx_queue_id* invalid. + */ +int rte_eth_dev_set_vlan_strip_on_queue(uint16_t port_id, uint16_t rx_queue_id, + int on); + +/** + * Set the Outer VLAN Ether Type by an Ethernet device, it can be inserted to + * the VLAN Header. This is a register setup available on some Intel NIC, not + * but all, please check the data sheet for availability. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param vlan_type + * The vlan type. + * @param tag_type + * The Tag Protocol ID + * @return + * - (0) if successful. + * - (-ENOSUP) if hardware-assisted VLAN TPID setup is not supported. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_dev_set_vlan_ether_type(uint16_t port_id, + enum rte_vlan_type vlan_type, + uint16_t tag_type); + +/** + * Set VLAN offload configuration on an Ethernet device + * Enable/Disable Extended VLAN by an Ethernet device, This is a register setup + * available on some Intel NIC, not but all, please check the data sheet for + * availability. + * Enable/Disable VLAN Strip can be done on rx queue for certain NIC, but here + * the configuration is applied on the port level. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param offload_mask + * The VLAN Offload bit mask can be mixed use with "OR" + * ETH_VLAN_STRIP_OFFLOAD + * ETH_VLAN_FILTER_OFFLOAD + * ETH_VLAN_EXTEND_OFFLOAD + * @return + * - (0) if successful. + * - (-ENOSUP) if hardware-assisted VLAN filtering not configured. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_dev_set_vlan_offload(uint16_t port_id, int offload_mask); + +/** + * Read VLAN Offload configuration from an Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (>0) if successful. Bit mask to indicate + * ETH_VLAN_STRIP_OFFLOAD + * ETH_VLAN_FILTER_OFFLOAD + * ETH_VLAN_EXTEND_OFFLOAD + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_get_vlan_offload(uint16_t port_id); + +/** + * Set port based TX VLAN insertion on or off. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param pvid + * Port based TX VLAN identifier together with user priority. + * @param on + * Turn on or off the port based TX VLAN insertion. + * + * @return + * - (0) if successful. + * - negative if failed. + */ +int rte_eth_dev_set_vlan_pvid(uint16_t port_id, uint16_t pvid, int on); + +typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count, + void *userdata); + +/** + * Structure used to buffer packets for future TX + * Used by APIs rte_eth_tx_buffer and rte_eth_tx_buffer_flush + */ +struct rte_eth_dev_tx_buffer { + buffer_tx_error_fn error_callback; + void *error_userdata; + uint16_t size; /**< Size of buffer for buffered tx */ + uint16_t length; /**< Number of packets in the array */ + struct rte_mbuf *pkts[]; + /**< Pending packets to be sent on explicit flush or when full */ +}; + +/** + * Calculate the size of the tx buffer. + * + * @param sz + * Number of stored packets. + */ +#define RTE_ETH_TX_BUFFER_SIZE(sz) \ + (sizeof(struct rte_eth_dev_tx_buffer) + (sz) * sizeof(struct rte_mbuf *)) + +/** + * Initialize default values for buffered transmitting + * + * @param buffer + * Tx buffer to be initialized. + * @param size + * Buffer size + * @return + * 0 if no error + */ +int +rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size); + +/** + * Configure a callback for buffered packets which cannot be sent + * + * Register a specific callback to be called when an attempt is made to send + * all packets buffered on an ethernet port, but not all packets can + * successfully be sent. The callback registered here will be called only + * from calls to rte_eth_tx_buffer() and rte_eth_tx_buffer_flush() APIs. + * The default callback configured for each queue by default just frees the + * packets back to the calling mempool. If additional behaviour is required, + * for example, to count dropped packets, or to retry transmission of packets + * which cannot be sent, this function should be used to register a suitable + * callback function to implement the desired behaviour. + * The example callback "rte_eth_count_unsent_packet_callback()" is also + * provided as reference. + * + * @param buffer + * The port identifier of the Ethernet device. + * @param callback + * The function to be used as the callback. + * @param userdata + * Arbitrary parameter to be passed to the callback function + * @return + * 0 on success, or -1 on error with rte_errno set appropriately + */ +int +rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer, + buffer_tx_error_fn callback, void *userdata); + +/** + * Callback function for silently dropping unsent buffered packets. + * + * This function can be passed to rte_eth_tx_buffer_set_err_callback() to + * adjust the default behavior when buffered packets cannot be sent. This + * function drops any unsent packets silently and is used by tx buffered + * operations as default behavior. + * + * NOTE: this function should not be called directly, instead it should be used + * as a callback for packet buffering. + * + * NOTE: when configuring this function as a callback with + * rte_eth_tx_buffer_set_err_callback(), the final, userdata parameter + * should point to an uint64_t value. + * + * @param pkts + * The previously buffered packets which could not be sent + * @param unsent + * The number of unsent packets in the pkts array + * @param userdata + * Not used + */ +void +rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent, + void *userdata); + +/** + * Callback function for tracking unsent buffered packets. + * + * This function can be passed to rte_eth_tx_buffer_set_err_callback() to + * adjust the default behavior when buffered packets cannot be sent. This + * function drops any unsent packets, but also updates a user-supplied counter + * to track the overall number of packets dropped. The counter should be an + * uint64_t variable. + * + * NOTE: this function should not be called directly, instead it should be used + * as a callback for packet buffering. + * + * NOTE: when configuring this function as a callback with + * rte_eth_tx_buffer_set_err_callback(), the final, userdata parameter + * should point to an uint64_t value. + * + * @param pkts + * The previously buffered packets which could not be sent + * @param unsent + * The number of unsent packets in the pkts array + * @param userdata + * Pointer to an uint64_t value, which will be incremented by unsent + */ +void +rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent, + void *userdata); + +/** + * Request the driver to free mbufs currently cached by the driver. The + * driver will only free the mbuf if it is no longer in use. It is the + * application's responsibity to ensure rte_eth_tx_buffer_flush(..) is + * called if needed. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the transmit queue through which output packets must be + * sent. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param free_cnt + * Maximum number of packets to free. Use 0 to indicate all possible packets + * should be freed. Note that a packet may be using multiple mbufs. + * @return + * Failure: < 0 + * -ENODEV: Invalid interface + * -EIO: device is removed + * -ENOTSUP: Driver does not support function + * Success: >= 0 + * 0-n: Number of packets freed. More packets may still remain in ring that + * are in use. + */ +int +rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt); + +/** + * Subtypes for IPsec offload event(@ref RTE_ETH_EVENT_IPSEC) raised by + * eth device. + */ +enum rte_eth_event_ipsec_subtype { + RTE_ETH_EVENT_IPSEC_UNKNOWN = 0, + /**< Unknown event type */ + RTE_ETH_EVENT_IPSEC_ESN_OVERFLOW, + /**< Sequence number overflow */ + RTE_ETH_EVENT_IPSEC_SA_TIME_EXPIRY, + /**< Soft time expiry of SA */ + RTE_ETH_EVENT_IPSEC_SA_BYTE_EXPIRY, + /**< Soft byte expiry of SA */ + RTE_ETH_EVENT_IPSEC_MAX + /**< Max value of this enum */ +}; + +/** + * Descriptor for @ref RTE_ETH_EVENT_IPSEC event. Used by eth dev to send extra + * information of the IPsec offload event. + */ +struct rte_eth_event_ipsec_desc { + enum rte_eth_event_ipsec_subtype subtype; + /**< Type of RTE_ETH_EVENT_IPSEC_* event */ + uint64_t metadata; + /**< Event specific metadata + * + * For the following events, *userdata* registered + * with the *rte_security_session* would be returned + * as metadata, + * + * - @ref RTE_ETH_EVENT_IPSEC_ESN_OVERFLOW + * - @ref RTE_ETH_EVENT_IPSEC_SA_TIME_EXPIRY + * - @ref RTE_ETH_EVENT_IPSEC_SA_BYTE_EXPIRY + * + * @see struct rte_security_session_conf + * + */ +}; + +/** + * The eth device event type for interrupt, and maybe others in the future. + */ +enum rte_eth_event_type { + RTE_ETH_EVENT_UNKNOWN, /**< unknown event type */ + RTE_ETH_EVENT_INTR_LSC, /**< lsc interrupt event */ + RTE_ETH_EVENT_QUEUE_STATE, + /**< queue state event (enabled/disabled) */ + RTE_ETH_EVENT_INTR_RESET, + /**< reset interrupt event, sent to VF on PF reset */ + RTE_ETH_EVENT_VF_MBOX, /**< message from the VF received by PF */ + RTE_ETH_EVENT_MACSEC, /**< MACsec offload related event */ + RTE_ETH_EVENT_INTR_RMV, /**< device removal event */ + RTE_ETH_EVENT_NEW, /**< port is probed */ + RTE_ETH_EVENT_DESTROY, /**< port is released */ + RTE_ETH_EVENT_IPSEC, /**< IPsec offload related event */ + RTE_ETH_EVENT_MAX /**< max value of this enum */ +}; + +typedef int (*rte_eth_dev_cb_fn)(uint16_t port_id, + enum rte_eth_event_type event, void *cb_arg, void *ret_param); +/**< user application callback to be registered for interrupts */ + +/** + * Register a callback function for port event. + * + * @param port_id + * Port id. + * RTE_ETH_ALL means register the event for all port ids. + * @param event + * Event interested. + * @param cb_fn + * User supplied callback function to be called. + * @param cb_arg + * Pointer to the parameters for the registered callback. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_callback_register(uint16_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg); + +/** + * Unregister a callback function for port event. + * + * @param port_id + * Port id. + * RTE_ETH_ALL means unregister the event for all port ids. + * @param event + * Event interested. + * @param cb_fn + * User supplied callback function to be called. + * @param cb_arg + * Pointer to the parameters for the registered callback. -1 means to + * remove all for the same callback address and same event. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_callback_unregister(uint16_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg); + +/** + * When there is no rx packet coming in Rx Queue for a long time, we can + * sleep lcore related to RX Queue for power saving, and enable rx interrupt + * to be triggered when Rx packet arrives. + * + * The rte_eth_dev_rx_intr_enable() function enables rx queue + * interrupt on specific rx queue of a port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_dev_rx_intr_enable(uint16_t port_id, uint16_t queue_id); + +/** + * When lcore wakes up from rx interrupt indicating packet coming, disable rx + * interrupt and returns to polling mode. + * + * The rte_eth_dev_rx_intr_disable() function disables rx queue + * interrupt on specific rx queue of a port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_dev_rx_intr_disable(uint16_t port_id, uint16_t queue_id); + +/** + * RX Interrupt control per port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param epfd + * Epoll instance fd which the intr vector associated to. + * Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance. + * @param op + * The operation be performed for the vector. + * Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data); + +/** + * RX Interrupt control per queue. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param epfd + * Epoll instance fd which the intr vector associated to. + * Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance. + * @param op + * The operation be performed for the vector. + * Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id, + int epfd, int op, void *data); + +/** + * Turn on the LED on the Ethernet device. + * This function turns on the LED on the Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_led_on(uint16_t port_id); + +/** + * Turn off the LED on the Ethernet device. + * This function turns off the LED on the Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_led_off(uint16_t port_id); + +/** + * Get current status of the Ethernet link flow control for Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @param fc_conf + * The pointer to the structure where to store the flow control parameters. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support flow control. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_dev_flow_ctrl_get(uint16_t port_id, + struct rte_eth_fc_conf *fc_conf); + +/** + * Configure the Ethernet link flow control for Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @param fc_conf + * The pointer to the structure of the flow control parameters. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support flow control mode. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter + * - (-EIO) if flow control setup failure or device is removed. + */ +int rte_eth_dev_flow_ctrl_set(uint16_t port_id, + struct rte_eth_fc_conf *fc_conf); + +/** + * Configure the Ethernet priority flow control under DCB environment + * for Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param pfc_conf + * The pointer to the structure of the priority flow control parameters. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support priority flow control mode. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter + * - (-EIO) if flow control setup failure or device is removed. + */ +int rte_eth_dev_priority_flow_ctrl_set(uint16_t port_id, + struct rte_eth_pfc_conf *pfc_conf); + +/** + * Add a MAC address to an internal array of addresses used to enable whitelist + * filtering to accept packets only if the destination MAC address matches. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mac_addr + * The MAC address to add. + * @param pool + * VMDq pool index to associate address with (if VMDq is enabled). If VMDq is + * not enabled, this should be set to 0. + * @return + * - (0) if successfully added or *mac_addr* was already added. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port* is invalid. + * - (-EIO) if device is removed. + * - (-ENOSPC) if no more MAC addresses can be added. + * - (-EINVAL) if MAC address is invalid. + */ +int rte_eth_dev_mac_addr_add(uint16_t port_id, struct ether_addr *mac_addr, + uint32_t pool); + +/** + * Remove a MAC address from the internal array of addresses. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mac_addr + * MAC address to remove. + * @return + * - (0) if successful, or *mac_addr* didn't exist. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port* invalid. + * - (-EADDRINUSE) if attempting to remove the default MAC address + */ +int rte_eth_dev_mac_addr_remove(uint16_t port_id, struct ether_addr *mac_addr); + +/** + * Set the default MAC address. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mac_addr + * New default MAC address. + * @return + * - (0) if successful, or *mac_addr* didn't exist. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if MAC address is invalid. + */ +int rte_eth_dev_default_mac_addr_set(uint16_t port_id, + struct ether_addr *mac_addr); + +/** + * Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param reta_conf + * RETA to update. + * @param reta_size + * Redirection table size. The table size can be queried by + * rte_eth_dev_info_get(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + * - (-EIO) if device is removed. + */ +int rte_eth_dev_rss_reta_update(uint16_t port_id, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); + + /** + * Query Redirection Table(RETA) of Receive Side Scaling of Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param reta_conf + * RETA to query. + * @param reta_size + * Redirection table size. The table size can be queried by + * rte_eth_dev_info_get(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + * - (-EIO) if device is removed. + */ +int rte_eth_dev_rss_reta_query(uint16_t port_id, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); + + /** + * Updates unicast hash table for receiving packet with the given destination + * MAC address, and the packet is routed to all VFs for which the RX mode is + * accept packets that match the unicast hash table. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param addr + * Unicast MAC address. + * @param on + * 1 - Set an unicast hash bit for receiving packets with the MAC address. + * 0 - Clear an unicast hash bit. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_uc_hash_table_set(uint16_t port_id, struct ether_addr *addr, + uint8_t on); + + /** + * Updates all unicast hash bitmaps for receiving packet with any Unicast + * Ethernet MAC addresses,the packet is routed to all VFs for which the RX + * mode is accept packets that match the unicast hash table. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param on + * 1 - Set all unicast hash bitmaps for receiving all the Ethernet + * MAC addresses + * 0 - Clear all unicast hash bitmaps + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_uc_all_hash_table_set(uint16_t port_id, uint8_t on); + +/** + * Set a traffic mirroring rule on an Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mirror_conf + * The pointer to the traffic mirroring structure describing the mirroring rule. + * The *rte_eth_vm_mirror_conf* structure includes the type of mirroring rule, + * destination pool and the value of rule if enable vlan or pool mirroring. + * + * @param rule_id + * The index of traffic mirroring rule, we support four separated rules. + * @param on + * 1 - Enable a mirroring rule. + * 0 - Disable a mirroring rule. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-EINVAL) if the mr_conf information is not correct. + */ +int rte_eth_mirror_rule_set(uint16_t port_id, + struct rte_eth_mirror_conf *mirror_conf, + uint8_t rule_id, + uint8_t on); + +/** + * Reset a traffic mirroring rule on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rule_id + * The index of traffic mirroring rule, we support four separated rules. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_mirror_rule_reset(uint16_t port_id, + uint8_t rule_id); + +/** + * Set the rate limitation for a queue on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_idx + * The queue id. + * @param tx_rate + * The tx rate in Mbps. Allocated from the total port link speed. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx, + uint16_t tx_rate); + + /** + * Configuration of Receive Side Scaling hash computation of Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rss_conf + * The new configuration to use for RSS hash computation on the port. + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_rss_hash_update(uint16_t port_id, + struct rte_eth_rss_conf *rss_conf); + + /** + * Retrieve current configuration of Receive Side Scaling hash computation + * of Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rss_conf + * Where to store the current RSS hash configuration of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support RSS. + */ +int +rte_eth_dev_rss_hash_conf_get(uint16_t port_id, + struct rte_eth_rss_conf *rss_conf); + + /** + * Add UDP tunneling port for a specific type of tunnel. + * The packets with this UDP port will be identified as this type of tunnel. + * Before enabling any offloading function for a tunnel, users can call this API + * to change or add more UDP port for the tunnel. So the offloading function + * can take effect on the packets with the specific UDP port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tunnel_udp + * UDP tunneling configuration. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support tunnel type. + */ +int +rte_eth_dev_udp_tunnel_port_add(uint16_t port_id, + struct rte_eth_udp_tunnel *tunnel_udp); + + /** + * Delete UDP tunneling port a specific type of tunnel. + * The packets with this UDP port will not be identified as this type of tunnel + * any more. + * Before enabling any offloading function for a tunnel, users can call this API + * to delete a UDP port for the tunnel. So the offloading function will not take + * effect on the packets with the specific UDP port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tunnel_udp + * UDP tunneling configuration. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support tunnel type. + */ +int +rte_eth_dev_udp_tunnel_port_delete(uint16_t port_id, + struct rte_eth_udp_tunnel *tunnel_udp); + +/** + * Check whether the filter type is supported on an Ethernet device. + * All the supported filter types are defined in 'rte_eth_ctrl.h'. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param filter_type + * Filter type. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this filter type. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + */ +int rte_eth_dev_filter_supported(uint16_t port_id, + enum rte_filter_type filter_type); + +/** + * Take operations to assigned filter type on an Ethernet device. + * All the supported operations and filter types are defined in 'rte_eth_ctrl.h'. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param filter_type + * Filter type. + * @param filter_op + * Type of operation. + * @param arg + * A pointer to arguments defined specifically for the operation. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_filter_ctrl(uint16_t port_id, enum rte_filter_type filter_type, + enum rte_filter_op filter_op, void *arg); + +/** + * Get DCB information on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param dcb_info + * dcb information. + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support. + */ +int rte_eth_dev_get_dcb_info(uint16_t port_id, + struct rte_eth_dcb_info *dcb_info); + +struct rte_eth_rxtx_callback; + +/** + * Add a callback to be called on packet RX on a given port and queue. + * + * This API configures a function to be called for each burst of + * packets received on a given NIC port queue. The return value is a pointer + * that can be used to later remove the callback using + * rte_eth_remove_rx_callback(). + * + * Multiple functions are called in the order that they are added. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device on which the callback is to be added. + * @param fn + * The callback function + * @param user_param + * A generic pointer parameter which will be passed to each invocation of the + * callback function on this port and queue. + * + * @return + * NULL on error. + * On success, a pointer value which can later be used to remove the callback. + */ +const struct rte_eth_rxtx_callback * +rte_eth_add_rx_callback(uint16_t port_id, uint16_t queue_id, + rte_rx_callback_fn fn, void *user_param); + +/** + * Add a callback that must be called first on packet RX on a given port + * and queue. + * + * This API configures a first function to be called for each burst of + * packets received on a given NIC port queue. The return value is a pointer + * that can be used to later remove the callback using + * rte_eth_remove_rx_callback(). + * + * Multiple functions are called in the order that they are added. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device on which the callback is to be added. + * @param fn + * The callback function + * @param user_param + * A generic pointer parameter which will be passed to each invocation of the + * callback function on this port and queue. + * + * @return + * NULL on error. + * On success, a pointer value which can later be used to remove the callback. + */ +const struct rte_eth_rxtx_callback * +rte_eth_add_first_rx_callback(uint16_t port_id, uint16_t queue_id, + rte_rx_callback_fn fn, void *user_param); + +/** + * Add a callback to be called on packet TX on a given port and queue. + * + * This API configures a function to be called for each burst of + * packets sent on a given NIC port queue. The return value is a pointer + * that can be used to later remove the callback using + * rte_eth_remove_tx_callback(). + * + * Multiple functions are called in the order that they are added. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device on which the callback is to be added. + * @param fn + * The callback function + * @param user_param + * A generic pointer parameter which will be passed to each invocation of the + * callback function on this port and queue. + * + * @return + * NULL on error. + * On success, a pointer value which can later be used to remove the callback. + */ +const struct rte_eth_rxtx_callback * +rte_eth_add_tx_callback(uint16_t port_id, uint16_t queue_id, + rte_tx_callback_fn fn, void *user_param); + +/** + * Remove an RX packet callback from a given port and queue. + * + * This function is used to removed callbacks that were added to a NIC port + * queue using rte_eth_add_rx_callback(). + * + * Note: the callback is removed from the callback list but it isn't freed + * since the it may still be in use. The memory for the callback can be + * subsequently freed back by the application by calling rte_free(): + * + * - Immediately - if the port is stopped, or the user knows that no + * callbacks are in flight e.g. if called from the thread doing RX/TX + * on that queue. + * + * - After a short delay - where the delay is sufficient to allow any + * in-flight callbacks to complete. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device from which the callback is to be removed. + * @param user_cb + * User supplied callback created via rte_eth_add_rx_callback(). + * + * @return + * - 0: Success. Callback was removed. + * - -ENOTSUP: Callback support is not available. + * - -EINVAL: The port_id or the queue_id is out of range, or the callback + * is NULL or not found for the port/queue. + */ +int rte_eth_remove_rx_callback(uint16_t port_id, uint16_t queue_id, + const struct rte_eth_rxtx_callback *user_cb); + +/** + * Remove a TX packet callback from a given port and queue. + * + * This function is used to removed callbacks that were added to a NIC port + * queue using rte_eth_add_tx_callback(). + * + * Note: the callback is removed from the callback list but it isn't freed + * since the it may still be in use. The memory for the callback can be + * subsequently freed back by the application by calling rte_free(): + * + * - Immediately - if the port is stopped, or the user knows that no + * callbacks are in flight e.g. if called from the thread doing RX/TX + * on that queue. + * + * - After a short delay - where the delay is sufficient to allow any + * in-flight callbacks to complete. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device from which the callback is to be removed. + * @param user_cb + * User supplied callback created via rte_eth_add_tx_callback(). + * + * @return + * - 0: Success. Callback was removed. + * - -ENOTSUP: Callback support is not available. + * - -EINVAL: The port_id or the queue_id is out of range, or the callback + * is NULL or not found for the port/queue. + */ +int rte_eth_remove_tx_callback(uint16_t port_id, uint16_t queue_id, + const struct rte_eth_rxtx_callback *user_cb); + +/** + * Retrieve information about given port's RX queue. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The RX queue on the Ethernet device for which information + * will be retrieved. + * @param qinfo + * A pointer to a structure of type *rte_eth_rxq_info_info* to be filled with + * the information of the Ethernet device. + * + * @return + * - 0: Success + * - -ENOTSUP: routine is not supported by the device PMD. + * - -EINVAL: The port_id or the queue_id is out of range. + */ +int rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id, + struct rte_eth_rxq_info *qinfo); + +/** + * Retrieve information about given port's TX queue. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The TX queue on the Ethernet device for which information + * will be retrieved. + * @param qinfo + * A pointer to a structure of type *rte_eth_txq_info_info* to be filled with + * the information of the Ethernet device. + * + * @return + * - 0: Success + * - -ENOTSUP: routine is not supported by the device PMD. + * - -EINVAL: The port_id or the queue_id is out of range. + */ +int rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id, + struct rte_eth_txq_info *qinfo); + +/** + * Retrieve device registers and register attributes (number of registers and + * register size) + * + * @param port_id + * The port identifier of the Ethernet device. + * @param info + * Pointer to rte_dev_reg_info structure to fill in. If info->data is + * NULL the function fills in the width and length fields. If non-NULL + * the registers are put into the buffer pointed at by the data field. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_reg_info(uint16_t port_id, struct rte_dev_reg_info *info); + +/** + * Retrieve size of device EEPROM + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (>=0) EEPROM size if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_eeprom_length(uint16_t port_id); + +/** + * Retrieve EEPROM and EEPROM attribute + * + * @param port_id + * The port identifier of the Ethernet device. + * @param info + * The template includes buffer for return EEPROM data and + * EEPROM attributes to be filled. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info); + +/** + * Program EEPROM with provided data + * + * @param port_id + * The port identifier of the Ethernet device. + * @param info + * The template includes EEPROM data for programming and + * EEPROM attributes to be filled + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_set_eeprom(uint16_t port_id, struct rte_dev_eeprom_info *info); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Retrieve the type and size of plugin module EEPROM + * + * @param port_id + * The port identifier of the Ethernet device. + * @param modinfo + * The type and size of plugin module EEPROM. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +int __rte_experimental +rte_eth_dev_get_module_info(uint16_t port_id, + struct rte_eth_dev_module_info *modinfo); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Retrieve the data of plugin module EEPROM + * + * @param port_id + * The port identifier of the Ethernet device. + * @param info + * The template includes the plugin module EEPROM attributes, and the + * buffer for return plugin module EEPROM data. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - others depends on the specific operations implementation. + */ +int __rte_experimental +rte_eth_dev_get_module_eeprom(uint16_t port_id, + struct rte_dev_eeprom_info *info); + +/** + * Set the list of multicast addresses to filter on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mc_addr_set + * The array of multicast addresses to set. Equal to NULL when the function + * is invoked to flush the set of filtered addresses. + * @param nb_mc_addr + * The number of multicast addresses in the *mc_addr_set* array. Equal to 0 + * when the function is invoked to flush the set of filtered addresses. + * @return + * - (0) if successful. + * - (-ENODEV) if *port_id* invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if PMD of *port_id* doesn't support multicast filtering. + * - (-ENOSPC) if *port_id* has not enough multicast filtering resources. + */ +int rte_eth_dev_set_mc_addr_list(uint16_t port_id, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr); + +/** + * Enable IEEE1588/802.1AS timestamping for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * + * @return + * - 0: Success. + * - -ENODEV: The port ID is invalid. + * - -EIO: if device is removed. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_enable(uint16_t port_id); + +/** + * Disable IEEE1588/802.1AS timestamping for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * + * @return + * - 0: Success. + * - -ENODEV: The port ID is invalid. + * - -EIO: if device is removed. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_disable(uint16_t port_id); + +/** + * Read an IEEE1588/802.1AS RX timestamp from an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param timestamp + * Pointer to the timestamp struct. + * @param flags + * Device specific flags. Used to pass the RX timesync register index to + * i40e. Unused in igb/ixgbe, pass 0 instead. + * + * @return + * - 0: Success. + * - -EINVAL: No timestamp is available. + * - -ENODEV: The port ID is invalid. + * - -EIO: if device is removed. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_read_rx_timestamp(uint16_t port_id, + struct timespec *timestamp, uint32_t flags); + +/** + * Read an IEEE1588/802.1AS TX timestamp from an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param timestamp + * Pointer to the timestamp struct. + * + * @return + * - 0: Success. + * - -EINVAL: No timestamp is available. + * - -ENODEV: The port ID is invalid. + * - -EIO: if device is removed. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_read_tx_timestamp(uint16_t port_id, + struct timespec *timestamp); + +/** + * Adjust the timesync clock on an Ethernet device. + * + * This is usually used in conjunction with other Ethdev timesync functions to + * synchronize the device time using the IEEE1588/802.1AS protocol. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param delta + * The adjustment in nanoseconds. + * + * @return + * - 0: Success. + * - -ENODEV: The port ID is invalid. + * - -EIO: if device is removed. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_adjust_time(uint16_t port_id, int64_t delta); + +/** + * Read the time from the timesync clock on an Ethernet device. + * + * This is usually used in conjunction with other Ethdev timesync functions to + * synchronize the device time using the IEEE1588/802.1AS protocol. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param time + * Pointer to the timespec struct that holds the time. + * + * @return + * - 0: Success. + */ +int rte_eth_timesync_read_time(uint16_t port_id, struct timespec *time); + +/** + * Set the time of the timesync clock on an Ethernet device. + * + * This is usually used in conjunction with other Ethdev timesync functions to + * synchronize the device time using the IEEE1588/802.1AS protocol. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param time + * Pointer to the timespec struct that holds the time. + * + * @return + * - 0: Success. + * - -EINVAL: No timestamp is available. + * - -ENODEV: The port ID is invalid. + * - -EIO: if device is removed. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_write_time(uint16_t port_id, const struct timespec *time); + +/** + * Config l2 tunnel ether type of an Ethernet device for filtering specific + * tunnel packets by ether type. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param l2_tunnel + * l2 tunnel configuration. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support tunnel type. + */ +int +rte_eth_dev_l2_tunnel_eth_type_conf(uint16_t port_id, + struct rte_eth_l2_tunnel_conf *l2_tunnel); + +/** + * Enable/disable l2 tunnel offload functions. Include, + * 1, The ability of parsing a type of l2 tunnel of an Ethernet device. + * Filtering, forwarding and offloading this type of tunnel packets depend on + * this ability. + * 2, Stripping the l2 tunnel tag. + * 3, Insertion of the l2 tunnel tag. + * 4, Forwarding the packets based on the l2 tunnel tag. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param l2_tunnel + * l2 tunnel parameters. + * @param mask + * Indicate the offload function. + * @param en + * Enable or disable this function. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-EIO) if device is removed. + * - (-ENOTSUP) if hardware doesn't support tunnel type. + */ +int +rte_eth_dev_l2_tunnel_offload_set(uint16_t port_id, + struct rte_eth_l2_tunnel_conf *l2_tunnel, + uint32_t mask, + uint8_t en); + +/** +* Get the port id from pci address or device name +* Example: +* - PCIe, 0000:2:00.0 +* - SoC, fsl-gmac0 +* - vdev, net_pcap0 +* +* @param name +* pci address or name of the device +* @param port_id +* pointer to port identifier of the device +* @return +* - (0) if successful and port_id is filled. +* - (-ENODEV or -EINVAL) on failure. +*/ +int +rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id); + +/** +* Get the device name from port id +* Example: +* - PCIe Bus:Domain:Function, 0000:02:00.0 +* - SoC device name, fsl-gmac0 +* - vdev dpdk name, net_[pcap0|null0|tun0|tap0] +* +* @param port_id +* Port identifier of the device. +* @param name +* Buffer of size RTE_ETH_NAME_MAX_LEN to store the name. +* @return +* - (0) if successful. +* - (-EINVAL) on failure. +*/ +int +rte_eth_dev_get_name_by_port(uint16_t port_id, char *name); + +/** + * Check that numbers of Rx and Tx descriptors satisfy descriptors limits from + * the ethernet device information, otherwise adjust them to boundaries. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param nb_rx_desc + * A pointer to a uint16_t where the number of receive + * descriptors stored. + * @param nb_tx_desc + * A pointer to a uint16_t where the number of transmit + * descriptors stored. + * @return + * - (0) if successful. + * - (-ENOTSUP, -ENODEV or -EINVAL) on failure. + */ +int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id, + uint16_t *nb_rx_desc, + uint16_t *nb_tx_desc); + +/** + * Test if a port supports specific mempool ops. + * + * @param port_id + * Port identifier of the Ethernet device. + * @param [in] pool + * The name of the pool operations to test. + * @return + * - 0: best mempool ops choice for this port. + * - 1: mempool ops are supported for this port. + * - -ENOTSUP: mempool ops not supported for this port. + * - -ENODEV: Invalid port Identifier. + * - -EINVAL: Pool param is null. + */ +int +rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool); + +/** + * Get the security context for the Ethernet device. + * + * @param port_id + * Port identifier of the Ethernet device + * @return + * - NULL on error. + * - pointer to security context on success. + */ +void * +rte_eth_dev_get_sec_ctx(uint16_t port_id); + + +#include <rte_ethdev_core.h> + +/** + * + * Retrieve a burst of input packets from a receive queue of an Ethernet + * device. The retrieved packets are stored in *rte_mbuf* structures whose + * pointers are supplied in the *rx_pkts* array. + * + * The rte_eth_rx_burst() function loops, parsing the RX ring of the + * receive queue, up to *nb_pkts* packets, and for each completed RX + * descriptor in the ring, it performs the following operations: + * + * - Initialize the *rte_mbuf* data structure associated with the + * RX descriptor according to the information provided by the NIC into + * that RX descriptor. + * + * - Store the *rte_mbuf* data structure into the next entry of the + * *rx_pkts* array. + * + * - Replenish the RX descriptor with a new *rte_mbuf* buffer + * allocated from the memory pool associated with the receive queue at + * initialization time. + * + * When retrieving an input packet that was scattered by the controller + * into multiple receive descriptors, the rte_eth_rx_burst() function + * appends the associated *rte_mbuf* buffers to the first buffer of the + * packet. + * + * The rte_eth_rx_burst() function returns the number of packets + * actually retrieved, which is the number of *rte_mbuf* data structures + * effectively supplied into the *rx_pkts* array. + * A return value equal to *nb_pkts* indicates that the RX queue contained + * at least *rx_pkts* packets, and this is likely to signify that other + * received packets remain in the input queue. Applications implementing + * a "retrieve as much received packets as possible" policy can check this + * specific case and keep invoking the rte_eth_rx_burst() function until + * a value less than *nb_pkts* is returned. + * + * This receive method has the following advantages: + * + * - It allows a run-to-completion network stack engine to retrieve and + * to immediately process received packets in a fast burst-oriented + * approach, avoiding the overhead of unnecessary intermediate packet + * queue/dequeue operations. + * + * - Conversely, it also allows an asynchronous-oriented processing + * method to retrieve bursts of received packets and to immediately + * queue them for further parallel processing by another logical core, + * for instance. However, instead of having received packets being + * individually queued by the driver, this approach allows the caller + * of the rte_eth_rx_burst() function to queue a burst of retrieved + * packets at a time and therefore dramatically reduce the cost of + * enqueue/dequeue operations per packet. + * + * - It allows the rte_eth_rx_burst() function of the driver to take + * advantage of burst-oriented hardware features (CPU cache, + * prefetch instructions, and so on) to minimize the number of CPU + * cycles per packet. + * + * To summarize, the proposed receive API enables many + * burst-oriented optimizations in both synchronous and asynchronous + * packet processing environments with no overhead in both cases. + * + * The rte_eth_rx_burst() function does not provide any error + * notification to avoid the corresponding overhead. As a hint, the + * upper-level application might check the status of the device link once + * being systematically returned a 0 value for a given number of tries. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param rx_pkts + * The address of an array of pointers to *rte_mbuf* structures that + * must be large enough to store *nb_pkts* pointers in it. + * @param nb_pkts + * The maximum number of packets to retrieve. + * @return + * The number of packets actually retrieved, which is the number + * of pointers to *rte_mbuf* structures effectively supplied to the + * *rx_pkts* array. + */ +static inline uint16_t +rte_eth_rx_burst(uint16_t port_id, uint16_t queue_id, + struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + uint16_t nb_rx; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0); + RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0); + + if (queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id); + return 0; + } +#endif + nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], + rx_pkts, nb_pkts); + +#ifdef RTE_ETHDEV_RXTX_CALLBACKS + if (unlikely(dev->post_rx_burst_cbs[queue_id] != NULL)) { + struct rte_eth_rxtx_callback *cb = + dev->post_rx_burst_cbs[queue_id]; + + do { + nb_rx = cb->fn.rx(port_id, queue_id, rx_pkts, nb_rx, + nb_pkts, cb->param); + cb = cb->next; + } while (cb != NULL); + } +#endif + + return nb_rx; +} + +/** + * Get the number of used descriptors of a rx queue + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue id on the specific port. + * @return + * The number of used descriptors in the specific queue, or: + * (-EINVAL) if *port_id* or *queue_id* is invalid + * (-ENOTSUP) if the device does not support this function + */ +static inline int +rte_eth_rx_queue_count(uint16_t port_id, uint16_t queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_count, -ENOTSUP); + if (queue_id >= dev->data->nb_rx_queues) + return -EINVAL; + + return (int)(*dev->dev_ops->rx_queue_count)(dev, queue_id); +} + +/** + * Check if the DD bit of the specific RX descriptor in the queue has been set + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue id on the specific port. + * @param offset + * The offset of the descriptor ID from tail. + * @return + * - (1) if the specific DD bit is set. + * - (0) if the specific DD bit is not set. + * - (-ENODEV) if *port_id* invalid. + * - (-ENOTSUP) if the device does not support this function + */ +static inline int +rte_eth_rx_descriptor_done(uint16_t port_id, uint16_t queue_id, uint16_t offset) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_descriptor_done, -ENOTSUP); + return (*dev->dev_ops->rx_descriptor_done)( \ + dev->data->rx_queues[queue_id], offset); +} + +#define RTE_ETH_RX_DESC_AVAIL 0 /**< Desc available for hw. */ +#define RTE_ETH_RX_DESC_DONE 1 /**< Desc done, filled by hw. */ +#define RTE_ETH_RX_DESC_UNAVAIL 2 /**< Desc used by driver or hw. */ + +/** + * Check the status of a Rx descriptor in the queue + * + * It should be called in a similar context than the Rx function: + * - on a dataplane core + * - not concurrently on the same queue + * + * Since it's a dataplane function, no check is performed on port_id and + * queue_id. The caller must therefore ensure that the port is enabled + * and the queue is configured and running. + * + * Note: accessing to a random descriptor in the ring may trigger cache + * misses and have a performance impact. + * + * @param port_id + * A valid port identifier of the Ethernet device which. + * @param queue_id + * A valid Rx queue identifier on this port. + * @param offset + * The offset of the descriptor starting from tail (0 is the next + * packet to be received by the driver). + * + * @return + * - (RTE_ETH_RX_DESC_AVAIL): Descriptor is available for the hardware to + * receive a packet. + * - (RTE_ETH_RX_DESC_DONE): Descriptor is done, it is filled by hw, but + * not yet processed by the driver (i.e. in the receive queue). + * - (RTE_ETH_RX_DESC_UNAVAIL): Descriptor is unavailable, either hold by + * the driver and not yet returned to hw, or reserved by the hw. + * - (-EINVAL) bad descriptor offset. + * - (-ENOTSUP) if the device does not support this function. + * - (-ENODEV) bad port or queue (only if compiled with debug). + */ +static inline int +rte_eth_rx_descriptor_status(uint16_t port_id, uint16_t queue_id, + uint16_t offset) +{ + struct rte_eth_dev *dev; + void *rxq; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); +#endif + dev = &rte_eth_devices[port_id]; +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + if (queue_id >= dev->data->nb_rx_queues) + return -ENODEV; +#endif + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_descriptor_status, -ENOTSUP); + rxq = dev->data->rx_queues[queue_id]; + + return (*dev->dev_ops->rx_descriptor_status)(rxq, offset); +} + +#define RTE_ETH_TX_DESC_FULL 0 /**< Desc filled for hw, waiting xmit. */ +#define RTE_ETH_TX_DESC_DONE 1 /**< Desc done, packet is transmitted. */ +#define RTE_ETH_TX_DESC_UNAVAIL 2 /**< Desc used by driver or hw. */ + +/** + * Check the status of a Tx descriptor in the queue. + * + * It should be called in a similar context than the Tx function: + * - on a dataplane core + * - not concurrently on the same queue + * + * Since it's a dataplane function, no check is performed on port_id and + * queue_id. The caller must therefore ensure that the port is enabled + * and the queue is configured and running. + * + * Note: accessing to a random descriptor in the ring may trigger cache + * misses and have a performance impact. + * + * @param port_id + * A valid port identifier of the Ethernet device which. + * @param queue_id + * A valid Tx queue identifier on this port. + * @param offset + * The offset of the descriptor starting from tail (0 is the place where + * the next packet will be send). + * + * @return + * - (RTE_ETH_TX_DESC_FULL) Descriptor is being processed by the hw, i.e. + * in the transmit queue. + * - (RTE_ETH_TX_DESC_DONE) Hardware is done with this descriptor, it can + * be reused by the driver. + * - (RTE_ETH_TX_DESC_UNAVAIL): Descriptor is unavailable, reserved by the + * driver or the hardware. + * - (-EINVAL) bad descriptor offset. + * - (-ENOTSUP) if the device does not support this function. + * - (-ENODEV) bad port or queue (only if compiled with debug). + */ +static inline int rte_eth_tx_descriptor_status(uint16_t port_id, + uint16_t queue_id, uint16_t offset) +{ + struct rte_eth_dev *dev; + void *txq; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); +#endif + dev = &rte_eth_devices[port_id]; +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + if (queue_id >= dev->data->nb_tx_queues) + return -ENODEV; +#endif + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_descriptor_status, -ENOTSUP); + txq = dev->data->tx_queues[queue_id]; + + return (*dev->dev_ops->tx_descriptor_status)(txq, offset); +} + +/** + * Send a burst of output packets on a transmit queue of an Ethernet device. + * + * The rte_eth_tx_burst() function is invoked to transmit output packets + * on the output queue *queue_id* of the Ethernet device designated by its + * *port_id*. + * The *nb_pkts* parameter is the number of packets to send which are + * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them + * allocated from a pool created with rte_pktmbuf_pool_create(). + * The rte_eth_tx_burst() function loops, sending *nb_pkts* packets, + * up to the number of transmit descriptors available in the TX ring of the + * transmit queue. + * For each packet to send, the rte_eth_tx_burst() function performs + * the following operations: + * + * - Pick up the next available descriptor in the transmit ring. + * + * - Free the network buffer previously sent with that descriptor, if any. + * + * - Initialize the transmit descriptor with the information provided + * in the *rte_mbuf data structure. + * + * In the case of a segmented packet composed of a list of *rte_mbuf* buffers, + * the rte_eth_tx_burst() function uses several transmit descriptors + * of the ring. + * + * The rte_eth_tx_burst() function returns the number of packets it + * actually sent. A return value equal to *nb_pkts* means that all packets + * have been sent, and this is likely to signify that other output packets + * could be immediately transmitted again. Applications that implement a + * "send as many packets to transmit as possible" policy can check this + * specific case and keep invoking the rte_eth_tx_burst() function until + * a value less than *nb_pkts* is returned. + * + * It is the responsibility of the rte_eth_tx_burst() function to + * transparently free the memory buffers of packets previously sent. + * This feature is driven by the *tx_free_thresh* value supplied to the + * rte_eth_dev_configure() function at device configuration time. + * When the number of free TX descriptors drops below this threshold, the + * rte_eth_tx_burst() function must [attempt to] free the *rte_mbuf* buffers + * of those packets whose transmission was effectively completed. + * + * If the PMD is DEV_TX_OFFLOAD_MT_LOCKFREE capable, multiple threads can + * invoke this function concurrently on the same tx queue without SW lock. + * @see rte_eth_dev_info_get, struct rte_eth_txconf::txq_flags + * + * @see rte_eth_tx_prepare to perform some prior checks or adjustments + * for offloads. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the transmit queue through which output packets must be + * sent. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param tx_pkts + * The address of an array of *nb_pkts* pointers to *rte_mbuf* structures + * which contain the output packets. + * @param nb_pkts + * The maximum number of packets to transmit. + * @return + * The number of output packets actually stored in transmit descriptors of + * the transmit ring. The return value can be less than the value of the + * *tx_pkts* parameter when the transmit ring is full or has been filled up. + */ +static inline uint16_t +rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id, + struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0); + RTE_FUNC_PTR_OR_ERR_RET(*dev->tx_pkt_burst, 0); + + if (queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id); + return 0; + } +#endif + +#ifdef RTE_ETHDEV_RXTX_CALLBACKS + struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id]; + + if (unlikely(cb != NULL)) { + do { + nb_pkts = cb->fn.tx(port_id, queue_id, tx_pkts, nb_pkts, + cb->param); + cb = cb->next; + } while (cb != NULL); + } +#endif + + return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts); +} + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Process a burst of output packets on a transmit queue of an Ethernet device. + * + * The rte_eth_tx_prepare() function is invoked to prepare output packets to be + * transmitted on the output queue *queue_id* of the Ethernet device designated + * by its *port_id*. + * The *nb_pkts* parameter is the number of packets to be prepared which are + * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them + * allocated from a pool created with rte_pktmbuf_pool_create(). + * For each packet to send, the rte_eth_tx_prepare() function performs + * the following operations: + * + * - Check if packet meets devices requirements for tx offloads. + * + * - Check limitations about number of segments. + * + * - Check additional requirements when debug is enabled. + * + * - Update and/or reset required checksums when tx offload is set for packet. + * + * Since this function can modify packet data, provided mbufs must be safely + * writable (e.g. modified data cannot be in shared segment). + * + * The rte_eth_tx_prepare() function returns the number of packets ready to be + * sent. A return value equal to *nb_pkts* means that all packets are valid and + * ready to be sent, otherwise stops processing on the first invalid packet and + * leaves the rest packets untouched. + * + * When this functionality is not implemented in the driver, all packets are + * are returned untouched. + * + * @param port_id + * The port identifier of the Ethernet device. + * The value must be a valid port id. + * @param queue_id + * The index of the transmit queue through which output packets must be + * sent. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param tx_pkts + * The address of an array of *nb_pkts* pointers to *rte_mbuf* structures + * which contain the output packets. + * @param nb_pkts + * The maximum number of packets to process. + * @return + * The number of packets correct and ready to be sent. The return value can be + * less than the value of the *tx_pkts* parameter when some packet doesn't + * meet devices requirements with rte_errno set appropriately: + * - -EINVAL: offload flags are not correctly set + * - -ENOTSUP: the offload feature is not supported by the hardware + * + */ + +#ifndef RTE_ETHDEV_TX_PREPARE_NOOP + +static inline uint16_t +rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id, + struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct rte_eth_dev *dev; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid TX port_id=%d\n", port_id); + rte_errno = -EINVAL; + return 0; + } +#endif + + dev = &rte_eth_devices[port_id]; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + if (queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id); + rte_errno = -EINVAL; + return 0; + } +#endif + + if (!dev->tx_pkt_prepare) + return nb_pkts; + + return (*dev->tx_pkt_prepare)(dev->data->tx_queues[queue_id], + tx_pkts, nb_pkts); +} + +#else + +/* + * Native NOOP operation for compilation targets which doesn't require any + * preparations steps, and functional NOOP may introduce unnecessary performance + * drop. + * + * Generally this is not a good idea to turn it on globally and didn't should + * be used if behavior of tx_preparation can change. + */ + +static inline uint16_t +rte_eth_tx_prepare(__rte_unused uint16_t port_id, + __rte_unused uint16_t queue_id, + __rte_unused struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + return nb_pkts; +} + +#endif + +/** + * Send any packets queued up for transmission on a port and HW queue + * + * This causes an explicit flush of packets previously buffered via the + * rte_eth_tx_buffer() function. It returns the number of packets successfully + * sent to the NIC, and calls the error callback for any unsent packets. Unless + * explicitly set up otherwise, the default callback simply frees the unsent + * packets back to the owning mempool. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the transmit queue through which output packets must be + * sent. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param buffer + * Buffer of packets to be transmit. + * @return + * The number of packets successfully sent to the Ethernet device. The error + * callback is called for any packets which could not be sent. + */ +static inline uint16_t +rte_eth_tx_buffer_flush(uint16_t port_id, uint16_t queue_id, + struct rte_eth_dev_tx_buffer *buffer) +{ + uint16_t sent; + uint16_t to_send = buffer->length; + + if (to_send == 0) + return 0; + + sent = rte_eth_tx_burst(port_id, queue_id, buffer->pkts, to_send); + + buffer->length = 0; + + /* All packets sent, or to be dealt with by callback below */ + if (unlikely(sent != to_send)) + buffer->error_callback(&buffer->pkts[sent], + (uint16_t)(to_send - sent), + buffer->error_userdata); + + return sent; +} + +/** + * Buffer a single packet for future transmission on a port and queue + * + * This function takes a single mbuf/packet and buffers it for later + * transmission on the particular port and queue specified. Once the buffer is + * full of packets, an attempt will be made to transmit all the buffered + * packets. In case of error, where not all packets can be transmitted, a + * callback is called with the unsent packets as a parameter. If no callback + * is explicitly set up, the unsent packets are just freed back to the owning + * mempool. The function returns the number of packets actually sent i.e. + * 0 if no buffer flush occurred, otherwise the number of packets successfully + * flushed + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the transmit queue through which output packets must be + * sent. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param buffer + * Buffer used to collect packets to be sent. + * @param tx_pkt + * Pointer to the packet mbuf to be sent. + * @return + * 0 = packet has been buffered for later transmission + * N > 0 = packet has been buffered, and the buffer was subsequently flushed, + * causing N packets to be sent, and the error callback to be called for + * the rest. + */ +static __rte_always_inline uint16_t +rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id, + struct rte_eth_dev_tx_buffer *buffer, struct rte_mbuf *tx_pkt) +{ + buffer->pkts[buffer->length++] = tx_pkt; + if (buffer->length < buffer->size) + return 0; + + return rte_eth_tx_buffer_flush(port_id, queue_id, buffer); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ETHDEV_H_ */ diff --git a/lib/librte_ethdev/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h new file mode 100644 index 00000000..33d12b3a --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev_core.h @@ -0,0 +1,625 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef _RTE_ETHDEV_CORE_H_ +#define _RTE_ETHDEV_CORE_H_ + +/** + * @file + * + * RTE Ethernet Device internal header. + * + * This header contains internal data types. But they are still part of the + * public API because they are used by inline functions in the published API. + * + * Applications should not use these directly. + * + */ + +struct rte_eth_dev_callback; +/** @internal Structure to keep track of registered callbacks */ +TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback); + +/* + * Definitions of all functions exported by an Ethernet driver through the + * the generic structure of type *eth_dev_ops* supplied in the *rte_eth_dev* + * structure associated with an Ethernet device. + */ +struct rte_eth_dev; + +typedef int (*eth_dev_configure_t)(struct rte_eth_dev *dev); +/**< @internal Ethernet device configuration. */ + +typedef int (*eth_dev_start_t)(struct rte_eth_dev *dev); +/**< @internal Function used to start a configured Ethernet device. */ + +typedef void (*eth_dev_stop_t)(struct rte_eth_dev *dev); +/**< @internal Function used to stop a configured Ethernet device. */ + +typedef int (*eth_dev_set_link_up_t)(struct rte_eth_dev *dev); +/**< @internal Function used to link up a configured Ethernet device. */ + +typedef int (*eth_dev_set_link_down_t)(struct rte_eth_dev *dev); +/**< @internal Function used to link down a configured Ethernet device. */ + +typedef void (*eth_dev_close_t)(struct rte_eth_dev *dev); +/**< @internal Function used to close a configured Ethernet device. */ + +typedef int (*eth_dev_reset_t)(struct rte_eth_dev *dev); +/** <@internal Function used to reset a configured Ethernet device. */ + +typedef int (*eth_is_removed_t)(struct rte_eth_dev *dev); +/**< @internal Function used to detect an Ethernet device removal. */ + +typedef void (*eth_promiscuous_enable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to enable the RX promiscuous mode of an Ethernet device. */ + +typedef void (*eth_promiscuous_disable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to disable the RX promiscuous mode of an Ethernet device. */ + +typedef void (*eth_allmulticast_enable_t)(struct rte_eth_dev *dev); +/**< @internal Enable the receipt of all multicast packets by an Ethernet device. */ + +typedef void (*eth_allmulticast_disable_t)(struct rte_eth_dev *dev); +/**< @internal Disable the receipt of all multicast packets by an Ethernet device. */ + +typedef int (*eth_link_update_t)(struct rte_eth_dev *dev, + int wait_to_complete); +/**< @internal Get link speed, duplex mode and state (up/down) of an Ethernet device. */ + +typedef int (*eth_stats_get_t)(struct rte_eth_dev *dev, + struct rte_eth_stats *igb_stats); +/**< @internal Get global I/O statistics of an Ethernet device. */ + +typedef void (*eth_stats_reset_t)(struct rte_eth_dev *dev); +/**< @internal Reset global I/O statistics of an Ethernet device to 0. */ + +typedef int (*eth_xstats_get_t)(struct rte_eth_dev *dev, + struct rte_eth_xstat *stats, unsigned n); +/**< @internal Get extended stats of an Ethernet device. */ + +typedef int (*eth_xstats_get_by_id_t)(struct rte_eth_dev *dev, + const uint64_t *ids, + uint64_t *values, + unsigned int n); +/**< @internal Get extended stats of an Ethernet device. */ + +typedef void (*eth_xstats_reset_t)(struct rte_eth_dev *dev); +/**< @internal Reset extended stats of an Ethernet device. */ + +typedef int (*eth_xstats_get_names_t)(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, unsigned size); +/**< @internal Get names of extended stats of an Ethernet device. */ + +typedef int (*eth_xstats_get_names_by_id_t)(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, const uint64_t *ids, + unsigned int size); +/**< @internal Get names of extended stats of an Ethernet device. */ + +typedef int (*eth_queue_stats_mapping_set_t)(struct rte_eth_dev *dev, + uint16_t queue_id, + uint8_t stat_idx, + uint8_t is_rx); +/**< @internal Set a queue statistics mapping for a tx/rx queue of an Ethernet device. */ + +typedef void (*eth_dev_infos_get_t)(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info); +/**< @internal Get specific informations of an Ethernet device. */ + +typedef const uint32_t *(*eth_dev_supported_ptypes_get_t)(struct rte_eth_dev *dev); +/**< @internal Get supported ptypes of an Ethernet device. */ + +typedef int (*eth_queue_start_t)(struct rte_eth_dev *dev, + uint16_t queue_id); +/**< @internal Start rx and tx of a queue of an Ethernet device. */ + +typedef int (*eth_queue_stop_t)(struct rte_eth_dev *dev, + uint16_t queue_id); +/**< @internal Stop rx and tx of a queue of an Ethernet device. */ + +typedef int (*eth_rx_queue_setup_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id, + uint16_t nb_rx_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); +/**< @internal Set up a receive queue of an Ethernet device. */ + +typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev, + uint16_t tx_queue_id, + uint16_t nb_tx_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); +/**< @internal Setup a transmit queue of an Ethernet device. */ + +typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id); +/**< @internal Enable interrupt of a receive queue of an Ethernet device. */ + +typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id); +/**< @internal Disable interrupt of a receive queue of an Ethernet device. */ + +typedef void (*eth_queue_release_t)(void *queue); +/**< @internal Release memory resources allocated by given RX/TX queue. */ + +typedef uint32_t (*eth_rx_queue_count_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id); +/**< @internal Get number of used descriptors on a receive queue. */ + +typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset); +/**< @internal Check DD bit of specific RX descriptor */ + +typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset); +/**< @internal Check the status of a Rx descriptor */ + +typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset); +/**< @internal Check the status of a Tx descriptor */ + +typedef int (*eth_fw_version_get_t)(struct rte_eth_dev *dev, + char *fw_version, size_t fw_size); +/**< @internal Get firmware information of an Ethernet device. */ + +typedef int (*eth_tx_done_cleanup_t)(void *txq, uint32_t free_cnt); +/**< @internal Force mbufs to be from TX ring. */ + +typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id, struct rte_eth_rxq_info *qinfo); + +typedef void (*eth_txq_info_get_t)(struct rte_eth_dev *dev, + uint16_t tx_queue_id, struct rte_eth_txq_info *qinfo); + +typedef int (*mtu_set_t)(struct rte_eth_dev *dev, uint16_t mtu); +/**< @internal Set MTU. */ + +typedef int (*vlan_filter_set_t)(struct rte_eth_dev *dev, + uint16_t vlan_id, + int on); +/**< @internal filtering of a VLAN Tag Identifier by an Ethernet device. */ + +typedef int (*vlan_tpid_set_t)(struct rte_eth_dev *dev, + enum rte_vlan_type type, uint16_t tpid); +/**< @internal set the outer/inner VLAN-TPID by an Ethernet device. */ + +typedef int (*vlan_offload_set_t)(struct rte_eth_dev *dev, int mask); +/**< @internal set VLAN offload function by an Ethernet device. */ + +typedef int (*vlan_pvid_set_t)(struct rte_eth_dev *dev, + uint16_t vlan_id, + int on); +/**< @internal set port based TX VLAN insertion by an Ethernet device. */ + +typedef void (*vlan_strip_queue_set_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id, + int on); +/**< @internal VLAN stripping enable/disable by an queue of Ethernet device. */ + +typedef uint16_t (*eth_rx_burst_t)(void *rxq, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); +/**< @internal Retrieve input packets from a receive queue of an Ethernet device. */ + +typedef uint16_t (*eth_tx_burst_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Send output packets on a transmit queue of an Ethernet device. */ + +typedef uint16_t (*eth_tx_prep_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Prepare output packets on a transmit queue of an Ethernet device. */ + +typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, + struct rte_eth_fc_conf *fc_conf); +/**< @internal Get current flow control parameter on an Ethernet device */ + +typedef int (*flow_ctrl_set_t)(struct rte_eth_dev *dev, + struct rte_eth_fc_conf *fc_conf); +/**< @internal Setup flow control parameter on an Ethernet device */ + +typedef int (*priority_flow_ctrl_set_t)(struct rte_eth_dev *dev, + struct rte_eth_pfc_conf *pfc_conf); +/**< @internal Setup priority flow control parameter on an Ethernet device */ + +typedef int (*reta_update_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); +/**< @internal Update RSS redirection table on an Ethernet device */ + +typedef int (*reta_query_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); +/**< @internal Query RSS redirection table on an Ethernet device */ + +typedef int (*rss_hash_update_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); +/**< @internal Update RSS hash configuration of an Ethernet device */ + +typedef int (*rss_hash_conf_get_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); +/**< @internal Get current RSS hash configuration of an Ethernet device */ + +typedef int (*eth_dev_led_on_t)(struct rte_eth_dev *dev); +/**< @internal Turn on SW controllable LED on an Ethernet device */ + +typedef int (*eth_dev_led_off_t)(struct rte_eth_dev *dev); +/**< @internal Turn off SW controllable LED on an Ethernet device */ + +typedef void (*eth_mac_addr_remove_t)(struct rte_eth_dev *dev, uint32_t index); +/**< @internal Remove MAC address from receive address register */ + +typedef int (*eth_mac_addr_add_t)(struct rte_eth_dev *dev, + struct ether_addr *mac_addr, + uint32_t index, + uint32_t vmdq); +/**< @internal Set a MAC address into Receive Address Address Register */ + +typedef int (*eth_mac_addr_set_t)(struct rte_eth_dev *dev, + struct ether_addr *mac_addr); +/**< @internal Set a MAC address into Receive Address Address Register */ + +typedef int (*eth_uc_hash_table_set_t)(struct rte_eth_dev *dev, + struct ether_addr *mac_addr, + uint8_t on); +/**< @internal Set a Unicast Hash bitmap */ + +typedef int (*eth_uc_all_hash_table_set_t)(struct rte_eth_dev *dev, + uint8_t on); +/**< @internal Set all Unicast Hash bitmap */ + +typedef int (*eth_set_queue_rate_limit_t)(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t tx_rate); +/**< @internal Set queue TX rate */ + +typedef int (*eth_mirror_rule_set_t)(struct rte_eth_dev *dev, + struct rte_eth_mirror_conf *mirror_conf, + uint8_t rule_id, + uint8_t on); +/**< @internal Add a traffic mirroring rule on an Ethernet device */ + +typedef int (*eth_mirror_rule_reset_t)(struct rte_eth_dev *dev, + uint8_t rule_id); +/**< @internal Remove a traffic mirroring rule on an Ethernet device */ + +typedef int (*eth_udp_tunnel_port_add_t)(struct rte_eth_dev *dev, + struct rte_eth_udp_tunnel *tunnel_udp); +/**< @internal Add tunneling UDP port */ + +typedef int (*eth_udp_tunnel_port_del_t)(struct rte_eth_dev *dev, + struct rte_eth_udp_tunnel *tunnel_udp); +/**< @internal Delete tunneling UDP port */ + +typedef int (*eth_set_mc_addr_list_t)(struct rte_eth_dev *dev, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr); +/**< @internal set the list of multicast addresses on an Ethernet device */ + +typedef int (*eth_timesync_enable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to enable IEEE1588/802.1AS timestamping. */ + +typedef int (*eth_timesync_disable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to disable IEEE1588/802.1AS timestamping. */ + +typedef int (*eth_timesync_read_rx_timestamp_t)(struct rte_eth_dev *dev, + struct timespec *timestamp, + uint32_t flags); +/**< @internal Function used to read an RX IEEE1588/802.1AS timestamp. */ + +typedef int (*eth_timesync_read_tx_timestamp_t)(struct rte_eth_dev *dev, + struct timespec *timestamp); +/**< @internal Function used to read a TX IEEE1588/802.1AS timestamp. */ + +typedef int (*eth_timesync_adjust_time)(struct rte_eth_dev *dev, int64_t); +/**< @internal Function used to adjust the device clock */ + +typedef int (*eth_timesync_read_time)(struct rte_eth_dev *dev, + struct timespec *timestamp); +/**< @internal Function used to get time from the device clock. */ + +typedef int (*eth_timesync_write_time)(struct rte_eth_dev *dev, + const struct timespec *timestamp); +/**< @internal Function used to get time from the device clock */ + +typedef int (*eth_get_reg_t)(struct rte_eth_dev *dev, + struct rte_dev_reg_info *info); +/**< @internal Retrieve registers */ + +typedef int (*eth_get_eeprom_length_t)(struct rte_eth_dev *dev); +/**< @internal Retrieve eeprom size */ + +typedef int (*eth_get_eeprom_t)(struct rte_eth_dev *dev, + struct rte_dev_eeprom_info *info); +/**< @internal Retrieve eeprom data */ + +typedef int (*eth_set_eeprom_t)(struct rte_eth_dev *dev, + struct rte_dev_eeprom_info *info); +/**< @internal Program eeprom data */ + +typedef int (*eth_get_module_info_t)(struct rte_eth_dev *dev, + struct rte_eth_dev_module_info *modinfo); +/**< @internal Retrieve type and size of plugin module eeprom */ + +typedef int (*eth_get_module_eeprom_t)(struct rte_eth_dev *dev, + struct rte_dev_eeprom_info *info); +/**< @internal Retrieve plugin module eeprom data */ + +typedef int (*eth_l2_tunnel_eth_type_conf_t) + (struct rte_eth_dev *dev, struct rte_eth_l2_tunnel_conf *l2_tunnel); +/**< @internal config l2 tunnel ether type */ + +typedef int (*eth_l2_tunnel_offload_set_t) + (struct rte_eth_dev *dev, + struct rte_eth_l2_tunnel_conf *l2_tunnel, + uint32_t mask, + uint8_t en); +/**< @internal enable/disable the l2 tunnel offload functions */ + + +typedef int (*eth_filter_ctrl_t)(struct rte_eth_dev *dev, + enum rte_filter_type filter_type, + enum rte_filter_op filter_op, + void *arg); +/**< @internal Take operations to assigned filter type on an Ethernet device */ + +typedef int (*eth_tm_ops_get_t)(struct rte_eth_dev *dev, void *ops); +/**< @internal Get Traffic Management (TM) operations on an Ethernet device */ + +typedef int (*eth_mtr_ops_get_t)(struct rte_eth_dev *dev, void *ops); +/**< @internal Get Trafffic Metering and Policing (MTR) operations */ + +typedef int (*eth_get_dcb_info)(struct rte_eth_dev *dev, + struct rte_eth_dcb_info *dcb_info); +/**< @internal Get dcb information on an Ethernet device */ + +typedef int (*eth_pool_ops_supported_t)(struct rte_eth_dev *dev, + const char *pool); +/**< @internal Test if a port supports specific mempool ops */ + +/** + * @internal A structure containing the functions exported by an Ethernet driver. + */ +struct eth_dev_ops { + eth_dev_configure_t dev_configure; /**< Configure device. */ + eth_dev_start_t dev_start; /**< Start device. */ + eth_dev_stop_t dev_stop; /**< Stop device. */ + eth_dev_set_link_up_t dev_set_link_up; /**< Device link up. */ + eth_dev_set_link_down_t dev_set_link_down; /**< Device link down. */ + eth_dev_close_t dev_close; /**< Close device. */ + eth_dev_reset_t dev_reset; /**< Reset device. */ + eth_link_update_t link_update; /**< Get device link state. */ + eth_is_removed_t is_removed; + /**< Check if the device was physically removed. */ + + eth_promiscuous_enable_t promiscuous_enable; /**< Promiscuous ON. */ + eth_promiscuous_disable_t promiscuous_disable;/**< Promiscuous OFF. */ + eth_allmulticast_enable_t allmulticast_enable;/**< RX multicast ON. */ + eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OFF. */ + eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address. */ + eth_mac_addr_add_t mac_addr_add; /**< Add a MAC address. */ + eth_mac_addr_set_t mac_addr_set; /**< Set a MAC address. */ + eth_set_mc_addr_list_t set_mc_addr_list; /**< set list of mcast addrs. */ + mtu_set_t mtu_set; /**< Set MTU. */ + + eth_stats_get_t stats_get; /**< Get generic device statistics. */ + eth_stats_reset_t stats_reset; /**< Reset generic device statistics. */ + eth_xstats_get_t xstats_get; /**< Get extended device statistics. */ + eth_xstats_reset_t xstats_reset; /**< Reset extended device statistics. */ + eth_xstats_get_names_t xstats_get_names; + /**< Get names of extended statistics. */ + eth_queue_stats_mapping_set_t queue_stats_mapping_set; + /**< Configure per queue stat counter mapping. */ + + eth_dev_infos_get_t dev_infos_get; /**< Get device info. */ + eth_rxq_info_get_t rxq_info_get; /**< retrieve RX queue information. */ + eth_txq_info_get_t txq_info_get; /**< retrieve TX queue information. */ + eth_fw_version_get_t fw_version_get; /**< Get firmware version. */ + eth_dev_supported_ptypes_get_t dev_supported_ptypes_get; + /**< Get packet types supported and identified by device. */ + + vlan_filter_set_t vlan_filter_set; /**< Filter VLAN Setup. */ + vlan_tpid_set_t vlan_tpid_set; /**< Outer/Inner VLAN TPID Setup. */ + vlan_strip_queue_set_t vlan_strip_queue_set; /**< VLAN Stripping on queue. */ + vlan_offload_set_t vlan_offload_set; /**< Set VLAN Offload. */ + vlan_pvid_set_t vlan_pvid_set; /**< Set port based TX VLAN insertion. */ + + eth_queue_start_t rx_queue_start;/**< Start RX for a queue. */ + eth_queue_stop_t rx_queue_stop; /**< Stop RX for a queue. */ + eth_queue_start_t tx_queue_start;/**< Start TX for a queue. */ + eth_queue_stop_t tx_queue_stop; /**< Stop TX for a queue. */ + eth_rx_queue_setup_t rx_queue_setup;/**< Set up device RX queue. */ + eth_queue_release_t rx_queue_release; /**< Release RX queue. */ + eth_rx_queue_count_t rx_queue_count; + /**< Get the number of used RX descriptors. */ + eth_rx_descriptor_done_t rx_descriptor_done; /**< Check rxd DD bit. */ + eth_rx_descriptor_status_t rx_descriptor_status; + /**< Check the status of a Rx descriptor. */ + eth_tx_descriptor_status_t tx_descriptor_status; + /**< Check the status of a Tx descriptor. */ + eth_rx_enable_intr_t rx_queue_intr_enable; /**< Enable Rx queue interrupt. */ + eth_rx_disable_intr_t rx_queue_intr_disable; /**< Disable Rx queue interrupt. */ + eth_tx_queue_setup_t tx_queue_setup;/**< Set up device TX queue. */ + eth_queue_release_t tx_queue_release; /**< Release TX queue. */ + eth_tx_done_cleanup_t tx_done_cleanup;/**< Free tx ring mbufs */ + + eth_dev_led_on_t dev_led_on; /**< Turn on LED. */ + eth_dev_led_off_t dev_led_off; /**< Turn off LED. */ + + flow_ctrl_get_t flow_ctrl_get; /**< Get flow control. */ + flow_ctrl_set_t flow_ctrl_set; /**< Setup flow control. */ + priority_flow_ctrl_set_t priority_flow_ctrl_set; /**< Setup priority flow control. */ + + eth_uc_hash_table_set_t uc_hash_table_set; /**< Set Unicast Table Array. */ + eth_uc_all_hash_table_set_t uc_all_hash_table_set; /**< Set Unicast hash bitmap. */ + + eth_mirror_rule_set_t mirror_rule_set; /**< Add a traffic mirror rule. */ + eth_mirror_rule_reset_t mirror_rule_reset; /**< reset a traffic mirror rule. */ + + eth_udp_tunnel_port_add_t udp_tunnel_port_add; /** Add UDP tunnel port. */ + eth_udp_tunnel_port_del_t udp_tunnel_port_del; /** Del UDP tunnel port. */ + eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf; + /** Config ether type of l2 tunnel. */ + eth_l2_tunnel_offload_set_t l2_tunnel_offload_set; + /** Enable/disable l2 tunnel offload functions. */ + + eth_set_queue_rate_limit_t set_queue_rate_limit; /**< Set queue rate limit. */ + + rss_hash_update_t rss_hash_update; /** Configure RSS hash protocols. */ + rss_hash_conf_get_t rss_hash_conf_get; /** Get current RSS hash configuration. */ + reta_update_t reta_update; /** Update redirection table. */ + reta_query_t reta_query; /** Query redirection table. */ + + eth_get_reg_t get_reg; /**< Get registers. */ + eth_get_eeprom_length_t get_eeprom_length; /**< Get eeprom length. */ + eth_get_eeprom_t get_eeprom; /**< Get eeprom data. */ + eth_set_eeprom_t set_eeprom; /**< Set eeprom. */ + + eth_get_module_info_t get_module_info; + /** Get plugin module eeprom attribute. */ + eth_get_module_eeprom_t get_module_eeprom; + /** Get plugin module eeprom data. */ + + eth_filter_ctrl_t filter_ctrl; /**< common filter control. */ + + eth_get_dcb_info get_dcb_info; /** Get DCB information. */ + + eth_timesync_enable_t timesync_enable; + /** Turn IEEE1588/802.1AS timestamping on. */ + eth_timesync_disable_t timesync_disable; + /** Turn IEEE1588/802.1AS timestamping off. */ + eth_timesync_read_rx_timestamp_t timesync_read_rx_timestamp; + /** Read the IEEE1588/802.1AS RX timestamp. */ + eth_timesync_read_tx_timestamp_t timesync_read_tx_timestamp; + /** Read the IEEE1588/802.1AS TX timestamp. */ + eth_timesync_adjust_time timesync_adjust_time; /** Adjust the device clock. */ + eth_timesync_read_time timesync_read_time; /** Get the device clock time. */ + eth_timesync_write_time timesync_write_time; /** Set the device clock time. */ + + eth_xstats_get_by_id_t xstats_get_by_id; + /**< Get extended device statistic values by ID. */ + eth_xstats_get_names_by_id_t xstats_get_names_by_id; + /**< Get name of extended device statistics by ID. */ + + eth_tm_ops_get_t tm_ops_get; + /**< Get Traffic Management (TM) operations. */ + + eth_mtr_ops_get_t mtr_ops_get; + /**< Get Traffic Metering and Policing (MTR) operations. */ + + eth_pool_ops_supported_t pool_ops_supported; + /**< Test if a port supports specific mempool ops */ +}; + +/** + * @internal + * Structure used to hold information about the callbacks to be called for a + * queue on RX and TX. + */ +struct rte_eth_rxtx_callback { + struct rte_eth_rxtx_callback *next; + union{ + rte_rx_callback_fn rx; + rte_tx_callback_fn tx; + } fn; + void *param; +}; + +/** + * @internal + * The generic data structure associated with each ethernet device. + * + * Pointers to burst-oriented packet receive and transmit functions are + * located at the beginning of the structure, along with the pointer to + * where all the data elements for the particular device are stored in shared + * memory. This split allows the function pointer and driver data to be per- + * process, while the actual configuration data for the device is shared. + */ +struct rte_eth_dev { + eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ + eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. */ + struct rte_eth_dev_data *data; /**< Pointer to device data */ + const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ + struct rte_device *device; /**< Backing device */ + struct rte_intr_handle *intr_handle; /**< Device interrupt handle */ + /** User application callbacks for NIC interrupts */ + struct rte_eth_dev_cb_list link_intr_cbs; + /** + * User-supplied functions called from rx_burst to post-process + * received packets before passing them to the user + */ + struct rte_eth_rxtx_callback *post_rx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; + /** + * User-supplied functions called from tx_burst to pre-process + * received packets before passing them to the driver for transmission. + */ + struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; + enum rte_eth_dev_state state; /**< Flag indicating the port state */ + void *security_ctx; /**< Context for security ops */ +} __rte_cache_aligned; + +struct rte_eth_dev_sriov; +struct rte_eth_dev_owner; + +/** + * @internal + * The data part, with no function pointers, associated with each ethernet device. + * + * This structure is safe to place in shared memory to be common among different + * processes in a multi-process configuration. + */ +struct rte_eth_dev_data { + char name[RTE_ETH_NAME_MAX_LEN]; /**< Unique identifier name */ + + void **rx_queues; /**< Array of pointers to RX queues. */ + void **tx_queues; /**< Array of pointers to TX queues. */ + uint16_t nb_rx_queues; /**< Number of RX queues. */ + uint16_t nb_tx_queues; /**< Number of TX queues. */ + + struct rte_eth_dev_sriov sriov; /**< SRIOV data */ + + void *dev_private; /**< PMD-specific private data */ + + struct rte_eth_link dev_link; + /**< Link-level information & status */ + + struct rte_eth_conf dev_conf; /**< Configuration applied to device. */ + uint16_t mtu; /**< Maximum Transmission Unit. */ + + uint32_t min_rx_buf_size; + /**< Common rx buffer size handled by all queues */ + + uint64_t rx_mbuf_alloc_failed; /**< RX ring mbuf allocation failures. */ + struct ether_addr* mac_addrs;/**< Device Ethernet Link address. */ + uint64_t mac_pool_sel[ETH_NUM_RECEIVE_MAC_ADDR]; + /** bitmap array of associating Ethernet MAC addresses to pools */ + struct ether_addr* hash_mac_addrs; + /** Device Ethernet MAC addresses of hash filtering. */ + uint16_t port_id; /**< Device [external] port identifier. */ + __extension__ + uint8_t promiscuous : 1, /**< RX promiscuous mode ON(1) / OFF(0). */ + scattered_rx : 1, /**< RX of scattered packets is ON(1) / OFF(0) */ + all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */ + dev_started : 1, /**< Device state: STARTED(1) / STOPPED(0). */ + lro : 1; /**< RX LRO is ON(1) / OFF(0) */ + uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT]; + /** Queues state: STARTED(1) / STOPPED(0) */ + uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT]; + /** Queues state: STARTED(1) / STOPPED(0) */ + uint32_t dev_flags; /**< Capabilities */ + enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ + int numa_node; /**< NUMA node connection */ + struct rte_vlan_filter_conf vlan_filter_conf; + /**< VLAN filter configuration. */ + struct rte_eth_dev_owner owner; /**< The port owner. */ +} __rte_cache_aligned; + +/** + * @internal + * The pool of *rte_eth_dev* structures. The size of the pool + * is configured at compile-time in the <rte_ethdev.c> file. + */ +extern struct rte_eth_dev rte_eth_devices[]; + +#endif /* _RTE_ETHDEV_CORE_H_ */ diff --git a/lib/librte_ethdev/rte_ethdev_driver.h b/lib/librte_ethdev/rte_ethdev_driver.h new file mode 100644 index 00000000..c9c825e3 --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev_driver.h @@ -0,0 +1,332 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef _RTE_ETHDEV_DRIVER_H_ +#define _RTE_ETHDEV_DRIVER_H_ + +/** + * @file + * + * RTE Ethernet Device PMD API + * + * These APIs for the use from Ethernet drivers, user applications shouldn't + * use them. + * + */ + +#include <rte_ethdev.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @internal + * Returns a ethdev slot specified by the unique identifier name. + * + * @param name + * The pointer to the Unique identifier name for each Ethernet device + * @return + * - The pointer to the ethdev slot, on success. NULL on error + */ +struct rte_eth_dev *rte_eth_dev_allocated(const char *name); + +/** + * @internal + * Allocates a new ethdev slot for an ethernet device and returns the pointer + * to that slot for the driver to use. + * + * @param name Unique identifier name for each Ethernet device + * @param type Device type of this Ethernet device + * @return + * - Slot in the rte_dev_devices array for a new device; + */ +struct rte_eth_dev *rte_eth_dev_allocate(const char *name); + +/** + * @internal + * Attach to the ethdev already initialized by the primary + * process. + * + * @param name Ethernet device's name. + * @return + * - Success: Slot in the rte_dev_devices array for attached + * device. + * - Error: Null pointer. + */ +struct rte_eth_dev *rte_eth_dev_attach_secondary(const char *name); + +/** + * @internal + * Release the specified ethdev port. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. + * @return + * - 0 on success, negative on error + */ +int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev); + +/** + * @internal + * Release device queues and clear its configuration to force the user + * application to reconfigure it. It is for internal use only. + * + * @param dev + * Pointer to struct rte_eth_dev. + * + * @return + * void + */ +void _rte_eth_dev_reset(struct rte_eth_dev *dev); + +/** + * @internal Executes all the user application registered callbacks for + * the specific device. It is for DPDK internal user only. User + * application should not call it directly. + * + * @param dev + * Pointer to struct rte_eth_dev. + * @param event + * Eth device interrupt event type. + * @param ret_param + * To pass data back to user application. + * This allows the user application to decide if a particular function + * is permitted or not. + * + * @return + * int + */ +int _rte_eth_dev_callback_process(struct rte_eth_dev *dev, + enum rte_eth_event_type event, void *ret_param); + +/** + * @internal + * This is the last step of device probing. + * It must be called after a port is allocated and initialized successfully. + * + * The notification RTE_ETH_EVENT_NEW is sent to other entities + * (libraries and applications). + * The state is set as RTE_ETH_DEV_ATTACHED. + * + * @param dev + * New ethdev port. + */ +void rte_eth_dev_probing_finish(struct rte_eth_dev *dev); + +/** + * Create memzone for HW rings. + * malloc can't be used as the physical address is needed. + * If the memzone is already created, then this function returns a ptr + * to the old one. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure + * @param name + * The name of the memory zone + * @param queue_id + * The index of the queue to add to name + * @param size + * The sizeof of the memory area + * @param align + * Alignment for resulting memzone. Must be a power of 2. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of NUMA. + */ +const struct rte_memzone * +rte_eth_dma_zone_reserve(const struct rte_eth_dev *eth_dev, const char *name, + uint16_t queue_id, size_t size, + unsigned align, int socket_id); + +/** + * @internal + * Atomically set the link status for the specific device. + * It is for use by DPDK device driver use only. + * User applications should not call it + * + * @param dev + * Pointer to struct rte_eth_dev. + * @param link + * New link status value. + * @return + * Same convention as eth_link_update operation. + * 0 if link up status has changed + * -1 if link up status was unchanged + */ +static inline int +rte_eth_linkstatus_set(struct rte_eth_dev *dev, + const struct rte_eth_link *new_link) +{ + volatile uint64_t *dev_link + = (volatile uint64_t *)&(dev->data->dev_link); + union { + uint64_t val64; + struct rte_eth_link link; + } orig; + + RTE_BUILD_BUG_ON(sizeof(*new_link) != sizeof(uint64_t)); + + orig.val64 = rte_atomic64_exchange(dev_link, + *(const uint64_t *)new_link); + + return (orig.link.link_status == new_link->link_status) ? -1 : 0; +} + +/** + * @internal + * Atomically get the link speed and status. + * + * @param dev + * Pointer to struct rte_eth_dev. + * @param link + * link status value. + */ +static inline void +rte_eth_linkstatus_get(const struct rte_eth_dev *dev, + struct rte_eth_link *link) +{ + volatile uint64_t *src = (uint64_t *)&(dev->data->dev_link); + uint64_t *dst = (uint64_t *)link; + + RTE_BUILD_BUG_ON(sizeof(*link) != sizeof(uint64_t)); + +#ifdef __LP64__ + /* if cpu arch has 64 bit unsigned lon then implicitly atomic */ + *dst = *src; +#else + /* can't use rte_atomic64_read because it returns signed int */ + do { + *dst = *src; + } while (!rte_atomic64_cmpset(src, *dst, *dst)); +#endif +} + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Allocate an unique switch domain identifier. + * + * A pool of switch domain identifiers which can be allocated on request. This + * will enabled devices which support the concept of switch domains to request + * a switch domain id which is guaranteed to be unique from other devices + * running in the same process. + * + * @param domain_id + * switch domain identifier parameter to pass back to application + * + * @return + * Negative errno value on error, 0 on success. + */ +int __rte_experimental +rte_eth_switch_domain_alloc(uint16_t *domain_id); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * Free switch domain. + * + * Return a switch domain identifier to the pool of free identifiers after it is + * no longer in use by device. + * + * @param domain_id + * switch domain identifier to free + * + * @return + * Negative errno value on error, 0 on success. + */ +int __rte_experimental +rte_eth_switch_domain_free(uint16_t domain_id); + +/** Generic Ethernet device arguments */ +struct rte_eth_devargs { + uint16_t ports[RTE_MAX_ETHPORTS]; + /** port/s number to enable on a multi-port single function */ + uint16_t nb_ports; + /** number of ports in ports field */ + uint16_t representor_ports[RTE_MAX_ETHPORTS]; + /** representor port/s identifier to enable on device */ + uint16_t nb_representor_ports; + /** number of ports in representor port field */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * PMD helper function to parse ethdev arguments + * + * @param devargs + * device arguments + * @param eth_devargs + * parsed ethdev specific arguments. + * + * @return + * Negative errno value on error, 0 on success. + */ +int __rte_experimental +rte_eth_devargs_parse(const char *devargs, struct rte_eth_devargs *eth_devargs); + + +typedef int (*ethdev_init_t)(struct rte_eth_dev *ethdev, void *init_params); +typedef int (*ethdev_bus_specific_init)(struct rte_eth_dev *ethdev, + void *bus_specific_init_params); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * PMD helper function for the creation of a new ethdev ports. + * + * @param device + * rte_device handle. + * @param name + * port name. + * @param priv_data_size + * size of private data required for port. + * @param bus_specific_init + * port bus specific initialisation callback function + * @param bus_init_params + * port bus specific initialisation parameters + * @param ethdev_init + * device specific port initialization callback function + * @param init_params + * port initialisation parameters + * + * @return + * Negative errno value on error, 0 on success. + */ +int __rte_experimental +rte_eth_dev_create(struct rte_device *device, const char *name, + size_t priv_data_size, + ethdev_bus_specific_init bus_specific_init, void *bus_init_params, + ethdev_init_t ethdev_init, void *init_params); + + +typedef int (*ethdev_uninit_t)(struct rte_eth_dev *ethdev); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice. + * + * PMD helper function for cleaing up the resources of a ethdev port on it's + * destruction. + * + * @param ethdev + * ethdev handle of port. + * @param ethdev_uninit + * device specific port un-initialise callback function + * + * @return + * Negative errno value on error, 0 on success. + */ +int __rte_experimental +rte_eth_dev_destroy(struct rte_eth_dev *ethdev, ethdev_uninit_t ethdev_uninit); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ETHDEV_DRIVER_H_ */ diff --git a/lib/librte_ethdev/rte_ethdev_pci.h b/lib/librte_ethdev/rte_ethdev_pci.h new file mode 100644 index 00000000..2cfd3727 --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev_pci.h @@ -0,0 +1,210 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Brocade Communications Systems, Inc. + * Author: Jan Blunck <jblunck@infradead.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETHDEV_PCI_H_ +#define _RTE_ETHDEV_PCI_H_ + +#include <rte_malloc.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_config.h> +#include <rte_ethdev_driver.h> + +/** + * Copy pci device info to the Ethernet device data. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. + * @param pci_dev + * The *pci_dev* pointer is the address of the *rte_pci_device* structure. + */ +static inline void +rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, + struct rte_pci_device *pci_dev) +{ + if ((eth_dev == NULL) || (pci_dev == NULL)) { + RTE_PMD_DEBUG_TRACE("NULL pointer eth_dev=%p pci_dev=%p\n", + eth_dev, pci_dev); + return; + } + + eth_dev->intr_handle = &pci_dev->intr_handle; + + eth_dev->data->dev_flags = 0; + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_RMV) + eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV; + + eth_dev->data->kdrv = pci_dev->kdrv; + eth_dev->data->numa_node = pci_dev->device.numa_node; +} + +static inline int +eth_dev_pci_specific_init(struct rte_eth_dev *eth_dev, void *bus_device) { + struct rte_pci_device *pci_dev = bus_device; + + if (!pci_dev) + return -ENODEV; + + rte_eth_copy_pci_info(eth_dev, pci_dev); + + return 0; +} + +/** + * @internal + * Allocates a new ethdev slot for an ethernet device and returns the pointer + * to that slot for the driver to use. + * + * @param dev + * Pointer to the PCI device + * + * @param private_data_size + * Size of private data structure + * + * @return + * A pointer to a rte_eth_dev or NULL if allocation failed. + */ +static inline struct rte_eth_dev * +rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size) +{ + struct rte_eth_dev *eth_dev; + const char *name; + + if (!dev) + return NULL; + + name = dev->device.name; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eth_dev = rte_eth_dev_allocate(name); + if (!eth_dev) + return NULL; + + if (private_data_size) { + eth_dev->data->dev_private = rte_zmalloc_socket(name, + private_data_size, RTE_CACHE_LINE_SIZE, + dev->device.numa_node); + if (!eth_dev->data->dev_private) { + rte_eth_dev_release_port(eth_dev); + return NULL; + } + } + } else { + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) + return NULL; + } + + eth_dev->device = &dev->device; + rte_eth_copy_pci_info(eth_dev, dev); + return eth_dev; +} + +static inline void +rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev) +{ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(eth_dev->data->dev_private); + + eth_dev->data->dev_private = NULL; + + /* + * Secondary process will check the name to attach. + * Clear this field to avoid attaching a released ports. + */ + eth_dev->data->name[0] = '\0'; + + eth_dev->device = NULL; + eth_dev->intr_handle = NULL; + + /* free ether device */ + rte_eth_dev_release_port(eth_dev); +} + +typedef int (*eth_dev_pci_callback_t)(struct rte_eth_dev *eth_dev); + +/** + * @internal + * Wrapper for use by pci drivers in a .probe function to attach to a ethdev + * interface. + */ +static inline int +rte_eth_dev_pci_generic_probe(struct rte_pci_device *pci_dev, + size_t private_data_size, eth_dev_pci_callback_t dev_init) +{ + struct rte_eth_dev *eth_dev; + int ret; + + eth_dev = rte_eth_dev_pci_allocate(pci_dev, private_data_size); + if (!eth_dev) + return -ENOMEM; + + RTE_FUNC_PTR_OR_ERR_RET(*dev_init, -EINVAL); + ret = dev_init(eth_dev); + if (ret) + rte_eth_dev_pci_release(eth_dev); + else + rte_eth_dev_probing_finish(eth_dev); + + return ret; +} + +/** + * @internal + * Wrapper for use by pci drivers in a .remove function to detach a ethdev + * interface. + */ +static inline int +rte_eth_dev_pci_generic_remove(struct rte_pci_device *pci_dev, + eth_dev_pci_callback_t dev_uninit) +{ + struct rte_eth_dev *eth_dev; + int ret; + + eth_dev = rte_eth_dev_allocated(pci_dev->device.name); + if (!eth_dev) + return -ENODEV; + + if (dev_uninit) { + ret = dev_uninit(eth_dev); + if (ret) + return ret; + } + + rte_eth_dev_pci_release(eth_dev); + return 0; +} + +#endif /* _RTE_ETHDEV_PCI_H_ */ diff --git a/lib/librte_ethdev/rte_ethdev_vdev.h b/lib/librte_ethdev/rte_ethdev_vdev.h new file mode 100644 index 00000000..259feda3 --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev_vdev.h @@ -0,0 +1,84 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Brocade Communications Systems, Inc. + * Author: Jan Blunck <jblunck@infradead.org> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETHDEV_VDEV_H_ +#define _RTE_ETHDEV_VDEV_H_ + +#include <rte_config.h> +#include <rte_malloc.h> +#include <rte_bus_vdev.h> +#include <rte_ethdev_driver.h> + +/** + * @internal + * Allocates a new ethdev slot for an ethernet device and returns the pointer + * to that slot for the driver to use. + * + * @param dev + * Pointer to virtual device + * + * @param private_data_size + * Size of private data structure + * + * @return + * A pointer to a rte_eth_dev or NULL if allocation failed. + */ +static inline struct rte_eth_dev * +rte_eth_vdev_allocate(struct rte_vdev_device *dev, size_t private_data_size) +{ + struct rte_eth_dev *eth_dev; + const char *name = rte_vdev_device_name(dev); + + eth_dev = rte_eth_dev_allocate(name); + if (!eth_dev) + return NULL; + + if (private_data_size) { + eth_dev->data->dev_private = rte_zmalloc_socket(name, + private_data_size, RTE_CACHE_LINE_SIZE, + dev->device.numa_node); + if (!eth_dev->data->dev_private) { + rte_eth_dev_release_port(eth_dev); + return NULL; + } + } + + eth_dev->device = &dev->device; + eth_dev->intr_handle = NULL; + + eth_dev->data->kdrv = RTE_KDRV_NONE; + eth_dev->data->numa_node = dev->device.numa_node; + return eth_dev; +} + +#endif /* _RTE_ETHDEV_VDEV_H_ */ diff --git a/lib/librte_ethdev/rte_ethdev_version.map b/lib/librte_ethdev/rte_ethdev_version.map new file mode 100644 index 00000000..40cf42b8 --- /dev/null +++ b/lib/librte_ethdev/rte_ethdev_version.map @@ -0,0 +1,247 @@ +DPDK_2.2 { + global: + + rte_eth_add_rx_callback; + rte_eth_add_tx_callback; + rte_eth_allmulticast_disable; + rte_eth_allmulticast_enable; + rte_eth_allmulticast_get; + rte_eth_dev_allocate; + rte_eth_dev_allocated; + rte_eth_dev_attach; + rte_eth_dev_callback_register; + rte_eth_dev_callback_unregister; + rte_eth_dev_close; + rte_eth_dev_configure; + rte_eth_dev_count; + rte_eth_dev_default_mac_addr_set; + rte_eth_dev_detach; + rte_eth_dev_filter_supported; + rte_eth_dev_flow_ctrl_get; + rte_eth_dev_flow_ctrl_set; + rte_eth_dev_get_dcb_info; + rte_eth_dev_get_eeprom; + rte_eth_dev_get_eeprom_length; + rte_eth_dev_get_mtu; + rte_eth_dev_get_reg_info; + rte_eth_dev_get_vlan_offload; + rte_eth_devices; + rte_eth_dev_info_get; + rte_eth_dev_is_valid_port; + rte_eth_dev_mac_addr_add; + rte_eth_dev_mac_addr_remove; + rte_eth_dev_priority_flow_ctrl_set; + rte_eth_dev_release_port; + rte_eth_dev_rss_hash_conf_get; + rte_eth_dev_rss_hash_update; + rte_eth_dev_rss_reta_query; + rte_eth_dev_rss_reta_update; + rte_eth_dev_rx_intr_ctl; + rte_eth_dev_rx_intr_ctl_q; + rte_eth_dev_rx_intr_disable; + rte_eth_dev_rx_intr_enable; + rte_eth_dev_rx_queue_start; + rte_eth_dev_rx_queue_stop; + rte_eth_dev_set_eeprom; + rte_eth_dev_set_link_down; + rte_eth_dev_set_link_up; + rte_eth_dev_set_mc_addr_list; + rte_eth_dev_set_mtu; + rte_eth_dev_set_rx_queue_stats_mapping; + rte_eth_dev_set_tx_queue_stats_mapping; + rte_eth_dev_set_vlan_offload; + rte_eth_dev_set_vlan_pvid; + rte_eth_dev_set_vlan_strip_on_queue; + rte_eth_dev_socket_id; + rte_eth_dev_start; + rte_eth_dev_stop; + rte_eth_dev_tx_queue_start; + rte_eth_dev_tx_queue_stop; + rte_eth_dev_uc_all_hash_table_set; + rte_eth_dev_uc_hash_table_set; + rte_eth_dev_vlan_filter; + rte_eth_dma_zone_reserve; + rte_eth_led_off; + rte_eth_led_on; + rte_eth_link; + rte_eth_link_get; + rte_eth_link_get_nowait; + rte_eth_macaddr_get; + rte_eth_mirror_rule_reset; + rte_eth_mirror_rule_set; + rte_eth_promiscuous_disable; + rte_eth_promiscuous_enable; + rte_eth_promiscuous_get; + rte_eth_remove_rx_callback; + rte_eth_remove_tx_callback; + rte_eth_rx_queue_info_get; + rte_eth_rx_queue_setup; + rte_eth_set_queue_rate_limit; + rte_eth_stats; + rte_eth_stats_get; + rte_eth_stats_reset; + rte_eth_timesync_adjust_time; + rte_eth_timesync_disable; + rte_eth_timesync_enable; + rte_eth_timesync_read_rx_timestamp; + rte_eth_timesync_read_time; + rte_eth_timesync_read_tx_timestamp; + rte_eth_timesync_write_time; + rte_eth_tx_queue_info_get; + rte_eth_tx_queue_setup; + rte_eth_xstats_get; + rte_eth_xstats_reset; + + local: *; +}; + +DPDK_16.04 { + global: + + rte_eth_dev_get_supported_ptypes; + rte_eth_dev_l2_tunnel_eth_type_conf; + rte_eth_dev_l2_tunnel_offload_set; + rte_eth_dev_set_vlan_ether_type; + rte_eth_dev_udp_tunnel_port_add; + rte_eth_dev_udp_tunnel_port_delete; + rte_eth_speed_bitflag; + rte_eth_tx_buffer_count_callback; + rte_eth_tx_buffer_drop_callback; + rte_eth_tx_buffer_init; + rte_eth_tx_buffer_set_err_callback; + +} DPDK_2.2; + +DPDK_16.07 { + global: + + rte_eth_add_first_rx_callback; + rte_eth_dev_get_name_by_port; + rte_eth_dev_get_port_by_name; + rte_eth_xstats_get_names; + +} DPDK_16.04; + +DPDK_17.02 { + global: + + _rte_eth_dev_reset; + rte_eth_dev_fw_version_get; + +} DPDK_16.07; + +DPDK_17.05 { + global: + + rte_eth_dev_attach_secondary; + rte_eth_find_next; + rte_eth_tx_done_cleanup; + rte_eth_xstats_get_by_id; + rte_eth_xstats_get_id_by_name; + rte_eth_xstats_get_names_by_id; + +} DPDK_17.02; + +DPDK_17.08 { + global: + + _rte_eth_dev_callback_process; + rte_eth_dev_adjust_nb_rx_tx_desc; + rte_tm_capabilities_get; + rte_tm_get_number_of_leaf_nodes; + rte_tm_hierarchy_commit; + rte_tm_level_capabilities_get; + rte_tm_mark_ip_dscp; + rte_tm_mark_ip_ecn; + rte_tm_mark_vlan_dei; + rte_tm_node_add; + rte_tm_node_capabilities_get; + rte_tm_node_cman_update; + rte_tm_node_delete; + rte_tm_node_parent_update; + rte_tm_node_resume; + rte_tm_node_shaper_update; + rte_tm_node_shared_shaper_update; + rte_tm_node_shared_wred_context_update; + rte_tm_node_stats_read; + rte_tm_node_stats_update; + rte_tm_node_suspend; + rte_tm_node_type_get; + rte_tm_node_wfq_weight_mode_update; + rte_tm_node_wred_context_update; + rte_tm_shaper_profile_add; + rte_tm_shaper_profile_delete; + rte_tm_shared_shaper_add_update; + rte_tm_shared_shaper_delete; + rte_tm_shared_wred_context_add_update; + rte_tm_shared_wred_context_delete; + rte_tm_wred_profile_add; + rte_tm_wred_profile_delete; + +} DPDK_17.05; + +DPDK_17.11 { + global: + + rte_eth_dev_get_sec_ctx; + rte_eth_dev_pool_ops_supported; + rte_eth_dev_reset; + +} DPDK_17.08; + +DPDK_18.02 { + global: + + rte_eth_dev_filter_ctrl; + +} DPDK_17.11; + +DPDK_18.05 { + global: + + rte_eth_dev_count_avail; + rte_eth_dev_probing_finish; + rte_eth_find_next_owned_by; + rte_flow_copy; + rte_flow_create; + rte_flow_destroy; + rte_flow_error_set; + rte_flow_flush; + rte_flow_isolate; + rte_flow_query; + rte_flow_validate; + +} DPDK_18.02; + +EXPERIMENTAL { + global: + + rte_eth_devargs_parse; + rte_eth_dev_count_total; + rte_eth_dev_create; + rte_eth_dev_destroy; + rte_eth_dev_get_module_eeprom; + rte_eth_dev_get_module_info; + rte_eth_dev_is_removed; + rte_eth_dev_owner_delete; + rte_eth_dev_owner_get; + rte_eth_dev_owner_new; + rte_eth_dev_owner_set; + rte_eth_dev_owner_unset; + rte_eth_dev_rx_offload_name; + rte_eth_dev_tx_offload_name; + rte_eth_switch_domain_alloc; + rte_eth_switch_domain_free; + rte_mtr_capabilities_get; + rte_mtr_create; + rte_mtr_destroy; + rte_mtr_meter_disable; + rte_mtr_meter_dscp_table_update; + rte_mtr_meter_enable; + rte_mtr_meter_profile_add; + rte_mtr_meter_profile_delete; + rte_mtr_meter_profile_update; + rte_mtr_policer_actions_update; + rte_mtr_stats_read; + rte_mtr_stats_update; +}; diff --git a/lib/librte_ethdev/rte_flow.c b/lib/librte_ethdev/rte_flow.c new file mode 100644 index 00000000..b2afba08 --- /dev/null +++ b/lib/librte_ethdev/rte_flow.c @@ -0,0 +1,528 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox Technologies, Ltd + */ + +#include <errno.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include <rte_common.h> +#include <rte_errno.h> +#include <rte_branch_prediction.h> +#include "rte_ethdev.h" +#include "rte_flow_driver.h" +#include "rte_flow.h" + +/** + * Flow elements description tables. + */ +struct rte_flow_desc_data { + const char *name; + size_t size; +}; + +/** Generate flow_item[] entry. */ +#define MK_FLOW_ITEM(t, s) \ + [RTE_FLOW_ITEM_TYPE_ ## t] = { \ + .name = # t, \ + .size = s, \ + } + +/** Information about known flow pattern items. */ +static const struct rte_flow_desc_data rte_flow_desc_item[] = { + MK_FLOW_ITEM(END, 0), + MK_FLOW_ITEM(VOID, 0), + MK_FLOW_ITEM(INVERT, 0), + MK_FLOW_ITEM(ANY, sizeof(struct rte_flow_item_any)), + MK_FLOW_ITEM(PF, 0), + MK_FLOW_ITEM(VF, sizeof(struct rte_flow_item_vf)), + MK_FLOW_ITEM(PHY_PORT, sizeof(struct rte_flow_item_phy_port)), + MK_FLOW_ITEM(PORT_ID, sizeof(struct rte_flow_item_port_id)), + MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)), + MK_FLOW_ITEM(ETH, sizeof(struct rte_flow_item_eth)), + MK_FLOW_ITEM(VLAN, sizeof(struct rte_flow_item_vlan)), + MK_FLOW_ITEM(IPV4, sizeof(struct rte_flow_item_ipv4)), + MK_FLOW_ITEM(IPV6, sizeof(struct rte_flow_item_ipv6)), + MK_FLOW_ITEM(ICMP, sizeof(struct rte_flow_item_icmp)), + MK_FLOW_ITEM(UDP, sizeof(struct rte_flow_item_udp)), + MK_FLOW_ITEM(TCP, sizeof(struct rte_flow_item_tcp)), + MK_FLOW_ITEM(SCTP, sizeof(struct rte_flow_item_sctp)), + MK_FLOW_ITEM(VXLAN, sizeof(struct rte_flow_item_vxlan)), + MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)), + MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)), + MK_FLOW_ITEM(E_TAG, sizeof(struct rte_flow_item_e_tag)), + MK_FLOW_ITEM(NVGRE, sizeof(struct rte_flow_item_nvgre)), + MK_FLOW_ITEM(GENEVE, sizeof(struct rte_flow_item_geneve)), + MK_FLOW_ITEM(VXLAN_GPE, sizeof(struct rte_flow_item_vxlan_gpe)), + MK_FLOW_ITEM(ARP_ETH_IPV4, sizeof(struct rte_flow_item_arp_eth_ipv4)), + MK_FLOW_ITEM(IPV6_EXT, sizeof(struct rte_flow_item_ipv6_ext)), + MK_FLOW_ITEM(ICMP6, sizeof(struct rte_flow_item_icmp6)), + MK_FLOW_ITEM(ICMP6_ND_NS, sizeof(struct rte_flow_item_icmp6_nd_ns)), + MK_FLOW_ITEM(ICMP6_ND_NA, sizeof(struct rte_flow_item_icmp6_nd_na)), + MK_FLOW_ITEM(ICMP6_ND_OPT, sizeof(struct rte_flow_item_icmp6_nd_opt)), + MK_FLOW_ITEM(ICMP6_ND_OPT_SLA_ETH, + sizeof(struct rte_flow_item_icmp6_nd_opt_sla_eth)), + MK_FLOW_ITEM(ICMP6_ND_OPT_TLA_ETH, + sizeof(struct rte_flow_item_icmp6_nd_opt_tla_eth)), +}; + +/** Generate flow_action[] entry. */ +#define MK_FLOW_ACTION(t, s) \ + [RTE_FLOW_ACTION_TYPE_ ## t] = { \ + .name = # t, \ + .size = s, \ + } + +/** Information about known flow actions. */ +static const struct rte_flow_desc_data rte_flow_desc_action[] = { + MK_FLOW_ACTION(END, 0), + MK_FLOW_ACTION(VOID, 0), + MK_FLOW_ACTION(PASSTHRU, 0), + MK_FLOW_ACTION(MARK, sizeof(struct rte_flow_action_mark)), + MK_FLOW_ACTION(FLAG, 0), + MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)), + MK_FLOW_ACTION(DROP, 0), + MK_FLOW_ACTION(COUNT, 0), + MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)), + MK_FLOW_ACTION(PF, 0), + MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)), + MK_FLOW_ACTION(PHY_PORT, sizeof(struct rte_flow_action_phy_port)), + MK_FLOW_ACTION(PORT_ID, sizeof(struct rte_flow_action_port_id)), + MK_FLOW_ACTION(OF_SET_MPLS_TTL, + sizeof(struct rte_flow_action_of_set_mpls_ttl)), + MK_FLOW_ACTION(OF_DEC_MPLS_TTL, 0), + MK_FLOW_ACTION(OF_SET_NW_TTL, + sizeof(struct rte_flow_action_of_set_nw_ttl)), + MK_FLOW_ACTION(OF_DEC_NW_TTL, 0), + MK_FLOW_ACTION(OF_COPY_TTL_OUT, 0), + MK_FLOW_ACTION(OF_COPY_TTL_IN, 0), + MK_FLOW_ACTION(OF_POP_VLAN, 0), + MK_FLOW_ACTION(OF_PUSH_VLAN, + sizeof(struct rte_flow_action_of_push_vlan)), + MK_FLOW_ACTION(OF_SET_VLAN_VID, + sizeof(struct rte_flow_action_of_set_vlan_vid)), + MK_FLOW_ACTION(OF_SET_VLAN_PCP, + sizeof(struct rte_flow_action_of_set_vlan_pcp)), + MK_FLOW_ACTION(OF_POP_MPLS, + sizeof(struct rte_flow_action_of_pop_mpls)), + MK_FLOW_ACTION(OF_PUSH_MPLS, + sizeof(struct rte_flow_action_of_push_mpls)), +}; + +static int +flow_err(uint16_t port_id, int ret, struct rte_flow_error *error) +{ + if (ret == 0) + return 0; + if (rte_eth_dev_is_removed(port_id)) + return rte_flow_error_set(error, EIO, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(EIO)); + return ret; +} + +/* Get generic flow operations structure from a port. */ +const struct rte_flow_ops * +rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops; + int code; + + if (unlikely(!rte_eth_dev_is_valid_port(port_id))) + code = ENODEV; + else if (unlikely(!dev->dev_ops->filter_ctrl || + dev->dev_ops->filter_ctrl(dev, + RTE_ETH_FILTER_GENERIC, + RTE_ETH_FILTER_GET, + &ops) || + !ops)) + code = ENOSYS; + else + return ops; + rte_flow_error_set(error, code, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(code)); + return NULL; +} + +/* Check whether a flow rule can be created on a given port. */ +int +rte_flow_validate(uint16_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->validate)) + return flow_err(port_id, ops->validate(dev, attr, pattern, + actions, error), error); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Create a flow rule on a given port. */ +struct rte_flow * +rte_flow_create(uint16_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_flow *flow; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return NULL; + if (likely(!!ops->create)) { + flow = ops->create(dev, attr, pattern, actions, error); + if (flow == NULL) + flow_err(port_id, -rte_errno, error); + return flow; + } + rte_flow_error_set(error, ENOSYS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); + return NULL; +} + +/* Destroy a flow rule on a given port. */ +int +rte_flow_destroy(uint16_t port_id, + struct rte_flow *flow, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->destroy)) + return flow_err(port_id, ops->destroy(dev, flow, error), + error); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Destroy all flow rules associated with a port. */ +int +rte_flow_flush(uint16_t port_id, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (unlikely(!ops)) + return -rte_errno; + if (likely(!!ops->flush)) + return flow_err(port_id, ops->flush(dev, error), error); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Query an existing flow rule. */ +int +rte_flow_query(uint16_t port_id, + struct rte_flow *flow, + const struct rte_flow_action *action, + void *data, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (!ops) + return -rte_errno; + if (likely(!!ops->query)) + return flow_err(port_id, ops->query(dev, flow, action, data, + error), error); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Restrict ingress traffic to the defined flow rules. */ +int +rte_flow_isolate(uint16_t port_id, + int set, + struct rte_flow_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_flow_ops *ops = rte_flow_ops_get(port_id, error); + + if (!ops) + return -rte_errno; + if (likely(!!ops->isolate)) + return flow_err(port_id, ops->isolate(dev, set, error), error); + return rte_flow_error_set(error, ENOSYS, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, rte_strerror(ENOSYS)); +} + +/* Initialize flow error structure. */ +int +rte_flow_error_set(struct rte_flow_error *error, + int code, + enum rte_flow_error_type type, + const void *cause, + const char *message) +{ + if (error) { + *error = (struct rte_flow_error){ + .type = type, + .cause = cause, + .message = message, + }; + } + rte_errno = code; + return -code; +} + +/** Pattern item specification types. */ +enum item_spec_type { + ITEM_SPEC, + ITEM_LAST, + ITEM_MASK, +}; + +/** Compute storage space needed by item specification and copy it. */ +static size_t +flow_item_spec_copy(void *buf, const struct rte_flow_item *item, + enum item_spec_type type) +{ + size_t size = 0; + const void *data = + type == ITEM_SPEC ? item->spec : + type == ITEM_LAST ? item->last : + type == ITEM_MASK ? item->mask : + NULL; + + if (!item->spec || !data) + goto empty; + switch (item->type) { + union { + const struct rte_flow_item_raw *raw; + } spec; + union { + const struct rte_flow_item_raw *raw; + } last; + union { + const struct rte_flow_item_raw *raw; + } mask; + union { + const struct rte_flow_item_raw *raw; + } src; + union { + struct rte_flow_item_raw *raw; + } dst; + size_t off; + + case RTE_FLOW_ITEM_TYPE_RAW: + spec.raw = item->spec; + last.raw = item->last ? item->last : item->spec; + mask.raw = item->mask ? item->mask : &rte_flow_item_raw_mask; + src.raw = data; + dst.raw = buf; + off = RTE_ALIGN_CEIL(sizeof(struct rte_flow_item_raw), + sizeof(*src.raw->pattern)); + if (type == ITEM_SPEC || + (type == ITEM_MASK && + ((spec.raw->length & mask.raw->length) >= + (last.raw->length & mask.raw->length)))) + size = spec.raw->length & mask.raw->length; + else + size = last.raw->length & mask.raw->length; + size = off + size * sizeof(*src.raw->pattern); + if (dst.raw) { + memcpy(dst.raw, src.raw, sizeof(*src.raw)); + dst.raw->pattern = memcpy((uint8_t *)dst.raw + off, + src.raw->pattern, + size - off); + } + break; + default: + size = rte_flow_desc_item[item->type].size; + if (buf) + memcpy(buf, data, size); + break; + } +empty: + return RTE_ALIGN_CEIL(size, sizeof(double)); +} + +/** Compute storage space needed by action configuration and copy it. */ +static size_t +flow_action_conf_copy(void *buf, const struct rte_flow_action *action) +{ + size_t size = 0; + + if (!action->conf) + goto empty; + switch (action->type) { + union { + const struct rte_flow_action_rss *rss; + } src; + union { + struct rte_flow_action_rss *rss; + } dst; + size_t off; + + case RTE_FLOW_ACTION_TYPE_RSS: + src.rss = action->conf; + dst.rss = buf; + off = 0; + if (dst.rss) + *dst.rss = (struct rte_flow_action_rss){ + .func = src.rss->func, + .level = src.rss->level, + .types = src.rss->types, + .key_len = src.rss->key_len, + .queue_num = src.rss->queue_num, + }; + off += sizeof(*src.rss); + if (src.rss->key_len) { + off = RTE_ALIGN_CEIL(off, sizeof(double)); + size = sizeof(*src.rss->key) * src.rss->key_len; + if (dst.rss) + dst.rss->key = memcpy + ((void *)((uintptr_t)dst.rss + off), + src.rss->key, size); + off += size; + } + if (src.rss->queue_num) { + off = RTE_ALIGN_CEIL(off, sizeof(double)); + size = sizeof(*src.rss->queue) * src.rss->queue_num; + if (dst.rss) + dst.rss->queue = memcpy + ((void *)((uintptr_t)dst.rss + off), + src.rss->queue, size); + off += size; + } + size = off; + break; + default: + size = rte_flow_desc_action[action->type].size; + if (buf) + memcpy(buf, action->conf, size); + break; + } +empty: + return RTE_ALIGN_CEIL(size, sizeof(double)); +} + +/** Store a full rte_flow description. */ +size_t +rte_flow_copy(struct rte_flow_desc *desc, size_t len, + const struct rte_flow_attr *attr, + const struct rte_flow_item *items, + const struct rte_flow_action *actions) +{ + struct rte_flow_desc *fd = NULL; + size_t tmp; + size_t off1 = 0; + size_t off2 = 0; + size_t size = 0; + +store: + if (items) { + const struct rte_flow_item *item; + + item = items; + if (fd) + fd->items = (void *)&fd->data[off1]; + do { + struct rte_flow_item *dst = NULL; + + if ((size_t)item->type >= + RTE_DIM(rte_flow_desc_item) || + !rte_flow_desc_item[item->type].name) { + rte_errno = ENOTSUP; + return 0; + } + if (fd) + dst = memcpy(fd->data + off1, item, + sizeof(*item)); + off1 += sizeof(*item); + if (item->spec) { + if (fd) + dst->spec = fd->data + off2; + off2 += flow_item_spec_copy + (fd ? fd->data + off2 : NULL, item, + ITEM_SPEC); + } + if (item->last) { + if (fd) + dst->last = fd->data + off2; + off2 += flow_item_spec_copy + (fd ? fd->data + off2 : NULL, item, + ITEM_LAST); + } + if (item->mask) { + if (fd) + dst->mask = fd->data + off2; + off2 += flow_item_spec_copy + (fd ? fd->data + off2 : NULL, item, + ITEM_MASK); + } + off2 = RTE_ALIGN_CEIL(off2, sizeof(double)); + } while ((item++)->type != RTE_FLOW_ITEM_TYPE_END); + off1 = RTE_ALIGN_CEIL(off1, sizeof(double)); + } + if (actions) { + const struct rte_flow_action *action; + + action = actions; + if (fd) + fd->actions = (void *)&fd->data[off1]; + do { + struct rte_flow_action *dst = NULL; + + if ((size_t)action->type >= + RTE_DIM(rte_flow_desc_action) || + !rte_flow_desc_action[action->type].name) { + rte_errno = ENOTSUP; + return 0; + } + if (fd) + dst = memcpy(fd->data + off1, action, + sizeof(*action)); + off1 += sizeof(*action); + if (action->conf) { + if (fd) + dst->conf = fd->data + off2; + off2 += flow_action_conf_copy + (fd ? fd->data + off2 : NULL, action); + } + off2 = RTE_ALIGN_CEIL(off2, sizeof(double)); + } while ((action++)->type != RTE_FLOW_ACTION_TYPE_END); + } + if (fd != NULL) + return size; + off1 = RTE_ALIGN_CEIL(off1, sizeof(double)); + tmp = RTE_ALIGN_CEIL(offsetof(struct rte_flow_desc, data), + sizeof(double)); + size = tmp + off1 + off2; + if (size > len) + return size; + fd = desc; + if (fd != NULL) { + *fd = (const struct rte_flow_desc) { + .size = size, + .attr = *attr, + }; + tmp -= offsetof(struct rte_flow_desc, data); + off2 = tmp + off1; + off1 = tmp; + goto store; + } + return 0; +} diff --git a/lib/librte_ethdev/rte_flow.h b/lib/librte_ethdev/rte_flow.h new file mode 100644 index 00000000..f8ba71cd --- /dev/null +++ b/lib/librte_ethdev/rte_flow.h @@ -0,0 +1,2208 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox Technologies, Ltd + */ + +#ifndef RTE_FLOW_H_ +#define RTE_FLOW_H_ + +/** + * @file + * RTE generic flow API + * + * This interface provides the ability to program packet matching and + * associated actions in hardware through flow rules. + */ + +#include <stddef.h> +#include <stdint.h> + +#include <rte_arp.h> +#include <rte_ether.h> +#include <rte_eth_ctrl.h> +#include <rte_icmp.h> +#include <rte_ip.h> +#include <rte_sctp.h> +#include <rte_tcp.h> +#include <rte_udp.h> +#include <rte_byteorder.h> +#include <rte_esp.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Flow rule attributes. + * + * Priorities are set on a per rule based within groups. + * + * Lower values denote higher priority, the highest priority for a flow rule + * is 0, so that a flow that matches for than one rule, the rule with the + * lowest priority value will always be matched. + * + * Although optional, applications are encouraged to group similar rules as + * much as possible to fully take advantage of hardware capabilities + * (e.g. optimized matching) and work around limitations (e.g. a single + * pattern type possibly allowed in a given group). Applications should be + * aware that groups are not linked by default, and that they must be + * explicitly linked by the application using the JUMP action. + * + * Priority levels are arbitrary and up to the application, they + * do not need to be contiguous nor start from 0, however the maximum number + * varies between devices and may be affected by existing flow rules. + * + * If a packet is matched by several rules of a given group for a given + * priority level, the outcome is undefined. It can take any path, may be + * duplicated or even cause unrecoverable errors. + * + * Note that support for more than a single group and priority level is not + * guaranteed. + * + * Flow rules can apply to inbound and/or outbound traffic (ingress/egress). + * + * Several pattern items and actions are valid and can be used in both + * directions. Those valid for only one direction are described as such. + * + * At least one direction must be specified. + * + * Specifying both directions at once for a given rule is not recommended + * but may be valid in a few cases (e.g. shared counter). + */ +struct rte_flow_attr { + uint32_t group; /**< Priority group. */ + uint32_t priority; /**< Rule priority level within group. */ + uint32_t ingress:1; /**< Rule applies to ingress traffic. */ + uint32_t egress:1; /**< Rule applies to egress traffic. */ + /** + * Instead of simply matching the properties of traffic as it would + * appear on a given DPDK port ID, enabling this attribute transfers + * a flow rule to the lowest possible level of any device endpoints + * found in the pattern. + * + * When supported, this effectively enables an application to + * re-route traffic not necessarily intended for it (e.g. coming + * from or addressed to different physical ports, VFs or + * applications) at the device level. + * + * It complements the behavior of some pattern items such as + * RTE_FLOW_ITEM_TYPE_PHY_PORT and is meaningless without them. + * + * When transferring flow rules, ingress and egress attributes keep + * their original meaning, as if processing traffic emitted or + * received by the application. + */ + uint32_t transfer:1; + uint32_t reserved:29; /**< Reserved, must be zero. */ +}; + +/** + * Matching pattern item types. + * + * Pattern items fall in two categories: + * + * - Matching protocol headers and packet data, usually associated with a + * specification structure. These must be stacked in the same order as the + * protocol layers to match inside packets, starting from the lowest. + * + * - Matching meta-data or affecting pattern processing, often without a + * specification structure. Since they do not match packet contents, their + * position in the list is usually not relevant. + * + * See the description of individual types for more information. Those + * marked with [META] fall into the second category. + */ +enum rte_flow_item_type { + /** + * [META] + * + * End marker for item lists. Prevents further processing of items, + * thereby ending the pattern. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_END, + + /** + * [META] + * + * Used as a placeholder for convenience. It is ignored and simply + * discarded by PMDs. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_VOID, + + /** + * [META] + * + * Inverted matching, i.e. process packets that do not match the + * pattern. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_INVERT, + + /** + * Matches any protocol in place of the current layer, a single ANY + * may also stand for several protocol layers. + * + * See struct rte_flow_item_any. + */ + RTE_FLOW_ITEM_TYPE_ANY, + + /** + * [META] + * + * Matches traffic originating from (ingress) or going to (egress) + * the physical function of the current device. + * + * No associated specification structure. + */ + RTE_FLOW_ITEM_TYPE_PF, + + /** + * [META] + * + * Matches traffic originating from (ingress) or going to (egress) a + * given virtual function of the current device. + * + * See struct rte_flow_item_vf. + */ + RTE_FLOW_ITEM_TYPE_VF, + + /** + * [META] + * + * Matches traffic originating from (ingress) or going to (egress) a + * physical port of the underlying device. + * + * See struct rte_flow_item_phy_port. + */ + RTE_FLOW_ITEM_TYPE_PHY_PORT, + + /** + * [META] + * + * Matches traffic originating from (ingress) or going to (egress) a + * given DPDK port ID. + * + * See struct rte_flow_item_port_id. + */ + RTE_FLOW_ITEM_TYPE_PORT_ID, + + /** + * Matches a byte string of a given length at a given offset. + * + * See struct rte_flow_item_raw. + */ + RTE_FLOW_ITEM_TYPE_RAW, + + /** + * Matches an Ethernet header. + * + * See struct rte_flow_item_eth. + */ + RTE_FLOW_ITEM_TYPE_ETH, + + /** + * Matches an 802.1Q/ad VLAN tag. + * + * See struct rte_flow_item_vlan. + */ + RTE_FLOW_ITEM_TYPE_VLAN, + + /** + * Matches an IPv4 header. + * + * See struct rte_flow_item_ipv4. + */ + RTE_FLOW_ITEM_TYPE_IPV4, + + /** + * Matches an IPv6 header. + * + * See struct rte_flow_item_ipv6. + */ + RTE_FLOW_ITEM_TYPE_IPV6, + + /** + * Matches an ICMP header. + * + * See struct rte_flow_item_icmp. + */ + RTE_FLOW_ITEM_TYPE_ICMP, + + /** + * Matches a UDP header. + * + * See struct rte_flow_item_udp. + */ + RTE_FLOW_ITEM_TYPE_UDP, + + /** + * Matches a TCP header. + * + * See struct rte_flow_item_tcp. + */ + RTE_FLOW_ITEM_TYPE_TCP, + + /** + * Matches a SCTP header. + * + * See struct rte_flow_item_sctp. + */ + RTE_FLOW_ITEM_TYPE_SCTP, + + /** + * Matches a VXLAN header. + * + * See struct rte_flow_item_vxlan. + */ + RTE_FLOW_ITEM_TYPE_VXLAN, + + /** + * Matches a E_TAG header. + * + * See struct rte_flow_item_e_tag. + */ + RTE_FLOW_ITEM_TYPE_E_TAG, + + /** + * Matches a NVGRE header. + * + * See struct rte_flow_item_nvgre. + */ + RTE_FLOW_ITEM_TYPE_NVGRE, + + /** + * Matches a MPLS header. + * + * See struct rte_flow_item_mpls. + */ + RTE_FLOW_ITEM_TYPE_MPLS, + + /** + * Matches a GRE header. + * + * See struct rte_flow_item_gre. + */ + RTE_FLOW_ITEM_TYPE_GRE, + + /** + * [META] + * + * Fuzzy pattern match, expect faster than default. + * + * This is for device that support fuzzy matching option. + * Usually a fuzzy matching is fast but the cost is accuracy. + * + * See struct rte_flow_item_fuzzy. + */ + RTE_FLOW_ITEM_TYPE_FUZZY, + + /** + * Matches a GTP header. + * + * Configure flow for GTP packets. + * + * See struct rte_flow_item_gtp. + */ + RTE_FLOW_ITEM_TYPE_GTP, + + /** + * Matches a GTP header. + * + * Configure flow for GTP-C packets. + * + * See struct rte_flow_item_gtp. + */ + RTE_FLOW_ITEM_TYPE_GTPC, + + /** + * Matches a GTP header. + * + * Configure flow for GTP-U packets. + * + * See struct rte_flow_item_gtp. + */ + RTE_FLOW_ITEM_TYPE_GTPU, + + /** + * Matches a ESP header. + * + * See struct rte_flow_item_esp. + */ + RTE_FLOW_ITEM_TYPE_ESP, + + /** + * Matches a GENEVE header. + * + * See struct rte_flow_item_geneve. + */ + RTE_FLOW_ITEM_TYPE_GENEVE, + + /** + * Matches a VXLAN-GPE header. + * + * See struct rte_flow_item_vxlan_gpe. + */ + RTE_FLOW_ITEM_TYPE_VXLAN_GPE, + + /** + * Matches an ARP header for Ethernet/IPv4. + * + * See struct rte_flow_item_arp_eth_ipv4. + */ + RTE_FLOW_ITEM_TYPE_ARP_ETH_IPV4, + + /** + * Matches the presence of any IPv6 extension header. + * + * See struct rte_flow_item_ipv6_ext. + */ + RTE_FLOW_ITEM_TYPE_IPV6_EXT, + + /** + * Matches any ICMPv6 header. + * + * See struct rte_flow_item_icmp6. + */ + RTE_FLOW_ITEM_TYPE_ICMP6, + + /** + * Matches an ICMPv6 neighbor discovery solicitation. + * + * See struct rte_flow_item_icmp6_nd_ns. + */ + RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS, + + /** + * Matches an ICMPv6 neighbor discovery advertisement. + * + * See struct rte_flow_item_icmp6_nd_na. + */ + RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA, + + /** + * Matches the presence of any ICMPv6 neighbor discovery option. + * + * See struct rte_flow_item_icmp6_nd_opt. + */ + RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT, + + /** + * Matches an ICMPv6 neighbor discovery source Ethernet link-layer + * address option. + * + * See struct rte_flow_item_icmp6_nd_opt_sla_eth. + */ + RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_SLA_ETH, + + /** + * Matches an ICMPv6 neighbor discovery target Ethernet link-layer + * address option. + * + * See struct rte_flow_item_icmp6_nd_opt_tla_eth. + */ + RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_TLA_ETH, + + /** + * Matches specified mark field. + * + * See struct rte_flow_item_mark. + */ + RTE_FLOW_ITEM_TYPE_MARK, +}; + +/** + * RTE_FLOW_ITEM_TYPE_ANY + * + * Matches any protocol in place of the current layer, a single ANY may also + * stand for several protocol layers. + * + * This is usually specified as the first pattern item when looking for a + * protocol anywhere in a packet. + * + * A zeroed mask stands for any number of layers. + */ +struct rte_flow_item_any { + uint32_t num; /**< Number of layers covered. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ANY. */ +#ifndef __cplusplus +static const struct rte_flow_item_any rte_flow_item_any_mask = { + .num = 0x00000000, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_VF + * + * Matches traffic originating from (ingress) or going to (egress) a given + * virtual function of the current device. + * + * If supported, should work even if the virtual function is not managed by + * the application and thus not associated with a DPDK port ID. + * + * Note this pattern item does not match VF representors traffic which, as + * separate entities, should be addressed through their own DPDK port IDs. + * + * - Can be specified multiple times to match traffic addressed to several + * VF IDs. + * - Can be combined with a PF item to match both PF and VF traffic. + * + * A zeroed mask can be used to match any VF ID. + */ +struct rte_flow_item_vf { + uint32_t id; /**< VF ID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VF. */ +#ifndef __cplusplus +static const struct rte_flow_item_vf rte_flow_item_vf_mask = { + .id = 0x00000000, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_PHY_PORT + * + * Matches traffic originating from (ingress) or going to (egress) a + * physical port of the underlying device. + * + * The first PHY_PORT item overrides the physical port normally associated + * with the specified DPDK input port (port_id). This item can be provided + * several times to match additional physical ports. + * + * Note that physical ports are not necessarily tied to DPDK input ports + * (port_id) when those are not under DPDK control. Possible values are + * specific to each device, they are not necessarily indexed from zero and + * may not be contiguous. + * + * As a device property, the list of allowed values as well as the value + * associated with a port_id should be retrieved by other means. + * + * A zeroed mask can be used to match any port index. + */ +struct rte_flow_item_phy_port { + uint32_t index; /**< Physical port index. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_PHY_PORT. */ +#ifndef __cplusplus +static const struct rte_flow_item_phy_port rte_flow_item_phy_port_mask = { + .index = 0x00000000, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_PORT_ID + * + * Matches traffic originating from (ingress) or going to (egress) a given + * DPDK port ID. + * + * Normally only supported if the port ID in question is known by the + * underlying PMD and related to the device the flow rule is created + * against. + * + * This must not be confused with @p PHY_PORT which refers to the physical + * port of a device, whereas @p PORT_ID refers to a struct rte_eth_dev + * object on the application side (also known as "port representor" + * depending on the kind of underlying device). + */ +struct rte_flow_item_port_id { + uint32_t id; /**< DPDK port ID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_PORT_ID. */ +#ifndef __cplusplus +static const struct rte_flow_item_port_id rte_flow_item_port_id_mask = { + .id = 0xffffffff, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_RAW + * + * Matches a byte string of a given length at a given offset. + * + * Offset is either absolute (using the start of the packet) or relative to + * the end of the previous matched item in the stack, in which case negative + * values are allowed. + * + * If search is enabled, offset is used as the starting point. The search + * area can be delimited by setting limit to a nonzero value, which is the + * maximum number of bytes after offset where the pattern may start. + * + * Matching a zero-length pattern is allowed, doing so resets the relative + * offset for subsequent items. + * + * This type does not support ranges (struct rte_flow_item.last). + */ +struct rte_flow_item_raw { + uint32_t relative:1; /**< Look for pattern after the previous item. */ + uint32_t search:1; /**< Search pattern from offset (see also limit). */ + uint32_t reserved:30; /**< Reserved, must be set to zero. */ + int32_t offset; /**< Absolute or relative offset for pattern. */ + uint16_t limit; /**< Search area limit for start of pattern. */ + uint16_t length; /**< Pattern length. */ + const uint8_t *pattern; /**< Byte string to look for. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_RAW. */ +#ifndef __cplusplus +static const struct rte_flow_item_raw rte_flow_item_raw_mask = { + .relative = 1, + .search = 1, + .reserved = 0x3fffffff, + .offset = 0xffffffff, + .limit = 0xffff, + .length = 0xffff, + .pattern = NULL, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ETH + * + * Matches an Ethernet header. + * + * The @p type field either stands for "EtherType" or "TPID" when followed + * by so-called layer 2.5 pattern items such as RTE_FLOW_ITEM_TYPE_VLAN. In + * the latter case, @p type refers to that of the outer header, with the + * inner EtherType/TPID provided by the subsequent pattern item. This is the + * same order as on the wire. + */ +struct rte_flow_item_eth { + struct ether_addr dst; /**< Destination MAC. */ + struct ether_addr src; /**< Source MAC. */ + rte_be16_t type; /**< EtherType or TPID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ETH. */ +#ifndef __cplusplus +static const struct rte_flow_item_eth rte_flow_item_eth_mask = { + .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .type = RTE_BE16(0x0000), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_VLAN + * + * Matches an 802.1Q/ad VLAN tag. + * + * The corresponding standard outer EtherType (TPID) values are + * ETHER_TYPE_VLAN or ETHER_TYPE_QINQ. It can be overridden by the preceding + * pattern item. + */ +struct rte_flow_item_vlan { + rte_be16_t tci; /**< Tag control information. */ + rte_be16_t inner_type; /**< Inner EtherType or TPID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VLAN. */ +#ifndef __cplusplus +static const struct rte_flow_item_vlan rte_flow_item_vlan_mask = { + .tci = RTE_BE16(0x0fff), + .inner_type = RTE_BE16(0x0000), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_IPV4 + * + * Matches an IPv4 header. + * + * Note: IPv4 options are handled by dedicated pattern items. + */ +struct rte_flow_item_ipv4 { + struct ipv4_hdr hdr; /**< IPv4 header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_IPV4. */ +#ifndef __cplusplus +static const struct rte_flow_item_ipv4 rte_flow_item_ipv4_mask = { + .hdr = { + .src_addr = RTE_BE32(0xffffffff), + .dst_addr = RTE_BE32(0xffffffff), + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_IPV6. + * + * Matches an IPv6 header. + * + * Note: IPv6 options are handled by dedicated pattern items, see + * RTE_FLOW_ITEM_TYPE_IPV6_EXT. + */ +struct rte_flow_item_ipv6 { + struct ipv6_hdr hdr; /**< IPv6 header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_IPV6. */ +#ifndef __cplusplus +static const struct rte_flow_item_ipv6 rte_flow_item_ipv6_mask = { + .hdr = { + .src_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + .dst_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP. + * + * Matches an ICMP header. + */ +struct rte_flow_item_icmp { + struct icmp_hdr hdr; /**< ICMP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP. */ +#ifndef __cplusplus +static const struct rte_flow_item_icmp rte_flow_item_icmp_mask = { + .hdr = { + .icmp_type = 0xff, + .icmp_code = 0xff, + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_UDP. + * + * Matches a UDP header. + */ +struct rte_flow_item_udp { + struct udp_hdr hdr; /**< UDP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_UDP. */ +#ifndef __cplusplus +static const struct rte_flow_item_udp rte_flow_item_udp_mask = { + .hdr = { + .src_port = RTE_BE16(0xffff), + .dst_port = RTE_BE16(0xffff), + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_TCP. + * + * Matches a TCP header. + */ +struct rte_flow_item_tcp { + struct tcp_hdr hdr; /**< TCP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_TCP. */ +#ifndef __cplusplus +static const struct rte_flow_item_tcp rte_flow_item_tcp_mask = { + .hdr = { + .src_port = RTE_BE16(0xffff), + .dst_port = RTE_BE16(0xffff), + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_SCTP. + * + * Matches a SCTP header. + */ +struct rte_flow_item_sctp { + struct sctp_hdr hdr; /**< SCTP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_SCTP. */ +#ifndef __cplusplus +static const struct rte_flow_item_sctp rte_flow_item_sctp_mask = { + .hdr = { + .src_port = RTE_BE16(0xffff), + .dst_port = RTE_BE16(0xffff), + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_VXLAN. + * + * Matches a VXLAN header (RFC 7348). + */ +struct rte_flow_item_vxlan { + uint8_t flags; /**< Normally 0x08 (I flag). */ + uint8_t rsvd0[3]; /**< Reserved, normally 0x000000. */ + uint8_t vni[3]; /**< VXLAN identifier. */ + uint8_t rsvd1; /**< Reserved, normally 0x00. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VXLAN. */ +#ifndef __cplusplus +static const struct rte_flow_item_vxlan rte_flow_item_vxlan_mask = { + .vni = "\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_E_TAG. + * + * Matches a E-tag header. + * + * The corresponding standard outer EtherType (TPID) value is + * ETHER_TYPE_ETAG. It can be overridden by the preceding pattern item. + */ +struct rte_flow_item_e_tag { + /** + * E-Tag control information (E-TCI). + * E-PCP (3b), E-DEI (1b), ingress E-CID base (12b). + */ + rte_be16_t epcp_edei_in_ecid_b; + /** Reserved (2b), GRP (2b), E-CID base (12b). */ + rte_be16_t rsvd_grp_ecid_b; + uint8_t in_ecid_e; /**< Ingress E-CID ext. */ + uint8_t ecid_e; /**< E-CID ext. */ + rte_be16_t inner_type; /**< Inner EtherType or TPID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_E_TAG. */ +#ifndef __cplusplus +static const struct rte_flow_item_e_tag rte_flow_item_e_tag_mask = { + .rsvd_grp_ecid_b = RTE_BE16(0x3fff), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_NVGRE. + * + * Matches a NVGRE header. + */ +struct rte_flow_item_nvgre { + /** + * Checksum (1b), undefined (1b), key bit (1b), sequence number (1b), + * reserved 0 (9b), version (3b). + * + * c_k_s_rsvd0_ver must have value 0x2000 according to RFC 7637. + */ + rte_be16_t c_k_s_rsvd0_ver; + rte_be16_t protocol; /**< Protocol type (0x6558). */ + uint8_t tni[3]; /**< Virtual subnet ID. */ + uint8_t flow_id; /**< Flow ID. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_NVGRE. */ +#ifndef __cplusplus +static const struct rte_flow_item_nvgre rte_flow_item_nvgre_mask = { + .tni = "\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_MPLS. + * + * Matches a MPLS header. + */ +struct rte_flow_item_mpls { + /** + * Label (20b), TC (3b), Bottom of Stack (1b). + */ + uint8_t label_tc_s[3]; + uint8_t ttl; /** Time-to-Live. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_MPLS. */ +#ifndef __cplusplus +static const struct rte_flow_item_mpls rte_flow_item_mpls_mask = { + .label_tc_s = "\xff\xff\xf0", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_GRE. + * + * Matches a GRE header. + */ +struct rte_flow_item_gre { + /** + * Checksum (1b), reserved 0 (12b), version (3b). + * Refer to RFC 2784. + */ + rte_be16_t c_rsvd0_ver; + rte_be16_t protocol; /**< Protocol type. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_GRE. */ +#ifndef __cplusplus +static const struct rte_flow_item_gre rte_flow_item_gre_mask = { + .protocol = RTE_BE16(0xffff), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_FUZZY + * + * Fuzzy pattern match, expect faster than default. + * + * This is for device that support fuzzy match option. + * Usually a fuzzy match is fast but the cost is accuracy. + * i.e. Signature Match only match pattern's hash value, but it is + * possible two different patterns have the same hash value. + * + * Matching accuracy level can be configure by threshold. + * Driver can divide the range of threshold and map to different + * accuracy levels that device support. + * + * Threshold 0 means perfect match (no fuzziness), while threshold + * 0xffffffff means fuzziest match. + */ +struct rte_flow_item_fuzzy { + uint32_t thresh; /**< Accuracy threshold. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_FUZZY. */ +#ifndef __cplusplus +static const struct rte_flow_item_fuzzy rte_flow_item_fuzzy_mask = { + .thresh = 0xffffffff, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_GTP. + * + * Matches a GTPv1 header. + */ +struct rte_flow_item_gtp { + /** + * Version (3b), protocol type (1b), reserved (1b), + * Extension header flag (1b), + * Sequence number flag (1b), + * N-PDU number flag (1b). + */ + uint8_t v_pt_rsv_flags; + uint8_t msg_type; /**< Message type. */ + rte_be16_t msg_len; /**< Message length. */ + rte_be32_t teid; /**< Tunnel endpoint identifier. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_GTP. */ +#ifndef __cplusplus +static const struct rte_flow_item_gtp rte_flow_item_gtp_mask = { + .teid = RTE_BE32(0xffffffff), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ESP + * + * Matches an ESP header. + */ +struct rte_flow_item_esp { + struct esp_hdr hdr; /**< ESP header definition. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ESP. */ +#ifndef __cplusplus +static const struct rte_flow_item_esp rte_flow_item_esp_mask = { + .hdr = { + .spi = 0xffffffff, + }, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_GENEVE. + * + * Matches a GENEVE header. + */ +struct rte_flow_item_geneve { + /** + * Version (2b), length of the options fields (6b), OAM packet (1b), + * critical options present (1b), reserved 0 (6b). + */ + rte_be16_t ver_opt_len_o_c_rsvd0; + rte_be16_t protocol; /**< Protocol type. */ + uint8_t vni[3]; /**< Virtual Network Identifier. */ + uint8_t rsvd1; /**< Reserved, normally 0x00. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_GENEVE. */ +#ifndef __cplusplus +static const struct rte_flow_item_geneve rte_flow_item_geneve_mask = { + .vni = "\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_VXLAN_GPE (draft-ietf-nvo3-vxlan-gpe-05). + * + * Matches a VXLAN-GPE header. + */ +struct rte_flow_item_vxlan_gpe { + uint8_t flags; /**< Normally 0x0c (I and P flags). */ + uint8_t rsvd0[2]; /**< Reserved, normally 0x0000. */ + uint8_t protocol; /**< Protocol type. */ + uint8_t vni[3]; /**< VXLAN identifier. */ + uint8_t rsvd1; /**< Reserved, normally 0x00. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_VXLAN_GPE. */ +#ifndef __cplusplus +static const struct rte_flow_item_vxlan_gpe rte_flow_item_vxlan_gpe_mask = { + .vni = "\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ARP_ETH_IPV4 + * + * Matches an ARP header for Ethernet/IPv4. + */ +struct rte_flow_item_arp_eth_ipv4 { + rte_be16_t hrd; /**< Hardware type, normally 1. */ + rte_be16_t pro; /**< Protocol type, normally 0x0800. */ + uint8_t hln; /**< Hardware address length, normally 6. */ + uint8_t pln; /**< Protocol address length, normally 4. */ + rte_be16_t op; /**< Opcode (1 for request, 2 for reply). */ + struct ether_addr sha; /**< Sender hardware address. */ + rte_be32_t spa; /**< Sender IPv4 address. */ + struct ether_addr tha; /**< Target hardware address. */ + rte_be32_t tpa; /**< Target IPv4 address. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ARP_ETH_IPV4. */ +#ifndef __cplusplus +static const struct rte_flow_item_arp_eth_ipv4 +rte_flow_item_arp_eth_ipv4_mask = { + .sha.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .spa = RTE_BE32(0xffffffff), + .tha.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .tpa = RTE_BE32(0xffffffff), +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_IPV6_EXT + * + * Matches the presence of any IPv6 extension header. + * + * Normally preceded by any of: + * + * - RTE_FLOW_ITEM_TYPE_IPV6 + * - RTE_FLOW_ITEM_TYPE_IPV6_EXT + */ +struct rte_flow_item_ipv6_ext { + uint8_t next_hdr; /**< Next header. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_IPV6_EXT. */ +#ifndef __cplusplus +static const +struct rte_flow_item_ipv6_ext rte_flow_item_ipv6_ext_mask = { + .next_hdr = 0xff, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP6 + * + * Matches any ICMPv6 header. + */ +struct rte_flow_item_icmp6 { + uint8_t type; /**< ICMPv6 type. */ + uint8_t code; /**< ICMPv6 code. */ + uint16_t checksum; /**< ICMPv6 checksum. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6. */ +#ifndef __cplusplus +static const struct rte_flow_item_icmp6 rte_flow_item_icmp6_mask = { + .type = 0xff, + .code = 0xff, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS + * + * Matches an ICMPv6 neighbor discovery solicitation. + */ +struct rte_flow_item_icmp6_nd_ns { + uint8_t type; /**< ICMPv6 type, normally 135. */ + uint8_t code; /**< ICMPv6 code, normally 0. */ + rte_be16_t checksum; /**< ICMPv6 checksum. */ + rte_be32_t reserved; /**< Reserved, normally 0. */ + uint8_t target_addr[16]; /**< Target address. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS. */ +#ifndef __cplusplus +static const +struct rte_flow_item_icmp6_nd_ns rte_flow_item_icmp6_nd_ns_mask = { + .target_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA + * + * Matches an ICMPv6 neighbor discovery advertisement. + */ +struct rte_flow_item_icmp6_nd_na { + uint8_t type; /**< ICMPv6 type, normally 136. */ + uint8_t code; /**< ICMPv6 code, normally 0. */ + rte_be16_t checksum; /**< ICMPv6 checksum. */ + /** + * Route flag (1b), solicited flag (1b), override flag (1b), + * reserved (29b). + */ + rte_be32_t rso_reserved; + uint8_t target_addr[16]; /**< Target address. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA. */ +#ifndef __cplusplus +static const +struct rte_flow_item_icmp6_nd_na rte_flow_item_icmp6_nd_na_mask = { + .target_addr = + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT + * + * Matches the presence of any ICMPv6 neighbor discovery option. + * + * Normally preceded by any of: + * + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT + */ +struct rte_flow_item_icmp6_nd_opt { + uint8_t type; /**< ND option type. */ + uint8_t length; /**< ND option length. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT. */ +#ifndef __cplusplus +static const struct rte_flow_item_icmp6_nd_opt +rte_flow_item_icmp6_nd_opt_mask = { + .type = 0xff, +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_SLA_ETH + * + * Matches an ICMPv6 neighbor discovery source Ethernet link-layer address + * option. + * + * Normally preceded by any of: + * + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT + */ +struct rte_flow_item_icmp6_nd_opt_sla_eth { + uint8_t type; /**< ND option type, normally 1. */ + uint8_t length; /**< ND option length, normally 1. */ + struct ether_addr sla; /**< Source Ethernet LLA. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_SLA_ETH. */ +#ifndef __cplusplus +static const struct rte_flow_item_icmp6_nd_opt_sla_eth +rte_flow_item_icmp6_nd_opt_sla_eth_mask = { + .sla.addr_bytes = "\xff\xff\xff\xff\xff\xff", +}; +#endif + +/** + * RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_TLA_ETH + * + * Matches an ICMPv6 neighbor discovery target Ethernet link-layer address + * option. + * + * Normally preceded by any of: + * + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS + * - RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT + */ +struct rte_flow_item_icmp6_nd_opt_tla_eth { + uint8_t type; /**< ND option type, normally 2. */ + uint8_t length; /**< ND option length, normally 1. */ + struct ether_addr tla; /**< Target Ethernet LLA. */ +}; + +/** Default mask for RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_TLA_ETH. */ +#ifndef __cplusplus +static const struct rte_flow_item_icmp6_nd_opt_tla_eth +rte_flow_item_icmp6_nd_opt_tla_eth_mask = { + .tla.addr_bytes = "\xff\xff\xff\xff\xff\xff", +}; +#endif + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ITEM_TYPE_MARK + * + * Matches an arbitrary integer value which was set using the ``MARK`` action + * in a previously matched rule. + * + * This item can only be specified once as a match criteria as the ``MARK`` + * action can only be specified once in a flow action. + * + * This value is arbitrary and application-defined. Maximum allowed value + * depends on the underlying implementation. + * + * Depending on the underlying implementation the MARK item may be supported on + * the physical device, with virtual groups in the PMD or not at all. + */ +struct rte_flow_item_mark { + uint32_t id; /**< Integer value to match against. */ +}; + +/** + * Matching pattern item definition. + * + * A pattern is formed by stacking items starting from the lowest protocol + * layer to match. This stacking restriction does not apply to meta items + * which can be placed anywhere in the stack without affecting the meaning + * of the resulting pattern. + * + * Patterns are terminated by END items. + * + * The spec field should be a valid pointer to a structure of the related + * item type. It may remain unspecified (NULL) in many cases to request + * broad (nonspecific) matching. In such cases, last and mask must also be + * set to NULL. + * + * Optionally, last can point to a structure of the same type to define an + * inclusive range. This is mostly supported by integer and address fields, + * may cause errors otherwise. Fields that do not support ranges must be set + * to 0 or to the same value as the corresponding fields in spec. + * + * Only the fields defined to nonzero values in the default masks (see + * rte_flow_item_{name}_mask constants) are considered relevant by + * default. This can be overridden by providing a mask structure of the + * same type with applicable bits set to one. It can also be used to + * partially filter out specific fields (e.g. as an alternate mean to match + * ranges of IP addresses). + * + * Mask is a simple bit-mask applied before interpreting the contents of + * spec and last, which may yield unexpected results if not used + * carefully. For example, if for an IPv4 address field, spec provides + * 10.1.2.3, last provides 10.3.4.5 and mask provides 255.255.0.0, the + * effective range becomes 10.1.0.0 to 10.3.255.255. + */ +struct rte_flow_item { + enum rte_flow_item_type type; /**< Item type. */ + const void *spec; /**< Pointer to item specification structure. */ + const void *last; /**< Defines an inclusive range (spec to last). */ + const void *mask; /**< Bit-mask applied to spec and last. */ +}; + +/** + * Action types. + * + * Each possible action is represented by a type. Some have associated + * configuration structures. Several actions combined in a list can be + * assigned to a flow rule and are performed in order. + * + * They fall in three categories: + * + * - Actions that modify the fate of matching traffic, for instance by + * dropping or assigning it a specific destination. + * + * - Actions that modify matching traffic contents or its properties. This + * includes adding/removing encapsulation, encryption, compression and + * marks. + * + * - Actions related to the flow rule itself, such as updating counters or + * making it non-terminating. + * + * Flow rules being terminating by default, not specifying any action of the + * fate kind results in undefined behavior. This applies to both ingress and + * egress. + * + * PASSTHRU, when supported, makes a flow rule non-terminating. + */ +enum rte_flow_action_type { + /** + * End marker for action lists. Prevents further processing of + * actions, thereby ending the list. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_END, + + /** + * Used as a placeholder for convenience. It is ignored and simply + * discarded by PMDs. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_VOID, + + /** + * Leaves traffic up for additional processing by subsequent flow + * rules; makes a flow rule non-terminating. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_PASSTHRU, + + /** + * RTE_FLOW_ACTION_TYPE_JUMP + * + * Redirects packets to a group on the current device. + * + * See struct rte_flow_action_jump. + */ + RTE_FLOW_ACTION_TYPE_JUMP, + + /** + * Attaches an integer value to packets and sets PKT_RX_FDIR and + * PKT_RX_FDIR_ID mbuf flags. + * + * See struct rte_flow_action_mark. + */ + RTE_FLOW_ACTION_TYPE_MARK, + + /** + * Flags packets. Similar to MARK without a specific value; only + * sets the PKT_RX_FDIR mbuf flag. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_FLAG, + + /** + * Assigns packets to a given queue index. + * + * See struct rte_flow_action_queue. + */ + RTE_FLOW_ACTION_TYPE_QUEUE, + + /** + * Drops packets. + * + * PASSTHRU overrides this action if both are specified. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_DROP, + + /** + * Enables counters for this flow rule. + * + * These counters can be retrieved and reset through rte_flow_query(), + * see struct rte_flow_query_count. + * + * See struct rte_flow_action_count. + */ + RTE_FLOW_ACTION_TYPE_COUNT, + + /** + * Similar to QUEUE, except RSS is additionally performed on packets + * to spread them among several queues according to the provided + * parameters. + * + * See struct rte_flow_action_rss. + */ + RTE_FLOW_ACTION_TYPE_RSS, + + /** + * Directs matching traffic to the physical function (PF) of the + * current device. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_PF, + + /** + * Directs matching traffic to a given virtual function of the + * current device. + * + * See struct rte_flow_action_vf. + */ + RTE_FLOW_ACTION_TYPE_VF, + + /** + * Directs packets to a given physical port index of the underlying + * device. + * + * See struct rte_flow_action_phy_port. + */ + RTE_FLOW_ACTION_TYPE_PHY_PORT, + + /** + * Directs matching traffic to a given DPDK port ID. + * + * See struct rte_flow_action_port_id. + */ + RTE_FLOW_ACTION_TYPE_PORT_ID, + + /** + * Traffic metering and policing (MTR). + * + * See struct rte_flow_action_meter. + * See file rte_mtr.h for MTR object configuration. + */ + RTE_FLOW_ACTION_TYPE_METER, + + /** + * Redirects packets to security engine of current device for security + * processing as specified by security session. + * + * See struct rte_flow_action_security. + */ + RTE_FLOW_ACTION_TYPE_SECURITY, + + /** + * Implements OFPAT_SET_MPLS_TTL ("MPLS TTL") as defined by the + * OpenFlow Switch Specification. + * + * See struct rte_flow_action_of_set_mpls_ttl. + */ + RTE_FLOW_ACTION_TYPE_OF_SET_MPLS_TTL, + + /** + * Implements OFPAT_DEC_MPLS_TTL ("decrement MPLS TTL") as defined + * by the OpenFlow Switch Specification. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_OF_DEC_MPLS_TTL, + + /** + * Implements OFPAT_SET_NW_TTL ("IP TTL") as defined by the OpenFlow + * Switch Specification. + * + * See struct rte_flow_action_of_set_nw_ttl. + */ + RTE_FLOW_ACTION_TYPE_OF_SET_NW_TTL, + + /** + * Implements OFPAT_DEC_NW_TTL ("decrement IP TTL") as defined by + * the OpenFlow Switch Specification. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_OF_DEC_NW_TTL, + + /** + * Implements OFPAT_COPY_TTL_OUT ("copy TTL "outwards" -- from + * next-to-outermost to outermost") as defined by the OpenFlow + * Switch Specification. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_OF_COPY_TTL_OUT, + + /** + * Implements OFPAT_COPY_TTL_IN ("copy TTL "inwards" -- from + * outermost to next-to-outermost") as defined by the OpenFlow + * Switch Specification. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_OF_COPY_TTL_IN, + + /** + * Implements OFPAT_POP_VLAN ("pop the outer VLAN tag") as defined + * by the OpenFlow Switch Specification. + * + * No associated configuration structure. + */ + RTE_FLOW_ACTION_TYPE_OF_POP_VLAN, + + /** + * Implements OFPAT_PUSH_VLAN ("push a new VLAN tag") as defined by + * the OpenFlow Switch Specification. + * + * See struct rte_flow_action_of_push_vlan. + */ + RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN, + + /** + * Implements OFPAT_SET_VLAN_VID ("set the 802.1q VLAN id") as + * defined by the OpenFlow Switch Specification. + * + * See struct rte_flow_action_of_set_vlan_vid. + */ + RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID, + + /** + * Implements OFPAT_SET_LAN_PCP ("set the 802.1q priority") as + * defined by the OpenFlow Switch Specification. + * + * See struct rte_flow_action_of_set_vlan_pcp. + */ + RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP, + + /** + * Implements OFPAT_POP_MPLS ("pop the outer MPLS tag") as defined + * by the OpenFlow Switch Specification. + * + * See struct rte_flow_action_of_pop_mpls. + */ + RTE_FLOW_ACTION_TYPE_OF_POP_MPLS, + + /** + * Implements OFPAT_PUSH_MPLS ("push a new MPLS tag") as defined by + * the OpenFlow Switch Specification. + * + * See struct rte_flow_action_of_push_mpls. + */ + RTE_FLOW_ACTION_TYPE_OF_PUSH_MPLS, + + /** + * Encapsulate flow in VXLAN tunnel as defined in + * rte_flow_action_vxlan_encap action structure. + * + * See struct rte_flow_action_vxlan_encap. + */ + RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, + + /** + * Decapsulate outer most VXLAN tunnel from matched flow. + * + * If flow pattern does not define a valid VXLAN tunnel (as specified by + * RFC7348) then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION + * error. + */ + RTE_FLOW_ACTION_TYPE_VXLAN_DECAP, + + /** + * Encapsulate flow in NVGRE tunnel defined in the + * rte_flow_action_nvgre_encap action structure. + * + * See struct rte_flow_action_nvgre_encap. + */ + RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP, + + /** + * Decapsulate outer most NVGRE tunnel from matched flow. + * + * If flow pattern does not define a valid NVGRE tunnel (as specified by + * RFC7637) then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION + * error. + */ + RTE_FLOW_ACTION_TYPE_NVGRE_DECAP, +}; + +/** + * RTE_FLOW_ACTION_TYPE_MARK + * + * Attaches an integer value to packets and sets PKT_RX_FDIR and + * PKT_RX_FDIR_ID mbuf flags. + * + * This value is arbitrary and application-defined. Maximum allowed value + * depends on the underlying implementation. It is returned in the + * hash.fdir.hi mbuf field. + */ +struct rte_flow_action_mark { + uint32_t id; /**< Integer value to return with packets. */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ACTION_TYPE_JUMP + * + * Redirects packets to a group on the current device. + * + * In a hierarchy of groups, which can be used to represent physical or logical + * flow tables on the device, this action allows the action to be a redirect to + * a group on that device. + */ +struct rte_flow_action_jump { + uint32_t group; +}; + +/** + * RTE_FLOW_ACTION_TYPE_QUEUE + * + * Assign packets to a given queue index. + */ +struct rte_flow_action_queue { + uint16_t index; /**< Queue index to use. */ +}; + + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ACTION_TYPE_COUNT + * + * Adds a counter action to a matched flow. + * + * If more than one count action is specified in a single flow rule, then each + * action must specify a unique id. + * + * Counters can be retrieved and reset through ``rte_flow_query()``, see + * ``struct rte_flow_query_count``. + * + * The shared flag indicates whether the counter is unique to the flow rule the + * action is specified with, or whether it is a shared counter. + * + * For a count action with the shared flag set, then then a global device + * namespace is assumed for the counter id, so that any matched flow rules using + * a count action with the same counter id on the same port will contribute to + * that counter. + * + * For ports within the same switch domain then the counter id namespace extends + * to all ports within that switch domain. + */ +struct rte_flow_action_count { + uint32_t shared:1; /**< Share counter ID with other flow rules. */ + uint32_t reserved:31; /**< Reserved, must be zero. */ + uint32_t id; /**< Counter ID. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_COUNT (query) + * + * Query structure to retrieve and reset flow rule counters. + */ +struct rte_flow_query_count { + uint32_t reset:1; /**< Reset counters after query [in]. */ + uint32_t hits_set:1; /**< hits field is set [out]. */ + uint32_t bytes_set:1; /**< bytes field is set [out]. */ + uint32_t reserved:29; /**< Reserved, must be zero [in, out]. */ + uint64_t hits; /**< Number of hits for this rule [out]. */ + uint64_t bytes; /**< Number of bytes through this rule [out]. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_RSS + * + * Similar to QUEUE, except RSS is additionally performed on packets to + * spread them among several queues according to the provided parameters. + * + * Unlike global RSS settings used by other DPDK APIs, unsetting the + * @p types field does not disable RSS in a flow rule. Doing so instead + * requests safe unspecified "best-effort" settings from the underlying PMD, + * which depending on the flow rule, may result in anything ranging from + * empty (single queue) to all-inclusive RSS. + * + * Note: RSS hash result is stored in the hash.rss mbuf field which overlaps + * hash.fdir.lo. Since the MARK action sets the hash.fdir.hi field only, + * both can be requested simultaneously. + */ +struct rte_flow_action_rss { + enum rte_eth_hash_function func; /**< RSS hash function to apply. */ + /** + * Packet encapsulation level RSS hash @p types apply to. + * + * - @p 0 requests the default behavior. Depending on the packet + * type, it can mean outermost, innermost, anything in between or + * even no RSS. + * + * It basically stands for the innermost encapsulation level RSS + * can be performed on according to PMD and device capabilities. + * + * - @p 1 requests RSS to be performed on the outermost packet + * encapsulation level. + * + * - @p 2 and subsequent values request RSS to be performed on the + * specified inner packet encapsulation level, from outermost to + * innermost (lower to higher values). + * + * Values other than @p 0 are not necessarily supported. + * + * Requesting a specific RSS level on unrecognized traffic results + * in undefined behavior. For predictable results, it is recommended + * to make the flow rule pattern match packet headers up to the + * requested encapsulation level so that only matching traffic goes + * through. + */ + uint32_t level; + uint64_t types; /**< Specific RSS hash types (see ETH_RSS_*). */ + uint32_t key_len; /**< Hash key length in bytes. */ + uint32_t queue_num; /**< Number of entries in @p queue. */ + const uint8_t *key; /**< Hash key. */ + const uint16_t *queue; /**< Queue indices to use. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_VF + * + * Directs matching traffic to a given virtual function of the current + * device. + * + * Packets matched by a VF pattern item can be redirected to their original + * VF ID instead of the specified one. This parameter may not be available + * and is not guaranteed to work properly if the VF part is matched by a + * prior flow rule or if packets are not addressed to a VF in the first + * place. + */ +struct rte_flow_action_vf { + uint32_t original:1; /**< Use original VF ID if possible. */ + uint32_t reserved:31; /**< Reserved, must be zero. */ + uint32_t id; /**< VF ID. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_PHY_PORT + * + * Directs packets to a given physical port index of the underlying + * device. + * + * @see RTE_FLOW_ITEM_TYPE_PHY_PORT + */ +struct rte_flow_action_phy_port { + uint32_t original:1; /**< Use original port index if possible. */ + uint32_t reserved:31; /**< Reserved, must be zero. */ + uint32_t index; /**< Physical port index. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_PORT_ID + * + * Directs matching traffic to a given DPDK port ID. + * + * @see RTE_FLOW_ITEM_TYPE_PORT_ID + */ +struct rte_flow_action_port_id { + uint32_t original:1; /**< Use original DPDK port ID if possible. */ + uint32_t reserved:31; /**< Reserved, must be zero. */ + uint32_t id; /**< DPDK port ID. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_METER + * + * Traffic metering and policing (MTR). + * + * Packets matched by items of this type can be either dropped or passed to the + * next item with their color set by the MTR object. + */ +struct rte_flow_action_meter { + uint32_t mtr_id; /**< MTR object ID created with rte_mtr_create(). */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_SECURITY + * + * Perform the security action on flows matched by the pattern items + * according to the configuration of the security session. + * + * This action modifies the payload of matched flows. For INLINE_CRYPTO, the + * security protocol headers and IV are fully provided by the application as + * specified in the flow pattern. The payload of matching packets is + * encrypted on egress, and decrypted and authenticated on ingress. + * For INLINE_PROTOCOL, the security protocol is fully offloaded to HW, + * providing full encapsulation and decapsulation of packets in security + * protocols. The flow pattern specifies both the outer security header fields + * and the inner packet fields. The security session specified in the action + * must match the pattern parameters. + * + * The security session specified in the action must be created on the same + * port as the flow action that is being specified. + * + * The ingress/egress flow attribute should match that specified in the + * security session if the security session supports the definition of the + * direction. + * + * Multiple flows can be configured to use the same security session. + */ +struct rte_flow_action_security { + void *security_session; /**< Pointer to security session structure. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_SET_MPLS_TTL + * + * Implements OFPAT_SET_MPLS_TTL ("MPLS TTL") as defined by the OpenFlow + * Switch Specification. + */ +struct rte_flow_action_of_set_mpls_ttl { + uint8_t mpls_ttl; /**< MPLS TTL. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_SET_NW_TTL + * + * Implements OFPAT_SET_NW_TTL ("IP TTL") as defined by the OpenFlow Switch + * Specification. + */ +struct rte_flow_action_of_set_nw_ttl { + uint8_t nw_ttl; /**< IP TTL. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN + * + * Implements OFPAT_PUSH_VLAN ("push a new VLAN tag") as defined by the + * OpenFlow Switch Specification. + */ +struct rte_flow_action_of_push_vlan { + rte_be16_t ethertype; /**< EtherType. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID + * + * Implements OFPAT_SET_VLAN_VID ("set the 802.1q VLAN id") as defined by + * the OpenFlow Switch Specification. + */ +struct rte_flow_action_of_set_vlan_vid { + rte_be16_t vlan_vid; /**< VLAN id. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP + * + * Implements OFPAT_SET_LAN_PCP ("set the 802.1q priority") as defined by + * the OpenFlow Switch Specification. + */ +struct rte_flow_action_of_set_vlan_pcp { + uint8_t vlan_pcp; /**< VLAN priority. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_POP_MPLS + * + * Implements OFPAT_POP_MPLS ("pop the outer MPLS tag") as defined by the + * OpenFlow Switch Specification. + */ +struct rte_flow_action_of_pop_mpls { + rte_be16_t ethertype; /**< EtherType. */ +}; + +/** + * RTE_FLOW_ACTION_TYPE_OF_PUSH_MPLS + * + * Implements OFPAT_PUSH_MPLS ("push a new MPLS tag") as defined by the + * OpenFlow Switch Specification. + */ +struct rte_flow_action_of_push_mpls { + rte_be16_t ethertype; /**< EtherType. */ +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP + * + * VXLAN tunnel end-point encapsulation data definition + * + * The tunnel definition is provided through the flow item pattern, the + * provided pattern must conform to RFC7348 for the tunnel specified. The flow + * definition must be provided in order from the RTE_FLOW_ITEM_TYPE_ETH + * definition up the end item which is specified by RTE_FLOW_ITEM_TYPE_END. + * + * The mask field allows user to specify which fields in the flow item + * definitions can be ignored and which have valid data and can be used + * verbatim. + * + * Note: the last field is not used in the definition of a tunnel and can be + * ignored. + * + * Valid flow definition for RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP include: + * + * - ETH / IPV4 / UDP / VXLAN / END + * - ETH / IPV6 / UDP / VXLAN / END + * - ETH / VLAN / IPV4 / UDP / VXLAN / END + * + */ +struct rte_flow_action_vxlan_encap { + /** + * Encapsulating vxlan tunnel definition + * (terminated by the END pattern item). + */ + struct rte_flow_item *definition; +}; + +/** + * @warning + * @b EXPERIMENTAL: this structure may change without prior notice + * + * RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP + * + * NVGRE tunnel end-point encapsulation data definition + * + * The tunnel definition is provided through the flow item pattern the + * provided pattern must conform with RFC7637. The flow definition must be + * provided in order from the RTE_FLOW_ITEM_TYPE_ETH definition up the end item + * which is specified by RTE_FLOW_ITEM_TYPE_END. + * + * The mask field allows user to specify which fields in the flow item + * definitions can be ignored and which have valid data and can be used + * verbatim. + * + * Note: the last field is not used in the definition of a tunnel and can be + * ignored. + * + * Valid flow definition for RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP include: + * + * - ETH / IPV4 / NVGRE / END + * - ETH / VLAN / IPV6 / NVGRE / END + * + */ +struct rte_flow_action_nvgre_encap { + /** + * Encapsulating vxlan tunnel definition + * (terminated by the END pattern item). + */ + struct rte_flow_item *definition; +}; + +/* + * Definition of a single action. + * + * A list of actions is terminated by a END action. + * + * For simple actions without a configuration structure, conf remains NULL. + */ +struct rte_flow_action { + enum rte_flow_action_type type; /**< Action type. */ + const void *conf; /**< Pointer to action configuration structure. */ +}; + +/** + * Opaque type returned after successfully creating a flow. + * + * This handle can be used to manage and query the related flow (e.g. to + * destroy it or retrieve counters). + */ +struct rte_flow; + +/** + * Verbose error types. + * + * Most of them provide the type of the object referenced by struct + * rte_flow_error.cause. + */ +enum rte_flow_error_type { + RTE_FLOW_ERROR_TYPE_NONE, /**< No error. */ + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */ + RTE_FLOW_ERROR_TYPE_HANDLE, /**< Flow rule (handle). */ + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, /**< Group field. */ + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, /**< Priority field. */ + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, /**< Ingress field. */ + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, /**< Egress field. */ + RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, /**< Transfer field. */ + RTE_FLOW_ERROR_TYPE_ATTR, /**< Attributes structure. */ + RTE_FLOW_ERROR_TYPE_ITEM_NUM, /**< Pattern length. */ + RTE_FLOW_ERROR_TYPE_ITEM_SPEC, /**< Item specification. */ + RTE_FLOW_ERROR_TYPE_ITEM_LAST, /**< Item specification range. */ + RTE_FLOW_ERROR_TYPE_ITEM_MASK, /**< Item specification mask. */ + RTE_FLOW_ERROR_TYPE_ITEM, /**< Specific pattern item. */ + RTE_FLOW_ERROR_TYPE_ACTION_NUM, /**< Number of actions. */ + RTE_FLOW_ERROR_TYPE_ACTION_CONF, /**< Action configuration. */ + RTE_FLOW_ERROR_TYPE_ACTION, /**< Specific action. */ +}; + +/** + * Verbose error structure definition. + * + * This object is normally allocated by applications and set by PMDs, the + * message points to a constant string which does not need to be freed by + * the application, however its pointer can be considered valid only as long + * as its associated DPDK port remains configured. Closing the underlying + * device or unloading the PMD invalidates it. + * + * Both cause and message may be NULL regardless of the error type. + */ +struct rte_flow_error { + enum rte_flow_error_type type; /**< Cause field and error types. */ + const void *cause; /**< Object responsible for the error. */ + const char *message; /**< Human-readable error message. */ +}; + +/** + * Check whether a flow rule can be created on a given port. + * + * The flow rule is validated for correctness and whether it could be accepted + * by the device given sufficient resources. The rule is checked against the + * current device mode and queue configuration. The flow rule may also + * optionally be validated against existing flow rules and device resources. + * This function has no effect on the target device. + * + * The returned value is guaranteed to remain valid only as long as no + * successful calls to rte_flow_create() or rte_flow_destroy() are made in + * the meantime and no device parameter affecting flow rules in any way are + * modified, due to possible collisions or resource limitations (although in + * such cases EINVAL should not be returned). + * + * @param port_id + * Port identifier of Ethernet device. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 if flow rule is valid and can be created. A negative errno value + * otherwise (rte_errno is also set), the following errors are defined: + * + * -ENOSYS: underlying device does not support this functionality. + * + * -EIO: underlying device is removed. + * + * -EINVAL: unknown or invalid rule specification. + * + * -ENOTSUP: valid but unsupported rule specification (e.g. partial + * bit-masks are unsupported). + * + * -EEXIST: collision with an existing rule. Only returned if device + * supports flow rule collision checking and there was a flow rule + * collision. Not receiving this return code is no guarantee that creating + * the rule will not fail due to a collision. + * + * -ENOMEM: not enough memory to execute the function, or if the device + * supports resource validation, resource limitation on the device. + * + * -EBUSY: action cannot be performed due to busy device resources, may + * succeed if the affected queues or even the entire port are in a stopped + * state (see rte_eth_dev_rx_queue_stop() and rte_eth_dev_stop()). + */ +int +rte_flow_validate(uint16_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); + +/** + * Create a flow rule on a given port. + * + * @param port_id + * Port identifier of Ethernet device. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * A valid handle in case of success, NULL otherwise and rte_errno is set + * to the positive version of one of the error codes defined for + * rte_flow_validate(). + */ +struct rte_flow * +rte_flow_create(uint16_t port_id, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); + +/** + * Destroy a flow rule on a given port. + * + * Failure to destroy a flow rule handle may occur when other flow rules + * depend on it, and destroying it would result in an inconsistent state. + * + * This function is only guaranteed to succeed if handles are destroyed in + * reverse order of their creation. + * + * @param port_id + * Port identifier of Ethernet device. + * @param flow + * Flow rule handle to destroy. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_destroy(uint16_t port_id, + struct rte_flow *flow, + struct rte_flow_error *error); + +/** + * Destroy all flow rules associated with a port. + * + * In the unlikely event of failure, handles are still considered destroyed + * and no longer valid but the port must be assumed to be in an inconsistent + * state. + * + * @param port_id + * Port identifier of Ethernet device. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_flush(uint16_t port_id, + struct rte_flow_error *error); + +/** + * Query an existing flow rule. + * + * This function allows retrieving flow-specific data such as counters. + * Data is gathered by special actions which must be present in the flow + * rule definition. + * + * \see RTE_FLOW_ACTION_TYPE_COUNT + * + * @param port_id + * Port identifier of Ethernet device. + * @param flow + * Flow rule handle to query. + * @param action + * Action definition as defined in original flow rule. + * @param[in, out] data + * Pointer to storage for the associated query data type. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_query(uint16_t port_id, + struct rte_flow *flow, + const struct rte_flow_action *action, + void *data, + struct rte_flow_error *error); + +/** + * Restrict ingress traffic to the defined flow rules. + * + * Isolated mode guarantees that all ingress traffic comes from defined flow + * rules only (current and future). + * + * Besides making ingress more deterministic, it allows PMDs to safely reuse + * resources otherwise assigned to handle the remaining traffic, such as + * global RSS configuration settings, VLAN filters, MAC address entries, + * legacy filter API rules and so on in order to expand the set of possible + * flow rule types. + * + * Calling this function as soon as possible after device initialization, + * ideally before the first call to rte_eth_dev_configure(), is recommended + * to avoid possible failures due to conflicting settings. + * + * Once effective, leaving isolated mode may not be possible depending on + * PMD implementation. + * + * Additionally, the following functionality has no effect on the underlying + * port and may return errors such as ENOTSUP ("not supported"): + * + * - Toggling promiscuous mode. + * - Toggling allmulticast mode. + * - Configuring MAC addresses. + * - Configuring multicast addresses. + * - Configuring VLAN filters. + * - Configuring Rx filters through the legacy API (e.g. FDIR). + * - Configuring global RSS settings. + * + * @param port_id + * Port identifier of Ethernet device. + * @param set + * Nonzero to enter isolated mode, attempt to leave it otherwise. + * @param[out] error + * Perform verbose error reporting if not NULL. PMDs initialize this + * structure in case of error only. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +int +rte_flow_isolate(uint16_t port_id, int set, struct rte_flow_error *error); + +/** + * Initialize flow error structure. + * + * @param[out] error + * Pointer to flow error structure (may be NULL). + * @param code + * Related error code (rte_errno). + * @param type + * Cause field and error types. + * @param cause + * Object responsible for the error. + * @param message + * Human-readable error message. + * + * @return + * Negative error code (errno value) and rte_errno is set. + */ +int +rte_flow_error_set(struct rte_flow_error *error, + int code, + enum rte_flow_error_type type, + const void *cause, + const char *message); + +/** + * Generic flow representation. + * + * This form is sufficient to describe an rte_flow independently from any + * PMD implementation and allows for replayability and identification. + */ +struct rte_flow_desc { + size_t size; /**< Allocated space including data[]. */ + struct rte_flow_attr attr; /**< Attributes. */ + struct rte_flow_item *items; /**< Items. */ + struct rte_flow_action *actions; /**< Actions. */ + uint8_t data[]; /**< Storage for items/actions. */ +}; + +/** + * Copy an rte_flow rule description. + * + * @param[in] fd + * Flow rule description. + * @param[in] len + * Total size of allocated data for the flow description. + * @param[in] attr + * Flow rule attributes. + * @param[in] items + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * + * @return + * If len is greater or equal to the size of the flow, the total size of the + * flow description and its data. + * If len is lower than the size of the flow, the number of bytes that would + * have been written to desc had it been sufficient. Nothing is written. + */ +size_t +rte_flow_copy(struct rte_flow_desc *fd, size_t len, + const struct rte_flow_attr *attr, + const struct rte_flow_item *items, + const struct rte_flow_action *actions); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_FLOW_H_ */ diff --git a/lib/librte_ethdev/rte_flow_driver.h b/lib/librte_ethdev/rte_flow_driver.h new file mode 100644 index 00000000..1c90c600 --- /dev/null +++ b/lib/librte_ethdev/rte_flow_driver.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox Technologies, Ltd + */ + +#ifndef RTE_FLOW_DRIVER_H_ +#define RTE_FLOW_DRIVER_H_ + +/** + * @file + * RTE generic flow API (driver side) + * + * This file provides implementation helpers for internal use by PMDs, they + * are not intended to be exposed to applications and are not subject to ABI + * versioning. + */ + +#include <stdint.h> + +#include "rte_ethdev.h" +#include "rte_flow.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Generic flow operations structure implemented and returned by PMDs. + * + * To implement this API, PMDs must handle the RTE_ETH_FILTER_GENERIC filter + * type in their .filter_ctrl callback function (struct eth_dev_ops) as well + * as the RTE_ETH_FILTER_GET filter operation. + * + * If successful, this operation must result in a pointer to a PMD-specific + * struct rte_flow_ops written to the argument address as described below: + * + * \code + * + * // PMD filter_ctrl callback + * + * static const struct rte_flow_ops pmd_flow_ops = { ... }; + * + * switch (filter_type) { + * case RTE_ETH_FILTER_GENERIC: + * if (filter_op != RTE_ETH_FILTER_GET) + * return -EINVAL; + * *(const void **)arg = &pmd_flow_ops; + * return 0; + * } + * + * \endcode + * + * See also rte_flow_ops_get(). + * + * These callback functions are not supposed to be used by applications + * directly, which must rely on the API defined in rte_flow.h. + * + * Public-facing wrapper functions perform a few consistency checks so that + * unimplemented (i.e. NULL) callbacks simply return -ENOTSUP. These + * callbacks otherwise only differ by their first argument (with port ID + * already resolved to a pointer to struct rte_eth_dev). + */ +struct rte_flow_ops { + /** See rte_flow_validate(). */ + int (*validate) + (struct rte_eth_dev *, + const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); + /** See rte_flow_create(). */ + struct rte_flow *(*create) + (struct rte_eth_dev *, + const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); + /** See rte_flow_destroy(). */ + int (*destroy) + (struct rte_eth_dev *, + struct rte_flow *, + struct rte_flow_error *); + /** See rte_flow_flush(). */ + int (*flush) + (struct rte_eth_dev *, + struct rte_flow_error *); + /** See rte_flow_query(). */ + int (*query) + (struct rte_eth_dev *, + struct rte_flow *, + const struct rte_flow_action *, + void *, + struct rte_flow_error *); + /** See rte_flow_isolate(). */ + int (*isolate) + (struct rte_eth_dev *, + int, + struct rte_flow_error *); +}; + +/** + * Get generic flow operations structure from a port. + * + * @param port_id + * Port identifier to query. + * @param[out] error + * Pointer to flow error structure. + * + * @return + * The flow operations structure associated with port_id, NULL in case of + * error, in which case rte_errno is set and the error structure contains + * additional details. + */ +const struct rte_flow_ops * +rte_flow_ops_get(uint16_t port_id, struct rte_flow_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_FLOW_DRIVER_H_ */ diff --git a/lib/librte_ethdev/rte_mtr.c b/lib/librte_ethdev/rte_mtr.c new file mode 100644 index 00000000..1046cb5f --- /dev/null +++ b/lib/librte_ethdev/rte_mtr.c @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include <stdint.h> + +#include <rte_errno.h> +#include "rte_compat.h" +#include "rte_ethdev.h" +#include "rte_mtr_driver.h" +#include "rte_mtr.h" + +/* Get generic traffic metering & policing operations structure from a port. */ +const struct rte_mtr_ops * +rte_mtr_ops_get(uint16_t port_id, struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_mtr_ops *ops; + + if (!rte_eth_dev_is_valid_port(port_id)) { + rte_mtr_error_set(error, + ENODEV, + RTE_MTR_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(ENODEV)); + return NULL; + } + + if ((dev->dev_ops->mtr_ops_get == NULL) || + (dev->dev_ops->mtr_ops_get(dev, &ops) != 0) || + (ops == NULL)) { + rte_mtr_error_set(error, + ENOSYS, + RTE_MTR_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(ENOSYS)); + return NULL; + } + + return ops; +} + +#define RTE_MTR_FUNC(port_id, func) \ +({ \ + const struct rte_mtr_ops *ops = \ + rte_mtr_ops_get(port_id, error); \ + if (ops == NULL) \ + return -rte_errno; \ + \ + if (ops->func == NULL) \ + return -rte_mtr_error_set(error, \ + ENOSYS, \ + RTE_MTR_ERROR_TYPE_UNSPECIFIED, \ + NULL, \ + rte_strerror(ENOSYS)); \ + \ + ops->func; \ +}) + +/* MTR capabilities get */ +int __rte_experimental +rte_mtr_capabilities_get(uint16_t port_id, + struct rte_mtr_capabilities *cap, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, capabilities_get)(dev, + cap, error); +} + +/* MTR meter profile add */ +int __rte_experimental +rte_mtr_meter_profile_add(uint16_t port_id, + uint32_t meter_profile_id, + struct rte_mtr_meter_profile *profile, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_profile_add)(dev, + meter_profile_id, profile, error); +} + +/** MTR meter profile delete */ +int __rte_experimental +rte_mtr_meter_profile_delete(uint16_t port_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_profile_delete)(dev, + meter_profile_id, error); +} + +/** MTR object create */ +int __rte_experimental +rte_mtr_create(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_params *params, + int shared, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, create)(dev, + mtr_id, params, shared, error); +} + +/** MTR object destroy */ +int __rte_experimental +rte_mtr_destroy(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, destroy)(dev, + mtr_id, error); +} + +/** MTR object meter enable */ +int __rte_experimental +rte_mtr_meter_enable(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_enable)(dev, + mtr_id, error); +} + +/** MTR object meter disable */ +int __rte_experimental +rte_mtr_meter_disable(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_disable)(dev, + mtr_id, error); +} + +/** MTR object meter profile update */ +int __rte_experimental +rte_mtr_meter_profile_update(uint16_t port_id, + uint32_t mtr_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_profile_update)(dev, + mtr_id, meter_profile_id, error); +} + +/** MTR object meter DSCP table update */ +int __rte_experimental +rte_mtr_meter_dscp_table_update(uint16_t port_id, + uint32_t mtr_id, + enum rte_mtr_color *dscp_table, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, meter_dscp_table_update)(dev, + mtr_id, dscp_table, error); +} + +/** MTR object policer action update */ +int __rte_experimental +rte_mtr_policer_actions_update(uint16_t port_id, + uint32_t mtr_id, + uint32_t action_mask, + enum rte_mtr_policer_action *actions, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, policer_actions_update)(dev, + mtr_id, action_mask, actions, error); +} + +/** MTR object enabled stats update */ +int __rte_experimental +rte_mtr_stats_update(uint16_t port_id, + uint32_t mtr_id, + uint64_t stats_mask, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, stats_update)(dev, + mtr_id, stats_mask, error); +} + +/** MTR object stats read */ +int __rte_experimental +rte_mtr_stats_read(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_mtr_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_MTR_FUNC(port_id, stats_read)(dev, + mtr_id, stats, stats_mask, clear, error); +} diff --git a/lib/librte_ethdev/rte_mtr.h b/lib/librte_ethdev/rte_mtr.h new file mode 100644 index 00000000..c4819b27 --- /dev/null +++ b/lib/librte_ethdev/rte_mtr.h @@ -0,0 +1,730 @@ +/*- + * BSD LICENSE + * + * Copyright 2017 Intel Corporation + * Copyright 2017 NXP + * Copyright 2017 Cavium + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_MTR_H__ +#define __INCLUDE_RTE_MTR_H__ + +/** + * @file + * RTE Generic Traffic Metering and Policing API + * + * This interface provides the ability to configure the traffic metering and + * policing (MTR) in a generic way. + * + * The processing done for each input packet hitting a MTR object is: + * A) Traffic metering: The packet is assigned a color (the meter output + * color), based on the previous history of the flow reflected in the + * current state of the MTR object, according to the specific traffic + * metering algorithm. The traffic metering algorithm can typically work + * in color aware mode, in which case the input packet already has an + * initial color (the input color), or in color blind mode, which is + * equivalent to considering all input packets initially colored as green. + * B) Policing: There is a separate policer action configured for each meter + * output color, which can: + * a) Drop the packet. + * b) Keep the same packet color: the policer output color matches the + * meter output color (essentially a no-op action). + * c) Recolor the packet: the policer output color is different than + * the meter output color. + * The policer output color is the output color of the packet, which is + * set in the packet meta-data (i.e. struct rte_mbuf::sched::color). + * C) Statistics: The set of counters maintained for each MTR object is + * configurable and subject to the implementation support. This set + * includes the number of packets and bytes dropped or passed for each + * output color. + * + * Once successfully created, an MTR object is linked to one or several flows + * through the meter action of the flow API. + * A) Whether an MTR object is private to a flow or potentially shared by + * several flows has to be specified at creation time. + * B) Several meter actions can be potentially registered for the same flow. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + */ +#include <stdint.h> +#include <rte_compat.h> +#include <rte_common.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Color + */ +enum rte_mtr_color { + RTE_MTR_GREEN = 0, /**< Green */ + RTE_MTR_YELLOW, /**< Yellow */ + RTE_MTR_RED, /**< Red */ + RTE_MTR_COLORS /**< Number of colors. */ +}; + +/** + * Statistics counter type + */ +enum rte_mtr_stats_type { + /** Number of packets passed as green by the policer. */ + RTE_MTR_STATS_N_PKTS_GREEN = 1 << 0, + + /** Number of packets passed as yellow by the policer. */ + RTE_MTR_STATS_N_PKTS_YELLOW = 1 << 1, + + /** Number of packets passed as red by the policer. */ + RTE_MTR_STATS_N_PKTS_RED = 1 << 2, + + /** Number of packets dropped by the policer. */ + RTE_MTR_STATS_N_PKTS_DROPPED = 1 << 3, + + /** Number of bytes passed as green by the policer. */ + RTE_MTR_STATS_N_BYTES_GREEN = 1 << 4, + + /** Number of bytes passed as yellow by the policer. */ + RTE_MTR_STATS_N_BYTES_YELLOW = 1 << 5, + + /** Number of bytes passed as red by the policer. */ + RTE_MTR_STATS_N_BYTES_RED = 1 << 6, + + /** Number of bytes dropped by the policer. */ + RTE_MTR_STATS_N_BYTES_DROPPED = 1 << 7, +}; + +/** + * Statistics counters + */ +struct rte_mtr_stats { + /** Number of packets passed by the policer (per color). */ + uint64_t n_pkts[RTE_MTR_COLORS]; + + /** Number of bytes passed by the policer (per color). */ + uint64_t n_bytes[RTE_MTR_COLORS]; + + /** Number of packets dropped by the policer. */ + uint64_t n_pkts_dropped; + + /** Number of bytes passed by the policer. */ + uint64_t n_bytes_dropped; +}; + +/** + * Traffic metering algorithms + */ +enum rte_mtr_algorithm { + /** No traffic metering performed, the output color is the same as the + * input color for every input packet. The meter of the MTR object is + * working in pass-through mode, having same effect as meter disable. + * @see rte_mtr_meter_disable() + */ + RTE_MTR_NONE = 0, + + /** Single Rate Three Color Marker (srTCM) - IETF RFC 2697. */ + RTE_MTR_SRTCM_RFC2697, + + /** Two Rate Three Color Marker (trTCM) - IETF RFC 2698. */ + RTE_MTR_TRTCM_RFC2698, + + /** Two Rate Three Color Marker (trTCM) - IETF RFC 4115. */ + RTE_MTR_TRTCM_RFC4115, +}; + +/** + * Meter profile + */ +struct rte_mtr_meter_profile { + /** Traffic metering algorithm. */ + enum rte_mtr_algorithm alg; + + RTE_STD_C11 + union { + /** Items only valid when *alg* is set to srTCM - RFC 2697. */ + struct { + /** Committed Information Rate (CIR) (bytes/second). */ + uint64_t cir; + + /** Committed Burst Size (CBS) (bytes). */ + uint64_t cbs; + + /** Excess Burst Size (EBS) (bytes). */ + uint64_t ebs; + } srtcm_rfc2697; + + /** Items only valid when *alg* is set to trTCM - RFC 2698. */ + struct { + /** Committed Information Rate (CIR) (bytes/second). */ + uint64_t cir; + + /** Peak Information Rate (PIR) (bytes/second). */ + uint64_t pir; + + /** Committed Burst Size (CBS) (byes). */ + uint64_t cbs; + + /** Peak Burst Size (PBS) (bytes). */ + uint64_t pbs; + } trtcm_rfc2698; + + /** Items only valid when *alg* is set to trTCM - RFC 4115. */ + struct { + /** Committed Information Rate (CIR) (bytes/second). */ + uint64_t cir; + + /** Excess Information Rate (EIR) (bytes/second). */ + uint64_t eir; + + /** Committed Burst Size (CBS) (byes). */ + uint64_t cbs; + + /** Excess Burst Size (EBS) (bytes). */ + uint64_t ebs; + } trtcm_rfc4115; + }; +}; + +/** + * Policer actions + */ +enum rte_mtr_policer_action { + /** Recolor the packet as green. */ + MTR_POLICER_ACTION_COLOR_GREEN = 0, + + /** Recolor the packet as yellow. */ + MTR_POLICER_ACTION_COLOR_YELLOW, + + /** Recolor the packet as red. */ + MTR_POLICER_ACTION_COLOR_RED, + + /** Drop the packet. */ + MTR_POLICER_ACTION_DROP, +}; + +/** + * Parameters for each traffic metering & policing object + * + * @see enum rte_mtr_stats_type + */ +struct rte_mtr_params { + /** Meter profile ID. */ + uint32_t meter_profile_id; + + /** Meter input color in case of MTR object chaining. When non-zero: if + * a previous MTR object is enabled in the same flow, then the color + * determined by the latest MTR object in the same flow is used as the + * input color by the current MTR object, otherwise the current MTR + * object uses the *dscp_table* to determine the input color. When zero: + * the color determined by any previous MTR object in same flow is + * ignored by the current MTR object, which uses the *dscp_table* to + * determine the input color. + */ + int use_prev_mtr_color; + + /** Meter input color. When non-NULL: it points to a pre-allocated and + * pre-populated table with exactly 64 elements providing the input + * color for each value of the IPv4/IPv6 Differentiated Services Code + * Point (DSCP) input packet field. When NULL: it is equivalent to + * setting this parameter to an all-green populated table (i.e. table + * with all the 64 elements set to green color). The color blind mode + * is configured by setting *use_prev_mtr_color* to 0 and *dscp_table* + * to either NULL or to an all-green populated table. When + * *use_prev_mtr_color* is non-zero value or when *dscp_table* contains + * at least one yellow or red color element, then the color aware mode + * is configured. + */ + enum rte_mtr_color *dscp_table; + + /** Non-zero to enable the meter, zero to disable the meter at the time + * of MTR object creation. Ignored when the meter profile indicated by + * *meter_profile_id* is set to NONE. + * @see rte_mtr_meter_disable() + */ + int meter_enable; + + /** Policer actions (per meter output color). */ + enum rte_mtr_policer_action action[RTE_MTR_COLORS]; + + /** Set of stats counters to be enabled. + * @see enum rte_mtr_stats_type + */ + uint64_t stats_mask; +}; + +/** + * MTR capabilities + */ +struct rte_mtr_capabilities { + /** Maximum number of MTR objects. */ + uint32_t n_max; + + /** Maximum number of MTR objects that can be shared by multiple flows. + * The value of zero indicates that shared MTR objects are not + * supported. The maximum value is *n_max*. + */ + uint32_t n_shared_max; + + /** When non-zero, this flag indicates that all the MTR objects that + * cannot be shared by multiple flows have identical capability set. + */ + int identical; + + /** When non-zero, this flag indicates that all the MTR objects that + * can be shared by multiple flows have identical capability set. + */ + int shared_identical; + + /** Maximum number of flows that can share the same MTR object. The + * value of zero is invalid. The value of 1 means that shared MTR + * objects not supported. + */ + uint32_t shared_n_flows_per_mtr_max; + + /** Maximum number of MTR objects that can be part of the same flow. The + * value of zero is invalid. The value of 1 indicates that MTR object + * chaining is not supported. The maximum value is *n_max*. + */ + uint32_t chaining_n_mtrs_per_flow_max; + + /** + * When non-zero, it indicates that the packet color identified by one + * MTR object can be used as the packet input color by any subsequent + * MTR object from the same flow. When zero, it indicates that the color + * determined by one MTR object is always ignored by any subsequent MTR + * object from the same flow. Only valid when MTR chaining is supported, + * i.e. *chaining_n_mtrs_per_flow_max* is greater than 1. When non-zero, + * it also means that the color aware mode is supported by at least one + * metering algorithm. + */ + int chaining_use_prev_mtr_color_supported; + + /** + * When non-zero, it indicates that the packet color identified by one + * MTR object is always used as the packet input color by any subsequent + * MTR object that is part of the same flow. When zero, it indicates + * that whether the color determined by one MTR object is either ignored + * or used as the packet input color by any subsequent MTR object from + * the same flow is individually configurable for each MTR object. Only + * valid when *chaining_use_prev_mtr_color_supported* is non-zero. + */ + int chaining_use_prev_mtr_color_enforced; + + /** Maximum number of MTR objects that can have their meter configured + * to run the srTCM RFC 2697 algorithm. The value of 0 indicates this + * metering algorithm is not supported. The maximum value is *n_max*. + */ + uint32_t meter_srtcm_rfc2697_n_max; + + /** Maximum number of MTR objects that can have their meter configured + * to run the trTCM RFC 2698 algorithm. The value of 0 indicates this + * metering algorithm is not supported. The maximum value is *n_max*. + */ + uint32_t meter_trtcm_rfc2698_n_max; + + /** Maximum number of MTR objects that can have their meter configured + * to run the trTCM RFC 4115 algorithm. The value of 0 indicates this + * metering algorithm is not supported. The maximum value is *n_max*. + */ + uint32_t meter_trtcm_rfc4115_n_max; + + /** Maximum traffic rate that can be metered by a single MTR object. For + * srTCM RFC 2697, this is the maximum CIR rate. For trTCM RFC 2698, + * this is the maximum PIR rate. For trTCM RFC 4115, this is the maximum + * value for the sum of PIR and EIR rates. + */ + uint64_t meter_rate_max; + + /** + * When non-zero, it indicates that color aware mode is supported for + * the srTCM RFC 2697 metering algorithm. + */ + int color_aware_srtcm_rfc2697_supported; + + /** + * When non-zero, it indicates that color aware mode is supported for + * the trTCM RFC 2698 metering algorithm. + */ + int color_aware_trtcm_rfc2698_supported; + + /** + * When non-zero, it indicates that color aware mode is supported for + * the trTCM RFC 4115 metering algorithm. + */ + int color_aware_trtcm_rfc4115_supported; + + /** When non-zero, it indicates that the policer packet recolor actions + * are supported. + * @see enum rte_mtr_policer_action + */ + int policer_action_recolor_supported; + + /** When non-zero, it indicates that the policer packet drop action is + * supported. + * @see enum rte_mtr_policer_action + */ + int policer_action_drop_supported; + + /** Set of supported statistics counter types. + * @see enum rte_mtr_stats_type + */ + uint64_t stats_mask; +}; + +/** + * Verbose error types. + * + * Most of them provide the type of the object referenced by struct + * rte_mtr_error::cause. + */ +enum rte_mtr_error_type { + RTE_MTR_ERROR_TYPE_NONE, /**< No error. */ + RTE_MTR_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */ + RTE_MTR_ERROR_TYPE_METER_PROFILE_ID, + RTE_MTR_ERROR_TYPE_METER_PROFILE, + RTE_MTR_ERROR_TYPE_MTR_ID, + RTE_MTR_ERROR_TYPE_MTR_PARAMS, + RTE_MTR_ERROR_TYPE_POLICER_ACTION_GREEN, + RTE_MTR_ERROR_TYPE_POLICER_ACTION_YELLOW, + RTE_MTR_ERROR_TYPE_POLICER_ACTION_RED, + RTE_MTR_ERROR_TYPE_STATS_MASK, + RTE_MTR_ERROR_TYPE_STATS, + RTE_MTR_ERROR_TYPE_SHARED, +}; + +/** + * Verbose error structure definition. + * + * This object is normally allocated by applications and set by PMDs, the + * message points to a constant string which does not need to be freed by + * the application, however its pointer can be considered valid only as long + * as its associated DPDK port remains configured. Closing the underlying + * device or unloading the PMD invalidates it. + * + * Both cause and message may be NULL regardless of the error type. + */ +struct rte_mtr_error { + enum rte_mtr_error_type type; /**< Cause field and error type. */ + const void *cause; /**< Object responsible for the error. */ + const char *message; /**< Human-readable error message. */ +}; + +/** + * MTR capabilities get + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] cap + * MTR capabilities. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_capabilities_get(uint16_t port_id, + struct rte_mtr_capabilities *cap, + struct rte_mtr_error *error); + +/** + * Meter profile add + * + * Create a new meter profile with ID set to *meter_profile_id*. The new profile + * is used to create one or several MTR objects. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] meter_profile_id + * ID for the new meter profile. Needs to be unused by any of the existing + * meter profiles added for the current port. + * @param[in] profile + * Meter profile parameters. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_meter_profile_add(uint16_t port_id, + uint32_t meter_profile_id, + struct rte_mtr_meter_profile *profile, + struct rte_mtr_error *error); + +/** + * Meter profile delete + * + * Delete an existing meter profile. This operation fails when there is + * currently at least one user (i.e. MTR object) of this profile. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] meter_profile_id + * Meter profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_meter_profile_delete(uint16_t port_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error); + +/** + * MTR object create + * + * Create a new MTR object for the current port. This object is run as part of + * associated flow action for traffic metering and policing. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be unused by any of the existing MTR objects. + * created for the current port. + * @param[in] params + * MTR object params. Needs to be pre-allocated and valid. + * @param[in] shared + * Non-zero when this MTR object can be shared by multiple flows, zero when + * this MTR object can be used by a single flow. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_flow_action_type::RTE_FLOW_ACTION_TYPE_METER + */ +int __rte_experimental +rte_mtr_create(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_params *params, + int shared, + struct rte_mtr_error *error); + +/** + * MTR object destroy + * + * Delete an existing MTR object. This operation fails when there is currently + * at least one user (i.e. flow) of this MTR object. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * created for the current port. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_destroy(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error); + +/** + * MTR object meter disable + * + * Disable the meter of an existing MTR object. In disabled state, the meter of + * the current MTR object works in pass-through mode, meaning that for each + * input packet the meter output color is always the same as the input color. In + * particular, when the meter of the current MTR object is configured in color + * blind mode, the input color is always green, so the meter output color is + * also always green. Note that the policer and the statistics of the current + * MTR object are working as usual while the meter is disabled. No action is + * taken and this function returns successfully when the meter of the current + * MTR object is already disabled. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_meter_disable(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error); + +/** + * MTR object meter enable + * + * Enable the meter of an existing MTR object. If the MTR object has its meter + * already enabled, then no action is taken and this function returns + * successfully. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_meter_enable(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_error *error); + +/** + * MTR object meter profile update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[in] meter_profile_id + * Meter profile ID for the current MTR object. Needs to be valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_meter_profile_update(uint16_t port_id, + uint32_t mtr_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error); + +/** + * MTR object DSCP table update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[in] dscp_table + * When non-NULL: it points to a pre-allocated and pre-populated table with + * exactly 64 elements providing the input color for each value of the + * IPv4/IPv6 Differentiated Services Code Point (DSCP) input packet field. + * When NULL: it is equivalent to setting this parameter to an “all-green” + * populated table (i.e. table with all the 64 elements set to green color). + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_meter_dscp_table_update(uint16_t port_id, + uint32_t mtr_id, + enum rte_mtr_color *dscp_table, + struct rte_mtr_error *error); + +/** + * MTR object policer actions update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[in] action_mask + * Bit mask indicating which policer actions need to be updated. One or more + * policer actions can be updated in a single function invocation. To update + * the policer action associated with color C, bit (1 << C) needs to be set in + * *action_mask* and element at position C in the *actions* array needs to be + * valid. + * @param[in] actions + * Pre-allocated and pre-populated array of policer actions. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int __rte_experimental +rte_mtr_policer_actions_update(uint16_t port_id, + uint32_t mtr_id, + uint32_t action_mask, + enum rte_mtr_policer_action *actions, + struct rte_mtr_error *error); + +/** + * MTR object enabled statistics counters update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[in] stats_mask + * Mask of statistics counter types to be enabled for the current MTR object. + * Any statistics counter type not included in this set is to be disabled for + * the current MTR object. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_mtr_stats_type + */ +int __rte_experimental +rte_mtr_stats_update(uint16_t port_id, + uint32_t mtr_id, + uint64_t stats_mask, + struct rte_mtr_error *error); + +/** + * MTR object statistics counters read + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mtr_id + * MTR object ID. Needs to be valid. + * @param[out] stats + * When non-NULL, it contains the current value for the statistics counters + * enabled for the current MTR object. + * @param[out] stats_mask + * When non-NULL, it contains the mask of statistics counter types that are + * currently enabled for this MTR object, indicating which of the counters + * retrieved with the *stats* structure are valid. + * @param[in] clear + * When this parameter has a non-zero value, the statistics counters are + * cleared (i.e. set to zero) immediately after they have been read, + * otherwise the statistics counters are left untouched. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_mtr_stats_type + */ +int __rte_experimental +rte_mtr_stats_read(uint16_t port_id, + uint32_t mtr_id, + struct rte_mtr_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_mtr_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_MTR_H__ */ diff --git a/lib/librte_ethdev/rte_mtr_driver.h b/lib/librte_ethdev/rte_mtr_driver.h new file mode 100644 index 00000000..c9a6d7c3 --- /dev/null +++ b/lib/librte_ethdev/rte_mtr_driver.h @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef __INCLUDE_RTE_MTR_DRIVER_H__ +#define __INCLUDE_RTE_MTR_DRIVER_H__ + +/** + * @file + * RTE Generic Traffic Metering and Policing API (Driver Side) + * + * This file provides implementation helpers for internal use by PMDs, they + * are not intended to be exposed to applications and are not subject to ABI + * versioning. + */ + +#include <stdint.h> + +#include <rte_errno.h> +#include "rte_ethdev.h" +#include "rte_mtr.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int (*rte_mtr_capabilities_get_t)(struct rte_eth_dev *dev, + struct rte_mtr_capabilities *cap, + struct rte_mtr_error *error); +/**< @internal MTR capabilities get */ + +typedef int (*rte_mtr_meter_profile_add_t)(struct rte_eth_dev *dev, + uint32_t meter_profile_id, + struct rte_mtr_meter_profile *profile, + struct rte_mtr_error *error); +/**< @internal MTR meter profile add */ + +typedef int (*rte_mtr_meter_profile_delete_t)(struct rte_eth_dev *dev, + uint32_t meter_profile_id, + struct rte_mtr_error *error); +/**< @internal MTR meter profile delete */ + +typedef int (*rte_mtr_create_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_params *params, + int shared, + struct rte_mtr_error *error); +/**< @internal MTR object create */ + +typedef int (*rte_mtr_destroy_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_error *error); +/**< @internal MTR object destroy */ + +typedef int (*rte_mtr_meter_enable_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_error *error); +/**< @internal MTR object meter enable */ + +typedef int (*rte_mtr_meter_disable_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_error *error); +/**< @internal MTR object meter disable */ + +typedef int (*rte_mtr_meter_profile_update_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + uint32_t meter_profile_id, + struct rte_mtr_error *error); +/**< @internal MTR object meter profile update */ + +typedef int (*rte_mtr_meter_dscp_table_update_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + enum rte_mtr_color *dscp_table, + struct rte_mtr_error *error); +/**< @internal MTR object meter DSCP table update */ + +typedef int (*rte_mtr_policer_actions_update_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + uint32_t action_mask, + enum rte_mtr_policer_action *actions, + struct rte_mtr_error *error); +/**< @internal MTR object policer action update*/ + +typedef int (*rte_mtr_stats_update_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + uint64_t stats_mask, + struct rte_mtr_error *error); +/**< @internal MTR object enabled stats update */ + +typedef int (*rte_mtr_stats_read_t)(struct rte_eth_dev *dev, + uint32_t mtr_id, + struct rte_mtr_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_mtr_error *error); +/**< @internal MTR object stats read */ + +struct rte_mtr_ops { + /** MTR capabilities get */ + rte_mtr_capabilities_get_t capabilities_get; + + /** MTR meter profile add */ + rte_mtr_meter_profile_add_t meter_profile_add; + + /** MTR meter profile delete */ + rte_mtr_meter_profile_delete_t meter_profile_delete; + + /** MTR object create */ + rte_mtr_create_t create; + + /** MTR object destroy */ + rte_mtr_destroy_t destroy; + + /** MTR object meter enable */ + rte_mtr_meter_enable_t meter_enable; + + /** MTR object meter disable */ + rte_mtr_meter_disable_t meter_disable; + + /** MTR object meter profile update */ + rte_mtr_meter_profile_update_t meter_profile_update; + + /** MTR object meter DSCP table update */ + rte_mtr_meter_dscp_table_update_t meter_dscp_table_update; + + /** MTR object policer action update */ + rte_mtr_policer_actions_update_t policer_actions_update; + + /** MTR object enabled stats update */ + rte_mtr_stats_update_t stats_update; + + /** MTR object stats read */ + rte_mtr_stats_read_t stats_read; +}; + +/** + * Initialize generic error structure. + * + * This function also sets rte_errno to a given value. + * + * @param[out] error + * Pointer to error structure (may be NULL). + * @param[in] code + * Related error code (rte_errno). + * @param[in] type + * Cause field and error type. + * @param[in] cause + * Object responsible for the error. + * @param[in] message + * Human-readable error message. + * + * @return + * Error code. + */ +static inline int +rte_mtr_error_set(struct rte_mtr_error *error, + int code, + enum rte_mtr_error_type type, + const void *cause, + const char *message) +{ + if (error) { + *error = (struct rte_mtr_error){ + .type = type, + .cause = cause, + .message = message, + }; + } + rte_errno = code; + return code; +} + +/** + * Get generic traffic metering and policing operations structure from a port + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] error + * Error details + * + * @return + * The traffic metering and policing operations structure associated with + * port_id on success, NULL otherwise. + */ +const struct rte_mtr_ops * +rte_mtr_ops_get(uint16_t port_id, struct rte_mtr_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_MTR_DRIVER_H__ */ diff --git a/lib/librte_ethdev/rte_tm.c b/lib/librte_ethdev/rte_tm.c new file mode 100644 index 00000000..9709454f --- /dev/null +++ b/lib/librte_ethdev/rte_tm.c @@ -0,0 +1,409 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include <stdint.h> + +#include <rte_errno.h> +#include "rte_ethdev.h" +#include "rte_tm_driver.h" +#include "rte_tm.h" + +/* Get generic traffic manager operations structure from a port. */ +const struct rte_tm_ops * +rte_tm_ops_get(uint16_t port_id, struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_tm_ops *ops; + + if (!rte_eth_dev_is_valid_port(port_id)) { + rte_tm_error_set(error, + ENODEV, + RTE_TM_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(ENODEV)); + return NULL; + } + + if ((dev->dev_ops->tm_ops_get == NULL) || + (dev->dev_ops->tm_ops_get(dev, &ops) != 0) || + (ops == NULL)) { + rte_tm_error_set(error, + ENOSYS, + RTE_TM_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(ENOSYS)); + return NULL; + } + + return ops; +} + +#define RTE_TM_FUNC(port_id, func) \ +({ \ + const struct rte_tm_ops *ops = \ + rte_tm_ops_get(port_id, error); \ + if (ops == NULL) \ + return -rte_errno; \ + \ + if (ops->func == NULL) \ + return -rte_tm_error_set(error, \ + ENOSYS, \ + RTE_TM_ERROR_TYPE_UNSPECIFIED, \ + NULL, \ + rte_strerror(ENOSYS)); \ + \ + ops->func; \ +}) + +/* Get number of leaf nodes */ +int +rte_tm_get_number_of_leaf_nodes(uint16_t port_id, + uint32_t *n_leaf_nodes, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + const struct rte_tm_ops *ops = + rte_tm_ops_get(port_id, error); + + if (ops == NULL) + return -rte_errno; + + if (n_leaf_nodes == NULL) { + rte_tm_error_set(error, + EINVAL, + RTE_TM_ERROR_TYPE_UNSPECIFIED, + NULL, + rte_strerror(EINVAL)); + return -rte_errno; + } + + *n_leaf_nodes = dev->data->nb_tx_queues; + return 0; +} + +/* Check node type (leaf or non-leaf) */ +int +rte_tm_node_type_get(uint16_t port_id, + uint32_t node_id, + int *is_leaf, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_type_get)(dev, + node_id, is_leaf, error); +} + +/* Get capabilities */ +int rte_tm_capabilities_get(uint16_t port_id, + struct rte_tm_capabilities *cap, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, capabilities_get)(dev, + cap, error); +} + +/* Get level capabilities */ +int rte_tm_level_capabilities_get(uint16_t port_id, + uint32_t level_id, + struct rte_tm_level_capabilities *cap, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, level_capabilities_get)(dev, + level_id, cap, error); +} + +/* Get node capabilities */ +int rte_tm_node_capabilities_get(uint16_t port_id, + uint32_t node_id, + struct rte_tm_node_capabilities *cap, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_capabilities_get)(dev, + node_id, cap, error); +} + +/* Add WRED profile */ +int rte_tm_wred_profile_add(uint16_t port_id, + uint32_t wred_profile_id, + struct rte_tm_wred_params *profile, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, wred_profile_add)(dev, + wred_profile_id, profile, error); +} + +/* Delete WRED profile */ +int rte_tm_wred_profile_delete(uint16_t port_id, + uint32_t wred_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, wred_profile_delete)(dev, + wred_profile_id, error); +} + +/* Add/update shared WRED context */ +int rte_tm_shared_wred_context_add_update(uint16_t port_id, + uint32_t shared_wred_context_id, + uint32_t wred_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shared_wred_context_add_update)(dev, + shared_wred_context_id, wred_profile_id, error); +} + +/* Delete shared WRED context */ +int rte_tm_shared_wred_context_delete(uint16_t port_id, + uint32_t shared_wred_context_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shared_wred_context_delete)(dev, + shared_wred_context_id, error); +} + +/* Add shaper profile */ +int rte_tm_shaper_profile_add(uint16_t port_id, + uint32_t shaper_profile_id, + struct rte_tm_shaper_params *profile, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shaper_profile_add)(dev, + shaper_profile_id, profile, error); +} + +/* Delete WRED profile */ +int rte_tm_shaper_profile_delete(uint16_t port_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shaper_profile_delete)(dev, + shaper_profile_id, error); +} + +/* Add shared shaper */ +int rte_tm_shared_shaper_add_update(uint16_t port_id, + uint32_t shared_shaper_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shared_shaper_add_update)(dev, + shared_shaper_id, shaper_profile_id, error); +} + +/* Delete shared shaper */ +int rte_tm_shared_shaper_delete(uint16_t port_id, + uint32_t shared_shaper_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, shared_shaper_delete)(dev, + shared_shaper_id, error); +} + +/* Add node to port traffic manager hierarchy */ +int rte_tm_node_add(uint16_t port_id, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + uint32_t level_id, + struct rte_tm_node_params *params, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_add)(dev, + node_id, parent_node_id, priority, weight, level_id, + params, error); +} + +/* Delete node from traffic manager hierarchy */ +int rte_tm_node_delete(uint16_t port_id, + uint32_t node_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_delete)(dev, + node_id, error); +} + +/* Suspend node */ +int rte_tm_node_suspend(uint16_t port_id, + uint32_t node_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_suspend)(dev, + node_id, error); +} + +/* Resume node */ +int rte_tm_node_resume(uint16_t port_id, + uint32_t node_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_resume)(dev, + node_id, error); +} + +/* Commit the initial port traffic manager hierarchy */ +int rte_tm_hierarchy_commit(uint16_t port_id, + int clear_on_fail, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, hierarchy_commit)(dev, + clear_on_fail, error); +} + +/* Update node parent */ +int rte_tm_node_parent_update(uint16_t port_id, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_parent_update)(dev, + node_id, parent_node_id, priority, weight, error); +} + +/* Update node private shaper */ +int rte_tm_node_shaper_update(uint16_t port_id, + uint32_t node_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_shaper_update)(dev, + node_id, shaper_profile_id, error); +} + +/* Update node shared shapers */ +int rte_tm_node_shared_shaper_update(uint16_t port_id, + uint32_t node_id, + uint32_t shared_shaper_id, + int add, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_shared_shaper_update)(dev, + node_id, shared_shaper_id, add, error); +} + +/* Update node stats */ +int rte_tm_node_stats_update(uint16_t port_id, + uint32_t node_id, + uint64_t stats_mask, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_stats_update)(dev, + node_id, stats_mask, error); +} + +/* Update WFQ weight mode */ +int rte_tm_node_wfq_weight_mode_update(uint16_t port_id, + uint32_t node_id, + int *wfq_weight_mode, + uint32_t n_sp_priorities, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_wfq_weight_mode_update)(dev, + node_id, wfq_weight_mode, n_sp_priorities, error); +} + +/* Update node congestion management mode */ +int rte_tm_node_cman_update(uint16_t port_id, + uint32_t node_id, + enum rte_tm_cman_mode cman, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_cman_update)(dev, + node_id, cman, error); +} + +/* Update node private WRED context */ +int rte_tm_node_wred_context_update(uint16_t port_id, + uint32_t node_id, + uint32_t wred_profile_id, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_wred_context_update)(dev, + node_id, wred_profile_id, error); +} + +/* Update node shared WRED context */ +int rte_tm_node_shared_wred_context_update(uint16_t port_id, + uint32_t node_id, + uint32_t shared_wred_context_id, + int add, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_shared_wred_context_update)(dev, + node_id, shared_wred_context_id, add, error); +} + +/* Read and/or clear stats counters for specific node */ +int rte_tm_node_stats_read(uint16_t port_id, + uint32_t node_id, + struct rte_tm_node_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, node_stats_read)(dev, + node_id, stats, stats_mask, clear, error); +} + +/* Packet marking - VLAN DEI */ +int rte_tm_mark_vlan_dei(uint16_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, mark_vlan_dei)(dev, + mark_green, mark_yellow, mark_red, error); +} + +/* Packet marking - IPv4/IPv6 ECN */ +int rte_tm_mark_ip_ecn(uint16_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, mark_ip_ecn)(dev, + mark_green, mark_yellow, mark_red, error); +} + +/* Packet marking - IPv4/IPv6 DSCP */ +int rte_tm_mark_ip_dscp(uint16_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + return RTE_TM_FUNC(port_id, mark_ip_dscp)(dev, + mark_green, mark_yellow, mark_red, error); +} diff --git a/lib/librte_ethdev/rte_tm.h b/lib/librte_ethdev/rte_tm.h new file mode 100644 index 00000000..72554038 --- /dev/null +++ b/lib/librte_ethdev/rte_tm.h @@ -0,0 +1,1961 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2017 Intel Corporation. + * Copyright(c) 2017 Cavium. + * Copyright(c) 2017 NXP. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TM_H__ +#define __INCLUDE_RTE_TM_H__ + +/** + * @file + * RTE Generic Traffic Manager API + * + * This interface provides the ability to configure the traffic manager in a + * generic way. It includes features such as: hierarchical scheduling, + * traffic shaping, congestion management, packet marking, etc. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + */ + +#include <stdint.h> + +#include <rte_common.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Ethernet framing overhead. + * + * Overhead fields per Ethernet frame: + * 1. Preamble: 7 bytes; + * 2. Start of Frame Delimiter (SFD): 1 byte; + * 3. Inter-Frame Gap (IFG): 12 bytes. + * + * One of the typical values for the *pkt_length_adjust* field of the shaper + * profile. + * + * @see struct rte_tm_shaper_params + */ +#define RTE_TM_ETH_FRAMING_OVERHEAD 20 + +/** + * Ethernet framing overhead including the Frame Check Sequence (FCS) field. + * Useful when FCS is generated and added at the end of the Ethernet frame on + * TX side without any SW intervention. + * + * One of the typical values for the pkt_length_adjust field of the shaper + * profile. + * + * @see struct rte_tm_shaper_params + */ +#define RTE_TM_ETH_FRAMING_OVERHEAD_FCS 24 + +/** + * Invalid WRED profile ID. + * + * @see struct rte_tm_node_params + * @see rte_tm_node_add() + * @see rte_tm_node_wred_context_update() + */ +#define RTE_TM_WRED_PROFILE_ID_NONE UINT32_MAX + +/** + *Invalid shaper profile ID. + * + * @see struct rte_tm_node_params + * @see rte_tm_node_add() + * @see rte_tm_node_shaper_update() + */ +#define RTE_TM_SHAPER_PROFILE_ID_NONE UINT32_MAX + +/** + * Node ID for the parent of the root node. + * + * @see rte_tm_node_add() + */ +#define RTE_TM_NODE_ID_NULL UINT32_MAX + +/** + * Node level ID used to disable level ID checking. + * + * @see rte_tm_node_add() + */ +#define RTE_TM_NODE_LEVEL_ID_ANY UINT32_MAX + +/** + * Color + */ +enum rte_tm_color { + RTE_TM_GREEN = 0, /**< Green */ + RTE_TM_YELLOW, /**< Yellow */ + RTE_TM_RED, /**< Red */ + RTE_TM_COLORS /**< Number of colors */ +}; + +/** + * Node statistics counter type + */ +enum rte_tm_stats_type { + /** Number of packets scheduled from current node. */ + RTE_TM_STATS_N_PKTS = 1 << 0, + + /** Number of bytes scheduled from current node. */ + RTE_TM_STATS_N_BYTES = 1 << 1, + + /** Number of green packets dropped by current leaf node. */ + RTE_TM_STATS_N_PKTS_GREEN_DROPPED = 1 << 2, + + /** Number of yellow packets dropped by current leaf node. */ + RTE_TM_STATS_N_PKTS_YELLOW_DROPPED = 1 << 3, + + /** Number of red packets dropped by current leaf node. */ + RTE_TM_STATS_N_PKTS_RED_DROPPED = 1 << 4, + + /** Number of green bytes dropped by current leaf node. */ + RTE_TM_STATS_N_BYTES_GREEN_DROPPED = 1 << 5, + + /** Number of yellow bytes dropped by current leaf node. */ + RTE_TM_STATS_N_BYTES_YELLOW_DROPPED = 1 << 6, + + /** Number of red bytes dropped by current leaf node. */ + RTE_TM_STATS_N_BYTES_RED_DROPPED = 1 << 7, + + /** Number of packets currently waiting in the packet queue of current + * leaf node. + */ + RTE_TM_STATS_N_PKTS_QUEUED = 1 << 8, + + /** Number of bytes currently waiting in the packet queue of current + * leaf node. + */ + RTE_TM_STATS_N_BYTES_QUEUED = 1 << 9, +}; + +/** + * Node statistics counters + */ +struct rte_tm_node_stats { + /** Number of packets scheduled from current node. */ + uint64_t n_pkts; + + /** Number of bytes scheduled from current node. */ + uint64_t n_bytes; + + /** Statistics counters for leaf nodes only. */ + struct { + /** Number of packets dropped by current leaf node per each + * color. + */ + uint64_t n_pkts_dropped[RTE_TM_COLORS]; + + /** Number of bytes dropped by current leaf node per each + * color. + */ + uint64_t n_bytes_dropped[RTE_TM_COLORS]; + + /** Number of packets currently waiting in the packet queue of + * current leaf node. + */ + uint64_t n_pkts_queued; + + /** Number of bytes currently waiting in the packet queue of + * current leaf node. + */ + uint64_t n_bytes_queued; + } leaf; +}; + +/** + * Traffic manager dynamic updates + */ +enum rte_tm_dynamic_update_type { + /** Dynamic parent node update. The new parent node is located on same + * hierarchy level as the former parent node. Consequently, the node + * whose parent is changed preserves its hierarchy level. + */ + RTE_TM_UPDATE_NODE_PARENT_KEEP_LEVEL = 1 << 0, + + /** Dynamic parent node update. The new parent node is located on + * different hierarchy level than the former parent node. Consequently, + * the node whose parent is changed also changes its hierarchy level. + */ + RTE_TM_UPDATE_NODE_PARENT_CHANGE_LEVEL = 1 << 1, + + /** Dynamic node add/delete. */ + RTE_TM_UPDATE_NODE_ADD_DELETE = 1 << 2, + + /** Suspend/resume nodes. */ + RTE_TM_UPDATE_NODE_SUSPEND_RESUME = 1 << 3, + + /** Dynamic switch between byte-based and packet-based WFQ weights. */ + RTE_TM_UPDATE_NODE_WFQ_WEIGHT_MODE = 1 << 4, + + /** Dynamic update on number of SP priorities. */ + RTE_TM_UPDATE_NODE_N_SP_PRIORITIES = 1 << 5, + + /** Dynamic update of congestion management mode for leaf nodes. */ + RTE_TM_UPDATE_NODE_CMAN = 1 << 6, + + /** Dynamic update of the set of enabled stats counter types. */ + RTE_TM_UPDATE_NODE_STATS = 1 << 7, +}; + +/** + * Traffic manager capabilities + */ +struct rte_tm_capabilities { + /** Maximum number of nodes. */ + uint32_t n_nodes_max; + + /** Maximum number of levels (i.e. number of nodes connecting the root + * node with any leaf node, including the root and the leaf). + */ + uint32_t n_levels_max; + + /** When non-zero, this flag indicates that all the non-leaf nodes + * (with the exception of the root node) have identical capability set. + */ + int non_leaf_nodes_identical; + + /** When non-zero, this flag indicates that all the leaf nodes have + * identical capability set. + */ + int leaf_nodes_identical; + + /** Maximum number of shapers, either private or shared. In case the + * implementation does not share any resources between private and + * shared shapers, it is typically equal to the sum of + * *shaper_private_n_max* and *shaper_shared_n_max*. The + * value of zero indicates that traffic shaping is not supported. + */ + uint32_t shaper_n_max; + + /** Maximum number of private shapers. Indicates the maximum number of + * nodes that can concurrently have their private shaper enabled. The + * value of zero indicates that private shapers are not supported. + */ + uint32_t shaper_private_n_max; + + /** Maximum number of private shapers that support dual rate shaping. + * Indicates the maximum number of nodes that can concurrently have + * their private shaper enabled with dual rate support. Only valid when + * private shapers are supported. The value of zero indicates that dual + * rate shaping is not available for private shapers. The maximum value + * is *shaper_private_n_max*. + */ + int shaper_private_dual_rate_n_max; + + /** Minimum committed/peak rate (bytes per second) for any private + * shaper. Valid only when private shapers are supported. + */ + uint64_t shaper_private_rate_min; + + /** Maximum committed/peak rate (bytes per second) for any private + * shaper. Valid only when private shapers are supported. + */ + uint64_t shaper_private_rate_max; + + /** Maximum number of shared shapers. The value of zero indicates that + * shared shapers are not supported. + */ + uint32_t shaper_shared_n_max; + + /** Maximum number of nodes that can share the same shared shaper. + * Only valid when shared shapers are supported. + */ + uint32_t shaper_shared_n_nodes_per_shaper_max; + + /** Maximum number of shared shapers a node can be part of. This + * parameter indicates that there is at least one node that can be + * configured with this many shared shapers, which might not be true for + * all the nodes. Only valid when shared shapers are supported, in which + * case it ranges from 1 to *shaper_shared_n_max*. + */ + uint32_t shaper_shared_n_shapers_per_node_max; + + /** Maximum number of shared shapers that can be configured with dual + * rate shaping. The value of zero indicates that dual rate shaping + * support is not available for shared shapers. + */ + uint32_t shaper_shared_dual_rate_n_max; + + /** Minimum committed/peak rate (bytes per second) for any shared + * shaper. Only valid when shared shapers are supported. + */ + uint64_t shaper_shared_rate_min; + + /** Maximum committed/peak rate (bytes per second) for any shared + * shaper. Only valid when shared shapers are supported. + */ + uint64_t shaper_shared_rate_max; + + /** Minimum value allowed for packet length adjustment for any private + * or shared shaper. + */ + int shaper_pkt_length_adjust_min; + + /** Maximum value allowed for packet length adjustment for any private + * or shared shaper. + */ + int shaper_pkt_length_adjust_max; + + /** Maximum number of children nodes. This parameter indicates that + * there is at least one non-leaf node that can be configured with this + * many children nodes, which might not be true for all the non-leaf + * nodes. + */ + uint32_t sched_n_children_max; + + /** Maximum number of supported priority levels. This parameter + * indicates that there is at least one non-leaf node that can be + * configured with this many priority levels for managing its children + * nodes, which might not be true for all the non-leaf nodes. The value + * of zero is invalid. The value of 1 indicates that only priority 0 is + * supported, which essentially means that Strict Priority (SP) + * algorithm is not supported. + */ + uint32_t sched_sp_n_priorities_max; + + /** Maximum number of sibling nodes that can have the same priority at + * any given time, i.e. maximum size of the WFQ sibling node group. This + * parameter indicates there is at least one non-leaf node that meets + * this condition, which might not be true for all the non-leaf nodes. + * The value of zero is invalid. The value of 1 indicates that WFQ + * algorithm is not supported. The maximum value is + * *sched_n_children_max*. + */ + uint32_t sched_wfq_n_children_per_group_max; + + /** Maximum number of priority levels that can have more than one child + * node at any given time, i.e. maximum number of WFQ sibling node + * groups that have two or more members. This parameter indicates there + * is at least one non-leaf node that meets this condition, which might + * not be true for all the non-leaf nodes. The value of zero states that + * WFQ algorithm is not supported. The value of 1 indicates that + * (*sched_sp_n_priorities_max* - 1) priority levels have at most one + * child node, so there can be only one priority level with two or + * more sibling nodes making up a WFQ group. The maximum value is: + * min(floor(*sched_n_children_max* / 2), *sched_sp_n_priorities_max*). + */ + uint32_t sched_wfq_n_groups_max; + + /** Maximum WFQ weight. The value of 1 indicates that all sibling nodes + * with same priority have the same WFQ weight, so WFQ is reduced to FQ. + */ + uint32_t sched_wfq_weight_max; + + /** WRED packet mode support. When non-zero, this parameter indicates + * that there is atleast one leaf node that supports the WRED packet + * mode, which might not be true for all the leaf nodes. In packet + * mode, the WRED thresholds specify the queue length in packets, as + * opposed to bytes. + */ + int cman_wred_packet_mode_supported; + + /** WRED byte mode support. When non-zero, this parameter indicates that + * there is atleast one leaf node that supports the WRED byte mode, + * which might not be true for all the leaf nodes. In byte mode, the + * WRED thresholds specify the queue length in bytes, as opposed to + * packets. + */ + int cman_wred_byte_mode_supported; + + /** Head drop algorithm support. When non-zero, this parameter + * indicates that there is at least one leaf node that supports the head + * drop algorithm, which might not be true for all the leaf nodes. + */ + int cman_head_drop_supported; + + /** Maximum number of WRED contexts, either private or shared. In case + * the implementation does not share any resources between private and + * shared WRED contexts, it is typically equal to the sum of + * *cman_wred_context_private_n_max* and + * *cman_wred_context_shared_n_max*. The value of zero indicates that + * WRED is not supported. + */ + uint32_t cman_wred_context_n_max; + + /** Maximum number of private WRED contexts. Indicates the maximum + * number of leaf nodes that can concurrently have their private WRED + * context enabled. The value of zero indicates that private WRED + * contexts are not supported. + */ + uint32_t cman_wred_context_private_n_max; + + /** Maximum number of shared WRED contexts. The value of zero + * indicates that shared WRED contexts are not supported. + */ + uint32_t cman_wred_context_shared_n_max; + + /** Maximum number of leaf nodes that can share the same WRED context. + * Only valid when shared WRED contexts are supported. + */ + uint32_t cman_wred_context_shared_n_nodes_per_context_max; + + /** Maximum number of shared WRED contexts a leaf node can be part of. + * This parameter indicates that there is at least one leaf node that + * can be configured with this many shared WRED contexts, which might + * not be true for all the leaf nodes. Only valid when shared WRED + * contexts are supported, in which case it ranges from 1 to + * *cman_wred_context_shared_n_max*. + */ + uint32_t cman_wred_context_shared_n_contexts_per_node_max; + + /** Support for VLAN DEI packet marking (per color). */ + int mark_vlan_dei_supported[RTE_TM_COLORS]; + + /** Support for IPv4/IPv6 ECN marking of TCP packets (per color). */ + int mark_ip_ecn_tcp_supported[RTE_TM_COLORS]; + + /** Support for IPv4/IPv6 ECN marking of SCTP packets (per color). */ + int mark_ip_ecn_sctp_supported[RTE_TM_COLORS]; + + /** Support for IPv4/IPv6 DSCP packet marking (per color). */ + int mark_ip_dscp_supported[RTE_TM_COLORS]; + + /** Set of supported dynamic update operations. + * @see enum rte_tm_dynamic_update_type + */ + uint64_t dynamic_update_mask; + + /** Set of supported statistics counter types. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; +}; + +/** + * Traffic manager level capabilities + */ +struct rte_tm_level_capabilities { + /** Maximum number of nodes for the current hierarchy level. */ + uint32_t n_nodes_max; + + /** Maximum number of non-leaf nodes for the current hierarchy level. + * The value of 0 indicates that current level only supports leaf + * nodes. The maximum value is *n_nodes_max*. + */ + uint32_t n_nodes_nonleaf_max; + + /** Maximum number of leaf nodes for the current hierarchy level. The + * value of 0 indicates that current level only supports non-leaf + * nodes. The maximum value is *n_nodes_max*. + */ + uint32_t n_nodes_leaf_max; + + /** When non-zero, this flag indicates that all the non-leaf nodes on + * this level have identical capability set. Valid only when + * *n_nodes_nonleaf_max* is non-zero. + */ + int non_leaf_nodes_identical; + + /** When non-zero, this flag indicates that all the leaf nodes on this + * level have identical capability set. Valid only when + * *n_nodes_leaf_max* is non-zero. + */ + int leaf_nodes_identical; + + RTE_STD_C11 + union { + /** Items valid only for the non-leaf nodes on this level. */ + struct { + /** Private shaper support. When non-zero, it indicates + * there is at least one non-leaf node on this level + * with private shaper support, which may not be the + * case for all the non-leaf nodes on this level. + */ + int shaper_private_supported; + + /** Dual rate support for private shaper. Valid only + * when private shaper is supported for the non-leaf + * nodes on the current level. When non-zero, it + * indicates there is at least one non-leaf node on this + * level with dual rate private shaper support, which + * may not be the case for all the non-leaf nodes on + * this level. + */ + int shaper_private_dual_rate_supported; + + /** Minimum committed/peak rate (bytes per second) for + * private shapers of the non-leaf nodes of this level. + * Valid only when private shaper is supported on this + * level. + */ + uint64_t shaper_private_rate_min; + + /** Maximum committed/peak rate (bytes per second) for + * private shapers of the non-leaf nodes on this level. + * Valid only when private shaper is supported on this + * level. + */ + uint64_t shaper_private_rate_max; + + /** Maximum number of shared shapers that any non-leaf + * node on this level can be part of. The value of zero + * indicates that shared shapers are not supported by + * the non-leaf nodes on this level. When non-zero, it + * indicates there is at least one non-leaf node on this + * level that meets this condition, which may not be the + * case for all the non-leaf nodes on this level. + */ + uint32_t shaper_shared_n_max; + + /** Maximum number of children nodes. This parameter + * indicates that there is at least one non-leaf node on + * this level that can be configured with this many + * children nodes, which might not be true for all the + * non-leaf nodes on this level. + */ + uint32_t sched_n_children_max; + + /** Maximum number of supported priority levels. This + * parameter indicates that there is at least one + * non-leaf node on this level that can be configured + * with this many priority levels for managing its + * children nodes, which might not be true for all the + * non-leaf nodes on this level. The value of zero is + * invalid. The value of 1 indicates that only priority + * 0 is supported, which essentially means that Strict + * Priority (SP) algorithm is not supported on this + * level. + */ + uint32_t sched_sp_n_priorities_max; + + /** Maximum number of sibling nodes that can have the + * same priority at any given time, i.e. maximum size of + * the WFQ sibling node group. This parameter indicates + * there is at least one non-leaf node on this level + * that meets this condition, which may not be true for + * all the non-leaf nodes on this level. The value of + * zero is invalid. The value of 1 indicates that WFQ + * algorithm is not supported on this level. The maximum + * value is *sched_n_children_max*. + */ + uint32_t sched_wfq_n_children_per_group_max; + + /** Maximum number of priority levels that can have + * more than one child node at any given time, i.e. + * maximum number of WFQ sibling node groups that + * have two or more members. This parameter indicates + * there is at least one non-leaf node on this level + * that meets this condition, which might not be true + * for all the non-leaf nodes. The value of zero states + * that WFQ algorithm is not supported on this level. + * The value of 1 indicates that + * (*sched_sp_n_priorities_max* - 1) priority levels on + * this level have at most one child node, so there can + * be only one priority level with two or more sibling + * nodes making up a WFQ group on this level. The + * maximum value is: + * min(floor(*sched_n_children_max* / 2), + * *sched_sp_n_priorities_max*). + */ + uint32_t sched_wfq_n_groups_max; + + /** Maximum WFQ weight. The value of 1 indicates that + * all sibling nodes on this level with same priority + * have the same WFQ weight, so on this level WFQ is + * reduced to FQ. + */ + uint32_t sched_wfq_weight_max; + + /** Mask of statistics counter types supported by the + * non-leaf nodes on this level. Every supported + * statistics counter type is supported by at least one + * non-leaf node on this level, which may not be true + * for all the non-leaf nodes on this level. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; + } nonleaf; + + /** Items valid only for the leaf nodes on this level. */ + struct { + /** Private shaper support. When non-zero, it indicates + * there is at least one leaf node on this level with + * private shaper support, which may not be the case for + * all the leaf nodes on this level. + */ + int shaper_private_supported; + + /** Dual rate support for private shaper. Valid only + * when private shaper is supported for the leaf nodes + * on this level. When non-zero, it indicates there is + * at least one leaf node on this level with dual rate + * private shaper support, which may not be the case for + * all the leaf nodes on this level. + */ + int shaper_private_dual_rate_supported; + + /** Minimum committed/peak rate (bytes per second) for + * private shapers of the leaf nodes of this level. + * Valid only when private shaper is supported for the + * leaf nodes on this level. + */ + uint64_t shaper_private_rate_min; + + /** Maximum committed/peak rate (bytes per second) for + * private shapers of the leaf nodes on this level. + * Valid only when private shaper is supported for the + * leaf nodes on this level. + */ + uint64_t shaper_private_rate_max; + + /** Maximum number of shared shapers that any leaf node + * on this level can be part of. The value of zero + * indicates that shared shapers are not supported by + * the leaf nodes on this level. When non-zero, it + * indicates there is at least one leaf node on this + * level that meets this condition, which may not be the + * case for all the leaf nodes on this level. + */ + uint32_t shaper_shared_n_max; + + /** WRED packet mode support. When non-zero, this + * parameter indicates that there is atleast one leaf + * node on this level that supports the WRED packet + * mode, which might not be true for all the leaf + * nodes. In packet mode, the WRED thresholds specify + * the queue length in packets, as opposed to bytes. + */ + int cman_wred_packet_mode_supported; + + /** WRED byte mode support. When non-zero, this + * parameter indicates that there is atleast one leaf + * node on this level that supports the WRED byte mode, + * which might not be true for all the leaf nodes. In + * byte mode, the WRED thresholds specify the queue + * length in bytes, as opposed to packets. + */ + int cman_wred_byte_mode_supported; + + /** Head drop algorithm support. When non-zero, this + * parameter indicates that there is at least one leaf + * node on this level that supports the head drop + * algorithm, which might not be true for all the leaf + * nodes on this level. + */ + int cman_head_drop_supported; + + /** Private WRED context support. When non-zero, it + * indicates there is at least one node on this level + * with private WRED context support, which may not be + * true for all the leaf nodes on this level. + */ + int cman_wred_context_private_supported; + + /** Maximum number of shared WRED contexts that any + * leaf node on this level can be part of. The value of + * zero indicates that shared WRED contexts are not + * supported by the leaf nodes on this level. When + * non-zero, it indicates there is at least one leaf + * node on this level that meets this condition, which + * may not be the case for all the leaf nodes on this + * level. + */ + uint32_t cman_wred_context_shared_n_max; + + /** Mask of statistics counter types supported by the + * leaf nodes on this level. Every supported statistics + * counter type is supported by at least one leaf node + * on this level, which may not be true for all the leaf + * nodes on this level. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; + } leaf; + }; +}; + +/** + * Traffic manager node capabilities + */ +struct rte_tm_node_capabilities { + /** Private shaper support for the current node. */ + int shaper_private_supported; + + /** Dual rate shaping support for private shaper of current node. + * Valid only when private shaper is supported by the current node. + */ + int shaper_private_dual_rate_supported; + + /** Minimum committed/peak rate (bytes per second) for private + * shaper of current node. Valid only when private shaper is supported + * by the current node. + */ + uint64_t shaper_private_rate_min; + + /** Maximum committed/peak rate (bytes per second) for private + * shaper of current node. Valid only when private shaper is supported + * by the current node. + */ + uint64_t shaper_private_rate_max; + + /** Maximum number of shared shapers the current node can be part of. + * The value of zero indicates that shared shapers are not supported by + * the current node. + */ + uint32_t shaper_shared_n_max; + + RTE_STD_C11 + union { + /** Items valid only for non-leaf nodes. */ + struct { + /** Maximum number of children nodes. */ + uint32_t sched_n_children_max; + + /** Maximum number of supported priority levels. The + * value of zero is invalid. The value of 1 indicates + * that only priority 0 is supported, which essentially + * means that Strict Priority (SP) algorithm is not + * supported. + */ + uint32_t sched_sp_n_priorities_max; + + /** Maximum number of sibling nodes that can have the + * same priority at any given time, i.e. maximum size + * of the WFQ sibling node group. The value of zero + * is invalid. The value of 1 indicates that WFQ + * algorithm is not supported. The maximum value is + * *sched_n_children_max*. + */ + uint32_t sched_wfq_n_children_per_group_max; + + /** Maximum number of priority levels that can have + * more than one child node at any given time, i.e. + * maximum number of WFQ sibling node groups that have + * two or more members. The value of zero states that + * WFQ algorithm is not supported. The value of 1 + * indicates that (*sched_sp_n_priorities_max* - 1) + * priority levels have at most one child node, so there + * can be only one priority level with two or more + * sibling nodes making up a WFQ group. The maximum + * value is: min(floor(*sched_n_children_max* / 2), + * *sched_sp_n_priorities_max*). + */ + uint32_t sched_wfq_n_groups_max; + + /** Maximum WFQ weight. The value of 1 indicates that + * all sibling nodes with same priority have the same + * WFQ weight, so WFQ is reduced to FQ. + */ + uint32_t sched_wfq_weight_max; + } nonleaf; + + /** Items valid only for leaf nodes. */ + struct { + /** WRED packet mode support for current node. */ + int cman_wred_packet_mode_supported; + + /** WRED byte mode support for current node. */ + int cman_wred_byte_mode_supported; + + /** Head drop algorithm support for current node. */ + int cman_head_drop_supported; + + /** Private WRED context support for current node. */ + int cman_wred_context_private_supported; + + /** Maximum number of shared WRED contexts the current + * node can be part of. The value of zero indicates that + * shared WRED contexts are not supported by the current + * node. + */ + uint32_t cman_wred_context_shared_n_max; + } leaf; + }; + + /** Mask of statistics counter types supported by the current node. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; +}; + +/** + * Congestion management (CMAN) mode + * + * This is used for controlling the admission of packets into a packet queue or + * group of packet queues on congestion. On request of writing a new packet + * into the current queue while the queue is full, the *tail drop* algorithm + * drops the new packet while leaving the queue unmodified, as opposed to *head + * drop* algorithm, which drops the packet at the head of the queue (the oldest + * packet waiting in the queue) and admits the new packet at the tail of the + * queue. + * + * The *Random Early Detection (RED)* algorithm works by proactively dropping + * more and more input packets as the queue occupancy builds up. When the queue + * is full or almost full, RED effectively works as *tail drop*. The *Weighted + * RED* algorithm uses a separate set of RED thresholds for each packet color. + */ +enum rte_tm_cman_mode { + RTE_TM_CMAN_TAIL_DROP = 0, /**< Tail drop */ + RTE_TM_CMAN_HEAD_DROP, /**< Head drop */ + RTE_TM_CMAN_WRED, /**< Weighted Random Early Detection (WRED) */ +}; + +/** + * Random Early Detection (RED) profile + */ +struct rte_tm_red_params { + /** Minimum queue threshold */ + uint32_t min_th; + + /** Maximum queue threshold */ + uint32_t max_th; + + /** Inverse of packet marking probability maximum value (maxp), i.e. + * maxp_inv = 1 / maxp + */ + uint16_t maxp_inv; + + /** Negated log2 of queue weight (wq), i.e. wq = 1 / (2 ^ wq_log2) */ + uint16_t wq_log2; +}; + +/** + * Weighted RED (WRED) profile + * + * Multiple WRED contexts can share the same WRED profile. Each leaf node with + * WRED enabled as its congestion management mode has zero or one private WRED + * context (only one leaf node using it) and/or zero, one or several shared + * WRED contexts (multiple leaf nodes use the same WRED context). A private + * WRED context is used to perform congestion management for a single leaf + * node, while a shared WRED context is used to perform congestion management + * for a group of leaf nodes. + * + * @see struct rte_tm_capabilities::cman_wred_packet_mode_supported + * @see struct rte_tm_capabilities::cman_wred_byte_mode_supported + */ +struct rte_tm_wred_params { + /** One set of RED parameters per packet color */ + struct rte_tm_red_params red_params[RTE_TM_COLORS]; + + /** When non-zero, the *min_th* and *max_th* thresholds are specified + * in packets (WRED packet mode). When zero, the *min_th* and *max_th* + * thresholds are specified in bytes (WRED byte mode) + */ + int packet_mode; +}; + +/** + * Token bucket + */ +struct rte_tm_token_bucket { + /** Token bucket rate (bytes per second) */ + uint64_t rate; + + /** Token bucket size (bytes), a.k.a. max burst size */ + uint64_t size; +}; + +/** + * Shaper (rate limiter) profile + * + * Multiple shaper instances can share the same shaper profile. Each node has + * zero or one private shaper (only one node using it) and/or zero, one or + * several shared shapers (multiple nodes use the same shaper instance). + * A private shaper is used to perform traffic shaping for a single node, while + * a shared shaper is used to perform traffic shaping for a group of nodes. + * + * Single rate shapers use a single token bucket. A single rate shaper can be + * configured by setting the rate of the committed bucket to zero, which + * effectively disables this bucket. The peak bucket is used to limit the rate + * and the burst size for the current shaper. + * + * Dual rate shapers use both the committed and the peak token buckets. The + * rate of the peak bucket has to be bigger than zero, as well as greater than + * or equal to the rate of the committed bucket. + */ +struct rte_tm_shaper_params { + /** Committed token bucket */ + struct rte_tm_token_bucket committed; + + /** Peak token bucket */ + struct rte_tm_token_bucket peak; + + /** Signed value to be added to the length of each packet for the + * purpose of shaping. Can be used to correct the packet length with + * the framing overhead bytes that are also consumed on the wire (e.g. + * RTE_TM_ETH_FRAMING_OVERHEAD_FCS). + */ + int32_t pkt_length_adjust; +}; + +/** + * Node parameters + * + * Each non-leaf node has multiple inputs (its children nodes) and single output + * (which is input to its parent node). It arbitrates its inputs using Strict + * Priority (SP) and Weighted Fair Queuing (WFQ) algorithms to schedule input + * packets to its output while observing its shaping (rate limiting) + * constraints. + * + * Algorithms such as Weighted Round Robin (WRR), Byte-level WRR, Deficit WRR + * (DWRR), etc. are considered approximations of the WFQ ideal and are + * assimilated to WFQ, although an associated implementation-dependent trade-off + * on accuracy, performance and resource usage might exist. + * + * Children nodes with different priorities are scheduled using the SP algorithm + * based on their priority, with zero (0) as the highest priority. Children with + * the same priority are scheduled using the WFQ algorithm according to their + * weights. The WFQ weight of a given child node is relative to the sum of the + * weights of all its sibling nodes that have the same priority, with one (1) as + * the lowest weight. For each SP priority, the WFQ weight mode can be set as + * either byte-based or packet-based. + * + * Each leaf node sits on top of a TX queue of the current Ethernet port. Hence, + * the leaf nodes are predefined, with their node IDs set to 0 .. (N-1), where N + * is the number of TX queues configured for the current Ethernet port. The + * non-leaf nodes have their IDs generated by the application. + */ +struct rte_tm_node_params { + /** Shaper profile for the private shaper. The absence of the private + * shaper for the current node is indicated by setting this parameter + * to RTE_TM_SHAPER_PROFILE_ID_NONE. + */ + uint32_t shaper_profile_id; + + /** User allocated array of valid shared shaper IDs. */ + uint32_t *shared_shaper_id; + + /** Number of shared shaper IDs in the *shared_shaper_id* array. */ + uint32_t n_shared_shapers; + + RTE_STD_C11 + union { + /** Parameters only valid for non-leaf nodes. */ + struct { + /** WFQ weight mode for each SP priority. When NULL, it + * indicates that WFQ is to be used for all priorities. + * When non-NULL, it points to a pre-allocated array of + * *n_sp_priorities* values, with non-zero value for + * byte-mode and zero for packet-mode. + */ + int *wfq_weight_mode; + + /** Number of SP priorities. */ + uint32_t n_sp_priorities; + } nonleaf; + + /** Parameters only valid for leaf nodes. */ + struct { + /** Congestion management mode */ + enum rte_tm_cman_mode cman; + + /** WRED parameters (only valid when *cman* is set to + * WRED). + */ + struct { + /** WRED profile for private WRED context. The + * absence of a private WRED context for the + * current leaf node is indicated by value + * RTE_TM_WRED_PROFILE_ID_NONE. + */ + uint32_t wred_profile_id; + + /** User allocated array of shared WRED context + * IDs. When set to NULL, it indicates that the + * current leaf node should not currently be + * part of any shared WRED contexts. + */ + uint32_t *shared_wred_context_id; + + /** Number of elements in the + * *shared_wred_context_id* array. Only valid + * when *shared_wred_context_id* is non-NULL, + * in which case it should be non-zero. + */ + uint32_t n_shared_wred_contexts; + } wred; + } leaf; + }; + + /** Mask of statistics counter types to be enabled for this node. This + * needs to be a subset of the statistics counter types available for + * the current node. Any statistics counter type not included in this + * set is to be disabled for the current node. + * @see enum rte_tm_stats_type + */ + uint64_t stats_mask; +}; + +/** + * Verbose error types. + * + * Most of them provide the type of the object referenced by struct + * rte_tm_error::cause. + */ +enum rte_tm_error_type { + RTE_TM_ERROR_TYPE_NONE, /**< No error. */ + RTE_TM_ERROR_TYPE_UNSPECIFIED, /**< Cause unspecified. */ + RTE_TM_ERROR_TYPE_CAPABILITIES, + RTE_TM_ERROR_TYPE_LEVEL_ID, + RTE_TM_ERROR_TYPE_WRED_PROFILE, + RTE_TM_ERROR_TYPE_WRED_PROFILE_GREEN, + RTE_TM_ERROR_TYPE_WRED_PROFILE_YELLOW, + RTE_TM_ERROR_TYPE_WRED_PROFILE_RED, + RTE_TM_ERROR_TYPE_WRED_PROFILE_ID, + RTE_TM_ERROR_TYPE_SHARED_WRED_CONTEXT_ID, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_RATE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_SIZE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_RATE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_SIZE, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PKT_ADJUST_LEN, + RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID, + RTE_TM_ERROR_TYPE_SHARED_SHAPER_ID, + RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID, + RTE_TM_ERROR_TYPE_NODE_PRIORITY, + RTE_TM_ERROR_TYPE_NODE_WEIGHT, + RTE_TM_ERROR_TYPE_NODE_PARAMS, + RTE_TM_ERROR_TYPE_NODE_PARAMS_SHAPER_PROFILE_ID, + RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID, + RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS, + RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE, + RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES, + RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN, + RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID, + RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID, + RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS, + RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS, + RTE_TM_ERROR_TYPE_NODE_ID, +}; + +/** + * Verbose error structure definition. + * + * This object is normally allocated by applications and set by PMDs, the + * message points to a constant string which does not need to be freed by + * the application, however its pointer can be considered valid only as long + * as its associated DPDK port remains configured. Closing the underlying + * device or unloading the PMD invalidates it. + * + * Both cause and message may be NULL regardless of the error type. + */ +struct rte_tm_error { + enum rte_tm_error_type type; /**< Cause field and error type. */ + const void *cause; /**< Object responsible for the error. */ + const char *message; /**< Human-readable error message. */ +}; + +/** + * Traffic manager get number of leaf nodes + * + * Each leaf node sits on on top of a TX queue of the current Ethernet port. + * Therefore, the set of leaf nodes is predefined, their number is always equal + * to N (where N is the number of TX queues configured for the current port) + * and their IDs are 0 .. (N-1). + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] n_leaf_nodes + * Number of leaf nodes for the current port. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_get_number_of_leaf_nodes(uint16_t port_id, + uint32_t *n_leaf_nodes, + struct rte_tm_error *error); + +/** + * Traffic manager node ID validate and type (i.e. leaf or non-leaf) get + * + * The leaf nodes have predefined IDs in the range of 0 .. (N-1), where N is + * the number of TX queues of the current Ethernet port. The non-leaf nodes + * have their IDs generated by the application outside of the above range, + * which is reserved for leaf nodes. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID value. Needs to be valid. + * @param[out] is_leaf + * Set to non-zero value when node is leaf and to zero otherwise (non-leaf). + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_node_type_get(uint16_t port_id, + uint32_t node_id, + int *is_leaf, + struct rte_tm_error *error); + +/** + * Traffic manager capabilities get + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] cap + * Traffic manager capabilities. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_capabilities_get(uint16_t port_id, + struct rte_tm_capabilities *cap, + struct rte_tm_error *error); + +/** + * Traffic manager level capabilities get + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] level_id + * The hierarchy level identifier. The value of 0 identifies the level of the + * root node. + * @param[out] cap + * Traffic manager level capabilities. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_level_capabilities_get(uint16_t port_id, + uint32_t level_id, + struct rte_tm_level_capabilities *cap, + struct rte_tm_error *error); + +/** + * Traffic manager node capabilities get + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] cap + * Traffic manager node capabilities. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + */ +int +rte_tm_node_capabilities_get(uint16_t port_id, + uint32_t node_id, + struct rte_tm_node_capabilities *cap, + struct rte_tm_error *error); + +/** + * Traffic manager WRED profile add + * + * Create a new WRED profile with ID set to *wred_profile_id*. The new profile + * is used to create one or several WRED contexts. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] wred_profile_id + * WRED profile ID for the new profile. Needs to be unused. + * @param[in] profile + * WRED profile parameters. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_n_max + */ +int +rte_tm_wred_profile_add(uint16_t port_id, + uint32_t wred_profile_id, + struct rte_tm_wred_params *profile, + struct rte_tm_error *error); + +/** + * Traffic manager WRED profile delete + * + * Delete an existing WRED profile. This operation fails when there is + * currently at least one user (i.e. WRED context) of this WRED profile. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] wred_profile_id + * WRED profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_n_max + */ +int +rte_tm_wred_profile_delete(uint16_t port_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager shared WRED context add or update + * + * When *shared_wred_context_id* is invalid, a new WRED context with this ID is + * created by using the WRED profile identified by *wred_profile_id*. + * + * When *shared_wred_context_id* is valid, this WRED context is no longer using + * the profile previously assigned to it and is updated to use the profile + * identified by *wred_profile_id*. + * + * A valid shared WRED context can be assigned to several hierarchy leaf nodes + * configured to use WRED as the congestion management mode. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shared_wred_context_id + * Shared WRED context ID + * @param[in] wred_profile_id + * WRED profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max + */ +int +rte_tm_shared_wred_context_add_update(uint16_t port_id, + uint32_t shared_wred_context_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager shared WRED context delete + * + * Delete an existing shared WRED context. This operation fails when there is + * currently at least one user (i.e. hierarchy leaf node) of this shared WRED + * context. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shared_wred_context_id + * Shared WRED context ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max + */ +int +rte_tm_shared_wred_context_delete(uint16_t port_id, + uint32_t shared_wred_context_id, + struct rte_tm_error *error); + +/** + * Traffic manager shaper profile add + * + * Create a new shaper profile with ID set to *shaper_profile_id*. The new + * shaper profile is used to create one or several shapers. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shaper_profile_id + * Shaper profile ID for the new profile. Needs to be unused. + * @param[in] profile + * Shaper profile parameters. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_n_max + */ +int +rte_tm_shaper_profile_add(uint16_t port_id, + uint32_t shaper_profile_id, + struct rte_tm_shaper_params *profile, + struct rte_tm_error *error); + +/** + * Traffic manager shaper profile delete + * + * Delete an existing shaper profile. This operation fails when there is + * currently at least one user (i.e. shaper) of this shaper profile. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shaper_profile_id + * Shaper profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_n_max + */ +int +rte_tm_shaper_profile_delete(uint16_t port_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager shared shaper add or update + * + * When *shared_shaper_id* is not a valid shared shaper ID, a new shared shaper + * with this ID is created using the shaper profile identified by + * *shaper_profile_id*. + * + * When *shared_shaper_id* is a valid shared shaper ID, this shared shaper is + * no longer using the shaper profile previously assigned to it and is updated + * to use the shaper profile identified by *shaper_profile_id*. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shared_shaper_id + * Shared shaper ID + * @param[in] shaper_profile_id + * Shaper profile ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_shared_n_max + */ +int +rte_tm_shared_shaper_add_update(uint16_t port_id, + uint32_t shared_shaper_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager shared shaper delete + * + * Delete an existing shared shaper. This operation fails when there is + * currently at least one user (i.e. hierarchy node) of this shared shaper. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] shared_shaper_id + * Shared shaper ID. Needs to be the valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_shared_n_max + */ +int +rte_tm_shared_shaper_delete(uint16_t port_id, + uint32_t shared_shaper_id, + struct rte_tm_error *error); + +/** + * Traffic manager node add + * + * Create new node and connect it as child of an existing node. The new node is + * further identified by *node_id*, which needs to be unused by any of the + * existing nodes. The parent node is identified by *parent_node_id*, which + * needs to be the valid ID of an existing non-leaf node. The parent node is + * going to use the provided SP *priority* and WFQ *weight* to schedule its new + * child node. + * + * This function has to be called for both leaf and non-leaf nodes. In the case + * of leaf nodes (i.e. *node_id* is within the range of 0 .. (N-1), with N as + * the number of configured TX queues of the current port), the leaf node is + * configured rather than created (as the set of leaf nodes is predefined) and + * it is also connected as child of an existing node. + * + * The first node that is added becomes the root node and all the nodes that + * are subsequently added have to be added as descendants of the root node. The + * parent of the root node has to be specified as RTE_TM_NODE_ID_NULL and there + * can only be one node with this parent ID (i.e. the root node). Further + * restrictions for root node: needs to be non-leaf, its private shaper profile + * needs to be valid and single rate, cannot use any shared shapers. + * + * When called before rte_tm_hierarchy_commit() invocation, this function is + * typically used to define the initial start-up hierarchy for the port. + * Provided that dynamic hierarchy updates are supported by the current port (as + * advertised in the port capability set), this function can be also called + * after the rte_tm_hierarchy_commit() invocation. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be unused by any of the existing nodes. + * @param[in] parent_node_id + * Parent node ID. Needs to be the valid. + * @param[in] priority + * Node priority. The highest node priority is zero. Used by the SP algorithm + * running on the parent of the current node for scheduling this child node. + * @param[in] weight + * Node weight. The node weight is relative to the weight sum of all siblings + * that have the same priority. The lowest weight is one. Used by the WFQ + * algorithm running on the parent of the current node for scheduling this + * child node. + * @param[in] level_id + * Level ID that should be met by this node. The hierarchy level of the + * current node is already fully specified through its parent node (i.e. the + * level of this node is equal to the level of its parent node plus one), + * therefore the reason for providing this parameter is to enable the + * application to perform step-by-step checking of the node level during + * successive invocations of this function. When not desired, this check can + * be disabled by assigning value RTE_TM_NODE_LEVEL_ID_ANY to this parameter. + * @param[in] params + * Node parameters. Needs to be pre-allocated and valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see rte_tm_hierarchy_commit() + * @see RTE_TM_UPDATE_NODE_ADD_DELETE + * @see RTE_TM_NODE_LEVEL_ID_ANY + * @see struct rte_tm_capabilities + */ +int +rte_tm_node_add(uint16_t port_id, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + uint32_t level_id, + struct rte_tm_node_params *params, + struct rte_tm_error *error); + +/** + * Traffic manager node delete + * + * Delete an existing node. This operation fails when this node currently has + * at least one user (i.e. child node). + * + * When called before rte_tm_hierarchy_commit() invocation, this function is + * typically used to define the initial start-up hierarchy for the port. + * Provided that dynamic hierarchy updates are supported by the current port (as + * advertised in the port capability set), this function can be also called + * after the rte_tm_hierarchy_commit() invocation. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see RTE_TM_UPDATE_NODE_ADD_DELETE + */ +int +rte_tm_node_delete(uint16_t port_id, + uint32_t node_id, + struct rte_tm_error *error); + +/** + * Traffic manager node suspend + * + * Suspend an existing node. While the node is in suspended state, no packet is + * scheduled from this node and its descendants. The node exits the suspended + * state through the node resume operation. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see rte_tm_node_resume() + * @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME + */ +int +rte_tm_node_suspend(uint16_t port_id, + uint32_t node_id, + struct rte_tm_error *error); + +/** + * Traffic manager node resume + * + * Resume an existing node that is currently in suspended state. The node + * entered the suspended state as result of a previous node suspend operation. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see rte_tm_node_suspend() + * @see RTE_TM_UPDATE_NODE_SUSPEND_RESUME + */ +int +rte_tm_node_resume(uint16_t port_id, + uint32_t node_id, + struct rte_tm_error *error); + +/** + * Traffic manager hierarchy commit + * + * This function is called during the port initialization phase (before the + * Ethernet port is started) to freeze the start-up hierarchy. + * + * This function typically performs the following steps: + * a) It validates the start-up hierarchy that was previously defined for the + * current port through successive rte_tm_node_add() invocations; + * b) Assuming successful validation, it performs all the necessary port + * specific configuration operations to install the specified hierarchy on + * the current port, with immediate effect once the port is started. + * + * This function fails when the currently configured hierarchy is not supported + * by the Ethernet port, in which case the user can abort or try out another + * hierarchy configuration (e.g. a hierarchy with less leaf nodes), which can be + * build from scratch (when *clear_on_fail* is enabled) or by modifying the + * existing hierarchy configuration (when *clear_on_fail* is disabled). + * + * Note that this function can still fail due to other causes (e.g. not enough + * memory available in the system, etc), even though the specified hierarchy is + * supported in principle by the current port. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] clear_on_fail + * On function call failure, hierarchy is cleared when this parameter is + * non-zero and preserved when this parameter is equal to zero. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see rte_tm_node_add() + * @see rte_tm_node_delete() + */ +int +rte_tm_hierarchy_commit(uint16_t port_id, + int clear_on_fail, + struct rte_tm_error *error); + +/** + * Traffic manager node parent update + * + * Restriction for root node: its parent cannot be changed. + * + * This function can only be called after the rte_tm_hierarchy_commit() + * invocation. Its success depends on the port support for this operation, as + * advertised through the port capability set. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[in] parent_node_id + * Node ID for the new parent. Needs to be valid. + * @param[in] priority + * Node priority. The highest node priority is zero. Used by the SP algorithm + * running on the parent of the current node for scheduling this child node. + * @param[in] weight + * Node weight. The node weight is relative to the weight sum of all siblings + * that have the same priority. The lowest weight is zero. Used by the WFQ + * algorithm running on the parent of the current node for scheduling this + * child node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see RTE_TM_UPDATE_NODE_PARENT_KEEP_LEVEL + * @see RTE_TM_UPDATE_NODE_PARENT_CHANGE_LEVEL + */ +int +rte_tm_node_parent_update(uint16_t port_id, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + struct rte_tm_error *error); + +/** + * Traffic manager node private shaper update + * + * Restriction for the root node: its private shaper profile needs to be valid + * and single rate. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[in] shaper_profile_id + * Shaper profile ID for the private shaper of the current node. Needs to be + * either valid shaper profile ID or RTE_TM_SHAPER_PROFILE_ID_NONE, with + * the latter disabling the private shaper of the current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_private_n_max + */ +int +rte_tm_node_shaper_update(uint16_t port_id, + uint32_t node_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager node shared shapers update + * + * Restriction for root node: cannot use any shared rate shapers. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[in] shared_shaper_id + * Shared shaper ID. Needs to be valid. + * @param[in] add + * Set to non-zero value to add this shared shaper to current node or to zero + * to delete this shared shaper from current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::shaper_shared_n_max + */ +int +rte_tm_node_shared_shaper_update(uint16_t port_id, + uint32_t node_id, + uint32_t shared_shaper_id, + int add, + struct rte_tm_error *error); + +/** + * Traffic manager node enabled statistics counters update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[in] stats_mask + * Mask of statistics counter types to be enabled for the current node. This + * needs to be a subset of the statistics counter types available for the + * current node. Any statistics counter type not included in this set is to + * be disabled for the current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_tm_stats_type + * @see RTE_TM_UPDATE_NODE_STATS + */ +int +rte_tm_node_stats_update(uint16_t port_id, + uint32_t node_id, + uint64_t stats_mask, + struct rte_tm_error *error); + +/** + * Traffic manager node WFQ weight mode update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid non-leaf node ID. + * @param[in] wfq_weight_mode + * WFQ weight mode for each SP priority. When NULL, it indicates that WFQ is + * to be used for all priorities. When non-NULL, it points to a pre-allocated + * array of *n_sp_priorities* values, with non-zero value for byte-mode and + * zero for packet-mode. + * @param[in] n_sp_priorities + * Number of SP priorities. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see RTE_TM_UPDATE_NODE_WFQ_WEIGHT_MODE + * @see RTE_TM_UPDATE_NODE_N_SP_PRIORITIES + */ +int +rte_tm_node_wfq_weight_mode_update(uint16_t port_id, + uint32_t node_id, + int *wfq_weight_mode, + uint32_t n_sp_priorities, + struct rte_tm_error *error); + +/** + * Traffic manager node congestion management mode update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid leaf node ID. + * @param[in] cman + * Congestion management mode. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see RTE_TM_UPDATE_NODE_CMAN + */ +int +rte_tm_node_cman_update(uint16_t port_id, + uint32_t node_id, + enum rte_tm_cman_mode cman, + struct rte_tm_error *error); + +/** + * Traffic manager node private WRED context update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid leaf node ID. + * @param[in] wred_profile_id + * WRED profile ID for the private WRED context of the current node. Needs to + * be either valid WRED profile ID or RTE_TM_WRED_PROFILE_ID_NONE, with the + * latter disabling the private WRED context of the current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_private_n_max +*/ +int +rte_tm_node_wred_context_update(uint16_t port_id, + uint32_t node_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** + * Traffic manager node shared WRED context update + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid leaf node ID. + * @param[in] shared_wred_context_id + * Shared WRED context ID. Needs to be valid. + * @param[in] add + * Set to non-zero value to add this shared WRED context to current node or + * to zero to delete this shared WRED context from current node. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::cman_wred_context_shared_n_max + */ +int +rte_tm_node_shared_wred_context_update(uint16_t port_id, + uint32_t node_id, + uint32_t shared_wred_context_id, + int add, + struct rte_tm_error *error); + +/** + * Traffic manager node statistics counters read + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] node_id + * Node ID. Needs to be valid. + * @param[out] stats + * When non-NULL, it contains the current value for the statistics counters + * enabled for the current node. + * @param[out] stats_mask + * When non-NULL, it contains the mask of statistics counter types that are + * currently enabled for this node, indicating which of the counters + * retrieved with the *stats* structure are valid. + * @param[in] clear + * When this parameter has a non-zero value, the statistics counters are + * cleared (i.e. set to zero) immediately after they have been read, + * otherwise the statistics counters are left untouched. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see enum rte_tm_stats_type + */ +int +rte_tm_node_stats_read(uint16_t port_id, + uint32_t node_id, + struct rte_tm_node_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_tm_error *error); + +/** + * Traffic manager packet marking - VLAN DEI (IEEE 802.1Q) + * + * IEEE 802.1p maps the traffic class to the VLAN Priority Code Point (PCP) + * field (3 bits), while IEEE 802.1q maps the drop priority to the VLAN Drop + * Eligible Indicator (DEI) field (1 bit), which was previously named Canonical + * Format Indicator (CFI). + * + * All VLAN frames of a given color get their DEI bit set if marking is enabled + * for this color; otherwise, their DEI bit is left as is (either set or not). + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mark_green + * Set to non-zero value to enable marking of green packets and to zero to + * disable it. + * @param[in] mark_yellow + * Set to non-zero value to enable marking of yellow packets and to zero to + * disable it. + * @param[in] mark_red + * Set to non-zero value to enable marking of red packets and to zero to + * disable it. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::mark_vlan_dei_supported + */ +int +rte_tm_mark_vlan_dei(uint16_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +/** + * Traffic manager packet marking - IPv4 / IPv6 ECN (IETF RFC 3168) + * + * IETF RFCs 2474 and 3168 reorganize the IPv4 Type of Service (TOS) field + * (8 bits) and the IPv6 Traffic Class (TC) field (8 bits) into Differentiated + * Services Codepoint (DSCP) field (6 bits) and Explicit Congestion + * Notification (ECN) field (2 bits). The DSCP field is typically used to + * encode the traffic class and/or drop priority (RFC 2597), while the ECN + * field is used by RFC 3168 to implement a congestion notification mechanism + * to be leveraged by transport layer protocols such as TCP and SCTP that have + * congestion control mechanisms. + * + * When congestion is experienced, as alternative to dropping the packet, + * routers can change the ECN field of input packets from 2'b01 or 2'b10 + * (values indicating that source endpoint is ECN-capable) to 2'b11 (meaning + * that congestion is experienced). The destination endpoint can use the + * ECN-Echo (ECE) TCP flag to relay the congestion indication back to the + * source endpoint, which acknowledges it back to the destination endpoint with + * the Congestion Window Reduced (CWR) TCP flag. + * + * All IPv4/IPv6 packets of a given color with ECN set to 2’b01 or 2’b10 + * carrying TCP or SCTP have their ECN set to 2’b11 if the marking feature is + * enabled for the current color, otherwise the ECN field is left as is. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mark_green + * Set to non-zero value to enable marking of green packets and to zero to + * disable it. + * @param[in] mark_yellow + * Set to non-zero value to enable marking of yellow packets and to zero to + * disable it. + * @param[in] mark_red + * Set to non-zero value to enable marking of red packets and to zero to + * disable it. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::mark_ip_ecn_tcp_supported + * @see struct rte_tm_capabilities::mark_ip_ecn_sctp_supported + */ +int +rte_tm_mark_ip_ecn(uint16_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +/** + * Traffic manager packet marking - IPv4 / IPv6 DSCP (IETF RFC 2597) + * + * IETF RFC 2597 maps the traffic class and the drop priority to the IPv4/IPv6 + * Differentiated Services Codepoint (DSCP) field (6 bits). Here are the DSCP + * values proposed by this RFC: + * + * <pre> Class 1 Class 2 Class 3 Class 4 </pre> + * <pre> +----------+----------+----------+----------+</pre> + * <pre>Low Drop Prec | 001010 | 010010 | 011010 | 100010 |</pre> + * <pre>Medium Drop Prec | 001100 | 010100 | 011100 | 100100 |</pre> + * <pre>High Drop Prec | 001110 | 010110 | 011110 | 100110 |</pre> + * <pre> +----------+----------+----------+----------+</pre> + * + * There are 4 traffic classes (classes 1 .. 4) encoded by DSCP bits 1 and 2, + * as well as 3 drop priorities (low/medium/high) encoded by DSCP bits 3 and 4. + * + * All IPv4/IPv6 packets have their color marked into DSCP bits 3 and 4 as + * follows: green mapped to Low Drop Precedence (2’b01), yellow to Medium + * (2’b10) and red to High (2’b11). Marking needs to be explicitly enabled + * for each color; when not enabled for a given color, the DSCP field of all + * packets with that color is left as is. + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[in] mark_green + * Set to non-zero value to enable marking of green packets and to zero to + * disable it. + * @param[in] mark_yellow + * Set to non-zero value to enable marking of yellow packets and to zero to + * disable it. + * @param[in] mark_red + * Set to non-zero value to enable marking of red packets and to zero to + * disable it. + * @param[out] error + * Error details. Filled in only on error, when not NULL. + * @return + * 0 on success, non-zero error code otherwise. + * + * @see struct rte_tm_capabilities::mark_ip_dscp_supported + */ +int +rte_tm_mark_ip_dscp(uint16_t port_id, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_TM_H__ */ diff --git a/lib/librte_ethdev/rte_tm_driver.h b/lib/librte_ethdev/rte_tm_driver.h new file mode 100644 index 00000000..90114ff5 --- /dev/null +++ b/lib/librte_ethdev/rte_tm_driver.h @@ -0,0 +1,337 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#ifndef __INCLUDE_RTE_TM_DRIVER_H__ +#define __INCLUDE_RTE_TM_DRIVER_H__ + +/** + * @file + * RTE Generic Traffic Manager API (Driver Side) + * + * This file provides implementation helpers for internal use by PMDs, they + * are not intended to be exposed to applications and are not subject to ABI + * versioning. + */ + +#include <stdint.h> + +#include <rte_errno.h> +#include "rte_ethdev.h" +#include "rte_tm.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** @internal Traffic manager node ID validate and type get */ +typedef int (*rte_tm_node_type_get_t)(struct rte_eth_dev *dev, + uint32_t node_id, + int *is_leaf, + struct rte_tm_error *error); + +/** @internal Traffic manager capabilities get */ +typedef int (*rte_tm_capabilities_get_t)(struct rte_eth_dev *dev, + struct rte_tm_capabilities *cap, + struct rte_tm_error *error); + +/** @internal Traffic manager level capabilities get */ +typedef int (*rte_tm_level_capabilities_get_t)(struct rte_eth_dev *dev, + uint32_t level_id, + struct rte_tm_level_capabilities *cap, + struct rte_tm_error *error); + +/** @internal Traffic manager node capabilities get */ +typedef int (*rte_tm_node_capabilities_get_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_node_capabilities *cap, + struct rte_tm_error *error); + +/** @internal Traffic manager WRED profile add */ +typedef int (*rte_tm_wred_profile_add_t)(struct rte_eth_dev *dev, + uint32_t wred_profile_id, + struct rte_tm_wred_params *profile, + struct rte_tm_error *error); + +/** @internal Traffic manager WRED profile delete */ +typedef int (*rte_tm_wred_profile_delete_t)(struct rte_eth_dev *dev, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shared WRED context add */ +typedef int (*rte_tm_shared_wred_context_add_update_t)( + struct rte_eth_dev *dev, + uint32_t shared_wred_context_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shared WRED context delete */ +typedef int (*rte_tm_shared_wred_context_delete_t)( + struct rte_eth_dev *dev, + uint32_t shared_wred_context_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shaper profile add */ +typedef int (*rte_tm_shaper_profile_add_t)(struct rte_eth_dev *dev, + uint32_t shaper_profile_id, + struct rte_tm_shaper_params *profile, + struct rte_tm_error *error); + +/** @internal Traffic manager shaper profile delete */ +typedef int (*rte_tm_shaper_profile_delete_t)(struct rte_eth_dev *dev, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shared shaper add/update */ +typedef int (*rte_tm_shared_shaper_add_update_t)(struct rte_eth_dev *dev, + uint32_t shared_shaper_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager shared shaper delete */ +typedef int (*rte_tm_shared_shaper_delete_t)(struct rte_eth_dev *dev, + uint32_t shared_shaper_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node add */ +typedef int (*rte_tm_node_add_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + uint32_t level_id, + struct rte_tm_node_params *params, + struct rte_tm_error *error); + +/** @internal Traffic manager node delete */ +typedef int (*rte_tm_node_delete_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node suspend */ +typedef int (*rte_tm_node_suspend_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node resume */ +typedef int (*rte_tm_node_resume_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_error *error); + +/** @internal Traffic manager hierarchy commit */ +typedef int (*rte_tm_hierarchy_commit_t)(struct rte_eth_dev *dev, + int clear_on_fail, + struct rte_tm_error *error); + +/** @internal Traffic manager node parent update */ +typedef int (*rte_tm_node_parent_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t parent_node_id, + uint32_t priority, + uint32_t weight, + struct rte_tm_error *error); + +/** @internal Traffic manager node shaper update */ +typedef int (*rte_tm_node_shaper_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t shaper_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node shaper update */ +typedef int (*rte_tm_node_shared_shaper_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t shared_shaper_id, + int32_t add, + struct rte_tm_error *error); + +/** @internal Traffic manager node stats update */ +typedef int (*rte_tm_node_stats_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + uint64_t stats_mask, + struct rte_tm_error *error); + +/** @internal Traffic manager node WFQ weight mode update */ +typedef int (*rte_tm_node_wfq_weight_mode_update_t)( + struct rte_eth_dev *dev, + uint32_t node_id, + int *wfq_weight_mode, + uint32_t n_sp_priorities, + struct rte_tm_error *error); + +/** @internal Traffic manager node congestion management mode update */ +typedef int (*rte_tm_node_cman_update_t)(struct rte_eth_dev *dev, + uint32_t node_id, + enum rte_tm_cman_mode cman, + struct rte_tm_error *error); + +/** @internal Traffic manager node WRED context update */ +typedef int (*rte_tm_node_wred_context_update_t)( + struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t wred_profile_id, + struct rte_tm_error *error); + +/** @internal Traffic manager node WRED context update */ +typedef int (*rte_tm_node_shared_wred_context_update_t)( + struct rte_eth_dev *dev, + uint32_t node_id, + uint32_t shared_wred_context_id, + int add, + struct rte_tm_error *error); + +/** @internal Traffic manager read stats counters for specific node */ +typedef int (*rte_tm_node_stats_read_t)(struct rte_eth_dev *dev, + uint32_t node_id, + struct rte_tm_node_stats *stats, + uint64_t *stats_mask, + int clear, + struct rte_tm_error *error); + +/** @internal Traffic manager packet marking - VLAN DEI */ +typedef int (*rte_tm_mark_vlan_dei_t)(struct rte_eth_dev *dev, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +/** @internal Traffic manager packet marking - IPv4/IPv6 ECN */ +typedef int (*rte_tm_mark_ip_ecn_t)(struct rte_eth_dev *dev, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +/** @internal Traffic manager packet marking - IPv4/IPv6 DSCP */ +typedef int (*rte_tm_mark_ip_dscp_t)(struct rte_eth_dev *dev, + int mark_green, + int mark_yellow, + int mark_red, + struct rte_tm_error *error); + +struct rte_tm_ops { + /** Traffic manager node type get */ + rte_tm_node_type_get_t node_type_get; + + /** Traffic manager capabilities_get */ + rte_tm_capabilities_get_t capabilities_get; + /** Traffic manager level capabilities_get */ + rte_tm_level_capabilities_get_t level_capabilities_get; + /** Traffic manager node capabilities get */ + rte_tm_node_capabilities_get_t node_capabilities_get; + + /** Traffic manager WRED profile add */ + rte_tm_wred_profile_add_t wred_profile_add; + /** Traffic manager WRED profile delete */ + rte_tm_wred_profile_delete_t wred_profile_delete; + /** Traffic manager shared WRED context add/update */ + rte_tm_shared_wred_context_add_update_t + shared_wred_context_add_update; + /** Traffic manager shared WRED context delete */ + rte_tm_shared_wred_context_delete_t + shared_wred_context_delete; + + /** Traffic manager shaper profile add */ + rte_tm_shaper_profile_add_t shaper_profile_add; + /** Traffic manager shaper profile delete */ + rte_tm_shaper_profile_delete_t shaper_profile_delete; + /** Traffic manager shared shaper add/update */ + rte_tm_shared_shaper_add_update_t shared_shaper_add_update; + /** Traffic manager shared shaper delete */ + rte_tm_shared_shaper_delete_t shared_shaper_delete; + + /** Traffic manager node add */ + rte_tm_node_add_t node_add; + /** Traffic manager node delete */ + rte_tm_node_delete_t node_delete; + /** Traffic manager node suspend */ + rte_tm_node_suspend_t node_suspend; + /** Traffic manager node resume */ + rte_tm_node_resume_t node_resume; + /** Traffic manager hierarchy commit */ + rte_tm_hierarchy_commit_t hierarchy_commit; + + /** Traffic manager node parent update */ + rte_tm_node_parent_update_t node_parent_update; + /** Traffic manager node shaper update */ + rte_tm_node_shaper_update_t node_shaper_update; + /** Traffic manager node shared shaper update */ + rte_tm_node_shared_shaper_update_t node_shared_shaper_update; + /** Traffic manager node stats update */ + rte_tm_node_stats_update_t node_stats_update; + /** Traffic manager node WFQ weight mode update */ + rte_tm_node_wfq_weight_mode_update_t node_wfq_weight_mode_update; + /** Traffic manager node congestion management mode update */ + rte_tm_node_cman_update_t node_cman_update; + /** Traffic manager node WRED context update */ + rte_tm_node_wred_context_update_t node_wred_context_update; + /** Traffic manager node shared WRED context update */ + rte_tm_node_shared_wred_context_update_t + node_shared_wred_context_update; + /** Traffic manager read statistics counters for current node */ + rte_tm_node_stats_read_t node_stats_read; + + /** Traffic manager packet marking - VLAN DEI */ + rte_tm_mark_vlan_dei_t mark_vlan_dei; + /** Traffic manager packet marking - IPv4/IPv6 ECN */ + rte_tm_mark_ip_ecn_t mark_ip_ecn; + /** Traffic manager packet marking - IPv4/IPv6 DSCP */ + rte_tm_mark_ip_dscp_t mark_ip_dscp; +}; + +/** + * Initialize generic error structure. + * + * This function also sets rte_errno to a given value. + * + * @param[out] error + * Pointer to error structure (may be NULL). + * @param[in] code + * Related error code (rte_errno). + * @param[in] type + * Cause field and error type. + * @param[in] cause + * Object responsible for the error. + * @param[in] message + * Human-readable error message. + * + * @return + * Error code. + */ +static inline int +rte_tm_error_set(struct rte_tm_error *error, + int code, + enum rte_tm_error_type type, + const void *cause, + const char *message) +{ + if (error) { + *error = (struct rte_tm_error){ + .type = type, + .cause = cause, + .message = message, + }; + } + rte_errno = code; + return code; +} + +/** + * Get generic traffic manager operations structure from a port + * + * @param[in] port_id + * The port identifier of the Ethernet device. + * @param[out] error + * Error details + * + * @return + * The traffic manager operations structure associated with port_id on + * success, NULL otherwise. + */ +const struct rte_tm_ops * +rte_tm_ops_get(uint16_t port_id, struct rte_tm_error *error); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_TM_DRIVER_H__ */ |