| author | Hanoh Haim <hhaim@cisco.com> | 2016-11-08 13:10:15 +0200 |
|---|---|---|
| committer | Hanoh Haim <hhaim@cisco.com> | 2016-11-11 12:22:50 +0200 |
| commit | 7d8d95fbce7b101c51bae6f468b7942dcdaf1032 (patch) | |
| tree | a359b3f5ea392df30f4c376a3d4f72de48e49df9 /external_libs/ibverbs/include/infiniband/verbs_exp.h | |
| parent | 98dc1571776f4a8538e6ac72ce4e3fd4a2295026 (diff) | |
mlx5 support build WIP
Signed-off-by: Hanoh Haim <hhaim@cisco.com>
Diffstat (limited to 'external_libs/ibverbs/include/infiniband/verbs_exp.h')

| -rw-r--r-- | external_libs/ibverbs/include/infiniband/verbs_exp.h | 3585 |
|---|---|---|

1 files changed, 3585 insertions, 0 deletions
diff --git a/external_libs/ibverbs/include/infiniband/verbs_exp.h b/external_libs/ibverbs/include/infiniband/verbs_exp.h
new file mode 100644
index 00000000..ae94deb8
--- /dev/null
+++ b/external_libs/ibverbs/include/infiniband/verbs_exp.h
@@ -0,0 +1,3585 @@
+/*
+ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
+ * Copyright (c) 2004, 2011-2012 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2005 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef INFINIBAND_VERBS_EXP_H
+#define INFINIBAND_VERBS_EXP_H
+
+#include <infiniband/verbs.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#if __GNUC__ >= 3
+#  define __attribute_const __attribute__((const))
+#else
+#  define __attribute_const
+#endif
+
+BEGIN_C_DECLS
+
+#define IBV_EXP_RET_ON_INVALID_COMP_MASK(val, valid_mask, ret) \
+	if ((val) > (valid_mask)) { \
+		fprintf(stderr, "%s: invalid comp_mask !!! (comp_mask = 0x%x valid_mask = 0x%x)\n", \
+			__FUNCTION__, val, valid_mask); \
+		errno = EINVAL; \
+		return ret; \
+	}
+
+#define IBV_EXP_RET_NULL_ON_INVALID_COMP_MASK(val, valid_mask) \
+	IBV_EXP_RET_ON_INVALID_COMP_MASK(val, valid_mask, NULL)
+
+#define IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(val, valid_mask) \
+	IBV_EXP_RET_ON_INVALID_COMP_MASK(val, valid_mask, EINVAL)
+
+
+#define IBV_EXP_IMPLICIT_MR_SIZE (~((size_t)0))
+
+enum ibv_exp_func_name {
+	IBV_EXP_POST_SEND_FUNC,
+	IBV_EXP_POLL_CQ_FUNC,
+	IBV_POST_SEND_FUNC,
+	IBV_POLL_CQ_FUNC,
+	IBV_POST_RECV_FUNC
+};
+
+enum ibv_exp_start_values {
+	IBV_EXP_START_ENUM = 0x40,
+	IBV_EXP_START_FLAG_LOC = 0x20,
+	IBV_EXP_START_FLAG = (1ULL << IBV_EXP_START_FLAG_LOC),
+};
+
+/*
+ * Capabilities for exp_atomic_cap field in ibv_exp_device_attr struct
+ */
+enum ibv_exp_atomic_cap {
+	IBV_EXP_ATOMIC_NONE = IBV_ATOMIC_NONE,
+	IBV_EXP_ATOMIC_HCA = IBV_ATOMIC_HCA,
+	IBV_EXP_ATOMIC_GLOB = IBV_ATOMIC_GLOB,
+
+	IBV_EXP_ATOMIC_HCA_REPLY_BE = IBV_EXP_START_ENUM /* HOST is LE and atomic reply is BE */
+};
+
+/*
+ * Flags for exp_device_cap_flags field in ibv_exp_device_attr struct
+ */
+enum ibv_exp_device_cap_flags {
+	IBV_EXP_DEVICE_RESIZE_MAX_WR = IBV_DEVICE_RESIZE_MAX_WR,
+	IBV_EXP_DEVICE_BAD_PKEY_CNTR = IBV_DEVICE_BAD_PKEY_CNTR,
+	IBV_EXP_DEVICE_BAD_QKEY_CNTR = IBV_DEVICE_BAD_QKEY_CNTR,
+	IBV_EXP_DEVICE_RAW_MULTI = IBV_DEVICE_RAW_MULTI,
+	IBV_EXP_DEVICE_AUTO_PATH_MIG = IBV_DEVICE_AUTO_PATH_MIG,
+	IBV_EXP_DEVICE_CHANGE_PHY_PORT = IBV_DEVICE_CHANGE_PHY_PORT,
+	IBV_EXP_DEVICE_UD_AV_PORT_ENFORCE = IBV_DEVICE_UD_AV_PORT_ENFORCE,
+	IBV_EXP_DEVICE_CURR_QP_STATE_MOD = IBV_DEVICE_CURR_QP_STATE_MOD,
+	IBV_EXP_DEVICE_SHUTDOWN_PORT = IBV_DEVICE_SHUTDOWN_PORT,
+	IBV_EXP_DEVICE_INIT_TYPE = IBV_DEVICE_INIT_TYPE,
+	IBV_EXP_DEVICE_PORT_ACTIVE_EVENT = IBV_DEVICE_PORT_ACTIVE_EVENT,
+	IBV_EXP_DEVICE_SYS_IMAGE_GUID = IBV_DEVICE_SYS_IMAGE_GUID,
+	IBV_EXP_DEVICE_RC_RNR_NAK_GEN = IBV_DEVICE_RC_RNR_NAK_GEN,
+	IBV_EXP_DEVICE_SRQ_RESIZE = IBV_DEVICE_SRQ_RESIZE,
+	IBV_EXP_DEVICE_N_NOTIFY_CQ = IBV_DEVICE_N_NOTIFY_CQ,
+	IBV_EXP_DEVICE_XRC = IBV_DEVICE_XRC,
+
+	IBV_EXP_DEVICE_DC_TRANSPORT = (IBV_EXP_START_FLAG << 0),
+	IBV_EXP_DEVICE_QPG = (IBV_EXP_START_FLAG << 1),
+	IBV_EXP_DEVICE_UD_RSS = (IBV_EXP_START_FLAG << 2),
+	IBV_EXP_DEVICE_UD_TSS = (IBV_EXP_START_FLAG << 3),
+	IBV_EXP_DEVICE_EXT_ATOMICS = (IBV_EXP_START_FLAG << 4),
+	IBV_EXP_DEVICE_NOP = (IBV_EXP_START_FLAG << 5),
+	IBV_EXP_DEVICE_UMR = (IBV_EXP_START_FLAG << 6),
+	IBV_EXP_DEVICE_ODP = (IBV_EXP_START_FLAG << 7),
+	IBV_EXP_DEVICE_VXLAN_SUPPORT = (IBV_EXP_START_FLAG << 10),
+	IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT = (IBV_EXP_START_FLAG << 11),
+	IBV_EXP_DEVICE_RX_CSUM_IP_PKT = (IBV_EXP_START_FLAG << 12),
+	IBV_EXP_DEVICE_EC_OFFLOAD = (IBV_EXP_START_FLAG << 13),
+	IBV_EXP_DEVICE_EXT_MASKED_ATOMICS = (IBV_EXP_START_FLAG << 14),
+	IBV_EXP_DEVICE_RX_TCP_UDP_PKT_TYPE = (IBV_EXP_START_FLAG << 15),
+	IBV_EXP_DEVICE_SCATTER_FCS = (IBV_EXP_START_FLAG << 16),
+	IBV_EXP_DEVICE_MEM_WINDOW = (IBV_EXP_START_FLAG << 17),
+	IBV_EXP_DEVICE_MEM_MGT_EXTENSIONS = (IBV_EXP_START_FLAG << 21),
+	IBV_EXP_DEVICE_DC_INFO = (IBV_EXP_START_FLAG << 22),
+	/* Jumping to 23 as of next capability in include/rdma/ib_verbs.h */
+	IBV_EXP_DEVICE_MW_TYPE_2A = (IBV_EXP_START_FLAG << 23),
+	IBV_EXP_DEVICE_MW_TYPE_2B = (IBV_EXP_START_FLAG << 24),
+	IBV_EXP_DEVICE_CROSS_CHANNEL = (IBV_EXP_START_FLAG << 28),
+	IBV_EXP_DEVICE_MANAGED_FLOW_STEERING = (IBV_EXP_START_FLAG << 29),
+	IBV_EXP_DEVICE_MR_ALLOCATE = (IBV_EXP_START_FLAG << 30),
+	IBV_EXP_DEVICE_SHARED_MR = (IBV_EXP_START_FLAG << 31),
+};
+
+/*
+ * Flags for ibv_exp_device_attr struct comp_mask.
+ */
+enum ibv_exp_device_attr_comp_mask {
+	IBV_EXP_DEVICE_ATTR_CALC_CAP = (1 << 0),
+	IBV_EXP_DEVICE_ATTR_WITH_TIMESTAMP_MASK = (1 << 1),
+	IBV_EXP_DEVICE_ATTR_WITH_HCA_CORE_CLOCK = (1 << 2),
+	IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS = (1 << 3),
+	IBV_EXP_DEVICE_DC_RD_REQ = (1 << 4),
+	IBV_EXP_DEVICE_DC_RD_RES = (1 << 5),
+	IBV_EXP_DEVICE_ATTR_INLINE_RECV_SZ = (1 << 6),
+	IBV_EXP_DEVICE_ATTR_RSS_TBL_SZ = (1 << 7),
+	IBV_EXP_DEVICE_ATTR_EXT_ATOMIC_ARGS = (1 << 8),
+	IBV_EXP_DEVICE_ATTR_UMR = (1 << 9),
+	IBV_EXP_DEVICE_ATTR_ODP = (1 << 10),
+	IBV_EXP_DEVICE_ATTR_MAX_DCT = (1 << 11),
+	IBV_EXP_DEVICE_ATTR_MAX_CTX_RES_DOMAIN = (1 << 12),
+	IBV_EXP_DEVICE_ATTR_RX_HASH = (1 << 13),
+	IBV_EXP_DEVICE_ATTR_MAX_WQ_TYPE_RQ = (1 << 14),
+	IBV_EXP_DEVICE_ATTR_MAX_DEVICE_CTX = (1 << 15),
+	IBV_EXP_DEVICE_ATTR_MP_RQ = (1 << 16),
+	IBV_EXP_DEVICE_ATTR_VLAN_OFFLOADS = (1 << 17),
+	IBV_EXP_DEVICE_ATTR_EC_CAPS = (1 << 18),
+	IBV_EXP_DEVICE_ATTR_MASKED_ATOMICS = (1 << 19),
+	IBV_EXP_DEVICE_ATTR_RX_PAD_END_ALIGN = (1 << 20),
+	IBV_EXP_DEVICE_ATTR_TSO_CAPS = (1 << 21),
+	IBV_EXP_DEVICE_ATTR_PACKET_PACING_CAPS = (1 << 22),
+	/* set supported bits for validity check */
+	IBV_EXP_DEVICE_ATTR_RESERVED = (1 << 23),
+};
+
+struct ibv_exp_device_calc_cap {
+	uint64_t data_types;
+	uint64_t data_sizes;
+	uint64_t int_ops;
+	uint64_t uint_ops;
+	uint64_t fp_ops;
+};
+
+struct ibv_exp_ext_atomics_params {
+	/* defines which masked operation sizes are supported with same
+	 * endianness as stated in atomic_cap field
+	 */
+	uint64_t log_atomic_arg_sizes; /* bit-mask of supported sizes */
+	uint32_t max_fa_bit_boundary;
+	uint32_t log_max_atomic_inline;
+};
+
+struct ibv_exp_masked_atomic_params {
+	uint32_t max_fa_bit_boundary;
+	uint32_t log_max_atomic_inline;
+	uint64_t masked_log_atomic_arg_sizes;
+	uint64_t masked_log_atomic_arg_sizes_network_endianness;
+};
+
+enum ibv_odp_general_cap_bits {
+	IBV_EXP_ODP_SUPPORT = 1 << 0,
+};
+
+enum ibv_odp_transport_cap_bits {
+	IBV_EXP_ODP_SUPPORT_SEND = 1 << 0,
+	IBV_EXP_ODP_SUPPORT_RECV = 1 << 1,
+	IBV_EXP_ODP_SUPPORT_WRITE = 1 << 2,
+	IBV_EXP_ODP_SUPPORT_READ = 1 << 3,
+	IBV_EXP_ODP_SUPPORT_ATOMIC = 1 << 4,
+	IBV_EXP_ODP_SUPPORT_SRQ_RECV = 1 << 5,
+};
+
+struct ibv_exp_umr_caps {
+	uint32_t max_klm_list_size;
+	uint32_t max_send_wqe_inline_klms;
+	uint32_t max_umr_recursion_depth;
+	uint32_t max_umr_stride_dimension;
+};
+
+struct ibv_exp_odp_caps {
+	uint64_t general_odp_caps;
+	struct {
+		uint32_t rc_odp_caps;
+		uint32_t uc_odp_caps;
+		uint32_t ud_odp_caps;
+		uint32_t dc_odp_caps;
+		uint32_t xrc_odp_caps;
+		uint32_t raw_eth_odp_caps;
+	} per_transport_caps;
+};
+
+enum ibv_exp_supported_qp_types {
+	IBV_EXP_QPT_RC = 1ULL << 0,
+	IBV_EXP_QPT_UC = 1ULL << 1,
+	IBV_EXP_QPT_UD = 1ULL << 2,
+	IBV_EXP_QPT_XRC_INIT = 1ULL << 3,
+	IBV_EXP_QPT_XRC_TGT = 1ULL << 4,
+	IBV_EXP_QPT_RAW_PACKET = 1ULL << 5,
+	IBV_EXP_QPT_RESERVED = 1ULL << 6
+};
+
+struct ibv_exp_rx_hash_caps {
+	uint32_t max_rwq_indirection_tables;
+	uint32_t max_rwq_indirection_table_size;
+	uint8_t supported_hash_functions; /* from ibv_exp_rx_hash_function_flags */
+	uint64_t supported_packet_fields; /* from ibv_exp_rx_hash_fields */
+	uint32_t supported_qps; /* from ibv_exp_supported_qp_types */
+};
+
+enum ibv_exp_mp_rq_shifts {
+	IBV_EXP_MP_RQ_NO_SHIFT = 0,
+	IBV_EXP_MP_RQ_2BYTES_SHIFT = 1 << 0
+};
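The comp_mask/IBV_EXP_START_FLAG scheme above is how applications probe experimental capabilities without breaking against older libraries. A minimal sketch of the intended usage, assuming the `ibv_exp_query_device()` entry point that this header presumably declares further down (beyond the portion shown in this hunk):

```c
#include <string.h>
#include <infiniband/verbs_exp.h>

/* Probe experimental capabilities; a field is only valid if its
 * comp_mask bit is still set on return (the provider is expected to
 * clear bits it does not support). */
static int has_mp_rq(struct ibv_context *ctx)
{
	struct ibv_exp_device_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.comp_mask = IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
			 IBV_EXP_DEVICE_ATTR_MP_RQ;
	if (ibv_exp_query_device(ctx, &attr))
		return 0;
	return !!(attr.comp_mask & IBV_EXP_DEVICE_ATTR_MP_RQ);
}
```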
+
+struct ibv_exp_mp_rq_caps {
+	uint32_t supported_qps; /* use ibv_exp_supported_qp_types */
+	uint32_t allowed_shifts; /* use ibv_exp_mp_rq_shifts */
+	uint8_t min_single_wqe_log_num_of_strides;
+	uint8_t max_single_wqe_log_num_of_strides;
+	uint8_t min_single_stride_log_num_of_bytes;
+	uint8_t max_single_stride_log_num_of_bytes;
+};
+
+struct ibv_exp_ec_caps {
+	uint32_t max_ec_data_vector_count;
+	uint32_t max_ec_calc_inflight_calcs;
+};
+
+#define ibv_is_qpt_supported(caps, qpt) ((caps) & (1 << (qpt)))
+
+struct ibv_exp_tso_caps {
+	uint32_t max_tso;
+	uint32_t supported_qpts;
+};
+
+struct ibv_exp_packet_pacing_caps {
+	uint32_t qp_rate_limit_min;
+	uint32_t qp_rate_limit_max; /* In kbps */
+	uint32_t supported_qpts;
+	uint32_t reserved;
+};
+
+struct ibv_exp_device_attr {
+	char fw_ver[64];
+	uint64_t node_guid;
+	uint64_t sys_image_guid;
+	uint64_t max_mr_size;
+	uint64_t page_size_cap;
+	uint32_t vendor_id;
+	uint32_t vendor_part_id;
+	uint32_t hw_ver;
+	int max_qp;
+	int max_qp_wr;
+	int reserved; /* place holder to align with ibv_device_attr */
+	int max_sge;
+	int max_sge_rd;
+	int max_cq;
+	int max_cqe;
+	int max_mr;
+	int max_pd;
+	int max_qp_rd_atom;
+	int max_ee_rd_atom;
+	int max_res_rd_atom;
+	int max_qp_init_rd_atom;
+	int max_ee_init_rd_atom;
+	enum ibv_exp_atomic_cap exp_atomic_cap;
+	int max_ee;
+	int max_rdd;
+	int max_mw;
+	int max_raw_ipv6_qp;
+	int max_raw_ethy_qp;
+	int max_mcast_grp;
+	int max_mcast_qp_attach;
+	int max_total_mcast_qp_attach;
+	int max_ah;
+	int max_fmr;
+	int max_map_per_fmr;
+	int max_srq;
+	int max_srq_wr;
+	int max_srq_sge;
+	uint16_t max_pkeys;
+	uint8_t local_ca_ack_delay;
+	uint8_t phys_port_cnt;
+	uint32_t comp_mask;
+	struct ibv_exp_device_calc_cap calc_cap;
+	uint64_t timestamp_mask;
+	uint64_t hca_core_clock;
+	uint64_t exp_device_cap_flags; /* use ibv_exp_device_cap_flags */
+	int max_dc_req_rd_atom;
+	int max_dc_res_rd_atom;
+	int inline_recv_sz;
+	uint32_t max_rss_tbl_sz;
+	struct ibv_exp_ext_atomics_params ext_atom;
+	struct ibv_exp_umr_caps umr_caps;
+	struct ibv_exp_odp_caps odp_caps;
+	int max_dct;
+	int max_ctx_res_domain;
+	struct ibv_exp_rx_hash_caps rx_hash_caps;
+	uint32_t max_wq_type_rq;
+	int max_device_ctx;
+	struct ibv_exp_mp_rq_caps mp_rq_caps;
+	uint16_t wq_vlan_offloads_cap; /* use ibv_exp_vlan_offloads enum */
+	struct ibv_exp_ec_caps ec_caps;
+	struct ibv_exp_masked_atomic_params masked_atomic;
+	/*
+	 * The alignment of the padding end address.
+	 * When RX end of packet padding is enabled the device will pad the end
+	 * of RX packet up until the next address which is aligned to the
+	 * rx_pad_end_addr_align size.
+	 * Expected size for this field is according to system cache line size,
+	 * for example 64 or 128. When field is 0 padding is not supported.
+	 */
+	int rx_pad_end_addr_align;
+	struct ibv_exp_tso_caps tso_caps;
+	struct ibv_exp_packet_pacing_caps packet_pacing_caps;
+};
+
+enum ibv_exp_access_flags {
+	IBV_EXP_ACCESS_LOCAL_WRITE = IBV_ACCESS_LOCAL_WRITE,
+	IBV_EXP_ACCESS_REMOTE_WRITE = IBV_ACCESS_REMOTE_WRITE,
+	IBV_EXP_ACCESS_REMOTE_READ = IBV_ACCESS_REMOTE_READ,
+	IBV_EXP_ACCESS_REMOTE_ATOMIC = IBV_ACCESS_REMOTE_ATOMIC,
+	IBV_EXP_ACCESS_MW_BIND = IBV_ACCESS_MW_BIND,
+
+	IBV_EXP_ACCESS_ALLOCATE_MR = (IBV_EXP_START_FLAG << 5),
+	IBV_EXP_ACCESS_SHARED_MR_USER_READ = (IBV_EXP_START_FLAG << 6),
+	IBV_EXP_ACCESS_SHARED_MR_USER_WRITE = (IBV_EXP_START_FLAG << 7),
+	IBV_EXP_ACCESS_SHARED_MR_GROUP_READ = (IBV_EXP_START_FLAG << 8),
+	IBV_EXP_ACCESS_SHARED_MR_GROUP_WRITE = (IBV_EXP_START_FLAG << 9),
+	IBV_EXP_ACCESS_SHARED_MR_OTHER_READ = (IBV_EXP_START_FLAG << 10),
+	IBV_EXP_ACCESS_SHARED_MR_OTHER_WRITE = (IBV_EXP_START_FLAG << 11),
+	IBV_EXP_ACCESS_NO_RDMA = (IBV_EXP_START_FLAG << 12),
+	IBV_EXP_ACCESS_MW_ZERO_BASED = (IBV_EXP_START_FLAG << 13),
+	IBV_EXP_ACCESS_ON_DEMAND = (IBV_EXP_START_FLAG << 14),
+	IBV_EXP_ACCESS_RELAXED = (IBV_EXP_START_FLAG << 15),
+	IBV_EXP_ACCESS_PHYSICAL_ADDR = (IBV_EXP_START_FLAG << 16),
+	/* set supported bits for validity check */
+	IBV_EXP_ACCESS_RESERVED = (IBV_EXP_START_FLAG << 17)
+};
+
+/* memory window information struct that is common to types 1 and 2 */
+struct ibv_exp_mw_bind_info {
+	struct ibv_mr *mr;
+	uint64_t addr;
+	uint64_t length;
+	uint64_t exp_mw_access_flags; /* use ibv_exp_access_flags */
+};
+
+/*
+ * Flags for ibv_exp_mw_bind struct comp_mask
+ */
+enum ibv_exp_bind_mw_comp_mask {
+	IBV_EXP_BIND_MW_RESERVED = (1 << 0)
+};
+
+/* type 1 specific info */
+struct ibv_exp_mw_bind {
+	struct ibv_qp *qp;
+	struct ibv_mw *mw;
+	uint64_t wr_id;
+	uint64_t exp_send_flags; /* use ibv_exp_send_flags */
+	struct ibv_exp_mw_bind_info bind_info;
+	uint32_t comp_mask; /* reserved for future growth (must be 0) */
+};
+
+enum ibv_exp_calc_op {
+	IBV_EXP_CALC_OP_ADD = 0,
+	IBV_EXP_CALC_OP_MAXLOC,
+	IBV_EXP_CALC_OP_BAND,
+	IBV_EXP_CALC_OP_BXOR,
+	IBV_EXP_CALC_OP_BOR,
+	IBV_EXP_CALC_OP_NUMBER
+};
+
+enum ibv_exp_calc_data_type {
+	IBV_EXP_CALC_DATA_TYPE_INT = 0,
+	IBV_EXP_CALC_DATA_TYPE_UINT,
+	IBV_EXP_CALC_DATA_TYPE_FLOAT,
+	IBV_EXP_CALC_DATA_TYPE_NUMBER
+};
+
+enum ibv_exp_calc_data_size {
+	IBV_EXP_CALC_DATA_SIZE_64_BIT = 0,
+	IBV_EXP_CALC_DATA_SIZE_NUMBER
+};
+
+enum ibv_exp_wr_opcode {
+	IBV_EXP_WR_RDMA_WRITE = IBV_WR_RDMA_WRITE,
+	IBV_EXP_WR_RDMA_WRITE_WITH_IMM = IBV_WR_RDMA_WRITE_WITH_IMM,
+	IBV_EXP_WR_SEND = IBV_WR_SEND,
+	IBV_EXP_WR_SEND_WITH_IMM = IBV_WR_SEND_WITH_IMM,
+	IBV_EXP_WR_RDMA_READ = IBV_WR_RDMA_READ,
+	IBV_EXP_WR_ATOMIC_CMP_AND_SWP = IBV_WR_ATOMIC_CMP_AND_SWP,
+	IBV_EXP_WR_ATOMIC_FETCH_AND_ADD = IBV_WR_ATOMIC_FETCH_AND_ADD,
+
+	IBV_EXP_WR_SEND_WITH_INV = 8 + IBV_EXP_START_ENUM,
+	IBV_EXP_WR_LOCAL_INV = 10 + IBV_EXP_START_ENUM,
+	IBV_EXP_WR_BIND_MW = 14 + IBV_EXP_START_ENUM,
+	IBV_EXP_WR_TSO = 15 + IBV_EXP_START_ENUM,
+	IBV_EXP_WR_SEND_ENABLE = 0x20 + IBV_EXP_START_ENUM,
+	IBV_EXP_WR_RECV_ENABLE,
+	IBV_EXP_WR_CQE_WAIT,
+	IBV_EXP_WR_EXT_MASKED_ATOMIC_CMP_AND_SWP,
+	IBV_EXP_WR_EXT_MASKED_ATOMIC_FETCH_AND_ADD,
+	IBV_EXP_WR_NOP,
+	IBV_EXP_WR_UMR_FILL,
+	IBV_EXP_WR_UMR_INVALIDATE,
+};
+
+enum ibv_exp_send_flags {
+	IBV_EXP_SEND_FENCE = IBV_SEND_FENCE,
+	IBV_EXP_SEND_SIGNALED = IBV_SEND_SIGNALED,
+	IBV_EXP_SEND_SOLICITED = IBV_SEND_SOLICITED,
+	IBV_EXP_SEND_INLINE = IBV_SEND_INLINE,
+
+	IBV_EXP_SEND_IP_CSUM = (IBV_EXP_START_FLAG << 0),
+	IBV_EXP_SEND_WITH_CALC = (IBV_EXP_START_FLAG << 1),
+	IBV_EXP_SEND_WAIT_EN_LAST = (IBV_EXP_START_FLAG << 2),
+	IBV_EXP_SEND_EXT_ATOMIC_INLINE = (IBV_EXP_START_FLAG << 3),
+};
+
+struct ibv_exp_cmp_swap {
+	uint64_t compare_mask;
+	uint64_t compare_val;
+	uint64_t swap_val;
+	uint64_t swap_mask;
+};
+
+struct ibv_exp_fetch_add {
+	uint64_t add_val;
+	uint64_t field_boundary;
+};
+
+/*
+ * Flags for ibv_exp_send_wr struct comp_mask
+ */
+enum ibv_exp_send_wr_comp_mask {
+	IBV_EXP_SEND_WR_ATTR_RESERVED = 1 << 0
+};
+
+struct ibv_exp_mem_region {
+	uint64_t base_addr;
+	struct ibv_mr *mr;
+	size_t length;
+};
+
+struct ibv_exp_mem_repeat_block {
+	uint64_t base_addr; /* array, size corresponds to ndim */
+	struct ibv_mr *mr;
+	size_t *byte_count; /* array, size corresponds to ndim */
+	size_t *stride; /* array, size corresponds to ndim */
+};
+
+enum ibv_exp_umr_wr_type {
+	IBV_EXP_UMR_MR_LIST,
+	IBV_EXP_UMR_REPEAT
+};
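The TSO capability structure above is interpreted per QP type: `supported_qpts` is a bit-mask of `enum ibv_qp_type` values, tested with the `ibv_is_qpt_supported()` helper defined earlier. A hedged sketch (the `attr` is assumed to have been filled by a prior capability query):

```c
#include <infiniband/verbs_exp.h>

/* Check whether HW TSO is usable on a RAW_PACKET QP, given a
 * previously queried ibv_exp_device_attr. */
static int can_tso_on_raw_eth(const struct ibv_exp_device_attr *attr)
{
	if (!(attr->comp_mask & IBV_EXP_DEVICE_ATTR_TSO_CAPS))
		return 0;
	return attr->tso_caps.max_tso > 0 &&
	       ibv_is_qpt_supported(attr->tso_caps.supported_qpts,
				    IBV_QPT_RAW_PACKET);
}
```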
+
+struct ibv_exp_send_wr {
+	uint64_t wr_id;
+	struct ibv_exp_send_wr *next;
+	struct ibv_sge *sg_list;
+	int num_sge;
+	enum ibv_exp_wr_opcode exp_opcode; /* use ibv_exp_wr_opcode */
+	int reserved; /* place holder to align with ibv_send_wr */
+	union {
+		uint32_t imm_data; /* in network byte order */
+		uint32_t invalidate_rkey;
+	} ex;
+	union {
+		struct {
+			uint64_t remote_addr;
+			uint32_t rkey;
+		} rdma;
+		struct {
+			uint64_t remote_addr;
+			uint64_t compare_add;
+			uint64_t swap;
+			uint32_t rkey;
+		} atomic;
+		struct {
+			struct ibv_ah *ah;
+			uint32_t remote_qpn;
+			uint32_t remote_qkey;
+		} ud;
+	} wr;
+	union {
+		union {
+			struct {
+				uint32_t remote_srqn;
+			} xrc;
+		} qp_type;
+
+		uint32_t xrc_remote_srq_num;
+	};
+	union {
+		struct {
+			uint64_t remote_addr;
+			uint32_t rkey;
+		} rdma;
+		struct {
+			uint64_t remote_addr;
+			uint64_t compare_add;
+			uint64_t swap;
+			uint32_t rkey;
+		} atomic;
+		struct {
+			struct ibv_cq *cq;
+			int32_t cq_count;
+		} cqe_wait;
+		struct {
+			struct ibv_qp *qp;
+			int32_t wqe_count;
+		} wqe_enable;
+	} task;
+	union {
+		struct {
+			enum ibv_exp_calc_op calc_op;
+			enum ibv_exp_calc_data_type data_type;
+			enum ibv_exp_calc_data_size data_size;
+		} calc;
+	} op;
+	struct {
+		struct ibv_ah *ah;
+		uint64_t dct_access_key;
+		uint32_t dct_number;
+	} dc;
+	union {
+		struct {
+			struct ibv_mw *mw;
+			uint32_t rkey;
+			struct ibv_exp_mw_bind_info bind_info;
+		} bind_mw;
+		struct {
+			void *hdr;
+			uint16_t hdr_sz;
+			uint16_t mss;
+		} tso;
+	};
+	uint64_t exp_send_flags; /* use ibv_exp_send_flags */
+	uint32_t comp_mask; /* reserved for future growth (must be 0) */
+	union {
+		struct {
+			uint32_t umr_type; /* use ibv_exp_umr_wr_type */
+			struct ibv_exp_mkey_list_container *memory_objects; /* used when IBV_EXP_SEND_INLINE is not set */
+			uint64_t exp_access; /* use ibv_exp_access_flags */
+			struct ibv_mr *modified_mr;
+			uint64_t base_addr;
+			uint32_t num_mrs; /* array size of mem_repeat_block_list or mem_reg_list */
+			union {
+				struct ibv_exp_mem_region *mem_reg_list; /* array, size corresponds to num_mrs */
+				struct {
+					struct ibv_exp_mem_repeat_block *mem_repeat_block_list; /* array, size corresponds to num_mr */
+					size_t *repeat_count; /* array size corresponds to stride_dim */
+					uint32_t stride_dim;
+				} rb;
+			} mem_list;
+		} umr;
+		struct {
+			uint32_t log_arg_sz;
+			uint64_t remote_addr;
+			uint32_t rkey;
+			union {
+				struct {
+					/* For the next four fields:
+					 * If operand_size <= 8 then inline data is immediate
+					 * from the corresponding field; for small operands,
+					 * ls bits are used.
+					 * Else the fields are pointers in the process's address space
+					 * where arguments are stored
+					 */
+					union {
+						struct ibv_exp_cmp_swap cmp_swap;
+						struct ibv_exp_fetch_add fetch_add;
+					} op;
+				} inline_data; /* IBV_EXP_SEND_EXT_ATOMIC_INLINE is set */
+				/* in the future add support for non-inline argument provisioning */
+			} wr_data;
+		} masked_atomics;
+	} ext_op;
+};
+
+/*
+ * Flags for ibv_exp_values struct comp_mask
+ */
+enum ibv_exp_values_comp_mask {
+	IBV_EXP_VALUES_HW_CLOCK_NS = 1 << 0,
+	IBV_EXP_VALUES_HW_CLOCK = 1 << 1,
+	IBV_EXP_VALUES_RESERVED = 1 << 2
+};
+
+struct ibv_exp_values {
+	uint32_t comp_mask;
+	uint64_t hwclock_ns;
+	uint64_t hwclock;
+};
+
+/*
+ * Flags for flags field in the ibv_exp_cq_init_attr struct
+ */
+enum ibv_exp_cq_create_flags {
+	IBV_EXP_CQ_CREATE_CROSS_CHANNEL = 1 << 0,
+	IBV_EXP_CQ_TIMESTAMP = 1 << 1,
+	IBV_EXP_CQ_TIMESTAMP_TO_SYS_TIME = 1 << 2,
+	IBV_EXP_CQ_COMPRESSED_CQE = 1 << 3,
+	/*
+	 * note: update IBV_EXP_CQ_CREATE_FLAGS_MASK when adding new fields
+	 */
+};
+
+enum {
+	IBV_EXP_CQ_CREATE_FLAGS_MASK = IBV_EXP_CQ_CREATE_CROSS_CHANNEL |
+				       IBV_EXP_CQ_TIMESTAMP |
+				       IBV_EXP_CQ_TIMESTAMP_TO_SYS_TIME |
+				       IBV_EXP_CQ_COMPRESSED_CQE,
+};
+
+/*
+ * Flags for ibv_exp_cq_init_attr struct comp_mask
+ * Set flags only when relevant field is valid
+ */
+enum ibv_exp_cq_init_attr_mask {
+	IBV_EXP_CQ_INIT_ATTR_FLAGS = 1 << 0,
+	IBV_EXP_CQ_INIT_ATTR_RESERVED = 1 << 1, /* This field is kept for backward compatibility
+						 * of application which use the following to set comp_mask:
+						 * cq_init_attr.comp_mask = IBV_EXP_CQ_INIT_ATTR_RESERVED - 1
+						 * This kind of setting is no longer accepted and application
+						 * may set only valid known fields, for example:
+						 * cq_init_attr.comp_mask = IBV_EXP_CQ_INIT_ATTR_FLAGS |
+						 *                          IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN
+						 */
+	IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN = 1 << 1,
+	IBV_EXP_CQ_INIT_ATTR_PEER_DIRECT = 1 << 2,
+	IBV_EXP_CQ_INIT_ATTR_RESERVED1 = 1 << 3,
+};
+
+struct ibv_exp_res_domain {
+	struct ibv_context *context;
+};
+
+struct ibv_exp_cq_init_attr {
+	uint32_t comp_mask;
+	uint32_t flags;
+	struct ibv_exp_res_domain *res_domain;
+	struct ibv_exp_peer_direct_attr *peer_direct_attrs;
+};
+
+/*
+ * Flags for ibv_exp_ah_attr struct comp_mask
+ */
+enum ibv_exp_ah_attr_attr_comp_mask {
+	IBV_EXP_AH_ATTR_LL = 1 << 0,
+	IBV_EXP_AH_ATTR_VID = 1 << 1,
+	IBV_EXP_AH_ATTR_RESERVED = 1 << 2
+};
+
+enum ll_address_type {
+	LL_ADDRESS_UNKNOWN,
+	LL_ADDRESS_IB,
+	LL_ADDRESS_ETH,
+	LL_ADDRESS_SIZE
+};
+
+struct ibv_exp_ah_attr {
+	struct ibv_global_route grh;
+	uint16_t dlid;
+	uint8_t sl;
+	uint8_t src_path_bits;
+	uint8_t static_rate;
+	uint8_t is_global;
+	uint8_t port_num;
+	uint32_t comp_mask;
+	struct {
+		enum ll_address_type type;
+		uint32_t len;
+		char *address;
+	} ll_address;
+	uint16_t vid;
+};
+
+/*
+ * Flags for exp_attr_mask argument of ibv_exp_modify_qp
+ */
+enum ibv_exp_qp_attr_mask {
+	IBV_EXP_QP_STATE = IBV_QP_STATE,
+	IBV_EXP_QP_CUR_STATE = IBV_QP_CUR_STATE,
+	IBV_EXP_QP_EN_SQD_ASYNC_NOTIFY = IBV_QP_EN_SQD_ASYNC_NOTIFY,
+	IBV_EXP_QP_ACCESS_FLAGS = IBV_QP_ACCESS_FLAGS,
+	IBV_EXP_QP_PKEY_INDEX = IBV_QP_PKEY_INDEX,
+	IBV_EXP_QP_PORT = IBV_QP_PORT,
+	IBV_EXP_QP_QKEY = IBV_QP_QKEY,
+	IBV_EXP_QP_AV = IBV_QP_AV,
+	IBV_EXP_QP_PATH_MTU = IBV_QP_PATH_MTU,
+	IBV_EXP_QP_TIMEOUT = IBV_QP_TIMEOUT,
+	IBV_EXP_QP_RETRY_CNT = IBV_QP_RETRY_CNT,
+	IBV_EXP_QP_RNR_RETRY = IBV_QP_RNR_RETRY,
+	IBV_EXP_QP_RQ_PSN = IBV_QP_RQ_PSN,
+	IBV_EXP_QP_MAX_QP_RD_ATOMIC = IBV_QP_MAX_QP_RD_ATOMIC,
+	IBV_EXP_QP_ALT_PATH = IBV_QP_ALT_PATH,
+	IBV_EXP_QP_MIN_RNR_TIMER = IBV_QP_MIN_RNR_TIMER,
+	IBV_EXP_QP_SQ_PSN = IBV_QP_SQ_PSN,
+	IBV_EXP_QP_MAX_DEST_RD_ATOMIC = IBV_QP_MAX_DEST_RD_ATOMIC,
+	IBV_EXP_QP_PATH_MIG_STATE = IBV_QP_PATH_MIG_STATE,
+	IBV_EXP_QP_CAP = IBV_QP_CAP,
+	IBV_EXP_QP_DEST_QPN = IBV_QP_DEST_QPN,
+
+	IBV_EXP_QP_GROUP_RSS = IBV_EXP_START_FLAG << 21,
+	IBV_EXP_QP_DC_KEY = IBV_EXP_START_FLAG << 22,
+	IBV_EXP_QP_FLOW_ENTROPY = IBV_EXP_START_FLAG << 23,
+	IBV_EXP_QP_RATE_LIMIT = IBV_EXP_START_FLAG << 25,
+};
+
+/*
+ * Flags for ibv_exp_qp_attr struct comp_mask
+ * Set flags only when relevant field is valid
+ */
+enum ibv_exp_qp_attr_comp_mask {
+	IBV_EXP_QP_ATTR_FLOW_ENTROPY = 1UL << 0,
+	IBV_EXP_QP_ATTR_RESERVED = 1UL << 1
+};
+
+struct ibv_exp_qp_attr {
+	enum ibv_qp_state qp_state;
+	enum ibv_qp_state cur_qp_state;
+	enum ibv_mtu path_mtu;
+	enum ibv_mig_state path_mig_state;
+	uint32_t qkey;
+	uint32_t rq_psn;
+	uint32_t sq_psn;
+	uint32_t dest_qp_num;
+	int qp_access_flags; /* use ibv_access_flags from verbs.h */
+	struct ibv_qp_cap cap;
+	struct ibv_ah_attr ah_attr;
+	struct ibv_ah_attr alt_ah_attr;
+	uint16_t pkey_index;
+	uint16_t alt_pkey_index;
+	uint8_t en_sqd_async_notify;
+	uint8_t sq_draining;
+	uint8_t max_rd_atomic;
+	uint8_t max_dest_rd_atomic;
+	uint8_t min_rnr_timer;
+	uint8_t port_num;
+	uint8_t timeout;
+	uint8_t retry_cnt;
+	uint8_t rnr_retry;
+	uint8_t alt_port_num;
+	uint8_t alt_timeout;
+	uint64_t dct_key;
+	uint32_t comp_mask; /* reserved for future growth (must be 0) */
+	uint32_t flow_entropy;
+	uint32_t rate_limit;
+};
+
+/*
+ * Flags for ibv_exp_qp_init_attr struct comp_mask
+ * Set flags only when relevant field is valid
+ */
+enum ibv_exp_qp_init_attr_comp_mask {
+	IBV_EXP_QP_INIT_ATTR_PD = 1 << 0,
+	IBV_EXP_QP_INIT_ATTR_XRCD = 1 << 1,
+	IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS = 1 << 2,
+	IBV_EXP_QP_INIT_ATTR_INL_RECV = 1 << 3,
+	IBV_EXP_QP_INIT_ATTR_QPG = 1 << 4,
+	IBV_EXP_QP_INIT_ATTR_ATOMICS_ARG = 1 << 5,
+	IBV_EXP_QP_INIT_ATTR_MAX_INL_KLMS = 1 << 6,
+	IBV_EXP_QP_INIT_ATTR_RESERVED = 1 << 7, /* This field is kept for backward compatibility
+						 * of application which use the following to set comp_mask:
+						 * qp_init_attr.comp_mask = IBV_EXP_QP_INIT_ATTR_RESERVED - 1
+						 * This kind of setting is no longer accepted and application
+						 * may set only valid known fields, for example:
+						 * qp_init_attr.comp_mask = IBV_EXP_QP_INIT_ATTR_PD |
+						 *                          IBV_EXP_QP_INIT_ATTR_CREATE_FLAGS
+						 */
+	IBV_EXP_QP_INIT_ATTR_RES_DOMAIN = 1 << 7,
+	IBV_EXP_QP_INIT_ATTR_RX_HASH = 1 << 8,
+	IBV_EXP_QP_INIT_ATTR_PORT = 1 << 9,
+	IBV_EXP_QP_INIT_ATTR_PEER_DIRECT = 1 << 10,
+	IBV_EXP_QP_INIT_ATTR_MAX_TSO_HEADER = 1 << 11,
+	IBV_EXP_QP_INIT_ATTR_RESERVED1 = 1 << 12,
+};
+
+enum ibv_exp_qpg_type {
+	IBV_EXP_QPG_NONE = 0,
+	IBV_EXP_QPG_PARENT = (1<<0),
+	IBV_EXP_QPG_CHILD_RX = (1<<1),
+	IBV_EXP_QPG_CHILD_TX = (1<<2)
+};
+
+struct ibv_exp_qpg_init_attrib {
+	uint32_t tss_child_count;
+	uint32_t rss_child_count;
+};
+
+struct ibv_exp_qpg {
+	uint32_t qpg_type;
+	union {
+		struct ibv_qp *qpg_parent; /* see qpg_type */
+		struct ibv_exp_qpg_init_attrib parent_attrib;
+	};
+};
+
+/*
+ * RX Hash Function flags.
+ */
+enum ibv_exp_rx_hash_function_flags {
+	IBV_EXP_RX_HASH_FUNC_TOEPLITZ = 1 << 0,
+	IBV_EXP_RX_HASH_FUNC_XOR = 1 << 1
+};
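`ibv_exp_send_wr` extends the standard `ibv_send_wr` with the `exp_opcode`/`exp_send_flags` pair shown above. A minimal sketch of posting one send with HW checksum offload, assuming the `ibv_exp_post_send()` entry point that this header presumably declares later on (beyond the shown hunk):

```c
#include <string.h>
#include <infiniband/verbs_exp.h>

/* Post one signaled send with IP/TCP/UDP checksum offload requested. */
static int post_csum_send(struct ibv_qp *qp, struct ibv_sge *sge)
{
	struct ibv_exp_send_wr wr, *bad;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = 1;
	wr.sg_list = sge;
	wr.num_sge = 1;
	wr.exp_opcode = IBV_EXP_WR_SEND;
	wr.exp_send_flags = IBV_EXP_SEND_SIGNALED | IBV_EXP_SEND_IP_CSUM;
	return ibv_exp_post_send(qp, &wr, &bad);
}
```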
+
+/*
+ * RX Hash flags. These flags select which incoming packet fields
+ * participate in the RX Hash. Each flag represents a certain packet
+ * field; when the flag is set, that field participates in the RX Hash
+ * calculation.
+ * Notice: *IPV4 and *IPV6 flags can't be enabled together on the same QP
+ * and *TCP and *UDP flags can't be enabled together on the same QP.
+ */
+enum ibv_exp_rx_hash_fields {
+	IBV_EXP_RX_HASH_SRC_IPV4 = 1 << 0,
+	IBV_EXP_RX_HASH_DST_IPV4 = 1 << 1,
+	IBV_EXP_RX_HASH_SRC_IPV6 = 1 << 2,
+	IBV_EXP_RX_HASH_DST_IPV6 = 1 << 3,
+	IBV_EXP_RX_HASH_SRC_PORT_TCP = 1 << 4,
+	IBV_EXP_RX_HASH_DST_PORT_TCP = 1 << 5,
+	IBV_EXP_RX_HASH_SRC_PORT_UDP = 1 << 6,
+	IBV_EXP_RX_HASH_DST_PORT_UDP = 1 << 7
+};
+
+/*
+ * RX Hash QP configuration. Sets hash function, hash types and
+ * Indirection table for QPs with enabled IBV_QP_INIT_ATTR_RX_HASH flag.
+ */
+struct ibv_exp_rx_hash_conf {
+	/* enum ibv_exp_rx_hash_function_flags */
+	uint8_t rx_hash_function;
+	/* valid only for Toeplitz */
+	uint8_t rx_hash_key_len;
+	uint8_t *rx_hash_key;
+	/* enum ibv_exp_rx_hash_fields */
+	uint64_t rx_hash_fields_mask;
+	struct ibv_exp_rwq_ind_table *rwq_ind_tbl;
+};
+
+/*
+ * Flags for exp_create_flags field in ibv_exp_qp_init_attr struct
+ */
+enum ibv_exp_qp_create_flags {
+	IBV_EXP_QP_CREATE_CROSS_CHANNEL = (1 << 2),
+	IBV_EXP_QP_CREATE_MANAGED_SEND = (1 << 3),
+	IBV_EXP_QP_CREATE_MANAGED_RECV = (1 << 4),
+	IBV_EXP_QP_CREATE_IGNORE_SQ_OVERFLOW = (1 << 6),
+	IBV_EXP_QP_CREATE_IGNORE_RQ_OVERFLOW = (1 << 7),
+	IBV_EXP_QP_CREATE_ATOMIC_BE_REPLY = (1 << 8),
+	IBV_EXP_QP_CREATE_UMR = (1 << 9),
+	IBV_EXP_QP_CREATE_EC_PARITY_EN = (1 << 10),
+	IBV_EXP_QP_CREATE_RX_END_PADDING = (1 << 11),
+	IBV_EXP_QP_CREATE_SCATTER_FCS = (1 << 12),
+	IBV_EXP_QP_CREATE_INTERNAL_USE = (1 << 15),
+	/* set supported bits for validity check */
+	IBV_EXP_QP_CREATE_MASK = (0x00001FDC)
+};
+
+struct ibv_exp_qp_init_attr {
+	void *qp_context;
+	struct ibv_cq *send_cq;
+	struct ibv_cq *recv_cq;
+	struct ibv_srq *srq;
+	struct ibv_qp_cap cap;
+	enum ibv_qp_type qp_type;
+	int sq_sig_all;
+
+	uint32_t comp_mask; /* use ibv_exp_qp_init_attr_comp_mask */
+	struct ibv_pd *pd;
+	struct ibv_xrcd *xrcd;
+	uint32_t exp_create_flags; /* use ibv_exp_qp_create_flags */
+
+	uint32_t max_inl_recv;
+	struct ibv_exp_qpg qpg;
+	uint32_t max_atomic_arg;
+	uint32_t max_inl_send_klms;
+	struct ibv_exp_res_domain *res_domain;
+	struct ibv_exp_rx_hash_conf *rx_hash_conf;
+	uint8_t port_num;
+	struct ibv_exp_peer_direct_attr *peer_direct_attrs;
+	uint16_t max_tso_header;
+};
+
+/*
+ * Flags for ibv_exp_dct_init_attr struct comp_mask
+ */
+enum ibv_exp_dct_init_attr_comp_mask {
+	IBV_EXP_DCT_INIT_ATTR_RESERVED = 1 << 0
+};
+
+enum {
+	IBV_EXP_DCT_CREATE_FLAGS_MASK = (1 << 0) - 1,
+};
+
+struct ibv_exp_dct_init_attr {
+	struct ibv_pd *pd;
+	struct ibv_cq *cq;
+	struct ibv_srq *srq;
+	uint64_t dc_key;
+	uint8_t port;
+	uint32_t access_flags; /* use ibv_access_flags from verbs.h */
+	uint8_t min_rnr_timer;
+	uint8_t tclass;
+	uint32_t flow_label;
+	enum ibv_mtu mtu;
+	uint8_t pkey_index;
+	uint8_t gid_index;
+	uint8_t hop_limit;
+	uint32_t inline_size;
+	uint32_t create_flags;
+	uint32_t comp_mask; /* reserved for future growth (must be 0) */
+};
+
+enum {
+	IBV_EXP_DCT_STATE_ACTIVE = 0,
+	IBV_EXP_DCT_STATE_DRAINING = 1,
+	IBV_EXP_DCT_STATE_DRAINED = 2
+};
+
+/*
+ * Flags for ibv_exp_dct_attr struct comp_mask
+ */
+enum ibv_exp_dct_attr_comp_mask {
+	IBV_EXP_DCT_ATTR_RESERVED = 1 << 0
+};
+
+struct ibv_exp_dct_attr {
+	uint64_t dc_key;
+	uint8_t port;
+	uint32_t access_flags; /* use ibv_access_flags from verbs.h */
+	uint8_t min_rnr_timer;
+	uint8_t tclass;
+	uint32_t flow_label;
+	enum ibv_mtu mtu;
+	uint8_t pkey_index;
+	uint8_t gid_index;
+	uint8_t hop_limit;
+	uint32_t key_violations;
+	uint8_t state;
+	struct ibv_srq *srq;
+	struct ibv_cq *cq;
+	struct ibv_pd *pd;
+	uint32_t comp_mask; /* reserved for future growth (must be 0) */
+};
+
+enum {
+	IBV_EXP_QUERY_PORT_STATE = 1 << 0,
+	IBV_EXP_QUERY_PORT_MAX_MTU = 1 << 1,
+	IBV_EXP_QUERY_PORT_ACTIVE_MTU = 1 << 2,
+	IBV_EXP_QUERY_PORT_GID_TBL_LEN = 1 << 3,
+	IBV_EXP_QUERY_PORT_CAP_FLAGS = 1 << 4,
+	IBV_EXP_QUERY_PORT_MAX_MSG_SZ = 1 << 5,
+	IBV_EXP_QUERY_PORT_BAD_PKEY_CNTR = 1 << 6,
+	IBV_EXP_QUERY_PORT_QKEY_VIOL_CNTR = 1 << 7,
+	IBV_EXP_QUERY_PORT_PKEY_TBL_LEN = 1 << 8,
+	IBV_EXP_QUERY_PORT_LID = 1 << 9,
+	IBV_EXP_QUERY_PORT_SM_LID = 1 << 10,
+	IBV_EXP_QUERY_PORT_LMC = 1 << 11,
+	IBV_EXP_QUERY_PORT_MAX_VL_NUM = 1 << 12,
+	IBV_EXP_QUERY_PORT_SM_SL = 1 << 13,
+	IBV_EXP_QUERY_PORT_SUBNET_TIMEOUT = 1 << 14,
+	IBV_EXP_QUERY_PORT_INIT_TYPE_REPLY = 1 << 15,
+	IBV_EXP_QUERY_PORT_ACTIVE_WIDTH = 1 << 16,
+	IBV_EXP_QUERY_PORT_ACTIVE_SPEED = 1 << 17,
+	IBV_EXP_QUERY_PORT_PHYS_STATE = 1 << 18,
+	IBV_EXP_QUERY_PORT_LINK_LAYER = 1 << 19,
+	/* mask of the fields that exists in the standard query_port_command */
+	IBV_EXP_QUERY_PORT_STD_MASK = (1 << 20) - 1,
+	/* mask of all supported fields */
+	IBV_EXP_QUERY_PORT_MASK = IBV_EXP_QUERY_PORT_STD_MASK,
+};
+
+/*
+ * Flags for ibv_exp_port_attr struct comp_mask
+ * Set flags only when relevant field is valid
+ */
+enum ibv_exp_query_port_attr_comp_mask {
+	IBV_EXP_QUERY_PORT_ATTR_MASK1 = 1 << 0,
+	IBV_EXP_QUERY_PORT_ATTR_RESERVED = 1 << 1,
+
+	IBV_EXP_QUERY_PORT_ATTR_MASKS = IBV_EXP_QUERY_PORT_ATTR_RESERVED - 1
+};
+
+struct ibv_exp_port_attr {
+	union {
+		struct {
+			enum ibv_port_state state;
+			enum ibv_mtu max_mtu;
+			enum ibv_mtu active_mtu;
+			int gid_tbl_len;
+			uint32_t port_cap_flags;
+			uint32_t max_msg_sz;
+			uint32_t bad_pkey_cntr;
+			uint32_t qkey_viol_cntr;
+			uint16_t pkey_tbl_len;
+			uint16_t lid;
+			uint16_t sm_lid;
+			uint8_t lmc;
+			uint8_t max_vl_num;
+			uint8_t sm_sl;
+			uint8_t subnet_timeout;
+			uint8_t init_type_reply;
+			uint8_t active_width;
+			uint8_t active_speed;
+			uint8_t phys_state;
+			uint8_t link_layer;
+			uint8_t reserved;
+		};
+		struct ibv_port_attr port_attr;
+	};
+	uint32_t comp_mask;
+	uint32_t mask1;
+};
+
+enum ibv_exp_cq_attr_mask {
+	IBV_EXP_CQ_MODERATION = 1 << 0,
+	IBV_EXP_CQ_CAP_FLAGS = 1 << 1
+};
+
+enum ibv_exp_cq_cap_flags {
+	IBV_EXP_CQ_IGNORE_OVERRUN = (1 << 0),
+	/* set supported bits for validity check */
+	IBV_EXP_CQ_CAP_MASK = (0x00000001)
+};
+
+/*
+ * Flags for ibv_exp_cq_attr struct comp_mask
+ * Set flags only when relevant field is valid
+ */
+enum ibv_exp_cq_attr_comp_mask {
+	IBV_EXP_CQ_ATTR_MODERATION = (1 << 0),
+	IBV_EXP_CQ_ATTR_CQ_CAP_FLAGS = (1 << 1),
+	/* set supported bits for validity check */
+	IBV_EXP_CQ_ATTR_RESERVED = (1 << 2)
+};
+
+struct ibv_exp_cq_attr {
+	uint32_t comp_mask;
+	struct {
+		uint16_t cq_count;
+		uint16_t cq_period;
+	} moderation;
+	uint32_t cq_cap_flags;
+};
+
+enum ibv_exp_rereg_mr_flags {
+	IBV_EXP_REREG_MR_CHANGE_TRANSLATION = IBV_REREG_MR_CHANGE_TRANSLATION,
+	IBV_EXP_REREG_MR_CHANGE_PD = IBV_REREG_MR_CHANGE_PD,
+	IBV_EXP_REREG_MR_CHANGE_ACCESS = IBV_REREG_MR_CHANGE_ACCESS,
+	IBV_EXP_REREG_MR_KEEP_VALID = IBV_REREG_MR_KEEP_VALID,
+	IBV_EXP_REREG_MR_FLAGS_SUPPORTED = ((IBV_EXP_REREG_MR_KEEP_VALID << 1) - 1)
+};
+
+enum ibv_exp_rereg_mr_attr_mask {
+	IBV_EXP_REREG_MR_ATTR_RESERVED = (1 << 0)
+};
+
+struct ibv_exp_rereg_mr_attr {
+	uint32_t comp_mask; /* use ibv_exp_rereg_mr_attr_mask */
+};
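The `comp_mask`-gated `ibv_exp_qp_init_attr` above is how optional features such as inline receive are requested at QP creation. A hedged sketch, assuming the `ibv_exp_create_qp()` entry point presumably declared later in this header:

```c
#include <string.h>
#include <infiniband/verbs_exp.h>

/* Create a RAW_PACKET QP that asks for 64 bytes of inline receive. */
static struct ibv_qp *create_inl_recv_qp(struct ibv_context *ctx,
					 struct ibv_pd *pd, struct ibv_cq *cq)
{
	struct ibv_exp_qp_init_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.send_cq = cq;
	attr.recv_cq = cq;
	attr.qp_type = IBV_QPT_RAW_PACKET;
	attr.cap.max_send_wr = 64;
	attr.cap.max_recv_wr = 64;
	attr.cap.max_send_sge = 1;
	attr.cap.max_recv_sge = 1;
	attr.pd = pd;
	/* Only set comp_mask bits for fields actually filled in. */
	attr.comp_mask = IBV_EXP_QP_INIT_ATTR_PD | IBV_EXP_QP_INIT_ATTR_INL_RECV;
	attr.max_inl_recv = 64;
	return ibv_exp_create_qp(ctx, &attr);
}
```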
+
+/*
+ * Flags for ibv_exp_reg_shared_mr_in struct comp_mask
+ */
+enum ibv_exp_reg_shared_mr_comp_mask {
+	IBV_EXP_REG_SHARED_MR_RESERVED = (1 << 0)
+};
+
+struct ibv_exp_reg_shared_mr_in {
+	uint32_t mr_handle;
+	struct ibv_pd *pd;
+	void *addr;
+	uint64_t exp_access; /* use ibv_exp_access_flags */
+	uint32_t comp_mask; /* reserved for future growth (must be 0) */
+};
+
+enum ibv_exp_flow_flags {
+	IBV_EXP_FLOW_ATTR_FLAGS_ALLOW_LOOP_BACK = 1,
+};
+
+enum ibv_exp_flow_attr_type {
+	/* steering according to rule specifications */
+	IBV_EXP_FLOW_ATTR_NORMAL = 0x0,
+	/* default unicast and multicast rule -
+	 * receive all Eth traffic which isn't steered to any QP
+	 */
+	IBV_EXP_FLOW_ATTR_ALL_DEFAULT = 0x1,
+	/* default multicast rule -
+	 * receive all Eth multicast traffic which isn't steered to any QP
+	 */
+	IBV_EXP_FLOW_ATTR_MC_DEFAULT = 0x2,
+	/* sniffer rule - receive all port traffic */
+	IBV_EXP_FLOW_ATTR_SNIFFER = 0x3,
+};
+
+enum ibv_exp_flow_spec_type {
+	IBV_EXP_FLOW_SPEC_ETH = 0x20,
+	IBV_EXP_FLOW_SPEC_IB = 0x21,
+	IBV_EXP_FLOW_SPEC_IPV4 = 0x30,
+	IBV_EXP_FLOW_SPEC_IPV6 = 0x31,
+	IBV_EXP_FLOW_SPEC_IPV4_EXT = 0x32,
+	IBV_EXP_FLOW_SPEC_IPV6_EXT = 0x33,
+	IBV_EXP_FLOW_SPEC_TCP = 0x40,
+	IBV_EXP_FLOW_SPEC_UDP = 0x41,
+	IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL = 0x50,
+	IBV_EXP_FLOW_SPEC_INNER = 0x100,
+	IBV_EXP_FLOW_SPEC_ACTION_TAG = 0x1000,
+};
+
+struct ibv_exp_flow_eth_filter {
+	uint8_t dst_mac[6];
+	uint8_t src_mac[6];
+	uint16_t ether_type;
+	/*
+	 * same layout as 802.1q: prio 3, cfi 1, vlan id 12
+	 */
+	uint16_t vlan_tag;
+};
+
+struct ibv_exp_flow_spec_eth {
+	enum ibv_exp_flow_spec_type type;
+	uint16_t size;
+	struct ibv_exp_flow_eth_filter val;
+	struct ibv_exp_flow_eth_filter mask;
+};
+
+struct ibv_exp_flow_ib_filter {
+	uint32_t qpn;
+	uint8_t dst_gid[16];
+};
+
+struct ibv_exp_flow_spec_ib {
+	enum ibv_exp_flow_spec_type type;
+	uint16_t size;
+	struct ibv_exp_flow_ib_filter val;
+	struct ibv_exp_flow_ib_filter mask;
+};
+
+struct ibv_exp_flow_ipv4_filter {
+	uint32_t src_ip;
+	uint32_t dst_ip;
+};
+
+struct ibv_exp_flow_spec_ipv4 {
+	enum ibv_exp_flow_spec_type type;
+	uint16_t size;
+	struct ibv_exp_flow_ipv4_filter val;
+	struct ibv_exp_flow_ipv4_filter mask;
+};
+
+struct ibv_exp_flow_ipv6_filter {
+	uint8_t src_ip[16];
+	uint8_t dst_ip[16];
+};
+
+struct ibv_exp_flow_spec_ipv6 {
+	enum ibv_exp_flow_spec_type type;
+	uint16_t size;
+	struct ibv_exp_flow_ipv6_filter val;
+	struct ibv_exp_flow_ipv6_filter mask;
+};
+
+struct ibv_exp_flow_spec_action_tag {
+	enum ibv_exp_flow_spec_type type;
+	uint16_t size;
+	uint32_t tag_id;
+};
+
+struct ibv_exp_flow_ipv6_ext_filter {
+	uint8_t src_ip[16];
+	uint8_t dst_ip[16];
+	uint32_t flow_label;
+	uint8_t next_hdr;
+	uint8_t traffic_class;
+	uint8_t hop_limit;
+};
+
+struct ibv_exp_flow_spec_ipv6_ext {
+	enum ibv_exp_flow_spec_type type;
+	uint16_t size;
+	struct ibv_exp_flow_ipv6_ext_filter val;
+	struct ibv_exp_flow_ipv6_ext_filter mask;
+};
+
+struct ibv_exp_flow_ipv4_ext_filter {
+	uint32_t src_ip;
+	uint32_t dst_ip;
+	uint8_t proto;
+	uint8_t tos;
+	uint8_t ttl;
+	uint8_t flags;
+};
+
+struct ibv_exp_flow_spec_ipv4_ext {
+	enum ibv_exp_flow_spec_type type;
+	uint16_t size;
+	struct ibv_exp_flow_ipv4_ext_filter val;
+	struct ibv_exp_flow_ipv4_ext_filter mask;
+};
+
+struct ibv_exp_flow_tcp_udp_filter {
+	uint16_t dst_port;
+	uint16_t src_port;
+};
+
+struct ibv_exp_flow_spec_tcp_udp {
+	enum ibv_exp_flow_spec_type type;
+	uint16_t size;
+	struct ibv_exp_flow_tcp_udp_filter val;
+	struct ibv_exp_flow_tcp_udp_filter mask;
+};
+
+struct ibv_exp_flow_tunnel_filter {
+	uint32_t tunnel_id;
+};
+
+struct ibv_exp_flow_spec_tunnel {
+	enum ibv_exp_flow_spec_type type;
+	uint16_t size;
+	struct ibv_exp_flow_tunnel_filter val;
+	struct ibv_exp_flow_tunnel_filter mask;
+};
+
+struct ibv_exp_flow_spec {
+	union {
+		struct {
+			uint32_t type;
+			uint16_t size;
+		} hdr;
+		struct ibv_exp_flow_spec_ib ib;
+		struct ibv_exp_flow_spec_eth eth;
+		struct ibv_exp_flow_spec_ipv4 ipv4;
+		struct ibv_exp_flow_spec_ipv4_ext ipv4_ext;
+		struct ibv_exp_flow_spec_tcp_udp tcp_udp;
+		struct ibv_exp_flow_spec_ipv6 ipv6;
+		struct ibv_exp_flow_spec_ipv6_ext ipv6_ext;
+		struct ibv_exp_flow_spec_tunnel tunnel;
+		struct ibv_exp_flow_spec_action_tag flow_tag;
+	};
+};
+
+struct ibv_exp_flow_attr {
+	enum ibv_exp_flow_attr_type type;
+	uint16_t size;
+	uint16_t priority;
+	uint8_t num_of_specs;
+	uint8_t port;
+	uint32_t flags;
+	/* Following are the optional layers according to user request
+	 * struct ibv_exp_flow_spec_xxx [L2]
+	 * struct ibv_exp_flow_spec_yyy [L3/L4]
+	 */
+	uint64_t reserved; /* reserved for future growth (must be 0) */
+};
+
+struct ibv_exp_flow {
+	struct ibv_context *context;
+	uint32_t handle;
+};
+
+struct ibv_exp_dct {
+	struct ibv_context *context;
+	uint32_t handle;
+	uint32_t dct_num;
+	struct ibv_pd *pd;
+	struct ibv_srq *srq;
+	struct ibv_cq *cq;
+	pthread_mutex_t mutex;
+	pthread_cond_t cond;
+	uint32_t events_completed;
+};
+
+enum ibv_exp_wc_opcode {
+	IBV_EXP_WC_SEND,
+	IBV_EXP_WC_RDMA_WRITE,
+	IBV_EXP_WC_RDMA_READ,
+	IBV_EXP_WC_COMP_SWAP,
+	IBV_EXP_WC_FETCH_ADD,
+	IBV_EXP_WC_BIND_MW,
+	IBV_EXP_WC_LOCAL_INV = 7,
+	IBV_EXP_WC_MASKED_COMP_SWAP = 9,
+	IBV_EXP_WC_MASKED_FETCH_ADD = 10,
+	IBV_EXP_WC_TSO,
+	IBV_EXP_WC_UMR = 0x100,
+/*
+ * Set value of IBV_EXP_WC_RECV so consumers can test if a completion is a
+ * receive by testing (opcode & IBV_EXP_WC_RECV).
+ */
+	IBV_EXP_WC_RECV = 1 << 7,
+	IBV_EXP_WC_RECV_RDMA_WITH_IMM
+};
+
+enum ibv_exp_wc_flags {
+	IBV_EXP_WC_GRH = IBV_WC_GRH,
+	IBV_EXP_WC_WITH_IMM = IBV_WC_WITH_IMM,
+
+	IBV_EXP_WC_WITH_INV = IBV_EXP_START_FLAG << 2,
+	IBV_EXP_WC_WITH_SL = IBV_EXP_START_FLAG << 4,
+	IBV_EXP_WC_WITH_SLID = IBV_EXP_START_FLAG << 5,
+	IBV_EXP_WC_WITH_TIMESTAMP = IBV_EXP_START_FLAG << 6,
+	IBV_EXP_WC_QP = IBV_EXP_START_FLAG << 7,
+	IBV_EXP_WC_SRQ = IBV_EXP_START_FLAG << 8,
+	IBV_EXP_WC_DCT = IBV_EXP_START_FLAG << 9,
+	IBV_EXP_WC_RX_IP_CSUM_OK = IBV_EXP_START_FLAG << 10,
+	IBV_EXP_WC_RX_TCP_UDP_CSUM_OK = IBV_EXP_START_FLAG << 11,
+	IBV_EXP_WC_RX_IPV4_PACKET = IBV_EXP_START_FLAG << 12,
+	IBV_EXP_WC_RX_IPV6_PACKET = IBV_EXP_START_FLAG << 13,
+	IBV_EXP_WC_RX_TUNNEL_PACKET = IBV_EXP_START_FLAG << 14,
+	IBV_EXP_WC_RX_OUTER_IP_CSUM_OK = IBV_EXP_START_FLAG << 15,
+	IBV_EXP_WC_RX_OUTER_TCP_UDP_CSUM_OK = IBV_EXP_START_FLAG << 16,
+	IBV_EXP_WC_RX_OUTER_IPV4_PACKET = IBV_EXP_START_FLAG << 17,
+	IBV_EXP_WC_RX_OUTER_IPV6_PACKET = IBV_EXP_START_FLAG << 18,
+};
+
+struct ibv_exp_wc {
+	uint64_t wr_id;
+	enum ibv_wc_status status;
+	enum ibv_exp_wc_opcode exp_opcode;
+	uint32_t vendor_err;
+	uint32_t byte_len;
+	uint32_t imm_data; /* in network byte order */
+	uint32_t qp_num;
+	uint32_t src_qp;
+	int reserved; /* place holder to align with ibv_wc */
+	uint16_t pkey_index;
+	uint16_t slid; /* invalid when TS is used */
+	uint8_t sl; /* invalid when TS is used */
+	uint8_t dlid_path_bits;
+	uint64_t timestamp;
+	struct ibv_qp *qp;
+	struct ibv_srq *srq;
+	struct ibv_exp_dct *dct;
+	uint64_t exp_wc_flags; /* use ibv_exp_wc_flags */
+};
+
+/*
+ * Flags for ibv_exp_prefetch_mr comp_mask
+ */
+enum ibv_exp_prefetch_attr_comp_mask {
+	IBV_EXP_PREFETCH_MR_RESERVED = (1 << 0),
+};
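As the `ibv_exp_flow_attr` comment above notes, the spec structs follow the attr contiguously in memory. A hedged sketch of steering one destination MAC to a QP, assuming the `ibv_exp_create_flow()` entry point presumably declared later in this header:

```c
#include <string.h>
#include <infiniband/verbs_exp.h>

/* attr and its single ETH spec laid out back to back, per the layout
 * comment in ibv_exp_flow_attr. */
struct raw_eth_flow_attr {
	struct ibv_exp_flow_attr attr;
	struct ibv_exp_flow_spec_eth spec;
} __attribute__((packed));

static struct ibv_exp_flow *steer_mac(struct ibv_qp *qp, const uint8_t mac[6])
{
	struct raw_eth_flow_attr fa;

	memset(&fa, 0, sizeof(fa));
	fa.attr.type = IBV_EXP_FLOW_ATTR_NORMAL;
	fa.attr.size = sizeof(fa);         /* attr + all specs */
	fa.attr.num_of_specs = 1;
	fa.attr.port = 1;
	fa.spec.type = IBV_EXP_FLOW_SPEC_ETH;
	fa.spec.size = sizeof(fa.spec);
	memcpy(fa.spec.val.dst_mac, mac, 6);
	memset(fa.spec.mask.dst_mac, 0xff, 6); /* exact-match on dst MAC */
	return ibv_exp_create_flow(qp, &fa.attr);
}
```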
+
+/*
+ * Flags for ibv_exp_prefetch_mr flags
+ */
+enum ibv_exp_prefetch_attr_flags {
+	/* request prefetching for write access. Used for both local and remote */
+	IBV_EXP_PREFETCH_WRITE_ACCESS = (1 << 0),
+};
+
+struct ibv_exp_prefetch_attr {
+	/* Use enum ibv_exp_prefetch_attr_flags */
+	uint32_t flags;
+	/* Address of the area to prefetch */
+	void *addr;
+	/* Length of the area to prefetch */
+	size_t length;
+	uint32_t comp_mask;
+};
+
+/*
+ * Flags for ibv_exp_reg_mr_in struct comp_mask
+ */
+enum ibv_exp_reg_mr_in_comp_mask {
+	/* set supported bits for validity check */
+	IBV_EXP_REG_MR_CREATE_FLAGS = (1 << 0),
+	IBV_EXP_REG_MR_RESERVED = (1 << 1)
+};
+
+enum ibv_exp_reg_mr_create_flags {
+	IBV_EXP_REG_MR_CREATE_CONTIG = (1 << 0) /* register mr with contiguous pages */
+};
+
+struct ibv_exp_reg_mr_in {
+	struct ibv_pd *pd;
+	void *addr;
+	size_t length;
+	uint64_t exp_access; /* use ibv_exp_access_flags */
+	uint32_t comp_mask; /* reserved for future growth (must be 0) */
+	uint32_t create_flags; /* use ibv_exp_reg_mr_create_flags */
+};
+
+
+enum ibv_exp_task_type {
+	IBV_EXP_TASK_SEND = 0,
+	IBV_EXP_TASK_RECV = 1
+};
+
+/*
+ * Flags for ibv_exp_task struct comp_mask
+ */
+enum ibv_exp_task_comp_mask {
+	IBV_EXP_TASK_RESERVED = (1 << 0)
+};
+
+struct ibv_exp_task {
+	enum ibv_exp_task_type task_type;
+	struct {
+		struct ibv_qp *qp;
+		union {
+			struct ibv_exp_send_wr *send_wr;
+			struct ibv_recv_wr *recv_wr;
+		};
+	} item;
+	struct ibv_exp_task *next;
+	uint32_t comp_mask; /* reserved for future growth (must be 0) */
+};
+
+/*
+ * Flags for ibv_exp_arm_attr struct comp_mask
+ */
+enum ibv_exp_arm_attr_comp_mask {
+	IBV_EXP_ARM_ATTR_RESERVED = (1 << 0)
+};
+struct ibv_exp_arm_attr {
+	uint32_t comp_mask; /* reserved for future growth (must be 0) */
+};
+
+enum ibv_exp_mr_create_flags {
+	IBV_EXP_MR_SIGNATURE_EN = (1 << 0),
+	IBV_EXP_MR_INDIRECT_KLMS = (1 << 1)
+};
+
+struct ibv_exp_mr_init_attr {
+	uint32_t max_klm_list_size; /* num of entries */
+	uint32_t create_flags; /* use ibv_exp_mr_create_flags */
+	uint64_t exp_access_flags; /* use ibv_exp_access_flags */
+};
+
+/*
+ * Comp_mask for ibv_exp_create_mr_in struct comp_mask
+ */
+enum ibv_exp_create_mr_in_comp_mask {
+	IBV_EXP_CREATE_MR_IN_RESERVED = (1 << 0)
+};
+
+struct ibv_exp_create_mr_in {
+	struct ibv_pd *pd;
+	struct ibv_exp_mr_init_attr attr;
+	uint32_t comp_mask; /* use ibv_exp_create_mr_in_comp_mask */
+};
+
+/*
+ * Flags for ibv_exp_mkey_attr struct comp_mask
+ */
+enum ibv_exp_mkey_attr_comp_mask {
+	IBV_EXP_MKEY_ATTR_RESERVED = (1 << 0)
+};
+
+struct ibv_exp_mkey_attr {
+	uint32_t max_klm_list_size;
+	uint32_t comp_mask; /* use ibv_exp_mkey_attr_comp_mask */
+};
+
+struct ibv_exp_mkey_list_container {
+	uint32_t max_klm_list_size;
+	struct ibv_context *context;
+};
+
+enum ibv_exp_mkey_list_type {
+	IBV_EXP_MKEY_LIST_TYPE_INDIRECT_MR
+};
+
+/*
+ * Flags for ibv_exp_mkey_list_container_attr struct comp_mask
+ */
+enum ibv_exp_alloc_mkey_list_comp_mask {
+	IBV_EXP_MKEY_LIST_CONTAINER_RESERVED = (1 << 0)
+};
+
+struct ibv_exp_mkey_list_container_attr {
+	struct ibv_pd *pd;
+	uint32_t mkey_list_type; /* use ibv_exp_mkey_list_type */
+	uint32_t max_klm_list_size;
+	uint32_t comp_mask; /* use ibv_exp_alloc_mkey_list_comp_mask */
+};
+
+/*
+ * Flags for ibv_exp_rereg_out struct comp_mask
+ */
+enum ibv_exp_rereg_mr_comp_mask {
+	IBV_EXP_REREG_MR_RESERVED = (1 << 0)
+};
+
+struct ibv_exp_rereg_out {
+	int need_dofork;
+	uint32_t comp_mask; /* use ibv_exp_rereg_mr_comp_mask */
+};
+
+/*
+ * Flags for ibv_exp_dereg_out struct comp_mask
+ */
+enum ibv_exp_dereg_mr_comp_mask {
+	IBV_EXP_DEREG_MR_RESERVED = (1 << 0)
+};
+
+struct ibv_exp_dereg_out {
+	int need_dofork;
+	uint32_t comp_mask; /* use ibv_exp_dereg_mr_comp_mask */
+};
+
+struct verbs_env_item {
+	char *name;
+	char *value;
+	struct verbs_env_item *next;
+};
+
+struct verbs_environment {
+	struct verbs_env_item *head;
+	pthread_mutex_t mtx;
+};
+
+/* RSS stuff */
+
+enum ibv_exp_wq_type {
+	IBV_EXP_WQT_RQ,
+	IBV_EXP_WQT_SRQ
+};
+
+enum ibv_exp_wq_state {
+	IBV_EXP_WQS_RESET,
+	IBV_EXP_WQS_RDY,
+	IBV_EXP_WQS_ERR,
+	IBV_EXP_WQS_UNKNOWN
+};
+
+/* VLAN Offloads */
+enum ibv_exp_vlan_offloads {
+	/* Represents C-VLAN stripping feature */
+	IBV_EXP_RECEIVE_WQ_CVLAN_STRIP = (1 << 0),
+	/* Represents C-VLAN insertion feature */
+	IBV_EXP_RECEIVE_WQ_CVLAN_INSERTION = (1 << 1),
+	IBV_EXP_RECEIVE_WQ_VLAN_OFFLOADS_RESERVED = (1 << 2),
+};
+
+/*
+ * Work Queue. A QP can be created without internal WQs "packaged"
+ * inside it; such QPs can be configured to use an "external" WQ object
+ * as their receive/send queue.
+ * A WQ is associated (many to one) with a Completion Queue and owns the
+ * WQ properties (PD, WQ size, etc.).
+ * A WQ of type IBV_EXP_WQT_RQ contains receive WQEs, in which case its
+ * PD serves scatter as well.
+ * A WQ of type IBV_EXP_WQT_SRQ is associated (many to one) with a regular
+ * ibv_srq, in which case it does not hold receive WQEs.
+ * QPs can be associated with IBV_EXP_WQT_S/RQ WQs via a WQ Indirection
+ * Table.
+ */
+struct ibv_exp_wq {
+	struct ibv_context *context;
+	void *wq_context; /* Associated Context of the WQ */
+	uint32_t handle;
+	/* Protection domain WQ should be associated with */
+	struct ibv_pd *pd;
+	/* CQ to be associated with the WQ */
+	struct ibv_cq *cq;
+	/* SRQ handle if WQ is to be associated with an SRQ, otherwise NULL */
+	struct ibv_srq *srq;
+	uint32_t wq_num;
+	enum ibv_exp_wq_state state;
+	enum ibv_exp_wq_type wq_type;
+	uint32_t comp_mask;
+};
+
+enum ibv_exp_wq_init_attr_mask {
+	IBV_EXP_CREATE_WQ_RES_DOMAIN = (1 << 0),
+	IBV_EXP_CREATE_WQ_MP_RQ = (1 << 1),
+	IBV_EXP_CREATE_WQ_VLAN_OFFLOADS = (1 << 2),
+	IBV_EXP_CREATE_WQ_FLAGS = (1 << 3),
+	IBV_EXP_CREATE_WQ_RESERVED = (1 << 4)
+};
+
+struct ibv_exp_wq_mp_rq {
+	enum ibv_exp_mp_rq_shifts use_shift;
+	uint8_t single_wqe_log_num_of_strides;
+	uint8_t single_stride_log_num_of_bytes;
+};
+
+enum ibv_exp_wq_init_attr_flags {
+	IBV_EXP_CREATE_WQ_FLAG_RX_END_PADDING = (1ULL << 0),
+	IBV_EXP_CREATE_WQ_FLAG_SCATTER_FCS = (1ULL << 1),
+	IBV_EXP_CREATE_WQ_FLAG_RESERVED = (1ULL << 2)
+};
+
+struct ibv_exp_wq_init_attr {
+	/* Associated Context of the WQ */
+	void *wq_context;
+	enum ibv_exp_wq_type wq_type;
+	/* Valid for non IBV_EXP_WQT_SRQ WQ */
+	uint32_t max_recv_wr;
+	/* Valid for non IBV_EXP_WQT_SRQ WQ */
+	uint32_t max_recv_sge;
+	/* Protection domain WQ should be associated with */
+	struct ibv_pd *pd;
+	/* CQ to be associated with the WQ */
+	struct ibv_cq *cq;
+	/* SRQ handle if WQ is of type IBV_EXP_WQT_SRQ, otherwise NULL */
+	struct ibv_srq *srq;
+	/* refers to ibv_exp_wq_init_attr_mask */
+	uint32_t comp_mask;
+	struct ibv_exp_res_domain *res_domain;
+	struct ibv_exp_wq_mp_rq mp_rq;
+	uint16_t vlan_offloads; /* use ibv_exp_vlan_offloads enum */
+	uint64_t flags; /* general wq create flags */
+};
+
+enum ibv_exp_wq_attr_mask {
+	IBV_EXP_WQ_ATTR_STATE = 1 << 0,
+	IBV_EXP_WQ_ATTR_CURR_STATE = 1 << 1,
+	IBV_EXP_WQ_ATTR_VLAN_OFFLOADS = 1 << 2,
+	IBV_EXP_WQ_ATTR_RESERVED = 1 << 3
+};
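The external-WQ model described above is the building block for RSS: create several receive WQs, then group them in an indirection table. A hedged sketch of creating one such WQ, assuming the `ibv_exp_create_wq()` entry point presumably declared later in this header:

```c
#include <string.h>
#include <infiniband/verbs_exp.h>

/* Create a plain receive WQ to be placed in an RSS indirection table. */
static struct ibv_exp_wq *make_rq(struct ibv_context *ctx,
				  struct ibv_pd *pd, struct ibv_cq *cq)
{
	struct ibv_exp_wq_init_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.wq_type = IBV_EXP_WQT_RQ;
	attr.max_recv_wr = 512;
	attr.max_recv_sge = 1;
	attr.pd = pd;
	attr.cq = cq;
	/* comp_mask left 0: no optional fields (res domain, MP-RQ, ...) */
	return ibv_exp_create_wq(ctx, &attr);
}
```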
+
+struct ibv_exp_wq_attr {
+	/* enum ibv_exp_wq_attr_mask */
+	uint32_t attr_mask;
+	/* Move the RQ to this state */
+	enum ibv_exp_wq_state wq_state;
+	/* Assume this is the current RQ state */
+	enum ibv_exp_wq_state curr_wq_state;
+	uint16_t vlan_offloads; /* use ibv_exp_vlan_offloads enum */
+};
+
+/*
+ * Receive Work Queue Indirection Table.
+ * It's used in order to distribute incoming packets between different
+ * Receive Work Queues. Associating Receive WQs with different CPU cores
+ * allows spreading the traffic load between different CPU cores.
+ * The Indirection Table can contain only WQs of type IBV_EXP_WQT_S/RQ.
+ */
+struct ibv_exp_rwq_ind_table {
+	struct ibv_context *context;
+	struct ibv_pd *pd;
+	int ind_tbl_handle;
+	int ind_tbl_num;
+	uint32_t comp_mask;
+};
+
+enum ibv_exp_ind_table_init_attr_mask {
+	IBV_EXP_CREATE_IND_TABLE_RESERVED = (1 << 0)
+};
+
+/*
+ * Receive Work Queue Indirection Table attributes
+ */
+struct ibv_exp_rwq_ind_table_init_attr {
+	struct ibv_pd *pd;
+	/* Log, base 2, of Indirection table size */
+	uint32_t log_ind_tbl_size;
+	/* Each entry is a pointer to Receive Work Queue */
+	struct ibv_exp_wq **ind_tbl;
+	uint32_t comp_mask;
+};
+
+/* Accelerated verbs */
+enum ibv_exp_thread_model {
+	IBV_EXP_THREAD_SAFE,	/* The lib is responsible for protecting the object in a multithreaded environment */
+	IBV_EXP_THREAD_UNSAFE,	/* The application is responsible for protecting the object in a multithreaded environment */
+	IBV_EXP_THREAD_SINGLE	/* The object is called from only one thread */
+};
+
+enum ibv_exp_msg_model {
+	IBV_EXP_MSG_DEFAULT,		/* Use the provider default message model */
+	IBV_EXP_MSG_LOW_LATENCY,	/* Hint the provider to optimize for low latency */
+	IBV_EXP_MSG_HIGH_BW,		/* Hint the provider to optimize for high bandwidth */
+	IBV_EXP_MSG_FORCE_LOW_LATENCY,	/* Force the provider to optimize for low latency */
+};
+
+/*
+ * Resource domains
+ */
+enum ibv_exp_res_domain_init_attr_comp_mask {
+	IBV_EXP_RES_DOMAIN_THREAD_MODEL = (1 << 0),
+	IBV_EXP_RES_DOMAIN_MSG_MODEL = (1 << 1),
+	IBV_EXP_RES_DOMAIN_RESERVED = (1 << 2),
+};
+
+struct ibv_exp_res_domain_init_attr {
+	uint32_t comp_mask; /* use ibv_exp_res_domain_init_attr_comp_mask */
+	enum ibv_exp_thread_model thread_model;
+	enum ibv_exp_msg_model msg_model;
+};
+
+enum ibv_exp_destroy_res_domain_comp_mask {
+	IBV_EXP_DESTROY_RES_DOMAIN_RESERVED = (1 << 0),
+};
+
+struct ibv_exp_destroy_res_domain_attr {
+	uint32_t comp_mask; /* use ibv_exp_destroy_res_domain_comp_mask */
+};
+
+/*
+ * Query interface (specialized Verbs)
+ */
+
+enum ibv_exp_query_intf_flags {
+	/* Interface functions includes correctness and validity checks */
+	IBV_EXP_QUERY_INTF_FLAG_ENABLE_CHECKS = (1 << 0),
+};
+
+enum ibv_exp_intf_family {
+	IBV_EXP_INTF_QP_BURST,
+	IBV_EXP_INTF_CQ,
+	IBV_EXP_INTF_WQ,
+	IBV_EXP_INTF_RESERVED,
+};
+
+enum ibv_exp_experimental_intf_family {
+	IBV_EXP_EXPERIMENTAL_INTF_RESERVED,
+};
+
+enum ibv_exp_intf_scope {
+	IBV_EXP_INTF_GLOBAL,		/* Permanent interface, identified by
+					 * the ibv_exp_intf_family enum
+					 */
+	IBV_EXP_INTF_EXPERIMENTAL,	/* Interface under evaluation, identified by
+					 * the ibv_exp_experimental_intf_family enum
+					 * This interface may change between lib
+					 * versions
+					 */
+	IBV_EXP_INTF_VENDOR,		/* Vendor specific interface, defined in vendor
+					 * separate header file
+					 */
+	IBV_EXP_INTF_VENDOR_EXPERIMENTAL,	/* Vendor interface under evaluation,
+						 * defined in vendor separate header
+						 * file
+						 */
+};
+
+/* Return status from ibv_exp_query_intf */
+enum ibv_exp_query_intf_status {
+	IBV_EXP_INTF_STAT_OK,
+	IBV_EXP_INTF_STAT_VENDOR_NOT_SUPPORTED,	/* The provided 'vendor_guid' is not supported */
+	IBV_EXP_INTF_STAT_INTF_NOT_SUPPORTED,	/* The provided 'intf' is not supported */
+	IBV_EXP_INTF_STAT_VERSION_NOT_SUPPORTED,	/* The provided 'intf_version' is not supported */
+	IBV_EXP_INTF_STAT_INVAL_PARARM,		/* General invalid parameter */
+	IBV_EXP_INTF_STAT_INVAL_OBJ_STATE,	/* QP is not in INIT, RTR or RTS state */
+	IBV_EXP_INTF_STAT_INVAL_OBJ,		/* Mismatch between the provided 'obj'(CQ/QP/WQ) and requested 'intf' */
+	IBV_EXP_INTF_STAT_FLAGS_NOT_SUPPORTED,	/* The provided set of 'flags' is not supported */
+	IBV_EXP_INTF_STAT_FAMILY_FLAGS_NOT_SUPPORTED,	/* The provided set of 'family_flags' is not supported */
+};
+
+enum ibv_exp_query_intf_comp_mask {
+	IBV_EXP_QUERY_INTF_RESERVED = (1 << 0),
+};
+
+struct ibv_exp_query_intf_params {
+	uint32_t flags;			/* use ibv_exp_query_intf_flags */
+	enum ibv_exp_intf_scope intf_scope;
+	uint64_t vendor_guid;		/* set in case VENDOR intf_scope selected */
+	uint32_t intf;			/* for GLOBAL intf_scope use ibv_exp_intf_family enum */
+	uint32_t intf_version;		/* Version */
+	void *obj;			/* interface object (CQ/QP/WQ) */
+	void *family_params;		/* Family-specific params */
+	uint32_t family_flags;		/* Family-specific flags */
+	uint32_t comp_mask;		/* use ibv_exp_query_intf_comp_mask */
+};
+
+enum ibv_exp_release_intf_comp_mask {
+	IBV_EXP_RELEASE_INTF_RESERVED = (1 << 0),
+};
+
+struct ibv_exp_release_intf_params {
+	uint32_t comp_mask; /* use ibv_exp_release_intf_comp_mask */
+};
+
+/*
+ * Family interfaces
+ */
+
+/* QP burst family */
+
+/* Flags to use in family_flags field of ibv_exp_query_intf_params on family creation */
+enum ibv_exp_qp_burst_family_create_flags {
+	/* To disable loop-back of multi-cast messages in RAW-ETH */
+	IBV_EXP_QP_BURST_CREATE_DISABLE_ETH_LOOPBACK = (1 << 0),
+	/* To enable Multi-Packet send WR when possible */
+	IBV_EXP_QP_BURST_CREATE_ENABLE_MULTI_PACKET_SEND_WR = (1 << 1),
+};
+
+/* Flags to use on send functions of QP burst family */
+enum ibv_exp_qp_burst_family_flags {
+	IBV_EXP_QP_BURST_SIGNALED = 1 << 0,
+	IBV_EXP_QP_BURST_SOLICITED = 1 << 1,
+	IBV_EXP_QP_BURST_IP_CSUM = 1 << 2,
+	IBV_EXP_QP_BURST_TUNNEL = 1 << 3,
+	IBV_EXP_QP_BURST_FENCE = 1 << 4,
+};
+
+/* All functions of QP family included in QP family version 1 */
+struct ibv_exp_qp_burst_family {
+	int (*send_pending)(struct ibv_qp *qp, uint64_t addr, uint32_t length, uint32_t lkey, uint32_t flags);
+	int (*send_pending_inline)(struct ibv_qp *qp, void *addr, uint32_t length, uint32_t flags);
+	int (*send_pending_sg_list)(struct ibv_qp *qp, struct ibv_sge *sg_list, uint32_t num, uint32_t flags);
+	int (*send_flush)(struct ibv_qp *qp);
+	int (*send_burst)(struct ibv_qp *qp, struct ibv_sge *msg_list, uint32_t num, uint32_t flags);
+	int (*recv_burst)(struct ibv_qp *qp, struct ibv_sge *msg_list, uint32_t num);
+};
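The function tables of these families are obtained through the query interface above. A hedged sketch, assuming the `ibv_exp_query_intf()` entry point (returning `void *`) presumably declared later in this header; per the status enum, the QP must already be in INIT, RTR or RTS state:

```c
#include <string.h>
#include <infiniband/verbs_exp.h>

/* Fetch the accelerated QP-burst function table for an existing QP. */
static struct ibv_exp_qp_burst_family *
get_burst_family(struct ibv_context *ctx, struct ibv_qp *qp)
{
	struct ibv_exp_query_intf_params p;
	enum ibv_exp_query_intf_status status;

	memset(&p, 0, sizeof(p));
	p.intf_scope = IBV_EXP_INTF_GLOBAL;
	p.intf = IBV_EXP_INTF_QP_BURST;
	p.obj = qp;
	return ibv_exp_query_intf(ctx, &p, &status); /* NULL + status on failure */
}
```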
+ * - Use ibv_exp_qp_burst_family_flags for the flags field + */ + int (*send_pending_inline)(struct ibv_qp *qp, void *addr, uint32_t length, uint32_t flags); + /* + * send_pending_sg_list - Put one scatter-gather(sg) message in the provider send queue. + * + * Common usage: Same as send_pending + * Notes: + * - The number of sg entries must fit the max_send_sge of the QP. + * Providing bigger list of sg entries may lead to data corruption and + * segmentation fault. + * - Use ibv_exp_qp_burst_family_flags for the flags field + */ + int (*send_pending_sg_list)(struct ibv_qp *qp, struct ibv_sge *sg_list, uint32_t num, uint32_t flags); + /* + * send_flush - To flush the pending messages. + * + * Note: Use ibv_exp_qp_burst_family_flags for the flags field + */ + int (*send_flush)(struct ibv_qp *qp); + /* + * send_burst - Send a list of 'num' messages (no send_flush required in this case) + */ + int (*send_burst)(struct ibv_qp *qp, struct ibv_sge *msg_list, uint32_t num, uint32_t flags); + /* + * recv_burst - Post a set of 'num' receive buffers. + * + * Note: One sge per message is supported by this function + */ + int (*recv_burst)(struct ibv_qp *qp, struct ibv_sge *msg_list, uint32_t num); + /* + * send_pending_vlan - Put one message in the provider send queue + * and insert vlan_tci to header. + * + * Common usage: Same as send_pending + * Note: + * - Same as send_pending + * - Not supported when MP enable. + */ + int (*send_pending_vlan)(struct ibv_qp *qp, uint64_t addr, uint32_t length, + uint32_t lkey, uint32_t flags, uint16_t *vlan_tci); + /* + * send_pending_inline - Put one inline message in the provider send queue + * and insert vlan_tci to header. + * + * Common usage: Same as send_pending_inline + * Notes: + * - Same as send_pending_inline + * - Not supported when MP enable. + */ + int (*send_pending_inline_vlan)(struct ibv_qp *qp, void *addr, uint32_t length, + uint32_t flags, uint16_t *vlan_tci); + /* + * send_pending_sg_list - Put one scatter-gather(sg) message in the provider send queue + * and insert vlan_tci to header. + * + * Common usage: Same as send_pending_sg_list + * Notes: + * - Same as send_pending_sg_list + * - Not supported when MP enable. + */ + int (*send_pending_sg_list_vlan)(struct ibv_qp *qp, struct ibv_sge *sg_list, uint32_t num, + uint32_t flags, uint16_t *vlan_tci); +}; + +/* WQ family */ +struct ibv_exp_wq_family { + /* + * recv_sg_list - Post one scatter-gather(sg) receive buffer. + * + * Note: + * - The number of sg entries must fit the max_recv_sge of the WQ. + * Providing bigger list of sg entries may lead to data corruption and + * segmentation fault. + */ + int (*recv_sg_list)(struct ibv_exp_wq *wq, struct ibv_sge *sg_list, uint32_t num_sg); + /* + * recv_burst - Post a set of 'num' receive buffers. 
+
+/* WQ family */
+struct ibv_exp_wq_family {
+	/*
+	 * recv_sg_list - Post one scatter-gather(sg) receive buffer.
+	 *
+	 * Note:
+	 *  - The number of sg entries must fit the max_recv_sge of the WQ.
+	 *    Providing a bigger list of sg entries may lead to data corruption and
+	 *    a segmentation fault.
+	 */
+	int (*recv_sg_list)(struct ibv_exp_wq *wq, struct ibv_sge *sg_list, uint32_t num_sg);
+	/*
+	 * recv_burst - Post a set of 'num' receive buffers.
+	 *
+	 * Note: One sge per message is supported by this function
+	 */
+	int (*recv_burst)(struct ibv_exp_wq *wq, struct ibv_sge *msg_list, uint32_t num);
+};
+
+/* CQ family */
+enum ibv_exp_cq_family_flags {
+	/* RX offload flags */
+	/* The cq_family_flags are applicable
+	 * according to the existence of the
+	 * related device capabilities flags */
+	IBV_EXP_CQ_RX_IP_CSUM_OK		= 1 << 0, /* IBV_EXP_DEVICE_RX_CSUM_IP_PKT or IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT */
+	IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK		= 1 << 1, /* IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT */
+	IBV_EXP_CQ_RX_IPV4_PACKET		= 1 << 2, /* IBV_EXP_DEVICE_RX_CSUM_IP_PKT or IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT */
+	IBV_EXP_CQ_RX_IPV6_PACKET		= 1 << 3, /* IBV_EXP_DEVICE_RX_CSUM_IP_PKT or IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT */
+	IBV_EXP_CQ_RX_TUNNEL_PACKET		= 1 << 4, /* IBV_EXP_DEVICE_VXLAN_SUPPORT */
+	IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK		= 1 << 5, /* IBV_EXP_DEVICE_VXLAN_SUPPORT */
+	IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK	= 1 << 6, /* IBV_EXP_DEVICE_VXLAN_SUPPORT */
+	IBV_EXP_CQ_RX_OUTER_IPV4_PACKET		= 1 << 7, /* IBV_EXP_DEVICE_VXLAN_SUPPORT */
+	IBV_EXP_CQ_RX_OUTER_IPV6_PACKET		= 1 << 8, /* IBV_EXP_DEVICE_VXLAN_SUPPORT */
+
+	/* Flags supported from CQ family version 1 */
+	/* Multi-Packet RQ flag */
+	IBV_EXP_CQ_RX_MULTI_PACKET_LAST_V1	= 1 << 9, /* Last packet on WR */
+	/* CVLAN stripping RQ flag */
+	IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1		= 1 << 10, /*
+							    * When set, CVLAN is stripped
+							    * from incoming packets.
+							    */
+
+	/* The RX TCP/UDP packet flags
+	 * applicable according to the
+	 * IBV_EXP_DEVICE_RX_TCP_UDP_PKT_TYPE
+	 * device capabilities flag
+	 */
+	IBV_EXP_CQ_RX_TCP_PACKET		= 1 << 11,
+	IBV_EXP_CQ_RX_UDP_PACKET		= 1 << 12,
+};
+
+/* All functions of CQ family included in CQ family version 1 */
+struct ibv_exp_cq_family {
+	int32_t (*poll_cnt)(struct ibv_cq *cq, uint32_t max);
+	int32_t (*poll_length)(struct ibv_cq *cq, void *buf, uint32_t *inl);
+	int32_t (*poll_length_flags)(struct ibv_cq *cq, void *buf, uint32_t *inl, uint32_t *flags);
+};
+
+struct ibv_exp_cq_family_v1 {
+	/*
+	 * poll_cnt - Poll up to 'max' valid completions
+	 *
+	 * The function returns the number of valid completions it
+	 * managed to drain from the CQ.
+	 *
+	 * Usage example: In case a CQ is connected to one send-queue,
+	 *                the application may use this function to get
+	 *                the number of QP send-completions.
+	 *
+	 * Return value (n):
+	 *   n >= 0 : number of extracted completions.
+	 *   n == -1 : operation failed; the completion was not extracted.
+	 *             To extract this completion, ibv_poll_cq() must be used
+	 *
+	 * Note: The function is designed to support TX completions; it may also be
+	 *       used for RX completions, but it does not support RX inline-scatter.
+	 */
+	int32_t (*poll_cnt)(struct ibv_cq *cq, uint32_t max);
+	/*
+	 * poll_length - Poll one receive completion and provide the related
+	 *               message length.
+	 *
+	 * The function returns only the length of the completed message.
+	 * In case of an inline received message, the message will be copied
+	 * to the provided buffer ('buf') and the '*inl' status will be set.
+	 * The function extracts only completions of regular receive-messages.
+	 * In case of a send-message completion or SRQ receive-message completion
+	 * it returns -1.
+	 *
+	 * Usage example: In case a CQ is connected to one receive-queue,
+	 *                the application may use this function to get
+	 *                the size of the next received message.
+	 *
+	 * Return value (n):
+	 *   n > 0 : successful completion with positive length.
+	 *           *inl will be set to 1 if data was copied to the buffer.
+	 *
+	 *   0 : Empty.
+	 *   n == -1 : operation failed; the completion was not extracted.
+	 *             To extract this completion, ibv_poll_cq() must be used
+	 */
+	int32_t (*poll_length)(struct ibv_cq *cq, void *buf, uint32_t *inl);
+	/*
+	 * poll_length_flags - Poll one receive completion and provide the related
+	 *                     message length and completion flags.
+	 *
+	 * The same as poll_length but also retrieves completion flags as
+	 * defined by the enum ibv_exp_cq_family_flags
+	 */
+	int32_t (*poll_length_flags)(struct ibv_cq *cq, void *buf, uint32_t *inl, uint32_t *flags);
+	/*
+	 * poll_length_flags_mp_rq - Poll one receive completion and provide the related
+	 *                           message length, packet-offset and completion flags.
+	 *
+	 * The same as poll_length_flags but:
+	 *  - Without the inline-receive support.
+	 *  - Also retrieves the offset in the WR posted buffer as defined by the WR SG list.
+	 *    The start of the received packet is located at this offset.
+	 */
+	int32_t (*poll_length_flags_mp_rq)(struct ibv_cq *cq, uint32_t *offset, uint32_t *flags);
+	/*
+	 * poll_length_flags_cvlan - Poll one receive completion and provide the
+	 *                           related message length, completion flags
+	 *                           and CVLAN TCI.
+	 *                           The CVLAN TCI value is valid only when the
+	 *                           IBV_EXP_CQ_RX_CVLAN_STRIPPED_V1 flag is
+	 *                           set.
+	 *
+	 * The same as poll_length_flags but:
+	 *  - Also retrieves the packet's CVLAN TCI that was stripped by the HW.
+	 */
+	int32_t (*poll_length_flags_cvlan)(struct ibv_cq *cq, void *buf,
+					   uint32_t *inl, uint32_t *flags,
+					   uint16_t *vlan_tci);
+	/*
+	 * poll_length_flags_mp_rq_cvlan - Poll one receive completion and provide
+	 *                                 the related message length,
+	 *                                 packet-offset, completion flags and
+	 *                                 CVLAN TCI
+	 *
+	 * The same as poll_length_flags_cvlan but:
+	 *  - Without the inline-receive support.
+	 *  - Also retrieves the offset in the WR posted buffer as defined by the
+	 *    WR SG list. The start of the received packet is located at this
+	 *    offset.
+	 */
+	int32_t (*poll_length_flags_mp_rq_cvlan)(struct ibv_cq *cq,
+						 uint32_t *offset,
+						 uint32_t *flags,
+						 uint16_t *vlan_tci);
+};
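+
+/*
+ * Usage sketch (illustrative only, not part of the API): draining one RX
+ * completion with the CQ family.  'cq_fam' is assumed to be the
+ * ibv_exp_cq_family_v1 returned by ibv_exp_query_intf() for this CQ, and
+ * 'buf' a hypothetical application buffer for inline-received data.
+ *
+ *	uint32_t inl = 0, flags = 0;
+ *	int32_t len = cq_fam->poll_length_flags(cq, buf, &inl, &flags);
+ *
+ *	if (len > 0 && (flags & IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK))
+ *		process(buf, len, inl);		(application hook; data is in
+ *						 'buf' only when '*inl' was set)
+ *	else if (len == -1)
+ *		ibv_poll_cq(cq, 1, &wc);	(extract the unhandled completion)
+ */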
+
+enum {
+	IBV_EXP_NUM_DC_INFO_LIDS	= 30
+};
+
+struct ibv_exp_dc_info_ent {
+	uint16_t	lid[IBV_EXP_NUM_DC_INFO_LIDS];
+	uint32_t	seqnum;
+};
+
+enum ibv_exp_roce_gid_type {
+	IBV_EXP_IB_ROCE_V1_GID_TYPE,
+	IBV_EXP_ROCE_V2_GID_TYPE,
+	IBV_EXP_ROCE_V1_5_GID_TYPE,
+};
+
+enum ibv_exp_query_gid_attr {
+	IBV_EXP_QUERY_GID_ATTR_TYPE		= (1 << 0),
+	IBV_EXP_QUERY_GID_ATTR_GID		= (1 << 1),
+	IBV_EXP_QUERY_GID_ATTR_RESERVED		= (1 << 2),
+};
+
+struct ibv_exp_gid_attr {
+	uint32_t			comp_mask;
+	enum ibv_exp_roce_gid_type	type;
+	union ibv_gid			gid;
+};
+
+/**
+ * enum ibv_exp_ec_calc_attr_comp_mask - erasure coding context
+ *    init attributes compatibility enumeration
+ */
+enum ibv_exp_ec_calc_attr_comp_mask {
+	IBV_EXP_EC_CALC_ATTR_MAX_INFLIGHT	= (1 << 0),
+	IBV_EXP_EC_CALC_ATTR_K			= (1 << 1),
+	IBV_EXP_EC_CALC_ATTR_M			= (1 << 2),
+	IBV_EXP_EC_CALC_ATTR_W			= (1 << 3),
+	IBV_EXP_EC_CALC_ATTR_MAX_DATA_SGE	= (1 << 4),
+	IBV_EXP_EC_CALC_ATTR_MAX_CODE_SGE	= (1 << 5),
+	IBV_EXP_EC_CALC_ATTR_ENCODE_MAT		= (1 << 6),
+	IBV_EXP_EC_CALC_ATTR_AFFINITY		= (1 << 7),
+	IBV_EXP_EC_CALC_ATTR_POLLING		= (1 << 8),
+	IBV_EXP_EC_CALC_INIT_ATTR_RESERVED	= (1 << 9),
+};
+
+/**
+ * struct ibv_exp_ec_calc_init_attr - erasure coding engine
+ *     initialization attributes
+ *
+ * @comp_mask:           compatibility bitmask
+ * @max_inflight_calcs:  maximum inflight calculations
+ * @k:                   number of data blocks
+ * @m:                   number of code blocks
+ * @w:                   Galois field bits GF(2^w)
+ * @max_data_sge:        maximum data sg elements to be used for encode/decode
+ * @max_code_sge:        maximum code sg elements to be used for encode/decode
+ * @encode_matrix:       buffer that contains the encoding matrix
+ * @affinity_hint:       affinity hint for asynchronous calcs completion
+ *                       steering.
+ * @polling:             polling mode (if set no completions will be generated
+ *                       by events).
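+ *
+ * Example (illustrative values only): a 10+2 code over GF(2^4) could be
+ * described roughly as follows, with 'matrix' holding the k * m encoding
+ * coefficients prepared by the application:
+ *
+ *	struct ibv_exp_ec_calc_init_attr attr = {
+ *		.comp_mask = IBV_EXP_EC_CALC_ATTR_K | IBV_EXP_EC_CALC_ATTR_M |
+ *			     IBV_EXP_EC_CALC_ATTR_W | IBV_EXP_EC_CALC_ATTR_ENCODE_MAT,
+ *		.max_inflight_calcs = 32,
+ *		.k = 10, .m = 2, .w = 4,
+ *		.max_data_sge = 10, .max_code_sge = 2,
+ *		.encode_matrix = matrix,
+ *	};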
+ */
+struct ibv_exp_ec_calc_init_attr {
+	uint32_t	comp_mask;
+	uint32_t	max_inflight_calcs;
+	int		k;
+	int		m;
+	int		w;
+	int		max_data_sge;
+	int		max_code_sge;
+	uint8_t		*encode_matrix;
+	int		affinity_hint;
+	int		polling;
+};
+
+/**
+ * enum ibv_exp_ec_status - EC calculation status
+ *
+ * @IBV_EXP_EC_CALC_SUCCESS: EC calc operation succeeded
+ * @IBV_EXP_EC_CALC_FAIL:    EC calc operation failed
+ */
+enum ibv_exp_ec_status {
+	IBV_EXP_EC_CALC_SUCCESS,
+	IBV_EXP_EC_CALC_FAIL,
+};
+
+/**
+ * struct ibv_exp_ec_comp - completion context of EC calculation
+ *
+ * @done:   function handle of the EC calculation completion
+ * @status: status of the EC calculation
+ *
+ * The consumer is expected to embed this structure in its calculation context
+ * so that the user context can be acquired back using offsetof()
+ */
+struct ibv_exp_ec_comp {
+	void (*done)(struct ibv_exp_ec_comp *comp);
+	enum ibv_exp_ec_status status;
+};
+
+/**
+ * struct ibv_exp_ec_calc - erasure coding engine context
+ *
+ * @pd: protection domain
+ */
+struct ibv_exp_ec_calc {
+	struct ibv_pd	*pd;
+};
+
+/**
+ * struct ibv_exp_ec_mem - erasure coding memory layout context
+ *
+ * @data_blocks:  array of data sg elements
+ * @num_data_sge: number of data sg elements
+ * @code_blocks:  array of code sg elements
+ * @num_code_sge: number of code sg elements
+ * @block_size:   logical block size
+ */
+struct ibv_exp_ec_mem {
+	struct ibv_sge	*data_blocks;
+	int		num_data_sge;
+	struct ibv_sge	*code_blocks;
+	int		num_code_sge;
+	int		block_size;
+};
+
+/**
+ * struct ibv_exp_ec_stripe - erasure coding stripe descriptor
+ *
+ * @qp: queue-pair connected to the relevant peer
+ * @wr: send work request, can either be an RDMA WR or a SEND
+ */
+struct ibv_exp_ec_stripe {
+	struct ibv_qp		*qp;
+	struct ibv_send_wr	*wr;
+};
+
+struct ibv_exp_peer_commit;
+struct ibv_exp_rollback_ctx;
+
+
+struct ibv_exp_peer_peek;
+struct ibv_exp_peer_abort_peek;
+
+struct verbs_context_exp {
+	/*  "grows up" - new fields go here */
+	int (*exp_peer_peek_cq)(struct ibv_cq *ibcq,
+				struct ibv_exp_peer_peek *peek_ctx);
+	int (*exp_peer_abort_peek_cq)(struct ibv_cq *ibcq,
+				      struct ibv_exp_peer_abort_peek *ack_ctx);
+	int (*exp_peer_commit_qp)(struct ibv_qp *qp,
+				  struct ibv_exp_peer_commit *peer);
+	int (*exp_rollback_send)(struct ibv_qp *qp,
+				 struct ibv_exp_rollback_ctx *rollback);
+	int (*ec_update_sync)(struct ibv_exp_ec_calc *calc,
+			      struct ibv_exp_ec_mem *ec_mem,
+			      uint8_t *data_updates,
+			      uint8_t *code_updates);
+	int (*ec_update_async)(struct ibv_exp_ec_calc *calc,
+			       struct ibv_exp_ec_mem *ec_mem,
+			       uint8_t *data_updates,
+			       uint8_t *code_updates,
+			       struct ibv_exp_ec_comp *ec_comp);
+	struct ibv_exp_ec_calc *(*alloc_ec_calc)(struct ibv_pd *pd,
+						 struct ibv_exp_ec_calc_init_attr *attr);
+	void (*dealloc_ec_calc)(struct ibv_exp_ec_calc *calc);
+	int (*ec_encode_async)(struct ibv_exp_ec_calc *calc,
+			       struct ibv_exp_ec_mem *ec_mem,
+			       struct ibv_exp_ec_comp *ec_comp);
+	int (*ec_encode_sync)(struct ibv_exp_ec_calc *calc,
+			      struct ibv_exp_ec_mem *ec_mem);
+	int (*ec_decode_async)(struct ibv_exp_ec_calc *calc,
+			       struct ibv_exp_ec_mem *ec_mem,
+			       uint8_t *erasures,
+			       uint8_t *decode_matrix,
+			       struct ibv_exp_ec_comp *ec_comp);
+	int (*ec_decode_sync)(struct ibv_exp_ec_calc *calc,
+			      struct ibv_exp_ec_mem *ec_mem,
+			      uint8_t *erasures,
+			      uint8_t *decode_matrix);
+	int (*ec_poll)(struct ibv_exp_ec_calc *calc, int n);
+	int (*ec_encode_send)(struct ibv_exp_ec_calc *calc,
+			      struct ibv_exp_ec_mem *ec_mem,
+			      struct ibv_exp_ec_stripe *data_stripes,
+			      struct ibv_exp_ec_stripe
*code_stripes); + int (*exp_query_gid_attr)(struct ibv_context *context, uint8_t port_num, + unsigned int index, + struct ibv_exp_gid_attr *attr); + int (*exp_destroy_rwq_ind_table)(struct ibv_exp_rwq_ind_table *rwq_ind_table); + struct ibv_exp_rwq_ind_table *(*exp_create_rwq_ind_table)(struct ibv_context *context, + struct ibv_exp_rwq_ind_table_init_attr *init_attr); + int (*exp_destroy_wq)(struct ibv_exp_wq *wq); + int (*exp_modify_wq)(struct ibv_exp_wq *wq, + struct ibv_exp_wq_attr *wq_attr); + struct ibv_exp_wq * (*exp_create_wq)(struct ibv_context *context, + struct ibv_exp_wq_init_attr *wq_init_attr); + int (*drv_exp_poll_dc_info)(struct ibv_context *context, + struct ibv_exp_dc_info_ent *ents, + int nent, int port); + void *(*exp_query_intf)(struct ibv_context *context, struct ibv_exp_query_intf_params *params, + enum ibv_exp_query_intf_status *status); + int (*exp_release_intf)(struct ibv_context *context, void *intf, + struct ibv_exp_release_intf_params *params); + struct ibv_exp_res_domain *(*exp_create_res_domain)(struct ibv_context *context, + struct ibv_exp_res_domain_init_attr *attr); + int (*exp_destroy_res_domain)(struct ibv_context *context, + struct ibv_exp_res_domain *res_dom, + struct ibv_exp_destroy_res_domain_attr *attr); + int (*lib_exp_use_priv_env)(struct ibv_context *context); + int (*lib_exp_setenv)(struct ibv_context *context, const char *name, + const char *value, int overwrite); + struct verbs_environment *venv; + int (*drv_exp_dereg_mr)(struct ibv_mr *mr, struct ibv_exp_dereg_out *out); + int (*exp_rereg_mr)(struct ibv_mr *mr, int flags, struct ibv_pd *pd, + void *addr, size_t length, uint64_t access, + struct ibv_exp_rereg_mr_attr *attr); + int (*drv_exp_rereg_mr)(struct ibv_mr *mr, int flags, struct ibv_pd *pd, + void *addr, size_t length, uint64_t access, + struct ibv_exp_rereg_mr_attr *attr, struct ibv_exp_rereg_out *out); + int (*drv_exp_prefetch_mr)(struct ibv_mr *mr, + struct ibv_exp_prefetch_attr *attr); + int (*lib_exp_prefetch_mr)(struct ibv_mr *mr, + struct ibv_exp_prefetch_attr *attr); + struct ibv_exp_mkey_list_container * (*drv_exp_alloc_mkey_list_memory)(struct ibv_exp_mkey_list_container_attr *attr); + struct ibv_exp_mkey_list_container * (*lib_exp_alloc_mkey_list_memory)(struct ibv_exp_mkey_list_container_attr *attr); + int (*drv_exp_dealloc_mkey_list_memory)(struct ibv_exp_mkey_list_container *mem); + int (*lib_exp_dealloc_mkey_list_memory)(struct ibv_exp_mkey_list_container *mem); + int (*drv_exp_query_mkey)(struct ibv_mr *mr, struct ibv_exp_mkey_attr *mkey_attr); + int (*lib_exp_query_mkey)(struct ibv_mr *mr, struct ibv_exp_mkey_attr *mkey_attr); + struct ibv_mr * (*drv_exp_create_mr)(struct ibv_exp_create_mr_in *in); + struct ibv_mr * (*lib_exp_create_mr)(struct ibv_exp_create_mr_in *in); + int (*drv_exp_arm_dct)(struct ibv_exp_dct *dct, struct ibv_exp_arm_attr *attr); + int (*lib_exp_arm_dct)(struct ibv_exp_dct *dct, struct ibv_exp_arm_attr *attr); + int (*drv_exp_bind_mw)(struct ibv_exp_mw_bind *mw_bind); + int (*lib_exp_bind_mw)(struct ibv_exp_mw_bind *mw_bind); + int (*drv_exp_post_send)(struct ibv_qp *qp, + struct ibv_exp_send_wr *wr, + struct ibv_exp_send_wr **bad_wr); + struct ibv_mr * (*drv_exp_reg_mr)(struct ibv_exp_reg_mr_in *in); + struct ibv_mr * (*lib_exp_reg_mr)(struct ibv_exp_reg_mr_in *in); + struct ibv_ah * (*drv_exp_ibv_create_ah)(struct ibv_pd *pd, + struct ibv_exp_ah_attr *attr_exp); + int (*drv_exp_query_values)(struct ibv_context *context, int q_values, + struct ibv_exp_values *values); + struct ibv_cq * 
(*exp_create_cq)(struct ibv_context *context, int cqe, + struct ibv_comp_channel *channel, + int comp_vector, struct ibv_exp_cq_init_attr *attr); + int (*drv_exp_ibv_poll_cq)(struct ibv_cq *ibcq, int num_entries, + struct ibv_exp_wc *wc, uint32_t wc_size); + void * (*drv_exp_get_legacy_xrc) (struct ibv_srq *ibv_srq); + void (*drv_exp_set_legacy_xrc) (struct ibv_srq *ibv_srq, void *legacy_xrc); + struct ibv_mr * (*drv_exp_ibv_reg_shared_mr)(struct ibv_exp_reg_shared_mr_in *in); + struct ibv_mr * (*lib_exp_ibv_reg_shared_mr)(struct ibv_exp_reg_shared_mr_in *in); + int (*drv_exp_modify_qp)(struct ibv_qp *qp, struct ibv_exp_qp_attr *attr, + uint64_t exp_attr_mask); + int (*lib_exp_modify_qp)(struct ibv_qp *qp, struct ibv_exp_qp_attr *attr, + uint64_t exp_attr_mask); + int (*drv_exp_post_task)(struct ibv_context *context, + struct ibv_exp_task *task, + struct ibv_exp_task **bad_task); + int (*lib_exp_post_task)(struct ibv_context *context, + struct ibv_exp_task *task, + struct ibv_exp_task **bad_task); + int (*drv_exp_modify_cq)(struct ibv_cq *cq, + struct ibv_exp_cq_attr *attr, int attr_mask); + int (*lib_exp_modify_cq)(struct ibv_cq *cq, + struct ibv_exp_cq_attr *attr, int attr_mask); + int (*drv_exp_ibv_destroy_flow) (struct ibv_exp_flow *flow); + int (*lib_exp_ibv_destroy_flow) (struct ibv_exp_flow *flow); + struct ibv_exp_flow * (*drv_exp_ibv_create_flow) (struct ibv_qp *qp, + struct ibv_exp_flow_attr + *flow_attr); + struct ibv_exp_flow * (*lib_exp_ibv_create_flow) (struct ibv_qp *qp, + struct ibv_exp_flow_attr + *flow_attr); + + int (*drv_exp_query_port)(struct ibv_context *context, uint8_t port_num, + struct ibv_exp_port_attr *port_attr); + int (*lib_exp_query_port)(struct ibv_context *context, uint8_t port_num, + struct ibv_exp_port_attr *port_attr); + struct ibv_exp_dct *(*create_dct)(struct ibv_context *context, + struct ibv_exp_dct_init_attr *attr); + int (*destroy_dct)(struct ibv_exp_dct *dct); + int (*query_dct)(struct ibv_exp_dct *dct, struct ibv_exp_dct_attr *attr); + int (*drv_exp_query_device)(struct ibv_context *context, + struct ibv_exp_device_attr *attr); + int (*lib_exp_query_device)(struct ibv_context *context, + struct ibv_exp_device_attr *attr); + struct ibv_qp *(*drv_exp_create_qp)(struct ibv_context *context, + struct ibv_exp_qp_init_attr *init_attr); + struct ibv_qp *(*lib_exp_create_qp)(struct ibv_context *context, + struct ibv_exp_qp_init_attr *init_attr); + size_t sz; /* Set by library on struct allocation, */ + /* must be located as last field */ +}; + + +static inline struct verbs_context_exp *verbs_get_exp_ctx(struct ibv_context *ctx) +{ + struct verbs_context *app_ex_ctx = verbs_get_ctx(ctx); + char *actual_ex_ctx; + + if (!app_ex_ctx || !(app_ex_ctx->has_comp_mask & VERBS_CONTEXT_EXP)) + return NULL; + + actual_ex_ctx = ((char *)ctx) - (app_ex_ctx->sz - sizeof(struct ibv_context)); + + return (struct verbs_context_exp *)(actual_ex_ctx - sizeof(struct verbs_context_exp)); +} + +#define verbs_get_exp_ctx_op(ctx, op) ({ \ + struct verbs_context_exp *_vctx = verbs_get_exp_ctx(ctx); \ + (!_vctx || (_vctx->sz < sizeof(*_vctx) - offsetof(struct verbs_context_exp, op)) || \ + !_vctx->op) ? 
NULL : _vctx; })
+
+#define verbs_set_exp_ctx_op(_vctx, op, ptr) ({ \
+	struct verbs_context_exp *vctx = _vctx; \
+	if (vctx && (vctx->sz >= sizeof(*vctx) - offsetof(struct verbs_context_exp, op))) \
+		vctx->op = ptr; })
+
+
+/*
+ * ibv_exp_alloc_ec_calc() - allocate an erasure coding
+ *     calculation offload context
+ * @pd: user allocated protection domain
+ * @attr: initialization attributes
+ *
+ * Returns a handle to the EC calculation APIs
+ */
+static inline struct ibv_exp_ec_calc *
+ibv_exp_alloc_ec_calc(struct ibv_pd *pd,
+		      struct ibv_exp_ec_calc_init_attr *attr)
+{
+	struct verbs_context_exp *vctx;
+
+	vctx = verbs_get_exp_ctx_op(pd->context, alloc_ec_calc);
+	if (!vctx) {
+		errno = ENOSYS;
+		return NULL;
+	}
+	IBV_EXP_RET_NULL_ON_INVALID_COMP_MASK(attr->comp_mask,
+					      IBV_EXP_EC_CALC_INIT_ATTR_RESERVED - 1);
+
+	return vctx->alloc_ec_calc(pd, attr);
+}
+
+/*
+ * ibv_exp_dealloc_ec_calc() - free an erasure coding
+ *     calculation offload context
+ * @ec_calc: ec context
+ */
+static inline void ibv_exp_dealloc_ec_calc(struct ibv_exp_ec_calc *calc)
+{
+	struct verbs_context_exp *vctx;
+
+	vctx = verbs_get_exp_ctx_op(calc->pd->context, dealloc_ec_calc);
+	if (!vctx) {
+		errno = ENOSYS;
+		return;
+	}
+
+	vctx->dealloc_ec_calc(calc);
+}
+
+/**
+ * ibv_exp_ec_encode_async() - asynchronous encode of the given data blocks
+ *     into code_blocks
+ * @ec_calc: erasure coding calculation engine
+ * @ec_mem:  erasure coding memory layout
+ * @ec_comp: EC calculation completion context
+ *
+ * Restrictions:
+ * - ec_calc is an initialized erasure coding calc engine structure
+ * - ec_mem.data_blocks sg array must describe the data memory
+ *   layout; the total length of the sg elements must satisfy
+ *   k * ec_mem.block_size.
+ * - ec_mem.num_data_sg must not exceed the calc max_data_sge
+ * - ec_mem.code_blocks sg array must describe the code memory
+ *   layout; the total length of the sg elements must satisfy
+ *   m * ec_mem.block_size.
+ * - ec_mem.num_code_sg must not exceed the calc max_code_sge
+ *
+ * Notes:
+ * The ec_calc will perform the erasure coding calc operation;
+ * once it completes, it will invoke the ec_comp->done() handler.
+ * The caller will take it from there.
+ */
+static inline int
+ibv_exp_ec_encode_async(struct ibv_exp_ec_calc *calc,
+			struct ibv_exp_ec_mem *ec_mem,
+			struct ibv_exp_ec_comp *ec_comp)
+{
+	struct verbs_context_exp *vctx;
+
+	vctx = verbs_get_exp_ctx_op(calc->pd->context, ec_encode_async);
+	if (!vctx)
+		return ENOSYS;
+
+	return vctx->ec_encode_async(calc, ec_mem, ec_comp);
+}
+
+/**
+ * ibv_exp_ec_encode_sync() - synchronous encode of the given data blocks
+ *     into code_blocks
+ * @ec_calc: erasure coding calculation engine
+ * @ec_mem:  erasure coding memory layout
+ *
+ * Restrictions:
+ * - ec_calc is an initialized erasure coding calc engine structure
+ * - ec_mem.data_blocks sg array must describe the data memory
+ *   layout; the total length of the sg elements must satisfy
+ *   k * ec_mem.block_size.
+ * - ec_mem.num_data_sg must not exceed the calc max_data_sge
+ * - ec_mem.code_blocks sg array must describe the code memory
+ *   layout; the total length of the sg elements must satisfy
+ *   m * ec_mem.block_size.
+ * - ec_mem.num_code_sg must not exceed the calc max_code_sge
+ *
+ * Returns 0 on success, non-zero on failure.
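+ *
+ * Usage sketch (illustrative only): 'calc' comes from
+ * ibv_exp_alloc_ec_calc() and the sg arrays describe buffers registered
+ * on calc->pd; all names below are hypothetical application state.
+ *
+ *	struct ibv_exp_ec_mem mem = {
+ *		.data_blocks = data_sge, .num_data_sge = k,
+ *		.code_blocks = code_sge, .num_code_sge = m,
+ *		.block_size = 4096,
+ *	};
+ *
+ *	if (ibv_exp_ec_encode_sync(calc, &mem))
+ *		handle_error();		(hypothetical error path)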
+ *
+ */
+static inline int
+ibv_exp_ec_encode_sync(struct ibv_exp_ec_calc *calc,
+		       struct ibv_exp_ec_mem *ec_mem)
+{
+	struct verbs_context_exp *vctx;
+
+	vctx = verbs_get_exp_ctx_op(calc->pd->context, ec_encode_sync);
+	if (!vctx)
+		return ENOSYS;
+
+	return vctx->ec_encode_sync(calc, ec_mem);
+}
+
+/**
+ * ibv_exp_ec_decode_async() - decode a given set of data blocks
+ *     and code_blocks and place the result into the output recovery blocks
+ * @ec_calc: erasure coding calculation engine
+ * @ec_mem:  erasure coding memory layout
+ * @erasures: pointer to a byte-map of which blocks were erased
+ *     and need to be recovered
+ * @decode_matrix: buffer that contains the decode matrix
+ * @ec_comp: EC calculation completion context
+ *
+ * Restrictions:
+ * - ec_calc is an initialized erasure coding calc engine structure
+ * - ec_mem.data_blocks sg array must describe the data memory
+ *   layout; the total length of the sg elements must satisfy
+ *   k * ec_mem.block_size.
+ * - ec_mem.num_data_sg must not exceed the calc max_data_sge
+ * - ec_mem.code_blocks sg array must describe the code memory
+ *   layout; the total length of the sg elements must satisfy
+ *   number of missing blocks * ec_mem.block_size.
+ * - ec_mem.num_code_sg must not exceed the calc max_code_sge
+ * - the erasures byte-map consists of the survived and erased blocks.
+ *   The first k bytes stand for the k data blocks, followed by
+ *   m bytes that stand for the code blocks.
+ *
+ * Returns 0 on success, or non-zero on failure with a corresponding
+ * errno.
+ *
+ * Notes:
+ * The ec_calc will perform the erasure coding calc operation;
+ * once it completes, it will invoke the ec_comp->done() handler.
+ * The caller will take it from there.
+ */
+static inline int
+ibv_exp_ec_decode_async(struct ibv_exp_ec_calc *calc,
+			struct ibv_exp_ec_mem *ec_mem,
+			uint8_t *erasures,
+			uint8_t *decode_matrix,
+			struct ibv_exp_ec_comp *ec_comp)
+{
+	struct verbs_context_exp *vctx;
+
+	vctx = verbs_get_exp_ctx_op(calc->pd->context, ec_decode_async);
+	if (!vctx)
+		return ENOSYS;
+
+	return vctx->ec_decode_async(calc, ec_mem, erasures,
+				     decode_matrix, ec_comp);
+}
+
+/**
+ * ibv_exp_ec_decode_sync() - decode a given set of data blocks
+ *     and code_blocks and place the result into the output recovery blocks
+ * @ec_calc: erasure coding calculation engine
+ * @ec_mem:  erasure coding memory layout
+ * @erasures: pointer to a byte-map of which blocks were erased
+ *     and need to be recovered
+ * @decode_matrix: registered buffer of the decode matrix
+ *
+ * Restrictions:
+ * - ec_calc is an initialized erasure coding calc engine structure
+ * - ec_mem.data_blocks sg array must describe the data memory
+ *   layout; the total length of the sg elements must satisfy
+ *   k * ec_mem.block_size.
+ * - ec_mem.num_data_sg must not exceed the calc max_data_sge
+ * - ec_mem.code_blocks sg array must describe the code memory
+ *   layout; the total length of the sg elements must satisfy
+ *   number of missing blocks * ec_mem.block_size.
+ * - ec_mem.num_code_sg must not exceed the calc max_code_sge
+ * - the erasures byte-map consists of the survived and erased blocks.
+ *   The first k bytes stand for the k data blocks, followed by
+ *   m bytes that stand for the code blocks.
+ *
+ * Returns 0 on success, or non-zero on failure with a corresponding
+ * errno.
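+ *
+ * Usage sketch (illustrative only): recovering data block 2 of a k=4, m=2
+ * code.  'decode_matrix' is assumed to be precomputed by the application
+ * for this particular erasure pattern, and 'mem' set up as for encode.
+ *
+ *	uint8_t erasures[6] = { 0, 0, 1, 0, 0, 0 };	(4 data bytes then
+ *							 2 code bytes; block
+ *							 2 was lost)
+ *	err = ibv_exp_ec_decode_sync(calc, &mem, erasures, decode_matrix);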
+ */
+static inline int
+ibv_exp_ec_decode_sync(struct ibv_exp_ec_calc *calc,
+		       struct ibv_exp_ec_mem *ec_mem,
+		       uint8_t *erasures,
+		       uint8_t *decode_matrix)
+{
+	struct verbs_context_exp *vctx;
+
+	vctx = verbs_get_exp_ctx_op(calc->pd->context, ec_decode_sync);
+	if (!vctx)
+		return ENOSYS;
+
+	return vctx->ec_decode_sync(calc, ec_mem, erasures, decode_matrix);
+}
+
+/**
+ * ibv_exp_ec_update_async() - computes redundancies based on updated blocks,
+ *     their replacements and old redundancies, and places them into the output
+ *     code blocks
+ * @ec_calc: erasure coding calculation engine
+ * @ec_mem:  erasure coding memory layout
+ * @data_updates: array which is a map of data blocks that are updated
+ * @code_updates: array which is a map of code blocks to be computed
+ * @ec_comp: EC calculation completion context
+ *
+ * Restrictions:
+ * - ec_calc is an initialized erasure coding calc engine structure
+ * - ec_mem.data_blocks sg array must describe the data memory
+ *   layout in the following way:
+ *   assume we want to update blocks d_i and d_j with i<j,
+ *   then the sg entries should be as follows:
+ *   c_0 ... c_m d_i d'_i d_j d'_j
+ *   where c_0 ... c_m are all previous redundancies,
+ *   d_i is the original i-th block, d'_i is the new i-th block
+ * - ec_mem.num_data_sg should be equal to the number of sg entries,
+ *   i.e. to num of code blocks to be updated + 2*num of updates
+ * - ec_mem.code_blocks sg array must describe the code memory
+ *   layout; the total length of the sg elements must satisfy
+ *   the number of overall code blocks to be updated.
+ * - ec_mem.num_code_sg must be equal to the number of code blocks
+ *   to be updated and must not exceed the calc max_code_sge
+ * - data_updates is an array of size k (=number of data blocks)
+ *   and is a byte-map for blocks to be updated, i.e.
+ *   if we want to update the i-th block and do not want to update the j-th block,
+ *   then data_updates[i]=1 and data_updates[j]=0.
+ * - code_updates is an array of size m (=number of code blocks)
+ *   and is a byte-map of code blocks that should be computed, i.e.
+ *   if we want to compute the i-th block and do not want to compute the j-th block,
+ *   then code_updates[i]=1 and code_updates[j]=0.
+ *
+ * Returns 0 on success, or non-zero on failure with a corresponding
+ * errno.
+ */
+
+static inline int
+ibv_exp_ec_update_async(struct ibv_exp_ec_calc *calc,
+			struct ibv_exp_ec_mem *ec_mem,
+			uint8_t *data_updates,
+			uint8_t *code_updates,
+			struct ibv_exp_ec_comp *ec_comp)
+{
+	struct verbs_context_exp *vctx;
+
+	vctx = verbs_get_exp_ctx_op(calc->pd->context, ec_update_async);
+	if (!vctx)
+		return -ENOSYS;
+
+	return vctx->ec_update_async(calc, ec_mem, data_updates,
+				     code_updates, ec_comp);
+}
+
+/**
+ * ibv_exp_ec_update_sync() - computes redundancies based on updated blocks,
+ *     their replacements and old redundancies, and places them into the output
+ *     code blocks
+ * @ec_calc: erasure coding calculation engine
+ * @ec_mem:  erasure coding memory layout
+ * @data_updates: array which is a map of data blocks that are updated
+ * @code_updates: array which is a map of code blocks to be computed
+ *
+ * Restrictions:
+ * - ec_calc is an initialized erasure coding calc engine structure
+ * - ec_mem.data_blocks sg array must describe the data memory
+ *   layout in the following way:
+ *   assume we want to update blocks d_i and d_j with i<j,
+ *   then the sg entries should be as follows:
+ *   c_0..c_m d_i d'_i d_j d'_j
+ *   where c_0 .. c_m are previous redundancies,
+ *   d_i is the original i-th block, d'_i is the new i-th block
+ * - ec_mem.num_data_sg should be equal to the number of sg entries,
+ *   i.e. to num of code blocks to be updated + 2*num of updates
+ * - ec_mem.code_blocks sg array must describe the code memory
+ *   layout; the total length of the sg elements must satisfy
+ *   the number of overall code blocks to be updated.
+ * - ec_mem.num_code_sg must be equal to the number of code blocks
+ *   to be updated and must not exceed the calc max_code_sge
+ * - data_updates is an array of size k (=number of data blocks)
+ *   and is a byte-map for blocks to be updated, i.e.
+ *   if we want to update the i-th block and do not want to update the j-th block,
+ *   then data_updates[i]=1 and data_updates[j]=0.
+ * - code_updates is an array of size m (=number of code blocks)
+ *   and is a byte-map of code blocks that should be computed, i.e.
+ *   if we want to compute the i-th block and do not want to compute the j-th block,
+ *   then code_updates[i]=1 and code_updates[j]=0.
+ *
+ * Returns 0 on success, or non-zero on failure with a corresponding
+ * errno.
+ */
+
+static inline int
+ibv_exp_ec_update_sync(struct ibv_exp_ec_calc *calc,
+		       struct ibv_exp_ec_mem *ec_mem,
+		       uint8_t *data_updates,
+		       uint8_t *code_updates)
+{
+	struct verbs_context_exp *vctx;
+
+	vctx = verbs_get_exp_ctx_op(calc->pd->context, ec_update_sync);
+	if (!vctx)
+		return -ENOSYS;
+
+	return vctx->ec_update_sync(calc, ec_mem, data_updates, code_updates);
+}
+
+/**
+ * ibv_exp_ec_poll() - poll for EC calculation
+ *
+ * @calc: EC calc context
+ * @n:    number of calculations to poll
+ *
+ * Returns the number of calc completions processed, which
+ * is less than or equal to n. Relevant only when the EC calc context
+ * was allocated in polling mode.
+ */
+static inline int
+ibv_exp_ec_poll(struct ibv_exp_ec_calc *calc, int n)
+{
+	struct verbs_context_exp *vctx;
+
+	vctx = verbs_get_exp_ctx_op(calc->pd->context, ec_poll);
+	if (!vctx)
+		return ENOSYS;
+
+	return vctx->ec_poll(calc, n);
+}
+
+/**
+ * ibv_exp_ec_encode_send() - encode the given data blocks and
+ *     initiate the data and code block transfers to the wire on the stripes' QPs.
+ * @ec_calc: erasure coding calculation engine
+ * @ec_mem:  erasure coding memory layout context
+ * @data_stripes: array of stripe handles, each represents a data block channel
+ * @code_stripes: array of stripe handles, each represents a code block channel
+ *
+ * Restrictions:
+ * - ec_calc is an initialized erasure coding calc engine structure
+ * - ec_mem.data_blocks sg array must describe the data memory
+ *   layout; the total length of the sg elements must satisfy
+ *   k * ec_mem.block_size.
+ * - ec_mem.num_data_sg must not exceed the calc max_data_sge
+ * - ec_mem.code_blocks sg array must describe the code memory
+ *   layout; the total length of the sg elements must satisfy
+ *   m * ec_mem.block_size.
+ * - ec_mem.num_code_sg must not exceed the calc max_code_sge
+ *
+ * Returns 0 on success, or non-zero on failure with a corresponding
+ * errno.
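+ *
+ * Usage sketch (illustrative only): each stripe pairs a connected QP with
+ * a prepared send/RDMA WR; data stripes carry the original blocks while
+ * the engine fills and sends the computed code blocks on the code
+ * stripes.  All names below are hypothetical application state.
+ *
+ *	struct ibv_exp_ec_stripe data_stripes[K], code_stripes[M];
+ *	(set data_stripes[i].qp / .wr and code_stripes[j].qp / .wr first)
+ *	err = ibv_exp_ec_encode_send(calc, &mem, data_stripes, code_stripes);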
+ */ +static inline int +ibv_exp_ec_encode_send(struct ibv_exp_ec_calc *calc, + struct ibv_exp_ec_mem *ec_mem, + struct ibv_exp_ec_stripe *data_stripes, + struct ibv_exp_ec_stripe *code_stripes) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(calc->pd->context, ec_encode_send); + if (!vctx) + return -ENOSYS; + + return vctx->ec_encode_send(calc, ec_mem, data_stripes, code_stripes); +} + +static inline struct ibv_qp * +ibv_exp_create_qp(struct ibv_context *context, struct ibv_exp_qp_init_attr *qp_init_attr) +{ + struct verbs_context_exp *vctx; + uint32_t mask = qp_init_attr->comp_mask; + + if (mask == IBV_EXP_QP_INIT_ATTR_PD) + return ibv_create_qp(qp_init_attr->pd, + (struct ibv_qp_init_attr *) qp_init_attr); + + vctx = verbs_get_exp_ctx_op(context, lib_exp_create_qp); + if (!vctx) { + errno = ENOSYS; + return NULL; + } + IBV_EXP_RET_NULL_ON_INVALID_COMP_MASK(qp_init_attr->comp_mask, + IBV_EXP_QP_INIT_ATTR_RESERVED1 - 1); + + return vctx->lib_exp_create_qp(context, qp_init_attr); +} + +/* + * ibv_exp_use_priv_env + * + * switch to use private environment + */ +static inline int ibv_exp_use_priv_env(struct ibv_context *context) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(context, lib_exp_use_priv_env); + if (!vctx) { + errno = ENOSYS; + return -1; + } + + return vctx->lib_exp_use_priv_env(context); +} + +/* + * ibv_exp_poll_dc_info + * + * The function is not thread safe. Any locking must be done by the user. + * + * Return: >= 0 number of returned entries + * < 0 error + * + */ +static inline int ibv_exp_poll_dc_info(struct ibv_context *context, + struct ibv_exp_dc_info_ent *ents, + int nent, int port) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(context, drv_exp_poll_dc_info); + if (!vctx) { + errno = ENOSYS; + return -1; + } + + return vctx->drv_exp_poll_dc_info(context, ents, nent, port); +} + +/* + * ibv_exp_setenv + * + * see man setenv for parameter description + */ +static inline int ibv_exp_setenv(struct ibv_context *context, + const char *name, + const char *value, + int overwrite) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(context, lib_exp_setenv); + if (!vctx) + return setenv(name, value, overwrite); + + return vctx->lib_exp_setenv(context, name, value, overwrite); +} + +static inline int ibv_exp_query_device(struct ibv_context *context, + struct ibv_exp_device_attr *attr) +{ + struct verbs_context_exp *vctx = verbs_get_exp_ctx_op(context, + lib_exp_query_device); + if (!vctx) + return ENOSYS; + + IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(attr->comp_mask, + IBV_EXP_DEVICE_ATTR_RESERVED - 1); + return vctx->lib_exp_query_device(context, attr); +} + +static inline struct ibv_exp_dct * +ibv_exp_create_dct(struct ibv_context *context, + struct ibv_exp_dct_init_attr *attr) +{ + struct verbs_context_exp *vctx; + struct ibv_exp_dct *dct; + + vctx = verbs_get_exp_ctx_op(context, create_dct); + if (!vctx) { + errno = ENOSYS; + return NULL; + } + + IBV_EXP_RET_NULL_ON_INVALID_COMP_MASK(attr->comp_mask, + IBV_EXP_DCT_INIT_ATTR_RESERVED - 1); + pthread_mutex_lock(&context->mutex); + dct = vctx->create_dct(context, attr); + if (dct) + dct->context = context; + + pthread_mutex_unlock(&context->mutex); + + return dct; +} + +static inline int ibv_exp_destroy_dct(struct ibv_exp_dct *dct) +{ + struct verbs_context_exp *vctx; + struct ibv_context *context = dct->context; + int err; + + vctx = verbs_get_exp_ctx_op(context, destroy_dct); + if (!vctx) { + errno = ENOSYS; + return errno; + } + + 
	pthread_mutex_lock(&context->mutex);
+	err = vctx->destroy_dct(dct);
+	pthread_mutex_unlock(&context->mutex);
+
+	return err;
+}
+
+static inline int ibv_exp_query_dct(struct ibv_exp_dct *dct,
+				    struct ibv_exp_dct_attr *attr)
+{
+	struct verbs_context_exp *vctx;
+	struct ibv_context *context = dct->context;
+	int err;
+
+	vctx = verbs_get_exp_ctx_op(context, query_dct);
+	if (!vctx) {
+		errno = ENOSYS;
+		return errno;
+	}
+
+	IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(attr->comp_mask,
+						IBV_EXP_DCT_ATTR_RESERVED - 1);
+	pthread_mutex_lock(&context->mutex);
+	err = vctx->query_dct(dct, attr);
+	pthread_mutex_unlock(&context->mutex);
+
+	return err;
+}
+
+static inline int ibv_exp_arm_dct(struct ibv_exp_dct *dct,
+				  struct ibv_exp_arm_attr *attr)
+{
+	struct verbs_context_exp *vctx;
+	struct ibv_context *context = dct->context;
+	int err;
+
+	vctx = verbs_get_exp_ctx_op(context, lib_exp_arm_dct);
+	if (!vctx) {
+		errno = ENOSYS;
+		return errno;
+	}
+
+	IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(attr->comp_mask,
+						IBV_EXP_ARM_ATTR_RESERVED - 1);
+	pthread_mutex_lock(&context->mutex);
+	err = vctx->lib_exp_arm_dct(dct, attr);
+	pthread_mutex_unlock(&context->mutex);
+
+	return err;
+}
+
+static inline int ibv_exp_query_port(struct ibv_context *context,
+				     uint8_t port_num,
+				     struct ibv_exp_port_attr *port_attr)
+{
+	struct verbs_context_exp *vctx;
+
+	if (0 == port_attr->comp_mask)
+		return ibv_query_port(context, port_num,
+				      &port_attr->port_attr);
+
+	/* Check that only valid flags were given */
+	if (!(port_attr->comp_mask & IBV_EXP_QUERY_PORT_ATTR_MASK1) ||
+	    (port_attr->comp_mask & ~IBV_EXP_QUERY_PORT_ATTR_MASKS) ||
+	    (port_attr->mask1 & ~IBV_EXP_QUERY_PORT_MASK)) {
+		errno = EINVAL;
+		return -errno;
+	}
+
+	vctx = verbs_get_exp_ctx_op(context, lib_exp_query_port);
+
+	if (!vctx) {
+		/* Fallback to legacy mode */
+		if (port_attr->comp_mask == IBV_EXP_QUERY_PORT_ATTR_MASK1 &&
+		    !(port_attr->mask1 & ~IBV_EXP_QUERY_PORT_STD_MASK))
+			return ibv_query_port(context, port_num,
+					      &port_attr->port_attr);
+
+		/* Unsupported field was requested */
+		errno = ENOSYS;
+		return -errno;
+	}
+	IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(port_attr->comp_mask,
+						IBV_EXP_QUERY_PORT_ATTR_RESERVED - 1);
+
+	return vctx->lib_exp_query_port(context, port_num, port_attr);
+}
+
+/**
+ * ibv_exp_post_task - Post a list of tasks to different QPs.
+ */ +static inline int ibv_exp_post_task(struct ibv_context *context, + struct ibv_exp_task *task, + struct ibv_exp_task **bad_task) +{ + struct verbs_context_exp *vctx = verbs_get_exp_ctx_op(context, + lib_exp_post_task); + if (!vctx) + return ENOSYS; + + IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(task->comp_mask, + IBV_EXP_TASK_RESERVED - 1); + + return vctx->lib_exp_post_task(context, task, bad_task); +} + +static inline int ibv_exp_query_values(struct ibv_context *context, int q_values, + struct ibv_exp_values *values) +{ + struct verbs_context_exp *vctx = verbs_get_exp_ctx_op(context, + drv_exp_query_values); + if (!vctx) { + errno = ENOSYS; + return -errno; + } + IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(values->comp_mask, + IBV_EXP_VALUES_RESERVED - 1); + + return vctx->drv_exp_query_values(context, q_values, values); +} + +static inline struct ibv_exp_flow *ibv_exp_create_flow(struct ibv_qp *qp, + struct ibv_exp_flow_attr *flow) +{ + struct verbs_context_exp *vctx = verbs_get_exp_ctx_op(qp->context, + lib_exp_ibv_create_flow); + if (!vctx || !vctx->lib_exp_ibv_create_flow) + return NULL; + + if (flow->reserved != 0L) { + fprintf(stderr, "%s:%d: flow->reserved must be 0\n", __FUNCTION__, __LINE__); + flow->reserved = 0L; + } + + return vctx->lib_exp_ibv_create_flow(qp, flow); +} + +static inline int ibv_exp_destroy_flow(struct ibv_exp_flow *flow_id) +{ + struct verbs_context_exp *vctx = verbs_get_exp_ctx_op(flow_id->context, + lib_exp_ibv_destroy_flow); + if (!vctx || !vctx->lib_exp_ibv_destroy_flow) + return -ENOSYS; + + return vctx->lib_exp_ibv_destroy_flow(flow_id); +} + +static inline int ibv_exp_poll_cq(struct ibv_cq *ibcq, int num_entries, + struct ibv_exp_wc *wc, uint32_t wc_size) +{ + struct verbs_context_exp *vctx = verbs_get_exp_ctx_op(ibcq->context, + drv_exp_ibv_poll_cq); + if (!vctx) + return -ENOSYS; + + return vctx->drv_exp_ibv_poll_cq(ibcq, num_entries, wc, wc_size); +} + +/** + * ibv_exp_post_send - Post a list of work requests to a send queue. + */ +static inline int ibv_exp_post_send(struct ibv_qp *qp, + struct ibv_exp_send_wr *wr, + struct ibv_exp_send_wr **bad_wr) +{ + struct verbs_context_exp *vctx = verbs_get_exp_ctx_op(qp->context, + drv_exp_post_send); + if (!vctx) + return -ENOSYS; + + return vctx->drv_exp_post_send(qp, wr, bad_wr); +} + +/** + * ibv_exp_reg_shared_mr - Register to an existing shared memory region + * @in - Experimental register shared MR input data. + */ +static inline struct ibv_mr *ibv_exp_reg_shared_mr(struct ibv_exp_reg_shared_mr_in *mr_in) +{ + struct verbs_context_exp *vctx = verbs_get_exp_ctx_op(mr_in->pd->context, + lib_exp_ibv_reg_shared_mr); + if (!vctx) { + errno = ENOSYS; + return NULL; + } + IBV_EXP_RET_NULL_ON_INVALID_COMP_MASK(mr_in->comp_mask, + IBV_EXP_REG_SHARED_MR_RESERVED - 1); + + return vctx->lib_exp_ibv_reg_shared_mr(mr_in); +} + +/** + * ibv_exp_modify_cq - Modifies the attributes for the specified CQ. + * @cq: The CQ to modify. + * @cq_attr: Specifies the CQ attributes to modify. + * @cq_attr_mask: A bit-mask used to specify which attributes of the CQ + * are being modified. 
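+ *
+ * Usage sketch (illustrative only; assumes the CQ moderation attribute
+ * and mask constants declared earlier in this header):
+ *
+ *	struct ibv_exp_cq_attr attr = {
+ *		.comp_mask = IBV_EXP_CQ_ATTR_MODERATION,
+ *		.moderation = { .cq_count = 16, .cq_period = 8 },
+ *	};
+ *	err = ibv_exp_modify_cq(cq, &attr, IBV_EXP_CQ_MODERATION);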
+ */ +static inline int ibv_exp_modify_cq(struct ibv_cq *cq, + struct ibv_exp_cq_attr *cq_attr, + int cq_attr_mask) +{ + struct verbs_context_exp *vctx = verbs_get_exp_ctx_op(cq->context, + lib_exp_modify_cq); + if (!vctx) + return ENOSYS; + + IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(cq_attr->comp_mask, + IBV_EXP_CQ_ATTR_RESERVED - 1); + + return vctx->lib_exp_modify_cq(cq, cq_attr, cq_attr_mask); +} + +static inline struct ibv_cq *ibv_exp_create_cq(struct ibv_context *context, + int cqe, + void *cq_context, + struct ibv_comp_channel *channel, + int comp_vector, + struct ibv_exp_cq_init_attr *attr) +{ + struct verbs_context_exp *vctx; + struct ibv_cq *cq; + + vctx = verbs_get_exp_ctx_op(context, exp_create_cq); + if (!vctx) { + errno = ENOSYS; + return NULL; + } + + IBV_EXP_RET_NULL_ON_INVALID_COMP_MASK(attr->comp_mask, + IBV_EXP_CQ_INIT_ATTR_RESERVED1 - 1); + pthread_mutex_lock(&context->mutex); + cq = vctx->exp_create_cq(context, cqe, channel, comp_vector, attr); + if (cq) { + cq->context = context; + cq->channel = channel; + if (channel) + ++channel->refcnt; + cq->cq_context = cq_context; + cq->comp_events_completed = 0; + cq->async_events_completed = 0; + pthread_mutex_init(&cq->mutex, NULL); + pthread_cond_init(&cq->cond, NULL); + } + + pthread_mutex_unlock(&context->mutex); + + return cq; +} + +/** + * ibv_exp_modify_qp - Modify a queue pair. + * The argument exp_attr_mask specifies the QP attributes to be modified. + * Use ibv_exp_qp_attr_mask for this argument. + */ +static inline int +ibv_exp_modify_qp(struct ibv_qp *qp, struct ibv_exp_qp_attr *attr, uint64_t exp_attr_mask) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(qp->context, lib_exp_modify_qp); + if (!vctx) { + errno = ENOSYS; + return errno; + } + IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(attr->comp_mask, + IBV_EXP_QP_ATTR_RESERVED - 1); + + return vctx->lib_exp_modify_qp(qp, attr, exp_attr_mask); +} + +/** + * ibv_exp_reg_mr - Register a memory region + */ +static inline struct ibv_mr *ibv_exp_reg_mr(struct ibv_exp_reg_mr_in *in) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(in->pd->context, lib_exp_reg_mr); + if (!vctx) { + errno = ENOSYS; + return NULL; + } + IBV_EXP_RET_NULL_ON_INVALID_COMP_MASK(in->comp_mask, + IBV_EXP_REG_MR_RESERVED - 1); + + return vctx->lib_exp_reg_mr(in); +} + + +/** + * ibv_exp_bind_mw - Bind a memory window to a region + */ +static inline int ibv_exp_bind_mw(struct ibv_exp_mw_bind *mw_bind) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(mw_bind->mw->context, lib_exp_bind_mw); + if (!vctx) { + errno = ENOSYS; + return errno; + } + IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(mw_bind->comp_mask, + IBV_EXP_BIND_MW_RESERVED - 1); + + return vctx->lib_exp_bind_mw(mw_bind); +} + +/** + * ibv_exp_prefetch_mr - Prefetch part of a memory region. + * + * Can be used only with MRs registered with IBV_EXP_ACCESS_ON_DEMAND + * + * Returns 0 on success, + * - ENOSYS libibverbs or provider driver doesn't support the prefetching verb. + * - EFAULT when the range requested is out of the memory region bounds, or when + * parts of it are not part of the process address space. + * - EINVAL when the MR is invalid. 
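+ *
+ * Usage sketch (illustrative only; assumes the prefetch attribute
+ * structure and flags declared earlier in this header):
+ *
+ *	struct ibv_exp_prefetch_attr attr = {
+ *		.flags = IBV_EXP_PREFETCH_WRITE_ACCESS,
+ *		.addr = mr->addr,
+ *		.length = mr->length,
+ *	};
+ *	err = ibv_exp_prefetch_mr(mr, &attr);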
+ */ +static inline int ibv_exp_prefetch_mr( + struct ibv_mr *mr, + struct ibv_exp_prefetch_attr *attr) +{ + struct verbs_context_exp *vctx = verbs_get_exp_ctx_op(mr->context, + lib_exp_prefetch_mr); + + if (!vctx) { + errno = ENOSYS; + return errno; + } + IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(attr->comp_mask, + IBV_EXP_PREFETCH_MR_RESERVED - 1); + + return vctx->lib_exp_prefetch_mr(mr, attr); +} + +typedef int (*drv_exp_post_send_func)(struct ibv_qp *qp, + struct ibv_exp_send_wr *wr, + struct ibv_exp_send_wr **bad_wr); +typedef int (*drv_post_send_func)(struct ibv_qp *qp, struct ibv_send_wr *wr, + struct ibv_send_wr **bad_wr); +typedef int (*drv_exp_poll_cq_func)(struct ibv_cq *ibcq, int num_entries, + struct ibv_exp_wc *wc, uint32_t wc_size); +typedef int (*drv_poll_cq_func)(struct ibv_cq *cq, int num_entries, struct ibv_wc *wc); +typedef int (*drv_post_recv_func)(struct ibv_qp *qp, struct ibv_recv_wr *wr, + struct ibv_recv_wr **bad_wr); + +static inline void *ibv_exp_get_provider_func(struct ibv_context *context, + enum ibv_exp_func_name name) +{ + struct verbs_context_exp *vctx; + + switch (name) { + case IBV_EXP_POST_SEND_FUNC: + vctx = verbs_get_exp_ctx_op(context, drv_exp_post_send); + if (!vctx) + goto error; + + return (void *)vctx->drv_exp_post_send; + + case IBV_EXP_POLL_CQ_FUNC: + vctx = verbs_get_exp_ctx_op(context, drv_exp_ibv_poll_cq); + if (!vctx) + goto error; + + return (void *)vctx->drv_exp_ibv_poll_cq; + + case IBV_POST_SEND_FUNC: + if (!context->ops.post_send) + goto error; + + return (void *)context->ops.post_send; + + case IBV_POLL_CQ_FUNC: + if (!context->ops.poll_cq) + goto error; + + return (void *)context->ops.poll_cq; + + case IBV_POST_RECV_FUNC: + if (!context->ops.post_recv) + goto error; + + return (void *)context->ops.post_recv; + + default: + break; + } + +error: + errno = ENOSYS; + return NULL; +} + +static inline struct ibv_mr *ibv_exp_create_mr(struct ibv_exp_create_mr_in *in) +{ + struct verbs_context_exp *vctx; + struct ibv_mr *mr; + + vctx = verbs_get_exp_ctx_op(in->pd->context, lib_exp_create_mr); + if (!vctx) { + errno = ENOSYS; + return NULL; + } + + IBV_EXP_RET_NULL_ON_INVALID_COMP_MASK(in->comp_mask, + IBV_EXP_CREATE_MR_IN_RESERVED - 1); + mr = vctx->lib_exp_create_mr(in); + if (mr) + mr->pd = in->pd; + + return mr; +} + +static inline int ibv_exp_query_mkey(struct ibv_mr *mr, + struct ibv_exp_mkey_attr *attr) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(mr->context, lib_exp_query_mkey); + if (!vctx) { + errno = ENOSYS; + return errno; + } + + IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(attr->comp_mask, + IBV_EXP_MKEY_ATTR_RESERVED - 1); + + return vctx->lib_exp_query_mkey(mr, attr); +} + +static inline int ibv_exp_dealloc_mkey_list_memory(struct ibv_exp_mkey_list_container *mem) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(mem->context, + lib_exp_dealloc_mkey_list_memory); + if (!vctx) { + errno = ENOSYS; + return errno; + } + + return vctx->lib_exp_dealloc_mkey_list_memory(mem); +} + +static inline struct ibv_exp_mkey_list_container * +ibv_exp_alloc_mkey_list_memory(struct ibv_exp_mkey_list_container_attr *attr) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(attr->pd->context, + lib_exp_alloc_mkey_list_memory); + if (!vctx) { + errno = ENOSYS; + return NULL; + } + + IBV_EXP_RET_NULL_ON_INVALID_COMP_MASK(attr->comp_mask, + IBV_EXP_MKEY_LIST_CONTAINER_RESERVED - 1); + + return vctx->lib_exp_alloc_mkey_list_memory(attr); +} + +/** + * ibv_rereg_mr - Re-Register a memory region + * 
+ * For exp_access use ibv_exp_access_flags + */ +static inline int ibv_exp_rereg_mr(struct ibv_mr *mr, int flags, + struct ibv_pd *pd, void *addr, + size_t length, uint64_t exp_access, + struct ibv_exp_rereg_mr_attr *attr) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(mr->context, exp_rereg_mr); + if (!vctx) + return errno = ENOSYS; + + IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(attr->comp_mask, + IBV_EXP_REREG_MR_ATTR_RESERVED - 1); + + return vctx->exp_rereg_mr(mr, flags, pd, addr, length, exp_access, attr); +} + +/** + * ibv_exp_create_res_domain - create resource domain + */ +static inline struct ibv_exp_res_domain *ibv_exp_create_res_domain(struct ibv_context *context, + struct ibv_exp_res_domain_init_attr *attr) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(context, exp_create_res_domain); + if (!vctx) { + errno = ENOSYS; + return NULL; + } + + IBV_EXP_RET_NULL_ON_INVALID_COMP_MASK(attr->comp_mask, + IBV_EXP_RES_DOMAIN_RESERVED - 1); + + return vctx->exp_create_res_domain(context, attr); +} + +/** + * ibv_exp_destroy_res_domain - destroy resource domain + */ +static inline int ibv_exp_destroy_res_domain(struct ibv_context *context, + struct ibv_exp_res_domain *res_dom, + struct ibv_exp_destroy_res_domain_attr *attr) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(context, exp_destroy_res_domain); + if (!vctx) + return errno = ENOSYS; + + if (attr) + IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(attr->comp_mask, + IBV_EXP_DESTROY_RES_DOMAIN_RESERVED - 1); + + return vctx->exp_destroy_res_domain(context, res_dom, attr); +} + +/** + * ibv_exp_query_intf - query for family of verbs interface for specific QP/CQ + * + * Usually family of data-path verbs. + * Application may call ibv_exp_query_intf for QPs in the following states: + * IBV_QPS_INIT, IBV_QPS_RTR and IBV_QPS_RTS + * + * Returns the family of verbs. + * On failure returns NULL. The failure reason provided by the 'status' + * output variable. 
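+ *
+ * Usage sketch (illustrative only; the scope/family constants are the
+ * ones declared earlier in this header):
+ *
+ *	enum ibv_exp_query_intf_status status;
+ *	struct ibv_exp_query_intf_params params = {
+ *		.intf_scope = IBV_EXP_INTF_GLOBAL,
+ *		.intf = IBV_EXP_QP_BURST,
+ *		.obj = qp,	(an RTS RAW_PACKET QP, for example)
+ *	};
+ *	struct ibv_exp_qp_burst_family *burst =
+ *		ibv_exp_query_intf(qp->context, &params, &status);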
+ */ +static inline void *ibv_exp_query_intf(struct ibv_context *context, + struct ibv_exp_query_intf_params *params, + enum ibv_exp_query_intf_status *status) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(context, exp_query_intf); + if (!vctx) { + errno = ENOSYS; + return NULL; + } + + IBV_EXP_RET_NULL_ON_INVALID_COMP_MASK(params->comp_mask, + IBV_EXP_QUERY_INTF_RESERVED - 1); + + return vctx->exp_query_intf(context, params, status); +} + +/** + * ibv_exp_release_intf - release the queried interface + */ +static inline int ibv_exp_release_intf(struct ibv_context *context, void *intf, + struct ibv_exp_release_intf_params *params) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(context, exp_release_intf); + if (!vctx) + return errno = ENOSYS; + + if (params) + IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(params->comp_mask, + IBV_EXP_RELEASE_INTF_RESERVED - 1); + + return vctx->exp_release_intf(context, intf, params); +} + +static inline struct ibv_exp_wq *ibv_exp_create_wq(struct ibv_context *context, + struct ibv_exp_wq_init_attr *wq_init_attr) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(context, exp_create_wq); + if (!vctx) { + errno = ENOSYS; + return NULL; + } + + IBV_EXP_RET_NULL_ON_INVALID_COMP_MASK(wq_init_attr->comp_mask, + IBV_EXP_CREATE_WQ_RESERVED - 1); + + return vctx->exp_create_wq(context, wq_init_attr); +} + +static inline int ibv_exp_modify_wq(struct ibv_exp_wq *wq, struct ibv_exp_wq_attr *wq_attr) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(wq->context, exp_modify_wq); + if (!vctx) + return ENOSYS; + + IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(wq_attr->attr_mask, + IBV_EXP_WQ_ATTR_RESERVED - 1); + return vctx->exp_modify_wq(wq, wq_attr); +} + +static inline int ibv_exp_destroy_wq(struct ibv_exp_wq *wq) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(wq->context, exp_destroy_wq); + if (!vctx) + return ENOSYS; + + return vctx->exp_destroy_wq(wq); +} + +/* + * ibv_exp_create_rwq_ind_table - Creates a RQ Indirection Table associated + * with the specified protection domain. + * @pd: The protection domain associated with the Indirection Table. + * @ibv_exp_rwq_ind_table_init_attr: A list of initial attributes required to + * create the Indirection Table. + * Return Value + * ibv_exp_create_rwq_ind_table returns a pointer to the created + * Indirection Table, or NULL if the request fails. + */ +static inline struct ibv_exp_rwq_ind_table *ibv_exp_create_rwq_ind_table(struct ibv_context *context, + struct ibv_exp_rwq_ind_table_init_attr *init_attr) +{ + struct verbs_context_exp *vctx; + + vctx = verbs_get_exp_ctx_op(context, exp_create_rwq_ind_table); + if (!vctx) { + errno = ENOSYS; + return NULL; + } + + IBV_EXP_RET_NULL_ON_INVALID_COMP_MASK(init_attr->comp_mask, + IBV_EXP_CREATE_IND_TABLE_RESERVED - 1); + return vctx->exp_create_rwq_ind_table(context, init_attr); +} + +/* + * ibv_exp_destroy_rwq_ind_table - Destroys the specified Indirection Table. + * @rwq_ind_table: The Indirection Table to destroy. + * Return Value + * ibv_destroy_rwq_ind_table() returns 0 on success, or the value of errno + * on failure (which indicates the failure reason). 
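+ *
+ * Usage sketch (illustrative only; 'wqs' is a hypothetical array of four
+ * ibv_exp_wq objects created with ibv_exp_create_wq(), and the init-attr
+ * layout is the one declared earlier in this header):
+ *
+ *	struct ibv_exp_rwq_ind_table_init_attr init = {
+ *		.pd = pd,
+ *		.log_ind_tbl_size = 2,	(2^2 = 4 entries)
+ *		.ind_tbl = wqs,
+ *	};
+ *	tbl = ibv_exp_create_rwq_ind_table(context, &init);
+ *	...
+ *	err = ibv_exp_destroy_rwq_ind_table(tbl);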
+*/
+static inline int ibv_exp_destroy_rwq_ind_table(struct ibv_exp_rwq_ind_table *rwq_ind_table)
+{
+	struct verbs_context_exp *vctx;
+
+	vctx = verbs_get_exp_ctx_op(rwq_ind_table->context, exp_destroy_rwq_ind_table);
+	if (!vctx)
+		return ENOSYS;
+
+	return vctx->exp_destroy_rwq_ind_table(rwq_ind_table);
+}
+
+/*
+ * ibv_exp_query_gid_attr - query GID attributes
+ * @context: ib context
+ * @port_num: port number
+ * @index: gid index in the gids table
+ * @attr: the gid attributes of index in the gids table
+ * Return value
+ * ibv_exp_query_gid_attr returns 0 on success, or the value of errno on failure.
+ */
+static inline int ibv_exp_query_gid_attr(struct ibv_context *context,
+					 uint8_t port_num,
+					 unsigned int index,
+					 struct ibv_exp_gid_attr *attr)
+{
+	struct verbs_context_exp *vctx;
+
+	vctx = verbs_get_exp_ctx_op(context, exp_query_gid_attr);
+	if (!vctx)
+		return ENOSYS;
+
+	IBV_EXP_RET_EINVAL_ON_INVALID_COMP_MASK(attr->comp_mask,
+						IBV_EXP_QUERY_GID_ATTR_RESERVED - 1);
+	return vctx->exp_query_gid_attr(context, port_num, index, attr);
+}
+END_C_DECLS
+
+#define VERBS_MAX_ENV_VAL 4096
+
+# undef __attribute_const
+
+
+#endif /* INFINIBAND_VERBS_EXP_H */