/*
 * Copyright (c) 2016 Mellanox Technologies Inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef PEER_OPS_H
#define PEER_OPS_H

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <infiniband/verbs.h>

BEGIN_C_DECLS

enum ibv_exp_peer_op {
	IBV_EXP_PEER_OP_RESERVED1	= 1,

	IBV_EXP_PEER_OP_FENCE		= 0,

	IBV_EXP_PEER_OP_STORE_DWORD	= 4,
	IBV_EXP_PEER_OP_STORE_QWORD	= 2,
	IBV_EXP_PEER_OP_COPY_BLOCK	= 3,

	IBV_EXP_PEER_OP_POLL_AND_DWORD	= 12,
	IBV_EXP_PEER_OP_POLL_NOR_DWORD	= 13,
	IBV_EXP_PEER_OP_POLL_GEQ_DWORD	= 14,
};

enum ibv_exp_peer_op_caps {
	IBV_EXP_PEER_OP_FENCE_CAP	= (1 << IBV_EXP_PEER_OP_FENCE),
	IBV_EXP_PEER_OP_STORE_DWORD_CAP	= (1 << IBV_EXP_PEER_OP_STORE_DWORD),
	IBV_EXP_PEER_OP_STORE_QWORD_CAP	= (1 << IBV_EXP_PEER_OP_STORE_QWORD),
	IBV_EXP_PEER_OP_COPY_BLOCK_CAP	= (1 << IBV_EXP_PEER_OP_COPY_BLOCK),
	IBV_EXP_PEER_OP_POLL_AND_DWORD_CAP
		= (1 << IBV_EXP_PEER_OP_POLL_AND_DWORD),
	IBV_EXP_PEER_OP_POLL_NOR_DWORD_CAP
		= (1 << IBV_EXP_PEER_OP_POLL_NOR_DWORD),
	IBV_EXP_PEER_OP_POLL_GEQ_DWORD_CAP
		= (1 << IBV_EXP_PEER_OP_POLL_GEQ_DWORD),
};

enum ibv_exp_peer_fence {
	IBV_EXP_PEER_FENCE_OP_READ		= (1 << 0),
	IBV_EXP_PEER_FENCE_OP_WRITE		= (1 << 1),
	IBV_EXP_PEER_FENCE_FROM_CPU		= (1 << 2),
	IBV_EXP_PEER_FENCE_FROM_HCA		= (1 << 3),
	IBV_EXP_PEER_FENCE_MEM_SYS		= (1 << 4),
	IBV_EXP_PEER_FENCE_MEM_PEER		= (1 << 5),
};

/* Indicates which HW entities are expected to access the memory buffer:
 * IBV_EXP_PEER_DIRECTION_FROM_X means X writes to the buffer;
 * IBV_EXP_PEER_DIRECTION_TO_Y means Y reads from the buffer.
 */
enum ibv_exp_peer_direction {
	IBV_EXP_PEER_DIRECTION_FROM_CPU	 = (1 << 0),
	IBV_EXP_PEER_DIRECTION_FROM_HCA	 = (1 << 1),
	IBV_EXP_PEER_DIRECTION_FROM_PEER = (1 << 2),
	IBV_EXP_PEER_DIRECTION_TO_CPU	 = (1 << 3),
	IBV_EXP_PEER_DIRECTION_TO_HCA	 = (1 << 4),
	IBV_EXP_PEER_DIRECTION_TO_PEER	 = (1 << 5),
};

struct ibv_exp_peer_buf_alloc_attr {
	size_t length;
	/* Bitmask from enum ibv_exp_peer_direction */
	uint32_t dir;
	/* The ID of the peer device which will be
	 * accessing the allocated buffer
	 */
	uint64_t peer_id;
	/* Data alignment */
	uint32_t alignment;
	/* Reserved for future extensions, must be 0 */
	uint32_t comp_mask;
};

struct ibv_exp_peer_buf {
	void *addr;
	size_t length;
	/* Reserved for future extensions, must be 0 */
	uint32_t comp_mask;
};
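
/* A minimal sketch of a buf_alloc callback (used by struct
 * ibv_exp_peer_direct_attr below), backed by plain system memory; a real
 * peer driver would allocate peer-accessible memory according to
 * attr->dir. The name my_buf_alloc is a hypothetical placeholder.
 *
 *	#include <stdlib.h>
 *
 *	static struct ibv_exp_peer_buf *
 *	my_buf_alloc(struct ibv_exp_peer_buf_alloc_attr *attr)
 *	{
 *		struct ibv_exp_peer_buf *pb = malloc(sizeof(*pb));
 *
 *		if (!pb)
 *			return NULL;	// NULL => ibverbs falls back to
 *					// system memory
 *		pb->length = attr->length;
 *		pb->comp_mask = 0;
 *		if (posix_memalign(&pb->addr, attr->alignment,
 *				   attr->length)) {
 *			free(pb);
 *			return NULL;
 *		}
 *		return pb;
 *	}
 */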

enum ibv_exp_peer_direct_attr_mask {
	IBV_EXP_PEER_DIRECT_VERSION	= (1 << 0) /* Must be set */
};

#define IBV_EXP_PEER_IOMEMORY ((struct ibv_exp_peer_buf *)-1UL)

struct ibv_exp_peer_direct_attr {
	/* Unique ID per peer device.
	 * Used to identify specific HW devices where relevant.
	 */
	uint64_t peer_id;
	/* The buf_alloc callback should return a struct ibv_exp_peer_buf
	 * holding a buffer of at least attr->length bytes.
	 * @attr: description of the desired buffer
	 *
	 * The buffer should be mapped into the application address space
	 * for read and/or write, depending on attr->dir.
	 * attr->dir indicates the expected directions of access to the
	 * buffer, allowing the peer driver to optimize accordingly.
	 * If NULL is returned, the buffer is allocated in system memory
	 * by the ibverbs driver.
	 */
	struct ibv_exp_peer_buf *(*buf_alloc)(struct ibv_exp_peer_buf_alloc_attr *attr);
	/* If a buffer was allocated by buf_alloc, buf_release will be
	 * called to release it.
	 * @pb: struct returned by buf_alloc
	 *
	 * buf_release is responsible for releasing everything allocated
	 * by buf_alloc.
	 * Returns 0 on success.
	 */
	int (*buf_release)(struct ibv_exp_peer_buf *pb);
	/* The register_va callback should register a virtual address range
	 * from the application as an area the peer is allowed to access.
	 * @start: pointer to the beginning of the region in virtual space
	 * @length: length of the region
	 * @peer_id: the ID of the peer device which will be accessing
	 * the region
	 * @pb: if registering a buffer that was returned from buf_alloc(),
	 * pb is the struct that was returned; if registering an I/O memory
	 * area, pb is IBV_EXP_PEER_IOMEMORY; otherwise NULL
	 *
	 * Returns the id of the registered address on success, 0 on failure.
	 */
	uint64_t (*register_va)(void *start, size_t length, uint64_t peer_id,
				struct ibv_exp_peer_buf *pb);
	/* If a virtual address was registered with register_va,
	 * unregister_va will be called to unregister it.
	 * @target_id: id returned by register_va
	 * @peer_id: the ID of the peer device passed to register_va
	 *
	 * Returns 0 on success.
	 */
	int (*unregister_va)(uint64_t target_id, uint64_t peer_id);
	/* Bitmask from ibv_exp_peer_op_caps */
	uint64_t caps;
	/* Maximum length of a DMA operation the peer can perform in a
	 * copy-block
	 */
	size_t peer_dma_op_map_len;
	/* From ibv_exp_peer_direct_attr_mask */
	uint32_t comp_mask;
	/* Feature version, must be 1 */
	uint32_t version;
};
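
/* A minimal sketch of how a peer driver might fill this structure before
 * QP/CQ creation. The callbacks my_buf_alloc, my_buf_release,
 * my_register_va and my_unregister_va are hypothetical placeholders, and
 * the chosen caps are just an example.
 *
 *	static void init_peer_attr(struct ibv_exp_peer_direct_attr *pa,
 *				   uint64_t my_peer_id)
 *	{
 *		pa->peer_id = my_peer_id;
 *		pa->buf_alloc = my_buf_alloc;
 *		pa->buf_release = my_buf_release;
 *		pa->register_va = my_register_va;
 *		pa->unregister_va = my_unregister_va;
 *		pa->caps = IBV_EXP_PEER_OP_FENCE_CAP |
 *			   IBV_EXP_PEER_OP_STORE_DWORD_CAP |
 *			   IBV_EXP_PEER_OP_POLL_AND_DWORD_CAP;
 *		pa->peer_dma_op_map_len = 0;	// no copy-block support
 *		pa->comp_mask = IBV_EXP_PEER_DIRECT_VERSION;
 *		pa->version = 1;	// feature version, must be 1
 *	}
 */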

/* QP API - the CPU posts send work requests without exposing them to the HW.
 * Later, the peer device exposes the relevant work requests to the HCA
 * for execution.
 */

struct peer_op_wr {
	struct peer_op_wr *next;
	enum ibv_exp_peer_op type;
	union {
		struct {
			uint64_t fence_flags; /* from ibv_exp_peer_fence */
		} fence;

		struct {
			uint32_t  data;
			uint64_t  target_id;
			size_t	  offset;
		} dword_va; /* Use for all operations targeting dword */

		struct {
			uint64_t  data;
			uint64_t  target_id;
			size_t	  offset;
		} qword_va; /* Use for all operations targeting qword */

		struct {
			void	 *src;
			uint64_t  target_id;
			size_t	  offset;
			size_t	  len;
		} copy_op;
	} wr;
	uint32_t comp_mask; /* Reserved for future extensions, must be 0 */
};
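
/* A minimal sketch of a peer-side executor walking a descriptor list.
 * peer_fence(), peer_store_dword(), peer_store_qword() and
 * peer_copy_block() are hypothetical peer-device primitives; translating
 * a target_id/offset pair into a device address is left to the peer
 * driver.
 *
 *	static int execute_peer_ops(struct peer_op_wr *op)
 *	{
 *		for (; op; op = op->next) {
 *			switch (op->type) {
 *			case IBV_EXP_PEER_OP_FENCE:
 *				peer_fence(op->wr.fence.fence_flags);
 *				break;
 *			case IBV_EXP_PEER_OP_STORE_DWORD:
 *				peer_store_dword(op->wr.dword_va.target_id,
 *						 op->wr.dword_va.offset,
 *						 op->wr.dword_va.data);
 *				break;
 *			case IBV_EXP_PEER_OP_STORE_QWORD:
 *				peer_store_qword(op->wr.qword_va.target_id,
 *						 op->wr.qword_va.offset,
 *						 op->wr.qword_va.data);
 *				break;
 *			case IBV_EXP_PEER_OP_COPY_BLOCK:
 *				peer_copy_block(op->wr.copy_op.target_id,
 *						op->wr.copy_op.offset,
 *						op->wr.copy_op.src,
 *						op->wr.copy_op.len);
 *				break;
 *			default:
 *				return EINVAL;	// poll ops appear only in
 *						// peek descriptor lists
 *			}
 *		}
 *		return 0;
 *	}
 */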

struct ibv_exp_peer_commit {
	/* IN/OUT - linked list of empty/filled descriptors */
	struct peer_op_wr *storage;
	/* IN/OUT - number of allocated/filled descriptors */
	uint32_t entries;
	/* OUT - identifier used in ibv_exp_rollback_qp to roll back the
	 * set of WQEs
	 */
	uint64_t rollback_id;
	uint32_t comp_mask; /* Reserved for future extensions, must be 0 */
};

/**
 * ibv_exp_peer_commit_qp - request descriptors for committing all WQEs
 * currently posted to the send work queue
 * @qp: the QP being requested
 * @peer: context with list of &struct peer_op_wr describing actions
 *   necessary to commit WQEs
 *
 * The function:
 * - fills peer->storage with descriptors;
 * - stores the number of filled descriptors in peer->entries;
 * - stores the data necessary for rollback in peer->rollback_id.
 * If the number of entries is not sufficient, -ENOSPC is returned.
 *
 * Note: the caller is responsible for ensuring that the peer fences any
 * data store before executing the commit.
 */
static inline int ibv_exp_peer_commit_qp(struct ibv_qp *qp,
					 struct ibv_exp_peer_commit *peer)
{
	struct verbs_context_exp *vctx;

	vctx = verbs_get_exp_ctx_op(qp->context, exp_peer_commit_qp);
	if (!vctx)
		return ENOSYS;

	return vctx->exp_peer_commit_qp(qp, peer);
}
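
/* A minimal usage sketch, assuming send work requests were already
 * posted by the application; the choice of 16 descriptors is arbitrary
 * and "qp" is assumed to exist.
 *
 *	struct peer_op_wr ops[16];
 *	struct ibv_exp_peer_commit commit;
 *	int i, ret;
 *
 *	memset(ops, 0, sizeof(ops));
 *	for (i = 0; i < 15; i++)
 *		ops[i].next = &ops[i + 1];
 *	ops[15].next = NULL;
 *
 *	commit.storage = ops;
 *	commit.entries = 16;
 *	commit.comp_mask = 0;
 *
 *	ret = ibv_exp_peer_commit_qp(qp, &commit);
 *	if (!ret) {
 *		// hand the commit.entries filled descriptors to the peer
 *		// device; keep commit.rollback_id in case the peer cannot
 *		// execute them (see ibv_exp_rollback_qp below)
 *	}
 */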

enum ibv_exp_rollback_flags {
	/* Abort all WQEs which were not yet committed to HW.
	 * rollback_id is ignored. */
	IBV_EXP_ROLLBACK_ABORT_UNCOMMITED = (1 << 0),
	/* Abort the request even if subsequent requests
	 * are being aborted as well. */
	IBV_EXP_ROLLBACK_ABORT_LATE = (1 << 1),
};

struct ibv_exp_rollback_ctx {
	uint64_t rollback_id; /* from ibv_exp_peer_commit call */
	uint32_t flags; /* from ibv_exp_rollback_flags */
	uint32_t comp_mask; /* Reserved for future extensions, must be 0 */
};

/**
 * ibv_exp_rollback_qp - indicate that the commit attempt failed
 * @qp: the QP being rolled back
 * @rollback: context with rollback_id returned by
 *   earlier ibv_exp_peer_commit_qp and flags
 */
static inline int ibv_exp_rollback_qp(struct ibv_qp *qp,
				      struct ibv_exp_rollback_ctx *rollback)
{
	struct verbs_context_exp *vctx;

	vctx = verbs_get_exp_ctx_op(qp->context, exp_rollback_send);
	if (!vctx)
		return ENOSYS;

	return vctx->exp_rollback_send(qp, rollback);
}
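
/* A minimal sketch of rolling back a failed commit, reusing the
 * rollback_id saved from the commit example above.
 *
 *	struct ibv_exp_rollback_ctx rb;
 *
 *	rb.rollback_id = commit.rollback_id;
 *	rb.flags = IBV_EXP_ROLLBACK_ABORT_LATE;
 *	rb.comp_mask = 0;
 *	if (ibv_exp_rollback_qp(qp, &rb))
 *		;	// handle the error
 */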

/* CQ interface - peek into a CQ and describe how to check if
 * there is a CQ entry available.
 */

enum {
	IBV_EXP_PEER_PEEK_ABSOLUTE,
	IBV_EXP_PEER_PEEK_RELATIVE
};

struct ibv_exp_peer_peek {
	/* IN/OUT - linked list of empty/filled descriptors */
	struct peer_op_wr *storage;
	/* IN/OUT - number of allocated/filled descriptors */
	uint32_t entries;
	/* IN - which CQ entry the peer wants to peek at for
	 * completion. The entry is chosen according to the "whence"
	 * directive as follows:
	 * IBV_EXP_PEER_PEEK_ABSOLUTE -
	 *	"offset" is the absolute index of the entry, wrapped to 32 bits
	 * IBV_EXP_PEER_PEEK_RELATIVE -
	 *	"offset" is relative to the current poll_cq location
	 */
	uint32_t whence;
	uint32_t offset;
	/* OUT - identifier used in ibv_exp_peer_abort_peek_cq to abort the peek */
	uint64_t peek_id;
	uint32_t comp_mask; /* Reserved for future extensions, must be 0 */
};

/**
 * ibv_exp_peer_peek_cq - request descriptors for peeking the CQ at a
 *   specific offset from the currently expected CQ entry
 * @cq: the CQ being requested
 * @peek_ctx: context with list of &struct peer_op_wr describing actions
 *   necessary to wait until the desired CQ entry is delivered and to
 *   report this to ibverbs
 *
 * A peek CQ request places a "block" on the relevant CQ entry.
 * Poll CQ requests for that entry will fail with an error.
 * The block is removed by executing the descriptors.
 * If the peer is unable to execute the descriptors,
 * it should call ibv_exp_peer_abort_peek_cq to remove the block.
 *
 * The function:
 * - fills peek_ctx->storage with descriptors;
 * - stores the number of filled descriptors in peek_ctx->entries;
 * - stores the data necessary to abort the peek in peek_ctx->peek_id.
 * If the number of entries is not sufficient, -ENOSPC is returned.
 */
static inline int ibv_exp_peer_peek_cq(struct ibv_cq *cq,
				       struct ibv_exp_peer_peek *peek_ctx)
{
	struct verbs_context_exp *vctx;

	vctx = verbs_get_exp_ctx_op(cq->context, exp_peer_peek_cq);
	if (!vctx)
		return ENOSYS;

	return vctx->exp_peer_peek_cq(cq, peek_ctx);
}

struct ibv_exp_peer_abort_peek {
	uint64_t peek_id; /* From the peer_peek_cq call */
	uint32_t comp_mask; /* Reserved for future extensions, must be 0 */
};

/**
 * ibv_exp_peer_abort_peek_cq - indicate that the peek is aborted
 * @cq: the CQ on which the peek is being aborted
 * @abort_ctx: context with peek_id returned by earlier ibv_exp_peer_peek_cq
 *
 * Note: This should be done only if the peek descriptors were not executed
 */
static inline int ibv_exp_peer_abort_peek_cq(struct ibv_cq *cq,
				     struct ibv_exp_peer_abort_peek *abort_ctx)
{
	struct verbs_context_exp *vctx;

	vctx = verbs_get_exp_ctx_op(cq->context, exp_peer_abort_peek_cq);
	if (!vctx)
		return ENOSYS;

	return vctx->exp_peer_abort_peek_cq(cq, abort_ctx);
}
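
/* A minimal sketch of peeking for the next expected CQ entry and
 * aborting the peek when the peer cannot execute the descriptors.
 * peer_cannot_execute() is a hypothetical predicate, the ops array is
 * zeroed and linked as in the commit example above, and "cq" is assumed
 * to exist.
 *
 *	struct peer_op_wr ops[8];
 *	struct ibv_exp_peer_peek peek;
 *
 *	// ... zero and link ops[] into a list as above ...
 *	peek.storage = ops;
 *	peek.entries = 8;
 *	peek.whence = IBV_EXP_PEER_PEEK_RELATIVE;
 *	peek.offset = 0;	// the next entry poll_cq would return
 *	peek.comp_mask = 0;
 *
 *	if (!ibv_exp_peer_peek_cq(cq, &peek) &&
 *	    peer_cannot_execute(peek.storage)) {
 *		struct ibv_exp_peer_abort_peek abort_ctx;
 *
 *		abort_ctx.peek_id = peek.peek_id;
 *		abort_ctx.comp_mask = 0;
 *		ibv_exp_peer_abort_peek_cq(cq, &abort_ctx);
 *	}
 */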

END_C_DECLS

#endif /* PEER_OPS_H */