/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2018 Ericsson AB
 */

#ifndef _DSW_EVDEV_H_
#define _DSW_EVDEV_H_

#include <rte_event_ring.h>
#include <rte_eventdev.h>

#define DSW_PMD_NAME RTE_STR(event_dsw)

/* Code changes are required to allow more ports. */
#define DSW_MAX_PORTS (64)
#define DSW_MAX_PORT_DEQUEUE_DEPTH (128)
#define DSW_MAX_PORT_ENQUEUE_DEPTH (128)
#define DSW_MAX_PORT_OUT_BUFFER (32)

#define DSW_MAX_QUEUES (16)

#define DSW_MAX_EVENTS (16384)

/* Code changes are required to allow more flows than 32k. */
#define DSW_MAX_FLOWS_BITS (15)
#define DSW_MAX_FLOWS (1<<(DSW_MAX_FLOWS_BITS))
#define DSW_MAX_FLOWS_MASK (DSW_MAX_FLOWS-1)

/* Eventdev RTE_SCHED_TYPE_PARALLEL doesn't have a concept of flows,
 * but the 'dsw' scheduler (more or less) randomly assigns a flow id
 * to events on parallel queues, to be able to reuse some of the
 * migration mechanism and scheduling logic from
 * RTE_SCHED_TYPE_ATOMIC. By moving one of these parallel "flows" away
 * from a particular port, the likelihood of events being scheduled to
 * that port is reduced, and thus a kind of statistical load balancing
 * is achieved.
 */
#define DSW_PARALLEL_FLOWS (1024)
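
/* A minimal illustration (an assumption, not necessarily the actual
 * implementation): such a pseudo-random flow id could be produced with
 * a simple per-port round-robin counter, e.g.
 *
 *	flow_hash = port->next_parallel_flow_id++ % DSW_PARALLEL_FLOWS;
 *
 * where next_parallel_flow_id is the counter declared in struct
 * dsw_port below.
 */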

/* 'Background tasks' poll the control rings for migration-related
 * messages and flush the output buffer (so that buffered events don't
 * linger too long). This value shouldn't be too low, since the system
 * then won't benefit from the 'batching' effect of the output buffer,
 * and it shouldn't be too high, since that would make buffered events
 * linger too long in case the port goes idle.
 */
#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)
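
/* Sketch of how such a threshold could be applied (illustrative only;
 * dsw_port_bg_process is a hypothetical helper name):
 *
 *	if (++port->ops_since_bg_task >= DSW_MAX_PORT_OPS_PER_BG_TASK) {
 *		dsw_port_bg_process(dsw, port);
 *		port->ops_since_bg_task = 0;
 *	}
 */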

/* Avoid making small 'loans' from the central in-flight event credit
 * pool, to improve efficiency.
 */
#define DSW_MIN_CREDIT_LOAN (64)
#define DSW_PORT_MAX_CREDITS (2*DSW_MIN_CREDIT_LOAN)
#define DSW_PORT_MIN_CREDITS (DSW_MIN_CREDIT_LOAN)
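
/* Illustrative sketch of the loan scheme (an assumption, not
 * necessarily the actual implementation): a port first spends its
 * local inflight_credits, and only when those run out does it borrow
 * at least DSW_MIN_CREDIT_LOAN credits in one go from the shared
 * credits_on_loan counter, e.g. inside a hypothetical boolean helper:
 *
 *	if (port->inflight_credits < needed) {
 *		int32_t loan = RTE_MAX(needed, DSW_MIN_CREDIT_LOAN);
 *
 *		if (rte_atomic32_add_return(&dsw->credits_on_loan, loan) >
 *		    dsw->max_inflight) {
 *			rte_atomic32_sub(&dsw->credits_on_loan, loan);
 *			return false;
 *		}
 *		port->inflight_credits += loan;
 *	}
 */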

/* The rings are dimensioned so that all in-flight events can reside
 * on any one of the port rings, to avoid the trouble of having to
 * care about the case where there's no room on the destination port's
 * input ring.
 */
#define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)

#define DSW_MAX_LOAD (INT16_MAX)
#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
#define DSW_LOAD_TO_PERCENT(x) ((100*(x))/DSW_MAX_LOAD)
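
/* For example, with DSW_MAX_LOAD = INT16_MAX = 32767,
 * DSW_LOAD_FROM_PERCENT(70) = (70*32767)/100 = 22936, and
 * DSW_LOAD_TO_PERCENT(22936) = (100*22936)/32767 = 69, the difference
 * being due to integer truncation.
 */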

/* The thought behind keeping the load update interval shorter than
 * the migration interval is that the load from newly migrated flows
 * should 'show up' on the load measurement before new migrations are
 * considered. This is to avoid too many flows, from too many source
 * ports, being migrated too quickly to a lightly loaded port - in
 * particular since this might cause the system to oscillate.
 */
#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
#define DSW_OLD_LOAD_WEIGHT (1)
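
/* With DSW_OLD_LOAD_WEIGHT set to 1, the previous estimate and the
 * newly measured load presumably contribute equally to the updated
 * estimate, i.e. roughly (old + measured) / 2. This is an assumption;
 * the exact averaging is defined where the load is updated.
 */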

/* The minimum time (in us) between two flow migrations. What puts an
 * upper limit on the actual migration rate is primarily the pace at
 * which the ports send and receive control messages, which in turn is
 * largely a function of how many cycles are spent on the processing
 * of an event burst.
 */
#define DSW_MIGRATION_INTERVAL (1000)
#define DSW_MIN_SOURCE_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(70))
#define DSW_MAX_TARGET_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(95))

#define DSW_MAX_EVENTS_RECORDED (128)

/* Only one outstanding migration per port is allowed */
#define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS)

/* Enough room for pause requests/confirmations and unpause
 * requests/confirmations from all possible senders.
 */
#define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4)
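
/* For example, with DSW_MAX_PORTS = 64 a port has at most 63 peers,
 * each of which may have one pause request, one pause confirmation,
 * one unpause request and one unpause confirmation outstanding,
 * giving (64-1)*4 = 252 ring slots.
 */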

/* With DSW_SORT_DEQUEUED enabled, the scheduler will, at the point of
 * dequeue(), arrange events so that events with the same flow id on
 * the same queue form a back-to-back "burst", and also so that such
 * bursts of different flow ids, but on the same queue, come
 * consecutively. All this is done in an attempt to improve data and
 * instruction cache usage for the application, at the cost of
 * increased scheduler overhead.
 */

/* #define DSW_SORT_DEQUEUED */

struct dsw_queue_flow {
	uint8_t queue_id;
	uint16_t flow_hash;
};

enum dsw_migration_state {
	DSW_MIGRATION_STATE_IDLE,
	DSW_MIGRATION_STATE_PAUSING,
	DSW_MIGRATION_STATE_FORWARDING,
	DSW_MIGRATION_STATE_UNPAUSING
};

struct dsw_port {
	uint16_t id;

	/* Keeping a pointer here to avoid container_of() calls, which
	 * are expensive since they are very frequent and will result
	 * in an integer multiplication (since the port id is an index
	 * into the dsw_evdev port array).
	 */
	struct dsw_evdev *dsw;

	uint16_t dequeue_depth;
	uint16_t enqueue_depth;

	int32_t inflight_credits;

	int32_t new_event_threshold;

	uint16_t pending_releases;

	uint16_t next_parallel_flow_id;

	uint16_t ops_since_bg_task;

	/* Most recent 'background' processing. */
	uint64_t last_bg;

	/* For port load measurement. */
	uint64_t next_load_update;
	uint64_t load_update_interval;
	uint64_t measurement_start;
	uint64_t busy_start;
	uint64_t busy_cycles;
	uint64_t total_busy_cycles;

	/* For the ctl interface and flow migration mechanism. */
	uint64_t next_migration;
	uint64_t migration_interval;
	enum dsw_migration_state migration_state;

	uint64_t migration_start;
	uint64_t migrations;
	uint64_t migration_latency;

	uint8_t migration_target_port_id;
	struct dsw_queue_flow migration_target_qf;
	uint8_t cfm_cnt;

	uint16_t paused_flows_len;
	struct dsw_queue_flow paused_flows[DSW_MAX_PAUSED_FLOWS];

	/* In a very contrived worst case, all in-flight events could be
	 * lying around paused here.
	 */
	uint16_t paused_events_len;
	struct rte_event paused_events[DSW_MAX_EVENTS];

	uint16_t seen_events_len;
	uint16_t seen_events_idx;
	struct dsw_queue_flow seen_events[DSW_MAX_EVENTS_RECORDED];

	uint64_t new_enqueued;
	uint64_t forward_enqueued;
	uint64_t release_enqueued;
	uint64_t queue_enqueued[DSW_MAX_QUEUES];

	uint64_t dequeued;
	uint64_t queue_dequeued[DSW_MAX_QUEUES];

	uint16_t out_buffer_len[DSW_MAX_PORTS];
	struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];

	uint16_t in_buffer_len;
	uint16_t in_buffer_start;
	/* This buffer may contain events that were read from the
	 * in_ring during the flow migration process.
	 */
	struct rte_event in_buffer[DSW_MAX_EVENTS];

	struct rte_event_ring *in_ring __rte_cache_aligned;

	struct rte_ring *ctl_in_ring __rte_cache_aligned;

	/* Estimate of current port load. */
	rte_atomic16_t load __rte_cache_aligned;
} __rte_cache_aligned;

struct dsw_queue {
	uint8_t schedule_type;
	uint8_t serving_ports[DSW_MAX_PORTS];
	uint16_t num_serving_ports;

	uint8_t flow_to_port_map[DSW_MAX_FLOWS] __rte_cache_aligned;
};

struct dsw_evdev {
	struct rte_eventdev_data *data;

	struct dsw_port ports[DSW_MAX_PORTS];
	uint16_t num_ports;
	struct dsw_queue queues[DSW_MAX_QUEUES];
	uint8_t num_queues;
	int32_t max_inflight;

	rte_atomic32_t credits_on_loan __rte_cache_aligned;
};

#define DSW_CTL_PAUS_REQ (0)
#define DSW_CTL_UNPAUS_REQ (1)
#define DSW_CTL_CFM (2)

/* sizeof(struct dsw_ctl_msg) must be less than or equal to
 * sizeof(void *), for a message to fit in a control ring slot.
 */
struct dsw_ctl_msg {
	uint8_t type:2;
	uint8_t originating_port_id:6;
	uint8_t queue_id;
	uint16_t flow_hash;
} __rte_packed;
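
/* Packed, the message occupies 1 (type + originating_port_id) +
 * 1 (queue_id) + 2 (flow_hash) = 4 bytes, which fits in a
 * pointer-sized ring slot on both 32-bit and 64-bit targets. Assuming
 * a C11 toolchain, the requirement could be enforced at compile time
 * with e.g.
 *
 *	_Static_assert(sizeof(struct dsw_ctl_msg) <= sizeof(void *),
 *		       "ctl msg too large for the control ring");
 */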

uint16_t dsw_event_enqueue(void *port, const struct rte_event *event);
uint16_t dsw_event_enqueue_burst(void *port,
				 const struct rte_event events[],
				 uint16_t events_len);
uint16_t dsw_event_enqueue_new_burst(void *port,
				     const struct rte_event events[],
				     uint16_t events_len);
uint16_t dsw_event_enqueue_forward_burst(void *port,
					 const struct rte_event events[],
					 uint16_t events_len);

uint16_t dsw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait);
uint16_t dsw_event_dequeue_burst(void *port, struct rte_event *events,
				 uint16_t num, uint64_t wait);

int dsw_xstats_get_names(const struct rte_eventdev *dev,
			 enum rte_event_dev_xstats_mode mode,
			 uint8_t queue_port_id,
			 struct rte_event_dev_xstats_name *xstats_names,
			 unsigned int *ids, unsigned int size);
int dsw_xstats_get(const struct rte_eventdev *dev,
		   enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id,
		   const unsigned int ids[], uint64_t values[], unsigned int n);
uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev,
				const char *name, unsigned int *id);

static inline struct dsw_evdev *
dsw_pmd_priv(const struct rte_eventdev *eventdev)
{
	return eventdev->data->dev_private;
}

#define DSW_LOG_DP(level, fmt, args...)					\
	RTE_LOG_DP(level, EVENTDEV, "[%s] %s() line %u: " fmt,		\
		   DSW_PMD_NAME,					\
		   __func__, __LINE__, ## args)

#define DSW_LOG_DP_PORT(level, port_id, fmt, args...)		\
	DSW_LOG_DP(level, "<Port %d> " fmt, port_id, ## args)
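
/* Example usage (illustrative; 'port_id' and the message are made up):
 *
 *	DSW_LOG_DP_PORT(DEBUG, port_id, "Migrating flow_hash %d.\n",
 *			flow_hash);
 *
 * The level is given as an RTE_LOG_* suffix (e.g. DEBUG), as expected
 * by RTE_LOG_DP().
 */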

#endif /* _DSW_EVDEV_H_ */