1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
|
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright 2017 6WIND S.A.
* Copyright 2017 Mellanox.
*/
#include <errno.h>
#include <inttypes.h>
#include <linux/netlink.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
#include <rte_malloc.h>
#include <tap_netlink.h>
#include <rte_random.h>
/*
 * Buffer sizes, in bytes.
 * BUF_SIZE must be quite large to support dumping a huge list of QDISC or
 * filters.
 */
#define BUF_SIZE (32 * 1024) /* Size of the on-stack buffer tap_nl_recv() uses to receive kernel messages */
#define SNDBUF_SIZE 32768 /* SO_SNDBUF value requested for the netlink socket in tap_nl_init() */
#define RCVBUF_SIZE 32768 /* SO_RCVBUF value requested for the netlink socket in tap_nl_init() */
/*
 * Bookkeeping entry for one nested netlink attribute that has been started
 * with tap_nlattr_nested_start() and not yet closed with
 * tap_nlattr_nested_finish(). Entries are chained through prev, with the most
 * recently started one stored in the message's nested_tails field.
 */
struct nested_tail {
/* Where the nested attribute header was written in the message. */
struct rtattr *tail;
/* Entry of the enclosing (previously started) nested attribute, if any. */
struct nested_tail *prev;
};
/**
 * Initialize a netlink socket for communicating with the kernel.
 *
 * A NETLINK_ROUTE raw socket is created with SOCK_CLOEXEC, its send and
 * receive buffer sizes are set to SNDBUF_SIZE and RCVBUF_SIZE, and it is bound
 * with the requested multicast group mask. If any step after the socket
 * creation fails, the socket is closed before returning so that no file
 * descriptor is leaked.
 *
 * @param nl_groups
 *   Set it to a netlink group value (e.g. RTMGRP_LINK) to receive messages for
 *   specific netlink multicast groups. Otherwise, no subscription will be made.
 *
 * @return
 *   netlink socket file descriptor on success, -1 otherwise.
 */
int
tap_nl_init(uint32_t nl_groups)
{
	int fd, sndbuf_size = SNDBUF_SIZE, rcvbuf_size = RCVBUF_SIZE;
	struct sockaddr_nl local = {
		.nl_family = AF_NETLINK,
		.nl_groups = nl_groups,
	};

	fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
	if (fd < 0) {
		RTE_LOG(ERR, PMD, "Unable to create a netlink socket\n");
		return -1;
	}
	if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
		       &sndbuf_size, sizeof(sndbuf_size))) {
		RTE_LOG(ERR, PMD, "Unable to set socket buffer send size\n");
		goto error;
	}
	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
		       &rcvbuf_size, sizeof(rcvbuf_size))) {
		RTE_LOG(ERR, PMD, "Unable to set socket buffer receive size\n");
		goto error;
	}
	if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
		RTE_LOG(ERR, PMD, "Unable to bind to the netlink socket\n");
		goto error;
	}
	return fd;
error:
	/* The socket was opened successfully: release it on any later failure. */
	close(fd);
	return -1;
}
/**
 * Release a netlink socket once communication with the kernel is over.
 *
 * @param[in] nlsk_fd
 *   The netlink socket file descriptor to close.
 *
 * @return
 *   0 on success, -1 otherwise (the failure is logged).
 */
int
tap_nl_final(int nlsk_fd)
{
	if (close(nlsk_fd) == 0)
		return 0;

	RTE_LOG(ERR, PMD, "Failed to close netlink socket: %s (%d)\n",
		strerror(errno), errno);
	return -1;
}
/**
 * Send a message to the kernel on the netlink socket.
 *
 * The message header is updated before sending: nlmsg_pid is set to 0 and
 * nlmsg_seq is overwritten with a random value.
 *
 * @param[in] nlsk_fd
 *   The netlink socket file descriptor used for communication.
 * @param[in, out] nh
 *   The netlink message sent to the kernel; nh->nlmsg_len bytes are sent.
 *
 * @return
 *   the number of sent bytes on success, -1 otherwise.
 */
int
tap_nl_send(int nlsk_fd, struct nlmsghdr *nh)
{
	/* man 7 netlink EXAMPLE */
	struct sockaddr_nl kernel_addr;
	struct iovec chunk;
	struct msghdr request;
	int sent;

	nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
	nh->nlmsg_seq = (uint32_t)rte_rand();

	/* Destination address: only the family is set, everything else is 0. */
	memset(&kernel_addr, 0, sizeof(kernel_addr));
	kernel_addr.nl_family = AF_NETLINK;

	/* The whole netlink message goes out as a single I/O vector. */
	chunk.iov_base = nh;
	chunk.iov_len = nh->nlmsg_len;

	memset(&request, 0, sizeof(request));
	request.msg_name = &kernel_addr;
	request.msg_namelen = sizeof(kernel_addr);
	request.msg_iov = &chunk;
	request.msg_iovlen = 1;

	sent = sendmsg(nlsk_fd, &request, 0);
	if (sent >= 0)
		return sent;

	RTE_LOG(ERR, PMD, "Failed to send netlink message: %s (%d)\n",
		strerror(errno), errno);
	return -1;
}
/**
 * Check that the kernel sends an appropriate ACK in response
 * to a tap_nl_send().
 *
 * This is tap_nl_recv() without a callback: received messages are only
 * inspected for their ACK/error status.
 *
 * @param[in] nlsk_fd
 *   The netlink socket file descriptor used for communication.
 *
 * @return
 *   0 on success, -1 otherwise with errno set.
 */
int
tap_nl_recv_ack(int nlsk_fd)
{
	int status = tap_nl_recv(nlsk_fd, NULL, NULL);

	return status;
}
/**
 * Receive a message from the kernel on the netlink socket, following a
 * tap_nl_send().
 *
 * Every netlink message found in a received datagram is examined in order:
 * - an NLMSG_ERROR message ends the reception: a negative error code is
 *   stored (negated) in errno and -1 is returned, otherwise it is an ACK and
 *   0 is returned;
 * - an NLMSG_DONE message flagged NLM_F_MULTI ends the reception with 0;
 * - any other message is handed to the callback, when one is provided.
 * Reception continues with further datagrams as long as a message flagged
 * NLM_F_MULTI has been seen.
 *
 * @param[in] nlsk_fd
 *   The netlink socket file descriptor used for communication.
 * @param[in] cb
 *   The callback function to call for each netlink message received.
 *   May be NULL.
 * @param[in, out] arg
 *   Custom arguments for the callback.
 *
 * @return
 *   0 on ACK or end of a multi-part dump, -1 on reception failure or kernel
 *   error (errno set); otherwise the value returned by the last callback
 *   invocation (0 if the callback was never invoked).
 */
int
tap_nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg)
{
	/* man 7 netlink EXAMPLE */
	struct sockaddr_nl peer;
	char rxbuf[BUF_SIZE];
	struct iovec chunk = {
		.iov_base = rxbuf,
		.iov_len = sizeof(rxbuf),
	};
	struct msghdr reply = {
		.msg_name = &peer,
		.msg_namelen = sizeof(peer),
		.msg_iov = &chunk,
		/* One message at a time */
		.msg_iovlen = 1,
	};
	int more_parts = 0;
	int cb_status = 0;

	for (;;) {
		struct nlmsghdr *hdr;
		int remaining = recvmsg(nlsk_fd, &reply, 0);

		if (remaining < 0)
			return -1;

		/* Walk every netlink message packed in this datagram. */
		hdr = (struct nlmsghdr *)rxbuf;
		while (NLMSG_OK(hdr, (unsigned int)remaining)) {
			if (hdr->nlmsg_type == NLMSG_ERROR) {
				struct nlmsgerr *status = NLMSG_DATA(hdr);

				/* A non-negative code is an ACK message. */
				if (status->error >= 0)
					return 0;
				errno = -status->error;
				return -1;
			}
			/* Multi-part msgs and their trailing DONE message. */
			if (hdr->nlmsg_flags & NLM_F_MULTI) {
				if (hdr->nlmsg_type == NLMSG_DONE)
					return 0;
				more_parts = 1;
			}
			if (cb != NULL)
				cb_status = cb(hdr, arg);
			/* NLMSG_NEXT also decrements the remaining length. */
			hdr = NLMSG_NEXT(hdr, remaining);
		}
		if (!more_parts)
			break;
	}
	return cb_status;
}
/**
 * Append a netlink attribute to a message.
 *
 * The attribute is written at the current (aligned) end of the message and
 * nh->nlmsg_len is advanced past it. No bound checking is done here: the
 * caller must make sure the buffer holding the message has enough room.
 *
 * @param[in, out] nh
 *   The netlink message being built, to which the attribute is appended.
 * @param[in] type
 *   The type of attribute to append.
 * @param[in] data_len
 *   The length of the data to append. May be 0 for an attribute without
 *   payload (e.g. the header of a nested attribute).
 * @param[in] data
 *   The data to append. Not accessed when data_len is 0, so it may be NULL
 *   in that case.
 */
void
tap_nlattr_add(struct nlmsghdr *nh, unsigned short type,
	       unsigned int data_len, const void *data)
{
	/* see man 3 rtnetlink */
	struct rtattr *rta;

	rta = (struct rtattr *)NLMSG_TAIL(nh);
	rta->rta_len = RTA_LENGTH(data_len);
	rta->rta_type = type;
	/*
	 * Passing a NULL source to memcpy() is undefined behavior even for a
	 * zero length, and payload-less attributes are added with data == NULL.
	 */
	if (data_len > 0)
		memcpy(RTA_DATA(rta), data, data_len);
	nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
}
/**
 * Append a uint8_t netlink attribute to a message.
 *
 * @param[in, out] nh
 *   The netlink message being built, to which the attribute is appended.
 * @param[in] type
 *   The type of attribute to append.
 * @param[in] data
 *   The 8-bit value to append.
 */
void
tap_nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data)
{
	tap_nlattr_add(nh, type, sizeof(data), &data);
}
/**
 * Append a uint16_t netlink attribute to a message.
 *
 * @param[in, out] nh
 *   The netlink message being built, to which the attribute is appended.
 * @param[in] type
 *   The type of attribute to append.
 * @param[in] data
 *   The 16-bit value to append.
 */
void
tap_nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data)
{
	tap_nlattr_add(nh, type, sizeof(data), &data);
}
/**
 * Append a uint32_t netlink attribute to a message.
 *
 * @param[in, out] nh
 *   The netlink message being built, to which the attribute is appended.
 * @param[in] type
 *   The type of attribute to append.
 * @param[in] data
 *   The 32-bit value to append.
 */
void
tap_nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data)
{
	tap_nlattr_add(nh, type, sizeof(data), &data);
}
/**
 * Start a nested netlink attribute.
 * It must be followed later by a call to tap_nlattr_nested_finish().
 *
 * A bookkeeping entry recording where the nested attribute begins is
 * allocated and pushed on top of msg->nested_tails, then an attribute of the
 * requested type with no payload is appended to the message.
 *
 * @param[in, out] msg
 *   The netlink message where to edit the nested_tails metadata.
 * @param[in] type
 *   The nested attribute type to append.
 *
 * @return
 *   -1 if adding a nested netlink attribute failed, 0 otherwise.
 */
int
tap_nlattr_nested_start(struct nlmsg *msg, uint16_t type)
{
	struct nested_tail *entry = rte_zmalloc(NULL, sizeof(*entry), 0);

	if (entry == NULL) {
		RTE_LOG(ERR, PMD,
			"Couldn't allocate memory for nested netlink"
			" attribute\n");
		return -1;
	}

	/* Must be sampled before the attribute header moves the message end. */
	entry->tail = (struct rtattr *)NLMSG_TAIL(&msg->nh);

	/* Push the new entry on top of the already opened nested attributes. */
	entry->prev = msg->nested_tails;
	msg->nested_tails = entry;

	tap_nlattr_add(&msg->nh, type, 0, NULL);
	return 0;
}
/**
 * End a nested netlink attribute.
 * It follows a call to tap_nlattr_nested_start().
 * In effect, it will modify the nested attribute length to include every byte
 * from the nested attribute start, up to here.
 *
 * The bookkeeping entry of the attribute being closed is removed from
 * msg->nested_tails and freed; msg->nested_tails then designates the enclosing
 * nested attribute, or is NULL when the outermost one has been closed.
 * Calling this function while no nested attribute is open does nothing.
 *
 * @param[in, out] msg
 *   The netlink message where to edit the nested_tails metadata.
 */
void
tap_nlattr_nested_finish(struct nlmsg *msg)
{
	struct nested_tail *tail = msg->nested_tails;

	/* Unbalanced call: there is no open nested attribute to close. */
	if (tail == NULL)
		return;
	tail->tail->rta_len = (char *)NLMSG_TAIL(&msg->nh) - (char *)tail->tail;
	/*
	 * Always pop the entry, even when it is the last one: otherwise
	 * msg->nested_tails would keep pointing to the memory freed below.
	 */
	msg->nested_tails = tail->prev;
	rte_free(tail);
}
|