summaryrefslogtreecommitdiffstats
path: root/src/vlibapi/api_common.h
blob: a955636ba3f799a3b3e4c419f91f10212445b135 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
/*
 *------------------------------------------------------------------
 * api_common.h
 *
 * Copyright (c) 2009-2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *------------------------------------------------------------------
 */

#ifndef included_api_common_h
#define included_api_common_h

/** \file api_common.h
 *  API common definitions
 * See api_doc.md for more info
 */

#include <vppinfra/clib_error.h>
#include <vppinfra/elog.h>
#include <vppinfra/cJSON.h>
#include <vlibapi/api_types.h>
#include <svm/svm_common.h>
#include <svm/queue.h>

/** Kinds of API client registration.
 *  Enumerators are numbered consecutively, starting at zero.
 */
typedef enum
{
  REGISTRATION_TYPE_FREE = 0,
  REGISTRATION_TYPE_SHMEM = 1,		/**< Shared memory connection */
  REGISTRATION_TYPE_SOCKET_LISTEN = 2,	/**< Socket listener */
  REGISTRATION_TYPE_SOCKET_SERVER = 3,	/**< Socket server */
  REGISTRATION_TYPE_SOCKET_CLIENT = 4,	/**< Socket client */
} vl_registration_type_t;

/** An API client registration, only in vpp/vlib.
 *
 *  One record per client. The fields are grouped by transport: a common
 *  part, a shared-memory part, a socket (server and client) part and a
 *  socket-client-only part. registration_type says which kind of
 *  registration this is.
 */

typedef struct vl_api_registration_
{
  vl_registration_type_t registration_type; /**< type */

  /** Index in VLIB's brain (not shared memory). */
  u32 vl_api_registration_pool_index;

  u8 *name;			/**< Client name */

  /* Zombie apocalypse checking */
  f64 last_heard;		/**< presumably when the client was last heard from -- confirm clock/units */
  int last_queue_head;		/**< presumably a saved input-queue head index -- confirm */
  int unanswered_pings;		/**< presumably number of pings sent without a reply -- confirm */
  int is_being_removed;		/**< presumably non-zero while the client is being torn down -- confirm */

  /** shared memory only: pointer to client input queue */
  svm_queue_t *vl_input_queue;
  svm_region_t *vlib_rp;	/**< NOTE(review): region associated with this client; presumably shared memory only -- confirm */
  void *shmem_hdr;		/**< NOTE(review): untyped here; presumably a struct vl_shmem_hdr_ * -- confirm */

  /* socket server and client */
  u32 clib_file_index;		/**< Socket only: file index */
  i8 *unprocessed_input;	/**< Socket only: pending input */
  u32 unprocessed_msg_length;	/**< Socket only: unprocessed length */
  u8 *output_vector;		/**< Socket only: output vector */
  int *additional_fds_to_close;	/**< presumably extra file descriptors to close later -- confirm who closes them and when */

  /* socket client only */
  u32 server_handle;		/**< Socket client only: server handle */
  u32 server_index;		/**< Socket client only: server index */
} vl_api_registration_t;

/** All-ones u32 sentinel. The name suggests "invalid file index", presumably
 *  for vl_api_registration_t.clib_file_index -- confirm against users. */
#define VL_API_INVALID_FI ((u32)~0)

/** Trace configuration for a single message.
 *  api_main_t.api_trace_cfg points at records of this type.
 */
typedef struct
{
  int size;			/**< for sanity checking; presumably the expected message size -- confirm */
  int trace_enable;		/**< trace this message  */
  int replay_enable;		/**< This message can be replayed  */
} trace_cfg_t;

/**
 * API trace state.
 *
 * Bookkeeping for a circular buffer of trace records; api_main_t keeps
 * separate rx_trace and tx_trace pointers to this type.
 */
typedef struct
{
  u8 endian;			/**< trace endianness; presumably VL_API_LITTLE_ENDIAN or VL_API_BIG_ENDIAN -- confirm */
  u8 enabled;			/**< trace is enabled  */
  u8 wrapped;			/**< trace has wrapped */
  u8 pad;			/**< padding; together with the three u8 flags above fills 4 bytes before nitems */
  u32 nitems;			/**< Number of trace records */
  u32 curindex;			/**< Current index in circular buffer  */
  u8 **traces;			/**< Trace ring */
} vl_api_trace_t;

/** Selects one of the two message traces: transmit or receive. */
typedef enum
{
  VL_API_TRACE_TX = 0,
  VL_API_TRACE_RX = 1,
} vl_api_trace_which_t;

/** Endianness codes; presumably the values stored in vl_api_trace_t.endian
 *  -- confirm against the trace code. */
#define VL_API_LITTLE_ENDIAN 0x00
#define VL_API_BIG_ENDIAN 0x01

/** Message range (belonging to a plugin).
 *  api_main_t.msg_ranges points at records of this type.
 */
typedef struct
{
  u8 *name;			/**< name of the plugin  */
  u16 first_msg_id;		/**< first assigned message ID */
  u16 last_msg_id;		/**< last assigned message ID; presumably inclusive -- confirm */
} vl_api_msg_range_t;

/** Message configuration definition.
 *
 *  Describes one API message and is the argument type of vl_msg_api_config ().
 *  All callbacks are carried as untyped void pointers; the typed handler
 *  vectors in api_main_t show the signatures they are presumably cast to
 *  -- confirm in api_shared.c.
 */
typedef struct
{
  int id;			/**< the message ID */
  char *name;			/**< the message name */
  u32 crc;			/**< message definition CRC  */
  void *handler;		/**< the message handler  */
  void *cleanup;		/**< non-default message cleanup handler */
  void *endian;			/**< message endian function  */
  void *print;			/**< message print function  */
  void *print_json;		/**< message print function (JSON format)  */
  void *tojson;			/**< binary to JSON convert function */
  void *fromjson;		/**< JSON to binary convert function */
  int size;			/**< message size  */
  int traced;			/**< is this message to be traced?  */
  int replay;			/**< is this message to be replayed?  */
  int message_bounce;		/**< do not free message after processing */
  /* NOTE(review): the field name suggests non-zero means the handler is
     safe to run WITHOUT the worker thread barrier, which is the opposite
     of how the comment below reads -- confirm against the dispatcher. */
  int is_mp_safe;		/**< worker thread barrier required?  */
  int is_autoendian;		/**< endian conversion required?  */
} vl_msg_api_msg_config_t;

/** Message header structure.
 *
 *  Sits immediately before the message payload: VL_MSG_API_POISON and
 *  VL_MSG_API_UNPOISON recover it from a payload pointer by stepping back
 *  one msgbuf_t.
 */
typedef struct msgbuf_
{
  svm_queue_t *q; /**< message allocated in this shmem ring  */
  /* The poison/unpoison helpers in this file read data_len through ntohl (),
     i.e. they treat it as stored in network byte order. */
  u32 data_len;			 /**< message length not including header  */
  u32 gc_mark_timestamp;	 /**< message garbage collector mark TS  */
  /* Zero-length array: adds no storage to the struct, only names the first
     payload byte. */
  u8 data[0];			 /**< actual message begins here  */
} msgbuf_t;

CLIB_NOSANITIZE_ADDR static inline void
VL_MSG_API_UNPOISON (const void *a)
{
  const msgbuf_t *m = &((const msgbuf_t *) a)[-1];
  CLIB_MEM_UNPOISON (m, sizeof (*m) + ntohl (m->data_len));
}

CLIB_NOSANITIZE_ADDR static inline void
VL_MSG_API_SVM_QUEUE_UNPOISON (const svm_queue_t * q)
{
  CLIB_MEM_UNPOISON (q, sizeof (*q) + q->elsize * q->maxsize);
}

static inline void
VL_MSG_API_POISON (const void *a)
{
  const msgbuf_t *m = &((const msgbuf_t *) a)[-1];
  CLIB_MEM_POISON (m, sizeof (*m) + ntohl (m->data_len));
}

/* api_shared.c prototypes */

/* Message handling entry points. Each takes an untyped pointer to the
   message; the name suffixes (no_free, no_trace, trace_only, ...) presumably
   describe which of dispatch / trace / free each variant performs
   -- confirm in api_shared.c. */
void vl_msg_api_handler (void *the_msg);
void vl_msg_api_handler_no_free (void *the_msg);
void vl_msg_api_handler_no_trace_no_free (void *the_msg);
void vl_msg_api_trace_only (void *the_msg);
void vl_msg_api_cleanup_handler (void *the_msg);
void vl_msg_api_replay_handler (void *the_msg);
void vl_msg_api_socket_handler (void *the_msg);

/* Per-message registration. The callbacks are passed as untyped pointers;
   vl_msg_api_config () takes the same information bundled in a
   vl_msg_api_msg_config_t. */
void vl_msg_api_set_handlers (int msg_id, char *msg_name, void *handler,
			      void *cleanup, void *endian, void *print,
			      int msg_size, int traced, void *print_json,
			      void *tojson, void *fromjson);
void vl_msg_api_clean_handlers (int msg_id);
void vl_msg_api_config (vl_msg_api_msg_config_t *);
void vl_msg_api_set_cleanup_handler (int msg_id, void *fp);
void vl_msg_api_queue_handler (svm_queue_t * q);

/* Declared weak: a program that does not define these links anyway and the
   symbol's address is then null, so it must be checked before calling. */
void vl_msg_api_barrier_sync (void) __attribute__ ((weak));
void vl_msg_api_barrier_release (void) __attribute__ ((weak));
/* Only a real (weak) function when BARRIER_TRACING is defined; otherwise the
   call expands to nothing and its argument is not evaluated. */
#ifdef BARRIER_TRACING
void vl_msg_api_barrier_trace_context (const char *context)
  __attribute__ ((weak));
#else
#define vl_msg_api_barrier_trace_context(X)
#endif
void vl_msg_api_free (void *);
void vl_noop_handler (void *mp);
void vl_msg_api_increment_missing_client_counter (void);
void vl_msg_api_post_mortem_dump (void);
void vl_msg_api_post_mortem_dump_enable_disable (int enable);
/* The parameter name states that msg_id is expected in host byte order. */
void vl_msg_api_register_pd_handler (void *handler,
				     u16 msg_id_host_byte_order);
int vl_msg_api_pd_handler (void *mp, int rv);

/* Message ID allocation and lookup. */
void vl_msg_api_set_first_available_msg_id (u16 first_avail);
u16 vl_msg_api_get_msg_ids (const char *name, int n);
u32 vl_msg_api_get_msg_index (u8 * name_and_crc);

/* Heap push/pop pairs: push returns an opaque pointer that is handed back to
   the matching pop as oldheap. The _w_region variants take the region
   explicitly. */
void *vl_msg_push_heap (void);
void *vl_msg_push_heap_w_region (svm_region_t * vlib_rp);
void vl_msg_pop_heap (void *oldheap);
void vl_msg_pop_heap_w_region (svm_region_t * vlib_rp, void *oldheap);

/** Function type (not a pointer type): takes a u32 client index and returns
 *  a clib_error_t *. Used for the callback stored in
 *  _vl_msg_api_function_list_elt_t. */
typedef clib_error_t *(vl_msg_api_init_function_t) (u32 client_index);

/** Element of a singly linked list of vl_msg_api_init_function_t callbacks.
 *  api_main_t.reaper_function_registrations is a list of these. */
typedef struct _vl_msg_api_init_function_list_elt
{
  struct _vl_msg_api_init_function_list_elt *next_init_function; /**< next element in the list */
  vl_msg_api_init_function_t *f;	/**< the registered callback */
} _vl_msg_api_function_list_elt_t;

/** A named three-part version (major, minor, patch).
 *  api_main_t.api_version_list points at records of this type. */
typedef struct
{
  u32 major;
  u32 minor;
  u32 patch;
  char name[64];		/**< fixed 64-byte name buffer; presumably NUL-terminated -- confirm at the writer */
} api_version_t;

/** API main structure, used by both vpp and binary API clients.
 *
 *  The per-message members (handler vectors, names, flags) are parallel
 *  collections, presumably indexed by message ID -- confirm in api_shared.c.
 *  The instance in use is reached through vlibapi_get_main ().
 */
typedef struct api_main_t
{
  /** Message handler vector  */
  void (**msg_handlers) (void *);
  /** Platform-dependent (aka hardware) message handler vector */
  int (**pd_msg_handlers) (void *, int);

  /** non-default message cleanup handler vector */
  void (**msg_cleanup_handlers) (void *);

  /** Message endian handler vector */
  void (**msg_endian_handlers) (void *);

  /** Message print function vector */
  void (**msg_print_handlers) (void *, void *);

  /** Message print function vector in JSON */
  void (**msg_print_json_handlers) (void *, void *);

  /** Message convert function vector: binary to JSON */
  cJSON *(**msg_tojson_handlers) (void *);

  /** Message convert function vector: JSON to binary */
  void *(**msg_fromjson_handlers) (cJSON *, int *);

  /** Message name vector */
  const char **msg_names;

  /** API message ID by name hash table */
  uword *msg_id_by_name;

  /** Don't automatically free message buffer vector */
  u8 *message_bounce;

  /** Message is mp safe vector */
  u8 *is_mp_safe;

  /** Message requires us to do endian conversion */
  u8 *is_autoendian;

  /** Allocator ring vectors (in shared memory) */
  struct ring_alloc_ *arings;

  /** Number of times that the ring allocator failed */
  u32 ring_misses;

  /** Number of garbage-collected message buffers */
  u32 garbage_collects;

  /** Number of missing clients / failed message sends */
  u32 missing_clients;

  /** Received message trace configuration */
  vl_api_trace_t *rx_trace;

  /** Sent message trace configuration */
  vl_api_trace_t *tx_trace;

  /** Print every received message */
  int msg_print_flag;

  /** Current trace configuration */
  trace_cfg_t *api_trace_cfg;

  /** Current process PID */
  int our_pid;

  /** Current binary api segment descriptor */
  svm_region_t *vlib_rp;

  /** Primary api segment descriptor */
  svm_region_t *vlib_primary_rp;

  /** Vector of all mapped shared-VM segments */
  svm_region_t **vlib_private_rps;
  /* NOTE(review): undocumented in the original; how this differs from
     vlib_private_rps is not visible from this header -- confirm. */
  svm_region_t **mapped_shmem_regions;

  /** Binary API shared-memory segment header pointer */
  struct vl_shmem_hdr_ *shmem_hdr;

  /** vlib/vpp only: vector of client registrations */
  vl_api_registration_t **vl_clients;

  /** vlib/vpp only: serialized (message, name, crc) table */
  u8 *serialized_message_table_in_shmem;

  /** First available message ID, for the plugin msg allocator */
  u16 first_available_msg_id;

  /** Message range by name hash */
  uword *msg_range_by_name;

  /** vector of message ranges */
  vl_api_msg_range_t *msg_ranges;

  /** uid for the api shared memory region */
  int api_uid;

  /** gid for the api shared memory region */
  int api_gid;

  /** base virtual address for global VM region */
  u64 global_baseva;

  /** size of the global VM region */
  u64 global_size;

  /** size of the API region */
  u64 api_size;

  /** size of the global VM private mheap */
  u64 global_pvt_heap_size;

  /** size of the api private mheap */
  u64 api_pvt_heap_size;

  /** Peer input queue pointer */
  svm_queue_t *vl_input_queue;

  /**
   * All VLIB-side message handlers use my_client_index to identify
   * the queue / client. This works in sim replay.
   */
  int my_client_index;
  /**
   * This is the (shared VM) address of the registration,
   * don't use it to id the connection since it can't possibly
   * work in simulator replay.
   */
  vl_api_registration_t *my_registration;

  /** vpp/vlib input queue length */
  u32 vlib_input_queue_length;

  /** client message index hash table */
  uword *msg_index_by_name_and_crc;

  /** api version list */
  api_version_t *api_version_list;

  /** Shared VM binary API region name */
  const char *region_name;

  /** Chroot path to the shared memory API files */
  const char *root_path;

  /** Replay in progress? */
  int replay_in_progress;

  /** Dump (msg-name, crc) snapshot here at startup */
  u8 *save_msg_table_filename;

  /** List of API client reaper functions */
  _vl_msg_api_function_list_elt_t *reaper_function_registrations;

  /** Bin API thread handle */
  pthread_t rx_thread_handle;

  /** event log */
  elog_main_t *elog_main;
  /* presumably non-zero enables event-logging of API messages -- confirm */
  int elog_trace_api_messages;

  /** performance counter callbacks; each receives this api_main_t, an id
   *  and a before_or_after indicator */
  void (**perf_counter_cbs)
    (struct api_main_t *, u32 id, int before_or_after);
  /* NOTE(review): same callback type as perf_counter_cbs; the role of this
     second, "tmp" vector is not visible from this header -- confirm. */
  void (**perf_counter_cbs_tmp)
    (struct api_main_t *, u32 id, int before_or_after);

} api_main_t;

/* Thread-local pointer to the api_main_t in use by the current thread;
   read and written through vlibapi_get_main () / vlibapi_set_main (). */
extern __thread api_main_t *my_api_main;
/* Process-wide api_main_t instance, defined elsewhere. */
extern api_main_t api_global_main;

/**
 * Get the calling thread's current API main structure.
 *
 * @return the value of the thread-local my_api_main pointer
 */
always_inline api_main_t *
vlibapi_get_main (void)
{
  api_main_t *current = my_api_main;

  return current;
}

/**
 * Set the calling thread's current API main structure.
 *
 * @param am pointer stored into the thread-local my_api_main; it is what
 *           vlibapi_get_main () returns on this thread afterwards
 */
always_inline void
vlibapi_set_main (api_main_t * am)
{
  my_api_main = am;
}

#endif /* included_api_common_h */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
ass="n">vlib_buffer_t * b; u32 bi; const uword buffer_size = VLIB_BUFFER_DATA_SIZE; /** Make sure we have some RX buffers. */ { uword n_left = vec_len (tm->rx_buffers); uword n_alloc; if (n_left < VLIB_FRAME_SIZE / 2) { if (! tm->rx_buffers) vec_alloc (tm->rx_buffers, VLIB_FRAME_SIZE); n_alloc = vlib_buffer_alloc (vm, tm->rx_buffers + n_left, VLIB_FRAME_SIZE - n_left); _vec_len (tm->rx_buffers) = n_left + n_alloc; } } /** Allocate RX buffers from end of rx_buffers. Turn them into iovecs to pass to readv. */ { uword i_rx = vec_len (tm->rx_buffers) - 1; vlib_buffer_t * b; word i, n_bytes_left, n_bytes_in_packet; /** We should have enough buffers left for an MTU sized packet. */ ASSERT (vec_len (tm->rx_buffers) >= tm->mtu_buffers); vec_validate (tm->iovecs, tm->mtu_buffers - 1); for (i = 0; i < tm->mtu_buffers; i++) { b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - i]); tm->iovecs[i].iov_base = b->data; tm->iovecs[i].iov_len = buffer_size; } n_bytes_left = readv (tm->dev_net_tun_fd, tm->iovecs, tm->mtu_buffers); n_bytes_in_packet = n_bytes_left; if (n_bytes_left <= 0) { if (errno != EAGAIN) clib_unix_warning ("readv %d", n_bytes_left); return 0; } bi = tm->rx_buffers[i_rx]; while (1) { b = vlib_get_buffer (vm, tm->rx_buffers[i_rx]); b->flags = 0; b->current_data = 0; b->current_length = n_bytes_left < buffer_size ? n_bytes_left : buffer_size; n_bytes_left -= buffer_size; if (n_bytes_left <= 0) { break; } i_rx--; b->flags |= VLIB_BUFFER_NEXT_PRESENT; b->next_buffer = tm->rx_buffers[i_rx]; } /** Interface counters for tuntap interface. 
*/ vlib_increment_combined_counter (vnet_main.interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, vlib_get_thread_index(), tm->sw_if_index, 1, n_bytes_in_packet); _vec_len (tm->rx_buffers) = i_rx; } b = vlib_get_buffer (vm, bi); { u32 next_index; uword n_trace = vlib_get_trace_count (vm, node); vnet_buffer (b)->sw_if_index[VLIB_RX] = tm->sw_if_index; vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32)~0; /* * Turn this on if you run into * "bad monkey" contexts, and you want to know exactly * which nodes they've visited... */ if (VLIB_BUFFER_TRACE_TRAJECTORY) b->pre_data[0] = 0; b->error = node->errors[0]; if (tm->is_ether) { next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; } else switch (b->data[0] & 0xf0) { case 0x40: next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT; break; case 0x60: next_index = VNET_DEVICE_INPUT_NEXT_IP6_INPUT; break; default: next_index = VNET_DEVICE_INPUT_NEXT_DROP; break; } /* The linux kernel couldn't care less if our interface is up */ if (tm->have_normal_interface) { vnet_main_t *vnm = vnet_get_main(); vnet_sw_interface_t * si; si = vnet_get_sw_interface (vnm, tm->sw_if_index); if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) next_index = VNET_DEVICE_INPUT_NEXT_DROP; } vnet_feature_start_device_input_x1 (tm->sw_if_index, &next_index, b); vlib_set_next_frame_buffer (vm, node, next_index, bi); if (n_trace > 0) { vlib_trace_buffer (vm, node, next_index, b, /* follow_chain */ 1); vlib_set_trace_count (vm, node, n_trace - 1); } } return 1; } /** * @brief TUNTAP_RX error strings */ static char * tuntap_rx_error_strings[] = { "unknown packet type", }; VLIB_REGISTER_NODE (tuntap_rx_node,static) = { .function = tuntap_rx, .name = "tuntap-rx", .sibling_of = "device-input", .type = VLIB_NODE_TYPE_INPUT, .state = VLIB_NODE_STATE_INTERRUPT, .vector_size = 4, .n_errors = 1, .error_strings = tuntap_rx_error_strings, }; /** * @brief Gets called when file descriptor is ready from epoll. 
* * @param *uf - unix_file_t * * @return error - clib_error_t */ static clib_error_t * tuntap_read_ready (unix_file_t * uf) { vlib_main_t * vm = vlib_get_main(); vlib_node_set_interrupt_pending (vm, tuntap_rx_node.index); return 0; } /** * @brief Clean up the tun/tap device * * @param *vm - vlib_main_t * * @return error - clib_error_t * */ static clib_error_t * tuntap_exit (vlib_main_t * vm) { tuntap_main_t *tm = &tuntap_main; struct ifreq ifr; int sfd; /* Not present. */ if (! tm->dev_net_tun_fd || tm->dev_net_tun_fd < 0) return 0; sfd = socket (AF_INET, SOCK_STREAM, 0); if (sfd < 0) clib_unix_warning("provisioning socket"); memset(&ifr, 0, sizeof (ifr)); strncpy (ifr.ifr_name, tm->tun_name, sizeof (ifr.ifr_name)-1); /* get flags, modify to bring down interface... */ if (ioctl (sfd, SIOCGIFFLAGS, &ifr) < 0) clib_unix_warning ("SIOCGIFFLAGS"); ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING); if (ioctl (sfd, SIOCSIFFLAGS, &ifr) < 0) clib_unix_warning ("SIOCSIFFLAGS"); /* Turn off persistence */ if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 0) < 0) clib_unix_warning ("TUNSETPERSIST"); close(tm->dev_tap_fd); if (tm->dev_net_tun_fd >= 0) close(tm->dev_net_tun_fd); if (sfd >= 0) close (sfd); return 0; } VLIB_MAIN_LOOP_EXIT_FUNCTION (tuntap_exit); /** * @brief CLI function for tun/tap config * * @param *vm - vlib_main_t * @param *input - unformat_input_t * * @return error - clib_error_t * */ static clib_error_t * tuntap_config (vlib_main_t * vm, unformat_input_t * input) { tuntap_main_t *tm = &tuntap_main; clib_error_t * error = 0; struct ifreq ifr; u8 * name; int flags = IFF_TUN | IFF_NO_PI; int is_enabled = 0, is_ether = 0, have_normal_interface = 0; const uword buffer_size = VLIB_BUFFER_DATA_SIZE; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "mtu %d", &tm->mtu_bytes)) ; else if (unformat (input, "enable")) is_enabled = 1; else if (unformat (input, "disable")) is_enabled = 0; else if (unformat (input, "ethernet") || unformat (input, 
"ether")) is_ether = 1; else if (unformat (input, "have-normal-interface") || unformat (input, "have-normal")) have_normal_interface = 1; else if (unformat (input, "name %s", &name)) tm->tun_name = (char *) name; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); } tm->dev_net_tun_fd = -1; tm->dev_tap_fd = -1; if (is_enabled == 0) return 0; if (geteuid()) { clib_warning ("tuntap disabled: must be superuser"); return 0; } tm->is_ether = is_ether; tm->have_normal_interface = have_normal_interface; if (is_ether) flags = IFF_TAP | IFF_NO_PI; if ((tm->dev_net_tun_fd = open ("/dev/net/tun", O_RDWR)) < 0) { error = clib_error_return_unix (0, "open /dev/net/tun"); goto done; } memset (&ifr, 0, sizeof (ifr)); strncpy(ifr.ifr_name, tm->tun_name, sizeof(ifr.ifr_name)-1); ifr.ifr_flags = flags; if (ioctl (tm->dev_net_tun_fd, TUNSETIFF, (void *)&ifr) < 0) { error = clib_error_return_unix (0, "ioctl TUNSETIFF"); goto done; } /* Make it persistent, at least until we split. */ if (ioctl (tm->dev_net_tun_fd, TUNSETPERSIST, 1) < 0) { error = clib_error_return_unix (0, "TUNSETPERSIST"); goto done; } /* Open a provisioning socket */ if ((tm->dev_tap_fd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL))) < 0 ) { error = clib_error_return_unix (0, "socket"); goto done; } /* Find the interface index. */ { struct ifreq ifr; struct sockaddr_ll sll; memset (&ifr, 0, sizeof(ifr)); strncpy (ifr.ifr_name, tm->tun_name, sizeof(ifr.ifr_name)-1); if (ioctl (tm->dev_tap_fd, SIOCGIFINDEX, &ifr) < 0 ) { error = clib_error_return_unix (0, "ioctl SIOCGIFINDEX"); goto done; } /* Bind the provisioning socket to the interface. 
*/ memset(&sll, 0, sizeof(sll)); sll.sll_family = AF_PACKET; sll.sll_ifindex = ifr.ifr_ifindex; sll.sll_protocol = htons(ETH_P_ALL); if (bind(tm->dev_tap_fd, (struct sockaddr*) &sll, sizeof(sll)) < 0) { error = clib_error_return_unix (0, "bind"); goto done; } } /* non-blocking I/O on /dev/tapX */ { int one = 1; if (ioctl (tm->dev_net_tun_fd, FIONBIO, &one) < 0) { error = clib_error_return_unix (0, "ioctl FIONBIO"); goto done; } } tm->mtu_buffers = (tm->mtu_bytes + (buffer_size - 1)) / buffer_size; ifr.ifr_mtu = tm->mtu_bytes; if (ioctl (tm->dev_tap_fd, SIOCSIFMTU, &ifr) < 0) { error = clib_error_return_unix (0, "ioctl SIOCSIFMTU"); goto done; } /* get flags, modify to bring up interface... */ if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0) { error = clib_error_return_unix (0, "ioctl SIOCGIFFLAGS"); goto done; } ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0) { error = clib_error_return_unix (0, "ioctl SIOCSIFFLAGS"); goto done; } if (is_ether) { if (ioctl (tm->dev_tap_fd, SIOCGIFHWADDR, &ifr) < 0) { error = clib_error_return_unix (0, "ioctl SIOCGIFHWADDR"); goto done; } else clib_memcpy (tm->ether_dst_mac, ifr.ifr_hwaddr.sa_data, 6); } if (have_normal_interface) { vnet_main_t *vnm = vnet_get_main(); error = ethernet_register_interface (vnm, tuntap_dev_class.index, 0 /* device instance */, tm->ether_dst_mac /* ethernet address */, &tm->hw_if_index, 0 /* flag change */); if (error) clib_error_report (error); tm->sw_if_index = tm->hw_if_index; vm->os_punt_frame = tuntap_nopunt_frame; } else { vnet_main_t *vnm = vnet_get_main(); vnet_hw_interface_t * hi; vm->os_punt_frame = tuntap_punt_frame; tm->hw_if_index = vnet_register_interface (vnm, tuntap_dev_class.index, 0 /* device instance */, tuntap_interface_class.index, 0); hi = vnet_get_hw_interface (vnm, tm->hw_if_index); tm->sw_if_index = hi->sw_if_index; /* Interface is always up. 
*/ vnet_hw_interface_set_flags (vnm, tm->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); vnet_sw_interface_set_flags (vnm, tm->sw_if_index, VNET_SW_INTERFACE_FLAG_ADMIN_UP); } { unix_file_t template = {0}; template.read_function = tuntap_read_ready; template.file_descriptor = tm->dev_net_tun_fd; tm->unix_file_index = unix_file_add (&unix_main, &template); } done: if (error) { if (tm->dev_net_tun_fd >= 0) close (tm->dev_net_tun_fd); if (tm->dev_tap_fd >= 0) close (tm->dev_tap_fd); } return error; } VLIB_CONFIG_FUNCTION (tuntap_config, "tuntap"); /** * @brief Add or Del IP4 address to tun/tap interface * * @param *im - ip4_main_t * @param opaque - uword * @param sw_if_index - u32 * @param *address - ip4_address_t * @param is_delete - u32 * */ void tuntap_ip4_add_del_interface_address (ip4_main_t * im, uword opaque, u32 sw_if_index, ip4_address_t * address, u32 address_length, u32 if_address_index, u32 is_delete) { tuntap_main_t * tm = &tuntap_main; struct ifreq ifr; subif_address_t subif_addr, * ap; uword * p; /** Tuntap disabled, or using a "normal" interface. */ if (tm->have_normal_interface || tm->dev_tap_fd < 0) return; /** See if we already know about this subif */ memset (&subif_addr, 0, sizeof (subif_addr)); subif_addr.sw_if_index = sw_if_index; clib_memcpy (&subif_addr.addr, address, sizeof (*address)); p = mhash_get (&tm->subif_mhash, &subif_addr); if (p) ap = pool_elt_at_index (tm->subifs, p[0]); else { pool_get (tm->subifs, ap); *ap = subif_addr; mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0); } /* Use subif pool index to select alias device. */ memset (&ifr, 0, sizeof (ifr)); snprintf (ifr.ifr_name, sizeof(ifr.ifr_name), "%s:%d", tm->tun_name, (int)(ap - tm->subifs)); /* the tuntap punt/inject is enabled for IPv4 RX so long as * any vpp interface has an IPv4 address. * this is also ref counted. */ ip4_sw_interface_enable_disable (tm->sw_if_index, !is_delete); if (! 
is_delete) { struct sockaddr_in * sin; sin = (struct sockaddr_in *)&ifr.ifr_addr; /* Set ipv4 address, netmask. */ sin->sin_family = AF_INET; clib_memcpy (&sin->sin_addr.s_addr, address, 4); if (ioctl (tm->dev_tap_fd, SIOCSIFADDR, &ifr) < 0) clib_unix_warning ("ioctl SIOCSIFADDR"); sin->sin_addr.s_addr = im->fib_masks[address_length]; if (ioctl (tm->dev_tap_fd, SIOCSIFNETMASK, &ifr) < 0) clib_unix_warning ("ioctl SIOCSIFNETMASK"); } else { mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */); pool_put (tm->subifs, ap); } /* get flags, modify to bring up interface... */ if (ioctl (tm->dev_tap_fd, SIOCGIFFLAGS, &ifr) < 0) clib_unix_warning ("ioctl SIOCGIFFLAGS"); if (is_delete) ifr.ifr_flags &= ~(IFF_UP | IFF_RUNNING); else ifr.ifr_flags |= (IFF_UP | IFF_RUNNING); if (ioctl (tm->dev_tap_fd, SIOCSIFFLAGS, &ifr) < 0) clib_unix_warning ("ioctl SIOCSIFFLAGS"); } /** * @brief workaround for a known include file bug. * including @c <linux/ipv6.h> causes multiple definitions if * @c <netinet/in.h is also included. */ struct in6_ifreq { struct in6_addr ifr6_addr; u32 ifr6_prefixlen; int ifr6_ifindex; }; /** * @brief Add or Del tun/tap interface address. * * Both the v6 interface address API and the way ifconfig * displays subinterfaces differ from their v4 couterparts. * The code given here seems to work but YMMV. * * @param *im - ip6_main_t * @param opaque - uword * @param sw_if_index - u32 * @param *address - ip6_address_t * @param address_length - u32 * @param if_address_index - u32 * @param is_delete - u32 */ void tuntap_ip6_add_del_interface_address (ip6_main_t * im, uword opaque, u32 sw_if_index, ip6_address_t * address, u32 address_length, u32 if_address_index, u32 is_delete) { tuntap_main_t * tm = &tuntap_main; struct ifreq ifr; struct in6_ifreq ifr6; subif_address_t subif_addr, * ap; uword * p; /* Tuntap disabled, or using a "normal" interface. 
*/ if (tm->have_normal_interface || tm->dev_tap_fd < 0) return; /* See if we already know about this subif */ memset (&subif_addr, 0, sizeof (subif_addr)); subif_addr.sw_if_index = sw_if_index; subif_addr.is_v6 = 1; clib_memcpy (&subif_addr.addr, address, sizeof (*address)); p = mhash_get (&tm->subif_mhash, &subif_addr); if (p) ap = pool_elt_at_index (tm->subifs, p[0]); else { pool_get (tm->subifs, ap); *ap = subif_addr; mhash_set (&tm->subif_mhash, ap, ap - tm->subifs, 0); } /* Use subif pool index to select alias device. */ memset (&ifr, 0, sizeof (ifr)); memset (&ifr6, 0, sizeof (ifr6)); snprintf (ifr.ifr_name, sizeof(ifr.ifr_name), "%s:%d", tm->tun_name, (int)(ap - tm->subifs)); /* the tuntap punt/inject is enabled for IPv6 RX so long as * any vpp interface has an IPv6 address. * this is also ref counted. */ ip6_sw_interface_enable_disable (tm->sw_if_index, !is_delete); if (! is_delete) { int sockfd = socket (AF_INET6, SOCK_STREAM, 0); if (sockfd < 0) clib_unix_warning ("get ifindex socket"); if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0) clib_unix_warning ("get ifindex"); ifr6.ifr6_ifindex = ifr.ifr_ifindex; ifr6.ifr6_prefixlen = address_length; clib_memcpy (&ifr6.ifr6_addr, address, 16); if (ioctl (sockfd, SIOCSIFADDR, &ifr6) < 0) clib_unix_warning ("set address"); if (sockfd >= 0) close (sockfd); } else { int sockfd = socket (AF_INET6, SOCK_STREAM, 0); if (sockfd < 0) clib_unix_warning ("get ifindex socket"); if (ioctl (sockfd, SIOGIFINDEX, &ifr) < 0) clib_unix_warning ("get ifindex"); ifr6.ifr6_ifindex = ifr.ifr_ifindex; ifr6.ifr6_prefixlen = address_length; clib_memcpy (&ifr6.ifr6_addr, address, 16); if (ioctl (sockfd, SIOCDIFADDR, &ifr6) < 0) clib_unix_warning ("del address"); if (sockfd >= 0) close (sockfd); mhash_unset (&tm->subif_mhash, &subif_addr, 0 /* old value ptr */); pool_put (tm->subifs, ap); } } /** * @brief TX the tun/tap frame * * @param *vm - vlib_main_t * @param *node - vlib_node_runtime_t * @param *frame - vlib_frame_t * */ static void 
tuntap_punt_frame (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { tuntap_tx (vm, node, frame); vlib_frame_free (vm, node, frame); } /** * @brief Free the tun/tap frame * * @param *vm - vlib_main_t * @param *node - vlib_node_runtime_t * @param *frame - vlib_frame_t * */ static void tuntap_nopunt_frame (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 * buffers = vlib_frame_args (frame); uword n_packets = frame->n_vectors; vlib_buffer_free (vm, buffers, n_packets); vlib_frame_free (vm, node, frame); } VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = { .name = "tuntap", .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P, }; /** * @brief Format tun/tap interface name * * @param *s - u8 - formatter string * @param *args - va_list * * @return *s - u8 - formatted string * */ static u8 * format_tuntap_interface_name (u8 * s, va_list * args) { u32 i = va_arg (*args, u32); s = format (s, "tuntap-%d", i); return s; } /** * @brief TX packet out tun/tap * * @param *vm - vlib_main_t * @param *node - vlib_node_runtime_t * @param *frame - vlib_frame_t * * @return n_buffers - uword - Packets transmitted * */ static uword tuntap_intfc_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { tuntap_main_t * tm = &tuntap_main; u32 * buffers = vlib_frame_args (frame); uword n_buffers = frame->n_vectors; /* Normal interface transmit happens only on the normal interface... 
*/ if (tm->have_normal_interface) return tuntap_tx (vm, node, frame); vlib_buffer_free (vm, buffers, n_buffers); return n_buffers; } VNET_DEVICE_CLASS (tuntap_dev_class,static) = { .name = "tuntap", .tx_function = tuntap_intfc_tx, .format_device_name = format_tuntap_interface_name, }; /** * @brief tun/tap node init * * @param *vm - vlib_main_t * * @return error - clib_error_t * */ static clib_error_t * tuntap_init (vlib_main_t * vm) { clib_error_t * error; ip4_main_t * im4 = &ip4_main; ip6_main_t * im6 = &ip6_main; ip4_add_del_interface_address_callback_t cb4; ip6_add_del_interface_address_callback_t cb6; tuntap_main_t * tm = &tuntap_main; error = vlib_call_init_function (vm, ip4_init); if (error) return error; mhash_init (&tm->subif_mhash, sizeof (u32), sizeof(subif_address_t)); cb4.function = tuntap_ip4_add_del_interface_address; cb4.function_opaque = 0; vec_add1 (im4->add_del_interface_address_callbacks, cb4); cb6.function = tuntap_ip6_add_del_interface_address; cb6.function_opaque = 0; vec_add1 (im6->add_del_interface_address_callbacks, cb6); return 0; } VLIB_INIT_FUNCTION (tuntap_init);