/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * ip6/packet.h: ip6 packet format
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef included_ip6_packet_h
#define included_ip6_packet_h

#include <vnet/ip/tcp_packet.h>
#include <vnet/ip/ip4_packet.h>

typedef union
{
  u8 as_u8[16];
  u16 as_u16[8];
  u32 as_u32[4];
  u64 as_u64[2];
  uword as_uword[16 / sizeof (uword)];
}
ip6_address_t;
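
/* Illustrative sketch, not part of the original header: the union views
   the same 128-bit address at byte, u16, u32, u64 or machine-word
   granularity.  The multi-byte views hold network byte order, so 16-bit
   groups are stored via clib_host_to_net_u16().  The helper name below
   is hypothetical.  Example: build 2001:db8::1. */
always_inline void
ip6_address_example_build (ip6_address_t * a)
{
  a->as_u64[0] = a->as_u64[1] = 0;
  a->as_u16[0] = clib_host_to_net_u16 (0x2001);
  a->as_u16[1] = clib_host_to_net_u16 (0x0db8);
  a->as_u8[15] = 1;
}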

/* Packed so that the mhash key doesn't include uninitialized pad bytes */
/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
  /* IP address must be first for ip_interface_address_get_address() to work */
  ip6_address_t ip6_addr;
  u32 fib_index;
}) ip6_address_fib_t;
/* *INDENT-ON* */

/* *INDENT-OFF* */
typedef CLIB_PACKED (union {
  struct {
    u32 pad[3];
    ip4_address_t ip4;
  };
  ip6_address_t ip6;
  u8 as_u8[16];
  u64 as_u64[2];
}) ip46_address_t;
/* *INDENT-ON* */
#define ip46_address_is_ip4(ip46)	(((ip46)->pad[0] | (ip46)->pad[1] | (ip46)->pad[2]) == 0)
#define ip46_address_mask_ip4(ip46)	((ip46)->pad[0] = (ip46)->pad[1] = (ip46)->pad[2] = 0)
#define ip46_address_set_ip4(ip46, ip)	(ip46_address_mask_ip4(ip46), (ip46)->ip4 = (ip)[0])
#define ip46_address_reset(ip46)	((ip46)->as_u64[0] = (ip46)->as_u64[1] = 0)
#define ip46_address_cmp(ip46_1, ip46_2) (memcmp(ip46_1, ip46_2, sizeof(*ip46_1)))
#define ip46_address_is_zero(ip46)	(((ip46)->as_u64[0] == 0) && ((ip46)->as_u64[1] == 0))
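
/* Illustrative sketch, not part of the original header: an IPv4 address
   is stored in the low 4 bytes with the 12 pad bytes zeroed, which is
   exactly what ip46_address_is_ip4() tests.  The helper name is
   hypothetical. */
always_inline void
ip46_example_from_ip4 (ip46_address_t * a, ip4_address_t * ip4)
{
  ip46_address_set_ip4 (a, ip4);	/* zeroes pad[], copies ip4 */
  ASSERT (ip46_address_is_ip4 (a));
}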

always_inline void
ip46_from_addr_buf (u32 is_ipv6, u8 * buf, ip46_address_t * ip)
{
  if (is_ipv6)
    ip->ip6 = *((ip6_address_t *) buf);
  else
    ip46_address_set_ip4 (ip, (ip4_address_t *) buf);
}

always_inline void
ip6_addr_fib_init (ip6_address_fib_t * addr_fib, ip6_address_t * address,
		   u32 fib_index)
{
  addr_fib->ip6_addr.as_u64[0] = address->as_u64[0];
  addr_fib->ip6_addr.as_u64[1] = address->as_u64[1];
  addr_fib->fib_index = fib_index;
}

/* Special addresses:
   unspecified		::/128
   loopback		::1/128
   global unicast       2000::/3
   unique local unicast fc00::/7
   link local unicast	fe80::/10
   multicast		ff00::/8
   ietf reserved	everything else. */

#define foreach_ip6_multicast_address_scope	\
  _ (loopback, 0x1)				\
  _ (link_local, 0x2)				\
  _ (admin_local, 0x4)				\
  _ (site_local, 0x5)				\
  _ (organization_local, 0x8)			\
  _ (global, 0xe)

#define foreach_ip6_multicast_link_local_group_id	\
  _ (all_hosts, 0x1)					\
  _ (all_routers, 0x2)					\
  _ (rip_routers, 0x9)					\
  _ (eigrp_routers, 0xa)				\
  _ (pim_routers, 0xd)					\
  _ (mldv2_routers, 0x16)

typedef enum
{
#define _(f,n) IP6_MULTICAST_SCOPE_##f = n,
  foreach_ip6_multicast_address_scope
#undef _
} ip6_multicast_address_scope_t;

typedef enum
{
#define _(f,n) IP6_MULTICAST_GROUP_ID_##f = n,
  foreach_ip6_multicast_link_local_group_id
#undef _
} ip6_multicast_link_local_group_id_t;

always_inline uword
ip6_address_is_multicast (ip6_address_t * a)
{
  return a->as_u8[0] == 0xff;
}

always_inline uword
ip46_address_is_multicast (ip46_address_t * a)
{
  return ip46_address_is_ip4 (a) ? ip4_address_is_multicast (&a->ip4) :
    ip6_address_is_multicast (&a->ip6);
}

always_inline void
ip6_set_reserved_multicast_address (ip6_address_t * a,
				    ip6_multicast_address_scope_t scope,
				    u16 id)
{
  a->as_u64[0] = a->as_u64[1] = 0;
  a->as_u16[0] = clib_host_to_net_u16 (0xff00 | scope);
  a->as_u16[7] = clib_host_to_net_u16 (id);
}
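
/* Illustrative sketch, not part of the original header: combining the
   scope and group-id enums above yields the well-known groups, e.g. the
   link-local all-routers address ff02::2.  The helper name is
   hypothetical. */
always_inline void
ip6_example_all_routers_address (ip6_address_t * a)
{
  ip6_set_reserved_multicast_address (a, IP6_MULTICAST_SCOPE_link_local,
				      IP6_MULTICAST_GROUP_ID_all_routers);
  /* a is now ff02::2 */
}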

always_inline void
ip6_set_solicited_node_multicast_address (ip6_address_t * a, u32 id)
{
  /* 0xff02::1:ffXX:XXXX. */
  a->as_u64[0] = a->as_u64[1] = 0;
  a->as_u16[0] = clib_host_to_net_u16 (0xff02);
  a->as_u8[11] = 1;
  ASSERT ((id >> 24) == 0);
  id |= 0xff << 24;
  a->as_u32[3] = clib_host_to_net_u32 (id);
}
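
/* Illustrative sketch, not part of the original header: neighbor
   discovery derives the solicited-node group from the low 24 bits of a
   unicast address, which satisfies the ASSERT above.  The helper name is
   hypothetical. */
always_inline void
ip6_example_solicited_node_for (ip6_address_t * sn, ip6_address_t * unicast)
{
  u32 low24 = clib_net_to_host_u32 (unicast->as_u32[3]) & 0x00ffffff;
  ip6_set_solicited_node_multicast_address (sn, low24);
}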

always_inline void
ip6_link_local_address_from_ethernet_address (ip6_address_t * a,
					      u8 * ethernet_address)
{
  a->as_u64[0] = a->as_u64[1] = 0;
  a->as_u16[0] = clib_host_to_net_u16 (0xfe80);
  /* Invert the universal/local bit to form a modified EUI-64
     interface identifier (RFC 4291, Appendix A). */
  a->as_u8[0x8] = ethernet_address[0] ^ (1 << 1);
  a->as_u8[0x9] = ethernet_address[1];
  a->as_u8[0xa] = ethernet_address[2];
  a->as_u8[0xb] = 0xff;
  a->as_u8[0xc] = 0xfe;
  a->as_u8[0xd] = ethernet_address[3];
  a->as_u8[0xe] = ethernet_address[4];
  a->as_u8[0xf] = ethernet_address[5];
}

always_inline void
ip6_multicast_ethernet_address (u8 * ethernet_address, u32 group_id)
{
  ethernet_address[0] = 0x33;
  ethernet_address[1] = 0x33;
  ethernet_address[2] = ((group_id >> 24) & 0xff);
  ethernet_address[3] = ((group_id >> 16) & 0xff);
  ethernet_address[4] = ((group_id >> 8) & 0xff);
  ethernet_address[5] = ((group_id >> 0) & 0xff);
}
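
/* Illustrative sketch, not part of the original header: an IPv6
   multicast address maps to MAC 33:33 followed by its low 32 bits
   (RFC 2464); e.g. ff02::1 maps to 33:33:00:00:00:01.  The helper name
   is hypothetical. */
always_inline void
ip6_example_mcast_mac (u8 * mac, ip6_address_t * group)
{
  ip6_multicast_ethernet_address (mac,
				  clib_net_to_host_u32 (group->as_u32[3]));
}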

always_inline uword
ip6_address_is_equal (ip6_address_t * a, ip6_address_t * b)
{
  int i;
  for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
    if (a->as_uword[i] != b->as_uword[i])
      return 0;
  return 1;
}

always_inline uword
ip6_address_is_equal_masked (ip6_address_t * a, ip6_address_t * b,
			     ip6_address_t * mask)
{
  int i;
  for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
    {
      uword a_masked, b_masked;
      a_masked = a->as_uword[i] & mask->as_uword[i];
      b_masked = b->as_uword[i] & mask->as_uword[i];

      if (a_masked != b_masked)
	return 0;
    }
  return 1;
}

always_inline void
ip6_address_mask (ip6_address_t * a, ip6_address_t * mask)
{
  int i;
  for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
    a->as_uword[i] &= mask->as_uword[i];
}

always_inline void
ip6_address_set_zero (ip6_address_t * a)
{
  int i;
  for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
    a->as_uword[i] = 0;
}

always_inline void
ip6_address_mask_from_width (ip6_address_t * a, u32 width)
{
  int i, byte, bit, bitnum;
  ASSERT (width <= 128);
  memset (a, 0, sizeof (a[0]));
  for (i = 0; i < width; i++)
    {
      bitnum = (7 - (i & 7));
      byte = i / 8;
      bit = 1 << bitnum;
      a->as_u8[byte] |= bit;
    }
}
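
/* Illustrative sketch, not part of the original header: extracting the
   /48 prefix of an address by building a width mask and applying it.
   The helper name is hypothetical. */
always_inline void
ip6_example_prefix48 (ip6_address_t * prefix, ip6_address_t * addr)
{
  ip6_address_t mask;
  ip6_address_mask_from_width (&mask, 48);	/* ffff:ffff:ffff:: */
  prefix[0] = addr[0];
  ip6_address_mask (prefix, &mask);
}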

always_inline uword
ip6_address_is_zero (ip6_address_t * a)
{
  int i;
  for (i = 0; i < ARRAY_LEN (a->as_uword); i++)
    if (a->as_uword[i] != 0)
      return 0;
  return 1;
}

/* Check for the unspecified address :: */
always_inline uword
ip6_address_is_unspecified (ip6_address_t * a)
{
  return ip6_address_is_zero (a);
}

/* Check for loopback address ::1 */
always_inline uword
ip6_address_is_loopback (ip6_address_t * a)
{
  return (a->as_u64[0] == 0
	  && a->as_u32[2] == 0
	  && a->as_u32[3] == clib_host_to_net_u32 (1));
}

/* Check for link local unicast fe80::/10. */
always_inline uword
ip6_address_is_link_local_unicast (ip6_address_t * a)
{
  return a->as_u8[0] == 0xfe && (a->as_u8[1] & 0xc0) == 0x80;
}

/* Check for unique local unicast fc00::/7. */
always_inline uword
ip6_address_is_local_unicast (ip6_address_t * a)
{
  return (a->as_u8[0] & 0xfe) == 0xfc;
}

/* Check for global unicast 2000::/3. */
always_inline uword
ip6_address_is_global_unicast (ip6_address_t * a)
{
  return (a->as_u8[0] & 0xe0) == 0x20;
}

/* Check for solicited node multicast 0xff02::1:ff00:0/104 */
always_inline uword
ip6_is_solicited_node_multicast_address (ip6_address_t * a)
{
  return (a->as_u32[0] == clib_host_to_net_u32 (0xff020000)
	  && a->as_u32[1] == 0
	  && a->as_u32[2] == clib_host_to_net_u32 (1)
	  && a->as_u8[12] == 0xff);
}
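
/* Illustrative sketch, not part of the original header: the predicates
   above classify an address per the "Special addresses" table earlier in
   this file.  The helper name is hypothetical. */
always_inline const char *
ip6_example_classify (ip6_address_t * a)
{
  if (ip6_address_is_unspecified (a))
    return "unspecified";
  if (ip6_address_is_loopback (a))
    return "loopback";
  if (ip6_address_is_multicast (a))
    return "multicast";
  if (ip6_address_is_link_local_unicast (a))
    return "link-local unicast";
  if (ip6_address_is_local_unicast (a))
    return "unique local unicast";
  if (ip6_address_is_global_unicast (a))
    return "global unicast";
  return "ietf reserved";
}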

typedef struct
{
  /* 4 bit version, 8 bit traffic class and 20 bit flow label. */
  u32 ip_version_traffic_class_and_flow_label;

  /* Total packet length not including this header (but including
     any extension headers if present). */
  u16 payload_length;

  /* Protocol for next header. */
  u8 protocol;

  /* Hop limit decremented by router at each hop. */
  u8 hop_limit;

  /* Source and destination address. */
  ip6_address_t src_address, dst_address;
} ip6_header_t;
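
/* Illustrative sketch, not part of the original header: the first word
   packs the 4-bit version, 8-bit traffic class and 20-bit flow label,
   stored in network byte order.  The helper name is hypothetical; usage:
   ip->ip_version_traffic_class_and_flow_label = ip6_example_vtf (0, 0); */
always_inline u32
ip6_example_vtf (u8 traffic_class, u32 flow_label)
{
  return clib_host_to_net_u32 ((6 << 28)
			       | ((u32) traffic_class << 20)
			       | (flow_label & 0x000fffff));
}
/* Unpacking works in reverse, e.g.
   version = clib_net_to_host_u32 (vtf) >> 28. */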

always_inline void *
ip6_next_header (ip6_header_t * i)
{
  return (void *) (i + 1);
}

always_inline void
ip6_copy_header (ip6_header_t * dst, const ip6_header_t * src)
{
  dst->ip_version_traffic_class_and_flow_label =
    src->ip_version_traffic_class_and_flow_label;
  dst->payload_length = src->payload_length;
  dst->protocol = src->protocol;
  dst->hop_limit = src->hop_limit;

  dst->src_address.as_uword[0] = src->src_address.as_uword[0];
  dst->src_address.as_uword[1] = src->src_address.as_uword[1];
  dst->dst_address.as_uword[0] = src->dst_address.as_uword[0];
  dst->dst_address.as_uword[1] = src->dst_address.as_uword[1];
}

always_inline void
ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0)
{
  {
    ip6_address_t src0, dst0;

    src0 = ip0->src_address;
    dst0 = ip0->dst_address;
    ip0->src_address = dst0;
    ip0->dst_address = src0;
  }

  {
    u16 src0, dst0;

    src0 = tcp0->ports.src;
    dst0 = tcp0->ports.dst;
    tcp0->ports.src = dst0;
    tcp0->ports.dst = src0;
  }
}

always_inline void
ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1,
		  tcp_header_t * tcp0, tcp_header_t * tcp1)
{
  {
    ip6_address_t src0, dst0, src1, dst1;

    src0 = ip0->src_address;
    src1 = ip1->src_address;
    dst0 = ip0->dst_address;
    dst1 = ip1->dst_address;
    ip0->src_address = dst0;
    ip1->src_address = dst1;
    ip0->dst_address = src0;
    ip1->dst_address = src1;
  }

  {
    u16 src0, dst0, src1, dst1;

    src0 = tcp0->ports.src;
    src1 = tcp1->ports.src;
    dst0 = tcp0->ports.dst;
    dst1 = tcp1->ports.dst;
    tcp0->ports.src = dst0;
    tcp1->ports.src = dst1;
    tcp0->ports.dst = src0;
    tcp1->ports.dst = src1;
  }
}


/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
  u8 data;
}) ip6_pad1_option_t;
/* *INDENT-ON* */

/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
  u8 type;
  u8 len;
  u8 data[0];
}) ip6_padN_option_t;
/* *INDENT-ON* */

/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
#define IP6_MLDP_ALERT_TYPE  0x5
  u8 type;
  u8 len;
  u16 value;
}) ip6_router_alert_option_t;
/* *INDENT-ON* */

/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
  u8 next_hdr;
  /* Length of this header plus option data in 8 byte units. */
  u8 n_data_u64s;
}) ip6_ext_header_t;

always_inline u8
ip6_ext_hdr (u8 nexthdr)
{
  /*
   * find out if nexthdr is an extension header or a protocol
   */
  return (nexthdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ||
    (nexthdr == IP_PROTOCOL_IP6_NONXT) ||
    (nexthdr == IP_PROTOCOL_IPV6_FRAGMENTATION) ||
    (nexthdr == IP_PROTOCOL_IPSEC_AH) ||
    (nexthdr == IP_PROTOCOL_IPV6_ROUTE) ||
    (nexthdr == IP_PROTOCOL_IP6_DESTINATION_OPTIONS);
}

#define ip6_ext_header_len(p)  (((p)->n_data_u64s+1) << 3)
#define ip6_ext_authhdr_len(p) (((p)->n_data_u64s+2) << 2)

always_inline void *
ip6_ext_next_header (ip6_ext_header_t * ext_hdr)
{
  return (void *) ((u8 *) ext_hdr + ip6_ext_header_len (ext_hdr));
}
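
/* Illustrative sketch, not part of the original header: skipping the
   extension header chain to find the upper-layer protocol.  Caveats: a
   real parser must bounds-check against the packet length, AH lengths
   are in 4-byte units (see ip6_ext_authhdr_len above), and ip6_ext_hdr()
   also returns true for IP_PROTOCOL_IP6_NONXT, hence the extra tests.
   The helper name is hypothetical. */
always_inline u8
ip6_example_last_protocol (ip6_header_t * ip)
{
  u8 proto = ip->protocol;
  ip6_ext_header_t *h = (ip6_ext_header_t *) ip6_next_header (ip);
  while (ip6_ext_hdr (proto) && proto != IP_PROTOCOL_IP6_NONXT
	 && proto != IP_PROTOCOL_IPSEC_AH)
    {
      proto = h->next_hdr;
      h = (ip6_ext_header_t *) ip6_ext_next_header (h);
    }
  return proto;
}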

typedef CLIB_PACKED (struct {
  u8 next_hdr;
  /* Length of this header plus option data in 8 byte units. */
  u8 n_data_u64s;
  u8 data[0];
}) ip6_hop_by_hop_ext_t;
/* *INDENT-ON* */

/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
  u8 next_hdr;
  u8 rsv;
  u16 fragment_offset_and_more;
  u32 identification;
}) ip6_frag_hdr_t;
/* *INDENT-ON* */

#define ip6_frag_hdr_offset(hdr) \
  (clib_net_to_host_u16((hdr)->fragment_offset_and_more) >> 3)

#define ip6_frag_hdr_more(hdr) \
  (clib_net_to_host_u16((hdr)->fragment_offset_and_more) & 0x1)

#define ip6_frag_hdr_offset_and_more(offset, more) \
  clib_host_to_net_u16(((offset) << 3) + !!(more))
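
/* Illustrative sketch, not part of the original header: the fragment
   offset is carried in 8-byte units next to the more-fragments bit;
   round-trip of offset 185 with more-fragments set.  The helper name is
   hypothetical. */
always_inline void
ip6_example_frag_fields (ip6_frag_hdr_t * hdr)
{
  hdr->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (185, 1);
  ASSERT (ip6_frag_hdr_offset (hdr) == 185);
  ASSERT (ip6_frag_hdr_more (hdr) == 1);
}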

#endif /* included_ip6_packet_h */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
class="n">vif->pci_dev_handle, VIRTIO_PCI_QUEUE_PFN, &addr2); if ((u32) addr == addr2) return 0; return 1; } static void virtio_pci_legacy_del_queue (vlib_main_t * vm, virtio_if_t * vif, u16 queue_id) { u32 src = 0; vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, &queue_id); vlib_pci_write_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_PFN, &src); } inline void virtio_pci_legacy_notify_queue (vlib_main_t * vm, virtio_if_t * vif, u16 queue_id) { vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NOTIFY, &queue_id); } /* Enable one vector (0) for Link State Intrerrupt */ static u16 virtio_pci_legacy_set_config_irq (vlib_main_t * vm, virtio_if_t * vif, u16 vec) { vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_CONFIG_VECTOR, &vec); vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_CONFIG_VECTOR, &vec); return vec; } static u16 virtio_pci_legacy_set_queue_irq (vlib_main_t * vm, virtio_if_t * vif, u16 vec, u16 queue_id) { vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, &queue_id); vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_QUEUE_VECTOR, &vec); vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_QUEUE_VECTOR, &vec); return vec; } static u32 virtio_pci_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags) { return 0; } static clib_error_t * virtio_pci_get_max_virtqueue_pairs (vlib_main_t * vm, virtio_if_t * vif) { virtio_net_config_t config; clib_error_t *error = 0; u16 max_queue_pairs = 1; if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MQ)) { virtio_pci_legacy_read_config (vm, vif, &config.max_virtqueue_pairs, sizeof (config.max_virtqueue_pairs), STRUCT_OFFSET_OF (virtio_net_config_t, max_virtqueue_pairs)); max_queue_pairs = config.max_virtqueue_pairs; } virtio_log_debug (vif, "max queue pair is %x", max_queue_pairs); if (max_queue_pairs < 1 || max_queue_pairs > 0x8000) return clib_error_return (error, "max queue pair is %x", max_queue_pairs); vif->max_queue_pairs = max_queue_pairs; return error; } static void virtio_pci_set_mac (vlib_main_t * vm, virtio_if_t * vif) { virtio_pci_legacy_write_config (vm, vif, vif->mac_addr, sizeof (vif->mac_addr), 0); } static u32 virtio_pci_get_mac (vlib_main_t * vm, virtio_if_t * vif) { if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MAC)) { virtio_pci_legacy_read_config (vm, vif, vif->mac_addr, sizeof (vif->mac_addr), 0); return 0; } return 1; } static u16 virtio_pci_is_link_up (vlib_main_t * vm, virtio_if_t * vif) { /* * Minimal driver: assumes link is up */ u16 status = 1; if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_STATUS)) virtio_pci_legacy_read_config (vm, vif, &status, sizeof (status), /* mac */ STRUCT_OFFSET_OF (virtio_net_config_t, status)); return status; } static void virtio_pci_irq_0_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line) { vnet_main_t *vnm = vnet_get_main (); virtio_main_t *vim = &virtio_main; uword pd = vlib_pci_get_private_data (vm, h); virtio_if_t *vif = pool_elt_at_index (vim->interfaces, pd); u16 qid = line; vnet_device_input_set_interrupt_pending (vnm, vif->hw_if_index, qid); } static void virtio_pci_irq_1_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line) { vnet_main_t *vnm = vnet_get_main (); virtio_main_t *vim = &virtio_main; uword pd = vlib_pci_get_private_data (vm, h); virtio_if_t *vif = pool_elt_at_index (vim->interfaces, pd); if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP) { vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP; vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 
VNET_HW_INTERFACE_FLAG_LINK_UP); } else { vif->flags &= ~VIRTIO_IF_FLAG_ADMIN_UP; vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); } } static void virtio_pci_irq_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h) { virtio_main_t *vim = &virtio_main; uword pd = vlib_pci_get_private_data (vm, h); virtio_if_t *vif = pool_elt_at_index (vim->interfaces, pd); u8 isr = 0; u16 line = 0; isr = virtio_pci_legacy_get_isr (vm, vif); /* * If the lower bit is set: look through the used rings of * all virtqueues for the device, to see if any progress has * been made by the device which requires servicing. */ if (isr & VIRTIO_PCI_ISR_INTR) virtio_pci_irq_0_handler (vm, h, line); if (isr & VIRTIO_PCI_ISR_CONFIG) virtio_pci_irq_1_handler (vm, h, line); } inline void device_status (vlib_main_t * vm, virtio_if_t * vif) { struct status_struct { u8 bit; char *str; }; struct status_struct *status_entry; static struct status_struct status_array[] = { #define _(s,b) { .str = #s, .bit = b, }, foreach_virtio_config_status_flags #undef _ {.str = NULL} }; vlib_cli_output (vm, " status 0x%x", vif->status); status_entry = (struct status_struct *) &status_array; while (status_entry->str) { if (vif->status & status_entry->bit) vlib_cli_output (vm, " %s (%x)", status_entry->str, status_entry->bit); status_entry++; } } inline void debug_device_config_space (vlib_main_t * vm, virtio_if_t * vif) { u32 data_u32; u16 data_u16; u8 data_u8; vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_HOST_FEATURES, &data_u32); vlib_cli_output (vm, "remote features 0x%lx", data_u32); vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_GUEST_FEATURES, &data_u32); vlib_cli_output (vm, "guest features 0x%lx", data_u32); vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_PFN, &data_u32); vlib_cli_output (vm, "queue address 0x%lx", data_u32); vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NUM, &data_u16); vlib_cli_output (vm, "queue size 0x%x", data_u16); vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, &data_u16); vlib_cli_output (vm, "queue select 0x%x", data_u16); vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_NOTIFY, &data_u16); vlib_cli_output (vm, "queue notify 0x%x", data_u16); vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_STATUS, &data_u8); vlib_cli_output (vm, "status 0x%x", data_u8); vlib_pci_read_io_u8 (vm, vif->pci_dev_handle, VIRTIO_PCI_ISR, &data_u8); vlib_cli_output (vm, "isr 0x%x", data_u8); if (vif->msix_enabled == VIRTIO_MSIX_ENABLED) { vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_CONFIG_VECTOR, &data_u16); vlib_cli_output (vm, "config vector 0x%x", data_u16); u16 queue_id = 0; vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL, &queue_id); vlib_pci_read_io_u16 (vm, vif->pci_dev_handle, VIRTIO_MSI_QUEUE_VECTOR, &data_u16); vlib_cli_output (vm, "queue vector for queue (0) 0x%x", data_u16); } u8 mac[6]; virtio_pci_legacy_read_config (vm, vif, mac, sizeof (mac), 0); vlib_cli_output (vm, "mac %U", format_ethernet_address, mac); virtio_pci_legacy_read_config (vm, vif, &data_u16, sizeof (u16), /* offset to status */ 6); vlib_cli_output (vm, "link up/down status 0x%x", data_u16); virtio_pci_legacy_read_config (vm, vif, &data_u16, sizeof (u16), /* offset to max_virtqueue */ 8); vlib_cli_output (vm, "num of virtqueue 0x%x", data_u16); virtio_pci_legacy_read_config (vm, vif, &data_u16, sizeof (u16), /* offset to mtu */ 10); vlib_cli_output (vm, "mtu 0x%x", data_u16); u32 i = PCI_CONFIG_SIZE (vif) + 12, a = 4; 
i += a; i &= ~a; for (; i < 64; i += 4) { u32 data = 0; vlib_pci_read_io_u32 (vm, vif->pci_dev_handle, i, &data); vlib_cli_output (vm, "0x%lx", data); } } struct virtio_ctrl_msg { struct virtio_net_ctrl_hdr ctrl; virtio_net_ctrl_ack status; u8 data[1024]; }; static int virtio_pci_send_ctrl_msg (vlib_main_t * vm, virtio_if_t * vif, struct virtio_ctrl_msg *data, u32 len) { virtio_vring_t *vring = vif->cxq_vring; virtio_net_ctrl_ack status = VIRTIO_NET_ERR; struct virtio_ctrl_msg result; u32 buffer_index; vlib_buffer_t *b; u16 used, next, avail; u16 sz = vring->size; u16 mask = sz - 1; used = vring->desc_in_use; next = vring->desc_next; avail = vring->avail->idx; struct vring_desc *d = &vring->desc[next]; if (vlib_buffer_alloc (vm, &buffer_index, 1)) b = vlib_get_buffer (vm, buffer_index); else return VIRTIO_NET_ERR; /* * current_data may not be initialized with 0 and may contain * previous offset. */ b->current_data = 0; clib_memcpy (vlib_buffer_get_current (b), data, sizeof (struct virtio_ctrl_msg)); d->flags = VRING_DESC_F_NEXT; d->addr = vlib_buffer_get_current_pa (vm, b); d->len = sizeof (struct virtio_net_ctrl_hdr); vring->avail->ring[avail & mask] = next; avail++; next = (next + 1) & mask; d->next = next; used++; d = &vring->desc[next]; d->flags = VRING_DESC_F_NEXT; d->addr = vlib_buffer_get_current_pa (vm, b) + STRUCT_OFFSET_OF (struct virtio_ctrl_msg, data); d->len = len; next = (next + 1) & mask; d->next = next; used++; d = &vring->desc[next]; d->flags = VRING_DESC_F_WRITE; d->addr = vlib_buffer_get_current_pa (vm, b) + STRUCT_OFFSET_OF (struct virtio_ctrl_msg, status); d->len = sizeof (data->status); next = (next + 1) & mask; used++; CLIB_MEMORY_STORE_BARRIER (); vring->avail->idx = avail; vring->desc_next = next; vring->desc_in_use = used; if ((vring->used->flags & VIRTIO_RING_FLAG_MASK_INT) == 0) { virtio_kick (vm, vring, vif); } u16 last = vring->last_used_idx, n_left = 0; n_left = vring->used->idx - last; while (n_left) { struct vring_used_elem *e = &vring->used->ring[last & mask]; u16 slot = e->id; d = &vring->desc[slot]; while (d->flags & VRING_DESC_F_NEXT) { used--; slot = d->next; d = &vring->desc[slot]; } used--; last++; n_left--; } vring->desc_in_use = used; vring->last_used_idx = last; CLIB_MEMORY_BARRIER (); clib_memcpy (&result, vlib_buffer_get_current (b), sizeof (struct virtio_ctrl_msg)); virtio_log_debug (vif, "ctrl-queue: status %u", result.status); status = result.status; vlib_buffer_free (vm, &buffer_index, 1); return status; } static int virtio_pci_disable_offload (vlib_main_t * vm, virtio_if_t * vif) { struct virtio_ctrl_msg offload_hdr; virtio_net_ctrl_ack status = VIRTIO_NET_ERR; offload_hdr.ctrl.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; offload_hdr.ctrl.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; offload_hdr.status = VIRTIO_NET_ERR; u64 offloads = 0ULL; clib_memcpy (offload_hdr.data, &offloads, sizeof (offloads)); status = virtio_pci_send_ctrl_msg (vm, vif, &offload_hdr, sizeof (offloads)); virtio_log_debug (vif, "disable offloads"); vif->remote_features = virtio_pci_legacy_get_host_features (vm, vif); virtio_pci_legacy_get_guest_features (vm, vif); return status; } static int virtio_pci_enable_checksum_offload (vlib_main_t * vm, virtio_if_t * vif) { struct virtio_ctrl_msg csum_offload_hdr; virtio_net_ctrl_ack status = VIRTIO_NET_ERR; csum_offload_hdr.ctrl.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; csum_offload_hdr.ctrl.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; csum_offload_hdr.status = VIRTIO_NET_ERR; u64 offloads = 0ULL; offloads |= VIRTIO_FEATURE 
(VIRTIO_NET_F_GUEST_CSUM); clib_memcpy (csum_offload_hdr.data, &offloads, sizeof (offloads)); status = virtio_pci_send_ctrl_msg (vm, vif, &csum_offload_hdr, sizeof (offloads)); virtio_log_debug (vif, "enable checksum offload"); vif->remote_features = virtio_pci_legacy_get_host_features (vm, vif); virtio_pci_legacy_get_guest_features (vm, vif); return status; } static int virtio_pci_enable_gso (vlib_main_t * vm, virtio_if_t * vif) { struct virtio_ctrl_msg gso_hdr; virtio_net_ctrl_ack status = VIRTIO_NET_ERR; gso_hdr.ctrl.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; gso_hdr.ctrl.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; gso_hdr.status = VIRTIO_NET_ERR; u64 offloads = VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM) | VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4) | VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6); clib_memcpy (gso_hdr.data, &offloads, sizeof (offloads)); status = virtio_pci_send_ctrl_msg (vm, vif, &gso_hdr, sizeof (offloads)); virtio_log_debug (vif, "enable gso"); vif->remote_features = virtio_pci_legacy_get_host_features (vm, vif); virtio_pci_legacy_get_guest_features (vm, vif); return status; } static int virtio_pci_offloads (vlib_main_t * vm, virtio_if_t * vif, int gso_enabled, int csum_offload_enabled) { vnet_main_t *vnm = vnet_get_main (); vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index); if ((vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)) && (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS))) { if (gso_enabled && (vif->features & (VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO4) | VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO6)))) { if (virtio_pci_enable_gso (vm, vif)) { virtio_log_warning (vif, "gso is not enabled"); } else { vif->gso_enabled = 1; vif->csum_offload_enabled = 0; hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO | VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; } } else if (csum_offload_enabled && (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CSUM))) { if (virtio_pci_enable_checksum_offload (vm, vif)) { virtio_log_warning (vif, "checksum offload is not enabled"); } else { vif->csum_offload_enabled = 1; vif->gso_enabled = 0; hw->flags &= ~VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO; hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD; } } else { if (virtio_pci_disable_offload (vm, vif)) { virtio_log_warning (vif, "offloads are not disabled"); } else { vif->csum_offload_enabled = 0; vif->gso_enabled = 0; hw->flags &= ~(VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO | VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD); } } } return 0; } static int virtio_pci_enable_multiqueue (vlib_main_t * vm, virtio_if_t * vif, u16 num_queues) { struct virtio_ctrl_msg mq_hdr; virtio_net_ctrl_ack status = VIRTIO_NET_ERR; mq_hdr.ctrl.class = VIRTIO_NET_CTRL_MQ; mq_hdr.ctrl.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; mq_hdr.status = VIRTIO_NET_ERR; clib_memcpy (mq_hdr.data, &num_queues, sizeof (num_queues)); status = virtio_pci_send_ctrl_msg (vm, vif, &mq_hdr, sizeof (num_queues)); virtio_log_debug (vif, "multi-queue enable %u queues", num_queues); return status; } static u8 virtio_pci_queue_size_valid (u16 qsz) { if (qsz < 64 || qsz > 4096) return 0; if ((qsz % 64) != 0) return 0; return 1; } clib_error_t * virtio_pci_control_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 queue_num) { clib_error_t *error = 0; u16 queue_size = 0; virtio_vring_t *vring; struct vring vr; u32 i = 0; void *ptr = NULL; queue_size = virtio_pci_legacy_get_queue_num (vm, vif, queue_num); if (!virtio_pci_queue_size_valid (queue_size)) clib_warning ("queue size is not valid"); if (!is_pow2 
(queue_size)) return clib_error_return (0, "ring size must be power of 2"); if (queue_size > 32768) return clib_error_return (0, "ring size must be 32768 or lower"); if (queue_size == 0) queue_size = 256; vec_validate_aligned (vif->cxq_vring, 0, CLIB_CACHE_LINE_BYTES); vring = vec_elt_at_index (vif->cxq_vring, 0); i = vring_size (queue_size, VIRTIO_PCI_VRING_ALIGN); i = round_pow2 (i, VIRTIO_PCI_VRING_ALIGN); ptr = vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN, vif->numa_node); if (!ptr) return vlib_physmem_last_error (vm); clib_memset (ptr, 0, i); vring_init (&vr, queue_size, ptr, VIRTIO_PCI_VRING_ALIGN); vring->desc = vr.desc; vring->avail = vr.avail; vring->used = vr.used; vring->queue_id = queue_num; vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT; ASSERT (vring->buffers == 0); vring->size = queue_size; virtio_log_debug (vif, "control-queue: number %u, size %u", queue_num, queue_size); virtio_pci_legacy_setup_queue (vm, vif, queue_num, ptr); vring->kick_fd = -1; return error; } clib_error_t * virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 queue_num) { clib_error_t *error = 0; u16 queue_size = 0; virtio_vring_t *vring; struct vring vr; u32 i = 0; void *ptr = NULL; queue_size = virtio_pci_legacy_get_queue_num (vm, vif, queue_num); if (!virtio_pci_queue_size_valid (queue_size)) clib_warning ("queue size is not valid"); if (!is_pow2 (queue_size)) return clib_error_return (0, "ring size must be power of 2"); if (queue_size > 32768) return clib_error_return (0, "ring size must be 32768 or lower"); if (queue_size == 0) queue_size = 256; if (queue_num % 2) { vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num), CLIB_CACHE_LINE_BYTES); vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num)); clib_spinlock_init (&vring->lockp); } else { vec_validate_aligned (vif->rxq_vrings, RX_QUEUE_ACCESS (queue_num), CLIB_CACHE_LINE_BYTES); vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (queue_num)); } i = vring_size (queue_size, VIRTIO_PCI_VRING_ALIGN); i = round_pow2 (i, VIRTIO_PCI_VRING_ALIGN); ptr = vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN, vif->numa_node); if (!ptr) return vlib_physmem_last_error (vm); clib_memset (ptr, 0, i); vring_init (&vr, queue_size, ptr, VIRTIO_PCI_VRING_ALIGN); vring->desc = vr.desc; vring->avail = vr.avail; vring->used = vr.used; vring->queue_id = queue_num; vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT; ASSERT (vring->buffers == 0); vec_validate_aligned (vring->buffers, queue_size, CLIB_CACHE_LINE_BYTES); if (queue_num % 2) { virtio_log_debug (vif, "tx-queue: number %u, size %u", queue_num, queue_size); } else { virtio_log_debug (vif, "rx-queue: number %u, size %u", queue_num, queue_size); } vring->size = queue_size; if (virtio_pci_legacy_setup_queue (vm, vif, queue_num, ptr)) return clib_error_return (0, "error in queue address setup"); vring->kick_fd = -1; return error; } static void virtio_negotiate_features (vlib_main_t * vm, virtio_if_t * vif, u64 req_features) { /* * if features are not requested * default: all supported features */ u64 supported_features = VIRTIO_FEATURE (VIRTIO_NET_F_CSUM) | VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_CSUM) | VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | VIRTIO_FEATURE (VIRTIO_NET_F_MTU) | VIRTIO_FEATURE (VIRTIO_NET_F_MAC) | VIRTIO_FEATURE (VIRTIO_NET_F_GSO) | VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO4) | VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_TSO6) | VIRTIO_FEATURE (VIRTIO_NET_F_GUEST_UFO) | VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO4) | 
VIRTIO_FEATURE (VIRTIO_NET_F_HOST_TSO6) | VIRTIO_FEATURE (VIRTIO_NET_F_HOST_UFO) | VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) | VIRTIO_FEATURE (VIRTIO_NET_F_STATUS) | VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ) | VIRTIO_FEATURE (VIRTIO_NET_F_MQ) | VIRTIO_FEATURE (VIRTIO_F_NOTIFY_ON_EMPTY) | VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT) | VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC); if (req_features == 0) { req_features = supported_features; } vif->features = req_features & vif->remote_features & supported_features; if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MTU)) { virtio_net_config_t config; virtio_pci_legacy_read_config (vm, vif, &config.mtu, sizeof (config.mtu), STRUCT_OFFSET_OF (virtio_net_config_t, mtu)); if (config.mtu < 64) vif->features &= ~VIRTIO_FEATURE (VIRTIO_NET_F_MTU); } vif->features = virtio_pci_legacy_set_guest_features (vm, vif, vif->features); } void virtio_pci_read_device_feature (vlib_main_t * vm, virtio_if_t * vif) { vif->remote_features = virtio_pci_legacy_get_host_features (vm, vif); } int virtio_pci_reset_device (vlib_main_t * vm, virtio_if_t * vif) { u8 status = 0; /* * Reset the device */ status = virtio_pci_legacy_reset (vm, vif); /* * Set the Acknowledge status bit */ virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_ACK); /* * Set the Driver status bit */ virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_DRIVER); /* * Read the status and verify it */ status = virtio_pci_legacy_get_status (vm, vif); if (! ((status & VIRTIO_CONFIG_STATUS_ACK) && (status & VIRTIO_CONFIG_STATUS_DRIVER))) return -1; vif->status = status; return 0; } clib_error_t * virtio_pci_read_caps (vlib_main_t * vm, virtio_if_t * vif) { clib_error_t *error = 0; struct virtio_pci_cap cap; u8 pos, common_cfg = 0, notify_base = 0, dev_cfg = 0, isr = 0, pci_cfg = 0; vlib_pci_dev_handle_t h = vif->pci_dev_handle; if ((error = vlib_pci_read_config_u8 (vm, h, PCI_CAPABILITY_LIST, &pos))) { virtio_log_error (vif, "error in reading capabilty list position"); clib_error_return (error, "error in reading capabilty list position"); } while (pos) { if ((error = vlib_pci_read_write_config (vm, h, VLIB_READ, pos, &cap, sizeof (cap)))) { virtio_log_error (vif, "%s [%2x]", "error in reading the capability at", pos); clib_error_return (error, "error in reading the capability at [%2x]", pos); } if (cap.cap_vndr == PCI_CAP_ID_MSIX) { u16 flags, table_size, table_size_mask = 0x07FF; if ((error = vlib_pci_read_write_config (vm, h, VLIB_READ, pos + 2, &flags, sizeof (flags)))) clib_error_return (error, "error in reading the capability at [%2x]", pos + 2); table_size = flags & table_size_mask; virtio_log_debug (vif, "flags:0x%x %s 0x%x", flags, "msix interrupt vector table-size", table_size); if (flags & PCI_MSIX_ENABLE) { virtio_log_debug (vif, "msix interrupt enabled"); vif->msix_enabled = VIRTIO_MSIX_ENABLED; } else { virtio_log_debug (vif, "msix interrupt disabled"); vif->msix_enabled = VIRTIO_MSIX_DISABLED; } } if (cap.cap_vndr != PCI_CAP_ID_VNDR) { virtio_log_debug (vif, "[%2x] %s %2x ", pos, "skipping non VNDR cap id:", cap.cap_vndr); goto next; } virtio_log_debug (vif, "[%4x] cfg type: %u, bar: %u, offset: %04x, len: %u", pos, cap.cfg_type, cap.bar, cap.offset, cap.length); switch (cap.cfg_type) { case VIRTIO_PCI_CAP_COMMON_CFG: common_cfg = 1; break; case VIRTIO_PCI_CAP_NOTIFY_CFG: notify_base = 1; break; case VIRTIO_PCI_CAP_DEVICE_CFG: dev_cfg = 1; break; case VIRTIO_PCI_CAP_ISR_CFG: isr = 1; break; case VIRTIO_PCI_CAP_PCI_CFG: if (cap.bar == 0) pci_cfg = 1; break; } next: pos = cap.cap_next; } if 
(common_cfg == 0 || notify_base == 0 || dev_cfg == 0 || isr == 0) { virtio_log_debug (vif, "legacy virtio pci device found"); return error; } if (!pci_cfg) clib_error_return (error, "modern virtio pci device found"); virtio_log_debug (vif, "transitional virtio pci device found"); return error; } static clib_error_t * virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif, virtio_pci_create_if_args_t * args) { clib_error_t *error = 0; vlib_thread_main_t *vtm = vlib_get_thread_main (); u8 status = 0; if ((error = virtio_pci_read_caps (vm, vif))) clib_error_return (error, "Device is not supported"); if (virtio_pci_reset_device (vm, vif) < 0) { virtio_log_error (vif, "Failed to reset the device"); clib_error_return (error, "Failed to reset the device"); } /* * read device features and negotiate (user) requested features */ virtio_pci_read_device_feature (vm, vif); virtio_negotiate_features (vm, vif, args->features); /* * After FEATURE_OK, driver should not accept new feature bits */ virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_FEATURES_OK); status = virtio_pci_legacy_get_status (vm, vif); if (!(status & VIRTIO_CONFIG_STATUS_FEATURES_OK)) { virtio_log_error (vif, "error encountered: Device doesn't support requested features"); clib_error_return (error, "Device doesn't support requested features"); } vif->status = status; /* * get or set the mac address */ if (virtio_pci_get_mac (vm, vif)) { f64 now = vlib_time_now (vm); u32 rnd; rnd = (u32) (now * 1e6); rnd = random_u32 (&rnd); memcpy (vif->mac_addr + 2, &rnd, sizeof (rnd)); vif->mac_addr[0] = 2; vif->mac_addr[1] = 0xfe; virtio_pci_set_mac (vm, vif); } virtio_set_net_hdr_size (vif); /* * Initialize the virtqueues */ if ((error = virtio_pci_get_max_virtqueue_pairs (vm, vif))) goto err; for (int i = 0; i < vif->max_queue_pairs; i++) { if ((error = virtio_pci_vring_init (vm, vif, RX_QUEUE (i)))) { virtio_log_warning (vif, "%s (%u) %s", "error in rxq-queue", RX_QUEUE (i), "initialization"); } else { vif->num_rxqs++; } if (i >= vtm->n_vlib_mains) { /* * There is 1:1 mapping between tx queue and vpp worker thread. * tx queue 0 is bind with thread index 0, tx queue 1 on thread * index 1 and so on. * Multiple worker threads can poll same tx queue when number of * workers are more than tx queues. In this case, 1:N mapping * between tx queue and vpp worker thread. 
*/ virtio_log_debug (vif, "%s %u, %s", "tx-queue: number", TX_QUEUE (i), "no VPP worker thread is available"); continue; } if ((error = virtio_pci_vring_init (vm, vif, TX_QUEUE (i)))) { virtio_log_warning (vif, "%s (%u) %s", "error in txq-queue", TX_QUEUE (i), "initialization"); } else { vif->num_txqs++; } } if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)) { if ((error = virtio_pci_control_vring_init (vm, vif, vif->max_queue_pairs * 2))) { virtio_log_warning (vif, "%s (%u) %s", "error in control-queue", vif->max_queue_pairs * 2, "initialization"); if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MQ)) vif->features &= ~VIRTIO_FEATURE (VIRTIO_NET_F_MQ); } } else { virtio_log_debug (vif, "control queue is not available"); vif->cxq_vring = NULL; } /* * set the msix interrupts */ if (vif->msix_enabled == VIRTIO_MSIX_ENABLED) { if (virtio_pci_legacy_set_config_irq (vm, vif, 1) == VIRTIO_MSI_NO_VECTOR) virtio_log_warning (vif, "config vector 1 is not set"); if (virtio_pci_legacy_set_queue_irq (vm, vif, 0, 0) == VIRTIO_MSI_NO_VECTOR) virtio_log_warning (vif, "queue vector 0 is not set"); } /* * set the driver status OK */ virtio_pci_legacy_set_status (vm, vif, VIRTIO_CONFIG_STATUS_DRIVER_OK); vif->status = virtio_pci_legacy_get_status (vm, vif); err: return error; } void virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args) { vnet_main_t *vnm = vnet_get_main (); virtio_main_t *vim = &virtio_main; virtio_if_t *vif; vlib_pci_dev_handle_t h; clib_error_t *error = 0; /* *INDENT-OFF* */ pool_foreach (vif, vim->interfaces, ({ if (vif->pci_addr.as_u32 == args->addr) { args->rv = VNET_API_ERROR_INVALID_VALUE; args->error = clib_error_return (error, "PCI address in use"); vlib_log (VLIB_LOG_LEVEL_ERR, vim->log_default, "%U: %s", format_vlib_pci_addr, &args->addr, " PCI address in use"); return; } })); /* *INDENT-ON* */ pool_get (vim->interfaces, vif); vif->dev_instance = vif - vim->interfaces; vif->per_interface_next_index = ~0; vif->pci_addr.as_u32 = args->addr; if ((error = vlib_pci_device_open (vm, (vlib_pci_addr_t *) & vif->pci_addr, virtio_pci_device_ids, &h))) { args->rv = VNET_API_ERROR_INVALID_INTERFACE; args->error = clib_error_return (error, "pci-addr %U", format_vlib_pci_addr, &vif->pci_addr); vlib_log (VLIB_LOG_LEVEL_ERR, vim->log_default, "%U: %s", format_vlib_pci_addr, &vif->pci_addr, "error encountered on pci device open"); pool_put (vim->interfaces, vif); return; } vif->pci_dev_handle = h; vlib_pci_set_private_data (vm, h, vif->dev_instance); vif->numa_node = vlib_pci_get_numa_node (vm, h); vif->type = VIRTIO_IF_TYPE_PCI; if ((error = vlib_pci_bus_master_enable (vm, h))) { virtio_log_error (vif, "error encountered on pci bus master enable"); goto error; } if ((error = vlib_pci_io_region (vm, h, 0))) { virtio_log_error (vif, "error encountered on pci io region"); goto error; } if (vlib_pci_get_num_msix_interrupts (vm, h) > 1) { if ((error = vlib_pci_register_msix_handler (vm, h, 0, 1, &virtio_pci_irq_0_handler))) { virtio_log_error (vif, "error encountered on pci register msix handler 0"); goto error; } if ((error = vlib_pci_register_msix_handler (vm, h, 1, 1, &virtio_pci_irq_1_handler))) { virtio_log_error (vif, "error encountered on pci register msix handler 1"); goto error; } if ((error = vlib_pci_enable_msix_irq (vm, h, 0, 2))) { virtio_log_error (vif, "error encountered on pci enable msix irq"); goto error; } vif->support_int_mode = 1; virtio_log_debug (vif, "device supports msix interrupts"); } else if (vlib_pci_get_num_msix_interrupts (vm, h) == 1) { /* 
* if msix table-size is 1, fall back to intX. */ if ((error = vlib_pci_register_intx_handler (vm, h, &virtio_pci_irq_handler))) { virtio_log_error (vif, "error encountered on pci register interrupt handler"); goto error; } vif->support_int_mode = 1; virtio_log_debug (vif, "pci register interrupt handler"); } else { /* * WARN: intX is showing some weird behaviour. * Please don't use interrupt mode with UIO driver. */ vif->support_int_mode = 0; virtio_log_debug (vif, "driver is configured in poll mode only"); } if ((error = vlib_pci_intr_enable (vm, h))) { virtio_log_error (vif, "error encountered on pci interrupt enable"); goto error; } if ((error = virtio_pci_device_init (vm, vif, args))) { virtio_log_error (vif, "error encountered on device init"); goto error; } /* create interface */ error = ethernet_register_interface (vnm, virtio_device_class.index, vif->dev_instance, vif->mac_addr, &vif->hw_if_index, virtio_pci_flag_change); if (error) { virtio_log_error (vif, "error encountered on ethernet register interface"); goto error; } vnet_sw_interface_t *sw = vnet_get_hw_sw_interface (vnm, vif->hw_if_index); vif->sw_if_index = sw->sw_if_index; args->sw_if_index = sw->sw_if_index; vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index); hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; vnet_hw_interface_set_input_node (vnm, vif->hw_if_index, virtio_input_node.index); u32 i = 0; vec_foreach_index (i, vif->rxq_vrings) { vnet_hw_interface_assign_rx_thread (vnm, vif->hw_if_index, i, ~0); virtio_vring_set_numa_node (vm, vif, RX_QUEUE (i)); /* Set default rx mode to POLLING */ vnet_hw_interface_set_rx_mode (vnm, vif->hw_if_index, i, VNET_HW_INTERFACE_RX_MODE_POLLING); } if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP) { vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP; vnet_hw_interface_set_flags (vnm, vif->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); } else vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); virtio_pci_offloads (vm, vif, args->gso_enabled, args->checksum_offload_enabled); if ((vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)) && (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MQ))) { if (virtio_pci_enable_multiqueue (vm, vif, vif->max_queue_pairs)) virtio_log_warning (vif, "multiqueue is not set"); } return; error: virtio_pci_delete_if (vm, vif); args->rv = VNET_API_ERROR_INVALID_INTERFACE; args->error = error; } int virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif) { vnet_main_t *vnm = vnet_get_main (); virtio_main_t *vim = &virtio_main; u32 i = 0; if (vif->type != VIRTIO_IF_TYPE_PCI) return VNET_API_ERROR_INVALID_INTERFACE; vlib_pci_intr_disable (vm, vif->pci_dev_handle); for (i = 0; i < vif->max_queue_pairs; i++) { virtio_pci_legacy_del_queue (vm, vif, RX_QUEUE (i)); virtio_pci_legacy_del_queue (vm, vif, TX_QUEUE (i)); } if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)) virtio_pci_legacy_del_queue (vm, vif, vif->max_queue_pairs * 2); virtio_pci_legacy_reset (vm, vif); if (vif->hw_if_index) { vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0); vec_foreach_index (i, vif->rxq_vrings) { vnet_hw_interface_unassign_rx_thread (vnm, vif->hw_if_index, i); } ethernet_delete_interface (vnm, vif->hw_if_index); } vlib_pci_device_close (vm, vif->pci_dev_handle); vec_foreach_index (i, vif->rxq_vrings) { virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, i); if (vring->kick_fd != -1) close (vring->kick_fd); if (vring->used) { virtio_free_rx_buffers (vm, vring); } vec_free (vring->buffers); vlib_physmem_free (vm, vring->desc); } 
vec_foreach_index (i, vif->txq_vrings) { virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, i); if (vring->kick_fd != -1) close (vring->kick_fd); if (vring->used) { virtio_free_used_desc (vm, vring); } vec_free (vring->buffers); clib_spinlock_free (&vring->lockp); vlib_physmem_free (vm, vring->desc); } if (vif->cxq_vring != NULL) { u16 last = vif->cxq_vring->last_used_idx; u16 n_left = vif->cxq_vring->used->idx - last; while (n_left) { last++; n_left--; } vif->cxq_vring->last_used_idx = last; vlib_physmem_free (vm, vif->cxq_vring->desc); } vec_free (vif->rxq_vrings); vec_free (vif->txq_vrings); vec_free (vif->cxq_vring); clib_error_free (vif->error); memset (vif, 0, sizeof (*vif)); pool_put (vim->interfaces, vif); return 0; } int virtio_pci_enable_disable_offloads (vlib_main_t * vm, virtio_if_t * vif, int gso_enabled, int checksum_offload_enabled, int offloads_disabled) { if (vif->type != VIRTIO_IF_TYPE_PCI) return VNET_API_ERROR_INVALID_INTERFACE; if (gso_enabled) virtio_pci_offloads (vm, vif, 1, 0); else if (checksum_offload_enabled) virtio_pci_offloads (vm, vif, 0, 1); else if (offloads_disabled) virtio_pci_offloads (vm, vif, 0, 0); return 0; } /* * fd.io coding-style-patch-verification: ON * * Local Variables: * eval: (c-set-style "gnu") * End: */