path: root/src/vnet/l2/l2_rw.c
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vlib/vlib.h>
#include <vnet/l2/feat_bitmap.h>
#include <vnet/l2/l2_rw.h>

/**
 * @file
 * @brief Layer 2 Rewrite.
 *
 * The Layer 2 Rewrite node uses classify tables to match packets, then
 * modifies the packet header using the provisioned mask and value.
 */
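
/*
 * Rewrite semantics, as implemented by l2_rw_rewrite() below: after
 * skipping 'skip_n_vectors' 16-byte vectors, each 16-byte vector of the
 * packet is updated as
 *
 *     packet[i] = (packet[i] & ~mask[i]) | value[i];
 *
 * so only the bits set in the mask are overwritten. For example
 * (hypothetical values), a 6-byte mask of ff:ff:ff:ff:ff:ff applied at
 * byte offset 6 with value 00:11:22:33:44:55 replaces the Ethernet
 * source MAC address.
 */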


l2_rw_main_t l2_rw_main;

vlib_node_registration_t l2_rw_node;

typedef struct
{
  u32 sw_if_index;
  u32 classify_table_index;
  u32 rewrite_entry_index;
} l2_rw_trace_t;

static u8 *
format_l2_rw_entry (u8 * s, va_list * args)
{
  l2_rw_entry_t *e = va_arg (*args, l2_rw_entry_t *);
  l2_rw_main_t *rw = &l2_rw_main;
  s = format (s, "%d - mask:%U value:%U\n",
	      e - rw->entries,
	      format_hex_bytes, e->mask,
	      e->rewrite_n_vectors * sizeof (u32x4), format_hex_bytes,
	      e->value, e->rewrite_n_vectors * sizeof (u32x4));
  s =
    format (s, "      hits:%d skip_bytes:%d", e->hit_count,
	    e->skip_n_vectors * sizeof (u32x4));
  return s;
}

static u8 *
format_l2_rw_config (u8 * s, va_list * args)
{
  l2_rw_config_t *c = va_arg (*args, l2_rw_config_t *);
  return format (s, "table-index:%d miss-index:%d",
		 c->table_index, c->miss_index);
}

/* packet trace format function */
static u8 *
format_l2_rw_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  l2_rw_trace_t *t = va_arg (*args, l2_rw_trace_t *);
  return format (s, "l2-rw: sw_if_index %d, table %d, entry %d",
		 t->sw_if_index, t->classify_table_index,
		 t->rewrite_entry_index);
}

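/* Return the rewrite configuration of an interface, lazily allocating
   a default one (table_index and miss_index both ~0) the first time a
   given sw_if_index is seen. */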
always_inline l2_rw_config_t *
l2_rw_get_config (u32 sw_if_index)
{
  l2_rw_main_t *rw = &l2_rw_main;
  if (PREDICT_FALSE (!clib_bitmap_get (rw->configs_bitmap, sw_if_index)))
    {
      vec_validate (rw->configs, sw_if_index);
      rw->configs[sw_if_index].table_index = ~0;
      rw->configs[sw_if_index].miss_index = ~0;
      rw->configs_bitmap =
	clib_bitmap_set (rw->configs_bitmap, sw_if_index, 1);
    }
  return &rw->configs[sw_if_index];
}

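/* Apply a rewrite entry to the packet data 'h': each selected 16-byte
   vector is updated as d = (d & ~mask) | value. The u32x4 SIMD path is
   used when 'h' is 16-byte aligned; otherwise the same masked-or is
   performed as two u64 operations per vector. */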
static_always_inline void
l2_rw_rewrite (l2_rw_entry_t * rwe, u8 * h)
{
  if (U32X4_ALIGNED (h))
    {
      u32x4 *d = ((u32x4 *) h) + rwe->skip_n_vectors;
      switch (rwe->rewrite_n_vectors)
	{
	case 5:
	  d[4] = (d[4] & ~rwe->mask[4]) | rwe->value[4];
	  /* FALLTHROUGH */
	case 4:
	  d[3] = (d[3] & ~rwe->mask[3]) | rwe->value[3];
	  /* FALLTHROUGH */
	case 3:
	  d[2] = (d[2] & ~rwe->mask[2]) | rwe->value[2];
	  /* FALLTHROUGH */
	case 2:
	  d[1] = (d[1] & ~rwe->mask[1]) | rwe->value[1];
	  /* FALLTHROUGH */
	case 1:
	  d[0] = (d[0] & ~rwe->mask[0]) | rwe->value[0];
	  break;
	default:
	  abort ();
	}
    }
  else
    {
      u64 *d = ((u64 *) h) + rwe->skip_n_vectors * 2;
      switch (rwe->rewrite_n_vectors)
	{
	case 5:
	  d[8] =
	    (d[8] & ~(((u64 *) rwe->mask)[8])) | (((u64 *) rwe->value)[8]);
	  d[9] =
	    (d[9] & ~(((u64 *) rwe->mask)[9])) | (((u64 *) rwe->value)[9]);
	  /* FALLTHROUGH */
	case 4:
	  d[6] =
	    (d[6] & ~(((u64 *) rwe->mask)[6])) | (((u64 *) rwe->value)[6]);
	  d[7] =
	    (d[7] & ~(((u64 *) rwe->mask)[7])) | (((u64 *) rwe->value)[7]);
	  /* FALLTHROUGH */
	case 3:
	  d[4] =
	    (d[4] & ~(((u64 *) rwe->mask)[4])) | (((u64 *) rwe->value)[4]);
	  d[5] =
	    (d[5] & ~(((u64 *) rwe->mask)[5])) | (((u64 *) rwe->value)[5]);
	  /* FALLTHROUGH */
	case 2:
	  d[2] =
	    (d[2] & ~(((u64 *) rwe->mask)[2])) | (((u64 *) rwe->value)[2]);
	  d[3] =
	    (d[3] & ~(((u64 *) rwe->mask)[3])) | (((u64 *) rwe->value)[3]);
	  /* FALLTHROUGH */
	case 1:
	  d[0] =
	    (d[0] & ~(((u64 *) rwe->mask)[0])) | (((u64 *) rwe->value)[0]);
	  d[1] =
	    (d[1] & ~(((u64 *) rwe->mask)[1])) | (((u64 *) rwe->value)[1]);
	  break;
	default:
	  abort ();
	}
    }
}

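/* Per-frame node function: for each packet, look up the rewrite
   configuration of the RX interface, walk the classify table chain,
   apply the matching (or miss) rewrite entry in place, then hand the
   packet to the next L2 input feature. */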
static uword
l2_rw_node_fn (vlib_main_t * vm,
	       vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  l2_rw_main_t *rw = &l2_rw_main;
  u32 n_left_from, *from, *to_next, next_index;
  vnet_classify_main_t *vcm = &vnet_classify_main;
  f64 now = vlib_time_now (vlib_get_main ());
  u32 prefetch_size = 0;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;	/* number of packets to process */
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      /* get space to enqueue frame to graph node "next_index" */
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from >= 4 && n_left_to_next >= 2)
	{
	  u32 bi0, next0, sw_if_index0, rwe_index0;
	  u32 bi1, next1, sw_if_index1, rwe_index1;
	  vlib_buffer_t *b0, *b1;
	  ethernet_header_t *h0, *h1;
	  l2_rw_config_t *config0, *config1;
	  u64 hash0, hash1;
	  vnet_classify_table_t *t0, *t1;
	  vnet_classify_entry_t *e0, *e1;
	  l2_rw_entry_t *rwe0, *rwe1;

	  {
	    vlib_buffer_t *p2, *p3;
	    p2 = vlib_get_buffer (vm, from[2]);
	    p3 = vlib_get_buffer (vm, from[3]);

	    vlib_prefetch_buffer_header (p2, LOAD);
	    vlib_prefetch_buffer_header (p3, LOAD);
	    CLIB_PREFETCH (vlib_buffer_get_current (p2), prefetch_size, LOAD);
	    CLIB_PREFETCH (vlib_buffer_get_current (p3), prefetch_size, LOAD);
	  }

	  bi0 = from[0];
	  bi1 = from[1];
	  to_next[0] = bi0;
	  to_next[1] = bi1;
	  from += 2;
	  to_next += 2;
	  n_left_from -= 2;
	  n_left_to_next -= 2;

	  b0 = vlib_get_buffer (vm, bi0);
	  b1 = vlib_get_buffer (vm, bi1);
	  h0 = vlib_buffer_get_current (b0);
	  h1 = vlib_buffer_get_current (b1);

	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	  sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
	  config0 = l2_rw_get_config (sw_if_index0);	/*TODO: check sw_if_index0 value */
	  config1 = l2_rw_get_config (sw_if_index1);	/*TODO: check sw_if_index1 value */
	  t0 = pool_elt_at_index (vcm->tables, config0->table_index);
	  t1 = pool_elt_at_index (vcm->tables, config1->table_index);
	  prefetch_size =
	    (t1->skip_n_vectors + t1->match_n_vectors) * sizeof (u32x4);

	  hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
	  hash1 = vnet_classify_hash_packet (t1, (u8 *) h1);
	  e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
	  e1 = vnet_classify_find_entry (t1, (u8 *) h1, hash1, now);

	  while (!e0 && (t0->next_table_index != ~0))
	    {
	      t0 = pool_elt_at_index (vcm->tables, t0->next_table_index);
	      hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
	      e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
	    }

	  while (!e1 && (t1->next_table_index != ~0))
	    {
	      t1 = pool_elt_at_index (vcm->tables, t1->next_table_index);
	      hash1 = vnet_classify_hash_packet (t1, (u8 *) h1);
	      e1 = vnet_classify_find_entry (t1, (u8 *) h1, hash1, now);
	    }

	  rwe_index0 = e0 ? e0->opaque_index : config0->miss_index;
	  rwe_index1 = e1 ? e1->opaque_index : config1->miss_index;

	  if (rwe_index0 != ~0)
	    {
	      rwe0 = pool_elt_at_index (rw->entries, rwe_index0);
	      l2_rw_rewrite (rwe0, (u8 *) h0);
	    }
	  if (rwe_index1 != ~0)
	    {
	      rwe1 = pool_elt_at_index (rw->entries, rwe_index1);
	      l2_rw_rewrite (rwe1, (u8 *) h1);
	    }

	  if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
	    {
	      l2_rw_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
	      t->sw_if_index = sw_if_index0;
	      t->classify_table_index = config0->table_index;
	      t->rewrite_entry_index = rwe_index0;
	    }

	  if (PREDICT_FALSE ((b1->flags & VLIB_BUFFER_IS_TRACED)))
	    {
	      l2_rw_trace_t *t = vlib_add_trace (vm, node, b1, sizeof (*t));
	      t->sw_if_index = sw_if_index1;
	      t->classify_table_index = config1->table_index;
	      t->rewrite_entry_index = rwe_index1;
	    }

	  /* Update feature bitmap and get next feature index */
	  next0 = vnet_l2_feature_next (b0, rw->feat_next_node_index,
					L2INPUT_FEAT_RW);
	  next1 = vnet_l2_feature_next (b1, rw->feat_next_node_index,
					L2INPUT_FEAT_RW);

	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
					   to_next, n_left_to_next,
					   bi0, bi1, next0, next1);
	}

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0, next0, sw_if_index0, rwe_index0;
	  vlib_buffer_t *b0;
	  ethernet_header_t *h0;
	  l2_rw_config_t *config0;
	  u64 hash0;
	  vnet_classify_table_t *t0;
	  vnet_classify_entry_t *e0;
	  l2_rw_entry_t *rwe0;

	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);
	  h0 = vlib_buffer_get_current (b0);

	  sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	  config0 = l2_rw_get_config (sw_if_index0);	/*TODO: check sw_if_index0 value */
	  t0 = pool_elt_at_index (vcm->tables, config0->table_index);

	  hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
	  e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);

	  while (!e0 && (t0->next_table_index != ~0))
	    {
	      t0 = pool_elt_at_index (vcm->tables, t0->next_table_index);
	      hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
	      e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
	    }

	  rwe_index0 = e0 ? e0->opaque_index : config0->miss_index;

	  if (rwe_index0 != ~0)
	    {
	      rwe0 = pool_elt_at_index (rw->entries, rwe_index0);
	      l2_rw_rewrite (rwe0, (u8 *) h0);
	    }

	  if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
	    {
	      l2_rw_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
	      t->sw_if_index = sw_if_index0;
	      t->classify_table_index = config0->table_index;
	      t->rewrite_entry_index = rwe_index0;
	    }

	  /* Update feature bitmap and get next feature index */
	  next0 = vnet_l2_feature_next (b0, rw->feat_next_node_index,
					L2INPUT_FEAT_RW);

	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   bi0, next0);
	}
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}

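/* Add, update or delete a rewrite entry. '*index' selects an existing
   entry, or pass ~0 to allocate a new one (the allocated index is
   returned through 'index'). 'skip' and 'len' are in bytes; internally
   both the mask and the value are stored as whole 16-byte vectors. */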
int
l2_rw_mod_entry (u32 * index,
		 u8 * mask, u8 * value, u32 len, u32 skip, u8 is_del)
{
  l2_rw_main_t *rw = &l2_rw_main;
  l2_rw_entry_t *e = 0;
  if (*index != ~0)
    {
      if (pool_is_free_index (rw->entries, *index))
	{
	  return -1;
	}
      e = pool_elt_at_index (rw->entries, *index);
    }
  else
    {
      pool_get (rw->entries, e);
      *index = e - rw->entries;
    }

  if (!e)
    return -1;

  if (is_del)
    {
      pool_put (rw->entries, e);
      return 0;
    }

  e->skip_n_vectors = skip / sizeof (u32x4);
  skip -= e->skip_n_vectors * sizeof (u32x4);
  e->rewrite_n_vectors = (skip + len - 1) / sizeof (u32x4) + 1;
  vec_alloc_aligned (e->mask, e->rewrite_n_vectors, sizeof (u32x4));
  memset (e->mask, 0, e->rewrite_n_vectors * sizeof (u32x4));
  vec_alloc_aligned (e->value, e->rewrite_n_vectors, sizeof (u32x4));
  memset (e->value, 0, e->rewrite_n_vectors * sizeof (u32x4));

  clib_memcpy (((u8 *) e->value) + skip, value, len);
  clib_memcpy (((u8 *) e->mask) + skip, mask, len);

  int i;
  for (i = 0; i < e->rewrite_n_vectors; i++)
    {
      e->value[i] &= e->mask[i];
    }

  return 0;
}

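/*
 * Usage sketch (illustrative only, not compiled in): programmatically
 * creating an entry that rewrites the Ethernet source MAC address. The
 * mask and value bytes below are hypothetical.
 *
 *   u32 index = ~0;
 *   u8 mask[6] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 *   u8 value[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
 *   // len = 6 bytes, skip = 6 bytes (start of the source MAC)
 *   if (l2_rw_mod_entry (&index, mask, value, 6, 6, 0))
 *     clib_warning ("l2_rw_mod_entry failed");
 */
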
static clib_error_t *
l2_rw_entry_cli_fn (vlib_main_t * vm,
		    unformat_input_t * input, vlib_cli_command_t * cmd)
{
  u32 index = ~0;
  u8 *mask = 0;
  u8 *value = 0;
  u32 skip = 0;
  u8 del = 0;

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "index %d", &index))
	;
      else if (unformat (input, "mask %U", unformat_hex_string, &mask))
	;
      else if (unformat (input, "value %U", unformat_hex_string, &value))
	;
      else if (unformat (input, "skip %d", &skip))
	;
      else if (unformat (input, "del"))
	del = 1;
      else
	break;
    }

  if (!mask || !value)
    return clib_error_return (0, "Unspecified mask or value");

  if (vec_len (mask) != vec_len (value))
    return clib_error_return (0, "Mask and value lengths must be identical");

  int ret;
  if ((ret =
       l2_rw_mod_entry (&index, mask, value, vec_len (mask), skip, del)))
    return clib_error_return (0, "Could not add/delete entry (err %d)", ret);

  return 0;
}

/*?
 * The Layer 2 Rewrite node uses classify tables to match packets, then
 * modifies the packet header using the provisioned mask and value.
 *
 * @cliexpar
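 * Example of how to add an entry that rewrites the Ethernet source MAC
 * (mask and value are hypothetical):
 * @cliexcmd{l2 rewrite entry mask ffffffffffff value aabbccddeeff skip 6}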
 * @todo This is incomplete. This needs a detailed description.
?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_entry_cli, static) = {
  .path = "l2 rewrite entry",
  .short_help =
  "l2 rewrite entry [index <index>] [mask <hex-mask>] [value <hex-value>] [skip <n_bytes>] [del]",
  .function = l2_rw_entry_cli_fn,
};
/* *INDENT-ON* */

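/* Attach classify table 'table_index' (and miss entry 'miss_index') to
   an interface, enabling the l2-rw input feature. Passing ~0 as the
   table index detaches the table and disables the feature. */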
int
l2_rw_interface_set_table (u32 sw_if_index, u32 table_index, u32 miss_index)
{
  l2_rw_config_t *c = l2_rw_get_config (sw_if_index);
  l2_rw_main_t *rw = &l2_rw_main;

  c->table_index = table_index;
  c->miss_index = miss_index;
  u32 feature_bitmap = (table_index == ~0) ? 0 : L2INPUT_FEAT_RW;

  l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_RW, feature_bitmap);

  if (c->table_index == ~0)
    rw->configs_bitmap =
      clib_bitmap_set (rw->configs_bitmap, sw_if_index, 0);

  return 0;
}

static clib_error_t *
l2_rw_interface_cli_fn (vlib_main_t * vm,
			unformat_input_t * input, vlib_cli_command_t * cmd)
{
  vnet_main_t *vnm = vnet_get_main ();
  u32 table_index = ~0;
  u32 sw_if_index = ~0;
  u32 miss_index = ~0;

  if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      unformat (input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index);
    }

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "table %d", &table_index))
	;
      else if (unformat (input, "miss-index %d", &miss_index))
	;
      else
	break;
    }

  if (sw_if_index == ~0)
    return clib_error_return (0, "You must specify an interface");
  int ret;
  if ((ret =
       l2_rw_interface_set_table (sw_if_index, table_index, miss_index)))
    return clib_error_return (0, "l2_rw_interface_set_table returned %d",
			      ret);

  return 0;
}

/*?
 * The Layer 2 Rewrite node uses classify tables to match packets, then
 * modifies the packet header using the provisioned mask and value.
 *
 * @cliexpar
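 * Example of how to bind classify table 1 to an interface (interface
 * name and indices are hypothetical):
 * @cliexcmd{set interface l2 rewrite GigabitEthernet0/8/0 table 1 miss-index 0}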
 * @todo This is incomplete. This needs a detailed description.
?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_interface_cli, static) = {
  .path = "set interface l2 rewrite",
  .short_help =
  "set interface l2 rewrite <interface> [table <table index>] [miss-index <entry-index>]",
  .function = l2_rw_interface_cli_fn,
};
/* *INDENT-ON* */

static clib_error_t *
l2_rw_show_interfaces_cli_fn (vlib_main_t * vm,
			      unformat_input_t * input,
			      vlib_cli_command_t * cmd)
{
  l2_rw_main_t *rw = &l2_rw_main;
  if (clib_bitmap_count_set_bits (rw->configs_bitmap) == 0)
    vlib_cli_output (vm, "No interface is currently using l2 rewrite\n");

  uword i;
  /* *INDENT-OFF* */
  clib_bitmap_foreach(i, rw->configs_bitmap, {
      vlib_cli_output (vm, "sw_if_index:%d %U\n", i, format_l2_rw_config, &rw->configs[i]);
  });
  /* *INDENT-ON* */
  return 0;
}

/*?
 * The Layer 2 Rewrite node uses classify tables to match packets, then
 * modifies the packet header using the provisioned mask and value.
 *
 * @cliexpar
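 * Example of how to display the interfaces with l2 rewrite enabled:
 * @cliexcmd{show l2 rewrite interfaces}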
 * @todo This is incomplete. This needs a detailed description.
?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_show_interfaces_cli, static) = {
  .path = "show l2 rewrite interfaces",
  .short_help =
  "show l2 rewrite interfaces",
  .function = l2_rw_show_interfaces_cli_fn,
};
/* *INDENT-ON* */

static clib_error_t *
l2_rw_show_entries_cli_fn (vlib_main_t * vm,
			   unformat_input_t * input, vlib_cli_command_t * cmd)
{
  l2_rw_main_t *rw = &l2_rw_main;
  l2_rw_entry_t *e;
  if (pool_elts (rw->entries) == 0)
    vlib_cli_output (vm, "No entries\n");

  /* *INDENT-OFF* */
  pool_foreach(e, rw->entries, {
    vlib_cli_output (vm, "%U\n", format_l2_rw_entry, e);
  });
  /* *INDENT-ON* */
  return 0;
}

/*?
 * The Layer 2 Rewrite node uses classify tables to match packets, then
 * modifies the packet header using the provisioned mask and value.
 *
 * @cliexpar
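 * Example of how to display the provisioned rewrite entries:
 * @cliexcmd{show l2 rewrite entries}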
 * @todo This is incomplete. This needs a detailed description.
?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_show_entries_cli, static) = {
  .path = "show l2 rewrite entries",
  .short_help =
  "show l2 rewrite entries",
  .function = l2_rw_show_entries_cli_fn,
};
/* *INDENT-ON* */

int
l2_rw_enable_disable (u32 bridge_domain, u8 disable)
{
  u32 mask = L2INPUT_FEAT_RW;
  l2input_set_bridge_features (bridge_domain, mask, disable ? 0 : mask);
  return 0;
}

static clib_error_t *
l2_rw_set_cli_fn (vlib_main_t * vm,
		  unformat_input_t * input, vlib_cli_command_t * cmd)
{
  u32 bridge_domain;
  u8 disable = 0;

  if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT ||
      !unformat (input, "%d", &bridge_domain))
    {
      return clib_error_return (0, "You must specify a bridge domain");
    }

  if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT &&
      unformat (input, "disable"))
    {
      disable = 1;
    }

  if (l2_rw_enable_disable (bridge_domain, disable))
    return clib_error_return (0, "Could not enable or disable rewrite");

  return 0;
}

/*?
 * The Layer 2 Rewrite node uses classify tables to match packets, then
 * modifies the packet header using the provisioned mask and value.
 *
 * @cliexpar
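 * Example of how to disable rewrite on a bridge domain (the
 * bridge-domain id is hypothetical):
 * @cliexcmd{set bridge-domain rewrite 200 disable}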
 * @todo This is incomplete. This needs a detailed description.
?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_set_cli, static) = {
  .path = "set bridge-domain rewrite",
  .short_help =
  "set bridge-domain rewrite <bridge-domain> [disable]",
  .function = l2_rw_set_cli_fn,
};
/* *INDENT-ON* */

static clib_error_t *
l2_rw_init (vlib_main_t * vm)
{
  l2_rw_main_t *rw = &l2_rw_main;
  rw->configs = 0;
  rw->entries = 0;
  clib_bitmap_alloc (rw->configs_bitmap, 1);
  feat_bitmap_init_next_nodes (vm,
			       l2_rw_node.index,
			       L2INPUT_N_FEAT,
			       l2input_get_feat_names (),
			       rw->feat_next_node_index);
  return 0;
}

VLIB_INIT_FUNCTION (l2_rw_init);

enum
{
  L2_RW_NEXT_DROP,
  L2_RW_N_NEXT,
};

#define foreach_l2_rw_error               \
_(UNKNOWN, "Unknown error")

typedef enum
{
#define _(sym,str) L2_RW_ERROR_##sym,
  foreach_l2_rw_error
#undef _
    L2_RW_N_ERROR,
} l2_rw_error_t;

static char *l2_rw_error_strings[] = {
#define _(sym,string) string,
  foreach_l2_rw_error
#undef _
};

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_rw_node) = {
  .function = l2_rw_node_fn,
  .name = "l2-rw",
  .vector_size = sizeof (u32),
  .format_trace = format_l2_rw_trace,
  .type = VLIB_NODE_TYPE_INTERNAL,
  .n_errors = ARRAY_LEN(l2_rw_error_strings),
  .error_strings = l2_rw_error_strings,
  .runtime_data_bytes = 0,
  .n_next_nodes = L2_RW_N_NEXT,
  .next_nodes = { [L2_RW_NEXT_DROP]  = "error-drop"},
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (l2_rw_node, l2_rw_node_fn)
/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
n>->connection); quic_ctx_free (sctx); free (stream->data); } static int quic_on_stop_sending (quicly_stream_t * stream, int error_code) { QUIC_DBG (2, "received STOP_SENDING: %d", error_code); return 0; } static int quic_on_receive_reset (quicly_stream_t * stream, int error_code) { QUIC_DBG (2, "received RESET_STREAM: %d", error_code); return 0; } static session_t * get_stream_session_from_stream (quicly_stream_t * stream) { quic_ctx_t *ctx; quic_stream_data_t *stream_data; stream_data = (quic_stream_data_t *) stream->data; ctx = quic_ctx_get (stream_data->ctx_id, stream_data->thread_index); return session_get (ctx->c_s_index, stream_data->thread_index); } static int quic_on_receive (quicly_stream_t * stream, size_t off, const void *src, size_t len) { QUIC_DBG (3, "received data: %lu bytes, offset %lu", len, off); u32 max_enq; quic_ctx_t *sctx; session_t *stream_session; app_worker_t *app_wrk; svm_fifo_t *f; quic_stream_data_t *stream_data; int rlen; stream_data = (quic_stream_data_t *) stream->data; sctx = quic_ctx_get (stream_data->ctx_id, stream_data->thread_index); stream_session = session_get (sctx->c_s_index, stream_data->thread_index); f = stream_session->rx_fifo; max_enq = svm_fifo_max_enqueue_prod (f); QUIC_DBG (3, "Enqueuing %u at off %u in %u space", len, off, max_enq); if (off + len > max_enq) { /* TODO : can we find a better solution, listening on RX fifo evts ? */ QUIC_DBG (3, "Ingoring packet, RX fifo is full"); return QUICLY_ERROR_PACKET_IGNORED; } if (off == 0) { rlen = svm_fifo_enqueue (f, len, (u8 *) src); ASSERT (rlen >= len); quicly_stream_sync_recvbuf (stream, rlen); app_wrk = app_worker_get_if_valid (stream_session->app_wrk_index); if (PREDICT_TRUE (app_wrk != 0)) app_worker_lock_and_send_event (app_wrk, stream_session, SESSION_IO_EVT_RX); } else { rlen = svm_fifo_enqueue_with_offset (f, off, len, (u8 *) src); ASSERT (rlen == 0); } return 0; } void quic_fifo_egress_shift (quicly_stream_t * stream, size_t delta) { session_t *stream_session; svm_fifo_t *f; stream_session = get_stream_session_from_stream (stream); f = stream_session->tx_fifo; ASSERT (svm_fifo_dequeue_drop (f, delta) == delta); quicly_stream_sync_sendbuf (stream, 0); } int quic_fifo_egress_emit (quicly_stream_t * stream, size_t off, void *dst, size_t * len, int *wrote_all) { session_t *stream_session; svm_fifo_t *f; u32 deq_max, first_deq, max_rd_chunk, rem_offset; stream_session = get_stream_session_from_stream (stream); f = stream_session->tx_fifo; QUIC_DBG (3, "Emitting %u, offset %u", *len, off); deq_max = svm_fifo_max_dequeue_cons (f); ASSERT (off <= deq_max); if (off + *len < deq_max) { *wrote_all = 0; } else { QUIC_DBG (3, "Wrote ALL"); *wrote_all = 1; *len = deq_max - off; } /* TODO, use something like : return svm_fifo_peek (f, off, *len, dst); */ max_rd_chunk = svm_fifo_max_read_chunk (f); first_deq = 0; if (off < max_rd_chunk) { first_deq = clib_min (*len, max_rd_chunk - off); clib_memcpy_fast (dst, svm_fifo_head (f) + off, first_deq); } if (max_rd_chunk < off + *len) { rem_offset = max_rd_chunk < off ? 
off - max_rd_chunk : 0; clib_memcpy_fast (dst + first_deq, f->head_chunk->data + rem_offset, *len - first_deq); } return 0; } static const quicly_stream_callbacks_t quic_stream_callbacks = { .on_destroy = quic_on_stream_destroy, .on_send_shift = quic_fifo_egress_shift, .on_send_emit = quic_fifo_egress_emit, .on_send_stop = quic_on_stop_sending, .on_receive = quic_on_receive, .on_receive_reset = quic_on_receive_reset }; static void quic_accept_stream (void *s) { quicly_stream_t *stream = (quicly_stream_t *) s; session_t *stream_session, *quic_session; quic_stream_data_t *stream_data; app_worker_t *app_wrk; quic_ctx_t *qctx, *sctx; u32 sctx_id; int rv; sctx_id = quic_ctx_alloc (vlib_get_thread_index ()); qctx = quic_get_conn_ctx (stream->conn); stream_session = session_alloc (qctx->c_thread_index); QUIC_DBG (2, "Allocated stream_session, id %u, thread %u ctx %u", stream_session->session_index, stream_session->thread_index, sctx_id); sctx = quic_ctx_get (sctx_id, qctx->c_thread_index); sctx->c_quic_ctx_id.parent_app_wrk_id = qctx->c_quic_ctx_id.parent_app_wrk_id; sctx->c_quic_ctx_id.parent_app_id = qctx->c_quic_ctx_id.parent_app_id; sctx->c_quic_ctx_id.quic_connection_ctx_id = qctx->c_c_index; sctx->c_c_index = sctx_id; sctx->c_quic_ctx_id.is_stream = 1; sctx->c_s_index = stream_session->session_index; sctx->c_quic_ctx_id.stream = stream; stream_data = (quic_stream_data_t *) stream->data; stream_data->ctx_id = sctx_id; stream_data->thread_index = sctx->c_thread_index; sctx->c_s_index = stream_session->session_index; stream_session->session_state = SESSION_STATE_CREATED; stream_session->flags |= SESSION_F_QUIC_STREAM; stream_session->app_wrk_index = sctx->c_quic_ctx_id.parent_app_wrk_id; stream_session->connection_index = sctx->c_c_index; stream_session->session_type = session_type_from_proto_and_ip (TRANSPORT_PROTO_QUIC, qctx->c_quic_ctx_id.udp_is_ip4); quic_session = session_get (qctx->c_s_index, qctx->c_thread_index); stream_session->listener_handle = listen_session_get_handle (quic_session); app_wrk = app_worker_get (stream_session->app_wrk_index); if ((rv = app_worker_init_connected (app_wrk, stream_session))) { QUIC_DBG (1, "failed to allocate fifos"); session_free (stream_session); quicly_reset_stream (stream, 0x30001); return; } rv = app_worker_accept_notify (app_wrk, stream_session); if (rv) { QUIC_DBG (1, "failed to notify accept worker app"); session_free_w_fifos (stream_session); quicly_reset_stream (stream, 0x30002); return; } session_lookup_add_connection (&sctx->connection, session_handle (stream_session)); } static int quic_on_stream_open (quicly_stream_open_t * self, quicly_stream_t * stream) { QUIC_DBG (2, "on_stream_open called"); stream->data = malloc (sizeof (quic_stream_data_t)); stream->callbacks = &quic_stream_callbacks; /* Notify accept on parent qsession, but only if this is not a locally * initiated stream */ if (!quicly_stream_is_self_initiated (stream)) { quic_accept_stream (stream); } return 0; } static quicly_stream_open_t on_stream_open = { &quic_on_stream_open }; static void quic_on_conn_close (quicly_closed_by_peer_t * self, quicly_conn_t * conn, int code, uint64_t frame_type, const char *reason, size_t reason_len) { QUIC_DBG (2, "connection closed, reason: %.*s", reason, reason_len); quic_ctx_t *ctx = quic_get_conn_ctx (conn); session_transport_closing_notify (&ctx->connection); } static quicly_closed_by_peer_t on_closed_by_peer = { &quic_on_conn_close }; /***************************************************************************** * END QUICLY CALLBACKS 
*****************************************************************************/ /* single-entry session cache */ struct st_util_session_cache_t { ptls_encrypt_ticket_t super; uint8_t id[32]; ptls_iovec_t data; }; static int encrypt_ticket_cb (ptls_encrypt_ticket_t * _self, ptls_t * tls, int is_encrypt, ptls_buffer_t * dst, ptls_iovec_t src) { struct st_util_session_cache_t *self = (void *) _self; int ret; if (is_encrypt) { /* replace the cached entry along with a newly generated session id */ free (self->data.base); if ((self->data.base = malloc (src.len)) == NULL) return PTLS_ERROR_NO_MEMORY; ptls_get_context (tls)->random_bytes (self->id, sizeof (self->id)); memcpy (self->data.base, src.base, src.len); self->data.len = src.len; /* store the session id in buffer */ if ((ret = ptls_buffer_reserve (dst, sizeof (self->id))) != 0) return ret; memcpy (dst->base + dst->off, self->id, sizeof (self->id)); dst->off += sizeof (self->id); } else { /* check if session id is the one stored in cache */ if (src.len != sizeof (self->id)) return PTLS_ERROR_SESSION_NOT_FOUND; if (memcmp (self->id, src.base, sizeof (self->id)) != 0) return PTLS_ERROR_SESSION_NOT_FOUND; /* return the cached value */ if ((ret = ptls_buffer_reserve (dst, self->data.len)) != 0) return ret; memcpy (dst->base + dst->off, self->data.base, self->data.len); dst->off += self->data.len; } return 0; } /* *INDENT-OFF* */ static struct st_util_session_cache_t sc = { .super = { .cb = encrypt_ticket_cb, }, }; static ptls_context_t quic_tlsctx = { .random_bytes = ptls_openssl_random_bytes, .get_time = &ptls_get_time, .key_exchanges = ptls_openssl_key_exchanges, .cipher_suites = ptls_openssl_cipher_suites, .certificates = { .list = NULL, .count = 0 }, .esni = NULL, .on_client_hello = NULL, .emit_certificate = NULL, .sign_certificate = NULL, .verify_certificate = NULL, .ticket_lifetime = 86400, .max_early_data_size = 8192, .hkdf_label_prefix__obsolete = NULL, .require_dhe_on_psk = 1, .encrypt_ticket = &sc.super, }; /* *INDENT-ON* */ static int ptls_compare_separator_line (const char *line, const char *begin_or_end, const char *label) { int ret = strncmp (line, "-----", 5); size_t text_index = 5; if (ret == 0) { size_t begin_or_end_length = strlen (begin_or_end); ret = strncmp (line + text_index, begin_or_end, begin_or_end_length); text_index += begin_or_end_length; } if (ret == 0) { ret = line[text_index] - ' '; text_index++; } if (ret == 0) { size_t label_length = strlen (label); ret = strncmp (line + text_index, label, label_length); text_index += label_length; } if (ret == 0) { ret = strncmp (line + text_index, "-----", 5); } return ret; } static int ptls_get_bio_pem_object (BIO * bio, const char *label, ptls_buffer_t * buf) { int ret = PTLS_ERROR_PEM_LABEL_NOT_FOUND; char line[256]; ptls_base64_decode_state_t state; /* Get the label on a line by itself */ while (BIO_gets (bio, line, 256)) { if (ptls_compare_separator_line (line, "BEGIN", label) == 0) { ret = 0; ptls_base64_decode_init (&state); break; } } /* Get the data in the buffer */ while (ret == 0 && BIO_gets (bio, line, 256)) { if (ptls_compare_separator_line (line, "END", label) == 0) { if (state.status == PTLS_BASE64_DECODE_DONE || (state.status == PTLS_BASE64_DECODE_IN_PROGRESS && state.nbc == 0)) { ret = 0; } else { ret = PTLS_ERROR_INCORRECT_BASE64; } break; } else { ret = ptls_base64_decode (line, &state, buf); } } return ret; } static int ptls_load_bio_pem_objects (BIO * bio, const char *label, ptls_iovec_t * list, size_t list_max, size_t * nb_objects) { int ret = 0; size_t 
count = 0; *nb_objects = 0; if (ret == 0) { while (count < list_max) { ptls_buffer_t buf; ptls_buffer_init (&buf, "", 0); ret = ptls_get_bio_pem_object (bio, label, &buf); if (ret == 0) { if (buf.off > 0 && buf.is_allocated) { list[count].base = buf.base; list[count].len = buf.off; count++; } else { ptls_buffer_dispose (&buf); } } else { ptls_buffer_dispose (&buf); break; } } } if (ret == PTLS_ERROR_PEM_LABEL_NOT_FOUND && count > 0) { ret = 0; } *nb_objects = count; return ret; } #define PTLS_MAX_CERTS_IN_CONTEXT 16 static int ptls_load_bio_certificates (ptls_context_t * ctx, BIO * bio) { int ret = 0; ctx->certificates.list = (ptls_iovec_t *) malloc (PTLS_MAX_CERTS_IN_CONTEXT * sizeof (ptls_iovec_t)); if (ctx->certificates.list == NULL) { ret = PTLS_ERROR_NO_MEMORY; } else { ret = ptls_load_bio_pem_objects (bio, "CERTIFICATE", ctx->certificates.list, PTLS_MAX_CERTS_IN_CONTEXT, &ctx->certificates.count); } return ret; } static inline void load_bio_certificate_chain (ptls_context_t * ctx, const char *cert_data) { BIO *cert_bio; cert_bio = BIO_new_mem_buf (cert_data, -1); if (ptls_load_bio_certificates (ctx, cert_bio) != 0) { BIO_free (cert_bio); fprintf (stderr, "failed to load certificate:%s\n", strerror (errno)); exit (1); } BIO_free (cert_bio); } static inline void load_bio_private_key (ptls_context_t * ctx, const char *pk_data) { static ptls_openssl_sign_certificate_t sc; EVP_PKEY *pkey; BIO *key_bio; key_bio = BIO_new_mem_buf (pk_data, -1); pkey = PEM_read_bio_PrivateKey (key_bio, NULL, NULL, NULL); BIO_free (key_bio); if (pkey == NULL) { fprintf (stderr, "failed to read private key from app configuration\n"); exit (1); } ptls_openssl_init_sign_certificate (&sc, pkey); EVP_PKEY_free (pkey); ctx->sign_certificate = &sc.super; } static inline void quic_make_connection_key (clib_bihash_kv_16_8_t * kv, const quicly_cid_plaintext_t * id) { kv->key[0] = ((u64) id->master_id) << 32 | (u64) id->thread_id; kv->key[1] = id->node_id; } static void quic_connection_closed (u32 ctx_index, u32 thread_index) { /* TODO : free fifos */ QUIC_DBG (2, "QUIC connection closed"); tw_timer_wheel_1t_3w_1024sl_ov_t *tw; clib_bihash_kv_16_8_t kv; quicly_conn_t *conn; quic_ctx_t *ctx; ctx = quic_ctx_get (ctx_index, thread_index); ASSERT (!ctx->c_quic_ctx_id.is_stream); /* TODO if connection is not established, just delete the session? 
*/ /* Stop the timer */ if (ctx->timer_handle != QUIC_TIMER_HANDLE_INVALID) { tw = &quic_main.wrk_ctx[thread_index].timer_wheel; tw_timer_stop_1t_3w_1024sl_ov (tw, ctx->timer_handle); } /* Delete the connection from the connection map */ conn = ctx->c_quic_ctx_id.conn; quic_make_connection_key (&kv, quicly_get_master_id (conn)); QUIC_DBG (2, "Deleting conn with id %lu %lu", kv.key[0], kv.key[1]); clib_bihash_add_del_16_8 (&quic_main.connection_hash, &kv, 0 /* is_add */ ); // session_close (session_get_from_handle (ctx->c_quic_ctx_id.udp_session_handle)); quic_disconnect_transport (ctx); session_transport_delete_notify (&ctx->connection); /* Do not try to send anything anymore */ quicly_free (ctx->c_quic_ctx_id.conn); ctx->c_quic_ctx_id.conn = NULL; quic_ctx_free (ctx); } static void allocate_quicly_ctx (application_t * app, u8 is_client) { struct { quicly_context_t _; char cid_key[17]; } *ctx_data; quicly_context_t *quicly_ctx; ptls_iovec_t key_vec; QUIC_DBG (2, "Called allocate_quicly_ctx"); if (app->quicly_ctx) { QUIC_DBG (1, "Trying to reallocate quicly_ctx"); return; } ctx_data = malloc (sizeof (*ctx_data)); quicly_ctx = &ctx_data->_; app->quicly_ctx = (u64 *) quicly_ctx; memcpy (quicly_ctx, &quicly_spec_context, sizeof (quicly_context_t)); quicly_ctx->max_packet_size = QUIC_MAX_PACKET_SIZE; quicly_ctx->tls = &quic_tlsctx; quicly_ctx->stream_open = &on_stream_open; quicly_ctx->closed_by_peer = &on_closed_by_peer; quicly_ctx->now = &quicly_vpp_now_cb; quicly_amend_ptls_context (quicly_ctx->tls); quicly_ctx->event_log.mask = 0; /* logs */ quicly_ctx->event_log.cb = quicly_new_default_event_logger (stderr); quicly_ctx->transport_params.max_data = QUIC_INT_MAX; quicly_ctx->transport_params.max_streams_uni = QUIC_INT_MAX; quicly_ctx->transport_params.max_streams_bidi = QUIC_INT_MAX; quicly_ctx->transport_params.max_stream_data.bidi_local = (QUIC_FIFO_SIZE - 1); /* max_enq is SIZE - 1 */ quicly_ctx->transport_params.max_stream_data.bidi_remote = (QUIC_FIFO_SIZE - 1); /* max_enq is SIZE - 1 */ quicly_ctx->transport_params.max_stream_data.uni = QUIC_INT_MAX; quicly_ctx->tls->random_bytes (ctx_data->cid_key, 16); ctx_data->cid_key[16] = 0; key_vec = ptls_iovec_init (ctx_data->cid_key, strlen (ctx_data->cid_key)); quicly_ctx->cid_encryptor = quicly_new_default_cid_encryptor (&ptls_openssl_bfecb, &ptls_openssl_sha256, key_vec); if (!is_client && app->tls_key != NULL && app->tls_cert != NULL) { load_bio_private_key (quicly_ctx->tls, (char *) app->tls_key); load_bio_certificate_chain (quicly_ctx->tls, (char *) app->tls_cert); } } /***************************************************************************** * BEGIN TIMERS HANDLING *****************************************************************************/ static int64_t quic_get_thread_time (u8 thread_index) { return quic_main.wrk_ctx[thread_index].time_now; } static int64_t quic_get_time (quicly_now_t * self) { u8 thread_index = vlib_get_thread_index (); return quic_get_thread_time (thread_index); } static u32 quic_set_time_now (u32 thread_index) { vlib_main_t *vlib_main = vlib_get_main (); f64 time = vlib_time_now (vlib_main); quic_main.wrk_ctx[thread_index].time_now = (int64_t) (time * 1000.f); return quic_main.wrk_ctx[thread_index].time_now; } /* Transport proto callback */ static void quic_update_time (f64 now, u8 thread_index) { tw_timer_wheel_1t_3w_1024sl_ov_t *tw; tw = &quic_main.wrk_ctx[thread_index].timer_wheel; quic_set_time_now (thread_index); tw_timer_expire_timers_1t_3w_1024sl_ov (tw, now); } static void quic_timer_expired (u32 
conn_index) { quic_ctx_t *ctx; QUIC_DBG (4, "Timer expired for conn %u at %ld", conn_index, quic_get_time (NULL)); ctx = quic_ctx_get (conn_index, vlib_get_thread_index ()); ctx->timer_handle = QUIC_TIMER_HANDLE_INVALID; quic_send_packets (ctx); } static void quic_update_timer (quic_ctx_t * ctx) { tw_timer_wheel_1t_3w_1024sl_ov_t *tw; int64_t next_timeout, next_interval; session_t *quic_session; /* This timeout is in ms which is the unit of our timer */ next_timeout = quicly_get_first_timeout (ctx->c_quic_ctx_id.conn); next_interval = next_timeout - quic_get_time (NULL); if (next_timeout == 0 || next_interval <= 0) { if (ctx->c_s_index == QUIC_SESSION_INVALID) { next_interval = 1; } else { quic_session = session_get (ctx->c_s_index, ctx->c_thread_index); if (svm_fifo_set_event (quic_session->tx_fifo)) session_send_io_evt_to_thread_custom (quic_session, quic_session->thread_index, SESSION_IO_EVT_BUILTIN_TX); return; } } tw = &quic_main.wrk_ctx[vlib_get_thread_index ()].timer_wheel; QUIC_DBG (4, "Timer set to %ld (int %ld) for ctx %u", next_timeout, next_interval, ctx->c_c_index); if (ctx->timer_handle == QUIC_TIMER_HANDLE_INVALID) { if (next_timeout == INT64_MAX) { QUIC_DBG (4, "timer for ctx %u already stopped", ctx->c_c_index); return; } ctx->timer_handle = tw_timer_start_1t_3w_1024sl_ov (tw, ctx->c_c_index, 0, next_interval); } else { if (next_timeout == INT64_MAX) { tw_timer_stop_1t_3w_1024sl_ov (tw, ctx->timer_handle); ctx->timer_handle = QUIC_TIMER_HANDLE_INVALID; QUIC_DBG (4, "Stopping timer for ctx %u", ctx->c_c_index); } else tw_timer_update_1t_3w_1024sl_ov (tw, ctx->timer_handle, next_interval); } return; } static void quic_expired_timers_dispatch (u32 * expired_timers) { int i; for (i = 0; i < vec_len (expired_timers); i++) { quic_timer_expired (expired_timers[i]); } } /***************************************************************************** * END TIMERS HANDLING * * BEGIN TRANSPORT PROTO FUNCTIONS *****************************************************************************/ static int quic_connect (transport_endpoint_cfg_t * tep) { QUIC_DBG (2, "Called quic_connect"); session_endpoint_cfg_t *sep; int connect_stream = 0; sep = (session_endpoint_cfg_t *) tep; if (sep->port == 0) { /* TODO: better logic to detect if this is a stream or a connection request */ connect_stream = 1; } if (connect_stream) { return quic_connect_new_stream (sep); } else { return quic_connect_new_connection (sep); } } static int quic_connect_new_stream (session_endpoint_cfg_t * sep) { uint64_t quic_session_handle; session_t *quic_session, *stream_session; quic_stream_data_t *stream_data; quicly_stream_t *stream; quicly_conn_t *conn; app_worker_t *app_wrk; quic_ctx_t *qctx, *sctx; u32 sctx_index; int rv; /* Find base session to which the user want to attach a stream */ quic_session_handle = sep->transport_opts; QUIC_DBG (2, "Opening new stream (qsession %u)", sep->transport_opts); quic_session = session_get_from_handle (quic_session_handle); if (quic_session->session_type != session_type_from_proto_and_ip (TRANSPORT_PROTO_QUIC, sep->is_ip4)) { QUIC_DBG (1, "received incompatible session"); return -1; } app_wrk = app_worker_get_if_valid (quic_session->app_wrk_index); if (!app_wrk) { QUIC_DBG (1, "Invalid app worker :("); return -1; } sctx_index = quic_ctx_alloc (quic_session->thread_index); /* Allocate before we get pointers */ sctx = quic_ctx_get (sctx_index, quic_session->thread_index); qctx = quic_ctx_get (quic_session->connection_index, quic_session->thread_index); if 
(qctx->c_quic_ctx_id.is_stream) { QUIC_DBG (1, "session is a stream"); quic_ctx_free (sctx); return -1; } sctx->c_quic_ctx_id.parent_app_wrk_id = qctx->c_quic_ctx_id.parent_app_wrk_id; sctx->c_quic_ctx_id.parent_app_id = qctx->c_quic_ctx_id.parent_app_id; sctx->c_quic_ctx_id.quic_connection_ctx_id = qctx->c_c_index; sctx->c_c_index = sctx_index; sctx->c_quic_ctx_id.is_stream = 1; conn = qctx->c_quic_ctx_id.conn; if (!conn || !quicly_connection_is_ready (conn)) return -1; if ((rv = quicly_open_stream (conn, &stream, 0 /* uni */ ))) { QUIC_DBG (2, "Stream open failed with %d", rv); return -1; } sctx->c_quic_ctx_id.stream = stream; QUIC_DBG (2, "Opened stream %d, creating session", stream->stream_id); stream_session = session_alloc (qctx->c_thread_index); QUIC_DBG (2, "Allocated stream_session, id %u, thread %u ctx %u", stream_session->session_index, stream_session->thread_index, sctx_index); stream_session->flags |= SESSION_F_QUIC_STREAM; stream_session->app_wrk_index = app_wrk->wrk_index; stream_session->connection_index = sctx_index; stream_session->listener_handle = quic_session_handle; stream_session->session_type = session_type_from_proto_and_ip (TRANSPORT_PROTO_QUIC, qctx->c_quic_ctx_id.udp_is_ip4); sctx->c_s_index = stream_session->session_index; if (app_worker_init_connected (app_wrk, stream_session)) { QUIC_DBG (1, "failed to app_worker_init_connected"); quicly_reset_stream (stream, 0x30003); session_free_w_fifos (stream_session); quic_ctx_free (sctx); return app_worker_connect_notify (app_wrk, NULL, sep->opaque); } stream_session->session_state = SESSION_STATE_READY; if (app_worker_connect_notify (app_wrk, stream_session, sep->opaque)) { QUIC_DBG (1, "failed to notify app"); quicly_reset_stream (stream, 0x30004); session_free_w_fifos (stream_session); quic_ctx_free (sctx); return -1; } session_lookup_add_connection (&sctx->connection, session_handle (stream_session)); stream_data = (quic_stream_data_t *) stream->data; stream_data->ctx_id = sctx->c_c_index; stream_data->thread_index = sctx->c_thread_index; return 0; } static int quic_connect_new_connection (session_endpoint_cfg_t * sep) { vnet_connect_args_t _cargs = { {}, }, *cargs = &_cargs; quic_main_t *qm = &quic_main; quic_ctx_t *ctx; app_worker_t *app_wrk; application_t *app; u32 ctx_index; int error; ctx_index = quic_ctx_alloc (vlib_get_thread_index ()); ctx = quic_ctx_get (ctx_index, vlib_get_thread_index ()); ctx->c_quic_ctx_id.parent_app_wrk_id = sep->app_wrk_index; ctx->c_s_index = QUIC_SESSION_INVALID; ctx->c_c_index = ctx_index; ctx->c_quic_ctx_id.udp_is_ip4 = sep->is_ip4; ctx->timer_handle = QUIC_TIMER_HANDLE_INVALID; ctx->conn_state = QUIC_CONN_STATE_HANDSHAKE; ctx->client_opaque = sep->opaque; if (sep->hostname) { ctx->srv_hostname = format (0, "%v", sep->hostname); vec_terminate_c_string (ctx->srv_hostname); } else { /* needed by quic for crypto + determining client / server */ ctx->srv_hostname = format (0, "%U", format_ip46_address, &sep->ip, sep->is_ip4); } clib_memcpy (&cargs->sep, sep, sizeof (session_endpoint_cfg_t)); cargs->sep.transport_proto = TRANSPORT_PROTO_UDPC; cargs->app_index = qm->app_index; cargs->api_context = ctx_index; app_wrk = app_worker_get (sep->app_wrk_index); app = application_get (app_wrk->app_index); ctx->c_quic_ctx_id.parent_app_id = app_wrk->app_index; cargs->sep_ext.ns_index = app->ns_index; allocate_quicly_ctx (app, 1 /* is client */ ); if ((error = vnet_connect (cargs))) return error; return 0; } static void quic_disconnect (u32 ctx_index, u32 thread_index) { QUIC_DBG (2, "Called 
quic_disconnect"); quic_ctx_t *ctx; ctx = quic_ctx_get (ctx_index, thread_index); if (ctx->c_quic_ctx_id.is_stream) { QUIC_DBG (2, "Closing stream %x, session %x", ctx_index, ctx->c_s_index); quicly_stream_t *stream = ctx->c_quic_ctx_id.stream; quicly_reset_stream (stream, 0x30000); } else { QUIC_DBG (2, "Closing connection %x, session %x", ctx_index, ctx->c_s_index); quicly_conn_t *conn = ctx->c_quic_ctx_id.conn; /* Start connection closing. Keep sending packets until quicly_send returns QUICLY_ERROR_FREE_CONNECTION */ quicly_close (conn, 0, ""); /* This also causes all streams to be closed (and the cb called) */ quic_send_packets (ctx); } } static u32 quic_start_listen (u32 quic_listen_session_index, transport_endpoint_t * tep) { vnet_listen_args_t _bargs, *args = &_bargs; quic_main_t *qm = &quic_main; session_handle_t udp_handle; session_endpoint_cfg_t *sep; session_t *udp_listen_session; app_worker_t *app_wrk; application_t *app; quic_ctx_t *lctx; u32 lctx_index; app_listener_t *app_listener; sep = (session_endpoint_cfg_t *) tep; app_wrk = app_worker_get (sep->app_wrk_index); /* We need to call this because we call app_worker_init_connected in * quic_accept_stream, which assumes the connect segment manager exists */ app_worker_alloc_connects_segment_manager (app_wrk); app = application_get (app_wrk->app_index); QUIC_DBG (2, "Called quic_start_listen for app %d", app_wrk->app_index); allocate_quicly_ctx (app, 0 /* is_client */ ); sep->transport_proto = TRANSPORT_PROTO_UDPC; memset (args, 0, sizeof (*args)); args->app_index = qm->app_index; args->sep_ext = *sep; args->sep_ext.ns_index = app->ns_index; if (vnet_listen (args)) return -1; lctx_index = quic_ctx_alloc (0); /* listener */ udp_handle = args->handle; app_listener = app_listener_get_w_handle (udp_handle); udp_listen_session = app_listener_get_session (app_listener); udp_listen_session->opaque = lctx_index; lctx = quic_ctx_get (lctx_index, 0); /* listener */ lctx->is_listener = 1; lctx->c_quic_ctx_id.parent_app_wrk_id = sep->app_wrk_index; lctx->c_quic_ctx_id.parent_app_id = app_wrk->app_index; lctx->c_quic_ctx_id.udp_session_handle = udp_handle; lctx->c_quic_ctx_id.udp_is_ip4 = sep->is_ip4; lctx->c_s_index = quic_listen_session_index; QUIC_DBG (2, "Started listening %d", lctx_index); return lctx_index; } static u32 quic_stop_listen (u32 lctx_index) { QUIC_DBG (2, "Called quic_stop_listen"); quic_ctx_t *lctx; lctx = quic_ctx_get (lctx_index, 0); /* listener */ vnet_unlisten_args_t a = { .handle = lctx->c_quic_ctx_id.udp_session_handle, .app_index = quic_main.app_index, .wrk_map_index = 0 /* default wrk */ }; if (vnet_unlisten (&a)) clib_warning ("unlisten errored"); /* TODO: crypto state cleanup */ quic_ctx_free (lctx); /* listener */ return 0; } static transport_connection_t * quic_connection_get (u32 ctx_index, u32 thread_index) { QUIC_DBG (2, "Called quic_connection_get"); quic_ctx_t *ctx; ctx = quic_ctx_get (ctx_index, thread_index); return &ctx->connection; } static transport_connection_t * quic_listener_get (u32 listener_index) { QUIC_DBG (2, "Called quic_listener_get"); quic_ctx_t *ctx; ctx = quic_ctx_get (listener_index, 0); return &ctx->connection; } static u8 * format_quic_ctx (u8 * s, va_list * args) { quic_ctx_t *ctx = va_arg (*args, quic_ctx_t *); u32 verbose = va_arg (*args, u32); if (!ctx) return s; s = format (s, "[#%d][%s] ", ctx->c_thread_index, "Q"); if (ctx->is_listener) { s = format (s, "%s Listener: ", ctx->c_quic_ctx_id.is_stream ? 
"Stream" : "QSession"); if (verbose) s = format (s, "app %d wrk %d", ctx->c_quic_ctx_id.parent_app_id, ctx->c_quic_ctx_id.parent_app_wrk_id); } else { if (ctx->c_is_ip4) s = format (s, "%U:%d->%U:%d", format_ip4_address, &ctx->c_lcl_ip4, clib_net_to_host_u16 (ctx->c_lcl_port), format_ip4_address, &ctx->c_rmt_ip4, clib_net_to_host_u16 (ctx->c_rmt_port)); else s = format (s, "%U:%d->%U:%d", format_ip6_address, &ctx->c_lcl_ip6, clib_net_to_host_u16 (ctx->c_lcl_port), format_ip6_address, &ctx->c_rmt_ip6, clib_net_to_host_u16 (ctx->c_rmt_port)); } return s; } static u8 * format_quic_connection (u8 * s, va_list * args) { u32 qc_index = va_arg (*args, u32); u32 thread_index = va_arg (*args, u32); u32 verbose = va_arg (*args, u32); quic_ctx_t *ctx = quic_ctx_get (qc_index, thread_index); if (ctx) s = format (s, "%-50U", format_quic_ctx, ctx, verbose); return s; } static u8 * format_quic_half_open (u8 * s, va_list * args) { u32 qc_index = va_arg (*args, u32); quic_ctx_t *ctx = quic_ctx_get (qc_index, vlib_get_thread_index ()); s = format (s, "[QUIC] half-open app %u", ctx->c_quic_ctx_id.parent_app_id); return s; } /* TODO improve */ static u8 * format_quic_listener (u8 * s, va_list * args) { u32 tci = va_arg (*args, u32); u32 verbose = va_arg (*args, u32); quic_ctx_t *ctx = quic_ctx_get (tci, vlib_get_thread_index ()); if (ctx) { ASSERT (ctx->is_listener); s = format (s, "%-50U", format_quic_ctx, ctx, verbose); } return s; } /***************************************************************************** * END TRANSPORT PROTO FUNCTIONS * * START SESSION CALLBACKS * Called from UDP layer *****************************************************************************/ static inline void quic_build_sockaddr (struct sockaddr *sa, socklen_t * salen, ip46_address_t * addr, u16 port, u8 is_ip4) { if (is_ip4) { struct sockaddr_in *sa4 = (struct sockaddr_in *) sa; sa4->sin_family = AF_INET; sa4->sin_port = port; sa4->sin_addr.s_addr = addr->ip4.as_u32; *salen = sizeof (struct sockaddr_in); } else { struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) sa; sa6->sin6_family = AF_INET6; sa6->sin6_port = port; clib_memcpy (&sa6->sin6_addr, &addr->ip6, 16); *salen = sizeof (struct sockaddr_in6); } } static int quic_notify_app_connected (quic_ctx_t * ctx) { QUIC_DBG (1, "quic_notify_app_connected"); session_t *quic_session; app_worker_t *app_wrk; u32 ctx_id = ctx->c_c_index; u32 thread_index = ctx->c_thread_index; quic_ctx_t *lctx; app_wrk = app_worker_get_if_valid (ctx->c_quic_ctx_id.parent_app_wrk_id); if (!app_wrk) { quic_disconnect_transport (ctx); return -1; } quic_session = session_alloc (thread_index); lctx = quic_ctx_get (ctx->c_quic_ctx_id.listener_ctx_id, 0); QUIC_DBG (2, "Allocated quic_session, id %u, thread %u", quic_session->session_index, quic_session->thread_index); ctx->c_s_index = quic_session->session_index; quic_session->app_wrk_index = ctx->c_quic_ctx_id.parent_app_wrk_id; quic_session->connection_index = ctx->c_c_index; quic_session->listener_handle = lctx->c_s_index; quic_session->session_type = session_type_from_proto_and_ip (TRANSPORT_PROTO_QUIC, ctx->c_quic_ctx_id.udp_is_ip4); if (app_worker_init_connected (app_wrk, quic_session)) { QUIC_DBG (1, "failed to app_worker_init_connected"); quic_disconnect (ctx_id, thread_index); return app_worker_connect_notify (app_wrk, NULL, ctx->client_opaque); } quic_session->session_state = SESSION_STATE_CONNECTING; if (app_worker_connect_notify (app_wrk, quic_session, ctx->client_opaque)) { QUIC_DBG (1, "failed to notify app"); quic_disconnect (ctx_id, 
thread_index); return -1; } /* If the app opens a stream in its callback it may invalidate ctx */ ctx = quic_ctx_get (ctx_id, thread_index); quic_session->session_state = SESSION_STATE_LISTENING; session_lookup_add_connection (&ctx->connection, session_handle (quic_session)); return 0; } static int quic_session_connected_callback (u32 quic_app_index, u32 ctx_index, session_t * udp_session, u8 is_fail) { QUIC_DBG (2, "QSession is now connected (id %u)", udp_session->session_index); /* This should always be called before quic_connect returns since UDP always * connects instantly. */ clib_bihash_kv_16_8_t kv; struct sockaddr_in6 sa6; struct sockaddr *sa = (struct sockaddr *) &sa6; socklen_t salen; transport_connection_t *tc; app_worker_t *app_wrk; quicly_conn_t *conn; application_t *app; quic_ctx_t *ctx; u32 thread_index = vlib_get_thread_index (); int ret; ctx = quic_ctx_get (ctx_index, thread_index); if (is_fail) { u32 api_context; int rv = 0; app_wrk = app_worker_get_if_valid (ctx->c_quic_ctx_id.parent_app_wrk_id); if (app_wrk) { api_context = ctx->c_s_index; app_worker_connect_notify (app_wrk, 0, api_context); } return rv; } app_wrk = app_worker_get_if_valid (ctx->c_quic_ctx_id.parent_app_wrk_id); if (!app_wrk) { QUIC_DBG (1, "Appwrk not found"); return -1; } app = application_get (app_wrk->app_index); ctx->c_thread_index = thread_index; ctx->c_c_index = ctx_index; QUIC_DBG (2, "Quic connect returned %u. New ctx [%u]%x", is_fail, thread_index, (ctx) ? ctx_index : ~0); ctx->c_quic_ctx_id.udp_session_handle = session_handle (udp_session); udp_session->opaque = ctx->c_quic_ctx_id.parent_app_id; udp_session->session_state = SESSION_STATE_READY; /* Init QUIC lib connection * Generate required sockaddr & salen */ tc = session_get_transport (udp_session); quic_build_sockaddr (sa, &salen, &tc->rmt_ip, tc->rmt_port, tc->is_ip4); ret = quicly_connect (&ctx->c_quic_ctx_id.conn, (quicly_context_t *) app->quicly_ctx, (char *) ctx->srv_hostname, sa, salen, &quic_main.next_cid, &quic_main.hs_properties, NULL); ++quic_main.next_cid.master_id; /* Save context handle in quicly connection */ quic_store_conn_ctx (ctx->c_quic_ctx_id.conn, ctx); assert (ret == 0); /* Register connection in connections map */ conn = ctx->c_quic_ctx_id.conn; quic_make_connection_key (&kv, quicly_get_master_id (conn)); kv.value = ((u64) thread_index) << 32 | (u64) ctx_index; QUIC_DBG (2, "Registering conn with id %lu %lu", kv.key[0], kv.key[1]); clib_bihash_add_del_16_8 (&quic_main.connection_hash, &kv, 1 /* is_add */ ); quic_send_packets (ctx); /* UDP stack quirk? 
preemptively transfer connection if that happens */ if (udp_session->thread_index != thread_index) quic_transfer_connection (ctx_index, udp_session->thread_index); return ret; } static void quic_receive_connection (void *arg) { u32 new_ctx_id, thread_index = vlib_get_thread_index (); quic_ctx_t *temp_ctx, *new_ctx; clib_bihash_kv_16_8_t kv; quicly_conn_t *conn; temp_ctx = arg; new_ctx_id = quic_ctx_alloc (thread_index); new_ctx = quic_ctx_get (new_ctx_id, thread_index); QUIC_DBG (2, "Received conn %u (now %u)", temp_ctx->c_thread_index, new_ctx_id); memcpy (new_ctx, temp_ctx, sizeof (quic_ctx_t)); free (temp_ctx); new_ctx->c_thread_index = thread_index; new_ctx->c_c_index = new_ctx_id; conn = new_ctx->c_quic_ctx_id.conn; quic_store_conn_ctx (conn, new_ctx); quic_make_connection_key (&kv, quicly_get_master_id (conn)); kv.value = ((u64) thread_index) << 32 | (u64) new_ctx_id; QUIC_DBG (2, "Registering conn with id %lu %lu", kv.key[0], kv.key[1]); clib_bihash_add_del_16_8 (&quic_main.connection_hash, &kv, 1 /* is_add */ ); new_ctx->timer_handle = QUIC_TIMER_HANDLE_INVALID; quic_update_timer (new_ctx); /* Trigger read on this connection ? */ } static void quic_transfer_connection (u32 ctx_index, u32 dest_thread) { tw_timer_wheel_1t_3w_1024sl_ov_t *tw; quic_ctx_t *ctx, *temp_ctx; clib_bihash_kv_16_8_t kv; quicly_conn_t *conn; u32 thread_index = vlib_get_thread_index (); QUIC_DBG (2, "Transferring conn %u to thread %u", ctx_index, dest_thread); temp_ctx = malloc (sizeof (quic_ctx_t)); ASSERT (temp_ctx); ctx = quic_ctx_get (ctx_index, thread_index); memcpy (temp_ctx, ctx, sizeof (quic_ctx_t)); /* Remove from lookup hash, timer wheel and thread-local pool */ conn = ctx->c_quic_ctx_id.conn; quic_make_connection_key (&kv, quicly_get_master_id (conn)); clib_bihash_add_del_16_8 (&quic_main.connection_hash, &kv, 0 /* is_add */ ); if (ctx->timer_handle != QUIC_TIMER_HANDLE_INVALID) { tw = &quic_main.wrk_ctx[thread_index].timer_wheel; tw_timer_stop_1t_3w_1024sl_ov (tw, ctx->timer_handle); } quic_ctx_free (ctx); /* Send connection to destination thread */ session_send_rpc_evt_to_thread (dest_thread, quic_receive_connection, (void *) temp_ctx); } static void quic_transfer_connection_rpc (void *arg) { u64 arg_int = (u64) arg; u32 ctx_index, dest_thread; ctx_index = (u32) (arg_int >> 32); dest_thread = (u32) (arg_int & UINT32_MAX); quic_transfer_connection (ctx_index, dest_thread); } /* * This assumes that the connection is not yet associated to a session * So currently it only works on the client side when receiving the first packet * from the server */ static void quic_move_connection_to_thread (u32 ctx_index, u32 owner_thread, u32 to_thread) { QUIC_DBG (2, "Requesting transfer of conn %u from thread %u", ctx_index, owner_thread); u64 arg = ((u64) ctx_index) << 32 | to_thread; session_send_rpc_evt_to_thread (owner_thread, quic_transfer_connection_rpc, (void *) arg); } static void quic_session_disconnect_callback (session_t * s) { clib_warning ("UDP session disconnected???"); } static void quic_session_reset_callback (session_t * s) { clib_warning ("UDP session reset???"); } int quic_session_accepted_callback (session_t * udp_session) { /* New UDP connection, try to accept it */ QUIC_DBG (2, "UDP session accepted"); u32 ctx_index; u32 *pool_index; quic_ctx_t *ctx, *lctx; session_t *udp_listen_session; u32 thread_index = vlib_get_thread_index (); udp_listen_session = listen_session_get_from_handle (udp_session->listener_handle); ctx_index = quic_ctx_alloc (thread_index); ctx = quic_ctx_get (ctx_index, 
static void
quic_session_disconnect_callback (session_t * s)
{
  clib_warning ("UDP session disconnected???");
}

static void
quic_session_reset_callback (session_t * s)
{
  clib_warning ("UDP session reset???");
}

int
quic_session_accepted_callback (session_t * udp_session)
{
  /* New UDP connection, try to accept it */
  QUIC_DBG (2, "UDP session accepted");
  u32 ctx_index;
  u32 *pool_index;
  quic_ctx_t *ctx, *lctx;
  session_t *udp_listen_session;
  u32 thread_index = vlib_get_thread_index ();

  udp_listen_session =
    listen_session_get_from_handle (udp_session->listener_handle);

  ctx_index = quic_ctx_alloc (thread_index);
  ctx = quic_ctx_get (ctx_index, thread_index);
  ctx->c_thread_index = udp_session->thread_index;
  ctx->c_c_index = ctx_index;
  ctx->c_s_index = QUIC_SESSION_INVALID;
  ctx->c_quic_ctx_id.udp_session_handle = session_handle (udp_session);
  ctx->c_quic_ctx_id.listener_ctx_id = udp_listen_session->opaque;
  lctx = quic_ctx_get (udp_listen_session->opaque,
		       udp_listen_session->thread_index);
  ctx->c_quic_ctx_id.udp_is_ip4 = lctx->c_quic_ctx_id.udp_is_ip4;
  ctx->c_quic_ctx_id.parent_app_id = lctx->c_quic_ctx_id.parent_app_id;
  ctx->c_quic_ctx_id.parent_app_wrk_id =
    lctx->c_quic_ctx_id.parent_app_wrk_id;
  ctx->timer_handle = QUIC_TIMER_HANDLE_INVALID;
  ctx->conn_state = QUIC_CONN_STATE_OPENED;

  udp_session->opaque = ctx->c_quic_ctx_id.parent_app_id;

  /* Put this ctx in the "opening" pool */
  pool_get (quic_main.wrk_ctx[ctx->c_thread_index].opening_ctx_pool,
	    pool_index);
  *pool_index = ctx_index;

  /* TODO timeout to delete these if they never connect */
  return 0;
}

static int
quic_add_segment_callback (u32 client_index, u64 seg_handle)
{
  QUIC_DBG (2, "Called quic_add_segment_callback");
  QUIC_DBG (2, "NOT IMPLEMENTED");
  /* No-op for builtin */
  return 0;
}

static int
quic_del_segment_callback (u32 client_index, u64 seg_handle)
{
  QUIC_DBG (2, "Called quic_del_segment_callback");
  QUIC_DBG (2, "NOT IMPLEMENTED");
  /* No-op for builtin */
  return 0;
}

static int
quic_custom_tx_callback (void *s)
{
  session_t *stream_session = (session_t *) s;
  quicly_stream_t *stream;
  quic_ctx_t *ctx;
  int rv;

  svm_fifo_unset_event (stream_session->tx_fifo);
  if (PREDICT_FALSE
      (stream_session->session_state >= SESSION_STATE_TRANSPORT_CLOSING))
    return 0;
  ctx = quic_ctx_get (stream_session->connection_index,
		      stream_session->thread_index);
  if (PREDICT_FALSE (!ctx->c_quic_ctx_id.is_stream))
    {
      goto tx_end;		/* Most probably a reschedule */
    }

  stream = ctx->c_quic_ctx_id.stream;
  if (!quicly_sendstate_is_open (&stream->sendstate))
    {
      QUIC_DBG (1, "Warning: tried to send on closed stream");
      return -1;
    }

  if ((rv = quicly_stream_sync_sendbuf (stream, 1)) != 0)
    return rv;

tx_end:
  quic_send_packets (ctx);
  return 0;
}
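
/* Packets are demultiplexed to their owning connection by destination CID:
 * quic_make_connection_key derives the bihash key from the plaintext master
 * id, and the stored value encodes <thread, ctx index> as described above.
 * A hit owned by another thread is reported back (-1 with *ctx_thread set)
 * so the caller can request a migration instead of touching foreign state. */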
/*
 * Returns 0 if a matching connection is found and is on the right thread.
 * If a connection is found, even on the wrong thread, ctx_thread and
 * ctx_index will be set.
 */
static inline int
quic_find_packet_ctx (u32 * ctx_thread, u32 * ctx_index,
		      struct sockaddr *sa, socklen_t salen,
		      quicly_decoded_packet_t * packet,
		      u32 caller_thread_index)
{
  quic_ctx_t *ctx_;
  quicly_conn_t *conn_;
  clib_bihash_kv_16_8_t kv;
  clib_bihash_16_8_t *h;

  h = &quic_main.connection_hash;
  quic_make_connection_key (&kv, &packet->cid.dest.plaintext);
  QUIC_DBG (3, "Searching conn with id %lu %lu", kv.key[0], kv.key[1]);

  if (clib_bihash_search_16_8 (h, &kv, &kv) == 0)
    {
      u32 index = kv.value & UINT32_MAX;
      u8 thread_id = kv.value >> 32;
      /* Check if this connection belongs to this thread, otherwise
       * ask for it to be moved */
      if (thread_id != caller_thread_index)
	{
	  QUIC_DBG (2, "Connection is on wrong thread");
	  /* Cannot make full check with quicly_is_destination... */
	  *ctx_index = index;
	  *ctx_thread = thread_id;
	  return -1;
	}
      ctx_ = quic_ctx_get (index, vlib_get_thread_index ());
      conn_ = ctx_->c_quic_ctx_id.conn;
      if (conn_ && quicly_is_destination (conn_, sa, salen, packet))
	{
	  QUIC_DBG (3, "Connection found");
	  *ctx_index = index;
	  *ctx_thread = thread_id;
	  return 0;
	}
    }
  QUIC_DBG (3, "connection not found");
  return -1;
}

static int
quic_receive (quic_ctx_t * ctx, quicly_conn_t * conn,
	      quicly_decoded_packet_t packet)
{
  int rv;
  u32 ctx_id = ctx->c_c_index;
  u32 thread_index = ctx->c_thread_index;

  /* TODO: QUICLY_ERROR_PACKET_IGNORED should be handled */
  rv = quicly_receive (conn, &packet);
  if (rv)
    {
      QUIC_DBG (2, "Quicly receive ignored packet code : %u", rv);
      return 0;
    }
  /* ctx pointer may change if a new stream is opened */
  ctx = quic_ctx_get (ctx_id, thread_index);
  /* Conn may be set to null if the connection is terminated */
  if (ctx->c_quic_ctx_id.conn
      && ctx->conn_state == QUIC_CONN_STATE_HANDSHAKE)
    {
      if (quicly_connection_is_ready (conn))
	{
	  ctx->conn_state = QUIC_CONN_STATE_READY;
	  if (quicly_is_client (conn))
	    {
	      quic_notify_app_connected (ctx);
	      ctx = quic_ctx_get (ctx_id, thread_index);
	    }
	}
    }
  return quic_send_packets (ctx);
}

static int
quic_create_quic_session (quic_ctx_t * ctx)
{
  session_t *quic_session;
  app_worker_t *app_wrk;
  quic_ctx_t *lctx;
  int rv;

  quic_session = session_alloc (ctx->c_thread_index);
  QUIC_DBG (2, "Allocated quic_session, id %u, thread %u ctx %u",
	    quic_session->session_index, quic_session->thread_index,
	    ctx->c_c_index);
  quic_session->session_state = SESSION_STATE_LISTENING;
  ctx->c_s_index = quic_session->session_index;

  lctx = quic_ctx_get (ctx->c_quic_ctx_id.listener_ctx_id, 0);

  quic_session->app_wrk_index = lctx->c_quic_ctx_id.parent_app_wrk_id;
  quic_session->connection_index = ctx->c_c_index;
  quic_session->session_type =
    session_type_from_proto_and_ip (TRANSPORT_PROTO_QUIC,
				    ctx->c_quic_ctx_id.udp_is_ip4);
  quic_session->listener_handle = lctx->c_quic_ctx_id.listener_ctx_id;

  /* TODO: don't alloc fifos when we don't transfer data on this session
   * but we still need fifos for the events? */
  if ((rv = app_worker_init_accepted (quic_session)))
    {
      QUIC_DBG (1, "failed to allocate fifos");
      session_free (quic_session);
      return rv;
    }
  session_lookup_add_connection (&ctx->connection,
				 session_handle (quic_session));
  app_wrk = app_worker_get (quic_session->app_wrk_index);
  rv = app_worker_accept_notify (app_wrk, quic_session);
  if (rv)
    {
      QUIC_DBG (1, "failed to notify accept worker app");
      return rv;
    }
  return 0;
}
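
/* Server-side accept path, called from the rx callback when an Initial
 * packet matches no known connection: quicly_accept validates the packet,
 * the new quicly connection is bound to the pre-allocated ctx, and a quic
 * session is created and announced to the application. */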
static int
quic_create_connection (quicly_context_t * quicly_ctx,
			u64 udp_session_handle, u32 ctx_index,
			struct sockaddr *sa, socklen_t salen,
			quicly_decoded_packet_t packet)
{
  clib_bihash_kv_16_8_t kv;
  quic_ctx_t *ctx;
  quicly_conn_t *conn;
  u32 thread_index = vlib_get_thread_index ();
  int rv;

  /* new connection, accept and create context if packet is valid
   * TODO: check if socket is actually listening? */
  if ((rv = quicly_accept (&conn, quicly_ctx, sa, salen,
			   &packet, ptls_iovec_init (NULL, 0),
			   &quic_main.next_cid, NULL)))
    {
      /* Invalid packet, pass */
      assert (conn == NULL);
      QUIC_DBG (1, "Accept failed with %d", rv);
      /* TODO: cleanup created quic ctx and UDP session */
      return 0;
    }
  assert (conn != NULL);

  ++quic_main.next_cid.master_id;
  ctx = quic_ctx_get (ctx_index, thread_index);
  /* Save ctx handle in quicly connection */
  quic_store_conn_ctx (conn, ctx);
  ctx->c_quic_ctx_id.conn = conn;
  ctx->conn_state = QUIC_CONN_STATE_HANDSHAKE;

  quic_create_quic_session (ctx);

  /* Register connection in connections map */
  quic_make_connection_key (&kv, quicly_get_master_id (conn));
  kv.value = ((u64) thread_index) << 32 | (u64) ctx_index;
  clib_bihash_add_del_16_8 (&quic_main.connection_hash, &kv, 1 /* is_add */ );
  QUIC_DBG (2, "Registering conn with id %lu %lu", kv.key[0], kv.key[1]);

  return quic_send_packets (ctx);
}

static int
quic_reset_connection (quicly_context_t * quicly_ctx, u64 udp_session_handle,
		       struct sockaddr *sa, socklen_t salen,
		       quicly_decoded_packet_t packet)
{
  /* short header packet; potentially a dead connection. No need to check the
   * length of the incoming packet, because loop is prevented by
   * authenticating the CID (by checking node_id and thread_id). If the peer
   * is also sending a reset, then the next CID is highly likely to contain a
   * non-authenticating CID, ... */
  QUIC_DBG (2, "Sending stateless reset");
  quicly_datagram_t *dgram;
  session_t *udp_session;
  if (packet.cid.dest.plaintext.node_id == 0
      && packet.cid.dest.plaintext.thread_id == 0)
    {
      dgram = quicly_send_stateless_reset (quicly_ctx, sa, salen,
					   &packet.cid.dest.plaintext);
      if (dgram == NULL)
	return 1;
      udp_session = session_get_from_handle (udp_session_handle);
      return quic_send_datagram (udp_session, dgram);
      /* TODO: set event on fifo */
    }
  return 0;
}
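
/* The rx callback below drains the UDP session's rx fifo one datagram at a
 * time. As the code reads it, each record is a session_dgram_hdr_t
 * (SESSION_CONN_HDR_LEN bytes) peeked at offset 0, followed by the datagram
 * bytes peeked at ph.data_offset + SESSION_CONN_HDR_LEN; the record is only
 * dropped from the fifo once the packet has been fully handled. */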
static int
quic_app_rx_callback (session_t * udp_session)
{
  /* Read data from UDP rx_fifo and pass it to the quicly conn. */
  quicly_decoded_packet_t packet;
  session_dgram_hdr_t ph;
  application_t *app;
  quic_ctx_t *ctx = NULL;
  svm_fifo_t *f;
  size_t plen;
  struct sockaddr_in6 sa6;
  struct sockaddr *sa = (struct sockaddr *) &sa6;
  socklen_t salen;
  u32 max_deq, len, full_len, ctx_index = UINT32_MAX,
    ctx_thread = UINT32_MAX, ret;
  u8 *data;
  int err;
  u32 *opening_ctx_pool, *ctx_index_ptr;
  u32 app_index = udp_session->opaque;
  u64 udp_session_handle = session_handle (udp_session);
  int rv = 0;
  u32 thread_index = vlib_get_thread_index ();

  app = application_get_if_valid (app_index);
  if (!app)
    {
      QUIC_DBG (1, "Got RX on detached app");
      /* TODO: close this session, cleanup state? */
      return 1;
    }

  do
    {
      /* session alloc might have happened */
      udp_session = session_get_from_handle (udp_session_handle);
      f = udp_session->rx_fifo;
      svm_fifo_unset_event (f);
      max_deq = svm_fifo_max_dequeue (f);
      if (max_deq < sizeof (session_dgram_hdr_t))
	return 0;

      ret = svm_fifo_peek (f, 0, SESSION_CONN_HDR_LEN, (u8 *) & ph);
      if (ret != SESSION_CONN_HDR_LEN)
	{
	  QUIC_DBG (1, "Not enough data for header in RX");
	  return 1;
	}

      if (ph.data_length < ph.data_offset)
	{
	  QUIC_DBG (1, "Not enough data vs offset in RX");
	  return 1;
	}

      len = ph.data_length - ph.data_offset;
      full_len = ph.data_length + ph.data_offset + SESSION_CONN_HDR_LEN;
      if (full_len > max_deq)
	{
	  QUIC_DBG (1, "Not enough data in fifo RX");
	  return 1;
	}

      /* Quicly can read len bytes from the fifo at offset:
       * ph.data_offset + SESSION_CONN_HDR_LEN */
      data = malloc (ph.data_length);
      ret = svm_fifo_peek (f, ph.data_offset + SESSION_CONN_HDR_LEN,
			   ph.data_length, data);
      if (ret != ph.data_length)
	{
	  QUIC_DBG (1, "Not enough data peeked in RX");
	  free (data);
	  return 1;
	}

      rv = 0;
      quic_build_sockaddr (sa, &salen, &ph.rmt_ip, ph.rmt_port, ph.is_ip4);
      plen = quicly_decode_packet ((quicly_context_t *) app->quicly_ctx,
				   &packet, data, len);

      if (plen != SIZE_MAX)
	{
	  err = quic_find_packet_ctx (&ctx_thread, &ctx_index, sa, salen,
				      &packet, thread_index);
	  if (err == 0)
	    {
	      ctx = quic_ctx_get (ctx_index, thread_index);
	      quic_receive (ctx, ctx->c_quic_ctx_id.conn, packet);
	    }
	  else if (ctx_thread != UINT32_MAX)
	    {
	      /* Connection found but on wrong thread, ask move */
	      quic_move_connection_to_thread (ctx_index, ctx_thread,
					      thread_index);
	    }
	  else if ((packet.octets.base[0] & QUICLY_PACKET_TYPE_BITMASK) ==
		   QUICLY_PACKET_TYPE_INITIAL)
	    {
	      /* Try to find matching "opening" ctx */
	      opening_ctx_pool =
		quic_main.wrk_ctx[thread_index].opening_ctx_pool;

	      /* *INDENT-OFF* */
	      pool_foreach (ctx_index_ptr, opening_ctx_pool,
	      ({
		ctx = quic_ctx_get (*ctx_index_ptr, thread_index);
		if (ctx->c_quic_ctx_id.udp_session_handle == udp_session_handle)
		  {
		    /* Right ctx found, create conn & remove from pool */
		    quic_create_connection ((quicly_context_t *) app->quicly_ctx,
					    udp_session_handle, *ctx_index_ptr,
					    sa, salen, packet);
		    pool_put (opening_ctx_pool, ctx_index_ptr);
		    goto ctx_search_done;
		  }
	      }));
	      /* *INDENT-ON* */
	    }
	  else
	    {
	      quic_reset_connection ((quicly_context_t *) app->quicly_ctx,
				     udp_session_handle, sa, salen, packet);
	    }
	}
    ctx_search_done:
      svm_fifo_dequeue_drop (f, ph.data_length + ph.data_offset +
			     SESSION_CONN_HDR_LEN);
      free (data);
    }
  while (1);
  return rv;
}

always_inline void
quic_common_get_transport_endpoint (quic_ctx_t * ctx,
				    transport_endpoint_t * tep, u8 is_lcl)
{
  session_t *udp_session;
  if (ctx->c_quic_ctx_id.is_stream)
    {
      tep->is_ip4 = 255;	/* well this is ugly */
    }
  else
    {
      udp_session =
	session_get_from_handle (ctx->c_quic_ctx_id.udp_session_handle);
      session_get_endpoint (udp_session, tep, is_lcl);
    }
}

static void
quic_get_transport_listener_endpoint (u32 listener_index,
				      transport_endpoint_t * tep, u8 is_lcl)
{
  quic_ctx_t *ctx;
  app_listener_t *app_listener;
  session_t *udp_listen_session;
  ctx = quic_ctx_get (listener_index, vlib_get_thread_index ());
  if (ctx->is_listener)
    {
      app_listener =
	app_listener_get_w_handle (ctx->c_quic_ctx_id.udp_session_handle);
      udp_listen_session = app_listener_get_session (app_listener);
      return session_get_endpoint (udp_listen_session, tep, is_lcl);
    }
  quic_common_get_transport_endpoint (ctx, tep, is_lcl);
}

static void
quic_get_transport_endpoint (u32 ctx_index, u32 thread_index,
			     transport_endpoint_t * tep, u8 is_lcl)
{
  quic_ctx_t *ctx;
  ctx = quic_ctx_get (ctx_index, thread_index);
  quic_common_get_transport_endpoint (ctx, tep, is_lcl);
}
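
/* Note: stream ctxs carry no network endpoint of their own, hence the
 * is_ip4 = 255 sentinel above; only ctxs backed by a UDP session can report
 * a real endpoint. */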
/*****************************************************************************
 * END TRANSPORT PROTO FUNCTIONS
 *****************************************************************************/

/* *INDENT-OFF* */
static session_cb_vft_t quic_app_cb_vft = {
  .session_accept_callback = quic_session_accepted_callback,
  .session_disconnect_callback = quic_session_disconnect_callback,
  .session_connected_callback = quic_session_connected_callback,
  .session_reset_callback = quic_session_reset_callback,
  .add_segment_callback = quic_add_segment_callback,
  .del_segment_callback = quic_del_segment_callback,
  .builtin_app_rx_callback = quic_app_rx_callback,
};

static const transport_proto_vft_t quic_proto = {
  .connect = quic_connect,
  .close = quic_disconnect,
  .start_listen = quic_start_listen,
  .stop_listen = quic_stop_listen,
  .get_connection = quic_connection_get,
  .get_listener = quic_listener_get,
  .update_time = quic_update_time,
  .custom_tx = quic_custom_tx_callback,
  .tx_type = TRANSPORT_TX_INTERNAL,
  .service_type = TRANSPORT_SERVICE_APP,
  .format_connection = format_quic_connection,
  .format_half_open = format_quic_half_open,
  .format_listener = format_quic_listener,
  .get_transport_endpoint = quic_get_transport_endpoint,
  .get_transport_listener_endpoint = quic_get_transport_listener_endpoint,
};
/* *INDENT-ON* */

static clib_error_t *
quic_init (vlib_main_t * vm)
{
  u32 add_segment_size = (4096ULL << 20) - 1, segment_size = 512 << 20;
  vlib_thread_main_t *vtm = vlib_get_thread_main ();
  tw_timer_wheel_1t_3w_1024sl_ov_t *tw;
  vnet_app_attach_args_t _a, *a = &_a;
  u64 options[APP_OPTIONS_N_OPTIONS];
  quic_main_t *qm = &quic_main;
  u32 fifo_size = QUIC_FIFO_SIZE;
  u32 num_threads, i;

  num_threads = 1 /* main thread */  + vtm->n_threads;

  memset (a, 0, sizeof (*a));
  memset (options, 0, sizeof (options));

  a->session_cb_vft = &quic_app_cb_vft;
  a->api_client_index = APP_INVALID_INDEX;
  a->options = options;
  a->name = format (0, "quic");
  a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
  a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = add_segment_size;
  a->options[APP_OPTIONS_RX_FIFO_SIZE] = fifo_size;
  a->options[APP_OPTIONS_TX_FIFO_SIZE] = fifo_size;
  a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
  a->options[APP_OPTIONS_FLAGS] |= APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
  a->options[APP_OPTIONS_FLAGS] |= APP_OPTIONS_FLAGS_IS_TRANSPORT_APP;

  if (vnet_application_attach (a))
    {
      clib_warning ("failed to attach quic app");
      return clib_error_return (0, "failed to attach quic app");
    }

  vec_validate (qm->ctx_pool, num_threads - 1);
  vec_validate (qm->wrk_ctx, num_threads - 1);
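
  /* One entry per vlib thread, main thread included: ctx pools and worker
   * contexts (timer wheel, opening ctx pool) are indexed by thread. */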
  /* Timer wheels, one per thread. */
  for (i = 0; i < num_threads; i++)
    {
      tw = &qm->wrk_ctx[i].timer_wheel;
      tw_timer_wheel_init_1t_3w_1024sl_ov (tw, quic_expired_timers_dispatch,
					   1e-3 /* timer period 1ms */ , ~0);
      tw->last_run_time = vlib_time_now (vlib_get_main ());
    }

  clib_bihash_init_16_8 (&qm->connection_hash, "quic connections", 1024,
			 4 << 20);

  if (!qm->ca_cert_path)
    qm->ca_cert_path = QUIC_DEFAULT_CA_CERT_PATH;

  qm->app_index = a->app_index;
  qm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock
    / QUIC_TSTAMP_RESOLUTION;

  transport_register_protocol (TRANSPORT_PROTO_QUIC, &quic_proto,
			       FIB_PROTOCOL_IP4, ~0);
  transport_register_protocol (TRANSPORT_PROTO_QUIC, &quic_proto,
			       FIB_PROTOCOL_IP6, ~0);

  vec_free (a->name);
  return 0;
}

VLIB_INIT_FUNCTION (quic_init);

/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
  .version = VPP_BUILD_VER,
  .description = "Quic transport protocol",
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */