aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/srmpls/sr_mpls_policy.c
blob: ceff11cbac8a99bdeea55149c9067b5feee450b9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
/*
 * sr_mpls_policy.c: SR-MPLS policies
 *
 * Copyright (c) 2016 Cisco and/or its affiliates. Licensed under the Apache
 * License, Version 2.0 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */

/**
 * @file
 * @brief SR MPLS policy creation and application
 *
 * Create an SR policy.
 * An SR policy can be either of 'default' type or 'spray' type
 * An SR policy has attached a list of SID lists.
 * In case the SR policy is a default one it will load balance among them.
 * An SR policy has associated a BindingSID.
 * In case any packet arrives with MPLS_label == BindingSID then the SR policy
 * associated to such bindingSID will be applied to such packet.
 * Also, a BSID can be associated with a (Next-Hop, Color)
 *
 */

#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vnet/srmpls/sr_mpls.h>
#include <vnet/fib/mpls_fib.h>
#include <vnet/dpo/dpo.h>
#include <vnet/ip/ip.h>

#include <vppinfra/error.h>
#include <vppinfra/elog.h>

/* Global SR-MPLS state: policy pool, SID-list pool and BSID lookup hash */
mpls_sr_main_t sr_mpls_main;

/***************************  SR LB helper functions **************************/
/**
 * @brief Creates a Segment List and adds it to an SR policy
 *
 * Creates a Segment List and adds it to the SR policy. Notice that the SL are
 * not necessarily unique. Hence there might be two Segment List within the
 * same SR Policy with exactly the same segments and same weight.
 *
 * For each MPLS EOS bit it installs a FIB entry on the policy's BSID label
 * whose path uses the first segment as the local label and pushes the
 * remaining segments as the outgoing label stack.
 *
 * @param sr_policy is the SR policy where the SL will be added
 * @param sl is a vector of MPLS labels composing the Segment List
 * @param weight is the weight of the SegmentList (for load-balancing purposes)
 *
 * @return pointer to the just created segment list
 */
static inline mpls_sr_sl_t *
create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight)
{
  mpls_sr_main_t *sm = &sr_mpls_main;
  mpls_sr_sl_t *segment_list;
  u32 ii;

  pool_get (sm->sid_lists, segment_list);
  clib_memset (segment_list, 0, sizeof (*segment_list));

  /* Record the pool index of the new SL on the owning policy */
  vec_add1 (sr_policy->segments_lists, segment_list - sm->sid_lists);

  /* Fill in segment list */
  segment_list->weight =
    (weight != (u32) ~ 0 ? weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT);
  segment_list->segments = vec_dup (sl);

  mpls_eos_bit_t eos;
  FOR_EACH_MPLS_EOS_BIT (eos)
  {
    /* sl[0] is the path's local label; sl[1..] become the label stack */
    fib_route_path_t path = {
      .frp_proto = DPO_PROTO_MPLS,
      .frp_sw_if_index = ~0,
      .frp_fib_index = 0,
      .frp_weight = segment_list->weight,
      .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
      .frp_label_stack = NULL,
      .frp_local_label = sl[0],
    };

    if (vec_len (sl) > 1)
      {
	vec_validate (path.frp_label_stack, vec_len (sl) - 2);
	for (ii = 1; ii < vec_len (sl); ii++)
	  {
	    path.frp_label_stack[ii - 1].fml_value = sl[ii];
	  }
      }
    else
      {
	/*
	 * add an implicit NULL label to allow non-eos recursion
	 */
	fib_mpls_label_t lbl = {
	  .fml_value = MPLS_IETF_IMPLICIT_NULL_LABEL,
	};
	vec_add1 (path.frp_label_stack, lbl);
      }

    fib_route_path_t *paths = NULL;
    vec_add1 (paths, path);

    /* *INDENT-OFF* */
    fib_prefix_t pfx = {
        .fp_len = 21,		/* 20-bit label + EOS bit: exact-label match */
        .fp_proto = FIB_PROTOCOL_MPLS,
        .fp_label = sr_policy->bsid,
        .fp_eos = eos,
        .fp_payload_proto = DPO_PROTO_MPLS,
    };
    /* *INDENT-ON* */

    /* Non-default (spray) policies are installed with the MULTICAST flag */
    fib_table_entry_path_add2 (0,
			       &pfx,
			       FIB_SOURCE_SR,
			       (sr_policy->type == SR_POLICY_TYPE_DEFAULT ?
				FIB_ENTRY_FLAG_NONE :
				FIB_ENTRY_FLAG_MULTICAST), paths);
    vec_free (paths);
  }

  return segment_list;
}

/******************************* SR rewrite API *******************************/
/*
 * Three functions for handling sr policies: -> sr_mpls_policy_add ->
 * sr_mpls_policy_del -> sr_mpls_policy_mod All of them are API. CLI function
 * on sr_policy_command_fn
 */

/**
 * @brief Create a new SR policy
 *
 * @param bsid is the bindingSID of the SR Policy
 * @param segments is a vector of MPLS labels composing the segment list
 * @param behavior is the behavior of the SR policy. (default//spray)
 * @param weight is the weight of this specific SID list
 *
 * @return 0 if correct; -12 if a policy with this BSID already exists;
 *         VNET_API_ERROR_NO_SUCH_TABLE if the default MPLS table is absent
 */
int
sr_mpls_policy_add (mpls_label_t bsid, mpls_label_t * segments,
		    u8 behavior, u32 weight)
{
  mpls_sr_main_t *sm = &sr_mpls_main;
  mpls_sr_policy_t *sr_policy = 0;
  uword *p;

  /* Lazily create the BSID -> policy-index hash */
  if (!sm->sr_policies_index_hash)
    sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));

  /* MPLS SR policies cannot be created unless the MPLS table is present */
  if (~0 == fib_table_find (FIB_PROTOCOL_MPLS, MPLS_FIB_DEFAULT_TABLE_ID))
    return (VNET_API_ERROR_NO_SUCH_TABLE);

  /* Search for existing keys (BSID) */
  p = hash_get (sm->sr_policies_index_hash, bsid);
  if (p)
    {
      /* Add SR policy that already exists; complain */
      return -12;
    }
  /* Add an SR policy object */
  pool_get (sm->sr_policies, sr_policy);
  clib_memset (sr_policy, 0, sizeof (*sr_policy));

  /* the first policy needs to lock the MPLS table so it doesn't
   * disappear with policies in it */
  if (1 == pool_elts (sm->sr_policies))
    fib_table_find_or_create_and_lock (FIB_PROTOCOL_MPLS,
				       MPLS_FIB_DEFAULT_TABLE_ID,
				       FIB_SOURCE_SR);
  sr_policy->bsid = bsid;
  sr_policy->type = behavior;
  /* No TE (endpoint, color) tuple until one is explicitly assigned */
  sr_policy->endpoint_type = 0;
  ip6_address_set_zero (&sr_policy->endpoint.ip6);
  sr_policy->color = (u32) ~ 0;

  /* Copy the key */
  hash_set (sm->sr_policies_index_hash, bsid, sr_policy - sm->sr_policies);

  /* Create a segment list and add the index to the SR policy */
  create_sl (sr_policy, segments, weight);

  return 0;
}

/**
 * @brief Delete a SR policy
 *
 * Removes the BSID FIB entries (one per EOS bit) for every segment list of
 * the policy, frees the segment lists and the policy itself, and releases
 * the MPLS table lock when the last policy is gone.
 *
 * @param bsid is the bindingSID of the SR Policy
 *
 * @return 0 if correct, else error
 */
int
sr_mpls_policy_del (mpls_label_t bsid)
{
  mpls_sr_main_t *sm = &sr_mpls_main;
  mpls_sr_policy_t *sr_policy = 0;
  mpls_sr_sl_t *segment_list;
  mpls_eos_bit_t eos;
  u32 *sl_index;
  u32 ii;
  uword *p;

  if (!sm->sr_policies_index_hash)
    sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));

  p = hash_get (sm->sr_policies_index_hash, bsid);
  if (p)
    sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
  else
    return -1;

  /* Clean SID Lists */
  vec_foreach (sl_index, sr_policy->segments_lists)
  {
    segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);

    fib_route_path_t path = {
      .frp_proto = DPO_PROTO_MPLS,
      .frp_sw_if_index = ~0,
      .frp_fib_index = 0,
      .frp_weight = segment_list->weight,
      .frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
      .frp_label_stack = NULL,
      .frp_local_label = segment_list->segments[0],
    };

    /* Rebuild the outgoing label stack exactly as create_sl() installed
     * it, so that fib_table_entry_path_remove2() matches the added path.
     * (The previous code vec_add'ed raw bytes starting at the pool
     * element (segment_list + 1) and called vec_len() on a non-vector
     * pointer, which reads adjacent pool memory as a length.) */
    if (vec_len (segment_list->segments) > 1)
      {
	vec_validate (path.frp_label_stack,
		      vec_len (segment_list->segments) - 2);
	for (ii = 1; ii < vec_len (segment_list->segments); ii++)
	  path.frp_label_stack[ii - 1].fml_value = segment_list->segments[ii];
      }
    else
      {
	/* create_sl() pushes an implicit NULL label for 1-label SLs */
	fib_mpls_label_t lbl = {
	  .fml_value = MPLS_IETF_IMPLICIT_NULL_LABEL,
	};
	vec_add1 (path.frp_label_stack, lbl);
      }

    fib_route_path_t *paths = NULL;
    vec_add1 (paths, path);

    /* remove each of the MPLS routes */
    FOR_EACH_MPLS_EOS_BIT (eos)
    {
      /* *INDENT-OFF* */
      fib_prefix_t pfx = {
	.fp_len = 21,
	.fp_proto = FIB_PROTOCOL_MPLS,
	.fp_label = sr_policy->bsid,
	.fp_eos = eos,
	.fp_payload_proto = DPO_PROTO_MPLS,
      };
      /* *INDENT-ON* */

      fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
    }
    vec_free (paths);
    vec_free (segment_list->segments);
    pool_put_index (sm->sid_lists, *sl_index);
  }

  /* Free the per-policy vector of SL indices (previously leaked) */
  vec_free (sr_policy->segments_lists);

  /* If there is still traces of TE, make sure locks are released */
  if (sr_policy->endpoint_type != 0 && sr_policy->color != (u32) ~ 0)
    {
      sr_mpls_policy_assign_endpoint_color (bsid, NULL, 0, (u32) ~ 0);
    }

  /* Remove SR policy entry */
  hash_unset (sm->sr_policies_index_hash, sr_policy->bsid);
  pool_put (sm->sr_policies, sr_policy);

  /* Last policy gone: drop the lock taken by sr_mpls_policy_add() */
  if (0 == pool_elts (sm->sr_policies))
    fib_table_unlock (MPLS_FIB_DEFAULT_TABLE_ID,
		      FIB_PROTOCOL_MPLS, FIB_SOURCE_SR);

  return 0;
}

/**
 * @brief Modify an existing SR policy
 *
 * The possible modifications are adding a new Segment List, modifying an
 * existing Segment List (modify the weight only) and delete a given
 * Segment List from the SR Policy.
 *
 * @param bsid is the bindingSID of the SR Policy
 * @param fib_table is the VRF where to install the FIB entry for the BSID
 * @param operation is the operation to perform (among the top ones)
 * @param segments is a vector of IPv6 address composing the segment list
 * @param sl_index is the index of the Segment List to modify/delete
 * @param weight is the weight of the sid list. optional.
 *
 * @return 0 ok, >0 index of SL, <0 error
 */
int
sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
		    mpls_label_t * segments, u32 sl_index, u32 weight)
{
  mpls_sr_main_t *sm = &sr_mpls_main;
  mpls_sr_policy_t *sr_policy = 0;
  mpls_sr_sl_t *segment_list;
  u32 *sl_index_iterate;
  uword *p;

  if (!sm->sr_policies_index_hash)
    sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));

  p = hash_get (sm->sr_policies_index_hash, bsid);
  if (p)
    sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
  else
    return -1;

  if (operation == 1)
    {				/* Add SR List to an existing SR policy */
      /* Create the new SL */
      segment_list = create_sl (sr_policy, segments, weight);
      return segment_list - sm->sid_lists;
    }
  else if (operation == 2)
    {				/* Delete SR List from an existing SR
				 * policy */
      /* Check that currently there are more than one SID list */
      if (vec_len (sr_policy->segments_lists) == 1)
	return -21;

      /*
       * Check that the SR list does exist and is assigned to the
       * sr policy
       */
      vec_foreach (sl_index_iterate, sr_policy->segments_lists)
	if (*sl_index_iterate == sl_index)
	break;

      if (*sl_index_iterate != sl_index)
	return -22;

      /* Remove the lucky SR list that is being kicked out */
      segment_list = pool_elt_at_index (sm->sid_lists, sl_index);

      mpls_eos_bit_t eos;
      fib_route_path_t path = {
	.frp_proto = DPO_PROTO_MPLS,
	.frp_sw_if_index = ~0,
	.frp_fib_index = 0,
	.frp_weight = segment_list->weight,
	.frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
	.frp_local_label = segment_list->segments[0],
      };

      vec_add (path.frp_label_stack, segment_list + 1,
	       vec_len (segment_list) - 1);

      fib_route_path_t *paths = NULL;
      vec_add1 (paths, path);

      FOR_EACH_MPLS_EOS_BIT (eos)
      {
			/* *INDENT-OFF* */
			fib_prefix_t	pfx = {
				.fp_len = 21,
				.fp_proto = FIB_PROTOCOL_MPLS,
				.fp_label = sr_policy->bsid,
				.fp_eos = eos,
				.fp_payload_proto = DPO_PROTO_MPLS,
			};
			/* *INDENT-ON* */

	fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
      }

      vec_free (paths);
      vec_free (segment_list->segments);
      pool_put_index (sm->sid_lists, sl_index);
      vec_del1 (sr_policy->segments_lists,
		sl_index_iterate - sr_policy->segments_lists);
    }
  else if (operation == 3)
    {				/* Modify the weight of an existing
				 * SR List */
      /* Find the corresponding SL */
      vec_foreach (sl_index_iterate, sr_policy->segments_lists)
	if (*sl_index_iterate == sl_index)
	break;

      if (*sl_index_iterate != sl_index)
	return -32;

      /* Change the weight */
      segment_list = pool_elt_at_index (sm->sid_lists, sl_index);

      /* Update LB */
      mpls_eos_bit_t eos;
      fib_route_path_t path = {
	.frp_proto = DPO_PROTO_MPLS,
	.frp_sw_if_index = ~0,
	.frp_fib_index = 0,
	.frp_weight = segment_list->weight,
	.frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
	.frp_local_label = segment_list->segments[0],
      };

      vec_add (path.frp_label_stack, segment_list + 1,
	       vec_len (segment_list) - 1);

      fib_route_path_t *paths = NULL;
      vec_add1 (paths, path);

      FOR_EACH_MPLS_EOS_BIT (eos)
      {
			/* *INDENT-OFF* */
			fib_prefix_t	pfx = {
				.fp_len = 21,
				.fp_proto = FIB_PROTOCOL_MPLS,
				.fp_label = sr_policy->bsid,
				.fp_eos = eos,
				.fp_payload_proto = DPO_PROTO_MPLS,
			};
			/* *INDENT-ON* */

	fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
      }

      segment_list->weight = weight;

      path.frp_weight = segment_list->weight;

      vec_free (paths);
      paths = NULL;
      vec_add1 (paths, path);

      FOR_EACH_MPLS_EOS_BIT (eos)
      {
			/* *INDENT-OFF* */
			fib_prefix_t	pfx = {
				.fp_len = 21,
				.fp_proto = FIB_PROTOCOL_MPLS,
				.fp_label = sr_policy->bsid,
				.fp_eos = eos,
				.fp_payload_proto = DPO_PROTO_MPLS,
			};
			/* *INDENT-ON* */

	fib_table_entry_path_add2 (0,
				   &pfx,
				   FIB_SOURCE_SR,
				   (sr_policy->type ==
				    SR_POLICY_TYPE_DEFAULT ?
				    FIB_ENTRY_FLAG_NONE :
				    FIB_ENTRY_FLAG_MULTICAST), paths);
      }
    }
  return 0;
}

/**
 * @brief CLI for 'sr mpls policies' command family
 *
 * Parses add/del/mod plus BSID, segment list ("next" labels), weight and
 * spray flag, then dispatches to the corresponding API function. The
 * 'segments' vector is freed on every exit path (it was previously
 * leaked on error returns and on the del path).
 */
static clib_error_t *
sr_mpls_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
			   vlib_cli_command_t * cmd)
{
  int rv = -1;
  char is_del = 0, is_add = 0, is_mod = 0;
  char policy_set = 0;
  mpls_label_t bsid, next_label;
  u32 sl_index = (u32) ~ 0;
  u32 weight = (u32) ~ 0;
  mpls_label_t *segments = 0;
  u8 operation = 0;
  u8 is_spray = 0;

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (!is_add && !is_mod && !is_del && unformat (input, "add"))
	is_add = 1;
      else if (!is_add && !is_mod && !is_del && unformat (input, "del"))
	is_del = 1;
      else if (!is_add && !is_mod && !is_del && unformat (input, "mod"))
	is_mod = 1;
      else if (!policy_set
	       && unformat (input, "bsid %U", unformat_mpls_unicast_label,
			    &bsid))
	policy_set = 1;
      else if (unformat (input, "weight %d", &weight));
      else if (unformat
	       (input, "next %U", unformat_mpls_unicast_label, &next_label))
	{
	  vec_add (segments, &next_label, 1);
	}
      else if (unformat (input, "add sl"))
	operation = 1;
      else if (unformat (input, "del sl index %d", &sl_index))
	operation = 2;
      else if (unformat (input, "mod sl index %d", &sl_index))
	operation = 3;
      else if (unformat (input, "spray"))
	is_spray = 1;
      else
	break;
    }

  if (!is_add && !is_mod && !is_del)
    {
      vec_free (segments);
      return clib_error_return (0, "Incorrect CLI");
    }

  if (!policy_set)
    {
      vec_free (segments);
      return clib_error_return (0, "No SR policy BSID or index specified");
    }

  if (is_add)
    {
      if (vec_len (segments) == 0)
	return clib_error_return (0, "No Segment List specified");

      rv = sr_mpls_policy_add (bsid, segments,
			       (is_spray ? SR_POLICY_TYPE_SPRAY :
				SR_POLICY_TYPE_DEFAULT), weight);
      vec_free (segments);
    }
  else if (is_del)
    {
      rv = sr_mpls_policy_del (bsid);
      /* any "next" labels given with del are meaningless; don't leak them */
      vec_free (segments);
    }
  else if (is_mod)
    {
      if (!operation)
	{
	  vec_free (segments);
	  return clib_error_return (0, "No SL modification specified");
	}
      if (operation != 1 && sl_index == (u32) ~ 0)
	{
	  vec_free (segments);
	  return clib_error_return (0, "No Segment List index specified");
	}
      if (operation == 1 && vec_len (segments) == 0)
	return clib_error_return (0, "No Segment List specified");
      if (operation == 3 && weight == (u32) ~ 0)
	{
	  vec_free (segments);
	  return clib_error_return (0, "No new weight for the SL specified");
	}
      rv = sr_mpls_policy_mod (bsid, operation, segments, sl_index, weight);
      vec_free (segments);
    }
  switch (rv)
    {
    case 0:
      break;
    case 1:
      return 0;
    case -12:
      return clib_error_return (0,
				"There is already a FIB entry for the BindingSID address.\n"
				"The SR policy could not be created.");
    case -21:
      return clib_error_return (0,
				"The selected SR policy only contains ONE segment list. "
				"Please remove the SR policy instead");
    case -22:
      return clib_error_return (0,
				"Could not delete the segment list. "
				"It is not associated with that SR policy.");
    case -23:
      return clib_error_return (0,
				"Could not delete the segment list. "
				"It is not associated with that SR policy.");
    case -32:
      return clib_error_return (0,
				"Could not modify the segment list. "
				"The given SL is not associated with such SR policy.");
    case VNET_API_ERROR_NO_SUCH_TABLE:
      return clib_error_return (0, "the Default MPLS table is not present");
    default:
      return clib_error_return (0, "BUG: sr policy returns %d", rv);
    }
  return 0;
}

/* *INDENT-OFF* */
VLIB_CLI_COMMAND (sr_mpls_policy_command, static) =
{
  .path = "sr mpls policy",
  .short_help = "sr mpls policy [add||del||mod] bsid 2999 "
    "next 10 next 20 next 30 (weight 1) (spray)",
  .long_help = "TBD.\n",
  .function = sr_mpls_policy_command_fn,
};
/* *INDENT-ON* */

/**
 * @brief CLI to display onscreen all the SR MPLS policies
 *
 * Prints, for each policy: its pool index, BSID, TE (endpoint, color)
 * tuple when assigned, type (default/spray) and segment lists.
 */
static clib_error_t *
show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
				  vlib_cli_command_t * cmd)
{
  mpls_sr_main_t *sm = &sr_mpls_main;
  mpls_sr_sl_t *segment_list = 0;
  mpls_sr_policy_t *sr_policy = 0;
  mpls_sr_policy_t **vec_policies = 0;
  mpls_label_t *label;
  u32 *sl_index;
  u8 *s;
  int i = 0;

  vlib_cli_output (vm, "SR MPLS policies:");

	/* *INDENT-OFF* */
	pool_foreach(sr_policy, sm->sr_policies, {
		vec_add1(vec_policies, sr_policy);
	});
	/* *INDENT-ON* */

  vec_foreach_index (i, vec_policies)
  {
    sr_policy = vec_policies[i];
    vlib_cli_output (vm, "[%u].-\tBSID: %U",
		     (u32) (sr_policy - sm->sr_policies),
		     format_mpls_unicast_label, sr_policy->bsid);
    switch (sr_policy->endpoint_type)
      {
      case SR_STEER_IPV6:
	vlib_cli_output (vm, "\tEndpoint: %U", format_ip6_address,
			 &sr_policy->endpoint.ip6);
	vlib_cli_output (vm, "\tColor: %u", sr_policy->color);
	break;
      case SR_STEER_IPV4:
	vlib_cli_output (vm, "\tEndpoint: %U", format_ip4_address,
			 &sr_policy->endpoint.ip4);
	vlib_cli_output (vm, "\tColor: %u", sr_policy->color);
	break;
      default:
	vlib_cli_output (vm, "\tTE disabled");
      }
    vlib_cli_output (vm, "\tType: %s",
		     (sr_policy->type ==
		      SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray"));
    vlib_cli_output (vm, "\tSegment Lists:");
    vec_foreach (sl_index, sr_policy->segments_lists)
    {
      s = NULL;
      segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
      s = format (s, "\t[%u].- ", *sl_index);
      s = format (s, "< ");
      vec_foreach (label, segment_list->segments)
      {
	s = format (s, "%U, ", format_mpls_unicast_label, *label);
      }
      s = format (s, "\b\b > ");
      vlib_cli_output (vm, "  %s", s);
      /* free the formatted line (was leaked on every invocation) */
      vec_free (s);
    }
    vlib_cli_output (vm, "-----------");
  }
  vec_free (vec_policies);
  return 0;
}

/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_mpls_policies_command, static) =
{
  .path = "show sr mpls policies",
  .short_help = "show sr mpls policies",
  .function = show_sr_mpls_policies_command_fn,
};
/* *INDENT-ON* */

/**
 * @brief Update the Endpoint,Color tuple of an SR policy
 *
 * @param bsid is the bindingSID of the SR Policy
 * @param endpoint represents the IP46 of the endpoint
 * @param endpoint_type is SR_STEER_IPV4/SR_STEER_IPV6, or 0 to clear
 * @param color represents the color (u32)
 *
 * To reset to NULL use ~0 as parameters.
 *
 * @return 0 if correct, else error
 */
int
sr_mpls_policy_assign_endpoint_color (mpls_label_t bsid,
				      ip46_address_t * endpoint,
				      u8 endpoint_type, u32 color)
{
  mpls_sr_main_t *sm = &sr_mpls_main;
  mpls_sr_policy_t *sr_policy = 0;
  uword *endpoint_table, *p, *old_value;

  /* The (ANY) endpoint is represented by the all-ones address */
  ip46_address_t any;
  any.as_u64[0] = any.as_u64[1] = (u64) ~ 0;

  if (!sm->sr_policies_index_hash)
    sm->sr_policies_index_hash = hash_create (0, sizeof (mpls_label_t));

  p = hash_get (sm->sr_policies_index_hash, bsid);
  if (p)
    sr_policy = pool_elt_at_index (sm->sr_policies, p[0]);
  else
    return -1;

  /* If previous Endpoint, color existed, remove (NH,C) and (ANY,C) */
  if (sr_policy->endpoint_type)
    {
      endpoint_table =
	mhash_get (&sm->sr_policies_c2e2eclabel_hash, &sr_policy->color);
      if (!endpoint_table)
	return -2;
      old_value =
	mhash_get ((mhash_t *) endpoint_table, &sr_policy->endpoint);

      /* CID 180995 This should never be NULL unless the two hash tables
       * get out of sync */
      ALWAYS_ASSERT (old_value != NULL);

      fib_prefix_t pfx = { 0 };
      pfx.fp_proto = FIB_PROTOCOL_MPLS;
      pfx.fp_len = 21;
      pfx.fp_label = (u32) * old_value;

      mpls_eos_bit_t eos;
      FOR_EACH_MPLS_EOS_BIT (eos)
      {
	pfx.fp_eos = eos;
	fib_table_entry_path_remove (sm->fib_table_EC,
				     &pfx,
				     FIB_SOURCE_SR,
				     DPO_PROTO_MPLS,
				     NULL,
				     ~0, 0, 1, FIB_ROUTE_PATH_FLAG_NONE);
      }

      old_value = mhash_get ((mhash_t *) endpoint_table, &any);
      /* Same CID 180995 rationale: the (ANY, color) label must exist
       * whenever the (endpoint, color) one does. Previously this lookup
       * was dereferenced without any check. */
      ALWAYS_ASSERT (old_value != NULL);
      pfx.fp_label = (u32) * old_value;

      FOR_EACH_MPLS_EOS_BIT (eos)
      {
	pfx.fp_eos = eos;
	fib_table_entry_path_remove (sm->fib_table_EC,
				     &pfx,
				     FIB_SOURCE_SR,
				     DPO_PROTO_MPLS,
				     NULL,
				     ~0, 0, 1, FIB_ROUTE_PATH_FLAG_NONE);
      }

      /* Release the lock on (NH, Color) and (ANY, Color) */
      internal_label_unlock (sr_policy->endpoint, sr_policy->color);
      internal_label_unlock (any, sr_policy->color);

      /* Reset the values on the SR policy */
      sr_policy->endpoint_type = 0;
      sr_policy->endpoint.as_u64[0] = sr_policy->endpoint.as_u64[1] =
	(u64) ~ 0;
      sr_policy->color = (u32) ~ 0;
    }

  if (endpoint_type)
    {
      sr_policy->endpoint_type = endpoint_type;
      sr_policy->endpoint.as_u64[0] = endpoint->as_u64[0];
      sr_policy->endpoint.as_u64[1] = endpoint->as_u64[1];
      sr_policy->color = color;

      u32 label = find_or_create_internal_label (*endpoint, color);
      internal_label_lock (*endpoint, sr_policy->color);

      /* If FIB doesnt exist, create them */
      if (sm->fib_table_EC == (u32) ~ 0)
	{
	  sm->fib_table_EC = fib_table_create_and_lock (FIB_PROTOCOL_MPLS,
							FIB_SOURCE_SR,
							"SR-MPLS Traffic Engineering (NextHop,Color)");

	  fib_table_flush (sm->fib_table_EC, FIB_PROTOCOL_MPLS,
			   FIB_SOURCE_SPECIAL);
	}

      fib_prefix_t pfx = { 0 };
      pfx.fp_proto = FIB_PROTOCOL_MPLS;
      pfx.fp_len = 21;

      fib_route_path_t path = {
	.frp_proto = DPO_PROTO_MPLS,
	.frp_sw_if_index = ~0,
	.frp_fib_index = 0,
	.frp_weight = 1,
	.frp_flags = FIB_ROUTE_PATH_FLAG_NONE,
	.frp_label_stack = 0
      };
      path.frp_local_label = sr_policy->bsid;

      //Add the entry to ANY,Color
      u32 any_label = find_or_create_internal_label (any, color);
      internal_label_lock (any, sr_policy->color);

      pfx.fp_eos = MPLS_EOS;
      path.frp_eos = MPLS_EOS;

      fib_route_path_t *paths = NULL;
      vec_add1 (paths, path);

      pfx.fp_label = label;
      fib_table_entry_update (sm->fib_table_EC,
			      &pfx,
			      FIB_SOURCE_SR,
			      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);

      pfx.fp_label = any_label;
      fib_table_entry_update (sm->fib_table_EC,
			      &pfx,
			      FIB_SOURCE_SR,
			      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);

      /* Non-eos entries push an implicit NULL to allow recursion */
      fib_mpls_label_t fml = {
	.fml_value = MPLS_IETF_IMPLICIT_NULL_LABEL,
      };

      vec_add1 (path.frp_label_stack, fml);
      pfx.fp_eos = MPLS_NON_EOS;
      path.frp_eos = MPLS_NON_EOS;

      /* vec_free (instead of the previous 'paths = NULL') so the
       * EOS paths vector is not leaked before rebuilding it */
      vec_free (paths);
      vec_add1 (paths, path);

      pfx.fp_label = label;
      fib_table_entry_update (sm->fib_table_EC,
			      &pfx,
			      FIB_SOURCE_SR,
			      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);

      pfx.fp_label = any_label;
      fib_table_entry_update (sm->fib_table_EC,
			      &pfx,
			      FIB_SOURCE_SR,
			      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT, paths);

      /* previously leaked on function exit */
      vec_free (paths);
    }
  return 0;
}

/**
 * @brief CLI to modify the Endpoint,Color of an SR policy
 *
 * Parses "bsid X [endpoint A.B.C.D|A::B] [color N] [clear]" and calls
 * sr_mpls_policy_assign_endpoint_color().
 */
static clib_error_t *
cli_sr_mpls_policy_ec_command_fn (vlib_main_t * vm, unformat_input_t * input,
				  vlib_cli_command_t * cmd)
{
  ip46_address_t endpoint;
  u32 color = (u32) ~ 0;
  mpls_label_t bsid;
  u8 endpoint_type = 0;
  char clear = 0, color_set = 0, bsid_set = 0;

  clib_memset (&endpoint, 0, sizeof (ip46_address_t));

  int rv;
  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (!endpoint_type
	  && unformat (input, "endpoint %U", unformat_ip6_address,
		       &endpoint.ip6))
	endpoint_type = SR_STEER_IPV6;
      else if (!endpoint_type
	       && unformat (input, "endpoint %U", unformat_ip4_address,
			    &endpoint.ip4))
	endpoint_type = SR_STEER_IPV4;
      else if (!color_set && unformat (input, "color %u", &color))
	color_set = 1;
      else if (!bsid_set
	       && unformat (input, "bsid %U", unformat_mpls_unicast_label,
			    &bsid))
	bsid_set = 1;
      else if (!clear && unformat (input, "clear"))
	clear = 1;
      else
	break;
    }

  if (!bsid_set)
    return clib_error_return (0, "No BSID specified");
  if (!endpoint_type && !clear)
    return clib_error_return (0, "No Endpoint specified");
  if (!color_set && !clear)
    return clib_error_return (0, "No Color set");

  /* In case its a cleanup */
  if (clear)
    {
      ip6_address_set_zero (&endpoint.ip6);
      color = (u32) ~ 0;
    }
  rv =
    sr_mpls_policy_assign_endpoint_color (bsid, &endpoint, endpoint_type,
					  color);

  /* The error must be returned; previously the clib_error_t was built,
   * discarded, and the CLI reported success */
  if (rv)
    return clib_error_return (0, "Error on Endpoint,Color");

  return 0;
}

/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_sr_mpls_policy_ec_command, static) =
{
  .path = "sr mpls policy te",
  .short_help = "sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234",
  .function = cli_sr_mpls_policy_ec_command_fn,
};
/* *INDENT-ON* */

/********************* SR MPLS Policy initialization ***********************/
/**
 * @brief SR MPLS Policy  initialization
 *
 * Clears the policy lookup tables; both hashes are allocated lazily
 * on first insertion.
 *
 * @return always 0 (no failure path)
 */
clib_error_t *
sr_mpls_policy_rewrite_init (vlib_main_t * vm)
{
  mpls_sr_main_t *srm = &sr_mpls_main;

  /* bsid <-> policy index mapping starts out empty */
  srm->sr_policies_index_hash = NULL;
  /* (color, endpoint) -> label hash likewise unallocated until needed */
  srm->sr_policies_c2e2eclabel_hash.hash = NULL;

  return 0;
}

VLIB_INIT_FUNCTION (sr_mpls_policy_rewrite_init);

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
>); if (node->flags & VLIB_NODE_FLAG_TRACE) ip4_forward_next_trace (vm, node, frame, VLIB_TX); return frame->n_vectors; } /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_load_balance_node) = { .name = "ip4-load-balance", .vector_size = sizeof (u32), .sibling_of = "ip4-lookup", .format_trace = format_ip4_lookup_trace, }; /* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT /* get first interface address */ ip4_address_t * ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, ip_interface_address_t ** result_ia) { ip_lookup_main_t *lm = &im->lookup_main; ip_interface_address_t *ia = 0; ip4_address_t *result = 0; /* *INDENT-OFF* */ foreach_ip_interface_address (lm, ia, sw_if_index, 1 /* honor unnumbered */ , ({ ip4_address_t * a = ip_interface_address_get_address (lm, ia); result = a; break; })); /* *INDENT-OFF* */ if (result_ia) *result_ia = result ? ia : 0; return result; } #endif static void ip4_add_subnet_bcast_route (u32 fib_index, fib_prefix_t *pfx, u32 sw_if_index) { vnet_sw_interface_flags_t iflags; iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index); fib_table_entry_special_remove(fib_index, pfx, FIB_SOURCE_INTERFACE); if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST) { fib_table_entry_update_one_path (fib_index, pfx, FIB_SOURCE_INTERFACE, FIB_ENTRY_FLAG_NONE, DPO_PROTO_IP4, /* No next-hop address */ &ADJ_BCAST_ADDR, sw_if_index, // invalid FIB index ~0, 1, // no out-label stack NULL, FIB_ROUTE_PATH_FLAG_NONE); } else { fib_table_entry_special_add(fib_index, pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_DROP | FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT)); } } static void ip4_add_interface_prefix_routes (ip4_main_t *im, u32 sw_if_index, u32 fib_index, ip_interface_address_t * a) { ip_lookup_main_t *lm = &im->lookup_main; ip_interface_prefix_t *if_prefix; ip4_address_t *address = ip_interface_address_get_address (lm, a); ip_interface_prefix_key_t key = { .prefix = { .fp_len = a->address_length, .fp_proto = FIB_PROTOCOL_IP4, .fp_addr.ip4.as_u32 = address->as_u32 
& im->fib_masks[a->address_length], }, .sw_if_index = sw_if_index, }; fib_prefix_t pfx_special = { .fp_proto = FIB_PROTOCOL_IP4, }; /* If prefix already set on interface, just increment ref count & return */ if_prefix = ip_get_interface_prefix (lm, &key); if (if_prefix) { if_prefix->ref_count += 1; return; } /* New prefix - allocate a pool entry, initialize it, add to the hash */ pool_get (lm->if_prefix_pool, if_prefix); if_prefix->ref_count = 1; if_prefix->src_ia_index = a - lm->if_address_pool; clib_memcpy (&if_prefix->key, &key, sizeof (key)); mhash_set (&lm->prefix_to_if_prefix_index, &key, if_prefix - lm->if_prefix_pool, 0 /* old value */); pfx_special.fp_len = a->address_length; pfx_special.fp_addr.ip4.as_u32 = address->as_u32; /* set the glean route for the prefix */ fib_table_entry_update_one_path (fib_index, &pfx_special, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), DPO_PROTO_IP4, /* No next-hop address */ NULL, sw_if_index, /* invalid FIB index */ ~0, 1, /* no out-label stack */ NULL, FIB_ROUTE_PATH_FLAG_NONE); /* length <= 30 - add glean, drop first address, maybe drop bcast address */ if (a->address_length <= 30) { /* set a drop route for the base address of the prefix */ pfx_special.fp_len = 32; pfx_special.fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[a->address_length]; if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32) fib_table_entry_special_add (fib_index, &pfx_special, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_DROP | FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT)); /* set a route for the broadcast address of the prefix */ pfx_special.fp_len = 32; pfx_special.fp_addr.ip4.as_u32 = address->as_u32 | ~im->fib_masks[a->address_length]; if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32) ip4_add_subnet_bcast_route (fib_index, &pfx_special, sw_if_index); } /* length == 31 - add an attached route for the other address */ else if (a->address_length == 31) { pfx_special.fp_len = 32; pfx_special.fp_addr.ip4.as_u32 = 
address->as_u32 ^ clib_host_to_net_u32(1); fib_table_entry_update_one_path (fib_index, &pfx_special, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_ATTACHED), DPO_PROTO_IP4, &pfx_special.fp_addr, sw_if_index, /* invalid FIB index */ ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); } } static void ip4_add_interface_routes (u32 sw_if_index, ip4_main_t * im, u32 fib_index, ip_interface_address_t * a) { ip_lookup_main_t *lm = &im->lookup_main; ip4_address_t *address = ip_interface_address_get_address (lm, a); fib_prefix_t pfx = { .fp_len = 32, .fp_proto = FIB_PROTOCOL_IP4, .fp_addr.ip4 = *address, }; /* set special routes for the prefix if needed */ ip4_add_interface_prefix_routes (im, sw_if_index, fib_index, a); if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index)) { u32 classify_table_index = lm->classify_table_index_by_sw_if_index[sw_if_index]; if (classify_table_index != (u32) ~ 0) { dpo_id_t dpo = DPO_INVALID; dpo_set (&dpo, DPO_CLASSIFY, DPO_PROTO_IP4, classify_dpo_create (DPO_PROTO_IP4, classify_table_index)); fib_table_entry_special_dpo_add (fib_index, &pfx, FIB_SOURCE_CLASSIFY, FIB_ENTRY_FLAG_NONE, &dpo); dpo_reset (&dpo); } } fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), DPO_PROTO_IP4, &pfx.fp_addr, sw_if_index, // invalid FIB index ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); } static void ip4_del_interface_prefix_routes (ip4_main_t * im, u32 sw_if_index, u32 fib_index, ip4_address_t * address, u32 address_length) { ip_lookup_main_t *lm = &im->lookup_main; ip_interface_prefix_t *if_prefix; ip_interface_prefix_key_t key = { .prefix = { .fp_len = address_length, .fp_proto = FIB_PROTOCOL_IP4, .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length], }, .sw_if_index = sw_if_index, }; fib_prefix_t pfx_special = { .fp_len = 32, .fp_proto = FIB_PROTOCOL_IP4, }; if_prefix = ip_get_interface_prefix (lm, &key); if (!if_prefix) { clib_warning ("Prefix not found while deleting %U", 
format_ip4_address_and_length, address, address_length); return; } if_prefix->ref_count -= 1; /* * Routes need to be adjusted if deleting last intf addr in prefix * * We're done now otherwise */ if (if_prefix->ref_count > 0) return; /* length <= 30, delete glean route, first address, last address */ if (address_length <= 30) { /* Less work to do in FIB if we remove the covered /32s first */ /* first address in prefix */ pfx_special.fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[address_length]; pfx_special.fp_len = 32; if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32) fib_table_entry_special_remove (fib_index, &pfx_special, FIB_SOURCE_INTERFACE); /* prefix broadcast address */ pfx_special.fp_addr.ip4.as_u32 = address->as_u32 | ~im->fib_masks[address_length]; pfx_special.fp_len = 32; if (pfx_special.fp_addr.ip4.as_u32 != address->as_u32) fib_table_entry_special_remove (fib_index, &pfx_special, FIB_SOURCE_INTERFACE); } else if (address_length == 31) { /* length == 31, delete attached route for the other address */ pfx_special.fp_addr.ip4.as_u32 = address->as_u32 ^ clib_host_to_net_u32(1); fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE); } /* remove glean route for prefix */ pfx_special.fp_addr.ip4 = *address; pfx_special.fp_len = address_length; fib_table_entry_delete (fib_index, &pfx_special, FIB_SOURCE_INTERFACE); mhash_unset (&lm->prefix_to_if_prefix_index, &key, 0 /* old_value */); pool_put (lm->if_prefix_pool, if_prefix); } static void ip4_del_interface_routes (u32 sw_if_index, ip4_main_t * im, u32 fib_index, ip4_address_t * address, u32 address_length) { fib_prefix_t pfx = { .fp_len = 32, .fp_proto = FIB_PROTOCOL_IP4, .fp_addr.ip4 = *address, }; fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE); ip4_del_interface_prefix_routes (im, sw_if_index, fib_index, address, address_length); } #ifndef CLIB_MARCH_VARIANT void ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) { ip4_main_t *im = &ip4_main; 
vnet_main_t *vnm = vnet_get_main (); vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index); vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0); /* * enable/disable only on the 1<->0 transition */ if (is_enable) { if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index]) return; } else { ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0); if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index]) return; } vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index, !is_enable, 0, 0); vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled", sw_if_index, !is_enable, 0, 0); if (is_enable) hi->l3_if_count++; else if (hi->l3_if_count) hi->l3_if_count--; { ip4_enable_disable_interface_callback_t *cb; vec_foreach (cb, im->enable_disable_interface_callbacks) cb->function (im, cb->function_opaque, sw_if_index, is_enable); } } static clib_error_t * ip4_add_del_interface_address_internal (vlib_main_t * vm, u32 sw_if_index, ip4_address_t * address, u32 address_length, u32 is_del) { vnet_main_t *vnm = vnet_get_main (); ip4_main_t *im = &ip4_main; ip_lookup_main_t *lm = &im->lookup_main; clib_error_t *error = 0; u32 if_address_index; ip4_address_fib_t ip4_af, *addr_fib = 0; /* local0 interface doesn't support IP addressing */ if (sw_if_index == 0) { return clib_error_create ("local0 interface doesn't support IP addressing"); } vec_validate (im->fib_index_by_sw_if_index, sw_if_index); ip4_addr_fib_init (&ip4_af, address, vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); vec_add1 (addr_fib, ip4_af); /* * there is no support for adj-fib handling in the presence of overlapping * subnets on interfaces. Easy fix - disallow overlapping subnets, like * most routers do. */ /* *INDENT-OFF* */ if (!is_del) { /* When adding an address check that it does not conflict with an existing address on any interface in this table. 
*/ ip_interface_address_t *ia; vnet_sw_interface_t *sif; pool_foreach(sif, vnm->interface_main.sw_interfaces, ({ if (im->fib_index_by_sw_if_index[sw_if_index] == im->fib_index_by_sw_if_index[sif->sw_if_index]) { foreach_ip_interface_address (&im->lookup_main, ia, sif->sw_if_index, 0 /* honor unnumbered */ , ({ ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia); if (ip4_destination_matches_route (im, address, x, ia->address_length) || ip4_destination_matches_route (im, x, address, address_length)) { /* an intf may have >1 addr from the same prefix */ if ((sw_if_index == sif->sw_if_index) && (ia->address_length == address_length) && (x->as_u32 != address->as_u32)) continue; if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE) /* if the address we're comparing against is stale * then the CP has not added this one back yet, maybe * it never will, so we have to assume it won't and * ignore it. if it does add it back, then it will fail * because this one is now present */ continue; /* error if the length or intf was different */ vnm->api_errno = VNET_API_ERROR_ADDRESS_IN_USE; error = clib_error_create ("failed to add %U on %U which conflicts with %U for interface %U", format_ip4_address_and_length, address, address_length, format_vnet_sw_if_index_name, vnm, sw_if_index, format_ip4_address_and_length, x, ia->address_length, format_vnet_sw_if_index_name, vnm, sif->sw_if_index); goto done; } })); } })); } /* *INDENT-ON* */ if_address_index = ip_interface_address_find (lm, addr_fib, address_length); if (is_del) { if (~0 == if_address_index) { vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE; error = clib_error_create ("%U not found for interface %U", lm->format_address_and_length, addr_fib, address_length, format_vnet_sw_if_index_name, vnm, sw_if_index); goto done; } error = ip_interface_address_del (lm, vnm, if_address_index, addr_fib, address_length, sw_if_index); if (error) goto done; } else { if (~0 != if_address_index) { 
ip_interface_address_t *ia; ia = pool_elt_at_index (lm->if_address_pool, if_address_index); if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE) { if (ia->sw_if_index == sw_if_index) { /* re-adding an address during the replace action. * consdier this the update. clear the flag and * we're done */ ia->flags &= ~IP_INTERFACE_ADDRESS_FLAG_STALE; goto done; } else { /* The prefix is moving from one interface to another. * delete the stale and add the new */ ip4_add_del_interface_address_internal (vm, ia->sw_if_index, address, address_length, 1); ia = NULL; error = ip_interface_address_add (lm, sw_if_index, addr_fib, address_length, &if_address_index); } } else { vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS; error = clib_error_create ("Prefix %U already found on interface %U", lm->format_address_and_length, addr_fib, address_length, format_vnet_sw_if_index_name, vnm, ia->sw_if_index); } } else error = ip_interface_address_add (lm, sw_if_index, addr_fib, address_length, &if_address_index); } if (error) goto done; ip4_sw_interface_enable_disable (sw_if_index, !is_del); ip4_mfib_interface_enable_disable (sw_if_index, !is_del); /* intf addr routes are added/deleted on admin up/down */ if (vnet_sw_interface_is_admin_up (vnm, sw_if_index)) { if (is_del) ip4_del_interface_routes (sw_if_index, im, ip4_af.fib_index, address, address_length); else ip4_add_interface_routes (sw_if_index, im, ip4_af.fib_index, pool_elt_at_index (lm->if_address_pool, if_address_index)); } ip4_add_del_interface_address_callback_t *cb; vec_foreach (cb, im->add_del_interface_address_callbacks) cb->function (im, cb->function_opaque, sw_if_index, address, address_length, if_address_index, is_del); done: vec_free (addr_fib); return error; } clib_error_t * ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index, ip4_address_t * address, u32 address_length, u32 is_del) { return ip4_add_del_interface_address_internal (vm, sw_if_index, address, address_length, is_del); } void 
ip4_directed_broadcast (u32 sw_if_index, u8 enable) { ip_interface_address_t *ia; ip4_main_t *im; im = &ip4_main; /* * when directed broadcast is enabled, the subnet braodcast route will forward * packets using an adjacency with a broadcast MAC. otherwise it drops */ /* *INDENT-OFF* */ foreach_ip_interface_address(&im->lookup_main, ia, sw_if_index, 0, ({ if (ia->address_length <= 30) { ip4_address_t *ipa; ipa = ip_interface_address_get_address (&im->lookup_main, ia); fib_prefix_t pfx = { .fp_len = 32, .fp_proto = FIB_PROTOCOL_IP4, .fp_addr = { .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]), }, }; ip4_add_subnet_bcast_route (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4, sw_if_index), &pfx, sw_if_index); } })); /* *INDENT-ON* */ } #endif static clib_error_t * ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) { ip4_main_t *im = &ip4_main; ip_interface_address_t *ia; ip4_address_t *a; u32 is_admin_up, fib_index; /* Fill in lookup tables with default table (0). 
*/ vec_validate (im->fib_index_by_sw_if_index, sw_if_index); vec_validate_init_empty (im-> lookup_main.if_address_pool_index_by_sw_if_index, sw_if_index, ~0); is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); /* *INDENT-OFF* */ foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({ a = ip_interface_address_get_address (&im->lookup_main, ia); if (is_admin_up) ip4_add_interface_routes (sw_if_index, im, fib_index, ia); else ip4_del_interface_routes (sw_if_index, im, fib_index, a, ia->address_length); })); /* *INDENT-ON* */ return 0; } VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down); /* Built-in ip4 unicast rx feature path definition */ /* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip4_unicast, static) = { .arc_name = "ip4-unicast", .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"), .last_in_arc = "ip4-lookup", .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index, }; VNET_FEATURE_INIT (ip4_flow_classify, static) = { .arc_name = "ip4-unicast", .node_name = "ip4-flow-classify", .runs_before = VNET_FEATURES ("ip4-inacl"), }; VNET_FEATURE_INIT (ip4_inacl, static) = { .arc_name = "ip4-unicast", .node_name = "ip4-inacl", .runs_before = VNET_FEATURES ("ip4-policer-classify"), }; VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = { .arc_name = "ip4-unicast", .node_name = "ip4-source-and-port-range-check-rx", .runs_before = VNET_FEATURES ("ip4-policer-classify"), }; VNET_FEATURE_INIT (ip4_policer_classify, static) = { .arc_name = "ip4-unicast", .node_name = "ip4-policer-classify", .runs_before = VNET_FEATURES ("ipsec4-input-feature"), }; VNET_FEATURE_INIT (ip4_ipsec, static) = { .arc_name = "ip4-unicast", .node_name = "ipsec4-input-feature", .runs_before = VNET_FEATURES ("vpath-input-ip4"), }; VNET_FEATURE_INIT (ip4_vpath, static) = { .arc_name = "ip4-unicast", .node_name = "vpath-input-ip4", 
.runs_before = VNET_FEATURES ("ip4-vxlan-bypass"), }; VNET_FEATURE_INIT (ip4_vxlan_bypass, static) = { .arc_name = "ip4-unicast", .node_name = "ip4-vxlan-bypass", .runs_before = VNET_FEATURES ("ip4-lookup"), }; VNET_FEATURE_INIT (ip4_not_enabled, static) = { .arc_name = "ip4-unicast", .node_name = "ip4-not-enabled", .runs_before = VNET_FEATURES ("ip4-lookup"), }; VNET_FEATURE_INIT (ip4_lookup, static) = { .arc_name = "ip4-unicast", .node_name = "ip4-lookup", .runs_before = 0, /* not before any other features */ }; /* Built-in ip4 multicast rx feature path definition */ VNET_FEATURE_ARC_INIT (ip4_multicast, static) = { .arc_name = "ip4-multicast", .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"), .last_in_arc = "ip4-mfib-forward-lookup", .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index, }; VNET_FEATURE_INIT (ip4_vpath_mc, static) = { .arc_name = "ip4-multicast", .node_name = "vpath-input-ip4", .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"), }; VNET_FEATURE_INIT (ip4_mc_not_enabled, static) = { .arc_name = "ip4-multicast", .node_name = "ip4-not-enabled", .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"), }; VNET_FEATURE_INIT (ip4_lookup_mc, static) = { .arc_name = "ip4-multicast", .node_name = "ip4-mfib-forward-lookup", .runs_before = 0, /* last feature */ }; /* Source and port-range check ip4 tx feature path definition */ VNET_FEATURE_ARC_INIT (ip4_output, static) = { .arc_name = "ip4-output", .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"), .last_in_arc = "interface-output", .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index, }; VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = { .arc_name = "ip4-output", .node_name = "ip4-source-and-port-range-check-tx", .runs_before = VNET_FEATURES ("ip4-outacl"), }; VNET_FEATURE_INIT (ip4_outacl, static) = { .arc_name = "ip4-output", .node_name = "ip4-outacl", .runs_before = VNET_FEATURES ("ipsec4-output-feature"), }; 
VNET_FEATURE_INIT (ip4_ipsec_output, static) = { .arc_name = "ip4-output", .node_name = "ipsec4-output-feature", .runs_before = VNET_FEATURES ("interface-output"), }; /* Built-in ip4 tx feature path definition */ VNET_FEATURE_INIT (ip4_interface_output, static) = { .arc_name = "ip4-output", .node_name = "interface-output", .runs_before = 0, /* not before any other features */ }; /* *INDENT-ON* */ static clib_error_t * ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) { ip4_main_t *im = &ip4_main; /* Fill in lookup tables with default table (0). */ vec_validate (im->fib_index_by_sw_if_index, sw_if_index); vec_validate (im->mfib_index_by_sw_if_index, sw_if_index); if (!is_add) { ip4_main_t *im4 = &ip4_main; ip_lookup_main_t *lm4 = &im4->lookup_main; ip_interface_address_t *ia = 0; ip4_address_t *address; vlib_main_t *vm = vlib_get_main (); vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0); /* *INDENT-OFF* */ foreach_ip_interface_address (lm4, ia, sw_if_index, 0, ({ address = ip_interface_address_get_address (lm4, ia); ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1); })); /* *INDENT-ON* */ ip4_mfib_interface_enable_disable (sw_if_index, 0); } vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index, is_add, 0, 0); vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled", sw_if_index, is_add, 0, 0); return /* no error */ 0; } VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del); /* Global IP4 main. 
*/ #ifndef CLIB_MARCH_VARIANT ip4_main_t ip4_main; #endif /* CLIB_MARCH_VARIANT */ static clib_error_t * ip4_lookup_init (vlib_main_t * vm) { ip4_main_t *im = &ip4_main; clib_error_t *error; uword i; if ((error = vlib_call_init_function (vm, vnet_feature_init))) return error; if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init))) return (error); if ((error = vlib_call_init_function (vm, fib_module_init))) return error; if ((error = vlib_call_init_function (vm, mfib_module_init))) return error; for (i = 0; i < ARRAY_LEN (im->fib_masks); i++) { u32 m; if (i < 32) m = pow2_mask (i) << (32 - i); else m = ~0; im->fib_masks[i] = clib_host_to_net_u32 (m); } ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0); /* Create FIB with index 0 and table id of 0. */ fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0, FIB_SOURCE_DEFAULT_ROUTE); mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0, MFIB_SOURCE_DEFAULT_ROUTE); { pg_node_t *pn; pn = pg_get_node (ip4_lookup_node.index); pn->unformat_edit = unformat_pg_ip4_header; } { ethernet_arp_header_t h; clib_memset (&h, 0, sizeof (h)); #define _16(f,v) h.f = clib_host_to_net_u16 (v); #define _8(f,v) h.f = v; _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet); _16 (l3_type, ETHERNET_TYPE_IP4); _8 (n_l2_address_bytes, 6); _8 (n_l3_address_bytes, 4); _16 (opcode, ETHERNET_ARP_OPCODE_request); #undef _16 #undef _8 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template, /* data */ &h, sizeof (h), /* alloc chunk size */ 8, "ip4 arp"); } return error; } VLIB_INIT_FUNCTION (ip4_lookup_init); typedef struct { /* Adjacency taken. */ u32 dpo_index; u32 flow_hash; u32 fib_index; /* Packet data, possibly *after* rewrite. 
*/ u8 packet_data[64 - 1 * sizeof (u32)]; } ip4_forward_next_trace_t; #ifndef CLIB_MARCH_VARIANT u8 * format_ip4_forward_next_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *); u32 indent = format_get_indent (s); s = format (s, "%U%U", format_white_space, indent, format_ip4_header, t->packet_data, sizeof (t->packet_data)); return s; } #endif static u8 * format_ip4_lookup_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *); u32 indent = format_get_indent (s); s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x", t->fib_index, t->dpo_index, t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, format_ip4_header, t->packet_data, sizeof (t->packet_data)); return s; } static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *); u32 indent = format_get_indent (s); s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x", t->fib_index, t->dpo_index, format_ip_adjacency, t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash); s = format (s, "\n%U%U", format_white_space, indent, format_ip_adjacency_packet_data, t->packet_data, sizeof (t->packet_data)); return s; } #ifndef CLIB_MARCH_VARIANT /* Common trace function for all ip4-forward next nodes. 
*/ void ip4_forward_next_trace (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index) { u32 *from, n_left; ip4_main_t *im = &ip4_main; n_left = frame->n_vectors; from = vlib_frame_vector_args (frame); while (n_left >= 4) { u32 bi0, bi1; vlib_buffer_t *b0, *b1; ip4_forward_next_trace_t *t0, *t1; /* Prefetch next iteration. */ vlib_prefetch_buffer_with_index (vm, from[2], LOAD); vlib_prefetch_buffer_with_index (vm, from[3], LOAD); bi0 = from[0]; bi1 = from[1]; b0 = vlib_get_buffer (vm, bi0); b1 = vlib_get_buffer (vm, bi1); if (b0->flags & VLIB_BUFFER_IS_TRACED) { t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index]; t0->flow_hash = vnet_buffer (b0)->ip.flow_hash; t0->fib_index = (vnet_buffer (b0)->sw_if_index[VLIB_TX] != (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] : vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (b0)->sw_if_index[VLIB_RX]); clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0), sizeof (t0->packet_data)); } if (b1->flags & VLIB_BUFFER_IS_TRACED) { t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index]; t1->flow_hash = vnet_buffer (b1)->ip.flow_hash; t1->fib_index = (vnet_buffer (b1)->sw_if_index[VLIB_TX] != (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] : vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (b1)->sw_if_index[VLIB_RX]); clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1), sizeof (t1->packet_data)); } from += 2; n_left -= 2; } while (n_left >= 1) { u32 bi0; vlib_buffer_t *b0; ip4_forward_next_trace_t *t0; bi0 = from[0]; b0 = vlib_get_buffer (vm, bi0); if (b0->flags & VLIB_BUFFER_IS_TRACED) { t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index]; t0->flow_hash = vnet_buffer (b0)->ip.flow_hash; t0->fib_index = (vnet_buffer (b0)->sw_if_index[VLIB_TX] != (u32) ~ 0) ? 
vnet_buffer (b0)->sw_if_index[VLIB_TX] : vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (b0)->sw_if_index[VLIB_RX]); clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0), sizeof (t0->packet_data)); } from += 1; n_left -= 1; } } /* Compute TCP/UDP/ICMP4 checksum in software. */ u16 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip4_header_t * ip0) { ip_csum_t sum0; u32 ip_header_length, payload_length_host_byte_order; /* Initialize checksum with ip header. */ ip_header_length = ip4_header_bytes (ip0); payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length; sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16)); if (BITS (uword) == 32) { sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32)); sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32)); } else sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64)); return ip_calculate_l4_checksum (vm, p0, sum0, payload_length_host_byte_order, (u8 *) ip0, ip_header_length, NULL); } u32 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) { ip4_header_t *ip0 = vlib_buffer_get_current (p0); udp_header_t *udp0; u16 sum16; ASSERT (ip0->protocol == IP_PROTOCOL_TCP || ip0->protocol == IP_PROTOCOL_UDP); udp0 = (void *) (ip0 + 1); if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0) { p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT); return p0->flags; } sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0); p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT)); return p0->flags; } #endif /* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip4_local) = { .arc_name = "ip4-local", .start_nodes = VNET_FEATURES ("ip4-local"), .last_in_arc = "ip4-local-end-of-arc", }; /* *INDENT-ON* */ static inline void ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p, 
/* NOTE(review): tail of ip4_local_l4_csum_validate — the signature begins
 * above this chunk.  Validates the TCP/UDP checksum of buffer 'p' and, for
 * UDP, cross-checks the UDP datagram length against the IP total length. */
		     ip4_header_t * ip, u8 is_udp, u8 * error,
		     u8 * good_tcp_udp)
{
  u32 flags0;
  flags0 = ip4_tcp_udp_validate_checksum (vm, p);
  *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
  if (is_udp)
    {
      udp_header_t *udp;
      u32 ip_len, udp_len;
      i32 len_diff;
      udp = ip4_next_header (ip);
      /* Verify UDP length. */
      ip_len = clib_net_to_host_u16 (ip->length);
      udp_len = clib_net_to_host_u16 (udp->length);
      len_diff = ip_len - udp_len;
      /* UDP length may not exceed the space the IP header claims. */
      *good_tcp_udp &= len_diff >= 0;
      *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
    }
}

/* True when the L4 checksum is to be computed by hardware offload. */
#define ip4_local_csum_is_offloaded(_b) \
  _b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM \
    || _b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM

/* A software validation pass is needed only for TCP/UDP packets whose
 * checksum has been neither pre-computed nor offloaded. */
#define ip4_local_need_csum_check(is_tcp_udp, _b) \
    (is_tcp_udp && !(_b->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED \
	|| ip4_local_csum_is_offloaded (_b)))

/* Checksum is acceptable if already verified correct or left to offload. */
#define ip4_local_csum_is_valid(_b) \
  (_b->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT \
   || (ip4_local_csum_is_offloaded (_b))) != 0

/* Single-buffer L4 checksum check: validate in software only when
 * required, then fold a failure into *error.  Relies on the UDP error
 * code immediately following the TCP one (see ASSERT). */
static inline void
ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
			 ip4_header_t * ih, u8 * error)
{
  u8 is_udp, is_tcp_udp, good_tcp_udp;

  is_udp = ih->protocol == IP_PROTOCOL_UDP;
  is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;

  if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
    ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
  else
    good_tcp_udp = ip4_local_csum_is_valid (b);

  /* IP4_ERROR_TCP_CHECKSUM + is_udp selects the right error code. */
  ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
  *error = (is_tcp_udp && !good_tcp_udp ?
	    IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
}

/* Two-buffer variant of ip4_local_check_l4_csum; optimistically assumes
 * both checksums are valid and falls back to per-buffer software
 * validation only when at least one buffer needs it. */
static inline void
ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
			    ip4_header_t ** ih, u8 * error)
{
  u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];

  is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
  is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
  is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
  is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;

  good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
  good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);

  if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
		     || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
    {
      if (is_tcp_udp[0])
	ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0],
				    &error[0], &good_tcp_udp[0]);
      if (is_tcp_udp[1])
	ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1],
				    &error[1], &good_tcp_udp[1]);
    }

  error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
	      IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
  error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
	      IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
}

/* Map the per-buffer error into next node and buffer error counter; at
 * the head of the feature arc, error-free packets are handed to the
 * ip4-local feature chain instead of going straight to the L4 node. */
static inline void
ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
			      vlib_buffer_t * b, u16 * next, u8 error,
			      u8 head_of_feature_arc)
{
  u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
  u32 next_index;

  *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
  b->error = error ? error_node->errors[error] : 0;

  if (head_of_feature_arc)
    {
      next_index = *next;
      if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
	{
	  vnet_feature_arc_start (arc_index,
				  vnet_buffer (b)->sw_if_index[VLIB_RX],
				  &next_index, b);
	  *next = next_index;
	}
    }
}

/* One-entry cache of the most recent source-address uRPF lookup, used to
 * skip the mtrie walk when consecutive packets share a source. */
typedef struct
{
  ip4_address_t src;		/* last source address looked up */
  u32 lbi;			/* its load-balance (adjacency) index */
  u8 error;			/* error computed for that source */
  u8 first;			/* 1 until the first lookup primes the cache */
} ip4_local_last_check_t;

/* Source (uRPF) check for a single buffer, with last-source caching. */
static inline void
ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
		     ip4_local_last_check_t * last_check, u8 * error0)
{
  ip4_fib_mtrie_leaf_t leaf0;
  ip4_fib_mtrie_t *mtrie0;
  const dpo_id_t *dpo0;
  load_balance_t *lb0;
  u32 lbi0;

  /* Honour a per-packet FIB override in sw_if_index[VLIB_TX] if set. */
  vnet_buffer (b)->ip.fib_index =
    vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;

  /*
   * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
   * adjacency for the destination address (the local interface address).
   * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
   * adjacency for the source address (the remote sender's address)
   */
  if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
      last_check->first)
    {
      mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
      leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
      leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
      leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
      lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);

      vnet_buffer (b)->ip.adj_index[VLIB_RX] =
	vnet_buffer (b)->ip.adj_index[VLIB_TX];
      vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;

      lb0 = load_balance_get (lbi0);
      dpo0 = load_balance_get_bucket_i (lb0, 0);

      /*
       * Must have a route to source otherwise we drop the packet.
       * ip4 broadcasts are accepted, e.g. to make dhcp client work
       *
       * The checks are:
       *  - the source is a recieve => it's from us => bogus, do this
       *    first since it sets a different error code.
       *  - uRPF check for any route to source - accept if passes.
       *  - allow packets destined to the broadcast address from unknown sources
       */

      *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
		  dpo0->dpoi_type == DPO_RECEIVE) ?
		 IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
      *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
		  !fib_urpf_check_size (lb0->lb_urpf) &&
		  ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
		 IP4_ERROR_SRC_LOOKUP_MISS : *error0);

      /* Remember this source so the next packet can skip the lookup. */
      last_check->src.as_u32 = ip0->src_address.as_u32;
      last_check->lbi = lbi0;
      last_check->error = *error0;
      last_check->first = 0;
    }
  else
    {
      /* Cache hit: reuse the previous lookup's result and error. */
      vnet_buffer (b)->ip.adj_index[VLIB_RX] =
	vnet_buffer (b)->ip.adj_index[VLIB_TX];
      vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
      *error0 = last_check->error;
    }
}

/* Two-buffer source (uRPF) check.  Performs the full lookup if either
 * source differs from the cached one; the cache is refreshed from the
 * SECOND buffer's result on that path. */
static inline void
ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
			ip4_local_last_check_t * last_check, u8 * error)
{
  ip4_fib_mtrie_leaf_t leaf[2];
  ip4_fib_mtrie_t *mtrie[2];
  const dpo_id_t *dpo[2];
  load_balance_t *lb[2];
  u32 not_last_hit;
  u32 lbi[2];

  /* Non-zero when either source misses the one-entry cache. */
  not_last_hit = last_check->first;
  not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
  not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;

  /* Honour a per-packet FIB override in sw_if_index[VLIB_TX] if set. */
  vnet_buffer (b[0])->ip.fib_index =
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[0])->ip.fib_index;

  vnet_buffer (b[1])->ip.fib_index =
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
    vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
    vnet_buffer (b[1])->ip.fib_index;

  /*
   * vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
   * adjacency for the destination address (the local interface address).
   * vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
   * adjacency for the source address (the remote sender's address)
   */
  if (PREDICT_TRUE (not_last_hit))
    {
      /* Interleaved mtrie walk for both sources. */
      mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
      mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;

      leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
      leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);

      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
					   &ip[0]->src_address, 2);
      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
					   &ip[1]->src_address, 2);

      leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
					   &ip[0]->src_address, 3);
      leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
					   &ip[1]->src_address, 3);

      lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
      lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);

      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
	vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];

      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
	vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];

      lb[0] = load_balance_get (lbi[0]);
      lb[1] = load_balance_get (lbi[1]);

      dpo[0] = load_balance_get_bucket_i (lb[0], 0);
      dpo[1] = load_balance_get_bucket_i (lb[1], 0);

      /* Same spoofed-local / uRPF-miss checks as ip4_local_check_src. */
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
		   dpo[0]->dpoi_type == DPO_RECEIVE) ?
		  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
      error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
		   !fib_urpf_check_size (lb[0]->lb_urpf) &&
		   ip[0]->dst_address.as_u32 != 0xFFFFFFFF) ?
		  IP4_ERROR_SRC_LOOKUP_MISS : error[0]);

      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
		   dpo[1]->dpoi_type == DPO_RECEIVE) ?
		  IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
      error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
		   !fib_urpf_check_size (lb[1]->lb_urpf) &&
		   ip[1]->dst_address.as_u32 != 0xFFFFFFFF) ?
		  IP4_ERROR_SRC_LOOKUP_MISS : error[1]);

      last_check->src.as_u32 = ip[1]->src_address.as_u32;
      last_check->lbi = lbi[1];
      last_check->error = error[1];
      last_check->first = 0;
    }
  else
    {
      /* Both buffers hit the cache: reuse the cached result. */
      vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
	vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;

      vnet_buffer (b[1])->ip.adj_index[VLIB_RX] =
	vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
      vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;

      error[0] = last_check->error;
      error[1] = last_check->error;
    }
}

enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4,
  IP_LOCAL_PACKET_TYPE_NAT,
  IP_LOCAL_PACKET_TYPE_FRAG,
};

/**
 * Determine packet type and next node.
 *
 * The expectation is that all packets that are not L4 will skip
 * checksums and source checks.
 */
always_inline u8
ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
{
  ip_lookup_main_t *lm = &ip4_main.lookup_main;

  if (PREDICT_FALSE (ip4_is_fragment (ip)))
    {
      *next = IP_LOCAL_NEXT_REASSEMBLY;
      return IP_LOCAL_PACKET_TYPE_FRAG;
    }
  if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
    {
      *next = lm->local_next_by_ip_protocol[ip->protocol];
      return IP_LOCAL_PACKET_TYPE_NAT;
    }

  *next = lm->local_next_by_ip_protocol[ip->protocol];
  return IP_LOCAL_PACKET_TYPE_L4;
}

/* Main ip4-local dispatch loop (dual + single buffer).  With
 * head_of_feature_arc set, L4 packets get checksum and uRPF source
 * checks before being dispatched; NAT'd and fragmented packets skip
 * the checks as ip4_local_classify documents. */
static inline uword
ip4_local_inline (vlib_main_t * vm,
		  vlib_node_runtime_t * node,
		  vlib_frame_t * frame, int head_of_feature_arc)
{
  u32 *from, n_left_from;
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_local_node.index);
  u16 nexts[VLIB_FRAME_SIZE], *next;
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  ip4_header_t *ip[2];
  u8 error[2], pt[2];

  ip4_local_last_check_t last_check = {
    /*
     * 0.0.0.0 can appear as the source address of an IP packet,
     * as can any other address, hence the need to use the 'first'
     * member to make sure the .lbi is initialised for the first
     * packet.
     */
    .src = {.as_u32 = 0},
    .lbi = ~0,
    .error = IP4_ERROR_UNKNOWN_PROTOCOL,
    .first = 1,
  };

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  vlib_get_buffers (vm, from, bufs, n_left_from);
  b = bufs;
  next = nexts;

  /* Dual-buffer loop; >= 6 leaves room for the 2-ahead prefetch. */
  while (n_left_from >= 6)
    {
      u8 not_batch = 0;

      /* Prefetch next iteration. */
      {
	vlib_prefetch_buffer_header (b[4], LOAD);
	vlib_prefetch_buffer_header (b[5], LOAD);
	CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
	CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
      }

      error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;

      ip[0] = vlib_buffer_get_current (b[0]);
      ip[1] = vlib_buffer_get_current (b[1]);

      vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
      vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;

      pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
      pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);

      /* Non-zero when the pair cannot be processed identically. */
      not_batch = pt[0] ^ pt[1];

      if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
	goto skip_checks;

      if (PREDICT_TRUE (not_batch == 0))
	{
	  /* Both buffers are L4: use the paired fast-path checks. */
	  ip4_local_check_l4_csum_x2 (vm, b, ip, error);
	  ip4_local_check_src_x2 (b, ip, &last_check, error);
	}
      else
	{
	  /* Mixed pair: check each L4 buffer individually. */
	  if (!pt[0])
	    {
	      ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
	      ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
	    }
	  if (!pt[1])
	    {
	      ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
	      ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
	    }
	}

    skip_checks:

      ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
				    head_of_feature_arc);
      ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
				    head_of_feature_arc);

      b += 2;
      next += 2;
      n_left_from -= 2;
    }

  /* Scalar tail loop. */
  while (n_left_from > 0)
    {
      error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;

      ip[0] = vlib_buffer_get_current (b[0]);
      vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
      pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);

      if (head_of_feature_arc == 0 || pt[0])
	goto skip_check;

      ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
      ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);

    skip_check:

      ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
				    head_of_feature_arc);

      b += 1;
      next += 1;
      n_left_from -= 1;
    }

  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  return frame->n_vectors;
}

VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
			       vlib_frame_t * frame)
{
  return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_local_node) =
{
  .name = "ip4-local",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_errors = IP4_N_ERROR,
  .error_strings = ip4_error_strings,
  .n_next_nodes = IP_LOCAL_N_NEXT,
  .next_nodes =
  {
    [IP_LOCAL_NEXT_DROP] = "ip4-drop",
    [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
    [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
    [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
    [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
  },
};
/* *INDENT-ON* */

/* Same dispatch loop, run as the last feature on the ip4-local arc, so
 * the checks performed at the head of the arc are not repeated. */
VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
					  vlib_node_runtime_t * node,
					  vlib_frame_t * frame)
{
  return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
  .name = "ip4-local-end-of-arc",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,
  .sibling_of = "ip4-local",
};

VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
  .arc_name = "ip4-local",
  .node_name = "ip4-local-end-of-arc",
  .runs_before = 0, /* not before any other features */
};
/* *INDENT-ON* */

#ifndef CLIB_MARCH_VARIANT
/* Register 'node_index' as the handler for an IP protocol in ip4-local. */
void
ip4_register_protocol (u32 protocol, u32 node_index)
{
  vlib_main_t *vm = vlib_get_main ();
  ip4_main_t *im = &ip4_main;
  ip_lookup_main_t *lm = &im->lookup_main;

  ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
  lm->local_next_by_ip_protocol[protocol] =
    vlib_node_add_next (vm, ip4_local_node.index, node_index);
}

/* Revert an IP protocol to the default punt disposition.
 * NOTE(review): continues in the next chunk. */
void
ip4_unregister_protocol (u32 protocol)
{
  ip4_main_t *im =
    &ip4_main;
  ip_lookup_main_t *lm = &im->lookup_main;

  ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
  lm->local_next_by_ip_protocol[protocol] = IP_LOCAL_NEXT_PUNT;
}
#endif

/* CLI: list the protocols with a registered ip4-local handler node. */
static clib_error_t *
show_ip_local_command_fn (vlib_main_t * vm,
			  unformat_input_t * input, vlib_cli_command_t * cmd)
{
  ip4_main_t *im = &ip4_main;
  ip_lookup_main_t *lm = &im->lookup_main;
  int i;

  vlib_cli_output (vm, "Protocols handled by ip4_local");
  for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
    {
      if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
	{
	  u32 node_index = vlib_get_node (vm,
					  ip4_local_node.index)->
	    next_nodes[lm->local_next_by_ip_protocol[i]];
	  vlib_cli_output (vm, "%U: %U", format_ip_protocol, i,
			   format_vlib_node_name, vm, node_index);
	}
    }
  return 0;
}

/*?
 * Display the set of protocols handled by the local IPv4 stack.
 *
 * @cliexpar
 * Example of how to display local protocol table:
 * @cliexstart{show ip local}
 * Protocols handled by ip4_local
 * 1
 * 17
 * 47
 * @cliexend
?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_local, static) =
{
  .path = "show ip local",
  .function = show_ip_local_command_fn,
  .short_help = "show ip local",
};
/* *INDENT-ON* */

typedef enum
{
  IP4_REWRITE_NEXT_DROP,
  IP4_REWRITE_NEXT_ICMP_ERROR,
  IP4_REWRITE_NEXT_FRAGMENT,
  IP4_REWRITE_N_NEXT		/* Last */
} ip4_rewrite_next_t;

/**
 * This bits of an IPv4 address to mask to construct a multicast
 * MAC address
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif

/* If the packet exceeds the adjacency MTU, either raise an ICMP
 * "fragmentation needed" (DF set) or queue it for IP fragmentation. */
always_inline void
ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
	       u16 adj_packet_bytes, bool df, u16 * next,
	       u8 is_midchain, u32 * error)
{
  if (packet_len > adj_packet_bytes)
    {
      *error = IP4_ERROR_MTU_EXCEEDED;
      if (df)
	{
	  icmp4_error_set_vnet_buffer
	    (b, ICMP4_destination_unreachable,
	     ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
	     adj_packet_bytes);
	  *next = IP4_REWRITE_NEXT_ICMP_ERROR;
	}
      else
	{
	  /* IP fragmentation */
	  ip_frag_set_vnet_buffer (b, adj_packet_bytes,
				   (is_midchain ?
				    IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN :
				    IP_FRAG_NEXT_IP_REWRITE), 0);
	  *next = IP4_REWRITE_NEXT_FRAGMENT;
	}
    }
}

/* increment TTL & update checksum.
   Works either endian, so no need for byte swap. */
static_always_inline void
ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
{
  i32 ttl;
  u32 checksum;
  if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
    return;

  ttl = ip->ttl;

  /* Incremental checksum update per RFC 1071: undo the TTL decrement. */
  checksum = ip->checksum - clib_host_to_net_u16 (0x0100);
  checksum += checksum >= 0xffff;

  ip->checksum = checksum;
  ttl += 1;
  ip->ttl = ttl;

  ASSERT (ip4_header_checksum_is_valid (ip));
}

/* Decrement TTL & update checksum.
   Works either endian, so no need for byte swap. */
static_always_inline void
ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip,
			    u16 * next, u32 * error)
{
  i32 ttl;
  u32 checksum;
  if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
    return;

  ttl = ip->ttl;

  /* Input node should have reject packets with ttl 0. */
  ASSERT (ip->ttl > 0);

  /* Incremental checksum update per RFC 1071 for the TTL decrement. */
  checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
  checksum += checksum >= 0xffff;

  ip->checksum = checksum;
  ttl -= 1;
  ip->ttl = ttl;

  /*
   * If the ttl drops below 1 when forwarding, generate
   * an ICMP response.
   */
  if (PREDICT_FALSE (ttl <= 0))
    {
      *error = IP4_ERROR_TIME_EXPIRED;
      vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
      icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
				   ICMP4_time_exceeded_ttl_exceeded_in_transit,
				   0);
      *next = IP4_REWRITE_NEXT_ICMP_ERROR;
    }

  /* Verify checksum. */
  ASSERT (ip4_header_checksum_is_valid (ip) ||
	  (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
}

/* Core ip4 rewrite: TTL/checksum update, MTU check, adjacency rewrite,
 * feature-arc start, counters, and optional midchain/mcast fixups.
 * Compile-time flags select counters, midchain and mcast behaviour.
 * NOTE(review): body continues in the following chunks. */
always_inline uword
ip4_rewrite_inline_with_gso (vlib_main_t * vm,
			     vlib_node_runtime_t * node,
			     vlib_frame_t * frame,
			     int do_counters, int is_midchain, int is_mcast)
{
  ip_lookup_main_t *lm = &ip4_main.lookup_main;
  u32 *from = vlib_frame_vector_args (frame);
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
  u16 nexts[VLIB_FRAME_SIZE], *next;
  u32 n_left_from;
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_input_node.index);

  n_left_from = frame->n_vectors;
  u32 thread_index = vm->thread_index;

  vlib_get_buffers (vm, from, bufs, n_left_from);
  clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);

#if (CLIB_N_PREFETCHES >= 8)
  if (n_left_from >= 6)
    {
      int i;
      for (i = 2; i < 6; i++)
	vlib_prefetch_buffer_header (bufs[i], LOAD);
    }

  next = nexts;
  b = bufs;
  /* Dual-buffer loop (>= 8 keeps the prefetch pipeline full). */
  while (n_left_from >= 8)
    {
      const ip_adjacency_t *adj0, *adj1;
      ip4_header_t *ip0, *ip1;
      u32 rw_len0, error0, adj_index0;
      u32 rw_len1, error1, adj_index1;
      u32 tx_sw_if_index0, tx_sw_if_index1;
      u8 *p;

      if (is_midchain)
	{
	  vlib_prefetch_buffer_header (b[6], LOAD);
	  vlib_prefetch_buffer_header (b[7], LOAD);
	}

      adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
      adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];

      /*
       * pre-fetch the per-adjacency counters
       */
      if (do_counters)
	{
	  vlib_prefetch_combined_counter (&adjacency_counters,
					  thread_index, adj_index0);
	  vlib_prefetch_combined_counter (&adjacency_counters,
					  thread_index, adj_index1);
	}

      ip0 = vlib_buffer_get_current (b[0]);
      ip1 = vlib_buffer_get_current (b[1]);

      error0 = error1 = IP4_ERROR_NONE;

      ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
      ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);

      /* Rewrite packet header and updates lengths. */
      adj0 = adj_get (adj_index0);
      adj1 = adj_get (adj_index1);

      /* Worth pipelining. No guarantee that adj0,1 are hot...
       */
      rw_len0 = adj0[0].rewrite_header.data_bytes;
      rw_len1 = adj1[0].rewrite_header.data_bytes;
      vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
      vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;

      p = vlib_buffer_get_current (b[2]);
      CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
      CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);

      p = vlib_buffer_get_current (b[3]);
      CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
      CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);

      /* Check MTU of outgoing interface. */
      u16 ip0_len = clib_net_to_host_u16 (ip0->length);
      u16 ip1_len = clib_net_to_host_u16 (ip1->length);

      /* For GSO packets check against the segment-payload MTU instead. */
      if (b[0]->flags & VNET_BUFFER_F_GSO)
	ip0_len = gso_mtu_sz (b[0]);
      if (b[1]->flags & VNET_BUFFER_F_GSO)
	ip1_len = gso_mtu_sz (b[1]);

      ip4_mtu_check (b[0], ip0_len,
		     adj0[0].rewrite_header.max_l3_packet_bytes,
		     ip0->flags_and_fragment_offset &
		     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
		     next + 0, is_midchain, &error0);
      ip4_mtu_check (b[1], ip1_len,
		     adj1[0].rewrite_header.max_l3_packet_bytes,
		     ip1->flags_and_fragment_offset &
		     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
		     next + 1, is_midchain, &error1);

      if (is_mcast)
	{
	  /* Multicast may not be forwarded back out its RX interface. */
	  error0 = ((adj0[0].rewrite_header.sw_if_index ==
		     vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
		    IP4_ERROR_SAME_INTERFACE : error0);
	  error1 = ((adj1[0].rewrite_header.sw_if_index ==
		     vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
		    IP4_ERROR_SAME_INTERFACE : error1);
	}

      /* Don't adjust the buffer for ttl issue; icmp-error node wants
       * to see the IP header */
      if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
	{
	  u32 next_index = adj0[0].rewrite_header.next_index;
	  vlib_buffer_advance (b[0], -(word) rw_len0);
	  tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
	  vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;

	  if (PREDICT_FALSE
	      (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
	    vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
						tx_sw_if_index0,
						&next_index, b[0],
						adj0->ia_cfg_index);
	  next[0] = next_index;
	  if (is_midchain)
	    vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
					0 /* is_ip6 */ );
	}
      else
	{
	  b[0]->error = error_node->errors[error0];
	  /* MTU-exceeded packets will revisit rewrite: restore the TTL. */
	  if (error0 == IP4_ERROR_MTU_EXCEEDED)
	    ip4_ttl_inc (b[0], ip0);
	}
      if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
	{
	  u32 next_index = adj1[0].rewrite_header.next_index;
	  vlib_buffer_advance (b[1], -(word) rw_len1);

	  tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
	  vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;

	  if (PREDICT_FALSE
	      (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
	    vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
						tx_sw_if_index1,
						&next_index, b[1],
						adj1->ia_cfg_index);
	  next[1] = next_index;
	  if (is_midchain)
	    vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
					0 /* is_ip6 */ );
	}
      else
	{
	  b[1]->error = error_node->errors[error1];
	  if (error1 == IP4_ERROR_MTU_EXCEEDED)
	    ip4_ttl_inc (b[1], ip1);
	}

      if (is_midchain)
	/* Guess we are only writing on ipv4 header. */
	vnet_rewrite_two_headers (adj0[0], adj1[0],
				  ip0, ip1, sizeof (ip4_header_t));
      else
	/* Guess we are only writing on simple Ethernet header. */
	vnet_rewrite_two_headers (adj0[0], adj1[0],
				  ip0, ip1, sizeof (ethernet_header_t));

      if (do_counters)
	{
	  if (error0 == IP4_ERROR_NONE)
	    vlib_increment_combined_counter
	      (&adjacency_counters,
	       thread_index,
	       adj_index0, 1,
	       vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);

	  if (error1 == IP4_ERROR_NONE)
	    vlib_increment_combined_counter
	      (&adjacency_counters,
	       thread_index,
	       adj_index1, 1,
	       vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
	}

      if (is_midchain)
	{
	  if (error0 == IP4_ERROR_NONE)
	    adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);
	  if (error1 == IP4_ERROR_NONE)
	    adj_midchain_fixup (vm, adj1, b[1], VNET_LINK_IP4);
	}

      if (is_mcast)
	{
	  /* copy bytes from the IP address into the MAC rewrite */
	  if (error0 == IP4_ERROR_NONE)
	    vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
					adj0->rewrite_header.dst_mcast_offset,
					&ip0->dst_address.as_u32, (u8 *) ip0);
	  if (error1 == IP4_ERROR_NONE)
	    vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
					adj1->rewrite_header.dst_mcast_offset,
					&ip1->dst_address.as_u32, (u8 *) ip1);
	}

      next += 2;
      b += 2;
      n_left_from -= 2;
    }
#elif (CLIB_N_PREFETCHES >= 4)
  next = nexts;
  b = bufs;
  /* Single-buffer loop with one-ahead prefetching. */
  while (n_left_from >= 1)
    {
      ip_adjacency_t *adj0;
      ip4_header_t *ip0;
      u32 rw_len0, error0, adj_index0;
      u32 tx_sw_if_index0;
      u8 *p;

      /* Prefetch next iteration */
      if (PREDICT_TRUE (n_left_from >= 4))
	{
	  ip_adjacency_t *adj2;
	  u32 adj_index2;

	  vlib_prefetch_buffer_header (b[3], LOAD);
	  vlib_prefetch_buffer_data (b[2], LOAD);

	  /* Prefetch adj->rewrite_header */
	  adj_index2 = vnet_buffer (b[2])->ip.adj_index[VLIB_TX];
	  adj2 = adj_get (adj_index2);
	  p = (u8 *) adj2;
	  CLIB_PREFETCH (p + CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES,
			 LOAD);
	}

      adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];

      /*
       * Prefetch the per-adjacency counters
       */
      if (do_counters)
	{
	  vlib_prefetch_combined_counter (&adjacency_counters,
					  thread_index, adj_index0);
	}

      ip0 = vlib_buffer_get_current (b[0]);

      error0 = IP4_ERROR_NONE;

      ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);

      /* Rewrite packet header and updates lengths. */
      adj0 = adj_get (adj_index0);

      /* Rewrite header was prefetched. */
      rw_len0 = adj0[0].rewrite_header.data_bytes;
      vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;

      /* Check MTU of outgoing interface. */
      u16 ip0_len = clib_net_to_host_u16 (ip0->length);
      if (b[0]->flags & VNET_BUFFER_F_GSO)
	ip0_len = gso_mtu_sz (b[0]);

      ip4_mtu_check (b[0], ip0_len,
		     adj0[0].rewrite_header.max_l3_packet_bytes,
		     ip0->flags_and_fragment_offset &
		     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
		     next + 0, is_midchain, &error0);

      if (is_mcast)
	{
	  error0 = ((adj0[0].rewrite_header.sw_if_index ==
		     vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
		    IP4_ERROR_SAME_INTERFACE : error0);
	}

      /* Don't adjust the buffer for ttl issue; icmp-error node wants
       * to see the IP header */
      if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
	{
	  u32 next_index = adj0[0].rewrite_header.next_index;
	  vlib_buffer_advance (b[0], -(word) rw_len0);
	  tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
	  vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;

	  if (PREDICT_FALSE
	      (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
	    vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
						tx_sw_if_index0,
						&next_index, b[0],
						adj0->ia_cfg_index);
	  next[0] = next_index;

	  if (is_midchain)
	    {
	      vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
					  0 /* is_ip6 */ );

	      /* Guess we are only writing on ipv4 header. */
	      vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
	    }
	  else
	    /* Guess we are only writing on simple Ethernet header. */
	    vnet_rewrite_one_header (adj0[0], ip0,
				     sizeof (ethernet_header_t));

	  /*
	   * Bump the per-adjacency counters
	   */
	  if (do_counters)
	    vlib_increment_combined_counter
	      (&adjacency_counters,
	       thread_index,
	       adj_index0, 1, vlib_buffer_length_in_chain (vm,
							   b[0]) + rw_len0);

	  if (is_midchain)
	    adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);

	  if (is_mcast)
	    /* copy bytes from the IP address into the MAC rewrite */
	    vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
					adj0->rewrite_header.dst_mcast_offset,
					&ip0->dst_address.as_u32, (u8 *) ip0);
	}
      else
	{
	  b[0]->error = error_node->errors[error0];
	  /* MTU-exceeded packets will revisit rewrite: restore the TTL. */
	  if (error0 == IP4_ERROR_MTU_EXCEEDED)
	    ip4_ttl_inc (b[0], ip0);
	}

      next += 1;
      b += 1;
      n_left_from -= 1;
    }
#endif

  /* Scalar tail loop (and the only loop when neither prefetch tier
   * above is compiled in). */
  while (n_left_from > 0)
    {
      ip_adjacency_t *adj0;
      ip4_header_t *ip0;
      u32 rw_len0, adj_index0, error0;
      u32 tx_sw_if_index0;

      adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];

      adj0 = adj_get (adj_index0);

      if (do_counters)
	vlib_prefetch_combined_counter (&adjacency_counters,
					thread_index, adj_index0);

      ip0 = vlib_buffer_get_current (b[0]);

      error0 = IP4_ERROR_NONE;

      ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);


      /* Update packet buffer attributes/set output interface. */
      rw_len0 = adj0[0].rewrite_header.data_bytes;
      vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;

      /* Check MTU of outgoing interface. */
      u16 ip0_len = clib_net_to_host_u16 (ip0->length);
      if (b[0]->flags & VNET_BUFFER_F_GSO)
	ip0_len = gso_mtu_sz (b[0]);

      ip4_mtu_check (b[0], ip0_len,
		     adj0[0].rewrite_header.max_l3_packet_bytes,
		     ip0->flags_and_fragment_offset &
		     clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
		     next + 0, is_midchain, &error0);

      if (is_mcast)
	{
	  error0 = ((adj0[0].rewrite_header.sw_if_index ==
		     vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
		    IP4_ERROR_SAME_INTERFACE : error0);
	}

      /* Don't adjust the buffer for ttl issue; icmp-error node wants
       * to see the IP header */
      if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
	{
	  u32 next_index = adj0[0].rewrite_header.next_index;
	  vlib_buffer_advance (b[0], -(word) rw_len0);
	  tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
	  vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;

	  if (PREDICT_FALSE
	      (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
	    vnet_feature_arc_start_w_cfg_index (lm->output_feature_arc_index,
						tx_sw_if_index0,
						&next_index, b[0],
						adj0->ia_cfg_index);
	  next[0] = next_index;

	  if (is_midchain)
	    {
	      /* this acts on the packet that is about to be encapped */
	      vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
					  0 /* is_ip6 */ );

	      /* Guess we are only writing on ipv4 header. */
	      vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
	    }
	  else
	    /* Guess we are only writing on simple Ethernet header. */
	    vnet_rewrite_one_header (adj0[0], ip0,
				     sizeof (ethernet_header_t));

	  if (do_counters)
	    vlib_increment_combined_counter
	      (&adjacency_counters,
	       thread_index, adj_index0, 1,
	       vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);

	  if (is_midchain)
	    adj_midchain_fixup (vm, adj0, b[0], VNET_LINK_IP4);

	  if (is_mcast)
	    /* copy bytes from the IP address into the MAC rewrite */
	    vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
					adj0->rewrite_header.dst_mcast_offset,
					&ip0->dst_address.as_u32, (u8 *) ip0);
	}
      else
	{
	  b[0]->error = error_node->errors[error0];
	  /* undo the TTL decrement - we'll be back to do it again */
	  if (error0 == IP4_ERROR_MTU_EXCEEDED)
	    ip4_ttl_inc (b[0], ip0);
	}

      next += 1;
      b += 1;
      n_left_from -= 1;
    }

  /* Need to do trace after rewrites to pick up new packet data. */
  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
  return frame->n_vectors;
}

/* Thin wrapper kept for the node entry points below. */
always_inline uword
ip4_rewrite_inline (vlib_main_t * vm,
		    vlib_node_runtime_t * node,
		    vlib_frame_t * frame,
		    int do_counters, int is_midchain, int is_mcast)
{
  return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
				      is_midchain, is_mcast);
}


/** @brief IPv4 rewrite node.
    @node ip4-rewrite

    This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
    header checksum, fetch the ip adjacency, check the outbound mtu,
    apply the adjacency rewrite, and send pkts to the adjacency
    rewrite header's rewrite_next_index.

    @param vm vlib_main_t corresponding to the current thread
    @param node vlib_node_runtime_t
    @param frame vlib_frame_t whose contents should be dispatched

    @par Graph mechanics: buffer metadata, next index usage

    @em Uses:
    - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
        - the rewrite adjacency index
    - <code>adj->lookup_next_index</code>
        - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
          the packet will be dropped.
    - <code>adj->rewrite_header</code>
        - Rewrite string length, rewrite string, next_index

    @em Sets:
    - <code>b->current_data, b->current_length</code>
        - Updated net of applying the rewrite string

    <em>Next Indices:</em>
    - <code> adj->rewrite_header.next_index </code>
      or @c ip4-drop
*/

VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
				 vlib_frame_t * frame)
{
  if (adj_are_counters_enabled ())
    return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
  else
    return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
}

VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
				       vlib_node_runtime_t * node,
				       vlib_frame_t * frame)
{
  if (adj_are_counters_enabled ())
    return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
  else
    return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
}

/* Midchain variant: additionally recomputes checksums and runs the
 * adjacency fixup before the inner encap. */
VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
				  vlib_node_runtime_t * node,
				  vlib_frame_t * frame)
{
  if (adj_are_counters_enabled ())
    return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
  else
    return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
}

/* Multicast variant: also patches the IP address bits into the MAC
 * rewrite and rejects same-interface forwarding. */
VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
				       vlib_node_runtime_t * node,
				       vlib_frame_t * frame)
{
  if (adj_are_counters_enabled ())
    return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
  else
    return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
}

VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
					vlib_node_runtime_t * node,
					vlib_frame_t * frame)
{
  if (adj_are_counters_enabled ())
    return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
  else
    return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_rewrite_node) = {
  .name = "ip4-rewrite",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_rewrite_trace,

  .n_next_nodes = IP4_REWRITE_N_NEXT,
  .next_nodes = {
    [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
    [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
  },
};

VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
  .name = "ip4-rewrite-bcast",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_rewrite_trace,
  .sibling_of = "ip4-rewrite",
};

VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
  .name = "ip4-rewrite-mcast",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_rewrite_trace,
  .sibling_of = "ip4-rewrite",
};

VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
  .name = "ip4-mcast-midchain",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_rewrite_trace,
  .sibling_of = "ip4-rewrite",
};

VLIB_REGISTER_NODE (ip4_midchain_node) = {
  .name = "ip4-midchain",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_rewrite_trace,
  .sibling_of = "ip4-rewrite",
};
/* *INDENT-ON */

/* Cross-check the mtrie lookup of 'a' against the FIB table lookup;
 * returns non-zero when the two data structures agree. */
static int
ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
{
  ip4_fib_mtrie_t *mtrie0;
  ip4_fib_mtrie_leaf_t leaf0;
  u32 lbi0;

  mtrie0 = &ip4_fib_get (fib_index0)->mtrie;

  leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
  leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);

  lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);

  return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
}

/* CLI: validate mtrie-vs-table agreement over a range of addresses. */
static clib_error_t *
test_lookup_command_fn (vlib_main_t * vm,
			unformat_input_t * input, vlib_cli_command_t * cmd)
{
  ip4_fib_t *fib;
  u32 table_id = 0;
  f64 count = 1;
  u32 n;
  int i;
  ip4_address_t ip4_base_address;
  u64 errors = 0;

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "table %d", &table_id))
	{
	  /* Make sure the entry exists. */
	  fib = ip4_fib_get (table_id);
	  if ((fib) && (fib->index != table_id))
	    return clib_error_return (0, "<fib-index> %d does not exist",
				      table_id);
	}
      else if (unformat (input, "count %f", &count))
	;

      else if (unformat (input, "%U",
			 unformat_ip4_address, &ip4_base_address))
	;
      else
	return clib_error_return (0, "unknown input `%U'",
				  format_unformat_error, input);
    }

  n = count;

  for (i = 0; i < n; i++)
    {
      if (!ip4_lookup_validate (&ip4_base_address, table_id))
	errors++;

      /* Advance to the next consecutive address (host byte order +1). */
      ip4_base_address.as_u32 =
	clib_host_to_net_u32 (1 +
			      clib_net_to_host_u32 (ip4_base_address.
						    as_u32));
    }

  if (errors)
    vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
  else
    vlib_cli_output (vm, "No errors in %d lookups\n", n);

  return 0;
}

/*?
 * Perform a lookup of an IPv4 Address (or range of addresses) in the
 * given FIB table to determine if there is a conflict with the
 * adjacency table. The fib-id can be determined by using the
 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
 * of 0 is used.
 *
 * @todo This command uses fib-id, other commands use table-id (not
 * just a name, they are different indexes). Would like to change this
 * to table-id for consistency.
* * @cliexpar * Example of how to run the test lookup command: * @cliexstart{test lookup 172.16.1.1 table 1 count 2} * No errors in 2 lookups * @cliexend ?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (lookup_test_command, static) = { .path = "test lookup", .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]", .function = test_lookup_command_fn, }; /* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config) { u32 fib_index; fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id); if (~0 == fib_index) return VNET_API_ERROR_NO_SUCH_FIB; fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4, flow_hash_config); return 0; } #endif static clib_error_t * set_ip_flow_hash_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { int matched = 0; u32 table_id = 0; u32 flow_hash_config = 0; int rv; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "table %d", &table_id)) matched = 1; #define _(a,v) \ else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;} foreach_flow_hash_bit #undef _ else break; } if (matched == 0) return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config); switch (rv) { case 0: break; case VNET_API_ERROR_NO_SUCH_FIB: return clib_error_return (0, "no such FIB table %d", table_id); default: clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config); break; } return 0; } /*? * Configure the set of IPv4 fields used by the flow hash. 
* * @cliexpar * Example of how to set the flow hash on a given table: * @cliexcmd{set ip flow-hash table 7 dst sport dport proto} * Example of display the configured flow hash: * @cliexstart{show ip fib} * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto * 0.0.0.0/0 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]] * [0] [@0]: dpo-drop ip6 * 0.0.0.0/32 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]] * [0] [@0]: dpo-drop ip6 * 224.0.0.0/8 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]] * [0] [@0]: dpo-drop ip6 * 6.0.1.2/32 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]] * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0 * 7.0.0.1/32 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]] * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0 * 240.0.0.0/8 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]] * [0] [@0]: dpo-drop ip6 * 255.255.255.255/32 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]] * [0] [@0]: dpo-drop ip6 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto * 0.0.0.0/0 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]] * [0] [@0]: dpo-drop ip6 * 0.0.0.0/32 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]] * [0] [@0]: dpo-drop ip6 * 172.16.1.0/24 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]] * [0] [@4]: ipv4-glean: af_packet0 * 172.16.1.1/32 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]] * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0 * 172.16.1.2/32 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]] * [0] [@5]: 
ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36 * 172.16.2.0/24 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]] * [0] [@4]: ipv4-glean: af_packet1 * 172.16.2.1/32 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]] * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1 * 224.0.0.0/8 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]] * [0] [@0]: dpo-drop ip6 * 240.0.0.0/8 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]] * [0] [@0]: dpo-drop ip6 * 255.255.255.255/32 * unicast-ip4-chain * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]] * [0] [@0]: dpo-drop ip6 * @cliexend ?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = { .path = "set ip flow-hash", .short_help = "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]", .function = set_ip_flow_hash_command_fn, }; /* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, u32 table_index) { vnet_main_t *vnm = vnet_get_main (); vnet_interface_main_t *im = &vnm->interface_main; ip4_main_t *ipm = &ip4_main; ip_lookup_main_t *lm = &ipm->lookup_main; vnet_classify_main_t *cm = &vnet_classify_main; ip4_address_t *if_addr; if (pool_is_free_index (im->sw_interfaces, sw_if_index)) return VNET_API_ERROR_NO_MATCHING_INTERFACE; if (table_index != ~0 && pool_is_free_index (cm->tables, table_index)) return VNET_API_ERROR_NO_SUCH_ENTRY; vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index); lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index; if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL); if (NULL != if_addr) { fib_prefix_t pfx = { .fp_len = 32, .fp_proto = FIB_PROTOCOL_IP4, .fp_addr.ip4 = *if_addr, }; u32 fib_index; fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index); if (table_index 
!= (u32) ~ 0) { dpo_id_t dpo = DPO_INVALID; dpo_set (&dpo, DPO_CLASSIFY, DPO_PROTO_IP4, classify_dpo_create (DPO_PROTO_IP4, table_index)); fib_table_entry_special_dpo_add (fib_index, &pfx, FIB_SOURCE_CLASSIFY, FIB_ENTRY_FLAG_NONE, &dpo); dpo_reset (&dpo); } else { fib_table_entry_special_remove (fib_index, &pfx, FIB_SOURCE_CLASSIFY); } } return 0; } #endif static clib_error_t * set_ip_classify_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { u32 table_index = ~0; int table_index_set = 0; u32 sw_if_index = ~0; int rv; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "table-index %d", &table_index)) table_index_set = 1; else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, vnet_get_main (), &sw_if_index)) ; else break; } if (table_index_set == 0) return clib_error_return (0, "classify table-index must be specified"); if (sw_if_index == ~0) return clib_error_return (0, "interface / subif must be specified"); rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index); switch (rv) { case 0: break; case VNET_API_ERROR_NO_MATCHING_INTERFACE: return clib_error_return (0, "No such interface"); case VNET_API_ERROR_NO_SUCH_ENTRY: return clib_error_return (0, "No such classifier table"); } return 0; } /*? * Assign a classification table to an interface. The classification * table is created using the '<em>classify table</em>' and '<em>classify session</em>' * commands. Once the table is create, use this command to filter packets * on an interface. 
* * @cliexpar * Example of how to assign a classification table to an interface: * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1} ?*/ /* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_ip_classify_command, static) = { .path = "set ip classify", .short_help = "set ip classify intfc <interface> table-index <classify-idx>", .function = set_ip_classify_command_fn, }; /* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON * * Local Variables: * eval: (c-set-style "gnu") * End: */