/*
* mpls_output.c: MPLS Adj rewrite
*
* Copyright (c) 2012-2014 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vlib/vlib.h>
#include <vnet/pg/pg.h>
#include <vnet/ip/ip.h>
#include <vnet/mpls/mpls.h>
#include <vnet/ip/ip_frag.h>
typedef struct {
/* Adjacency taken. */
u32 adj_index;
u32 flow_hash;
} mpls_output_trace_t;
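/*
 * The output path runs in one of two modes: the plain rewrite mode used by
 * mpls-output, and a midchain mode that additionally invokes the adjacency's
 * midchain fixup after the rewrite has been written.
 */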
typedef enum {
MPLS_OUTPUT_MODE,
MPLS_OUTPUT_MIDCHAIN_MODE
} mpls_output_mode_t;
#define foreach_mpls_output_next \
_(DROP, "error-drop") \
_(FRAG, "mpls-frag")
typedef enum {
#define _(s,n) MPLS_OUTPUT_NEXT_##s,
foreach_mpls_output_next
#undef _
MPLS_OUTPUT_N_NEXT,
} mpls_output_next_t;
static u8 *
format_mpls_output_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
mpls_output_trace_t * t = va_arg (*args, mpls_output_trace_t *);
s = format (s, "adj-idx %d : %U flow hash: 0x%08x",
t->adj_index,
format_ip_adjacency, t->adj_index, FORMAT_IP_ADJACENCY_NONE,
t->flow_hash);
return s;
}
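/*
 * Per-buffer work shared by the mpls-output and mpls-midchain nodes:
 *  - write the adjacency's L2 rewrite in front of the MPLS header
 *  - bump the adjacency packet/byte counters
 *  - if the packet fits the interface MTU, hand it to the adjacency's next
 *    node, starting the MPLS output feature arc when the adjacency has
 *    output features enabled
 *  - otherwise send it to mpls-frag for fragmentation
 */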
static inline uword
mpls_output_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame,
mpls_output_mode_t mode)
{
u32 n_left_from, next_index, * from, * to_next, thread_index;
vlib_node_runtime_t * error_node;
u32 n_left_to_next;
mpls_main_t *mm;
thread_index = vlib_get_thread_index();
error_node = vlib_node_get_runtime (vm, mpls_output_node.index);
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
next_index = node->cached_next_index;
mm = &mpls_main;
while (n_left_from > 0)
{
vlib_get_next_frame (vm, node, next_index,
to_next, n_left_to_next);
while (n_left_from >= 4 && n_left_to_next >= 2)
{
ip_adjacency_t * adj0;
mpls_unicast_header_t *hdr0;
vlib_buffer_t * p0;
u32 pi0, adj_index0, next0, error0;
word rw_len0;
ip_adjacency_t * adj1;
mpls_unicast_header_t *hdr1;
vlib_buffer_t * p1;
u32 pi1, adj_index1, next1, error1;
word rw_len1;
/* Prefetch next iteration. */
{
vlib_buffer_t * p2, * p3;
p2 = vlib_get_buffer (vm, from[2]);
p3 = vlib_get_buffer (vm, from[3]);
vlib_prefetch_buffer_header (p2, STORE);
vlib_prefetch_buffer_header (p3, STORE);
CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
CLIB_PREFETCH (p3->data, sizeof (hdr1[0]), STORE);
}
pi0 = to_next[0] = from[0];
pi1 = to_next[1] = from[1];
from += 2;
n_left_from -= 2;
to_next += 2;
n_left_to_next -= 2;
p0 = vlib_get_buffer (vm, pi0);
p1 = vlib_get_buffer (vm, pi1);
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
adj0 = adj_get(adj_index0);
adj1 = adj_get(adj_index1);
hdr0 = vlib_buffer_get_current (p0);
hdr1 = vlib_buffer_get_current (p1);
/* Apply the adjacency rewrites; the size hint assumes a plain Ethernet header. */
vnet_rewrite_two_headers (adj0[0], adj1[0], hdr0, hdr1,
sizeof (ethernet_header_t));
/* Update packet buffer attributes/set output interface. */
rw_len0 = adj0[0].rewrite_header.data_bytes;
rw_len1 = adj1[0].rewrite_header.data_bytes;
vnet_buffer (p0)->mpls.save_rewrite_length = rw_len0;
vnet_buffer (p1)->mpls.save_rewrite_length = rw_len1;
/* Bump the adj counters for packet and bytes */
vlib_increment_combined_counter
(&adjacency_counters,
thread_index,
adj_index0,
1,
vlib_buffer_length_in_chain (vm, p0) + rw_len0);
vlib_increment_combined_counter
(&adjacency_counters,
thread_index,
adj_index1,
1,
vlib_buffer_length_in_chain (vm, p1) + rw_len1);
/* Check MTU of outgoing interface. */
if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
adj0[0].rewrite_header.max_l3_packet_bytes))
{
vlib_buffer_advance(p0, -rw_len0);
vnet_buffer (p0)->sw_if_index[VLIB_TX] =
adj0[0].rewrite_header.sw_if_index;
next0 = adj0[0].rewrite_header.next_index;
error0 = IP4_ERROR_NONE;
if (PREDICT_FALSE(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
vnet_feature_arc_start (mm->output_feature_arc_index,
adj0[0].rewrite_header.sw_if_index,
&next0, p0);
}
else
{
error0 = IP4_ERROR_MTU_EXCEEDED;
next0 = MPLS_OUTPUT_NEXT_FRAG;
vlib_node_increment_counter (vm, mpls_output_node.index,
MPLS_ERROR_PKTS_NEED_FRAG,
1);
}
if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p1) <=
adj1[0].rewrite_header.max_l3_packet_bytes))
{
vlib_buffer_advance(p1, -rw_len1);
vnet_buffer (p1)->sw_if_index[VLIB_TX] =
adj1[0].rewrite_header.sw_if_index;
next1 = adj1[0].rewrite_header.next_index;
error1 = IP4_ERROR_NONE;
if (PREDICT_FALSE(adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
vnet_feature_arc_start (mm->output_feature_arc_index,
adj1[0].rewrite_header.sw_if_index,
&next1, p1);
}
else
{
error1 = IP4_ERROR_MTU_EXCEEDED;
next1 = MPLS_OUTPUT_NEXT_FRAG;
vlib_node_increment_counter (vm, mpls_output_node.index,
MPLS_ERROR_PKTS_NEED_FRAG,
1);
}
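/* In midchain mode, run the adjacency's fixup function so it can patch
 * the encap that was just written (e.g. tunnel header lengths/checksums). */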
if (mode == MPLS_OUTPUT_MIDCHAIN_MODE)
{
adj0->sub_type.midchain.fixup_func
(vm, adj0, p0,
adj0->sub_type.midchain.fixup_data);
adj1->sub_type.midchain.fixup_func
(vm, adj1, p1,
adj1->sub_type.midchain.fixup_data);
}
p0->error = error_node->errors[error0];
p1->error = error_node->errors[error1];
if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
{
mpls_output_trace_t *tr = vlib_add_trace (vm, node,
p0, sizeof (*tr));
tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
tr->flow_hash = vnet_buffer(p0)->ip.flow_hash;
}
if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED))
{
mpls_output_trace_t *tr = vlib_add_trace (vm, node,
p1, sizeof (*tr));
tr->adj_index = vnet_buffer(p1)->ip.adj_index[VLIB_TX];
tr->flow_hash = vnet_buffer(p1)->ip.flow_hash;
}
vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
to_next, n_left_to_next,
pi0, pi1, next0, next1);
}
while (n_left_from > 0 && n_left_to_next > 0)
{
ip_adjacency_t * adj0;
mpls_unicast_header_t *hdr0;
vlib_buffer_t * p0;
u32 pi0, adj_index0, next0, error0;
word rw_len0;
pi0 = to_next[0] = from[0];
p0 = vlib_get_buffer (vm, pi0);
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
adj0 = adj_get(adj_index0);
hdr0 = vlib_buffer_get_current (p0);
/* Apply the adjacency rewrite; the size hint assumes a plain Ethernet header. */
vnet_rewrite_one_header (adj0[0], hdr0,
sizeof (ethernet_header_t));
/* Update packet buffer attributes/set output interface. */
rw_len0 = adj0[0].rewrite_header.data_bytes;
vnet_buffer (p0)->mpls.save_rewrite_length = rw_len0;
vlib_increment_combined_counter
(&adjacency_counters,
thread_index,
adj_index0,
1,
vlib_buffer_length_in_chain (vm, p0) + rw_len0);
/* Check MTU of outgoing interface. */
if (PREDICT_TRUE(vlib_buffer_length_in_chain (vm, p0) <=
adj0[0].rewrite_header.max_l3_packet_bytes))
{
vlib_buffer_advance(p0, -rw_len0);
vnet_buffer (p0)->sw_if_index[VLIB_TX] =
adj0[0].rewrite_header.sw_if_index;
next0 = adj0[0].rewrite_header.next_index;
error0 = IP4_ERROR_NONE;
if (PREDICT_FALSE(adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
vnet_feature_arc_start (mm->output_feature_arc_index,
adj0[0].rewrite_header.sw_if_index,
&next0, p0);
}
else
{
error0 = IP4_ERROR_MTU_EXCEEDED;
next0 = MPLS_OUTPUT_NEXT_FRAG;
vlib_node_increment_counter (vm, mpls_output_node.index,
MPLS_ERROR_PKTS_NEED_FRAG,
1);
}
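/* Midchain fixup for the single-buffer path, as in the dual loop above. */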
if (mode == MPLS_OUTPUT_MIDCHAIN_MODE)
{
adj0->sub_type.midchain.fixup_func
(vm, adj0, p0,
adj0->sub_type.midchain.fixup_data);
}
p0->error = error_node->errors[error0];
from += 1;
n_left_from -= 1;
to_next += 1;
n_left_to_next -= 1;
if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
{
mpls_output_trace_t *tr = vlib_add_trace (vm, node,
p0, sizeof (*tr));
tr->adj_index = vnet_buffer(p0)->ip.adj_index[VLIB_TX];
tr->flow_hash = vnet_buffer(p0)->ip.flow_hash;
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next,
pi0, next0);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
vlib_node_increment_counter (vm, mpls_output_node.index,
MPLS_ERROR_PKTS_ENCAP,
from_frame->n_vectors);
return from_frame->n_vectors;
}
static char * mpls_error_strings[] = {
#define mpls_error(n,s) s,
#include "error.def"
#undef mpls_error
};
VLIB_NODE_FN (mpls_output_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
{
return (mpls_output_inline(vm, node, from_frame, MPLS_OUTPUT_MODE));
}
VLIB_REGISTER_NODE (mpls_output_node) = {
.name = "mpls-output",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = MPLS_N_ERROR,
.error_strings = mpls_error_strings,
.n_next_nodes = MPLS_OUTPUT_N_NEXT,
.next_nodes = {
[MPLS_OUTPUT_NEXT_DROP] = "mpls-drop",
[MPLS_OUTPUT_NEXT_FRAG] = "mpls-frag",
},
.format_trace = format_mpls_output_trace,
};
VLIB_NODE_FN (mpls_midchain_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
{
return (mpls_output_inline(vm, node, from_frame, MPLS_OUTPUT_MIDCHAIN_MODE));
}
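/*
 * mpls-midchain is registered as a sibling of mpls-output so that the two
 * nodes share the same next-node indices; only the midchain fixup step
 * differs.
 */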
VLIB_REGISTER_NODE (mpls_midchain_node) = {
.name = "mpls-midchain",
.vector_size = sizeof (u32),
.n_errors = MPLS_N_ERROR,
.error_strings = mpls_error_strings,
.sibling_of = "mpls-output",
.format_trace = format_mpls_output_trace,
};
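/* The mpls-frag node reuses the generic IP fragmentation error strings. */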
static char *mpls_frag_error_strings[] = {
#define _(sym,string) string,
foreach_ip_frag_error
#undef _
};
typedef struct mpls_frag_trace_t_
{
u16 pkt_size;
u16 mtu;
} mpls_frag_trace_t;
typedef enum
{
MPLS_FRAG_NEXT_REWRITE,
MPLS_FRAG_NEXT_REWRITE_MIDCHAIN,
MPLS_FRAG_NEXT_ICMP_ERROR,
MPLS_FRAG_NEXT_DROP,
MPLS_FRAG_N_NEXT,
} mpls_frag_next_t;
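/*
 * mpls-frag: packets arrive here from mpls-output when they exceed the
 * adjacency's MTU. The inner IPv4/IPv6 payload is fragmented, each fragment
 * keeping the MPLS encap, and the fragments are sent back to mpls-output
 * (or mpls-midchain) for rewrite. IPv4 packets with DF set generate an
 * ICMP "fragmentation needed" error instead.
 */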
static uword
mpls_frag (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
u32 n_left_from, next_index, * from, * to_next, n_left_to_next, *frags;
vlib_node_runtime_t * error_node;
error_node = vlib_node_get_runtime (vm, mpls_output_node.index);
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
frags = NULL;
while (n_left_from > 0)
{
vlib_get_next_frame (vm, node, next_index,
to_next, n_left_to_next);
while (n_left_from > 0 && n_left_to_next > 0)
{
ip_adjacency_t * adj0;
vlib_buffer_t * p0;
mpls_frag_next_t next0;
u32 pi0, adj_index0;
ip_frag_error_t error0 = IP_FRAG_ERROR_NONE;
i16 encap_size;
u8 is_ip4;
pi0 = to_next[0] = from[0];
p0 = vlib_get_buffer (vm, pi0);
from += 1;
n_left_from -= 1;
is_ip4 = vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4;
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
adj0 = adj_get(adj_index0);
/* Size of the MPLS label stack: bytes between the current data pointer and the inner L3 header */
encap_size = vnet_buffer(p0)->l3_hdr_offset - p0->current_data;
/* IP fragmentation */
if (is_ip4)
error0 = ip4_frag_do_fragment (vm, pi0,
adj0->rewrite_header.max_l3_packet_bytes,
encap_size, &frags);
else
error0 = ip6_frag_do_fragment (vm, pi0,
adj0->rewrite_header.max_l3_packet_bytes,
encap_size, &frags);
if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
{
mpls_frag_trace_t *tr =
vlib_add_trace (vm, node, p0, sizeof (*tr));
tr->mtu = adj0->rewrite_header.max_l3_packet_bytes;
tr->pkt_size = vlib_buffer_length_in_chain(vm, p0);
}
if (PREDICT_TRUE(error0 == IP_FRAG_ERROR_NONE))
{
/* The fragments now carry the payload; free the original buffer chain */
vlib_buffer_free_one (vm, pi0);
next0 = (IP_LOOKUP_NEXT_MIDCHAIN == adj0->lookup_next_index ?
MPLS_FRAG_NEXT_REWRITE_MIDCHAIN :
MPLS_FRAG_NEXT_REWRITE);
}
else if (is_ip4 && error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
{
icmp4_error_set_vnet_buffer (
p0, ICMP4_destination_unreachable,
ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
vnet_buffer (p0)->ip_frag.mtu);
next0 = MPLS_FRAG_NEXT_ICMP_ERROR;
}
else
{
vlib_error_count (vm, mpls_output_node.index, error0, 1);
vec_add1 (frags, pi0); /* send the original buffer to the drop next below */
next0 = MPLS_FRAG_NEXT_DROP;
}
/* Enqueue the fragments (or the original buffer on error) collected in frags */
u32 *frag_from, frag_left;
frag_from = frags;
frag_left = vec_len (frags);
while (frag_left > 0)
{
while (frag_left > 0 && n_left_to_next > 0)
{
u32 i;
i = to_next[0] = frag_from[0];
frag_from += 1;
frag_left -= 1;
to_next += 1;
n_left_to_next -= 1;
p0 = vlib_get_buffer (vm, i);
p0->error = error_node->errors[error0];
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next, i,
next0);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
vlib_get_next_frame (vm, node, next_index, to_next,
n_left_to_next);
}
vec_reset_length (frags);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
vec_free (frags);
return frame->n_vectors;
}
static u8 *
format_mpls_frag_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
mpls_frag_trace_t *t = va_arg (*args, mpls_frag_trace_t *);
s = format (s, "mtu:%d pkt-size:%d", t->mtu, t->pkt_size);
return s;
}
VLIB_REGISTER_NODE (mpls_frag_node) = {
.function = mpls_frag,
.name = "mpls-frag",
.vector_size = sizeof (u32),
.format_trace = format_mpls_frag_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
.n_errors = IP_FRAG_N_ERROR,
.error_strings = mpls_frag_error_strings,
.n_next_nodes = MPLS_FRAG_N_NEXT,
.next_nodes = {
[MPLS_FRAG_NEXT_REWRITE] = "mpls-output",
[MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain",
[MPLS_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
[MPLS_FRAG_NEXT_DROP] = "mpls-drop"
},
};
/**
* @brief Next index values from the MPLS incomplete adj node
*/
#define foreach_mpls_adj_incomplete_next \
_(DROP, "error-drop") \
_(IP4, "ip4-arp") \
_(IP6, "ip6-discover-neighbor")
typedef enum {
#define _(s,n) MPLS_ADJ_INCOMPLETE_NEXT_##s,
foreach_mpls_adj_incomplete_next
#undef _
MPLS_ADJ_INCOMPLETE_N_NEXT,
} mpls_adj_incomplete_next_t;
/**
* @brief A struct to hold tracing information for the MPLS adj-incomplete
* node.
*/
typedef struct mpls_adj_incomplete_trace_t_
{
u32 next;
} mpls_adj_incomplete_trace_t;
/**
* @brief Graph node for incomplete MPLS adjacency.
* This node pushes traffic to either the ip4-arp or ip6-discover-neighbor node
* based on the next-hop proto of the adj.
* We pay a cost for this 'routing' node, but an incomplete adj is the
* exceptional case.
*/
VLIB_NODE_FN (mpls_adj_incomplete_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
{
u32 n_left_from, next_index, * from, * to_next;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
next_index = node->cached_next_index;
while (n_left_from > 0)
{
u32 n_left_to_next;
vlib_get_next_frame (vm, node, next_index,
to_next, n_left_to_next);
while (n_left_from > 0 && n_left_to_next > 0)
{
u32 pi0, next0, adj_index0;
ip_adjacency_t * adj0;
vlib_buffer_t * p0;
pi0 = to_next[0] = from[0];
p0 = vlib_get_buffer (vm, pi0);
from += 1;
n_left_from -= 1;
to_next += 1;
n_left_to_next -= 1;
adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
adj0 = adj_get(adj_index0);
if (PREDICT_TRUE(FIB_PROTOCOL_IP4 == adj0->ia_nh_proto))
{
next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP4;
}
else
{
next0 = MPLS_ADJ_INCOMPLETE_NEXT_IP6;
}
if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
{
mpls_adj_incomplete_trace_t *tr =
vlib_add_trace (vm, node, p0, sizeof (*tr));
tr->next = next0;
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next,
pi0, next0);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
return from_frame->n_vectors;
}
static u8 *
format_mpls_adj_incomplete_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
mpls_adj_incomplete_trace_t * t;
u32 indent;
t = va_arg (*args, mpls_adj_incomplete_trace_t *);
indent = format_get_indent (s);
s = format (s, "%Unext:%d",
format_white_space, indent,
t->next);
return (s);
}
VLIB_REGISTER_NODE (mpls_adj_incomplete_node) = {
.name = "mpls-adj-incomplete",
.format_trace = format_mpls_adj_incomplete_trace,
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = MPLS_N_ERROR,
.error_strings = mpls_error_strings,
.n_next_nodes = MPLS_ADJ_INCOMPLETE_N_NEXT,
.next_nodes = {
#define _(s,n) [MPLS_ADJ_INCOMPLETE_NEXT_##s] = n,
foreach_mpls_adj_incomplete_next
#undef _
},
};