aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--vnet/vnet/ip/ip6_forward.c155
-rw-r--r--vnet/vnet/ip/ip6_packet.h26
-rw-r--r--vnet/vnet/sr/sr.c284
-rw-r--r--vnet/vnet/sr/sr_replicate.c99
4 files changed, 457 insertions, 107 deletions
diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c
index a4ce65a6396..6b74b7c9135 100644
--- a/vnet/vnet/ip/ip6_forward.c
+++ b/vnet/vnet/ip/ip6_forward.c
@@ -1230,6 +1230,77 @@ u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
return p0->flags;
}
+/* ip6_locate_header
+ *
+ * Walk the IPv6 extension header chain that follows ip0, looking for the
+ * header whose protocol number is find_hdr.
+ * 1. If find_hdr < 0, stop at the first header that is not an extension
+ *    header (the transport or ESP header, or "no next header"), store its
+ *    offset from the start of the IPv6 header in *offset and return its
+ *    protocol number.
+ * 2. If find_hdr >= 0 and a header with that protocol number is found,
+ *    its offset is stored in *offset and its protocol number is returned.
+ * 3. If the header is not found, the packet is malformed or truncated, or
+ *    a non-first fragment is met, -1 is returned and *offset is left
+ *    untouched.
+ *
+ * Only the bytes between vlib_buffer_get_current(p0) and
+ * current_length past it are examined.
+ */
+always_inline int ip6_locate_header (vlib_buffer_t *p0,
+                                     ip6_header_t *ip0,
+                                     int find_hdr,
+                                     u32 *offset)
+{
+  u8 next_proto = ip0->protocol;
+  u8 *next_header = ip6_next_header(ip0);
+  u8 *buf_end = (u8 *)vlib_buffer_get_current(p0) + p0->current_length;
+  u32 cur_offset = sizeof(ip6_header_t);
+  u32 exthdr_len;
+  u32 avail;
+
+  while (1)
+    {
+      if (PREDICT_FALSE(next_header >= buf_end))
+        {
+          //A malicious packet could set an extension header with a too big size
+          return -1;
+        }
+      if (next_proto == find_hdr)
+        break;
+      if ((!ip6_ext_hdr(next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT)
+        {
+          /* End of the extension header chain */
+          if (find_hdr < 0)
+            break;
+          return -1;
+        }
+
+      /*
+       * An extension header is about to be parsed: make sure the fields
+       * read below are really inside the buffer, a truncated packet may
+       * end in the middle of the header.
+       */
+      avail = (u32)(buf_end - next_header);
+      if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION)
+        {
+          ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *)next_header;
+
+          if (PREDICT_FALSE(avail < sizeof(ip6_frag_hdr_t)))
+            return -1;
+          /* Non first fragment return -1 */
+          if (ip6_frag_hdr_offset(frag_hdr))
+            return -1;
+          exthdr_len = sizeof(ip6_frag_hdr_t);
+        }
+      else
+        {
+          ip6_ext_header_t *ext_hdr = (ip6_ext_header_t *)next_header;
+
+          if (PREDICT_FALSE(avail < sizeof(ip6_ext_header_t)))
+            return -1;
+          /* The AH length field uses 4 byte units, all others 8 byte units */
+          if (next_proto == IP_PROTOCOL_IPSEC_AH)
+            exthdr_len = ip6_ext_authhdr_len(ext_hdr);
+          else
+            exthdr_len = ip6_ext_header_len(ext_hdr);
+        }
+
+      /* next_hdr is the first byte of every extension header */
+      next_proto = ((ip6_ext_header_t *)next_header)->next_hdr;
+      next_header += exthdr_len;
+      cur_offset += exthdr_len;
+    }
+
+  *offset = cur_offset;
+  return next_proto;
+}
+
static uword
ip6_local (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1263,6 +1334,7 @@ ip6_local (vlib_main_t * vm,
i32 len_diff0, len_diff1;
u8 error0, type0, good_l4_checksum0;
u8 error1, type1, good_l4_checksum1;
+ u32 udp_offset0, udp_offset1;
pi0 = to_next[0] = from[0];
pi1 = to_next[1] = from[1];
@@ -1288,26 +1360,48 @@ ip6_local (vlib_main_t * vm,
good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
good_l4_checksum1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ len_diff0 = 0;
+ len_diff1 = 0;
- udp0 = ip6_next_header (ip0);
- udp1 = ip6_next_header (ip1);
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
- good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP && udp1->checksum == 0;
+ /* Skip HBH local processing */
+ if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t *)ip6_next_header(ip0);
+ next0 = lm->local_next_by_ip_protocol[ext_hdr->next_hdr];
+ type0 = lm->builtin_protocol_by_ip_protocol[ext_hdr->next_hdr];
+ }
+ if (PREDICT_FALSE (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t *)ip6_next_header(ip1);
+ next1 = lm->local_next_by_ip_protocol[ext_hdr->next_hdr];
+ type1 = lm->builtin_protocol_by_ip_protocol[ext_hdr->next_hdr];
+ }
+ if (PREDICT_TRUE(IP_PROTOCOL_UDP == ip6_locate_header(p0, ip0,
+ IP_PROTOCOL_UDP, &udp_offset0)))
+ {
+ udp0 = (udp_header_t *)((u8 *)ip0 + udp_offset0);
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
+ /* Verify UDP length. */
+ ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+ }
+ if (PREDICT_TRUE(IP_PROTOCOL_UDP == ip6_locate_header(p1, ip1,
+ IP_PROTOCOL_UDP, &udp_offset1)))
+ {
+ udp1 = (udp_header_t *)((u8 *)ip1 + udp_offset1);
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP && udp1->checksum == 0;
+ /* Verify UDP length. */
+ ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
+ udp_len1 = clib_net_to_host_u16 (udp1->length);
+ len_diff1 = ip_len1 - udp_len1;
+ }
good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN;
- /* Verify UDP length. */
- ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
- ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
- udp_len0 = clib_net_to_host_u16 (udp0->length);
- udp_len1 = clib_net_to_host_u16 (udp1->length);
-
- len_diff0 = ip_len0 - udp_len0;
- len_diff1 = ip_len1 - udp_len1;
-
len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0;
@@ -1382,6 +1476,7 @@ ip6_local (vlib_main_t * vm,
u32 pi0, ip_len0, udp_len0, flags0, next0;
i32 len_diff0;
u8 error0, type0, good_l4_checksum0;
+ u32 udp_offset0;
pi0 = to_next[0] = from[0];
from += 1;
@@ -1399,20 +1494,28 @@ ip6_local (vlib_main_t * vm,
flags0 = p0->flags;
good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
+ len_diff0 = 0;
- udp0 = ip6_next_header (ip0);
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
+ /* Skip HBH local processing */
+ if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t *)ip6_next_header(ip0);
+ next0 = lm->local_next_by_ip_protocol[ext_hdr->next_hdr];
+ type0 = lm->builtin_protocol_by_ip_protocol[ext_hdr->next_hdr];
+ }
+ if (PREDICT_TRUE(IP_PROTOCOL_UDP == ip6_locate_header(p0, ip0,
+ IP_PROTOCOL_UDP, &udp_offset0)))
+ {
+ udp0 = (udp_header_t *)((u8 *)ip0 + udp_offset0);
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0;
+ /* Verify UDP length. */
+ ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+ }
good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
-
- /* Verify UDP length. */
- ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
- udp_len0 = clib_net_to_host_u16 (udp0->length);
-
- len_diff0 = ip_len0 - udp_len0;
-
len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
diff --git a/vnet/vnet/ip/ip6_packet.h b/vnet/vnet/ip/ip6_packet.h
index 456c011a182..d29a06942ef 100644
--- a/vnet/vnet/ip/ip6_packet.h
+++ b/vnet/vnet/ip/ip6_packet.h
@@ -412,6 +412,32 @@ typedef CLIB_PACKED (struct {
u8 next_hdr;
/* Length of this header plus option data in 8 byte units. */
u8 n_data_u64s;
+}) ip6_ext_header_t;
+
+always_inline u8 ip6_ext_hdr(u8 nexthdr)
+{
+  /*
+   * Tell extension headers apart from upper layer protocols: returns 1
+   * for the protocol numbers listed below (which include "no next
+   * header") and 0 for everything else.
+   */
+  switch (nexthdr)
+    {
+    case IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS:
+    case IP_PROTOCOL_IP6_NONXT:
+    case IP_PROTOCOL_IPV6_FRAGMENTATION:
+    case IP_PROTOCOL_IPSEC_AH:
+    case IP_PROTOCOL_IPV6_ROUTE:
+    case IP_PROTOCOL_IP6_DESTINATION_OPTIONS:
+      return 1;
+    default:
+      return 0;
+    }
+}
+
+#define ip6_ext_header_len(p) (((p)->n_data_u64s+1) << 3) /* header size in bytes: (length field + 1) * 8 */
+#define ip6_ext_authhdr_len(p) (((p)->n_data_u64s+2) << 2) /* header size in bytes: (length field + 2) * 4 */
+
+always_inline void *
+ip6_ext_next_header (ip6_ext_header_t *ext_hdr )
+{
+  /* The following header starts ip6_ext_header_len() bytes after this one */
+  u8 *start = (u8 *) ext_hdr;
+
+  return (void *) (start + ip6_ext_header_len(ext_hdr));
+}
+
+typedef CLIB_PACKED (struct {
+ u8 next_hdr; /* protocol number of the header that follows this one */
+ /* Length of this header in 8 byte units, not counting the first 8 bytes (see ip6_ext_header_len). */
+ u8 n_data_u64s;
u8 data[0];
}) ip6_hop_by_hop_ext_t;
diff --git a/vnet/vnet/sr/sr.c b/vnet/vnet/sr/sr.c
index 287d52116bd..5d0275d992a 100644
--- a/vnet/vnet/sr/sr.c
+++ b/vnet/vnet/sr/sr.c
@@ -360,8 +360,6 @@ sr_rewrite (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
{
u32 n_left_from, next_index, *from, *to_next;
- ip6_main_t *im = &ip6_main;
- ip_lookup_main_t *lm = &im->lookup_main;
ip6_sr_main_t *sm = &sr_main;
u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *,
vlib_buffer_t *, ip6_header_t *, ip6_sr_header_t *);
@@ -384,7 +382,6 @@ sr_rewrite (vlib_main_t * vm,
u32 bi0, bi1;
vlib_buffer_t *b0, *b1;
ip6_header_t *ip0, *ip1;
- ip_adjacency_t *adj0, *adj1;
ip6_sr_header_t *sr0, *sr1;
ip6_sr_tunnel_t *t0, *t1;
u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP;
@@ -419,15 +416,12 @@ sr_rewrite (vlib_main_t * vm,
* $$$ parse through header(s) to pick the point
* where we punch in the SR extention header
*/
-
- adj0 =
- ip_get_adjacency (lm, vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
- adj1 =
- ip_get_adjacency (lm, vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
t0 =
- pool_elt_at_index (sm->tunnels, adj0->rewrite_header.sw_if_index);
+ pool_elt_at_index (sm->tunnels,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
t1 =
- pool_elt_at_index (sm->tunnels, adj1->rewrite_header.sw_if_index);
+ pool_elt_at_index (sm->tunnels,
+ vnet_buffer (b1)->ip.adj_index[VLIB_TX]);
ASSERT (VLIB_BUFFER_PRE_DATA_SIZE
>= ((word) vec_len (t0->rewrite)) + b0->current_data);
@@ -439,6 +433,16 @@ sr_rewrite (vlib_main_t * vm,
ip0 = vlib_buffer_get_current (b0);
ip1 = vlib_buffer_get_current (b1);
+#if DPDK > 0 /* Cannot call replication node yet without DPDK */
+ /* add a replication node */
+ if (PREDICT_FALSE (t0->policy_index != ~0))
+ {
+ vnet_buffer (b0)->ip.save_protocol = t0->policy_index;
+ next0 = SR_REWRITE_NEXT_SR_REPLICATE;
+ sr0 = (ip6_sr_header_t *) (t0->rewrite);
+ goto processnext;
+ }
+#endif /* DPDK */
/*
* SR-unaware service chaining case: pkt coming back from
@@ -454,22 +458,41 @@ sr_rewrite (vlib_main_t * vm,
}
else
{
+ u32 len_bytes = sizeof (ip6_header_t);
+ u8 next_hdr = ip0->protocol;
+
+ /* HBH must immediately follow ipv6 header */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+ len_bytes +=
+ ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr);
+ /* Ignoring the sr_local for now, if RH follows HBH here */
+ next_hdr = ext_hdr->next_hdr;
+ ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE;
+ }
+ else
+ {
+ ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */
+ }
/*
* Copy data before the punch-in point left by the
* required amount. Assume (for the moment) that only
* the main packet header needs to be copied.
*/
clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite),
- ip0, sizeof (ip6_header_t));
+ ip0, len_bytes);
vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite));
ip0 = vlib_buffer_get_current (b0);
- sr0 = (ip6_sr_header_t *) (ip0 + 1);
+ sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes);
/* $$$ tune */
clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite));
/* Fix the next header chain */
- sr0->protocol = ip0->protocol;
- ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */
+ sr0->protocol = next_hdr;
+
new_l0 = clib_net_to_host_u16 (ip0->payload_length) +
vec_len (t0->rewrite);
ip0->payload_length = clib_host_to_net_u16 (new_l0);
@@ -496,7 +519,17 @@ sr_rewrite (vlib_main_t * vm,
b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
}
}
-
+#if DPDK > 0 /* Cannot call replication node yet without DPDK */
+ processnext:
+ /* add a replication node */
+ if (PREDICT_FALSE (t1->policy_index != ~0))
+ {
+ vnet_buffer (b1)->ip.save_protocol = t1->policy_index;
+ next1 = SR_REWRITE_NEXT_SR_REPLICATE;
+ sr1 = (ip6_sr_header_t *) (t1->rewrite);
+ goto trace00;
+ }
+#endif /* DPDK */
if (PREDICT_FALSE (ip1->protocol == IPPROTO_IPV6_ROUTE))
{
vlib_buffer_advance (b1, sizeof (ip1));
@@ -506,15 +539,38 @@ sr_rewrite (vlib_main_t * vm,
}
else
{
- clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite),
- ip0, sizeof (ip6_header_t));
+ u32 len_bytes = sizeof (ip6_header_t);
+ u8 next_hdr = ip1->protocol;
+
+ /* HBH must immediately follow ipv6 header */
+ if (PREDICT_FALSE
+ (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1);
+ len_bytes +=
+ ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr);
+ /* Ignoring the sr_local for now, if RH follows HBH here */
+ next_hdr = ext_hdr->next_hdr;
+ ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE;
+ }
+ else
+ {
+ ip1->protocol = IPPROTO_IPV6_ROUTE;
+ }
+ /*
+ * Copy data before the punch-in point left by the
+ * required amount. Assume (for the moment) that only
+ * the main packet header needs to be copied.
+ */
+ clib_memcpy (((u8 *) ip1) - vec_len (t1->rewrite),
+ ip1, len_bytes);
vlib_buffer_advance (b1, -(word) vec_len (t1->rewrite));
ip1 = vlib_buffer_get_current (b1);
- sr1 = (ip6_sr_header_t *) (ip1 + 1);
+ sr1 = (ip6_sr_header_t *) ((u8 *) ip1 + len_bytes);
clib_memcpy (sr1, t1->rewrite, vec_len (t1->rewrite));
- sr1->protocol = ip1->protocol;
- ip1->protocol = IPPROTO_IPV6_ROUTE;
+ sr1->protocol = next_hdr;
new_l1 = clib_net_to_host_u16 (ip1->payload_length) +
vec_len (t1->rewrite);
ip1->payload_length = clib_host_to_net_u16 (new_l1);
@@ -541,6 +597,9 @@ sr_rewrite (vlib_main_t * vm,
b1->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
}
}
+#if DPDK > 0 /* Cannot run replicate without DPDK and only replicate uses this label */
+ trace00:
+#endif /* DPDK */
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -553,7 +612,8 @@ sr_rewrite (vlib_main_t * vm,
sizeof (tr->dst.as_u8));
tr->length = new_l0;
tr->next_index = next0;
- clib_memcpy (tr->sr, sr0, sizeof (tr->sr));
+ if (sr0)
+ clib_memcpy (tr->sr, sr0, sizeof (tr->sr));
}
if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -566,9 +626,9 @@ sr_rewrite (vlib_main_t * vm,
sizeof (tr->dst.as_u8));
tr->length = new_l1;
tr->next_index = next1;
- clib_memcpy (tr->sr, sr1, sizeof (tr->sr));
+ if (sr1)
+ clib_memcpy (tr->sr, sr1, sizeof (tr->sr));
}
-
vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
to_next, n_left_to_next,
bi0, bi1, next0, next1);
@@ -579,7 +639,6 @@ sr_rewrite (vlib_main_t * vm,
u32 bi0;
vlib_buffer_t *b0;
ip6_header_t *ip0 = 0;
- ip_adjacency_t *adj0;
ip6_sr_header_t *sr0 = 0;
ip6_sr_tunnel_t *t0;
u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP;
@@ -594,22 +653,21 @@ sr_rewrite (vlib_main_t * vm,
b0 = vlib_get_buffer (vm, bi0);
+
/*
* $$$ parse through header(s) to pick the point
* where we punch in the SR extention header
*/
-
- adj0 =
- ip_get_adjacency (lm, vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
t0 =
- pool_elt_at_index (sm->tunnels, adj0->rewrite_header.sw_if_index);
-
+ pool_elt_at_index (sm->tunnels,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
#if DPDK > 0 /* Cannot call replication node yet without DPDK */
/* add a replication node */
if (PREDICT_FALSE (t0->policy_index != ~0))
{
vnet_buffer (b0)->ip.save_protocol = t0->policy_index;
next0 = SR_REWRITE_NEXT_SR_REPLICATE;
+ sr0 = (ip6_sr_header_t *) (t0->rewrite);
goto trace0;
}
#endif /* DPDK */
@@ -635,22 +693,40 @@ sr_rewrite (vlib_main_t * vm,
}
else
{
+ u32 len_bytes = sizeof (ip6_header_t);
+ u8 next_hdr = ip0->protocol;
+
+ /* HBH must immediately follow ipv6 header */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+ len_bytes +=
+ ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr);
+ next_hdr = ext_hdr->next_hdr;
+ ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE;
+ /* Ignoring the sr_local for now, if RH follows HBH here */
+ }
+ else
+ {
+ ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */
+ }
/*
* Copy data before the punch-in point left by the
* required amount. Assume (for the moment) that only
* the main packet header needs to be copied.
*/
clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite),
- ip0, sizeof (ip6_header_t));
+ ip0, len_bytes);
vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite));
ip0 = vlib_buffer_get_current (b0);
- sr0 = (ip6_sr_header_t *) (ip0 + 1);
+ sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes);
/* $$$ tune */
clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite));
/* Fix the next header chain */
- sr0->protocol = ip0->protocol;
- ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */
+ sr0->protocol = next_hdr;
new_l0 = clib_net_to_host_u16 (ip0->payload_length) +
vec_len (t0->rewrite);
ip0->payload_length = clib_host_to_net_u16 (new_l0);
@@ -677,10 +753,10 @@ sr_rewrite (vlib_main_t * vm,
b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK];
}
}
-
#if DPDK > 0 /* Cannot run replicate without DPDK and only replicate uses this label */
trace0:
#endif /* DPDK */
+
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
sr_rewrite_trace_t *tr = vlib_add_trace (vm, node,
@@ -695,14 +771,13 @@ sr_rewrite (vlib_main_t * vm,
}
tr->length = new_l0;
tr->next_index = next0;
- clib_memcpy (tr->sr, sr0, sizeof (tr->sr));
+ if (sr0)
+ clib_memcpy (tr->sr, sr0, sizeof (tr->sr));
}
-
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next,
bi0, next0);
}
-
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
return from_frame->n_vectors;
@@ -2462,6 +2537,7 @@ sr_local (vlib_main_t * vm,
ip6_address_t *new_dst0, *new_dst1;
u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP;
u32 next1 = SR_LOCAL_NEXT_IP6_LOOKUP;
+
/* Prefetch next iteration. */
{
vlib_buffer_t *p2, *p3;
@@ -2489,6 +2565,15 @@ sr_local (vlib_main_t * vm,
b0 = vlib_get_buffer (vm, bi0);
ip0 = vlib_buffer_get_current (b0);
sr0 = (ip6_sr_header_t *) (ip0 + 1);
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+ sr0 =
+ (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *)
+ ext_hdr);
+ }
if (PREDICT_FALSE (sr0->type != ROUTING_HEADER_TYPE_SR))
{
@@ -2550,13 +2635,26 @@ sr_local (vlib_main_t * vm,
{
u64 *copy_dst0, *copy_src0;
u16 new_l0;
+ u32 copy_len_u64s0 = 0;
+ int i;
+
/*
* Copy the ip6 header right by the (real) length of the
- * sr header. Here's another place which assumes that
- * the sr header is the only extention header.
+ * sr header.
*/
-
- ip0->protocol = sr0->protocol;
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+ copy_len_u64s0 =
+ (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1;
+ ext_hdr->next_hdr = sr0->protocol;
+ }
+ else
+ {
+ ip0->protocol = sr0->protocol;
+ }
vlib_buffer_advance (b0, (sr0->length + 1) * 8);
new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
@@ -2566,11 +2664,16 @@ sr_local (vlib_main_t * vm,
copy_src0 = (u64 *) ip0;
copy_dst0 = copy_src0 + (sr0->length + 1);
- copy_dst0[4] = copy_src0[4];
- copy_dst0[3] = copy_src0[3];
- copy_dst0[2] = copy_src0[2];
- copy_dst0[1] = copy_src0[1];
- copy_dst0[0] = copy_src0[0];
+ copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0];
+ copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0];
+ copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0];
+ copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0];
+ copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0];
+
+ for (i = copy_len_u64s0 - 1; i >= 0; i--)
+ {
+ copy_dst0[i] = copy_src0[i];
+ }
sr0 = 0;
}
@@ -2594,6 +2697,16 @@ sr_local (vlib_main_t * vm,
b1 = vlib_get_buffer (vm, bi1);
ip1 = vlib_buffer_get_current (b1);
sr1 = (ip6_sr_header_t *) (ip1 + 1);
+ if (PREDICT_FALSE
+ (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1);
+ sr1 =
+ (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *)
+ ext_hdr);
+ }
if (PREDICT_FALSE (sr1->type != ROUTING_HEADER_TYPE_SR))
{
@@ -2655,13 +2768,26 @@ sr_local (vlib_main_t * vm,
{
u64 *copy_dst1, *copy_src1;
u16 new_l1;
+ u32 copy_len_u64s1 = 0;
+ int i;
+
/*
* Copy the ip6 header right by the (real) length of the
- * sr header. Here's another place which assumes that
- * the sr header is the only extention header.
+ * sr header.
*/
-
- ip1->protocol = sr1->protocol;
+ if (PREDICT_FALSE
+ (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1);
+ copy_len_u64s1 =
+ (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1;
+ ext_hdr->next_hdr = sr1->protocol;
+ }
+ else
+ {
+ ip1->protocol = sr1->protocol;
+ }
vlib_buffer_advance (b1, (sr1->length + 1) * 8);
new_l1 = clib_net_to_host_u16 (ip1->payload_length) -
@@ -2671,11 +2797,16 @@ sr_local (vlib_main_t * vm,
copy_src1 = (u64 *) ip1;
copy_dst1 = copy_src1 + (sr1->length + 1);
- copy_dst1[4] = copy_src1[4];
- copy_dst1[3] = copy_src1[3];
- copy_dst1[2] = copy_src1[2];
- copy_dst1[1] = copy_src1[1];
- copy_dst1[0] = copy_src1[0];
+ copy_dst1[4 + copy_len_u64s1] = copy_src1[4 + copy_len_u64s1];
+ copy_dst1[3 + copy_len_u64s1] = copy_src1[3 + copy_len_u64s1];
+ copy_dst1[2 + copy_len_u64s1] = copy_src1[2 + copy_len_u64s1];
+ copy_dst1[1 + copy_len_u64s1] = copy_src1[1 + copy_len_u64s1];
+ copy_dst1[0 + copy_len_u64s1] = copy_src1[0 + copy_len_u64s1];
+
+ for (i = copy_len_u64s1 - 1; i >= 0; i--)
+ {
+ copy_dst1[i] = copy_src1[i];
+ }
sr1 = 0;
}
@@ -2721,6 +2852,15 @@ sr_local (vlib_main_t * vm,
ip0 = vlib_buffer_get_current (b0);
sr0 = (ip6_sr_header_t *) (ip0 + 1);
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+ sr0 =
+ (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *)
+ ext_hdr);
+ }
if (PREDICT_FALSE (sr0->type != ROUTING_HEADER_TYPE_SR))
{
next0 = SR_LOCAL_NEXT_ERROR;
@@ -2781,13 +2921,27 @@ sr_local (vlib_main_t * vm,
{
u64 *copy_dst0, *copy_src0;
u16 new_l0;
+ u32 copy_len_u64s0 = 0;
+ int i;
+
/*
* Copy the ip6 header right by the (real) length of the
- * sr header. Here's another place which assumes that
- * the sr header is the only extention header.
+ * sr header.
*/
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+ copy_len_u64s0 =
+ (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1;
+ ext_hdr->next_hdr = sr0->protocol;
+ }
+ else
+ {
+ ip0->protocol = sr0->protocol;
+ }
- ip0->protocol = sr0->protocol;
vlib_buffer_advance (b0, (sr0->length + 1) * 8);
new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
@@ -2796,12 +2950,16 @@ sr_local (vlib_main_t * vm,
copy_src0 = (u64 *) ip0;
copy_dst0 = copy_src0 + (sr0->length + 1);
+ copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0];
+ copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0];
+ copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0];
+ copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0];
+ copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0];
- copy_dst0[4] = copy_src0[4];
- copy_dst0[3] = copy_src0[3];
- copy_dst0[2] = copy_src0[2];
- copy_dst0[1] = copy_src0[1];
- copy_dst0[0] = copy_src0[0];
+ for (i = copy_len_u64s0 - 1; i >= 0; i--)
+ {
+ copy_dst0[i] = copy_src0[i];
+ }
sr0 = 0;
}
diff --git a/vnet/vnet/sr/sr_replicate.c b/vnet/vnet/sr/sr_replicate.c
index 9aa57873c8c..34cde3d2938 100644
--- a/vnet/vnet/sr/sr_replicate.c
+++ b/vnet/vnet/sr/sr_replicate.c
@@ -175,6 +175,8 @@ sr_replicate_node_fn (vlib_main_t * vm,
ip6_header_t *ip0 = 0, *hdr_ip0 = 0;
int num_replicas = 0;
int i;
+ u32 len_bytes = sizeof (ip6_header_t);
+ u8 next_hdr, ip_next_hdr = IPPROTO_IPV6_ROUTE;
bi0 = from[0];
@@ -187,6 +189,24 @@ sr_replicate_node_fn (vlib_main_t * vm,
ip0 = vlib_buffer_get_current (b0);
/* Skip forward to the punch-in point */
vlib_buffer_advance (b0, sizeof (*ip0));
+ next_hdr = ip0->protocol;
+
+ /* HBH must immediately follow ipv6 header */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ ip6_hop_by_hop_ext_t *ext_hdr =
+ (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0);
+ u32 ext_hdr_len = 0;
+ ext_hdr_len = ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr);
+ len_bytes += ext_hdr_len;
+ next_hdr = ext_hdr->next_hdr;
+ ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE;
+ ip_next_hdr = IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS;
+ /* Skip forward to the punch-in point */
+ vlib_buffer_advance (b0, ext_hdr_len);
+
+ }
orig_mb0 = rte_mbuf_from_vlib_buffer (b0);
@@ -198,8 +218,7 @@ sr_replicate_node_fn (vlib_main_t * vm,
orig_mb0->data_len = new_data_len0;
orig_mb0->pkt_len = new_pkt_len0;
- orig_mb0->data_off =
- (u16) (RTE_PKTMBUF_HEADROOM + b0->current_data);
+ orig_mb0->data_off += (u16) (b0->current_data);
/*
Before entering loop determine if we can allocate:
@@ -222,14 +241,49 @@ sr_replicate_node_fn (vlib_main_t * vm,
for (i = 0; i < num_replicas; i++)
{
+ uint8_t nb_seg;
+ struct rte_mbuf *clone0i;
+ vlib_buffer_t *clone0_c, *clone_b0;
+
+ t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]);
hdr_mb0 = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]);
if (i < (num_replicas - 1))
- /* Not the last tunnel to process */
- clone0 = rte_pktmbuf_clone
- (orig_mb0, bm->pktmbuf_pools[socket_id]);
+ {
+ /* Not the last tunnel to process */
+ clone0 = rte_pktmbuf_clone
+ (orig_mb0, bm->pktmbuf_pools[socket_id]);
+ nb_seg = 0;
+ clone0i = clone0;
+ clone0_c = NULL;
+ while ((clone0->nb_segs >= 1) && (nb_seg < clone0->nb_segs))
+ {
+
+ clone_b0 = vlib_buffer_from_rte_mbuf (clone0i);
+ vlib_buffer_init_for_free_list (clone_b0, fl);
+
+ ASSERT ((clone_b0->flags & VLIB_BUFFER_NEXT_PRESENT) ==
+ 0);
+ ASSERT (clone_b0->current_data == 0);
+
+ clone_b0->current_data =
+ (clone0i->buf_addr + clone0i->data_off) -
+ (void *) clone_b0->data;
+
+ clone_b0->current_length = clone0i->data_len;
+ if (PREDICT_FALSE (clone0_c != NULL))
+ {
+ clone0_c->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ clone0_c->next_buffer =
+ vlib_get_buffer_index (vm, clone_b0);
+ }
+ clone0_c = clone_b0;
+ clone0i = clone0i->next;
+ nb_seg++;
+ }
+ }
else
- /* Last tunnel to process, use original MB */
+ /* Last tunnel to process, use original MB */
clone0 = orig_mb0;
@@ -260,14 +314,14 @@ sr_replicate_node_fn (vlib_main_t * vm,
for (i = 0; i < num_replicas; i++)
{
vlib_buffer_t *hdr_b0;
+ u16 new_l0 = 0;
t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]);
-
/* Our replicas */
hdr_mb0 = hdr_vec[i];
clone0 = rte_mbuf_vec[i];
- hdr_mb0->data_len = sizeof (*ip0) + vec_len (t0->rewrite);
+ hdr_mb0->data_len = len_bytes + vec_len (t0->rewrite);
hdr_mb0->pkt_len = hdr_mb0->data_len +
vlib_buffer_length_in_chain (vm, orig_b0);
@@ -275,24 +329,33 @@ sr_replicate_node_fn (vlib_main_t * vm,
vlib_buffer_init_for_free_list (hdr_b0, fl);
- memcpy (hdr_b0->data, ip0, sizeof (*ip0));
- memcpy (hdr_b0->data + sizeof (*ip0), t0->rewrite,
+ memcpy (hdr_b0->data, ip0, len_bytes);
+ memcpy (hdr_b0->data + len_bytes, t0->rewrite,
vec_len (t0->rewrite));
hdr_b0->current_data = 0;
- hdr_b0->current_length = sizeof (*ip0) + vec_len (t0->rewrite);
+ hdr_b0->current_length = len_bytes + vec_len (t0->rewrite);
hdr_b0->flags = orig_b0->flags | VLIB_BUFFER_NEXT_PRESENT;
-
+ hdr_b0->trace_index = orig_b0->trace_index;
+ vnet_buffer (hdr_b0)->l2_classify.opaque_index = 0;
hdr_b0->total_length_not_including_first_buffer =
hdr_mb0->pkt_len - hdr_b0->current_length;
+ vnet_buffer (hdr_b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index;
hdr_ip0 = (ip6_header_t *) hdr_b0->data;
- hdr_ip0->payload_length =
- clib_host_to_net_u16 (hdr_mb0->data_len);
- hdr_sr0 = (ip6_sr_header_t *) (hdr_ip0 + 1);
- hdr_sr0->protocol = hdr_ip0->protocol;
- hdr_ip0->protocol = 43;
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length) +
+ vec_len (t0->rewrite);
+ hdr_ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ hdr_sr0 = (ip6_sr_header_t *) ((u8 *) hdr_ip0 + len_bytes);
+ /* $$$ tune */
+ clib_memcpy (hdr_sr0, t0->rewrite, vec_len (t0->rewrite));
+ hdr_sr0->protocol = next_hdr;
+ hdr_ip0->protocol = ip_next_hdr;
+
+ /* Copy dst address into the DA slot in the segment list */
+ clib_memcpy (hdr_sr0->segments, ip0->dst_address.as_u64,
+ sizeof (ip6_address_t));
/* Rewrite the ip6 dst address */
hdr_ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0];
@@ -318,7 +381,7 @@ sr_replicate_node_fn (vlib_main_t * vm,
hdr_mb0->tx_offload = clone0->tx_offload;
hdr_mb0->hash = clone0->hash;
- hdr_mb0->ol_flags = clone0->ol_flags;
+ hdr_mb0->ol_flags = clone0->ol_flags & ~(IND_ATTACHED_MBUF);
__rte_mbuf_sanity_check (hdr_mb0, 1);