diff options
author | Shwetha <shwethab@cisco.com> | 2016-09-13 11:51:00 +0100 |
---|---|---|
committer | Damjan Marion <dmarion.lists@gmail.com> | 2016-12-09 14:57:21 +0000 |
commit | b78292efdfaf70baf89c778973d4bb3b822e95dd (patch) | |
tree | ef510f2ca149ba06f40a17956d7aff41efdfa6fd /vnet | |
parent | 16a14cdb160160573e2d1ed69a52998cc30ce34f (diff) |
VPP-547: Fix for co-existence of HbH and RH headers in Segment Routing:
1. sr-rewrite - SR insertion in v6: the SR RH was inserted immediately after the v6 header,
but if an HbH header is present, RFC 2460 requires the HbH header to immediately follow
the v6 header. This is fixed: the SR RH is now inserted after the HbH header when one is present.
2. sr-local: when a v6 packet destined to an SR segment is received with an HbH header present,
it is not handed over to sr-local for processing. Fixed ip6-local handling to
skip HbH, as there is no registered handler for HbH for now.
3. sr-replicate - updated the dual-packet loop of sr_rewrite to handle replication; fixes in
sr-replicate to handle the presence of an HbH header.
Change-Id: I034523a42d2fedf97134761f956ab534babb8b36
Signed-off-by: Shwetha <shwethab@cisco.com>
Diffstat (limited to 'vnet')
-rw-r--r-- | vnet/vnet/ip/ip6_forward.c | 155 | ||||
-rw-r--r-- | vnet/vnet/ip/ip6_packet.h | 26 | ||||
-rw-r--r-- | vnet/vnet/sr/sr.c | 284 | ||||
-rw-r--r-- | vnet/vnet/sr/sr_replicate.c | 99 |
4 files changed, 457 insertions, 107 deletions
diff --git a/vnet/vnet/ip/ip6_forward.c b/vnet/vnet/ip/ip6_forward.c index a4ce65a6396..6b74b7c9135 100644 --- a/vnet/vnet/ip/ip6_forward.c +++ b/vnet/vnet/ip/ip6_forward.c @@ -1230,6 +1230,77 @@ u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) return p0->flags; } +/* ip6_locate_header + * + * This function is to search for the header specified by the find_hdr number. + * 1. If the find_hdr < 0 then it finds and returns the protocol number and + * offset stored in *offset of the transport or ESP header in the chain if + * found. + * 2. If a header with find_hdr > 0 protocol number is found then the + * offset is stored in *offset and protocol number of the header is + * returned. + * 3. If find_hdr header is not found or packet is malformed or + * it is a non-first fragment -1 is returned. + */ +always_inline int ip6_locate_header (vlib_buffer_t *p0, + ip6_header_t *ip0, + int find_hdr, + u32 *offset) +{ + u8 next_proto = ip0->protocol; + u8 *next_header; + u8 done = 0; + u32 cur_offset; + u8 *temp_nxthdr = 0; + u32 exthdr_len = 0; + + next_header = ip6_next_header(ip0); + cur_offset = sizeof(ip6_header_t); + while(1) + { + done = (next_proto == find_hdr); + if (PREDICT_FALSE(next_header >= (u8 *)vlib_buffer_get_current(p0) + p0->current_length)) + { + //A malicious packet could set an extension header with a too big size + return(-1); + } + if (done) + break; + if ((!ip6_ext_hdr(next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT) + { + if (find_hdr < 0) + break; + return -1; + } + if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION) + { + ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *)next_header; + u16 frag_off = ip6_frag_hdr_offset(frag_hdr); + /* Non first fragment return -1 */ + if (frag_off) + return(-1); + exthdr_len = sizeof(ip6_frag_hdr_t); + temp_nxthdr = next_header + exthdr_len; + } + else if (next_proto == IP_PROTOCOL_IPSEC_AH) + { + exthdr_len = ip6_ext_authhdr_len(((ip6_ext_header_t *)next_header)); + temp_nxthdr = 
next_header + exthdr_len; + } + else + { + exthdr_len = ip6_ext_header_len(((ip6_ext_header_t *)next_header)); + temp_nxthdr = next_header + exthdr_len; + } + next_proto = ((ip6_ext_header_t *)next_header)->next_hdr; + next_header = temp_nxthdr; + cur_offset += exthdr_len; + } + + *offset = cur_offset; + return(next_proto); +} + static uword ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -1263,6 +1334,7 @@ ip6_local (vlib_main_t * vm, i32 len_diff0, len_diff1; u8 error0, type0, good_l4_checksum0; u8 error1, type1, good_l4_checksum1; + u32 udp_offset0, udp_offset1; pi0 = to_next[0] = from[0]; pi1 = to_next[1] = from[1]; @@ -1288,26 +1360,48 @@ ip6_local (vlib_main_t * vm, good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; good_l4_checksum1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + len_diff0 = 0; + len_diff1 = 0; - udp0 = ip6_next_header (ip0); - udp1 = ip6_next_header (ip1); - - /* Don't verify UDP checksum for packets with explicit zero checksum. */ - good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0; - good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP && udp1->checksum == 0; + /* Skip HBH local processing */ + if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t *)ip6_next_header(ip0); + next0 = lm->local_next_by_ip_protocol[ext_hdr->next_hdr]; + type0 = lm->builtin_protocol_by_ip_protocol[ext_hdr->next_hdr]; + } + if (PREDICT_FALSE (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t *)ip6_next_header(ip1); + next1 = lm->local_next_by_ip_protocol[ext_hdr->next_hdr]; + type1 = lm->builtin_protocol_by_ip_protocol[ext_hdr->next_hdr]; + } + if (PREDICT_TRUE(IP_PROTOCOL_UDP == ip6_locate_header(p0, ip0, + IP_PROTOCOL_UDP, &udp_offset0))) + { + udp0 = (udp_header_t *)((u8 *)ip0 + udp_offset0); + /* Don't verify UDP checksum for packets with explicit zero 
checksum. */ + good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0; + /* Verify UDP length. */ + ip_len0 = clib_net_to_host_u16 (ip0->payload_length); + udp_len0 = clib_net_to_host_u16 (udp0->length); + len_diff0 = ip_len0 - udp_len0; + } + if (PREDICT_TRUE(IP_PROTOCOL_UDP == ip6_locate_header(p1, ip1, + IP_PROTOCOL_UDP, &udp_offset1))) + { + udp1 = (udp_header_t *)((u8 *)ip1 + udp_offset1); + /* Don't verify UDP checksum for packets with explicit zero checksum. */ + good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP && udp1->checksum == 0; + /* Verify UDP length. */ + ip_len1 = clib_net_to_host_u16 (ip1->payload_length); + udp_len1 = clib_net_to_host_u16 (udp1->length); + len_diff1 = ip_len1 - udp_len1; + } good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN; good_l4_checksum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN; - /* Verify UDP length. */ - ip_len0 = clib_net_to_host_u16 (ip0->payload_length); - ip_len1 = clib_net_to_host_u16 (ip1->payload_length); - udp_len0 = clib_net_to_host_u16 (udp0->length); - udp_len1 = clib_net_to_host_u16 (udp1->length); - - len_diff0 = ip_len0 - udp_len0; - len_diff1 = ip_len1 - udp_len1; - len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0; len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0; @@ -1382,6 +1476,7 @@ ip6_local (vlib_main_t * vm, u32 pi0, ip_len0, udp_len0, flags0, next0; i32 len_diff0; u8 error0, type0, good_l4_checksum0; + u32 udp_offset0; pi0 = to_next[0] = from[0]; from += 1; @@ -1399,20 +1494,28 @@ ip6_local (vlib_main_t * vm, flags0 = p0->flags; good_l4_checksum0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + len_diff0 = 0; - udp0 = ip6_next_header (ip0); - - /* Don't verify UDP checksum for packets with explicit zero checksum. 
*/ - good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0; + /* Skip HBH local processing */ + if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = (ip6_hop_by_hop_ext_t *)ip6_next_header(ip0); + next0 = lm->local_next_by_ip_protocol[ext_hdr->next_hdr]; + type0 = lm->builtin_protocol_by_ip_protocol[ext_hdr->next_hdr]; + } + if (PREDICT_TRUE(IP_PROTOCOL_UDP == ip6_locate_header(p0, ip0, + IP_PROTOCOL_UDP, &udp_offset0))) + { + udp0 = (udp_header_t *)((u8 *)ip0 + udp_offset0); + /* Don't verify UDP checksum for packets with explicit zero checksum. */ + good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP && udp0->checksum == 0; + /* Verify UDP length. */ + ip_len0 = clib_net_to_host_u16 (ip0->payload_length); + udp_len0 = clib_net_to_host_u16 (udp0->length); + len_diff0 = ip_len0 - udp_len0; + } good_l4_checksum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN; - - /* Verify UDP length. */ - ip_len0 = clib_net_to_host_u16 (ip0->payload_length); - udp_len0 = clib_net_to_host_u16 (udp0->length); - - len_diff0 = ip_len0 - udp_len0; - len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0; if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN diff --git a/vnet/vnet/ip/ip6_packet.h b/vnet/vnet/ip/ip6_packet.h index 456c011a182..d29a06942ef 100644 --- a/vnet/vnet/ip/ip6_packet.h +++ b/vnet/vnet/ip/ip6_packet.h @@ -412,6 +412,32 @@ typedef CLIB_PACKED (struct { u8 next_hdr; /* Length of this header plus option data in 8 byte units. 
*/ u8 n_data_u64s; +}) ip6_ext_header_t; + +always_inline u8 ip6_ext_hdr(u8 nexthdr) +{ + /* + * find out if nexthdr is an extension header or a protocol + */ + return (nexthdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) || + (nexthdr == IP_PROTOCOL_IP6_NONXT) || + (nexthdr == IP_PROTOCOL_IPV6_FRAGMENTATION) || + (nexthdr == IP_PROTOCOL_IPSEC_AH) || + (nexthdr == IP_PROTOCOL_IPV6_ROUTE) || + (nexthdr == IP_PROTOCOL_IP6_DESTINATION_OPTIONS); +} + +#define ip6_ext_header_len(p) (((p)->n_data_u64s+1) << 3) +#define ip6_ext_authhdr_len(p) (((p)->n_data_u64s+2) << 2) + +always_inline void * +ip6_ext_next_header (ip6_ext_header_t *ext_hdr ) +{ return (void *)((u8 *) ext_hdr + ip6_ext_header_len(ext_hdr)); } + +typedef CLIB_PACKED (struct { + u8 next_hdr; + /* Length of this header plus option data in 8 byte units. */ + u8 n_data_u64s; u8 data[0]; }) ip6_hop_by_hop_ext_t; diff --git a/vnet/vnet/sr/sr.c b/vnet/vnet/sr/sr.c index 287d52116bd..5d0275d992a 100644 --- a/vnet/vnet/sr/sr.c +++ b/vnet/vnet/sr/sr.c @@ -360,8 +360,6 @@ sr_rewrite (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame) { u32 n_left_from, next_index, *from, *to_next; - ip6_main_t *im = &ip6_main; - ip_lookup_main_t *lm = &im->lookup_main; ip6_sr_main_t *sm = &sr_main; u32 (*sr_local_cb) (vlib_main_t *, vlib_node_runtime_t *, vlib_buffer_t *, ip6_header_t *, ip6_sr_header_t *); @@ -384,7 +382,6 @@ sr_rewrite (vlib_main_t * vm, u32 bi0, bi1; vlib_buffer_t *b0, *b1; ip6_header_t *ip0, *ip1; - ip_adjacency_t *adj0, *adj1; ip6_sr_header_t *sr0, *sr1; ip6_sr_tunnel_t *t0, *t1; u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP; @@ -419,15 +416,12 @@ sr_rewrite (vlib_main_t * vm, * $$$ parse through header(s) to pick the point * where we punch in the SR extention header */ - - adj0 = - ip_get_adjacency (lm, vnet_buffer (b0)->ip.adj_index[VLIB_TX]); - adj1 = - ip_get_adjacency (lm, vnet_buffer (b1)->ip.adj_index[VLIB_TX]); t0 = - pool_elt_at_index (sm->tunnels, adj0->rewrite_header.sw_if_index); + 
pool_elt_at_index (sm->tunnels, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); t1 = - pool_elt_at_index (sm->tunnels, adj1->rewrite_header.sw_if_index); + pool_elt_at_index (sm->tunnels, + vnet_buffer (b1)->ip.adj_index[VLIB_TX]); ASSERT (VLIB_BUFFER_PRE_DATA_SIZE >= ((word) vec_len (t0->rewrite)) + b0->current_data); @@ -439,6 +433,16 @@ sr_rewrite (vlib_main_t * vm, ip0 = vlib_buffer_get_current (b0); ip1 = vlib_buffer_get_current (b1); +#if DPDK > 0 /* Cannot call replication node yet without DPDK */ + /* add a replication node */ + if (PREDICT_FALSE (t0->policy_index != ~0)) + { + vnet_buffer (b0)->ip.save_protocol = t0->policy_index; + next0 = SR_REWRITE_NEXT_SR_REPLICATE; + sr0 = (ip6_sr_header_t *) (t0->rewrite); + goto processnext; + } +#endif /* DPDK */ /* * SR-unaware service chaining case: pkt coming back from @@ -454,22 +458,41 @@ sr_rewrite (vlib_main_t * vm, } else { + u32 len_bytes = sizeof (ip6_header_t); + u8 next_hdr = ip0->protocol; + + /* HBH must immediately follow ipv6 header */ + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + len_bytes += + ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); + /* Ignoring the sr_local for now, if RH follows HBH here */ + next_hdr = ext_hdr->next_hdr; + ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; + } + else + { + ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */ + } /* * Copy data before the punch-in point left by the * required amount. Assume (for the moment) that only * the main packet header needs to be copied. 
*/ clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite), - ip0, sizeof (ip6_header_t)); + ip0, len_bytes); vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite)); ip0 = vlib_buffer_get_current (b0); - sr0 = (ip6_sr_header_t *) (ip0 + 1); + sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes); /* $$$ tune */ clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite)); /* Fix the next header chain */ - sr0->protocol = ip0->protocol; - ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */ + sr0->protocol = next_hdr; + new_l0 = clib_net_to_host_u16 (ip0->payload_length) + vec_len (t0->rewrite); ip0->payload_length = clib_host_to_net_u16 (new_l0); @@ -496,7 +519,17 @@ sr_rewrite (vlib_main_t * vm, b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; } } - +#if DPDK > 0 /* Cannot call replication node yet without DPDK */ + processnext: + /* add a replication node */ + if (PREDICT_FALSE (t1->policy_index != ~0)) + { + vnet_buffer (b1)->ip.save_protocol = t1->policy_index; + next1 = SR_REWRITE_NEXT_SR_REPLICATE; + sr1 = (ip6_sr_header_t *) (t1->rewrite); + goto trace00; + } +#endif /* DPDK */ if (PREDICT_FALSE (ip1->protocol == IPPROTO_IPV6_ROUTE)) { vlib_buffer_advance (b1, sizeof (ip1)); @@ -506,15 +539,38 @@ sr_rewrite (vlib_main_t * vm, } else { - clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite), - ip0, sizeof (ip6_header_t)); + u32 len_bytes = sizeof (ip6_header_t); + u8 next_hdr = ip1->protocol; + + /* HBH must immediately follow ipv6 header */ + if (PREDICT_FALSE + (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1); + len_bytes += + ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); + /* Ignoring the sr_local for now, if RH follows HBH here */ + next_hdr = ext_hdr->next_hdr; + ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; + } + else + { + ip1->protocol = IPPROTO_IPV6_ROUTE; + } + /* + * Copy data before the punch-in point left by the + * required amount. 
Assume (for the moment) that only + * the main packet header needs to be copied. + */ + clib_memcpy (((u8 *) ip1) - vec_len (t1->rewrite), + ip1, len_bytes); vlib_buffer_advance (b1, -(word) vec_len (t1->rewrite)); ip1 = vlib_buffer_get_current (b1); - sr1 = (ip6_sr_header_t *) (ip1 + 1); + sr1 = (ip6_sr_header_t *) ((u8 *) ip1 + len_bytes); clib_memcpy (sr1, t1->rewrite, vec_len (t1->rewrite)); - sr1->protocol = ip1->protocol; - ip1->protocol = IPPROTO_IPV6_ROUTE; + sr1->protocol = next_hdr; new_l1 = clib_net_to_host_u16 (ip1->payload_length) + vec_len (t1->rewrite); ip1->payload_length = clib_host_to_net_u16 (new_l1); @@ -541,6 +597,9 @@ sr_rewrite (vlib_main_t * vm, b1->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; } } +#if DPDK > 0 /* Cannot run replicate without DPDK and only replicate uses this label */ + trace00: +#endif /* DPDK */ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -553,7 +612,8 @@ sr_rewrite (vlib_main_t * vm, sizeof (tr->dst.as_u8)); tr->length = new_l0; tr->next_index = next0; - clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); + if (sr0) + clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); } if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) { @@ -566,9 +626,9 @@ sr_rewrite (vlib_main_t * vm, sizeof (tr->dst.as_u8)); tr->length = new_l1; tr->next_index = next1; - clib_memcpy (tr->sr, sr1, sizeof (tr->sr)); + if (sr1) + clib_memcpy (tr->sr, sr1, sizeof (tr->sr)); } - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, bi0, bi1, next0, next1); @@ -579,7 +639,6 @@ sr_rewrite (vlib_main_t * vm, u32 bi0; vlib_buffer_t *b0; ip6_header_t *ip0 = 0; - ip_adjacency_t *adj0; ip6_sr_header_t *sr0 = 0; ip6_sr_tunnel_t *t0; u32 next0 = SR_REWRITE_NEXT_IP6_LOOKUP; @@ -594,22 +653,21 @@ sr_rewrite (vlib_main_t * vm, b0 = vlib_get_buffer (vm, bi0); + /* * $$$ parse through header(s) to pick the point * where we punch in the SR extention header */ - - adj0 = - ip_get_adjacency (lm, vnet_buffer 
(b0)->ip.adj_index[VLIB_TX]); t0 = - pool_elt_at_index (sm->tunnels, adj0->rewrite_header.sw_if_index); - + pool_elt_at_index (sm->tunnels, + vnet_buffer (b0)->ip.adj_index[VLIB_TX]); #if DPDK > 0 /* Cannot call replication node yet without DPDK */ /* add a replication node */ if (PREDICT_FALSE (t0->policy_index != ~0)) { vnet_buffer (b0)->ip.save_protocol = t0->policy_index; next0 = SR_REWRITE_NEXT_SR_REPLICATE; + sr0 = (ip6_sr_header_t *) (t0->rewrite); goto trace0; } #endif /* DPDK */ @@ -635,22 +693,40 @@ sr_rewrite (vlib_main_t * vm, } else { + u32 len_bytes = sizeof (ip6_header_t); + u8 next_hdr = ip0->protocol; + + /* HBH must immediately follow ipv6 header */ + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + len_bytes += + ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); + next_hdr = ext_hdr->next_hdr; + ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; + /* Ignoring the sr_local for now, if RH follows HBH here */ + } + else + { + ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */ + } /* * Copy data before the punch-in point left by the * required amount. Assume (for the moment) that only * the main packet header needs to be copied. 
*/ clib_memcpy (((u8 *) ip0) - vec_len (t0->rewrite), - ip0, sizeof (ip6_header_t)); + ip0, len_bytes); vlib_buffer_advance (b0, -(word) vec_len (t0->rewrite)); ip0 = vlib_buffer_get_current (b0); - sr0 = (ip6_sr_header_t *) (ip0 + 1); + sr0 = (ip6_sr_header_t *) ((u8 *) ip0 + len_bytes); /* $$$ tune */ clib_memcpy (sr0, t0->rewrite, vec_len (t0->rewrite)); /* Fix the next header chain */ - sr0->protocol = ip0->protocol; - ip0->protocol = IPPROTO_IPV6_ROUTE; /* routing extension header */ + sr0->protocol = next_hdr; new_l0 = clib_net_to_host_u16 (ip0->payload_length) + vec_len (t0->rewrite); ip0->payload_length = clib_host_to_net_u16 (new_l0); @@ -677,10 +753,10 @@ sr_rewrite (vlib_main_t * vm, b0->error = node->errors[SR_REWRITE_ERROR_APP_CALLBACK]; } } - #if DPDK > 0 /* Cannot run replicate without DPDK and only replicate uses this label */ trace0: #endif /* DPDK */ + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) { sr_rewrite_trace_t *tr = vlib_add_trace (vm, node, @@ -695,14 +771,13 @@ sr_rewrite (vlib_main_t * vm, } tr->length = new_l0; tr->next_index = next0; - clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); + if (sr0) + clib_memcpy (tr->sr, sr0, sizeof (tr->sr)); } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, bi0, next0); } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); } return from_frame->n_vectors; @@ -2462,6 +2537,7 @@ sr_local (vlib_main_t * vm, ip6_address_t *new_dst0, *new_dst1; u32 next0 = SR_LOCAL_NEXT_IP6_LOOKUP; u32 next1 = SR_LOCAL_NEXT_IP6_LOOKUP; + /* Prefetch next iteration. 
*/ { vlib_buffer_t *p2, *p3; @@ -2489,6 +2565,15 @@ sr_local (vlib_main_t * vm, b0 = vlib_get_buffer (vm, bi0); ip0 = vlib_buffer_get_current (b0); sr0 = (ip6_sr_header_t *) (ip0 + 1); + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + sr0 = + (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *) + ext_hdr); + } if (PREDICT_FALSE (sr0->type != ROUTING_HEADER_TYPE_SR)) { @@ -2550,13 +2635,26 @@ sr_local (vlib_main_t * vm, { u64 *copy_dst0, *copy_src0; u16 new_l0; + u32 copy_len_u64s0 = 0; + int i; + /* * Copy the ip6 header right by the (real) length of the - * sr header. Here's another place which assumes that - * the sr header is the only extention header. + * sr header. */ - - ip0->protocol = sr0->protocol; + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + copy_len_u64s0 = + (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1; + ext_hdr->next_hdr = sr0->protocol; + } + else + { + ip0->protocol = sr0->protocol; + } vlib_buffer_advance (b0, (sr0->length + 1) * 8); new_l0 = clib_net_to_host_u16 (ip0->payload_length) - @@ -2566,11 +2664,16 @@ sr_local (vlib_main_t * vm, copy_src0 = (u64 *) ip0; copy_dst0 = copy_src0 + (sr0->length + 1); - copy_dst0[4] = copy_src0[4]; - copy_dst0[3] = copy_src0[3]; - copy_dst0[2] = copy_src0[2]; - copy_dst0[1] = copy_src0[1]; - copy_dst0[0] = copy_src0[0]; + copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0]; + copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0]; + copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0]; + copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0]; + copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0]; + + for (i = copy_len_u64s0 - 1; i >= 0; i--) + { + copy_dst0[i] = copy_src0[i]; + } sr0 = 0; } @@ -2594,6 +2697,16 @@ sr_local 
(vlib_main_t * vm, b1 = vlib_get_buffer (vm, bi1); ip1 = vlib_buffer_get_current (b1); sr1 = (ip6_sr_header_t *) (ip1 + 1); + if (PREDICT_FALSE + (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1); + sr1 = + (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *) + ext_hdr); + } if (PREDICT_FALSE (sr1->type != ROUTING_HEADER_TYPE_SR)) { @@ -2655,13 +2768,26 @@ sr_local (vlib_main_t * vm, { u64 *copy_dst1, *copy_src1; u16 new_l1; + u32 copy_len_u64s1 = 0; + int i; + /* * Copy the ip6 header right by the (real) length of the - * sr header. Here's another place which assumes that - * the sr header is the only extention header. + * sr header. */ - - ip1->protocol = sr1->protocol; + if (PREDICT_FALSE + (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip1); + copy_len_u64s1 = + (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1; + ext_hdr->next_hdr = sr1->protocol; + } + else + { + ip1->protocol = sr1->protocol; + } vlib_buffer_advance (b1, (sr1->length + 1) * 8); new_l1 = clib_net_to_host_u16 (ip1->payload_length) - @@ -2671,11 +2797,16 @@ sr_local (vlib_main_t * vm, copy_src1 = (u64 *) ip1; copy_dst1 = copy_src1 + (sr1->length + 1); - copy_dst1[4] = copy_src1[4]; - copy_dst1[3] = copy_src1[3]; - copy_dst1[2] = copy_src1[2]; - copy_dst1[1] = copy_src1[1]; - copy_dst1[0] = copy_src1[0]; + copy_dst1[4 + copy_len_u64s1] = copy_src1[4 + copy_len_u64s1]; + copy_dst1[3 + copy_len_u64s1] = copy_src1[3 + copy_len_u64s1]; + copy_dst1[2 + copy_len_u64s1] = copy_src1[2 + copy_len_u64s1]; + copy_dst1[1 + copy_len_u64s1] = copy_src1[1 + copy_len_u64s1]; + copy_dst1[0 + copy_len_u64s1] = copy_src1[0 + copy_len_u64s1]; + + for (i = copy_len_u64s1 - 1; i >= 0; i--) + { + copy_dst1[i] = copy_src1[i]; + } sr1 = 0; } @@ -2721,6 +2852,15 @@ sr_local (vlib_main_t * vm, ip0 = vlib_buffer_get_current (b0); sr0 
= (ip6_sr_header_t *) (ip0 + 1); + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + sr0 = + (ip6_sr_header_t *) ip6_ext_next_header ((ip6_ext_header_t *) + ext_hdr); + } if (PREDICT_FALSE (sr0->type != ROUTING_HEADER_TYPE_SR)) { next0 = SR_LOCAL_NEXT_ERROR; @@ -2781,13 +2921,27 @@ sr_local (vlib_main_t * vm, { u64 *copy_dst0, *copy_src0; u16 new_l0; + u32 copy_len_u64s0 = 0; + int i; + /* * Copy the ip6 header right by the (real) length of the - * sr header. Here's another place which assumes that - * the sr header is the only extention header. + * sr header. */ + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + copy_len_u64s0 = + (((ip6_ext_header_t *) ext_hdr)->n_data_u64s) + 1; + ext_hdr->next_hdr = sr0->protocol; + } + else + { + ip0->protocol = sr0->protocol; + } - ip0->protocol = sr0->protocol; vlib_buffer_advance (b0, (sr0->length + 1) * 8); new_l0 = clib_net_to_host_u16 (ip0->payload_length) - @@ -2796,12 +2950,16 @@ sr_local (vlib_main_t * vm, copy_src0 = (u64 *) ip0; copy_dst0 = copy_src0 + (sr0->length + 1); + copy_dst0[4 + copy_len_u64s0] = copy_src0[4 + copy_len_u64s0]; + copy_dst0[3 + copy_len_u64s0] = copy_src0[3 + copy_len_u64s0]; + copy_dst0[2 + copy_len_u64s0] = copy_src0[2 + copy_len_u64s0]; + copy_dst0[1 + copy_len_u64s0] = copy_src0[1 + copy_len_u64s0]; + copy_dst0[0 + copy_len_u64s0] = copy_src0[0 + copy_len_u64s0]; - copy_dst0[4] = copy_src0[4]; - copy_dst0[3] = copy_src0[3]; - copy_dst0[2] = copy_src0[2]; - copy_dst0[1] = copy_src0[1]; - copy_dst0[0] = copy_src0[0]; + for (i = copy_len_u64s0 - 1; i >= 0; i--) + { + copy_dst0[i] = copy_src0[i]; + } sr0 = 0; } diff --git a/vnet/vnet/sr/sr_replicate.c b/vnet/vnet/sr/sr_replicate.c index 9aa57873c8c..34cde3d2938 100644 --- a/vnet/vnet/sr/sr_replicate.c +++ 
b/vnet/vnet/sr/sr_replicate.c @@ -175,6 +175,8 @@ sr_replicate_node_fn (vlib_main_t * vm, ip6_header_t *ip0 = 0, *hdr_ip0 = 0; int num_replicas = 0; int i; + u32 len_bytes = sizeof (ip6_header_t); + u8 next_hdr, ip_next_hdr = IPPROTO_IPV6_ROUTE; bi0 = from[0]; @@ -187,6 +189,24 @@ sr_replicate_node_fn (vlib_main_t * vm, ip0 = vlib_buffer_get_current (b0); /* Skip forward to the punch-in point */ vlib_buffer_advance (b0, sizeof (*ip0)); + next_hdr = ip0->protocol; + + /* HBH must immediately follow ipv6 header */ + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) ip6_next_header (ip0); + u32 ext_hdr_len = 0; + ext_hdr_len = ip6_ext_header_len ((ip6_ext_header_t *) ext_hdr); + len_bytes += ext_hdr_len; + next_hdr = ext_hdr->next_hdr; + ext_hdr->next_hdr = IPPROTO_IPV6_ROUTE; + ip_next_hdr = IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS; + /* Skip forward to the punch-in point */ + vlib_buffer_advance (b0, ext_hdr_len); + + } orig_mb0 = rte_mbuf_from_vlib_buffer (b0); @@ -198,8 +218,7 @@ sr_replicate_node_fn (vlib_main_t * vm, orig_mb0->data_len = new_data_len0; orig_mb0->pkt_len = new_pkt_len0; - orig_mb0->data_off = - (u16) (RTE_PKTMBUF_HEADROOM + b0->current_data); + orig_mb0->data_off += (u16) (b0->current_data); /* Before entering loop determine if we can allocate: @@ -222,14 +241,49 @@ sr_replicate_node_fn (vlib_main_t * vm, for (i = 0; i < num_replicas; i++) { + uint8_t nb_seg; + struct rte_mbuf *clone0i; + vlib_buffer_t *clone0_c, *clone_b0; + + t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]); hdr_mb0 = rte_pktmbuf_alloc (bm->pktmbuf_pools[socket_id]); if (i < (num_replicas - 1)) - /* Not the last tunnel to process */ - clone0 = rte_pktmbuf_clone - (orig_mb0, bm->pktmbuf_pools[socket_id]); + { + /* Not the last tunnel to process */ + clone0 = rte_pktmbuf_clone + (orig_mb0, bm->pktmbuf_pools[socket_id]); + nb_seg = 0; + clone0i = clone0; + clone0_c = NULL; + while 
((clone0->nb_segs >= 1) && (nb_seg < clone0->nb_segs)) + { + + clone_b0 = vlib_buffer_from_rte_mbuf (clone0i); + vlib_buffer_init_for_free_list (clone_b0, fl); + + ASSERT ((clone_b0->flags & VLIB_BUFFER_NEXT_PRESENT) == + 0); + ASSERT (clone_b0->current_data == 0); + + clone_b0->current_data = + (clone0i->buf_addr + clone0i->data_off) - + (void *) clone_b0->data; + + clone_b0->current_length = clone0i->data_len; + if (PREDICT_FALSE (clone0_c != NULL)) + { + clone0_c->flags |= VLIB_BUFFER_NEXT_PRESENT; + clone0_c->next_buffer = + vlib_get_buffer_index (vm, clone_b0); + } + clone0_c = clone_b0; + clone0i = clone0i->next; + nb_seg++; + } + } else - /* Last tunnel to process, use original MB */ + /* First tunnel to process, use original MB */ clone0 = orig_mb0; @@ -260,14 +314,14 @@ sr_replicate_node_fn (vlib_main_t * vm, for (i = 0; i < num_replicas; i++) { vlib_buffer_t *hdr_b0; + u16 new_l0 = 0; t0 = vec_elt_at_index (sm->tunnels, pol0->tunnel_indices[i]); - /* Our replicas */ hdr_mb0 = hdr_vec[i]; clone0 = rte_mbuf_vec[i]; - hdr_mb0->data_len = sizeof (*ip0) + vec_len (t0->rewrite); + hdr_mb0->data_len = len_bytes + vec_len (t0->rewrite); hdr_mb0->pkt_len = hdr_mb0->data_len + vlib_buffer_length_in_chain (vm, orig_b0); @@ -275,24 +329,33 @@ sr_replicate_node_fn (vlib_main_t * vm, vlib_buffer_init_for_free_list (hdr_b0, fl); - memcpy (hdr_b0->data, ip0, sizeof (*ip0)); - memcpy (hdr_b0->data + sizeof (*ip0), t0->rewrite, + memcpy (hdr_b0->data, ip0, len_bytes); + memcpy (hdr_b0->data + len_bytes, t0->rewrite, vec_len (t0->rewrite)); hdr_b0->current_data = 0; - hdr_b0->current_length = sizeof (*ip0) + vec_len (t0->rewrite); + hdr_b0->current_length = len_bytes + vec_len (t0->rewrite); hdr_b0->flags = orig_b0->flags | VLIB_BUFFER_NEXT_PRESENT; - + hdr_b0->trace_index = orig_b0->trace_index; + vnet_buffer (hdr_b0)->l2_classify.opaque_index = 0; hdr_b0->total_length_not_including_first_buffer = hdr_mb0->pkt_len - hdr_b0->current_length; + vnet_buffer 
(hdr_b0)->sw_if_index[VLIB_TX] = t0->tx_fib_index; hdr_ip0 = (ip6_header_t *) hdr_b0->data; - hdr_ip0->payload_length = - clib_host_to_net_u16 (hdr_mb0->data_len); - hdr_sr0 = (ip6_sr_header_t *) (hdr_ip0 + 1); - hdr_sr0->protocol = hdr_ip0->protocol; - hdr_ip0->protocol = 43; + new_l0 = clib_net_to_host_u16 (ip0->payload_length) + + vec_len (t0->rewrite); + hdr_ip0->payload_length = clib_host_to_net_u16 (new_l0); + hdr_sr0 = (ip6_sr_header_t *) ((u8 *) hdr_ip0 + len_bytes); + /* $$$ tune */ + clib_memcpy (hdr_sr0, t0->rewrite, vec_len (t0->rewrite)); + hdr_sr0->protocol = next_hdr; + hdr_ip0->protocol = ip_next_hdr; + + /* Copy dst address into the DA slot in the segment list */ + clib_memcpy (hdr_sr0->segments, ip0->dst_address.as_u64, + sizeof (ip6_address_t)); /* Rewrite the ip6 dst address */ hdr_ip0->dst_address.as_u64[0] = t0->first_hop.as_u64[0]; @@ -318,7 +381,7 @@ sr_replicate_node_fn (vlib_main_t * vm, hdr_mb0->tx_offload = clone0->tx_offload; hdr_mb0->hash = clone0->hash; - hdr_mb0->ol_flags = clone0->ol_flags; + hdr_mb0->ol_flags = clone0->ol_flags & ~(IND_ATTACHED_MBUF); __rte_mbuf_sanity_check (hdr_mb0, 1); |