diff options
Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/map/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/plugins/map/ip4_map.c | 41 | ||||
-rw-r--r-- | src/plugins/map/ip4_map_t.c | 176 | ||||
-rw-r--r-- | src/plugins/map/ip6_map.c | 26 | ||||
-rw-r--r-- | src/plugins/map/ip6_map_t.c | 375 | ||||
-rw-r--r-- | src/plugins/map/map.api | 5 | ||||
-rw-r--r-- | src/plugins/map/map.c | 138 | ||||
-rw-r--r-- | src/plugins/map/map.h | 116 | ||||
-rw-r--r-- | src/plugins/map/map_api.c | 7 | ||||
-rw-r--r-- | src/plugins/map/map_dpo.c | 130 | ||||
-rw-r--r-- | src/plugins/map/map_dpo.h | 43 |
11 files changed, 582 insertions, 477 deletions
diff --git a/src/plugins/map/CMakeLists.txt b/src/plugins/map/CMakeLists.txt index 2d604e6e4d8..2919199c938 100644 --- a/src/plugins/map/CMakeLists.txt +++ b/src/plugins/map/CMakeLists.txt @@ -19,7 +19,6 @@ add_vpp_plugin(map ip6_map_t.c map_api.c map.c - map_dpo.c lpm.c API_FILES @@ -28,6 +27,5 @@ add_vpp_plugin(map INSTALL_HEADERS map_all_api_h.h map_msg_enum.h - map_dpo.h map.h ) diff --git a/src/plugins/map/ip4_map.c b/src/plugins/map/ip4_map.c index 487b9212681..64da602e838 100644 --- a/src/plugins/map/ip4_map.c +++ b/src/plugins/map/ip4_map.c @@ -248,12 +248,12 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p1 = vlib_get_buffer (vm, pi1); ip40 = vlib_buffer_get_current (p0); ip41 = vlib_buffer_get_current (p1); - map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - d0 = ip4_map_get_domain (map_domain_index0); - map_domain_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; - d1 = ip4_map_get_domain (map_domain_index1); - ASSERT (d0); - ASSERT (d1); + d0 = + ip4_map_get_domain (&ip40->dst_address, &map_domain_index0, + &error0); + d1 = + ip4_map_get_domain (&ip41->dst_address, &map_domain_index1, + &error1); /* * Shared IPv4 address @@ -417,9 +417,15 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p0 = vlib_get_buffer (vm, pi0); ip40 = vlib_buffer_get_current (p0); - map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - d0 = ip4_map_get_domain (map_domain_index0); - ASSERT (d0); + + d0 = + ip4_map_get_domain (&ip40->dst_address, &map_domain_index0, + &error0); + if (!d0) + { /* Guess it wasn't for us */ + vnet_feature_next (&next0, p0); + goto exit; + } /* * Shared IPv4 address @@ -495,6 +501,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) } p0->error = error_node->errors[error0]; + exit: vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, pi0, next0); } @@ -537,7 +544,7 @@ ip4_map_reass (vlib_main_t * vm, i32 port0 = 0; ip6_header_t *ip60; u32 next0 = IP4_MAP_REASS_NEXT_IP6_LOOKUP; - u32 map_domain_index0; + u32 map_domain_index0 = ~0; u8 cached = 0; pi0 = to_next[0] = from[0]; @@ -549,8 +556,9 @@ ip4_map_reass (vlib_main_t * vm, p0 = vlib_get_buffer (vm, pi0); ip60 = vlib_buffer_get_current (p0); ip40 = (ip4_header_t *) (ip60 + 1); - map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - d0 = ip4_map_get_domain (map_domain_index0); + d0 = + ip4_map_get_domain (&ip40->dst_address, &map_domain_index0, + &error0); map_ip4_reass_lock (); map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32, @@ -698,7 +706,16 @@ static char *map_error_strings[] = { #undef _ }; + /* *INDENT-OFF* */ +VNET_FEATURE_INIT (ip4_map_feature, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "ip4-map", + .runs_before = + VNET_FEATURES ("ip4-flow-classify"), +}; + VLIB_REGISTER_NODE(ip4_map_node) = { .function = ip4_map, .name = "ip4-map", diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c index 67df6a087b9..21d17d77910 100644 --- a/src/plugins/map/ip4_map_t.c +++ b/src/plugins/map/ip4_map_t.c @@ -229,10 +229,44 @@ ip4_map_t_icmp (vlib_main_t * vm, return frame->n_vectors; } -static int -ip4_to_ip6_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *ctx) +/* + * Translate fragmented IPv4 UDP/TCP packet to IPv6. + */ +always_inline int +map_ip4_to_ip6_fragmented (vlib_buffer_t * p, + ip4_mapt_pseudo_header_t * pheader) { - ip4_mapt_pseudo_header_t *pheader = ctx; + ip4_header_t *ip4; + ip6_header_t *ip6; + ip6_frag_hdr_t *frag; + + ip4 = vlib_buffer_get_current (p); + frag = (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); + ip6 = + (ip6_header_t *) u8_ptr_add (ip4, + sizeof (*ip4) - sizeof (*frag) - + sizeof (*ip6)); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); + + //We know that the protocol was one of ICMP, TCP or UDP + //because the first fragment was found and cached + frag->next_hdr = + (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol; + frag->identification = frag_id_4to6 (ip4->fragment_id); + frag->rsv = 0; + frag->fragment_offset_and_more = + ip6_frag_hdr_offset_and_more (ip4_get_fragment_offset (ip4), + clib_net_to_host_u16 + (ip4->flags_and_fragment_offset) & + IP4_HEADER_FLAG_MORE_FRAGMENTS); + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + ip6->payload_length = + clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - + sizeof (*ip4) + sizeof (*frag)); + ip6->hop_limit = ip4->ttl; + ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; ip6->dst_address.as_u64[0] = pheader->daddr.as_u64[0]; ip6->dst_address.as_u64[1] = pheader->daddr.as_u64[1]; @@ -277,7 +311,7 @@ ip4_map_t_fragmented (vlib_main_t * vm, pheader0 = vlib_buffer_get_current (p0); vlib_buffer_advance (p0, sizeof (*pheader0)); - if (ip4_to_ip6_fragmented (p0, ip4_to_ip6_set_cb, pheader0)) + if (map_ip4_to_ip6_fragmented (p0, pheader0)) { p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; next0 = IP4_MAPT_FRAGMENTED_NEXT_DROP; @@ -302,6 +336,110 @@ ip4_map_t_fragmented (vlib_main_t * vm, return frame->n_vectors; } +/* + * Translate IPv4 UDP/TCP packet to IPv6. + */ +always_inline int +map_ip4_to_ip6_tcp_udp (vlib_buffer_t * p, ip4_mapt_pseudo_header_t * pheader) +{ + map_main_t *mm = &map_main; + ip4_header_t *ip4; + ip6_header_t *ip6; + ip_csum_t csum; + u16 *checksum; + ip6_frag_hdr_t *frag; + u32 frag_id; + ip4_address_t old_src, old_dst; + + ip4 = vlib_buffer_get_current (p); + + if (ip4->protocol == IP_PROTOCOL_UDP) + { + udp_header_t *udp = ip4_next_header (ip4); + checksum = &udp->checksum; + + /* + * UDP checksum is optional over IPv4 but mandatory for IPv6 We + * do not check udp->length sanity but use our safe computed + * value instead + */ + if (PREDICT_FALSE (!*checksum)) + { + u16 udp_len = clib_host_to_net_u16 (ip4->length) - sizeof (*ip4); + csum = ip_incremental_checksum (0, udp, udp_len); + csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len)); + csum = + ip_csum_with_carry (csum, clib_host_to_net_u16 (IP_PROTOCOL_UDP)); + csum = ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address))); + *checksum = ~ip_csum_fold (csum); + } + } + else + { + tcp_header_t *tcp = ip4_next_header (ip4); + if (mm->tcp_mss > 0) + { + csum = tcp->checksum; + map_mss_clamping (tcp, &csum, mm->tcp_mss); + tcp->checksum = ip_csum_fold (csum); + } + checksum = &tcp->checksum; + } + + old_src.as_u32 = ip4->src_address.as_u32; + old_dst.as_u32 = ip4->dst_address.as_u32; + + /* Deal with fragmented packets */ + if (PREDICT_FALSE (ip4->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS))) + { + ip6 = + (ip6_header_t *) u8_ptr_add (ip4, + sizeof (*ip4) - sizeof (*ip6) - + sizeof (*frag)); + frag = + (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); + frag_id = frag_id_4to6 (ip4->fragment_id); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); + } + else + { + ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6)); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); + frag = NULL; + } + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4)); + ip6->hop_limit = ip4->ttl; + ip6->protocol = ip4->protocol; + if (PREDICT_FALSE (frag != NULL)) + { + frag->next_hdr = ip6->protocol; + frag->identification = frag_id; + frag->rsv = 0; + frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1); + ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag)); + } + + ip6->dst_address.as_u64[0] = pheader->daddr.as_u64[0]; + ip6->dst_address.as_u64[1] = pheader->daddr.as_u64[1]; + ip6->src_address.as_u64[0] = pheader->saddr.as_u64[0]; + ip6->src_address.as_u64[1] = pheader->saddr.as_u64[1]; + + csum = ip_csum_sub_even (*checksum, old_src.as_u32); + csum = ip_csum_sub_even (csum, old_dst.as_u32); + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); + *checksum = ip_csum_fold (csum); + + return 0; +} + static uword ip4_map_t_tcp_udp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -338,7 +476,7 @@ ip4_map_t_tcp_udp (vlib_main_t * vm, pheader0 = vlib_buffer_get_current (p0); vlib_buffer_advance (p0, sizeof (*pheader0)); - if (ip4_to_ip6_tcp_udp (p0, ip4_to_ip6_set_cb, pheader0)) + if (map_ip4_to_ip6_tcp_udp (p0, pheader0)) { p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; next0 = IP4_MAPT_TCP_UDP_NEXT_DROP; @@ -436,7 +574,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vlib_buffer_t *p0; ip4_header_t *ip40; map_domain_t *d0; - ip4_mapt_next_t next0; + ip4_mapt_next_t next0 = 0; u16 ip4_len0; u8 error0; i32 dst_port0; @@ -456,12 +594,17 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip40->ip_version_and_header_length != 0x45)) { error0 = MAP_ERROR_UNKNOWN; - next0 = IP4_MAPT_NEXT_DROP; } - vnet_buffer (p0)->map_t.map_domain_index = - vnet_buffer (p0)->ip.adj_index[VLIB_TX]; - d0 = ip4_map_get_domain (vnet_buffer (p0)->map_t.map_domain_index); + d0 = ip4_map_get_domain (&ip40->dst_address, + &vnet_buffer (p0)->map_t.map_domain_index, + &error0); + + if (!d0) + { /* Guess it wasn't for us */ + vnet_feature_next (&next0, p0); + goto exit; + } vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; @@ -489,9 +632,10 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) pheader0->daddr.as_u64[1] = map_get_sfx_net (d0, ip40->dst_address.as_u32, (u16) dst_port0); - //It is important to cache at this stage because the result might be necessary - //for packets within the same vector. - //Actually, this approach even provides some limited out-of-order fragments support + // It is important to cache at this stage because the result + // might be necessary for packets within the same vector. + // Actually, this approach even provides some limited + // out-of-order fragments support if (PREDICT_FALSE (ip4_is_first_fragment (ip40) && (dst_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) @@ -513,6 +657,7 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0; p0->error = error_node->errors[error0]; + exit: vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, pi0, next0); @@ -528,6 +673,11 @@ static char *map_t_error_strings[] = { #undef _ }; +VNET_FEATURE_INIT (ip4_map_t_feature, static) = +{ +.arc_name = "ip4-unicast",.node_name = "ip4-map-t",.runs_before = + VNET_FEATURES ("ip4-flow-classify"),}; + /* *INDENT-OFF* */ VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = { .function = ip4_map_t_fragmented, diff --git a/src/plugins/map/ip6_map.c b/src/plugins/map/ip6_map.c index 35b58110876..d9db602bad8 100644 --- a/src/plugins/map/ip6_map.c +++ b/src/plugins/map/ip6_map.c @@ -244,8 +244,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) && clib_net_to_host_u16 (ip60->payload_length) > 20)) { d0 = - ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & ip40-> + ip4_map_get_domain ((ip4_address_t *) & ip40-> src_address.as_u32, &map_domain_index0, &error0); } @@ -272,8 +271,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) && clib_net_to_host_u16 (ip61->payload_length) > 20)) { d1 = - ip6_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & ip41-> + ip4_map_get_domain ((ip4_address_t *) & ip41-> src_address.as_u32, &map_domain_index1, &error1); } @@ -455,8 +453,7 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) && clib_net_to_host_u16 (ip60->payload_length) > 20)) { d0 = - ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & ip40-> + ip4_map_get_domain ((ip4_address_t *) & ip40-> src_address.as_u32, &map_domain_index0, &error0); } @@ -478,7 +475,10 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) } else { - error0 = MAP_ERROR_BAD_PROTOCOL; + /* XXX: Move get_domain to ip6_get_domain lookup on source */ + //error0 = MAP_ERROR_BAD_PROTOCOL; + vlib_buffer_advance (p0, -sizeof (ip6_header_t)); + vnet_feature_next (&next0, p0); } if (d0) @@ -811,7 +811,7 @@ ip6_map_ip6_reass (vlib_main_t * vm, } /* - * ip6_ip4_virt_reass + * ip6_map_ip4_reass */ static uword ip6_map_ip4_reass (vlib_main_t * vm, @@ -858,8 +858,7 @@ ip6_map_ip4_reass (vlib_main_t * vm, ip60 = ((ip6_header_t *) ip40) - 1; d0 = - ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & ip40->src_address.as_u32, + ip4_map_get_domain ((ip4_address_t *) & ip40->src_address.as_u32, &map_domain_index0, &error0); map_ip4_reass_lock (); @@ -1172,6 +1171,13 @@ static char *map_error_strings[] = { }; /* *INDENT-OFF* */ +VNET_FEATURE_INIT (ip6_map_feature, static) = +{ + .arc_name = "ip6-unicast", + .node_name = "ip6-map", + .runs_before = VNET_FEATURES ("ip6-flow-classify"), +}; + VLIB_REGISTER_NODE(ip6_map_node) = { .function = ip6_map, .name = "ip6-map", diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c index 069d392de09..21d6e10c5ae 100644 --- a/src/plugins/map/ip6_map_t.c +++ b/src/plugins/map/ip6_map_t.c @@ -14,9 +14,9 @@ */ #include "map.h" -#include <vnet/ip/ip_frag.h> -#include <vnet/ip/ip6_to_ip4.h> #include <vnet/ip/ip4_to_ip6.h> +#include <vnet/ip/ip6_to_ip4.h> +#include <vnet/ip/ip_frag.h> typedef enum { @@ -57,16 +57,13 @@ ip6_map_fragment_cache (ip6_header_t * ip6, ip6_frag_hdr_t * frag, { u32 *ignore = NULL; map_ip4_reass_lock (); - map_ip4_reass_t *r = map_ip4_reass_get (map_get_ip4 (&ip6->src_address, - d->flags), - ip6_map_t_embedded_address (d, - &ip6-> - dst_address), - frag_id_6to4 (frag->identification), - (ip6->protocol == - IP_PROTOCOL_ICMP6) ? - IP_PROTOCOL_ICMP : ip6->protocol, - &ignore); + map_ip4_reass_t *r = + map_ip4_reass_get (map_get_ip4 (&ip6->src_address, d->ip6_src_len), + ip6_map_t_embedded_address (d, &ip6->dst_address), + frag_id_6to4 (frag->identification), + (ip6->protocol == + IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : ip6->protocol, + &ignore); if (r) r->port = port; @@ -81,16 +78,13 @@ ip6_map_fragment_get (ip6_header_t * ip6, ip6_frag_hdr_t * frag, { u32 *ignore = NULL; map_ip4_reass_lock (); - map_ip4_reass_t *r = map_ip4_reass_get (map_get_ip4 (&ip6->src_address, - d->flags), - ip6_map_t_embedded_address (d, - &ip6-> - dst_address), - frag_id_6to4 (frag->identification), - (ip6->protocol == - IP_PROTOCOL_ICMP6) ? - IP_PROTOCOL_ICMP : ip6->protocol, - &ignore); + map_ip4_reass_t *r = + map_ip4_reass_get (map_get_ip4 (&ip6->src_address, d->ip6_src_len), + ip6_map_t_embedded_address (d, &ip6->dst_address), + frag_id_6to4 (frag->identification), + (ip6->protocol == + IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : ip6->protocol, + &ignore); i32 ret = r ? r->port : -1; map_ip4_reass_unlock (); return ret; @@ -108,8 +102,9 @@ ip6_to_ip4_set_icmp_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) icmp6_to_icmp_ctx_t *ctx = arg; u32 ip4_sadr; - //Security check - //Note that this prevents an intermediate IPv6 router from answering the request + // Security check + // Note that this prevents an intermediate IPv6 router from answering + // the request. ip4_sadr = map_get_ip4 (&ip6->src_address, ctx->d->flags); if (ip6->src_address.as_u64[0] != map_get_pfx_net (ctx->d, ip4_sadr, ctx->sender_port) @@ -211,7 +206,7 @@ ip6_map_t_icmp (vlib_main_t * vm, if (vnet_buffer (p0)->map_t.mtu < p0->current_length) { - //Send to fragmentation node if necessary + // Send to fragmentation node if necessary vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG; @@ -240,14 +235,53 @@ ip6_map_t_icmp (vlib_main_t * vm, return frame->n_vectors; } -static int -ip6_to_ip4_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *ctx) +/* + * Translate IPv6 fragmented packet to IPv4. + */ +always_inline int +map_ip6_to_ip4_fragmented (vlib_buffer_t * p) { - vlib_buffer_t *p = ctx; + ip6_header_t *ip6; + ip6_frag_hdr_t *frag; + ip4_header_t *ip4; + u16 frag_id; + u8 frag_more; + u16 frag_offset; + u8 l4_protocol; + u16 l4_offset; + + ip6 = vlib_buffer_get_current (p); + + if (ip6_parse + (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) + return -1; + + frag = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); + ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); + vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); + + frag_id = frag_id_6to4 (frag->identification); + frag_more = ip6_frag_hdr_more (frag); + frag_offset = ip6_frag_hdr_offset (frag); ip4->dst_address.as_u32 = vnet_buffer (p)->map_t.v6.daddr; ip4->src_address.as_u32 = vnet_buffer (p)->map_t.v6.saddr; + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip6); + ip4->length = + u16_net_add (ip6->payload_length, + sizeof (*ip4) - l4_offset + sizeof (*ip6)); + ip4->fragment_id = frag_id; + ip4->flags_and_fragment_offset = + clib_host_to_net_u16 (frag_offset | + (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0)); + ip4->ttl = ip6->hop_limit; + ip4->protocol = + (l4_protocol == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : l4_protocol; + ip4->checksum = ip4_header_checksum (ip4); + return 0; } @@ -281,7 +315,7 @@ ip6_map_t_fragmented (vlib_main_t * vm, next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; p0 = vlib_get_buffer (vm, pi0); - if (ip6_to_ip4_fragmented (p0, ip6_to_ip4_set_cb, p0)) + if (map_ip6_to_ip4_fragmented (p0)) { p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; next0 = IP6_MAPT_FRAGMENTED_NEXT_DROP; @@ -290,7 +324,7 @@ ip6_map_t_fragmented (vlib_main_t * vm, { if (vnet_buffer (p0)->map_t.mtu < p0->current_length) { - //Send to fragmentation node if necessary + // Send to fragmentation node if necessary vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; @@ -307,6 +341,103 @@ ip6_map_t_fragmented (vlib_main_t * vm, return frame->n_vectors; } +/* + * Translate IPv6 UDP/TCP packet to IPv4. + */ +always_inline int +map_ip6_to_ip4_tcp_udp (vlib_buffer_t * p, bool udp_checksum) +{ + map_main_t *mm = &map_main; + ip6_header_t *ip6; + u16 *checksum; + ip_csum_t csum = 0; + ip4_header_t *ip4; + u16 fragment_id; + u16 flags; + u16 frag_offset; + u8 l4_protocol; + u16 l4_offset; + ip6_address_t old_src, old_dst; + + ip6 = vlib_buffer_get_current (p); + + if (ip6_parse + (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) + return -1; + + if (l4_protocol == IP_PROTOCOL_TCP) + { + tcp_header_t *tcp = ip6_next_header (ip6); + if (mm->tcp_mss > 0) + { + csum = tcp->checksum; + map_mss_clamping (tcp, &csum, mm->tcp_mss); + tcp->checksum = ip_csum_fold (csum); + } + checksum = &tcp->checksum; + } + else + { + udp_header_t *udp = ip6_next_header (ip6); + checksum = &udp->checksum; + } + + old_src.as_u64[0] = ip6->src_address.as_u64[0]; + old_src.as_u64[1] = ip6->src_address.as_u64[1]; + old_dst.as_u64[0] = ip6->dst_address.as_u64[0]; + old_dst.as_u64[1] = ip6->dst_address.as_u64[1]; + + ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); + + vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); + + if (PREDICT_FALSE (frag_offset)) + { + // Only the first fragment + ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); + fragment_id = frag_id_6to4 (hdr->identification); + flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); + } + else + { + fragment_id = 0; + flags = 0; + } + + ip4->dst_address.as_u32 = vnet_buffer (p)->map_t.v6.daddr; + ip4->src_address.as_u32 = vnet_buffer (p)->map_t.v6.saddr; + + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip6); + ip4->length = + u16_net_add (ip6->payload_length, + sizeof (*ip4) + sizeof (*ip6) - l4_offset); + ip4->fragment_id = fragment_id; + ip4->flags_and_fragment_offset = flags; + ip4->ttl = ip6->hop_limit; + ip4->protocol = l4_protocol; + ip4->checksum = ip4_header_checksum (ip4); + + // UDP checksum is optional over IPv4 + if (!udp_checksum && l4_protocol == IP_PROTOCOL_UDP) + { + *checksum = 0; + } + else + { + csum = ip_csum_sub_even (*checksum, old_src.as_u64[0]); + csum = ip_csum_sub_even (csum, old_src.as_u64[1]); + csum = ip_csum_sub_even (csum, old_dst.as_u64[0]); + csum = ip_csum_sub_even (csum, old_dst.as_u64[1]); + csum = ip_csum_add_even (csum, ip4->dst_address.as_u32); + csum = ip_csum_add_even (csum, ip4->src_address.as_u32); + *checksum = ip_csum_fold (csum); + } + + return 0; +} + static uword ip6_map_t_tcp_udp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -337,7 +468,7 @@ ip6_map_t_tcp_udp (vlib_main_t * vm, p0 = vlib_get_buffer (vm, pi0); - if (ip6_to_ip4_tcp_udp (p0, ip6_to_ip4_set_cb, p0, 1)) + if (map_ip6_to_ip4_tcp_udp (p0, true)) { p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; next0 = IP6_MAPT_TCP_UDP_NEXT_DROP; @@ -346,7 +477,7 @@ ip6_map_t_tcp_udp (vlib_main_t * vm, { if (vnet_buffer (p0)->map_t.mtu < p0->current_length) { - //Send to fragmentation node if necessary + // Send to fragmentation node if necessary vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; @@ -386,7 +517,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ip6_header_t *ip60; u8 error0; u32 l4_len0; - i32 src_port0; + i32 map_port0; map_domain_t *d0; ip6_frag_hdr_t *frag0; ip6_mapt_next_t next0 = 0; @@ -402,50 +533,52 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p0 = vlib_get_buffer (vm, pi0); ip60 = vlib_buffer_get_current (p0); - //Save saddr in a different variable to not overwrite ip.adj_index - saddr = 0; /* TODO */ - /* NOTE: ip6_map_get_domain currently doesn't utilize second argument */ - - d0 = ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & saddr, - &vnet_buffer (p0)->map_t.map_domain_index, - &error0); - - saddr = map_get_ip4 (&ip60->src_address, d0->flags); + d0 = + ip6_map_get_domain (&ip60->dst_address, + &vnet_buffer (p0)->map_t.map_domain_index, + &error0); + if (!d0) + { /* Guess it wasn't for us */ + vnet_feature_next (&next0, p0); + goto exit; + } - //FIXME: What if d0 is null + saddr = map_get_ip4 (&ip60->src_address, d0->ip6_src_len); vnet_buffer (p0)->map_t.v6.saddr = saddr; vnet_buffer (p0)->map_t.v6.daddr = ip6_map_t_embedded_address (d0, &ip60->dst_address); vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; - if (PREDICT_FALSE (ip6_parse (ip60, p0->current_length, - &(vnet_buffer (p0)->map_t. - v6.l4_protocol), - &(vnet_buffer (p0)->map_t. - v6.l4_offset), - &(vnet_buffer (p0)->map_t. - v6.frag_offset)))) + if (PREDICT_FALSE + (ip6_parse (ip60, p0->current_length, + &(vnet_buffer (p0)->map_t.v6.l4_protocol), + &(vnet_buffer (p0)->map_t.v6.l4_offset), + &(vnet_buffer (p0)->map_t.v6.frag_offset)))) { - error0 = MAP_ERROR_MALFORMED; - next0 = IP6_MAPT_NEXT_DROP; + error0 = + error0 == MAP_ERROR_NONE ? MAP_ERROR_MALFORMED : error0; } - src_port0 = -1; - l4_len0 = (u32) clib_net_to_host_u16 (ip60->payload_length) + + map_port0 = -1; + l4_len0 = + (u32) clib_net_to_host_u16 (ip60->payload_length) + sizeof (*ip60) - vnet_buffer (p0)->map_t.v6.l4_offset; frag0 = (ip6_frag_hdr_t *) u8_ptr_add (ip60, - vnet_buffer (p0)->map_t. - v6.frag_offset); - + vnet_buffer (p0)->map_t.v6. + frag_offset); - if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset && - ip6_frag_hdr_offset (frag0))) + if (PREDICT_FALSE + (vnet_buffer (p0)->map_t.v6.frag_offset + && ip6_frag_hdr_offset (frag0))) { - src_port0 = ip6_map_fragment_get (ip60, frag0, d0); - error0 = (src_port0 != -1) ? error0 : MAP_ERROR_FRAGMENT_MEMORY; - next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED; + map_port0 = ip6_map_fragment_get (ip60, frag0, d0); + if (map_port0 == -1) + error0 = + error0 == + MAP_ERROR_NONE ? MAP_ERROR_FRAGMENT_MEMORY : error0; + else + next0 = IP6_MAPT_NEXT_MAPT_FRAGMENTED; } else if (PREDICT_TRUE @@ -457,7 +590,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_buffer (p0)->map_t.checksum_offset = vnet_buffer (p0)->map_t.v6.l4_offset + 16; next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; - src_port0 = + map_port0 = (i32) * ((u16 *) u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset)); @@ -472,7 +605,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) vnet_buffer (p0)->map_t.checksum_offset = vnet_buffer (p0)->map_t.v6.l4_offset + 6; next0 = IP6_MAPT_NEXT_MAPT_TCP_UDP; - src_port0 = + map_port0 = (i32) * ((u16 *) u8_ptr_add (ip60, vnet_buffer (p0)->map_t.v6.l4_offset)); @@ -490,9 +623,9 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) ICMP6_echo_reply || ((icmp46_header_t *) u8_ptr_add (ip60, - vnet_buffer (p0)->map_t.v6. - l4_offset))->code == ICMP6_echo_request) - src_port0 = + vnet_buffer (p0)->map_t.v6.l4_offset))-> + code == ICMP6_echo_request) + map_port0 = (i32) * ((u16 *) u8_ptr_add (ip60, @@ -500,41 +633,44 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) } else { - //TODO: In case of 1:1 mapping, it might be possible to do something with those packets. + // TODO: In case of 1:1 mapping, it might be possible to + // do something with those packets. error0 = MAP_ERROR_BAD_PROTOCOL; } - //Security check - if (PREDICT_FALSE - ((src_port0 != -1) - && (ip60->src_address.as_u64[0] != - map_get_pfx_net (d0, vnet_buffer (p0)->map_t.v6.saddr, - src_port0) - || ip60->src_address.as_u64[1] != map_get_sfx_net (d0, - vnet_buffer - (p0)->map_t.v6.saddr, - src_port0)))) + if (PREDICT_FALSE (map_port0 != -1) && + (ip60->src_address.as_u64[0] != + map_get_pfx_net (d0, vnet_buffer (p0)->map_t.v6.saddr, + map_port0) + || ip60->src_address.as_u64[1] != map_get_sfx_net (d0, + vnet_buffer + (p0)->map_t. + v6.saddr, + map_port0))) { - //Security check when src_port0 is not zero (non-first fragment, UDP or TCP) - error0 = MAP_ERROR_SEC_CHECK; + // Security check when map_port0 is not zero (non-first + // fragment, UDP or TCP) + error0 = + error0 == MAP_ERROR_NONE ? MAP_ERROR_SEC_CHECK : error0; } - //Fragmented first packet needs to be cached for following packets - if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset && - !ip6_frag_hdr_offset ((ip6_frag_hdr_t *) - u8_ptr_add (ip60, - vnet_buffer - (p0)->map_t. - v6.frag_offset))) - && (src_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) + // Fragmented first packet needs to be cached for following packets + if (PREDICT_FALSE + (vnet_buffer (p0)->map_t.v6.frag_offset + && !ip6_frag_hdr_offset ((ip6_frag_hdr_t *) + u8_ptr_add (ip60, + vnet_buffer (p0)->map_t. + v6.frag_offset))) + && (map_port0 != -1) && (d0->ea_bits_len != 0 || !d0->rules) && (error0 == MAP_ERROR_NONE)) { ip6_map_fragment_cache (ip60, (ip6_frag_hdr_t *) u8_ptr_add (ip60, vnet_buffer - (p0)->map_t. - v6.frag_offset), - d0, src_port0); + (p0)-> + map_t.v6. + frag_offset), + d0, map_port0); } if (PREDICT_TRUE @@ -542,14 +678,15 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, thread_index, - vnet_buffer (p0)-> - map_t.map_domain_index, 1, - clib_net_to_host_u16 - (ip60->payload_length)); + vnet_buffer (p0)->map_t. + map_domain_index, 1, + clib_net_to_host_u16 (ip60-> + payload_length)); } next0 = (error0 != MAP_ERROR_NONE) ? IP6_MAPT_NEXT_DROP : next0; p0->error = error_node->errors[error0]; + exit: vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, pi0, next0); @@ -560,7 +697,7 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) } static char *map_t_error_strings[] = { -#define _(sym,string) string, +#define _(sym, string) string, foreach_map_error #undef _ }; @@ -577,10 +714,11 @@ VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = { .error_strings = map_t_error_strings, .n_next_nodes = IP6_MAPT_FRAGMENTED_N_NEXT, - .next_nodes = { - [IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP] = "ip4-lookup", - [IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, - [IP6_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop", + .next_nodes = + { + [IP6_MAPT_FRAGMENTED_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, + [IP6_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop", }, }; /* *INDENT-ON* */ @@ -597,10 +735,11 @@ VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = { .error_strings = map_t_error_strings, .n_next_nodes = IP6_MAPT_ICMP_N_NEXT, - .next_nodes = { - [IP6_MAPT_ICMP_NEXT_IP4_LOOKUP] = "ip4-lookup", - [IP6_MAPT_ICMP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, - [IP6_MAPT_ICMP_NEXT_DROP] = "error-drop", + .next_nodes = + { + [IP6_MAPT_ICMP_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAPT_ICMP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, + [IP6_MAPT_ICMP_NEXT_DROP] = "error-drop", }, }; /* *INDENT-ON* */ @@ -617,15 +756,22 @@ VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = { .error_strings = map_t_error_strings, .n_next_nodes = IP6_MAPT_TCP_UDP_N_NEXT, - .next_nodes = { - [IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP] = "ip4-lookup", - [IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, - [IP6_MAPT_TCP_UDP_NEXT_DROP] = "error-drop", + .next_nodes = + { + [IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG] = IP4_FRAG_NODE_NAME, + [IP6_MAPT_TCP_UDP_NEXT_DROP] = "error-drop", }, }; /* *INDENT-ON* */ /* *INDENT-OFF* */ +VNET_FEATURE_INIT(ip4_map_t_feature, static) = { + .arc_name = "ip6-unicast", + .node_name = "ip6-map-t", + .runs_before = VNET_FEATURES("ip6-flow-classify"), +}; + VLIB_REGISTER_NODE(ip6_map_t_node) = { .function = ip6_map_t, .name = "ip6-map-t", @@ -637,11 +783,12 @@ VLIB_REGISTER_NODE(ip6_map_t_node) = { .error_strings = map_t_error_strings, .n_next_nodes = IP6_MAPT_N_NEXT, - .next_nodes = { - [IP6_MAPT_NEXT_MAPT_TCP_UDP] = "ip6-map-t-tcp-udp", - [IP6_MAPT_NEXT_MAPT_ICMP] = "ip6-map-t-icmp", - [IP6_MAPT_NEXT_MAPT_FRAGMENTED] = "ip6-map-t-fragmented", - [IP6_MAPT_NEXT_DROP] = "error-drop", + .next_nodes = + { + [IP6_MAPT_NEXT_MAPT_TCP_UDP] = "ip6-map-t-tcp-udp", + [IP6_MAPT_NEXT_MAPT_ICMP] = "ip6-map-t-icmp", + [IP6_MAPT_NEXT_MAPT_FRAGMENTED] = "ip6-map-t-fragmented", + [IP6_MAPT_NEXT_DROP] = "error-drop", }, }; /* *INDENT-ON* */ diff --git a/src/plugins/map/map.api b/src/plugins/map/map.api index 752d1b55d9c..f64fe4ccd0d 100644 --- a/src/plugins/map/map.api +++ b/src/plugins/map/map.api @@ -13,7 +13,7 @@ * limitations under the License. */ -option version = "2.3.0"; +option version = "3.0.0"; import "vnet/ip/ip_types.api"; @@ -38,8 +38,6 @@ define map_add_domain u8 ea_bits_len; u8 psid_offset; u8 psid_length; - bool is_translation; - bool is_rfc6052; u16 mtu; }; @@ -108,7 +106,6 @@ define map_domain_details u8 psid_length; u8 flags; u16 mtu; - bool is_translation; }; define map_rule_dump diff --git a/src/plugins/map/map.c b/src/plugins/map/map.c index 307b7a97cfc..1ce3b6faeec 100644 --- a/src/plugins/map/map.c +++ b/src/plugins/map/map.c @@ -18,7 +18,6 @@ #include <vnet/fib/fib_table.h> #include <vnet/fib/ip6_fib.h> #include <vnet/adj/adj.h> -#include <map/map_dpo.h> #include <vppinfra/crc32.h> #include <vnet/plugin/plugin.h> #include <vpp/app/version.h> @@ -74,30 +73,8 @@ map_create_domain (ip4_address_t * ip4_prefix, { u8 suffix_len, suffix_shift; map_main_t *mm = &map_main; - dpo_id_t dpo_v4 = DPO_INVALID; - dpo_id_t dpo_v6 = DPO_INVALID; map_domain_t *d; - /* Sanity check on the src prefix length */ - if (flags & MAP_DOMAIN_TRANSLATION) - { - if (ip6_src_len != 96 && ip6_src_len != 64) - { - clib_warning ("MAP-T only supports prefix lengths of 64 and 96."); - return -1; - } - } - else - { - if (ip6_src_len != 128) - { - clib_warning - ("MAP-E requires a BR address, not a prefix (ip6_src_len should " - "be 128)."); - return -1; - } - } - /* How many, and which bits to grab from the IPv4 DA */ if (ip4_prefix_len + ea_bits_len < 32) { @@ -145,53 +122,13 @@ map_create_domain (ip4_address_t * ip4_prefix, d->psid_mask = (1 << d->psid_length) - 1; d->ea_shift = 64 - ip6_prefix_len - suffix_len - d->psid_length; - /* MAP data-plane object */ - if (d->flags & MAP_DOMAIN_TRANSLATION) - map_t_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4); - else - map_dpo_create (DPO_PROTO_IP4, *map_domain_index, &dpo_v4); - - /* Create ip4 route */ - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP4, - .fp_len = d->ip4_prefix_len, - .fp_addr = { - .ip4 = d->ip4_prefix, - } - , - }; - fib_table_entry_special_dpo_add (0, &pfx, - FIB_SOURCE_MAP, - FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v4); - dpo_reset (&dpo_v4); + /* MAP longest match lookup table (input feature / FIB) */ + mm->ip4_prefix_tbl->add (mm->ip4_prefix_tbl, &d->ip4_prefix, + d->ip4_prefix_len, *map_domain_index); - /* - * construct a DPO to use the v6 domain - */ - if (d->flags & MAP_DOMAIN_TRANSLATION) - map_t_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6); - else - map_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6); - - /* - * Multiple MAP domains may share same source IPv6 TEP. Which is just dandy. - * We are not tracking the sharing. So a v4 lookup to find the correct - * domain post decap/trnaslate is always done - * - * Create ip6 route. This is a reference counted add. If the prefix - * already exists and is MAP sourced, it is now MAP source n+1 times - * and will need to be removed n+1 times. - */ - fib_prefix_t pfx6 = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = d->ip6_src_len, - .fp_addr.ip6 = d->ip6_src, - }; - - fib_table_entry_special_dpo_add (0, &pfx6, - FIB_SOURCE_MAP, - FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v6); - dpo_reset (&dpo_v6); + /* Really needed? Or always use FIB? */ + mm->ip6_src_prefix_tbl->add (mm->ip6_src_prefix_tbl, &d->ip6_src, + d->ip6_src_len, *map_domain_index); /* Validate packet/byte counters */ map_domain_counter_lock (mm); @@ -231,26 +168,10 @@ map_delete_domain (u32 map_domain_index) } d = pool_elt_at_index (mm->domains, map_domain_index); - - fib_prefix_t pfx = { - .fp_proto = FIB_PROTOCOL_IP4, - .fp_len = d->ip4_prefix_len, - .fp_addr = { - .ip4 = d->ip4_prefix, - } - , - }; - fib_table_entry_special_remove (0, &pfx, FIB_SOURCE_MAP); - - fib_prefix_t pfx6 = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = d->ip6_src_len, - .fp_addr = { - .ip6 = d->ip6_src, - } - , - }; - fib_table_entry_special_remove (0, &pfx6, FIB_SOURCE_MAP); + mm->ip4_prefix_tbl->delete (mm->ip4_prefix_tbl, &d->ip4_prefix, + d->ip4_prefix_len); + mm->ip6_src_prefix_tbl->delete (mm->ip6_src_prefix_tbl, &d->ip6_src, + d->ip6_src_len); /* Deleting rules */ if (d->rules) @@ -263,7 +184,7 @@ map_delete_domain (u32 map_domain_index) int map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep, - u8 is_add) + bool is_add) { map_domain_t *d; map_main_t *mm = &map_main; @@ -441,7 +362,7 @@ map_fib_unresolve (map_main_pre_resolved_t * pr, } void -map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6, int is_del) +map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6, bool is_del) { if (ip6 && (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0)) { @@ -587,10 +508,6 @@ map_add_domain_command_fn (vlib_main_t * vm, num_m_args++; else if (unformat (line_input, "mtu %d", &mtu)) num_m_args++; - else if (unformat (line_input, "map-t")) - flags |= MAP_DOMAIN_TRANSLATION; - else if (unformat (line_input, "rfc6052")) - flags |= (MAP_DOMAIN_TRANSLATION | MAP_DOMAIN_RFC6052); else { error = clib_error_return (0, "unknown input `%U'", @@ -714,7 +631,7 @@ map_pre_resolve_command_fn (vlib_main_t * vm, ip4_address_t ip4nh, *p_v4 = NULL; ip6_address_t ip6nh, *p_v6 = NULL; clib_error_t *error = NULL; - int is_del = 0; + bool is_del = false; clib_memset (&ip4nh, 0, sizeof (ip4nh)); clib_memset (&ip6nh, 0, sizeof (ip6nh)); @@ -731,7 +648,7 @@ map_pre_resolve_command_fn (vlib_main_t * vm, if (unformat (line_input, "ip6-nh %U", unformat_ip6_address, &ip6nh)) p_v6 = &ip6nh; else if (unformat (line_input, "del")) - is_del = 1; + is_del = true; else { error = clib_error_return (0, "unknown input `%U'", @@ -938,12 +855,8 @@ done: static char * map_flags_to_string (u32 flags) { - if (flags & MAP_DOMAIN_RFC6052) - return "rfc6052"; if (flags & MAP_DOMAIN_PREFIX) return "prefix"; - if (flags & MAP_DOMAIN_TRANSLATION) - return "map-t"; return ""; } @@ -1070,9 +983,10 @@ show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input, if (map_domain_index == ~0) { - /* *INDENT-OFF* */ - pool_foreach(d, mm->domains, ({vlib_cli_output(vm, "%U", format_map_domain, d, counters);})); - /* *INDENT-ON* */ + /* *INDENT-OFF* */ + pool_foreach(d, mm->domains, + ({vlib_cli_output(vm, "%U", format_map_domain, d, counters);})); + /* *INDENT-ON* */ } else { @@ -2257,6 +2171,9 @@ map_init (vlib_main_t * vm) { map_main_t *mm = &map_main; clib_error_t *error = 0; + + memset (mm, 0, sizeof (*mm)); + mm->vnet_main = vnet_get_main (); mm->vlib_main = vm; @@ -2290,6 +2207,7 @@ map_init (vlib_main_t * vm) vlib_validate_simple_counter (&mm->icmp_relayed, 0); vlib_zero_simple_counter (&mm->icmp_relayed, 0); + mm->icmp_relayed.stat_segment_name = "/map/icmp-relayed"; /* IP4 virtual reassembly */ mm->ip4_reass_hash_table = 0; @@ -2326,7 +2244,17 @@ map_init (vlib_main_t * vm) #ifdef MAP_SKIP_IP6_LOOKUP fib_node_register_type (FIB_NODE_TYPE_MAP_E, &map_vft); #endif - map_dpo_module_init (); + + /* Create empty domain that's used in case of error */ + map_domain_t *d; + pool_get_aligned (mm->domains, d, CLIB_CACHE_LINE_BYTES); + memset (d, 0, sizeof (*d)); + d->ip6_src_len = 64; + + /* LPM lookup tables */ + mm->ip4_prefix_tbl = lpm_table_init (LPM_TYPE_KEY32); + mm->ip6_prefix_tbl = lpm_table_init (LPM_TYPE_KEY128); + mm->ip6_src_prefix_tbl = lpm_table_init (LPM_TYPE_KEY128); error = map_plugin_api_hookup (vm); diff --git a/src/plugins/map/map.h b/src/plugins/map/map.h index 5b8aef4e872..2169435bc09 100644 --- a/src/plugins/map/map.h +++ b/src/plugins/map/map.h @@ -20,8 +20,8 @@ #include <vnet/fib/fib_types.h> #include <vnet/fib/ip4_fib.h> #include <vnet/adj/adj.h> -#include <map/map_dpo.h> #include <vnet/dpo/load_balance.h> +#include "lpm.h" #define MAP_SKIP_IP6_LOOKUP 1 @@ -39,7 +39,7 @@ int map_create_domain (ip4_address_t * ip4_prefix, u8 ip4_prefix_len, u32 * map_domain_index, u16 mtu, u8 flags); int map_delete_domain (u32 map_domain_index); int map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep, - u8 is_add); + bool is_add); int map_if_enable_disable (bool is_enable, u32 sw_if_index, bool is_translation); u8 *format_map_trace (u8 * s, va_list * args); @@ -47,7 +47,7 @@ u8 *format_map_trace (u8 * s, va_list * args); int map_param_set_fragmentation (bool inner, bool ignore_df); int map_param_set_icmp (ip4_address_t * ip4_err_relay_src); int map_param_set_icmp6 (u8 enable_unreachable); -void map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6, int is_del); +void map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6, bool is_del); int map_param_set_reassembly (bool is_ipv6, u16 lifetime_ms, u16 pool_size, u32 buffers, f64 ht_ratio, u32 * reass, u32 * packets); @@ -321,6 +321,10 @@ typedef struct { /* Counters */ u32 ip6_reass_buffered_counter; + /* Lookup tables */ + lpm_t *ip4_prefix_tbl; + lpm_t *ip6_prefix_tbl; + lpm_t *ip6_src_prefix_tbl; } map_main_t; /* @@ -412,7 +416,7 @@ map_get_sfx (map_domain_t *d, u32 addr, u16 port) if (d->ip6_prefix_len == 128) return clib_net_to_host_u64(d->ip6_prefix.as_u64[1]); - if (d->flags & MAP_DOMAIN_RFC6052) + if (d->ip6_src_len == 96) return (clib_net_to_host_u64(d->ip6_prefix.as_u64[1]) | addr); /* IPv4 prefix */ @@ -431,62 +435,48 @@ map_get_sfx_net (map_domain_t *d, u32 addr, u16 port) } static_always_inline u32 -map_get_ip4 (ip6_address_t *addr, map_domain_flags_e flags) +map_get_ip4 (ip6_address_t *addr, u16 prefix_len) { - if (flags & MAP_DOMAIN_RFC6052) + ASSERT(prefix_len == 64 || prefix_len == 96); + if (prefix_len == 96) return clib_host_to_net_u32(clib_net_to_host_u64(addr->as_u64[1])); else return clib_host_to_net_u32(clib_net_to_host_u64(addr->as_u64[1]) >> 16); } -/* - * Get the MAP domain from an IPv4 lookup adjacency. - */ static_always_inline map_domain_t * -ip4_map_get_domain (u32 mdi) +ip4_map_get_domain (ip4_address_t *addr, u32 *map_domain_index, u8 *error) { map_main_t *mm = &map_main; + u32 mdi = mm->ip4_prefix_tbl->lookup(mm->ip4_prefix_tbl, addr, 32); + if (mdi == ~0) { + *error = MAP_ERROR_NO_DOMAIN; + return 0; + } + *map_domain_index = mdi; return pool_elt_at_index(mm->domains, mdi); } /* - * Get the MAP domain from an IPv6 lookup adjacency. - * If the IPv6 address or prefix is not shared, no lookup is required. - * The IPv4 address is used otherwise. + * Get the MAP domain from an IPv6 address. + * If the IPv6 address or + * prefix is shared the IPv4 address must be used. */ static_always_inline map_domain_t * -ip6_map_get_domain (u32 mdi, - ip4_address_t *addr, +ip6_map_get_domain (ip6_address_t *addr, u32 *map_domain_index, u8 *error) { map_main_t *mm = &map_main; - -#ifdef TODO - /* - * Disable direct MAP domain lookup on decap, until the security check is updated to verify IPv4 SA. - * (That's done implicitly when MAP domain is looked up in the IPv4 FIB) - */ - //#ifdef MAP_NONSHARED_DOMAIN_ENABLED - //#error "How can you be sure this domain is not shared?" -#endif + u32 mdi = mm->ip6_src_prefix_tbl->lookup(mm->ip6_src_prefix_tbl, addr, 128); + if (mdi == ~0) { + *error = MAP_ERROR_NO_DOMAIN; + return 0; + } *map_domain_index = mdi; return pool_elt_at_index(mm->domains, mdi); - -#ifdef TODO - u32 lbi = ip4_fib_forwarding_lookup(0, addr); - const dpo_id_t *dpo = load_balance_get_bucket(lbi, 0); - if (PREDICT_TRUE(dpo->dpoi_type == map_dpo_type || - dpo->dpoi_type == map_t_dpo_type)) - { - *map_domain_index = dpo->dpoi_index; - return pool_elt_at_index(mm->domains, *map_domain_index); - } - *error = MAP_ERROR_NO_DOMAIN; - return NULL; -#endif } map_ip4_reass_t * @@ -551,6 +541,9 @@ int map_ip6_reass_conf_lifetime(u16 lifetime_ms); int map_ip6_reass_conf_buffers(u32 buffers); #define MAP_IP6_REASS_CONF_BUFFERS_MAX (0xffffffff) +/* + * Supports prefix of 96 or 64 (with u-octet) + */ static_always_inline void ip4_map_t_embedded_address (map_domain_t *d, ip6_address_t *ip6, const ip4_address_t *ip4) @@ -613,6 +606,55 @@ map_send_all_to_node(vlib_main_t *vm, u32 *pi_vector, } } +static_always_inline void +map_mss_clamping (tcp_header_t * tcp, ip_csum_t * sum, u16 mss_clamping) +{ + u8 *data; + u8 opt_len, opts_len, kind; + u16 mss; + u16 mss_value_net = clib_host_to_net_u16(mss_clamping); + + if (!tcp_syn (tcp)) + return; + + opts_len = (tcp_doff (tcp) << 2) - sizeof (tcp_header_t); + data = (u8 *) (tcp + 1); + for (; opts_len > 0; opts_len -= opt_len, data += opt_len) + { + kind = data[0]; + + if (kind == TCP_OPTION_EOL) + break; + else if (kind == TCP_OPTION_NOOP) + { + opt_len = 1; + continue; + } + else + { + if (opts_len < 2) + return; + opt_len = data[1]; + + if (opt_len < 2 || opt_len > opts_len) + return; + } + + if (kind == TCP_OPTION_MSS) + { + mss = *(u16 *) (data + 2); + if (clib_net_to_host_u16 (mss) > mss_clamping) + { + *sum = + ip_csum_update (*sum, mss, mss_value_net, ip4_header_t, + length); + clib_memcpy (data + 2, &mss_value_net, 2); + } + return; + } + } +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/map/map_api.c b/src/plugins/map/map_api.c index ac1665e5384..1b8ffc72a6a 100644 --- a/src/plugins/map/map_api.c +++ b/src/plugins/map/map_api.c @@ -54,12 +54,6 @@ vl_api_map_add_domain_t_handler (vl_api_map_add_domain_t * mp) u32 index; u8 flags = 0; - if (mp->is_translation) - flags |= MAP_DOMAIN_TRANSLATION; - - if (mp->is_rfc6052) - flags |= MAP_DOMAIN_RFC6052; - rv = map_create_domain ((ip4_address_t *) & mp->ip4_prefix.prefix, mp->ip4_prefix.len, @@ -137,7 +131,6 @@ vl_api_map_domain_dump_t_handler (vl_api_map_domain_dump_t * mp) rmp->psid_length = d->psid_length; rmp->flags = d->flags; rmp->mtu = htons(d->mtu); - rmp->is_translation = (d->flags & MAP_DOMAIN_TRANSLATION); // Redundant vl_api_send_msg (reg, (u8 *) rmp); })); diff --git a/src/plugins/map/map_dpo.c b/src/plugins/map/map_dpo.c deleted file mode 100644 index 059a4df0a44..00000000000 --- a/src/plugins/map/map_dpo.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <vnet/ip/ip.h> -#include <map/map_dpo.h> - -/** - * The register MAP DPO type - */ -dpo_type_t map_dpo_type; -dpo_type_t map_t_dpo_type; - -void -map_dpo_create (dpo_proto_t dproto, - u32 domain_index, - dpo_id_t *dpo) -{ - dpo_set(dpo, - map_dpo_type, - dproto, - domain_index); -} - -void -map_t_dpo_create (dpo_proto_t dproto, - u32 domain_index, - dpo_id_t *dpo) -{ - dpo_set(dpo, - map_t_dpo_type, - dproto, - domain_index); -} - - -u8* -format_map_dpo (u8 *s, va_list *args) -{ - index_t index = va_arg (*args, index_t); - CLIB_UNUSED(u32 indent) = va_arg (*args, u32); - - return (format(s, "map: domain:%d", index)); -} - -u8* -format_map_t_dpo (u8 *s, va_list *args) -{ - index_t index = va_arg (*args, index_t); - CLIB_UNUSED(u32 indent) = va_arg (*args, u32); - - return (format(s, "map-t: domain:%d", index)); -} - - -static void -map_dpo_lock (dpo_id_t *dpo) -{ -} - -static void -map_dpo_unlock (dpo_id_t *dpo) -{ -} - -const static dpo_vft_t md_vft = { - .dv_lock = map_dpo_lock, - .dv_unlock = map_dpo_unlock, - .dv_format = format_map_dpo, -}; - -const static char* const map_ip4_nodes[] = -{ - "ip4-map", - NULL, -}; -const static char* const map_ip6_nodes[] = -{ - "ip6-map", - NULL, -}; - -const static char* const * const map_nodes[DPO_PROTO_NUM] = -{ - [DPO_PROTO_IP4] = map_ip4_nodes, - [DPO_PROTO_IP6] = map_ip6_nodes, - [DPO_PROTO_MPLS] = NULL, -}; - -const static dpo_vft_t md_t_vft = { - .dv_lock = map_dpo_lock, - .dv_unlock = map_dpo_unlock, - .dv_format = format_map_t_dpo, -}; - -const static char* const map_t_ip4_nodes[] = -{ - "ip4-map-t", - NULL, -}; -const static char* const map_t_ip6_nodes[] = -{ - "ip6-map-t", - NULL, -}; - -const static char* const * const map_t_nodes[DPO_PROTO_NUM] = -{ - [DPO_PROTO_IP4] = map_t_ip4_nodes, - [DPO_PROTO_IP6] = map_t_ip6_nodes, - [DPO_PROTO_MPLS] = NULL, -}; - -void -map_dpo_module_init (void) -{ - map_dpo_type = dpo_register_new_type(&md_vft, map_nodes); - map_t_dpo_type = dpo_register_new_type(&md_t_vft, map_t_nodes); -} diff --git a/src/plugins/map/map_dpo.h b/src/plugins/map/map_dpo.h deleted file mode 100644 index 63bf4787383..00000000000 --- a/src/plugins/map/map_dpo.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2016 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __MAP_DPO_H__ -#define __MAP_DPO_H__ - -#include <vnet/vnet.h> -#include <vnet/dpo/dpo.h> - -/** - * A representation of a MAP DPO - */ - -extern void map_dpo_create (dpo_proto_t dproto, - u32 domain_index, - dpo_id_t *dpo); -extern void map_t_dpo_create (dpo_proto_t dproto, - u32 domain_index, - dpo_id_t *dpo); - -extern u8* format_map_dpo(u8 *s, va_list *args); - -/* - * Encapsulation violation for fast data-path access - */ -extern dpo_type_t map_dpo_type; -extern dpo_type_t map_t_dpo_type; - -extern void map_dpo_module_init(void); - -#endif |