From 7cd468a3d7dee7d6c92f69a0bb7061ae208ec727 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 19 Dec 2016 23:05:39 +0100 Subject: Reorganize source tree to use single autotools instance Change-Id: I7b51f88292e057c6443b12224486f2d0c9f8ae23 Signed-off-by: Damjan Marion --- src/vnet/map/map.h | 591 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 591 insertions(+) create mode 100644 src/vnet/map/map.h (limited to 'src/vnet/map/map.h') diff --git a/src/vnet/map/map.h b/src/vnet/map/map.h new file mode 100644 index 00000000..f446b739 --- /dev/null +++ b/src/vnet/map/map.h @@ -0,0 +1,591 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define MAP_SKIP_IP6_LOOKUP 1 + +typedef enum +{ + MAP_SENDER, + MAP_RECEIVER +} map_dir_e; + +int map_create_domain (ip4_address_t * ip4_prefix, u8 ip4_prefix_len, + ip6_address_t * ip6_prefix, u8 ip6_prefix_len, + ip6_address_t * ip6_src, u8 ip6_src_len, + u8 ea_bits_len, u8 psid_offset, u8 psid_length, + u32 * map_domain_index, u16 mtu, u8 flags); +int map_delete_domain (u32 map_domain_index); +int map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep, + u8 is_add); +u8 *format_map_trace (u8 * s, va_list * args); +i32 ip4_get_port (ip4_header_t * ip, map_dir_e dir, u16 buffer_len); +i32 ip6_get_port (ip6_header_t * ip6, map_dir_e dir, u16 buffer_len); +u16 ip4_map_get_port (ip4_header_t * ip, map_dir_e dir); + +typedef enum __attribute__ ((__packed__)) +{ + MAP_DOMAIN_PREFIX = 1 << 0, MAP_DOMAIN_TRANSLATION = 1 << 1, // The domain uses MAP-T +} map_domain_flags_e; + +/** + * IP4 reassembly logic: + * One virtually reassembled flow requires a map_ip4_reass_t structure in order + * to keep the first-fragment port number and, optionally, cache out of sequence + * packets. + * There are up to MAP_IP4_REASS_MAX_REASSEMBLY such structures. + * When in use, those structures are stored in a hash table of MAP_IP4_REASS_BUCKETS buckets. + * When a new structure needs to be used, it is allocated from available ones. + * If there is no structure available, the oldest in use is selected and used if and + * only if it was first allocated more than MAP_IP4_REASS_LIFETIME seconds ago. + * In case no structure can be allocated, the fragment is dropped. + */ + +#define MAP_IP4_REASS_LIFETIME_DEFAULT (100) /* ms */ +#define MAP_IP4_REASS_HT_RATIO_DEFAULT (1.0) +#define MAP_IP4_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures +#define MAP_IP4_REASS_BUFFERS_DEFAULT 2048 + +#define MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 // Number of fragment per reassembly + +#define MAP_IP6_REASS_LIFETIME_DEFAULT (100) /* ms */ +#define MAP_IP6_REASS_HT_RATIO_DEFAULT (1.0) +#define MAP_IP6_REASS_POOL_SIZE_DEFAULT 1024 // Number of reassembly structures +#define MAP_IP6_REASS_BUFFERS_DEFAULT 2048 + +#define MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY 5 + +#define MAP_IP6_REASS_COUNT_BYTES +#define MAP_IP4_REASS_COUNT_BYTES + +//#define IP6_MAP_T_OVERRIDE_TOS 0 + +/* + * This structure _MUST_ be no larger than a single cache line (64 bytes). + * If more space is needed make a union of ip6_prefix and *rules, those are mutually exclusive. + */ +typedef struct +{ + ip6_address_t ip6_src; + ip6_address_t ip6_prefix; + ip6_address_t *rules; + u32 suffix_mask; + ip4_address_t ip4_prefix; + u16 psid_mask; + u16 mtu; + map_domain_flags_e flags; + u8 ip6_prefix_len; + u8 ip6_src_len; + u8 ea_bits_len; + u8 psid_offset; + u8 psid_length; + + /* helpers */ + u8 psid_shift; + u8 suffix_shift; + u8 ea_shift; + + /* not used by forwarding */ + u8 ip4_prefix_len; +} map_domain_t; + +STATIC_ASSERT ((sizeof (map_domain_t) <= CLIB_CACHE_LINE_BYTES), + "MAP domain fits in one cacheline"); + +#define MAP_REASS_INDEX_NONE ((u16)0xffff) + +/* + * Hash key, padded out to 16 bytes for fast compare + */ +/* *INDENT-OFF* */ +typedef union { + CLIB_PACKED (struct { + ip4_address_t src; + ip4_address_t dst; + u16 fragment_id; + u8 protocol; + }); + u64 as_u64[2]; + u32 as_u32[4]; +} map_ip4_reass_key_t; +/* *INDENT-ON* */ + +typedef struct +{ + map_ip4_reass_key_t key; + f64 ts; +#ifdef MAP_IP4_REASS_COUNT_BYTES + u16 expected_total; + u16 forwarded; +#endif + i32 port; + u16 bucket; + u16 bucket_next; + u16 fifo_prev; + u16 fifo_next; + u32 fragments[MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY]; +} map_ip4_reass_t; + +/* + * MAP domain counters + */ +typedef enum +{ + /* Simple counters */ + MAP_DOMAIN_IPV4_FRAGMENT = 0, + /* Combined counters */ + MAP_DOMAIN_COUNTER_RX = 0, + MAP_DOMAIN_COUNTER_TX, + MAP_N_DOMAIN_COUNTER +} map_domain_counter_t; + +/* + * main_main_t + */ +/* *INDENT-OFF* */ +typedef union { + CLIB_PACKED (struct { + ip6_address_t src; + ip6_address_t dst; + u32 fragment_id; + u8 protocol; + }); + u64 as_u64[5]; + u32 as_u32[10]; +} map_ip6_reass_key_t; +/* *INDENT-OFF* */ + +typedef struct { + u32 pi; //Cached packet or ~0 + u16 next_data_offset; //The data offset of the additional 20 bytes or ~0 + u8 next_data_len; //Number of bytes ready to be copied (20 if not last fragment) + u8 next_data[20]; //The 20 additional bytes +} map_ip6_fragment_t; + +typedef struct { + map_ip6_reass_key_t key; + f64 ts; +#ifdef MAP_IP6_REASS_COUNT_BYTES + u16 expected_total; + u16 forwarded; +#endif + u16 bucket; //What hash bucket this element is linked in + u16 bucket_next; + u16 fifo_prev; + u16 fifo_next; + ip4_header_t ip4_header; + map_ip6_fragment_t fragments[MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY]; +} map_ip6_reass_t; + +typedef struct { + /* pool of MAP domains */ + map_domain_t *domains; + + /* MAP Domain packet/byte counters indexed by map domain index */ + vlib_simple_counter_main_t *simple_domain_counters; + vlib_combined_counter_main_t *domain_counters; + volatile u32 *counter_lock; + +#ifdef MAP_SKIP_IP6_LOOKUP + /* pre-presolve */ + u32 adj6_index, adj4_index; + ip4_address_t preresolve_ip4; + ip6_address_t preresolve_ip6; +#endif + + /* Traffic class: zero, copy (~0) or fixed value */ + u8 tc; + bool tc_copy; + + bool sec_check; /* Inbound security check */ + bool sec_check_frag; /* Inbound security check for (subsequent) fragments */ + bool icmp6_enabled; /* Send destination unreachable for security check failure */ + + /* ICMPv6 -> ICMPv4 relay parameters */ + ip4_address_t icmp4_src_address; + vlib_simple_counter_main_t icmp_relayed; + + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + + /* + * IPv4 encap and decap reassembly + */ + /* Configuration */ + f32 ip4_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size)) + u16 ip4_reass_conf_pool_size; //Max number of allocated reass structures + u16 ip4_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms + u32 ip4_reass_conf_buffers; //Maximum number of buffers used by ip4 reassembly + + /* Runtime */ + map_ip4_reass_t *ip4_reass_pool; + u8 ip4_reass_ht_log2len; //Hash table size is 2^log2len + u16 ip4_reass_allocated; + u16 *ip4_reass_hash_table; + u16 ip4_reass_fifo_last; + volatile u32 *ip4_reass_lock; + + /* Counters */ + u32 ip4_reass_buffered_counter; + + bool frag_inner; /* Inner or outer fragmentation */ + bool frag_ignore_df; /* Fragment (outer) packet even if DF is set */ + + /* + * IPv6 decap reassembly + */ + /* Configuration */ + f32 ip6_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size)) + u16 ip6_reass_conf_pool_size; //Max number of allocated reass structures + u16 ip6_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms + u32 ip6_reass_conf_buffers; //Maximum number of buffers used by ip6 reassembly + + /* Runtime */ + map_ip6_reass_t *ip6_reass_pool; + u8 ip6_reass_ht_log2len; //Hash table size is 2^log2len + u16 ip6_reass_allocated; + u16 *ip6_reass_hash_table; + u16 ip6_reass_fifo_last; + volatile u32 *ip6_reass_lock; + + /* Counters */ + u32 ip6_reass_buffered_counter; + +} map_main_t; + +/* + * MAP Error counters/messages + */ +#define foreach_map_error \ + /* Must be first. */ \ + _(NONE, "valid MAP packets") \ + _(BAD_PROTOCOL, "bad protocol") \ + _(SEC_CHECK, "security check failed") \ + _(ENCAP_SEC_CHECK, "encap security check failed") \ + _(DECAP_SEC_CHECK, "decap security check failed") \ + _(ICMP, "unable to translate ICMP") \ + _(ICMP_RELAY, "unable to relay ICMP") \ + _(UNKNOWN, "unknown") \ + _(NO_BINDING, "no binding") \ + _(NO_DOMAIN, "no domain") \ + _(FRAGMENTED, "packet is a fragment") \ + _(FRAGMENT_MEMORY, "could not cache fragment") \ + _(FRAGMENT_MALFORMED, "fragment has unexpected format")\ + _(FRAGMENT_DROPPED, "dropped cached fragment") \ + _(MALFORMED, "malformed packet") \ + _(DF_SET, "can't fragment, DF set") + +typedef enum { +#define _(sym,str) MAP_ERROR_##sym, + foreach_map_error +#undef _ + MAP_N_ERROR, + } map_error_t; + +u64 map_error_counter_get(u32 node_index, map_error_t map_error); + +typedef struct { + u32 map_domain_index; + u16 port; +} map_trace_t; + +map_main_t map_main; + +extern vlib_node_registration_t ip4_map_node; +extern vlib_node_registration_t ip6_map_node; + +extern vlib_node_registration_t ip4_map_t_node; +extern vlib_node_registration_t ip4_map_t_fragmented_node; +extern vlib_node_registration_t ip4_map_t_tcp_udp_node; +extern vlib_node_registration_t ip4_map_t_icmp_node; + +extern vlib_node_registration_t ip6_map_t_node; +extern vlib_node_registration_t ip6_map_t_fragmented_node; +extern vlib_node_registration_t ip6_map_t_tcp_udp_node; +extern vlib_node_registration_t ip6_map_t_icmp_node; + +/* + * map_get_pfx + */ +static_always_inline u64 +map_get_pfx (map_domain_t *d, u32 addr, u16 port) +{ + u16 psid = (port >> d->psid_shift) & d->psid_mask; + + if (d->ea_bits_len == 0 && d->rules) + return clib_net_to_host_u64(d->rules[psid].as_u64[0]); + + u32 suffix = (addr >> d->suffix_shift) & d->suffix_mask; + u64 ea = d->ea_bits_len == 0 ? 0 : (((u64) suffix << d->psid_length)) | psid; + + return clib_net_to_host_u64(d->ip6_prefix.as_u64[0]) | ea << d->ea_shift; +} + +static_always_inline u64 +map_get_pfx_net (map_domain_t *d, u32 addr, u16 port) +{ + return clib_host_to_net_u64(map_get_pfx(d, clib_net_to_host_u32(addr), + clib_net_to_host_u16(port))); +} + +/* + * map_get_sfx + */ +static_always_inline u64 +map_get_sfx (map_domain_t *d, u32 addr, u16 port) +{ + u16 psid = (port >> d->psid_shift) & d->psid_mask; + + /* Shared 1:1 mode. */ + if (d->ea_bits_len == 0 && d->rules) + return clib_net_to_host_u64(d->rules[psid].as_u64[1]); + if (d->ip6_prefix_len == 128) + return clib_net_to_host_u64(d->ip6_prefix.as_u64[1]); + + /* IPv4 prefix */ + if (d->flags & MAP_DOMAIN_PREFIX) + return (u64) (addr & (0xFFFFFFFF << d->suffix_shift)) << 16; + + /* Shared or full IPv4 address */ + return ((u64) addr << 16) | psid; +} + +static_always_inline u64 +map_get_sfx_net (map_domain_t *d, u32 addr, u16 port) +{ + return clib_host_to_net_u64(map_get_sfx(d, clib_net_to_host_u32(addr), + clib_net_to_host_u16(port))); +} + +static_always_inline u32 +map_get_ip4 (ip6_address_t *addr) +{ + return clib_host_to_net_u32(clib_net_to_host_u64(addr->as_u64[1]) >> 16); +} + +/* + * Get the MAP domain from an IPv4 lookup adjacency. + */ +static_always_inline map_domain_t * +ip4_map_get_domain (u32 mdi, + u32 *map_domain_index) +{ + map_main_t *mm = &map_main; + map_dpo_t *md; + + md = map_dpo_get(mdi); + + ASSERT(md); + *map_domain_index = md->md_domain; + return pool_elt_at_index(mm->domains, *map_domain_index); +} + +/* + * Get the MAP domain from an IPv6 lookup adjacency. + * If the IPv6 address or prefix is not shared, no lookup is required. + * The IPv4 address is used otherwise. + */ +static_always_inline map_domain_t * +ip6_map_get_domain (u32 mdi, ip4_address_t *addr, + u32 *map_domain_index, u8 *error) +{ + map_main_t *mm = &map_main; + map_dpo_t *md; + + /* + * Disable direct MAP domain lookup on decap, until the security check is updated to verify IPv4 SA. + * (That's done implicitly when MAP domain is looked up in the IPv4 FIB) + */ +#ifdef MAP_NONSHARED_DOMAIN_ENABLED + md = map_dpo_get(mdi); + + ASSERT(md); + *map_domain_index = md->md_domain; + if (*map_domain_index != ~0) + return pool_elt_at_index(mm->domains, *map_domain_index); +#endif + + u32 lbi = ip4_fib_forwarding_lookup(0, addr); + const dpo_id_t *dpo = load_balance_get_bucket(lbi, 0); + if (PREDICT_TRUE(dpo->dpoi_type == map_dpo_type || + dpo->dpoi_type == map_t_dpo_type)) + { + md = map_dpo_get(dpo->dpoi_index); + *map_domain_index = md->md_domain; + return pool_elt_at_index(mm->domains, *map_domain_index); + } + *error = MAP_ERROR_NO_DOMAIN; + return NULL; +} + +map_ip4_reass_t * +map_ip4_reass_get(u32 src, u32 dst, u16 fragment_id, + u8 protocol, u32 **pi_to_drop); +void +map_ip4_reass_free(map_ip4_reass_t *r, u32 **pi_to_drop); + +#define map_ip4_reass_lock() while (__sync_lock_test_and_set(map_main.ip4_reass_lock, 1)) {} +#define map_ip4_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip4_reass_lock = 0;} while(0) + +static_always_inline void +map_ip4_reass_get_fragments(map_ip4_reass_t *r, u32 **pi) +{ + int i; + for (i=0; ifragments[i] != ~0) { + vec_add1(*pi, r->fragments[i]); + r->fragments[i] = ~0; + map_main.ip4_reass_buffered_counter--; + } +} + +int map_ip4_reass_add_fragment(map_ip4_reass_t *r, u32 pi); + +map_ip6_reass_t * +map_ip6_reass_get(ip6_address_t *src, ip6_address_t *dst, u32 fragment_id, + u8 protocol, u32 **pi_to_drop); +void +map_ip6_reass_free(map_ip6_reass_t *r, u32 **pi_to_drop); + +#define map_ip6_reass_lock() while (__sync_lock_test_and_set(map_main.ip6_reass_lock, 1)) {} +#define map_ip6_reass_unlock() do {CLIB_MEMORY_BARRIER(); *map_main.ip6_reass_lock = 0;} while(0) + +int +map_ip6_reass_add_fragment(map_ip6_reass_t *r, u32 pi, + u16 data_offset, u16 next_data_offset, + u8 *data_start, u16 data_len); + +void map_ip4_drop_pi(u32 pi); + +int map_ip4_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP4_REASS_CONF_HT_RATIO_MAX 100 +int map_ip4_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP4_REASS_CONF_POOL_SIZE_MAX (0xfeff) +int map_ip4_reass_conf_lifetime(u16 lifetime_ms); +#define MAP_IP4_REASS_CONF_LIFETIME_MAX 0xffff +int map_ip4_reass_conf_buffers(u32 buffers); +#define MAP_IP4_REASS_CONF_BUFFERS_MAX (0xffffffff) + +void map_ip6_drop_pi(u32 pi); + + +int map_ip6_reass_conf_ht_ratio(f32 ht_ratio, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP6_REASS_CONF_HT_RATIO_MAX 100 +int map_ip6_reass_conf_pool_size(u16 pool_size, u32 *trashed_reass, u32 *dropped_packets); +#define MAP_IP6_REASS_CONF_POOL_SIZE_MAX (0xfeff) +int map_ip6_reass_conf_lifetime(u16 lifetime_ms); +#define MAP_IP6_REASS_CONF_LIFETIME_MAX 0xffff +int map_ip6_reass_conf_buffers(u32 buffers); +#define MAP_IP6_REASS_CONF_BUFFERS_MAX (0xffffffff) + +static_always_inline +int ip6_parse(const ip6_header_t *ip6, u32 buff_len, + u8 *l4_protocol, u16 *l4_offset, u16 *frag_hdr_offset) +{ + if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) { + *l4_protocol = ((ip6_frag_hdr_t *)(ip6 + 1))->next_hdr; + *frag_hdr_offset = sizeof(*ip6); + *l4_offset = sizeof(*ip6) + sizeof(ip6_frag_hdr_t); + } else { + *l4_protocol = ip6->protocol; + *frag_hdr_offset = 0; + *l4_offset = sizeof(*ip6); + } + + return (buff_len < (*l4_offset + 4)) || + (clib_net_to_host_u16(ip6->payload_length) < (*l4_offset + 4 - sizeof(*ip6))); +} + + +#define u8_ptr_add(ptr, index) (((u8 *)ptr) + index) +#define u16_net_add(u, val) clib_host_to_net_u16(clib_net_to_host_u16(u) + (val)) + +#define frag_id_6to4(id) ((id) ^ ((id) >> 16)) + +static_always_inline void +ip4_map_t_embedded_address (map_domain_t *d, + ip6_address_t *ip6, const ip4_address_t *ip4) +{ + ASSERT(d->ip6_src_len == 96); //No support for other lengths for now + ip6->as_u64[0] = d->ip6_src.as_u64[0]; + ip6->as_u32[2] = d->ip6_src.as_u32[2]; + ip6->as_u32[3] = ip4->as_u32; +} + +static_always_inline u32 +ip6_map_t_embedded_address (map_domain_t *d, ip6_address_t *addr) +{ + ASSERT(d->ip6_src_len == 96); //No support for other lengths for now + return addr->as_u32[3]; +} + +static inline void +map_domain_counter_lock (map_main_t *mm) +{ + if (mm->counter_lock) + while (__sync_lock_test_and_set(mm->counter_lock, 1)) + /* zzzz */ ; +} +static inline void +map_domain_counter_unlock (map_main_t *mm) +{ + if (mm->counter_lock) + *mm->counter_lock = 0; +} + + +static_always_inline void +map_send_all_to_node(vlib_main_t *vm, u32 *pi_vector, + vlib_node_runtime_t *node, vlib_error_t *error, + u32 next) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + //Deal with fragments that are ready + from = pi_vector; + n_left_from = vec_len(pi_vector); + next_index = node->cached_next_index; + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + vlib_buffer_t *p0 = vlib_get_buffer(vm, pi0); + p0->error = *error; + vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next); + } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 8082380922c65702251d5242058f7b5f35011574 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Mon, 20 Feb 2017 18:23:41 -0800 Subject: MAP pre-resolve - use FIB to track pre-resolved next-hop Change-Id: I9ea16881caf7aee57f0daf4ac2e8b82c672f87e9 Signed-off-by: Neale Ranns --- src/vnet/fib/fib_node.h | 2 + src/vnet/map/ip4_map.c | 16 ++--- src/vnet/map/ip6_map.c | 16 ++--- src/vnet/map/map.c | 172 +++++++++++++++++++++++++++++++++++++++------- src/vnet/map/map.h | 41 +++++++++-- test/test_map.py | 171 +++++++++++++++++++++++++++++++++++++++++++++ test/vpp_papi_provider.py | 28 ++++++++ 7 files changed, 391 insertions(+), 55 deletions(-) create mode 100644 test/test_map.py (limited to 'src/vnet/map/map.h') diff --git a/src/vnet/fib/fib_node.h b/src/vnet/fib/fib_node.h index 457dfb7a..496929ad 100644 --- a/src/vnet/fib/fib_node.h +++ b/src/vnet/fib/fib_node.h @@ -41,6 +41,7 @@ typedef enum fib_node_type_t_ { FIB_NODE_TYPE_LISP_ADJ, FIB_NODE_TYPE_GRE_TUNNEL, FIB_NODE_TYPE_VXLAN_TUNNEL, + FIB_NODE_TYPE_MAP_E, /** * Marker. New types before this one. leave the test last. */ @@ -63,6 +64,7 @@ typedef enum fib_node_type_t_ { [FIB_NODE_TYPE_LISP_ADJ] = "lisp-adj", \ [FIB_NODE_TYPE_GRE_TUNNEL] = "gre-tunnel", \ [FIB_NODE_TYPE_VXLAN_TUNNEL] = "vxlan-tunnel", \ + [FIB_NODE_TYPE_MAP_E] = "map-e", \ } /** diff --git a/src/vnet/map/ip4_map.c b/src/vnet/map/ip4_map.c index 9fd10f62..2be9ad37 100644 --- a/src/vnet/map/ip4_map.c +++ b/src/vnet/map/ip4_map.c @@ -173,18 +173,10 @@ static_always_inline bool ip4_map_ip6_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip) { #ifdef MAP_SKIP_IP6_LOOKUP - map_main_t *mm = &map_main; - u32 adj_index0 = mm->adj6_index; - if (adj_index0 > 0) + if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP6].fei) { - ip_lookup_main_t *lm6 = &ip6_main.lookup_main; - ip_adjacency_t *adj = ip_get_adjacency (lm6, mm->adj6_index); - if (adj->n_adj > 1) - { - u32 hash_c0 = ip4_compute_flow_hash (ip, IP_FLOW_HASH_DEFAULT); - adj_index0 += (hash_c0 & (adj->n_adj - 1)); - } - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = + pre_resolved[FIB_PROTOCOL_IP6].dpo.dpoi_index; return (true); } #endif @@ -773,7 +765,7 @@ VLIB_REGISTER_NODE(ip4_map_node) = { .next_nodes = { [IP4_MAP_NEXT_IP6_LOOKUP] = "ip6-lookup", #ifdef MAP_SKIP_IP6_LOOKUP - [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-rewrite", + [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-load-balance", #endif [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag", [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag", diff --git a/src/vnet/map/ip6_map.c b/src/vnet/map/ip6_map.c index d2945059..f7eb768f 100644 --- a/src/vnet/map/ip6_map.c +++ b/src/vnet/map/ip6_map.c @@ -151,18 +151,10 @@ static_always_inline bool ip6_map_ip4_lookup_bypass (vlib_buffer_t * p0, ip4_header_t * ip) { #ifdef MAP_SKIP_IP6_LOOKUP - map_main_t *mm = &map_main; - u32 adj_index0 = mm->adj4_index; - if (adj_index0 > 0) + if (FIB_NODE_INDEX_INVALID != pre_resolved[FIB_PROTOCOL_IP4].fei) { - ip_lookup_main_t *lm4 = &ip4_main.lookup_main; - ip_adjacency_t *adj = ip_get_adjacency (lm4, mm->adj4_index); - if (adj->n_adj > 1) - { - u32 hash_c0 = ip4_compute_flow_hash (ip, IP_FLOW_HASH_DEFAULT); - adj_index0 += (hash_c0 & (adj->n_adj - 1)); - } - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = adj_index0; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = + pre_resolved[FIB_PROTOCOL_IP4].dpo.dpoi_index; return (true); } #endif @@ -1195,7 +1187,7 @@ VLIB_REGISTER_NODE(ip6_map_node) = { .next_nodes = { [IP6_MAP_NEXT_IP4_LOOKUP] = "ip4-lookup", #ifdef MAP_SKIP_IP6_LOOKUP - [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-rewrite", + [IP6_MAP_NEXT_IP4_REWRITE] = "ip4-load-balance", #endif [IP6_MAP_NEXT_IP6_REASS] = "ip6-map-ip6-reass", [IP6_MAP_NEXT_IP4_REASS] = "ip6-map-ip4-reass", diff --git a/src/vnet/map/map.c b/src/vnet/map/map.c index a2d28118..6823a46e 100644 --- a/src/vnet/map/map.c +++ b/src/vnet/map/map.c @@ -41,6 +41,7 @@ crc_u32 (u32 data, u32 value) } #endif + /* * This code supports the following MAP modes: * @@ -437,23 +438,141 @@ map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep, } #ifdef MAP_SKIP_IP6_LOOKUP +/** + * Pre-resolvd per-protocol global next-hops + */ +map_main_pre_resolved_t pre_resolved[FIB_PROTOCOL_MAX]; + static void -map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6) +map_pre_resolve_init (map_main_pre_resolved_t * pr) { - map_main_t *mm = &map_main; - ip6_main_t *im6 = &ip6_main; + pr->fei = FIB_NODE_INDEX_INVALID; + fib_node_init (&pr->node, FIB_NODE_TYPE_MAP_E); +} + +static u8 * +format_map_pre_resolve (u8 * s, va_list ap) +{ + map_main_pre_resolved_t *pr = va_arg (ap, map_main_pre_resolved_t *); + + if (FIB_NODE_INDEX_INVALID != pr->fei) + { + fib_prefix_t pfx; + + fib_entry_get_prefix (pr->fei, &pfx); + + return (format (s, "%U (%u)", + format_ip46_address, &pfx.fp_addr, IP46_TYPE_ANY, + pr->dpo.dpoi_index)); + } + else + { + return (format (s, "un-set")); + } +} + + +/** + * Function definition to inform the FIB node that its last lock has gone. + */ +static void +map_last_lock_gone (fib_node_t * node) +{ + /* + * The MAP is a root of the graph. As such + * it never has children and thus is never locked. + */ + ASSERT (0); +} + +static map_main_pre_resolved_t * +map_from_fib_node (fib_node_t * node) +{ +#if (CLIB_DEBUG > 0) + ASSERT (FIB_NODE_TYPE_MAP_E == node->fn_type); +#endif + return ((map_main_pre_resolved_t *) + (((char *) node) - + STRUCT_OFFSET_OF (map_main_pre_resolved_t, node))); +} + +static void +map_stack (map_main_pre_resolved_t * pr) +{ + const dpo_id_t *dpo; - if (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0) + dpo = fib_entry_contribute_ip_forwarding (pr->fei); + + dpo_copy (&pr->dpo, dpo); +} + +/** + * Function definition to backwalk a FIB node + */ +static fib_node_back_walk_rc_t +map_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx) +{ + map_stack (map_from_fib_node (node)); + + return (FIB_NODE_BACK_WALK_CONTINUE); +} + +/** + * Function definition to get a FIB node from its index + */ +static fib_node_t * +map_fib_node_get (fib_node_index_t index) +{ + return (&pre_resolved[index].node); +} + +/* + * Virtual function table registered by MPLS GRE tunnels + * for participation in the FIB object graph. + */ +const static fib_node_vft_t map_vft = { + .fnv_get = map_fib_node_get, + .fnv_last_lock = map_last_lock_gone, + .fnv_back_walk = map_back_walk, +}; + +static void +map_fib_resolve (map_main_pre_resolved_t * pr, + fib_protocol_t proto, u8 len, const ip46_address_t * addr) +{ + fib_prefix_t pfx = { + .fp_proto = proto, + .fp_len = len, + .fp_addr = *addr, + }; + + pr->fei = fib_table_entry_special_add (0, // default fib + &pfx, + FIB_SOURCE_RR, + FIB_ENTRY_FLAG_NONE, + ADJ_INDEX_INVALID); + pr->sibling = fib_entry_child_add (pr->fei, FIB_NODE_TYPE_MAP_E, proto); + map_stack (pr); +} + +static void +map_pre_resolve (ip4_address_t * ip4, ip6_address_t * ip6) +{ + if (ip6 && (ip6->as_u64[0] != 0 || ip6->as_u64[1] != 0)) { - // FIXME NOT an ADJ - mm->adj6_index = ip6_fib_table_fwding_lookup (im6, 0, ip6); - clib_warning ("FIB lookup results in: %u", mm->adj6_index); + ip46_address_t addr = { + .ip6 = *ip6, + }; + map_fib_resolve (&pre_resolved[FIB_PROTOCOL_IP6], + FIB_PROTOCOL_IP6, 128, &addr); } - if (ip4->as_u32 != 0) + if (ip4 && (ip4->as_u32 != 0)) { - // FIXME NOT an ADJ - mm->adj4_index = ip4_fib_table_lookup_lb (0, ip4); - clib_warning ("FIB lookup results in: %u", mm->adj4_index); + ip46_address_t addr = { + .ip4 = *ip4, + }; + map_fib_resolve (&pre_resolved[FIB_PROTOCOL_IP4], + FIB_PROTOCOL_IP4, 32, &addr); } } #endif @@ -695,9 +814,8 @@ map_pre_resolve_command_fn (vlib_main_t * vm, vlib_cli_command_t * cmd) { unformat_input_t _line_input, *line_input = &_line_input; - ip4_address_t ip4nh; - ip6_address_t ip6nh; - map_main_t *mm = &map_main; + ip4_address_t ip4nh, *p_v4 = NULL; + ip6_address_t ip6nh, *p_v6 = NULL; clib_error_t *error = NULL; memset (&ip4nh, 0, sizeof (ip4nh)); @@ -710,10 +828,10 @@ map_pre_resolve_command_fn (vlib_main_t * vm, while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) { if (unformat (line_input, "ip4-nh %U", unformat_ip4_address, &ip4nh)) - mm->preresolve_ip4 = ip4nh; + p_v4 = &ip4nh; else if (unformat (line_input, "ip6-nh %U", unformat_ip6_address, &ip6nh)) - mm->preresolve_ip6 = ip6nh; + p_v6 = &ip6nh; else { error = clib_error_return (0, "unknown input `%U'", @@ -722,7 +840,7 @@ map_pre_resolve_command_fn (vlib_main_t * vm, } } - map_pre_resolve (&ip4nh, &ip6nh); + map_pre_resolve (p_v4, p_v6); done: unformat_free (line_input); @@ -1113,9 +1231,10 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input, #if MAP_SKIP_IP6_LOOKUP vlib_cli_output (vm, - "MAP pre-resolve: IP6 next-hop: %U (%u), IP4 next-hop: %U (%u)\n", - format_ip6_address, &mm->preresolve_ip6, mm->adj6_index, - format_ip4_address, &mm->preresolve_ip4, mm->adj4_index); + "MAP pre-resolve: IP6 next-hop: %U, IP4 next-hop: %U\n", + format_map_pre_resolve, &pre_resolved[FIB_PROTOCOL_IP6], + format_map_pre_resolve, &pre_resolved[FIB_PROTOCOL_IP4]); + #endif if (mm->tc_copy) @@ -2180,10 +2299,12 @@ map_init (vlib_main_t * vm) mm->vlib_main = vm; #ifdef MAP_SKIP_IP6_LOOKUP - memset (&mm->preresolve_ip4, 0, sizeof (mm->preresolve_ip4)); - memset (&mm->preresolve_ip6, 0, sizeof (mm->preresolve_ip6)); - mm->adj4_index = 0; - mm->adj6_index = 0; + fib_protocol_t proto; + + FOR_EACH_FIB_PROTOCOL (proto) + { + map_pre_resolve_init (&pre_resolved[proto]); + } #endif /* traffic class */ @@ -2238,6 +2359,9 @@ map_init (vlib_main_t * vm) mm->ip6_reass_fifo_last = MAP_REASS_INDEX_NONE; map_ip6_reass_reinit (NULL, NULL); +#ifdef MAP_SKIP_IP6_LOOKUP + fib_node_register_type (FIB_NODE_TYPE_MAP_E, &map_vft); +#endif map_dpo_module_init (); return 0; diff --git a/src/vnet/map/map.h b/src/vnet/map/map.h index f446b739..616d42c0 100644 --- a/src/vnet/map/map.h +++ b/src/vnet/map/map.h @@ -198,6 +198,40 @@ typedef struct { map_ip6_fragment_t fragments[MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY]; } map_ip6_reass_t; +#ifdef MAP_SKIP_IP6_LOOKUP +/** + * A pre-resolved next-hop + */ +typedef struct map_main_pre_resolved_t_ +{ + /** + * Linkage into the FIB graph + */ + fib_node_t node; + + /** + * The FIB entry index of the next-hop + */ + fib_node_index_t fei; + + /** + * This object sibling index on the FIB entry's child dependency list + */ + u32 sibling; + + /** + * The Load-balance object index to use to forward + */ + dpo_id_t dpo; +} map_main_pre_resolved_t; + +/** + * Pre-resolved next hops for v4 and v6. Why these are global and not + * per-domain is beyond me. + */ +extern map_main_pre_resolved_t pre_resolved[FIB_PROTOCOL_MAX]; +#endif + typedef struct { /* pool of MAP domains */ map_domain_t *domains; @@ -207,13 +241,6 @@ typedef struct { vlib_combined_counter_main_t *domain_counters; volatile u32 *counter_lock; -#ifdef MAP_SKIP_IP6_LOOKUP - /* pre-presolve */ - u32 adj6_index, adj4_index; - ip4_address_t preresolve_ip4; - ip6_address_t preresolve_ip6; -#endif - /* Traffic class: zero, copy (~0) or fixed value */ u8 tc; bool tc_copy; diff --git a/test/test_map.py b/test/test_map.py new file mode 100644 index 00000000..bc6cd818 --- /dev/null +++ b/test/test_map.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python + +import unittest +import socket + +from framework import VppTestCase, VppTestRunner +from vpp_ip_route import VppIpRoute, VppRoutePath + +from scapy.layers.l2 import Ether, Raw +from scapy.layers.inet import IP, UDP, ICMP +from scapy.layers.inet6 import IPv6 + + +class TestMAP(VppTestCase): + """ MAP Test Case """ + + def setUp(self): + super(TestMAP, self).setUp() + + # create 2 pg interfaces + self.create_pg_interfaces(range(4)) + + # pg0 is 'inside' IPv4 + self.pg0.admin_up() + self.pg0.config_ip4() + self.pg0.resolve_arp() + + # pg1 is 'outside' IPv6 + self.pg1.admin_up() + self.pg1.config_ip6() + self.pg1.generate_remote_hosts(4) + self.pg1.configure_ipv6_neighbors() + + def tearDown(self): + super(TestMAP, self).tearDown() + for i in self.pg_interfaces: + i.unconfig_ip4() + i.unconfig_ip6() + i.admin_down() + + def send_and_assert_no_replies(self, intf, pkts, remark): + intf.add_stream(pkts) + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + for i in self.pg_interfaces: + i.assert_nothing_captured(remark=remark) + + def send_and_assert_encapped(self, tx, ip6_src, ip6_dst, dmac=None): + if not dmac: + dmac = self.pg1.remote_mac + + self.pg0.add_stream(tx) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg1.get_capture(1) + rx = rx[0] + + self.assertEqual(rx[Ether].dst, dmac) + self.assertEqual(rx[IP].src, tx[IP].src) + self.assertEqual(rx[IPv6].src, ip6_src) + self.assertEqual(rx[IPv6].dst, ip6_dst) + + def test_map_e(self): + """ MAP-E """ + + # + # Add a route to the MAP-BR + # + map_br_pfx = "2001::" + map_br_pfx_len = 64 + map_route = VppIpRoute(self, + map_br_pfx, + map_br_pfx_len, + [VppRoutePath(self.pg1.remote_ip6, + self.pg1.sw_if_index, + is_ip6=1)], + is_ip6=1) + map_route.add_vpp_config() + + # + # Add a domain that maps from pg0 to pg1 + # + map_dst = socket.inet_pton(socket.AF_INET6, map_br_pfx) + map_src = "3001::1" + map_src_n = socket.inet_pton(socket.AF_INET6, map_src) + client_pfx = socket.inet_pton(socket.AF_INET, "192.168.0.0") + + self.vapi.map_add_domain(map_dst, + map_br_pfx_len, + map_src_n, + 128, + client_pfx, + 16) + + # + # Fire in a v4 packet that will be encapped to the BR + # + v4 = (Ether(dst=self.pg0.local_mac, src=self.pg0.remote_mac) / + IP(src=self.pg0.remote_ip4, dst='192.168.1.1') / + UDP(sport=20000, dport=10000) / + Raw('\xa5' * 100)) + + self.send_and_assert_encapped(v4, map_src, "2001::c0a8:0:0") + + # + # Fire in a V6 encapped packet. + # expect a decapped packet on the inside ip4 link + # + p = (Ether(dst=self.pg1.local_mac, src=self.pg1.remote_mac) / + IPv6(dst=map_src, src="2001::1") / + IP(dst=self.pg0.remote_ip4, src='192.168.1.1') / + UDP(sport=20000, dport=10000) / + Raw('\xa5' * 100)) + + self.pg1.add_stream(p) + + self.pg_enable_capture(self.pg_interfaces) + self.pg_start() + + rx = self.pg0.get_capture(1) + rx = rx[0] + + self.assertFalse(rx.haslayer(IPv6)) + self.assertEqual(rx[IP].src, p[IP].src) + self.assertEqual(rx[IP].dst, p[IP].dst) + + # + # Pre-resolve. No API for this!! + # + self.vapi.ppcli("map params pre-resolve ip6-nh 4001::1") + + self.send_and_assert_no_replies(self.pg0, v4, + "resovled via default route") + + # + # Add a route to 4001::1. Expect the encapped traffic to be + # sent via that routes next-hop + # + pre_res_route = VppIpRoute(self, + "4001::1", + 128, + [VppRoutePath(self.pg1.remote_hosts[2].ip6, + self.pg1.sw_if_index, + is_ip6=1)], + is_ip6=1) + pre_res_route.add_vpp_config() + + self.send_and_assert_encapped(v4, map_src, + "2001::c0a8:0:0", + dmac=self.pg1.remote_hosts[2].mac) + + # + # change the route to the pre-solved next-hop + # + pre_res_route1 = VppIpRoute(self, + "4001::1", + 128, + [VppRoutePath(self.pg1.remote_hosts[3].ip6, + self.pg1.sw_if_index, + is_ip6=1)], + is_ip6=1) + pre_res_route1.add_vpp_config() + + self.send_and_assert_encapped(v4, map_src, + "2001::c0a8:0:0", + dmac=self.pg1.remote_hosts[3].mac) + +if __name__ == '__main__': + unittest.main(testRunner=VppTestRunner) diff --git a/test/vpp_papi_provider.py b/test/vpp_papi_provider.py index 0062b72b..92070424 100644 --- a/test/vpp_papi_provider.py +++ b/test/vpp_papi_provider.py @@ -1624,3 +1624,31 @@ class VppPapiProvider(object): { 'vni': vni }) + + def map_add_domain(self, + ip6_prefix, + ip6_prefix_len, + ip6_src, + ip6_src_prefix_len, + ip4_prefix, + ip4_prefix_len, + ea_bits_len=0, + psid_offset=0, + psid_length=0, + is_translation=0, + mtu=1280): + return self.api( + self.papi.map_add_domain, + { + 'ip6_prefix': ip6_prefix, + 'ip6_prefix_len': ip6_prefix_len, + 'ip4_prefix': ip4_prefix, + 'ip4_prefix_len': ip4_prefix_len, + 'ip6_src': ip6_src, + 'ip6_src_prefix_len': ip6_src_prefix_len, + 'ea_bits_len': ea_bits_len, + 'psid_offset': psid_offset, + 'psid_length': psid_length, + 'is_translation': is_translation, + 'mtu': mtu + }) -- cgit 1.2.3-korg From 9705c3833a7b18609df8ae315a0aa062e1d2e180 Mon Sep 17 00:00:00 2001 From: Neale Ranns Date: Mon, 20 Feb 2017 20:29:41 -0800 Subject: MAP - add the domain struct directly into the dat-path and avoid the indirectiob throught the map-DPO Change-Id: Ifb72a1c1258440fdc4845aca8aecf2abd63526b1 Signed-off-by: Neale Ranns --- src/vnet/map/ip4_map.c | 20 ++++++-------- src/vnet/map/ip4_map_t.c | 15 ++++++----- src/vnet/map/map.c | 52 ++++++++---------------------------- src/vnet/map/map.h | 29 +++++++------------- src/vnet/map/map_dpo.c | 69 +++--------------------------------------------- src/vnet/map/map_dpo.h | 24 ----------------- 6 files changed, 42 insertions(+), 167 deletions(-) (limited to 'src/vnet/map/map.h') diff --git a/src/vnet/map/ip4_map.c b/src/vnet/map/ip4_map.c index 2be9ad37..1a20d704 100644 --- a/src/vnet/map/ip4_map.c +++ b/src/vnet/map/ip4_map.c @@ -293,12 +293,10 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p1 = vlib_get_buffer (vm, pi1); ip40 = vlib_buffer_get_current (p0); ip41 = vlib_buffer_get_current (p1); - d0 = - ip4_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - &map_domain_index0); - d1 = - ip4_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX], - &map_domain_index1); + map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + d0 = ip4_map_get_domain (map_domain_index0); + map_domain_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + d1 = ip4_map_get_domain (map_domain_index1); ASSERT (d0); ASSERT (d1); @@ -464,9 +462,8 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) p0 = vlib_get_buffer (vm, pi0); ip40 = vlib_buffer_get_current (p0); - d0 = - ip4_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - &map_domain_index0); + map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + d0 = ip4_map_get_domain (map_domain_index0); ASSERT (d0); /* @@ -597,9 +594,8 @@ ip4_map_reass (vlib_main_t * vm, p0 = vlib_get_buffer (vm, pi0); ip60 = vlib_buffer_get_current (p0); ip40 = (ip4_header_t *) (ip60 + 1); - d0 = - ip4_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - &map_domain_index0); + map_domain_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + d0 = ip4_map_get_domain (map_domain_index0); map_ip4_reass_lock (); map_ip4_reass_t *r = map_ip4_reass_get (ip40->src_address.as_u32, diff --git a/src/vnet/map/ip4_map_t.c b/src/vnet/map/ip4_map_t.c index 15974d8a..b63d76bf 100644 --- a/src/vnet/map/ip4_map_t.c +++ b/src/vnet/map/ip4_map_t.c @@ -1100,10 +1100,12 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next1 = IP4_MAPT_NEXT_DROP; } - d0 = ip4_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - &vnet_buffer (p0)->map_t.map_domain_index); - d1 = ip4_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX], - &vnet_buffer (p1)->map_t.map_domain_index); + vnet_buffer (p0)->map_t.map_domain_index = + vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + d0 = ip4_map_get_domain (vnet_buffer (p0)->map_t.map_domain_index); + vnet_buffer (p1)->map_t.map_domain_index = + vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + d1 = ip4_map_get_domain (vnet_buffer (p1)->map_t.map_domain_index); vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; vnet_buffer (p1)->map_t.mtu = d1->mtu ? d1->mtu : ~0; @@ -1213,8 +1215,9 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) next0 = IP4_MAPT_NEXT_DROP; } - d0 = ip4_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - &vnet_buffer (p0)->map_t.map_domain_index); + vnet_buffer (p0)->map_t.map_domain_index = + vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + d0 = ip4_map_get_domain (vnet_buffer (p0)->map_t.map_domain_index); vnet_buffer (p0)->map_t.mtu = d0->mtu ? d0->mtu : ~0; diff --git a/src/vnet/map/map.c b/src/vnet/map/map.c index 99305afa..811a0abc 100644 --- a/src/vnet/map/map.c +++ b/src/vnet/map/map.c @@ -177,7 +177,6 @@ map_create_domain (ip4_address_t * ip4_prefix, map_main_t *mm = &map_main; dpo_id_t dpo_v4 = DPO_INVALID; dpo_id_t dpo_v6 = DPO_INVALID; - fib_node_index_t fei; map_domain_t *d; /* Sanity check on the src prefix length */ @@ -268,57 +267,28 @@ map_create_domain (ip4_address_t * ip4_prefix, dpo_reset (&dpo_v4); /* - * Multiple MAP domains may share same source IPv6 TEP. - * In this case the route will exist and be MAP sourced. - * Find the adj (if any) already contributed and modify it + * construct a DPO to use the v6 domain */ - fib_prefix_t pfx6 = { - .fp_proto = FIB_PROTOCOL_IP6, - .fp_len = d->ip6_src_len, - .fp_addr = { - .ip6 = d->ip6_src, - } - , - }; - fei = fib_table_lookup_exact_match (0, &pfx6); - - if (FIB_NODE_INDEX_INVALID != fei) - { - dpo_id_t dpo = DPO_INVALID; - - if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_MAP, &dpo)) - { - /* - * modify the existing MAP to indicate it's shared - * skip to route add. - */ - const dpo_id_t *md_dpo; - map_dpo_t *md; - - ASSERT (DPO_LOAD_BALANCE == dpo.dpoi_type); - - md_dpo = load_balance_get_bucket (dpo.dpoi_index, 0); - md = map_dpo_get (md_dpo->dpoi_index); - - md->md_domain = ~0; - dpo_copy (&dpo_v6, md_dpo); - dpo_reset (&dpo); - - goto route_add; - } - } - if (d->flags & MAP_DOMAIN_TRANSLATION) map_t_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6); else map_dpo_create (DPO_PROTO_IP6, *map_domain_index, &dpo_v6); -route_add: /* + * Multiple MAP domains may share same source IPv6 TEP. Which is just dandy. + * We are not tracking the sharing. So a v4 lookup to find the correct + * domain post decap/trnaslate is always done + * * Create ip6 route. This is a reference counted add. If the prefix * already exists and is MAP sourced, it is now MAP source n+1 times * and will need to be removed n+1 times. */ + fib_prefix_t pfx6 = { + .fp_proto = FIB_PROTOCOL_IP6, + .fp_len = d->ip6_src_len, + .fp_addr.ip6 = d->ip6_src, + }; + fib_table_entry_special_dpo_add (0, &pfx6, FIB_SOURCE_MAP, FIB_ENTRY_FLAG_EXCLUSIVE, &dpo_v6); diff --git a/src/vnet/map/map.h b/src/vnet/map/map.h index 616d42c0..644e80f5 100644 --- a/src/vnet/map/map.h +++ b/src/vnet/map/map.h @@ -416,17 +416,11 @@ map_get_ip4 (ip6_address_t *addr) * Get the MAP domain from an IPv4 lookup adjacency. */ static_always_inline map_domain_t * -ip4_map_get_domain (u32 mdi, - u32 *map_domain_index) +ip4_map_get_domain (u32 mdi) { map_main_t *mm = &map_main; - map_dpo_t *md; - md = map_dpo_get(mdi); - - ASSERT(md); - *map_domain_index = md->md_domain; - return pool_elt_at_index(mm->domains, *map_domain_index); + return pool_elt_at_index(mm->domains, mdi); } /* @@ -435,23 +429,21 @@ ip4_map_get_domain (u32 mdi, * The IPv4 address is used otherwise. */ static_always_inline map_domain_t * -ip6_map_get_domain (u32 mdi, ip4_address_t *addr, - u32 *map_domain_index, u8 *error) +ip6_map_get_domain (u32 mdi, + ip4_address_t *addr, + u32 *map_domain_index, + u8 *error) { map_main_t *mm = &map_main; - map_dpo_t *md; /* * Disable direct MAP domain lookup on decap, until the security check is updated to verify IPv4 SA. * (That's done implicitly when MAP domain is looked up in the IPv4 FIB) */ #ifdef MAP_NONSHARED_DOMAIN_ENABLED - md = map_dpo_get(mdi); - - ASSERT(md); - *map_domain_index = md->md_domain; - if (*map_domain_index != ~0) - return pool_elt_at_index(mm->domains, *map_domain_index); +#error "How can you be sure this domain is not shared?" + *map_domain_index = mdi; + return pool_elt_at_index(mm->domains, mdi); #endif u32 lbi = ip4_fib_forwarding_lookup(0, addr); @@ -459,8 +451,7 @@ ip6_map_get_domain (u32 mdi, ip4_address_t *addr, if (PREDICT_TRUE(dpo->dpoi_type == map_dpo_type || dpo->dpoi_type == map_t_dpo_type)) { - md = map_dpo_get(dpo->dpoi_index); - *map_domain_index = md->md_domain; + *map_domain_index = dpo->dpoi_index; return pool_elt_at_index(mm->domains, *map_domain_index); } *error = MAP_ERROR_NO_DOMAIN; diff --git a/src/vnet/map/map_dpo.c b/src/vnet/map/map_dpo.c index df2b5fa4..430c1fbf 100644 --- a/src/vnet/map/map_dpo.c +++ b/src/vnet/map/map_dpo.c @@ -16,49 +16,21 @@ #include #include -/** - * pool of all MPLS Label DPOs - */ -map_dpo_t *map_dpo_pool; - /** * The register MAP DPO type */ dpo_type_t map_dpo_type; dpo_type_t map_t_dpo_type; -static map_dpo_t * -map_dpo_alloc (void) -{ - map_dpo_t *md; - - pool_get_aligned(map_dpo_pool, md, CLIB_CACHE_LINE_BYTES); - memset(md, 0, sizeof(*md)); - - return (md); -} - -static index_t -map_dpo_get_index (map_dpo_t *md) -{ - return (md - map_dpo_pool); -} - void map_dpo_create (dpo_proto_t dproto, u32 domain_index, dpo_id_t *dpo) { - map_dpo_t *md; - - md = map_dpo_alloc(); - md->md_domain = domain_index; - md->md_proto = dproto; - dpo_set(dpo, map_dpo_type, dproto, - map_dpo_get_index(md)); + domain_index); } void @@ -66,16 +38,10 @@ map_t_dpo_create (dpo_proto_t dproto, u32 domain_index, dpo_id_t *dpo) { - map_dpo_t *md; - - md = map_dpo_alloc(); - md->md_domain = domain_index; - md->md_proto = dproto; - dpo_set(dpo, map_t_dpo_type, dproto, - map_dpo_get_index(md)); + domain_index); } @@ -84,14 +50,8 @@ format_map_dpo (u8 *s, va_list *args) { index_t index = va_arg (*args, index_t); CLIB_UNUSED(u32 indent) = va_arg (*args, u32); - map_dpo_t *md; - - md = map_dpo_get(index); - return (format(s, "map:[%d]:%U domain:%d", - index, - format_dpo_proto, md->md_proto, - md->md_domain)); + return (format(s, "map: domain:%d", index)); } u8* @@ -99,40 +59,19 @@ format_map_t_dpo (u8 *s, va_list *args) { index_t index = va_arg (*args, index_t); CLIB_UNUSED(u32 indent) = va_arg (*args, u32); - map_dpo_t *md; - - md = map_dpo_get(index); - return (format(s, "map-t:[%d]:%U domain:%d", - index, - format_dpo_proto, md->md_proto, - md->md_domain)); + return (format(s, "map-t: domain:%d", index)); } static void map_dpo_lock (dpo_id_t *dpo) { - map_dpo_t *md; - - md = map_dpo_get(dpo->dpoi_index); - - md->md_locks++; } static void map_dpo_unlock (dpo_id_t *dpo) { - map_dpo_t *md; - - md = map_dpo_get(dpo->dpoi_index); - - md->md_locks--; - - if (0 == md->md_locks) - { - pool_put(map_dpo_pool, md); - } } const static dpo_vft_t md_vft = { diff --git a/src/vnet/map/map_dpo.h b/src/vnet/map/map_dpo.h index be510dba..63bf4787 100644 --- a/src/vnet/map/map_dpo.h +++ b/src/vnet/map/map_dpo.h @@ -22,23 +22,6 @@ /** * A representation of a MAP DPO */ -typedef struct map_dpo_t -{ - /** - * The dat-plane protocol - */ - dpo_proto_t md_proto; - - /** - * the MAP domain index - */ - u32 md_domain; - - /** - * Number of locks/users of the label - */ - u16 md_locks; -} map_dpo_t; extern void map_dpo_create (dpo_proto_t dproto, u32 domain_index, @@ -52,16 +35,9 @@ extern u8* format_map_dpo(u8 *s, va_list *args); /* * Encapsulation violation for fast data-path access */ -extern map_dpo_t *map_dpo_pool; extern dpo_type_t map_dpo_type; extern dpo_type_t map_t_dpo_type; -static inline map_dpo_t * -map_dpo_get (index_t index) -{ - return (pool_elt_at_index(map_dpo_pool, index)); -} - extern void map_dpo_module_init(void); #endif -- cgit 1.2.3-korg From a774b53623f60b5e8ea8ed634d6a41e847743715 Mon Sep 17 00:00:00 2001 From: Matus Fabian Date: Tue, 2 May 2017 03:15:22 -0700 Subject: NAT64: Move IPv6-IPv4 virtual reassembly code from MAP-T to common library (VPP-708) Change-Id: I9ad636f80bf109ffac9ca1b6d80d5f2c31f2076a Signed-off-by: Matus Fabian --- src/vnet.am | 2 + src/vnet/ip/ip4_to_ip6.h | 577 +++++++++++++++++++++++++++++++++ src/vnet/ip/ip6_to_ip4.h | 571 +++++++++++++++++++++++++++++++++ src/vnet/map/ip4_map.c | 53 +-- src/vnet/map/ip4_map_t.c | 820 ++++++++--------------------------------------- src/vnet/map/ip6_map.c | 23 +- src/vnet/map/ip6_map_t.c | 806 ++++++++++------------------------------------ src/vnet/map/map.c | 85 ----- src/vnet/map/map.h | 29 -- 9 files changed, 1463 insertions(+), 1503 deletions(-) create mode 100644 src/vnet/ip/ip4_to_ip6.h create mode 100644 src/vnet/ip/ip6_to_ip4.h (limited to 'src/vnet/map/map.h') diff --git a/src/vnet.am b/src/vnet.am index 9f3aedba..6e35df87 100644 --- a/src/vnet.am +++ b/src/vnet.am @@ -52,6 +52,8 @@ nobase_include_HEADERS += \ vnet/interface.h \ vnet/interface.api.h \ vnet/interface_funcs.h \ + vnet/ip/ip4_to_ip6.h \ + vnet/ip/ip6_to_ip4.h \ vnet/l3_types.h \ vnet/pipeline.h \ vnet/replication.h \ diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h new file mode 100644 index 00000000..96b8bf1e --- /dev/null +++ b/src/vnet/ip/ip4_to_ip6.h @@ -0,0 +1,577 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief IPv4 to IPv6 translation + */ +#ifndef __included_ip4_to_ip6_h__ +#define __included_ip4_to_ip6_h__ + +#include + + +/** + * IPv4 to IPv6 set call back function type + */ +typedef int (*ip4_to_ip6_set_fn_t) (ip4_header_t * ip4, ip6_header_t * ip6, + void *ctx); + +/* *INDENT-OFF* */ +static u8 icmp_to_icmp6_updater_pointer_table[] = + { 0, 1, 4, 4, ~0, + ~0, ~0, ~0, 7, 6, + ~0, ~0, 8, 8, 8, + 8, 24, 24, 24, 24 + }; +/* *INDENT-ON* */ + +#define frag_id_4to6(id) (id) + +/** + * @brief Get TCP/UDP port number or ICMP id from IPv4 packet. + * + * @param ip4 IPv4 header. + * @param sender 1 get sender port, 0 get receiver port. + * + * @returns Port number on success, 0 otherwise. + */ +always_inline u16 +ip4_get_port (ip4_header_t * ip, u8 sender) +{ + if (ip->ip_version_and_header_length != 0x45 || + ip4_get_fragment_offset (ip)) + return 0; + + if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || + (ip->protocol == IP_PROTOCOL_UDP))) + { + udp_header_t *udp = (void *) (ip + 1); + return (sender) ? udp->src_port : udp->dst_port; + } + else if (ip->protocol == IP_PROTOCOL_ICMP) + { + icmp46_header_t *icmp = (void *) (ip + 1); + if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply) + { + return *((u16 *) (icmp + 1)); + } + else if (clib_net_to_host_u16 (ip->length) >= 64) + { + ip = (ip4_header_t *) (icmp + 2); + if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || + (ip->protocol == IP_PROTOCOL_UDP))) + { + udp_header_t *udp = (void *) (ip + 1); + return (sender) ? udp->dst_port : udp->src_port; + } + else if (ip->protocol == IP_PROTOCOL_ICMP) + { + icmp46_header_t *icmp = (void *) (ip + 1); + if (icmp->type == ICMP4_echo_request || + icmp->type == ICMP4_echo_reply) + { + return *((u16 *) (icmp + 1)); + } + } + } + } + return 0; +} + +/** + * @brief Convert type and code value from ICMP4 to ICMP6. + * + * @param icmp ICMP header. + * @param inner_ip4 Inner IPv4 header if present, 0 otherwise. + * + * @returns 0 on success, non-zero value otherwise. + */ +always_inline int +icmp_to_icmp6_header (icmp46_header_t * icmp, ip4_header_t ** inner_ip4) +{ + *inner_ip4 = NULL; + switch (icmp->type) + { + case ICMP4_echo_reply: + icmp->type = ICMP6_echo_reply; + break; + case ICMP4_echo_request: + icmp->type = ICMP6_echo_request; + break; + case ICMP4_destination_unreachable: + *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8); + + switch (icmp->code) + { + case ICMP4_destination_unreachable_destination_unreachable_net: //0 + case ICMP4_destination_unreachable_destination_unreachable_host: //1 + icmp->type = ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_no_route_to_destination; + break; + case ICMP4_destination_unreachable_protocol_unreachable: //2 + icmp->type = ICMP6_parameter_problem; + icmp->code = ICMP6_parameter_problem_unrecognized_next_header; + break; + case ICMP4_destination_unreachable_port_unreachable: //3 + icmp->type = ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_port_unreachable; + break; + case ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set: //4 + icmp->type = + ICMP6_packet_too_big; + icmp->code = 0; + { + u32 advertised_mtu = clib_net_to_host_u32 (*((u32 *) (icmp + 1))); + if (advertised_mtu) + advertised_mtu += 20; + else + advertised_mtu = 1000; //FIXME ! (RFC 1191 - plateau value) + + //FIXME: = minimum(advertised MTU+20, MTU_of_IPv6_nexthop, (MTU_of_IPv4_nexthop)+20) + *((u32 *) (icmp + 1)) = clib_host_to_net_u32 (advertised_mtu); + } + break; + + case ICMP4_destination_unreachable_source_route_failed: //5 + case ICMP4_destination_unreachable_destination_network_unknown: //6 + case ICMP4_destination_unreachable_destination_host_unknown: //7 + case ICMP4_destination_unreachable_source_host_isolated: //8 + case ICMP4_destination_unreachable_network_unreachable_for_type_of_service: //11 + case ICMP4_destination_unreachable_host_unreachable_for_type_of_service: //12 + icmp->type = + ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_no_route_to_destination; + break; + case ICMP4_destination_unreachable_network_administratively_prohibited: //9 + case ICMP4_destination_unreachable_host_administratively_prohibited: //10 + case ICMP4_destination_unreachable_communication_administratively_prohibited: //13 + case ICMP4_destination_unreachable_precedence_cutoff_in_effect: //15 + icmp->type = ICMP6_destination_unreachable; + icmp->code = + ICMP6_destination_unreachable_destination_administratively_prohibited; + break; + case ICMP4_destination_unreachable_host_precedence_violation: //14 + default: + return -1; + } + break; + + case ICMP4_time_exceeded: //11 + *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8); + icmp->type = ICMP6_time_exceeded; + break; + + case ICMP4_parameter_problem: + *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8); + + switch (icmp->code) + { + case ICMP4_parameter_problem_pointer_indicates_error: + case ICMP4_parameter_problem_bad_length: + icmp->type = ICMP6_parameter_problem; + icmp->code = ICMP6_parameter_problem_erroneous_header_field; + { + u8 ptr = + icmp_to_icmp6_updater_pointer_table[*((u8 *) (icmp + 1))]; + if (ptr == 0xff) + return -1; + + *((u32 *) (icmp + 1)) = clib_host_to_net_u32 (ptr); + } + break; + default: + //All other codes cause error + return -1; + } + break; + + default: + //All other types cause error + return -1; + break; + } + return 0; +} + +/** + * @brief Translate ICMP4 packet to ICMP6. + * + * @param p Buffer to translate. + * @param fn The function to translate outer header. + * @param ctx A context passed in the outer header translate function. + * @param inner_fn The function to translate inner header. + * @param inner_ctx A context passed in the inner header translate function. + * + * @returns 0 on success, non-zero value otherwise. + */ +always_inline int +icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx, + ip4_to_ip6_set_fn_t inner_fn, void *inner_ctx) +{ + ip4_header_t *ip4, *inner_ip4; + ip6_header_t *ip6, *inner_ip6; + u32 ip_len; + icmp46_header_t *icmp; + ip_csum_t csum; + ip6_frag_hdr_t *inner_frag; + u32 inner_frag_id; + u32 inner_frag_offset; + u8 inner_frag_more; + u16 *inner_L4_checksum = 0; + int rv; + + ip4 = vlib_buffer_get_current (p); + ip_len = clib_net_to_host_u16 (ip4->length); + ASSERT (ip_len <= p->current_length); + + icmp = (icmp46_header_t *) (ip4 + 1); + if (icmp_to_icmp6_header (icmp, &inner_ip4)) + return -1; + + if (inner_ip4) + { + //We have 2 headers to translate. + //We need to make some room in the middle of the packet + if (PREDICT_FALSE (ip4_is_fragment (inner_ip4))) + { + //Here it starts getting really tricky + //We will add a fragmentation header in the inner packet + + if (!ip4_is_first_fragment (inner_ip4)) + { + //For now we do not handle unless it is the first fragment + //Ideally we should handle the case as we are in slow path already + return -1; + } + + vlib_buffer_advance (p, + -2 * (sizeof (*ip6) - sizeof (*ip4)) - + sizeof (*inner_frag)); + ip6 = vlib_buffer_get_current (p); + clib_memcpy (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4, + 20 + 8); + ip4 = + (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)); + icmp = (icmp46_header_t *) (ip4 + 1); + + inner_ip6 = + (ip6_header_t *) u8_ptr_add (inner_ip4, + sizeof (*ip4) - sizeof (*ip6) - + sizeof (*inner_frag)); + inner_frag = + (ip6_frag_hdr_t *) u8_ptr_add (inner_ip6, sizeof (*inner_ip6)); + ip6->payload_length = + u16_net_add (ip4->length, + sizeof (*ip6) - 2 * sizeof (*ip4) + + sizeof (*inner_frag)); + inner_frag_id = frag_id_4to6 (inner_ip4->fragment_id); + inner_frag_offset = ip4_get_fragment_offset (inner_ip4); + inner_frag_more = + ! !(inner_ip4->flags_and_fragment_offset & + clib_net_to_host_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)); + } + else + { + vlib_buffer_advance (p, -2 * (sizeof (*ip6) - sizeof (*ip4))); + ip6 = vlib_buffer_get_current (p); + clib_memcpy (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4, + 20 + 8); + ip4 = + (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)); + icmp = (icmp46_header_t *) u8_ptr_add (ip4, sizeof (*ip4)); + inner_ip6 = + (ip6_header_t *) u8_ptr_add (inner_ip4, + sizeof (*ip4) - sizeof (*ip6)); + ip6->payload_length = + u16_net_add (ip4->length, sizeof (*ip6) - 2 * sizeof (*ip4)); + inner_frag = NULL; + } + + if (PREDICT_TRUE (inner_ip4->protocol == IP_PROTOCOL_TCP)) + { + inner_L4_checksum = &((tcp_header_t *) (inner_ip4 + 1))->checksum; + *inner_L4_checksum = + ip_csum_fold (ip_csum_sub_even + (*inner_L4_checksum, + *((u64 *) (&inner_ip4->src_address)))); + } + else if (PREDICT_TRUE (inner_ip4->protocol == IP_PROTOCOL_UDP)) + { + inner_L4_checksum = &((udp_header_t *) (inner_ip4 + 1))->checksum; + if (!*inner_L4_checksum) + { + return -1; + } + *inner_L4_checksum = + ip_csum_fold (ip_csum_sub_even + (*inner_L4_checksum, + *((u64 *) (&inner_ip4->src_address)))); + } + else if (inner_ip4->protocol == IP_PROTOCOL_ICMP) + { + //We have an ICMP inside an ICMP + //It needs to be translated, but not for error ICMP messages + icmp46_header_t *inner_icmp = (icmp46_header_t *) (inner_ip4 + 1); + csum = inner_icmp->checksum; + //Only types ICMP4_echo_request and ICMP4_echo_reply are handled by icmp_to_icmp6_header + csum = ip_csum_sub_even (csum, *((u16 *) inner_icmp)); + inner_icmp->type = (inner_icmp->type == ICMP4_echo_request) ? + ICMP6_echo_request : ICMP6_echo_reply; + csum = ip_csum_add_even (csum, *((u16 *) inner_icmp)); + csum = + ip_csum_add_even (csum, clib_host_to_net_u16 (IP_PROTOCOL_ICMP6)); + csum = + ip_csum_add_even (csum, inner_ip4->length - sizeof (*inner_ip4)); + inner_icmp->checksum = ip_csum_fold (csum); + inner_L4_checksum = &inner_icmp->checksum; + inner_ip4->protocol = IP_PROTOCOL_ICMP6; + } + else + { + /* To shut up Coverity */ + os_panic (); + } + + csum = *inner_L4_checksum; //Initial checksum of the inner L4 header + + inner_ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (inner_ip4->tos << 20)); + inner_ip6->payload_length = + u16_net_add (inner_ip4->length, -sizeof (*inner_ip4)); + inner_ip6->hop_limit = inner_ip4->ttl; + inner_ip6->protocol = inner_ip4->protocol; + + if ((rv = inner_fn (inner_ip4, inner_ip6, inner_ctx)) != 0) + return rv; + + if (PREDICT_FALSE (inner_frag != NULL)) + { + inner_frag->next_hdr = inner_ip6->protocol; + inner_frag->identification = inner_frag_id; + inner_frag->rsv = 0; + inner_frag->fragment_offset_and_more = + ip6_frag_hdr_offset_and_more (inner_frag_offset, inner_frag_more); + inner_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + inner_ip6->payload_length = + clib_host_to_net_u16 (clib_net_to_host_u16 + (inner_ip6->payload_length) + + sizeof (*inner_frag)); + } + + csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]); + csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]); + csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]); + csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]); + *inner_L4_checksum = ip_csum_fold (csum); + } + else + { + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); + ip6 = vlib_buffer_get_current (p); + ip6->payload_length = + clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - + sizeof (*ip4)); + } + + //Translate outer IPv6 + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + + ip6->hop_limit = ip4->ttl; + ip6->protocol = IP_PROTOCOL_ICMP6; + + if ((rv = fn (ip4, ip6, ctx)) != 0) + return rv; + + //Truncate when the packet exceeds the minimal IPv6 MTU + if (p->current_length > 1280) + { + ip6->payload_length = clib_host_to_net_u16 (1280 - sizeof (*ip6)); + p->current_length = 1280; //Looks too simple to be correct... + } + + //Recompute ICMP checksum + icmp->checksum = 0; + csum = ip_csum_with_carry (0, ip6->payload_length); + csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol)); + csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]); + csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]); + csum = + ip_incremental_checksum (csum, icmp, + clib_net_to_host_u16 (ip6->payload_length)); + icmp->checksum = ~ip_csum_fold (csum); + + return 0; +} + +/** + * @brief Translate IPv4 fragmented packet to IPv6. + * + * @param p Buffer to translate. + * @param fn The function to translate header. + * @param ctx A context passed in the header translate function. + * + * @returns 0 on success, non-zero value otherwise. + */ +always_inline int +ip4_to_ip6_fragmented (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx) +{ + ip4_header_t *ip4; + ip6_header_t *ip6; + ip6_frag_hdr_t *frag; + int rv; + + ip4 = vlib_buffer_get_current (p); + frag = (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); + ip6 = + (ip6_header_t *) u8_ptr_add (ip4, + sizeof (*ip4) - sizeof (*frag) - + sizeof (*ip6)); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); + + //We know that the protocol was one of ICMP, TCP or UDP + //because the first fragment was found and cached + frag->next_hdr = + (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol; + frag->identification = frag_id_4to6 (ip4->fragment_id); + frag->rsv = 0; + frag->fragment_offset_and_more = + ip6_frag_hdr_offset_and_more (ip4_get_fragment_offset (ip4), + clib_net_to_host_u16 + (ip4->flags_and_fragment_offset) & + IP4_HEADER_FLAG_MORE_FRAGMENTS); + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + ip6->payload_length = + clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - + sizeof (*ip4) + sizeof (*frag)); + ip6->hop_limit = ip4->ttl; + ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + + if ((rv = fn (ip4, ip6, ctx)) != 0) + return rv; + + return 0; +} + +/** + * @brief Translate IPv4 UDP/TCP packet to IPv6. + * + * @param p Buffer to translate. + * @param fn The function to translate header. + * @param ctx A context passed in the header translate function. + * + * @returns 0 on success, non-zero value otherwise. + */ +always_inline int +ip4_to_ip6_tcp_udp (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx) +{ + ip4_header_t *ip4; + ip6_header_t *ip6; + ip_csum_t csum; + u16 *checksum; + ip6_frag_hdr_t *frag; + u32 frag_id; + int rv; + + ip4 = vlib_buffer_get_current (p); + + if (ip4->protocol == IP_PROTOCOL_UDP) + { + udp_header_t *udp = ip4_next_header (ip4); + checksum = &udp->checksum; + + //UDP checksum is optional over IPv4 but mandatory for IPv6 + //We do not check udp->length sanity but use our safe computed value instead + if (PREDICT_FALSE (!checksum)) + { + u16 udp_len = clib_host_to_net_u16 (ip4->length) - sizeof (*ip4); + csum = ip_incremental_checksum (0, udp, udp_len); + csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len)); + csum = + ip_csum_with_carry (csum, clib_host_to_net_u16 (IP_PROTOCOL_UDP)); + csum = ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address))); + *checksum = ~ip_csum_fold (csum); + } + } + else + { + tcp_header_t *tcp = ip4_next_header (ip4); + checksum = &tcp->checksum; + } + + csum = ip_csum_sub_even (*checksum, ip4->src_address.as_u32); + csum = ip_csum_sub_even (csum, ip4->dst_address.as_u32); + + // Deal with fragmented packets + if (PREDICT_FALSE (ip4->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS))) + { + ip6 = + (ip6_header_t *) u8_ptr_add (ip4, + sizeof (*ip4) - sizeof (*ip6) - + sizeof (*frag)); + frag = + (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); + frag_id = frag_id_4to6 (ip4->fragment_id); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); + } + else + { + ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6)); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); + frag = NULL; + } + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4)); + ip6->hop_limit = ip4->ttl; + ip6->protocol = ip4->protocol; + + if (PREDICT_FALSE (frag != NULL)) + { + frag->next_hdr = ip6->protocol; + frag->identification = frag_id; + frag->rsv = 0; + frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1); + ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag)); + } + + if ((rv = fn (ip4, ip6, ctx)) != 0) + return rv; + + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); + *checksum = ip_csum_fold (csum); + + return 0; +} + +#endif /* __included_ip4_to_ip6_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h new file mode 100644 index 00000000..f5d56883 --- /dev/null +++ b/src/vnet/ip/ip6_to_ip4.h @@ -0,0 +1,571 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * @file + * @brief IPv6 to IPv4 translation + */ +#ifndef __included_ip6_to_ip4_h__ +#define __included_ip6_to_ip4_h__ + +#include + +/** + * IPv6 to IPv4 set call back function type + */ +typedef int (*ip6_to_ip4_set_fn_t) (ip6_header_t * ip6, ip4_header_t * ip4, + void *ctx); + +/* *INDENT-OFF* */ +static u8 icmp6_to_icmp_updater_pointer_table[] = + { 0, 1, ~0, ~0, + 2, 2, 9, 8, + 12, 12, 12, 12, + 12, 12, 12, 12, + 12, 12, 12, 12, + 12, 12, 12, 12, + 24, 24, 24, 24, + 24, 24, 24, 24, + 24, 24, 24, 24, + 24, 24, 24, 24 + }; +/* *INDENT-ON* */ + +#define frag_id_6to4(id) ((id) ^ ((id) >> 16)) + +/** + * @brief Parse some useful information from IPv6 header. + * + * @param ip6 IPv6 header. + * @param buff_len Buffer length. + * @param l4_protocol L4 protocol number. + * @param l4_offset L4 header offset. + * @param frag_hdr_offset Fragment header offset if present, 0 otherwise. + * + * @returns 0 on success, non-zero value otherwise. + */ +static_always_inline int +ip6_parse (const ip6_header_t * ip6, u32 buff_len, + u8 * l4_protocol, u16 * l4_offset, u16 * frag_hdr_offset) +{ + if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) + { + *l4_protocol = ((ip6_frag_hdr_t *) (ip6 + 1))->next_hdr; + *frag_hdr_offset = sizeof (*ip6); + *l4_offset = sizeof (*ip6) + sizeof (ip6_frag_hdr_t); + } + else + { + *l4_protocol = ip6->protocol; + *frag_hdr_offset = 0; + *l4_offset = sizeof (*ip6); + } + + return (buff_len < (*l4_offset + 4)) || + (clib_net_to_host_u16 (ip6->payload_length) < + (*l4_offset + 4 - sizeof (*ip6))); +} + +/** + * @brief Get TCP/UDP port number or ICMP id from IPv6 packet. + * + * @param ip6 IPv6 header. + * @param sender 1 get sender port, 0 get receiver port. + * @param buffer_len Buffer length. + * + * @returns Port number on success, 0 otherwise. + */ +always_inline u16 +ip6_get_port (ip6_header_t * ip6, u8 sender, u16 buffer_len) +{ + u8 l4_protocol; + u16 l4_offset; + u16 frag_offset; + u8 *l4; + + if (ip6_parse (ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset)) + return 0; + + if (frag_offset && + ip6_frag_hdr_offset (((ip6_frag_hdr_t *) + u8_ptr_add (ip6, frag_offset)))) + return 0; //Can't deal with non-first fragment for now + + l4 = u8_ptr_add (ip6, l4_offset); + if (l4_protocol == IP_PROTOCOL_TCP || l4_protocol == IP_PROTOCOL_UDP) + { + return (sender) ? ((udp_header_t *) (l4))->src_port : ((udp_header_t + *) + (l4))->dst_port; + } + else if (l4_protocol == IP_PROTOCOL_ICMP6) + { + icmp46_header_t *icmp = (icmp46_header_t *) (l4); + if (icmp->type == ICMP6_echo_request) + { + return (sender) ? ((u16 *) (icmp))[2] : -1; + } + else if (icmp->type == ICMP6_echo_reply) + { + return (sender) ? -1 : ((u16 *) (icmp))[2]; + } + } + return 0; +} + +/** + * @brief Convert type and code value from ICMP6 to ICMP4. + * + * @param icmp ICMP header. + * @param inner_ip6 Inner IPv6 header if present, 0 otherwise. + * + * @returns 0 on success, non-zero value otherwise. + */ +static_always_inline int +icmp6_to_icmp_header (icmp46_header_t * icmp, ip6_header_t ** inner_ip6) +{ + *inner_ip6 = NULL; + switch (icmp->type) + { + case ICMP6_echo_request: + icmp->type = ICMP4_echo_request; + break; + case ICMP6_echo_reply: + icmp->type = ICMP4_echo_reply; + break; + case ICMP6_destination_unreachable: + *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); + + switch (icmp->code) + { + case ICMP6_destination_unreachable_no_route_to_destination: //0 + case ICMP6_destination_unreachable_beyond_scope_of_source_address: //2 + case ICMP6_destination_unreachable_address_unreachable: //3 + icmp->type = ICMP4_destination_unreachable; + icmp->code = + ICMP4_destination_unreachable_destination_unreachable_host; + break; + case ICMP6_destination_unreachable_destination_administratively_prohibited: //1 + icmp->type = + ICMP4_destination_unreachable; + icmp->code = + ICMP4_destination_unreachable_communication_administratively_prohibited; + break; + case ICMP6_destination_unreachable_port_unreachable: + icmp->type = ICMP4_destination_unreachable; + icmp->code = ICMP4_destination_unreachable_port_unreachable; + break; + default: + return -1; + } + break; + case ICMP6_packet_too_big: + *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); + + icmp->type = ICMP4_destination_unreachable; + icmp->code = 4; + { + u32 advertised_mtu = clib_net_to_host_u32 (*((u32 *) (icmp + 1))); + advertised_mtu -= 20; + //FIXME: = minimum(advertised MTU-20, MTU_of_IPv4_nexthop, (MTU_of_IPv6_nexthop)-20) + ((u16 *) (icmp))[3] = clib_host_to_net_u16 (advertised_mtu); + } + break; + + case ICMP6_time_exceeded: + *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); + + icmp->type = ICMP4_time_exceeded; + break; + + case ICMP6_parameter_problem: + *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); + + switch (icmp->code) + { + case ICMP6_parameter_problem_erroneous_header_field: + icmp->type = ICMP4_parameter_problem; + icmp->code = ICMP4_parameter_problem_pointer_indicates_error; + u32 pointer = clib_net_to_host_u32 (*((u32 *) (icmp + 1))); + if (pointer >= 40) + return -1; + + ((u8 *) (icmp + 1))[0] = + icmp6_to_icmp_updater_pointer_table[pointer]; + break; + case ICMP6_parameter_problem_unrecognized_next_header: + icmp->type = ICMP4_destination_unreachable; + icmp->code = ICMP4_destination_unreachable_port_unreachable; + break; + case ICMP6_parameter_problem_unrecognized_option: + default: + return -1; + } + break; + default: + return -1; + break; + } + return 0; +} + +/** + * @brief Translate TOS value from IPv6 to IPv4. + * + * @param ip6 IPv6 header. + * + * @returns IPv4 TOS value. + */ +static_always_inline u8 +ip6_translate_tos (const ip6_header_t * ip6) +{ + return (clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label) + & 0x0ff00000) >> 20; +} + +/** + * @brief Translate ICMP6 packet to ICMP4. + * + * @param p Buffer to translate. + * @param fn The function to translate outer header. + * @param ctx A context passed in the outer header translate function. + * @param inner_fn The function to translate inner header. + * @param inner_ctx A context passed in the inner header translate function. + * + * @returns 0 on success, non-zero value otherwise. + */ +always_inline int +icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, + ip6_to_ip4_set_fn_t inner_fn, void *inner_ctx) +{ + ip6_header_t *ip6, *inner_ip6; + ip4_header_t *ip4, *inner_ip4; + u32 ip6_pay_len; + icmp46_header_t *icmp; + ip_csum_t csum; + int rv; + + ip6 = vlib_buffer_get_current (p); + ip6_pay_len = clib_net_to_host_u16 (ip6->payload_length); + icmp = (icmp46_header_t *) (ip6 + 1); + ASSERT (ip6_pay_len + sizeof (*ip6) <= p->current_length); + + //No extensions headers allowed here + if (ip6->protocol != IP_PROTOCOL_ICMP6) + return -1; + + //There are no fragmented ICMP messages, so no extension header for now + if (icmp6_to_icmp_header (icmp, &inner_ip6)) + return -1; + + if (inner_ip6) + { + u16 *inner_L4_checksum, inner_l4_offset, inner_frag_offset, + inner_frag_id; + u8 *inner_l4, inner_protocol; + + //We have two headers to translate + // FROM + // [ IPv6 ]<- ext ->[IC][ IPv6 ]<- ext ->[L4 header ... + // Handled cases: + // [ IPv6 ][IC][ IPv6 ][L4 header ... + // [ IPv6 ][IC][ IPv6 ][Fr][L4 header ... + // TO + // [ IPv4][IC][ IPv4][L4 header ... + + if (ip6_parse (inner_ip6, ip6_pay_len - 8, + &inner_protocol, &inner_l4_offset, &inner_frag_offset)) + return -1; + + inner_l4 = u8_ptr_add (inner_ip6, inner_l4_offset); + inner_ip4 = + (ip4_header_t *) u8_ptr_add (inner_l4, -sizeof (*inner_ip4)); + if (inner_frag_offset) + { + ip6_frag_hdr_t *inner_frag = + (ip6_frag_hdr_t *) u8_ptr_add (inner_ip6, inner_frag_offset); + inner_frag_id = frag_id_6to4 (inner_frag->identification); + } + else + { + inner_frag_id = 0; + } + + //Do the translation of the inner packet + if (inner_protocol == IP_PROTOCOL_TCP) + { + inner_L4_checksum = (u16 *) u8_ptr_add (inner_l4, 16); + } + else if (inner_protocol == IP_PROTOCOL_UDP) + { + inner_L4_checksum = (u16 *) u8_ptr_add (inner_l4, 6); + } + else if (inner_protocol == IP_PROTOCOL_ICMP6) + { + icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4; + csum = inner_icmp->checksum; + csum = ip_csum_sub_even (csum, *((u16 *) inner_icmp)); + //It cannot be of a different type as ip6_icmp_to_icmp6_in_place succeeded + inner_icmp->type = (inner_icmp->type == ICMP6_echo_request) ? + ICMP4_echo_request : ICMP4_echo_reply; + csum = ip_csum_add_even (csum, *((u16 *) inner_icmp)); + inner_icmp->checksum = ip_csum_fold (csum); + inner_protocol = IP_PROTOCOL_ICMP; //Will be copied to ip6 later + inner_L4_checksum = &inner_icmp->checksum; + } + else + { + return -1; + } + + csum = *inner_L4_checksum; + csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[0]); + csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]); + csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]); + csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]); + + if ((rv = inner_fn (inner_ip6, inner_ip4, inner_ctx)) != 0) + return rv; + + inner_ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + inner_ip4->tos = ip6_translate_tos (inner_ip6); + inner_ip4->length = + u16_net_add (inner_ip6->payload_length, + sizeof (*ip4) + sizeof (*ip6) - inner_l4_offset); + inner_ip4->fragment_id = inner_frag_id; + inner_ip4->flags_and_fragment_offset = + clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); + inner_ip4->ttl = inner_ip6->hop_limit; + inner_ip4->protocol = inner_protocol; + inner_ip4->checksum = ip4_header_checksum (inner_ip4); + + if (inner_ip4->protocol == IP_PROTOCOL_ICMP) + { + //Remove remainings of the pseudo-header in the csum + csum = + ip_csum_sub_even (csum, clib_host_to_net_u16 (IP_PROTOCOL_ICMP6)); + csum = + ip_csum_sub_even (csum, inner_ip4->length - sizeof (*inner_ip4)); + } + else + { + //Update to new pseudo-header + csum = ip_csum_add_even (csum, inner_ip4->src_address.as_u32); + csum = ip_csum_add_even (csum, inner_ip4->dst_address.as_u32); + } + *inner_L4_checksum = ip_csum_fold (csum); + + //Move up icmp header + ip4 = (ip4_header_t *) u8_ptr_add (inner_l4, -2 * sizeof (*ip4) - 8); + clib_memcpy (u8_ptr_add (inner_l4, -sizeof (*ip4) - 8), icmp, 8); + icmp = (icmp46_header_t *) u8_ptr_add (inner_l4, -sizeof (*ip4) - 8); + } + else + { + //Only one header to translate + ip4 = (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)); + } + + vlib_buffer_advance (p, (u32) (((u8 *) ip4) - ((u8 *) ip6))); + + if ((rv = fn (ip6, ip4, ctx)) != 0) + return rv; + + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip6); + ip4->fragment_id = 0; + ip4->flags_and_fragment_offset = 0; + ip4->ttl = ip6->hop_limit; + ip4->protocol = IP_PROTOCOL_ICMP; + //TODO fix the length depending on offset length + ip4->length = u16_net_add (ip6->payload_length, + (inner_ip6 == + NULL) ? sizeof (*ip4) : (2 * sizeof (*ip4) - + sizeof (*ip6))); + ip4->checksum = ip4_header_checksum (ip4); + + //Recompute ICMP checksum + icmp->checksum = 0; + csum = + ip_incremental_checksum (0, icmp, + clib_net_to_host_u16 (ip4->length) - + sizeof (*ip4)); + icmp->checksum = ~ip_csum_fold (csum); + + return 0; +} + +/** + * @brief Translate IPv6 fragmented packet to IPv4. + * + * @param p Buffer to translate. + * @param fn The function to translate header. + * @param ctx A context passed in the header translate function. + * + * @returns 0 on success, non-zero value otherwise. + */ +always_inline int +ip6_to_ip4_fragmented (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx) +{ + ip6_header_t *ip6; + ip6_frag_hdr_t *frag; + ip4_header_t *ip4; + u16 frag_id; + u8 frag_more; + u16 frag_offset; + u8 l4_protocol; + u16 l4_offset; + int rv; + + ip6 = vlib_buffer_get_current (p); + + if (ip6_parse + (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) + return -1; + + frag = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); + ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); + vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); + + frag_id = frag_id_6to4 (frag->identification); + frag_more = ip6_frag_hdr_more (frag); + frag_offset = ip6_frag_hdr_offset (frag); + + if ((rv = fn (ip6, ip4, ctx)) != 0) + return rv; + + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip6); + ip4->length = u16_net_add (ip6->payload_length, + sizeof (*ip4) - l4_offset + sizeof (*ip6)); + ip4->fragment_id = frag_id; + ip4->flags_and_fragment_offset = + clib_host_to_net_u16 (frag_offset | + (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0)); + ip4->ttl = ip6->hop_limit; + ip4->protocol = + (l4_protocol == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : l4_protocol; + ip4->checksum = ip4_header_checksum (ip4); + + return 0; +} + +/** + * @brief Translate IPv6 UDP/TCP packet to IPv4. + * + * @param p Buffer to translate. + * @param fn The function to translate header. + * @param ctx A context passed in the header translate function. + * + * @returns 0 on success, non-zero value otherwise. + */ +always_inline int +ip6_to_ip4_tcp_udp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, + u8 udp_checksum) +{ + ip6_header_t *ip6; + u16 *checksum; + ip_csum_t csum; + ip4_header_t *ip4; + u16 fragment_id; + u16 flags; + u16 frag_offset; + u8 l4_protocol; + u16 l4_offset; + int rv; + + ip6 = vlib_buffer_get_current (p); + + if (ip6_parse + (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) + return -1; + + if (l4_protocol == IP_PROTOCOL_TCP) + { + tcp_header_t *tcp = ip6_next_header (ip6); + checksum = &tcp->checksum; + } + else + { + udp_header_t *udp = ip6_next_header (ip6); + checksum = &udp->checksum; + //UDP checksum is optional over IPv4 + if (!udp_checksum) + goto no_csum; + } + + csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]); + csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]); + +no_csum: + ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); + + vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); + + if (PREDICT_FALSE (frag_offset)) + { + //Only the first fragment + ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); + fragment_id = frag_id_6to4 (hdr->identification); + flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); + } + else + { + fragment_id = 0; + flags = 0; + } + + if ((rv = fn (ip6, ip4, ctx)) != 0) + return rv; + + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip6); + ip4->length = u16_net_add (ip6->payload_length, + sizeof (*ip4) + sizeof (*ip6) - l4_offset); + ip4->fragment_id = fragment_id; + ip4->flags_and_fragment_offset = flags; + ip4->ttl = ip6->hop_limit; + ip4->protocol = l4_protocol; + ip4->checksum = ip4_header_checksum (ip4); + + //UDP checksum is optional over IPv4 + if (!udp_checksum && l4_protocol == IP_PROTOCOL_UDP) + { + *checksum = 0; + } + else + { + csum = ip_csum_add_even (csum, ip4->dst_address.as_u32); + csum = ip_csum_add_even (csum, ip4->src_address.as_u32); + *checksum = ip_csum_fold (csum); + } + + return 0; +} + +#endif /* __included_ip6_to_ip4_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/map/ip4_map.c b/src/vnet/map/ip4_map.c index e39b6f14..6a3bdd51 100644 --- a/src/vnet/map/ip4_map.c +++ b/src/vnet/map/ip4_map.c @@ -19,6 +19,7 @@ #include "map.h" #include "../ip/ip_frag.h" +#include vlib_node_registration_t ip4_map_reass_node; @@ -62,52 +63,6 @@ format_ip4_map_reass_trace (u8 * s, va_list * args) t->cached ? "cached" : "forwarded"); } -/* - * ip4_map_get_port - */ -u16 -ip4_map_get_port (ip4_header_t * ip, map_dir_e dir) -{ - /* Find port information */ - if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || - (ip->protocol == IP_PROTOCOL_UDP))) - { - udp_header_t *udp = (void *) (ip + 1); - return (dir == MAP_SENDER ? udp->src_port : udp->dst_port); - } - else if (ip->protocol == IP_PROTOCOL_ICMP) - { - /* - * 1) ICMP Echo request or Echo reply - * 2) ICMP Error with inner packet being UDP or TCP - * 3) ICMP Error with inner packet being ICMP Echo request or Echo reply - */ - icmp46_header_t *icmp = (void *) (ip + 1); - if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply) - { - return *((u16 *) (icmp + 1)); - } - else if (clib_net_to_host_u16 (ip->length) >= 56) - { // IP + ICMP + IP + L4 header - ip4_header_t *icmp_ip = (ip4_header_t *) (icmp + 2); - if (PREDICT_TRUE ((icmp_ip->protocol == IP_PROTOCOL_TCP) || - (icmp_ip->protocol == IP_PROTOCOL_UDP))) - { - udp_header_t *udp = (void *) (icmp_ip + 1); - return (dir == MAP_SENDER ? udp->dst_port : udp->src_port); - } - else if (icmp_ip->protocol == IP_PROTOCOL_ICMP) - { - icmp46_header_t *inner_icmp = (void *) (icmp_ip + 1); - if (inner_icmp->type == ICMP4_echo_request - || inner_icmp->type == ICMP4_echo_reply) - return (*((u16 *) (inner_icmp + 1))); - } - } - } - return (0); -} - static_always_inline u16 ip4_map_port_and_security_check (map_domain_t * d, ip4_header_t * ip, u32 * next, u8 * error) @@ -124,7 +79,7 @@ ip4_map_port_and_security_check (map_domain_t * d, ip4_header_t * ip, { return 0; } - port = ip4_map_get_port (ip, MAP_RECEIVER); + port = ip4_get_port (ip, 0); if (port) { /* Verify that port is not among the well-known ports */ @@ -626,9 +581,7 @@ ip4_map_reass (vlib_main_t * vm, cached = 1; } } - else - if ((port0 = - ip4_get_port (ip40, MAP_RECEIVER, p0->current_length)) < 0) + else if ((port0 = ip4_get_port (ip40, 0)) == 0) { // Could not find port. We'll free the reassembly. error0 = MAP_ERROR_BAD_PROTOCOL; diff --git a/src/vnet/map/ip4_map_t.c b/src/vnet/map/ip4_map_t.c index 5f2bcbf9..b89840cc 100644 --- a/src/vnet/map/ip4_map_t.c +++ b/src/vnet/map/ip4_map_t.c @@ -15,6 +15,7 @@ #include "map.h" #include "../ip/ip_frag.h" +#include #define IP4_MAP_T_DUAL_LOOP 1 @@ -63,18 +64,6 @@ typedef CLIB_PACKED (struct { }) ip4_mapt_pseudo_header_t; /* *INDENT-ON* */ -#define frag_id_4to6(id) (id) - -//TODO: Find the right place in memory for this. -/* *INDENT-OFF* */ -static u8 icmp_to_icmp6_updater_pointer_table[] = - { 0, 1, 4, 4, ~0, - ~0, ~0, ~0, 7, 6, - ~0, ~0, 8, 8, 8, - 8, 24, 24, 24, 24 - }; -/* *INDENT-ON* */ - static_always_inline int ip4_map_fragment_cache (ip4_header_t * ip4, u16 port) @@ -110,360 +99,41 @@ ip4_map_fragment_get_port (ip4_header_t * ip4) return ret; } - -/* Statelessly translates an ICMP packet into ICMPv6. - * - * Warning: The checksum will need to be recomputed. - * - */ -static_always_inline int -ip4_icmp_to_icmp6_in_place (icmp46_header_t * icmp, u32 icmp_len, - i32 * receiver_port, ip4_header_t ** inner_ip4) +typedef struct { - *inner_ip4 = NULL; - switch (icmp->type) - { - case ICMP4_echo_reply: - *receiver_port = ((u16 *) icmp)[2]; - icmp->type = ICMP6_echo_reply; - break; - case ICMP4_echo_request: - *receiver_port = ((u16 *) icmp)[2]; - icmp->type = ICMP6_echo_request; - break; - case ICMP4_destination_unreachable: - *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8); - *receiver_port = ip4_get_port (*inner_ip4, MAP_SENDER, icmp_len - 8); - - switch (icmp->code) - { - case ICMP4_destination_unreachable_destination_unreachable_net: //0 - case ICMP4_destination_unreachable_destination_unreachable_host: //1 - icmp->type = ICMP6_destination_unreachable; - icmp->code = ICMP6_destination_unreachable_no_route_to_destination; - break; - case ICMP4_destination_unreachable_protocol_unreachable: //2 - icmp->type = ICMP6_parameter_problem; - icmp->code = ICMP6_parameter_problem_unrecognized_next_header; - break; - case ICMP4_destination_unreachable_port_unreachable: //3 - icmp->type = ICMP6_destination_unreachable; - icmp->code = ICMP6_destination_unreachable_port_unreachable; - break; - case ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set: //4 - icmp->type = - ICMP6_packet_too_big; - icmp->code = 0; - { - u32 advertised_mtu = clib_net_to_host_u32 (*((u32 *) (icmp + 1))); - if (advertised_mtu) - advertised_mtu += 20; - else - advertised_mtu = 1000; //FIXME ! (RFC 1191 - plateau value) - - //FIXME: = minimum(advertised MTU+20, MTU_of_IPv6_nexthop, (MTU_of_IPv4_nexthop)+20) - *((u32 *) (icmp + 1)) = clib_host_to_net_u32 (advertised_mtu); - } - break; - - case ICMP4_destination_unreachable_source_route_failed: //5 - case ICMP4_destination_unreachable_destination_network_unknown: //6 - case ICMP4_destination_unreachable_destination_host_unknown: //7 - case ICMP4_destination_unreachable_source_host_isolated: //8 - case ICMP4_destination_unreachable_network_unreachable_for_type_of_service: //11 - case ICMP4_destination_unreachable_host_unreachable_for_type_of_service: //12 - icmp->type = - ICMP6_destination_unreachable; - icmp->code = ICMP6_destination_unreachable_no_route_to_destination; - break; - case ICMP4_destination_unreachable_network_administratively_prohibited: //9 - case ICMP4_destination_unreachable_host_administratively_prohibited: //10 - case ICMP4_destination_unreachable_communication_administratively_prohibited: //13 - case ICMP4_destination_unreachable_precedence_cutoff_in_effect: //15 - icmp->type = ICMP6_destination_unreachable; - icmp->code = - ICMP6_destination_unreachable_destination_administratively_prohibited; - break; - case ICMP4_destination_unreachable_host_precedence_violation: //14 - default: - return -1; - } - break; - - case ICMP4_time_exceeded: //11 - *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8); - *receiver_port = ip4_get_port (*inner_ip4, MAP_SENDER, icmp_len - 8); - icmp->type = ICMP6_time_exceeded; - //icmp->code = icmp->code //unchanged - break; + map_domain_t *d; + u16 recv_port; +} icmp_to_icmp6_ctx_t; - case ICMP4_parameter_problem: - *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8); - *receiver_port = ip4_get_port (*inner_ip4, MAP_SENDER, icmp_len - 8); +static int +ip4_to_ip6_set_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *arg) +{ + icmp_to_icmp6_ctx_t *ctx = arg; - switch (icmp->code) - { - case ICMP4_parameter_problem_pointer_indicates_error: - case ICMP4_parameter_problem_bad_length: - icmp->type = ICMP6_parameter_problem; - icmp->code = ICMP6_parameter_problem_erroneous_header_field; - { - u8 ptr = - icmp_to_icmp6_updater_pointer_table[*((u8 *) (icmp + 1))]; - if (ptr == 0xff) - return -1; - - *((u32 *) (icmp + 1)) = clib_host_to_net_u32 (ptr); - } - break; - default: - //All other codes cause dropping the packet - return -1; - } - break; + ip4_map_t_embedded_address (ctx->d, &ip6->src_address, &ip4->src_address); + ip6->dst_address.as_u64[0] = + map_get_pfx_net (ctx->d, ip4->dst_address.as_u32, ctx->recv_port); + ip6->dst_address.as_u64[1] = + map_get_sfx_net (ctx->d, ip4->dst_address.as_u32, ctx->recv_port); - default: - //All other types cause dropping the packet - return -1; - break; - } return 0; } -static_always_inline void -_ip4_map_t_icmp (map_domain_t * d, vlib_buffer_t * p, u8 * error) +static int +ip4_to_ip6_set_inner_icmp_cb (ip4_header_t * ip4, ip6_header_t * ip6, + void *arg) { - ip4_header_t *ip4, *inner_ip4; - ip6_header_t *ip6, *inner_ip6; - u32 ip_len; - icmp46_header_t *icmp; - i32 recv_port; - ip_csum_t csum; - u16 *inner_L4_checksum = 0; - ip6_frag_hdr_t *inner_frag; - u32 inner_frag_id; - u32 inner_frag_offset; - u8 inner_frag_more; - - ip4 = vlib_buffer_get_current (p); - ip_len = clib_net_to_host_u16 (ip4->length); - ASSERT (ip_len <= p->current_length); - - icmp = (icmp46_header_t *) (ip4 + 1); - if (ip4_icmp_to_icmp6_in_place (icmp, ip_len - sizeof (*ip4), - &recv_port, &inner_ip4)) - { - *error = MAP_ERROR_ICMP; - return; - } - - if (recv_port < 0) - { - // In case of 1:1 mapping, we don't care about the port - if (d->ea_bits_len == 0 && d->rules) - { - recv_port = 0; - } - else - { - *error = MAP_ERROR_ICMP; - return; - } - } - - if (inner_ip4) - { - //We have 2 headers to translate. - //We need to make some room in the middle of the packet - - if (PREDICT_FALSE (ip4_is_fragment (inner_ip4))) - { - //Here it starts getting really tricky - //We will add a fragmentation header in the inner packet - - if (!ip4_is_first_fragment (inner_ip4)) - { - //For now we do not handle unless it is the first fragment - //Ideally we should handle the case as we are in slow path already - *error = MAP_ERROR_FRAGMENTED; - return; - } - - vlib_buffer_advance (p, - -2 * (sizeof (*ip6) - sizeof (*ip4)) - - sizeof (*inner_frag)); - ip6 = vlib_buffer_get_current (p); - clib_memcpy (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4, - 20 + 8); - ip4 = - (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)); - icmp = (icmp46_header_t *) (ip4 + 1); - - inner_ip6 = - (ip6_header_t *) u8_ptr_add (inner_ip4, - sizeof (*ip4) - sizeof (*ip6) - - sizeof (*inner_frag)); - inner_frag = - (ip6_frag_hdr_t *) u8_ptr_add (inner_ip6, sizeof (*inner_ip6)); - ip6->payload_length = - u16_net_add (ip4->length, - sizeof (*ip6) - 2 * sizeof (*ip4) + - sizeof (*inner_frag)); - inner_frag_id = frag_id_4to6 (inner_ip4->fragment_id); - inner_frag_offset = ip4_get_fragment_offset (inner_ip4); - inner_frag_more = - ! !(inner_ip4->flags_and_fragment_offset & - clib_net_to_host_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)); - } - else - { - vlib_buffer_advance (p, -2 * (sizeof (*ip6) - sizeof (*ip4))); - ip6 = vlib_buffer_get_current (p); - clib_memcpy (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4, - 20 + 8); - ip4 = - (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)); - icmp = (icmp46_header_t *) u8_ptr_add (ip4, sizeof (*ip4)); - inner_ip6 = - (ip6_header_t *) u8_ptr_add (inner_ip4, - sizeof (*ip4) - sizeof (*ip6)); - ip6->payload_length = - u16_net_add (ip4->length, sizeof (*ip6) - 2 * sizeof (*ip4)); - inner_frag = NULL; - } - - if (PREDICT_TRUE (inner_ip4->protocol == IP_PROTOCOL_TCP)) - { - inner_L4_checksum = &((tcp_header_t *) (inner_ip4 + 1))->checksum; - *inner_L4_checksum = - ip_csum_fold (ip_csum_sub_even - (*inner_L4_checksum, - *((u64 *) (&inner_ip4->src_address)))); - } - else if (PREDICT_TRUE (inner_ip4->protocol == IP_PROTOCOL_UDP)) - { - inner_L4_checksum = &((udp_header_t *) (inner_ip4 + 1))->checksum; - if (!*inner_L4_checksum) - { - //The inner packet was first translated, and therefore came from IPv6. - //As the packet was an IPv6 packet, the UDP checksum can't be NULL - *error = MAP_ERROR_ICMP; - return; - } - *inner_L4_checksum = - ip_csum_fold (ip_csum_sub_even - (*inner_L4_checksum, - *((u64 *) (&inner_ip4->src_address)))); - } - else if (inner_ip4->protocol == IP_PROTOCOL_ICMP) - { - //We have an ICMP inside an ICMP - //It needs to be translated, but not for error ICMP messages - icmp46_header_t *inner_icmp = (icmp46_header_t *) (inner_ip4 + 1); - csum = inner_icmp->checksum; - //Only types ICMP4_echo_request and ICMP4_echo_reply are handled by ip4_icmp_to_icmp6_in_place - csum = ip_csum_sub_even (csum, *((u16 *) inner_icmp)); - inner_icmp->type = (inner_icmp->type == ICMP4_echo_request) ? - ICMP6_echo_request : ICMP6_echo_reply; - csum = ip_csum_add_even (csum, *((u16 *) inner_icmp)); - csum = - ip_csum_add_even (csum, clib_host_to_net_u16 (IP_PROTOCOL_ICMP6)); - csum = - ip_csum_add_even (csum, inner_ip4->length - sizeof (*inner_ip4)); - inner_icmp->checksum = ip_csum_fold (csum); - inner_L4_checksum = &inner_icmp->checksum; - inner_ip4->protocol = IP_PROTOCOL_ICMP6; - } - else - { - /* To shut up Coverity */ - os_panic (); - } - - //FIXME: Security check with the port found in the inner packet - - csum = *inner_L4_checksum; //Initial checksum of the inner L4 header - //FIXME: Shouldn't we remove ip addresses from there ? - - inner_ip6->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 ((6 << 28) + (inner_ip4->tos << 20)); - inner_ip6->payload_length = - u16_net_add (inner_ip4->length, -sizeof (*inner_ip4)); - inner_ip6->hop_limit = inner_ip4->ttl; - inner_ip6->protocol = inner_ip4->protocol; + icmp_to_icmp6_ctx_t *ctx = arg; - //Note that the source address is within the domain - //while the destination address is the one outside the domain - ip4_map_t_embedded_address (d, &inner_ip6->dst_address, - &inner_ip4->dst_address); - inner_ip6->src_address.as_u64[0] = - map_get_pfx_net (d, inner_ip4->src_address.as_u32, recv_port); - inner_ip6->src_address.as_u64[1] = - map_get_sfx_net (d, inner_ip4->src_address.as_u32, recv_port); + //Note that the source address is within the domain + //while the destination address is the one outside the domain + ip4_map_t_embedded_address (ctx->d, &ip6->dst_address, &ip4->dst_address); + ip6->src_address.as_u64[0] = + map_get_pfx_net (ctx->d, ip4->src_address.as_u32, ctx->recv_port); + ip6->src_address.as_u64[1] = + map_get_sfx_net (ctx->d, ip4->src_address.as_u32, ctx->recv_port); - if (PREDICT_FALSE (inner_frag != NULL)) - { - inner_frag->next_hdr = inner_ip6->protocol; - inner_frag->identification = inner_frag_id; - inner_frag->rsv = 0; - inner_frag->fragment_offset_and_more = - ip6_frag_hdr_offset_and_more (inner_frag_offset, inner_frag_more); - inner_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; - inner_ip6->payload_length = - clib_host_to_net_u16 (clib_net_to_host_u16 - (inner_ip6->payload_length) + - sizeof (*inner_frag)); - } - - csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]); - csum = ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]); - csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]); - csum = ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]); - *inner_L4_checksum = ip_csum_fold (csum); - - } - else - { - vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); - ip6 = vlib_buffer_get_current (p); - ip6->payload_length = - clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - - sizeof (*ip4)); - } - - //Translate outer IPv6 - ip6->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); - - ip6->hop_limit = ip4->ttl; - ip6->protocol = IP_PROTOCOL_ICMP6; - - ip4_map_t_embedded_address (d, &ip6->src_address, &ip4->src_address); - ip6->dst_address.as_u64[0] = - map_get_pfx_net (d, ip4->dst_address.as_u32, recv_port); - ip6->dst_address.as_u64[1] = - map_get_sfx_net (d, ip4->dst_address.as_u32, recv_port); - - //Truncate when the packet exceeds the minimal IPv6 MTU - if (p->current_length > 1280) - { - ip6->payload_length = clib_host_to_net_u16 (1280 - sizeof (*ip6)); - p->current_length = 1280; //Looks too simple to be correct... - } - - //TODO: We could do an easy diff-checksum for echo requests/replies - //Recompute ICMP checksum - icmp->checksum = 0; - csum = ip_csum_with_carry (0, ip6->payload_length); - csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol)); - csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]); - csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]); - csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]); - csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]); - csum = - ip_incremental_checksum (csum, icmp, - clib_net_to_host_u16 (ip6->payload_length)); - icmp->checksum = ~ip_csum_fold (csum); + return 0; } static uword @@ -491,6 +161,8 @@ ip4_map_t_icmp (vlib_main_t * vm, u8 error0; map_domain_t *d0; u16 len0; + icmp_to_icmp6_ctx_t ctx0; + ip4_header_t *ip40; next0 = IP4_MAPT_ICMP_NEXT_IP6_LOOKUP; pi0 = to_next[0] = from[0]; @@ -508,7 +180,27 @@ ip4_map_t_icmp (vlib_main_t * vm, d0 = pool_elt_at_index (map_main.domains, vnet_buffer (p0)->map_t.map_domain_index); - _ip4_map_t_icmp (d0, p0, &error0); + + ip40 = vlib_buffer_get_current (p0); + ctx0.recv_port = ip4_get_port (ip40, 1); + ctx0.d = d0; + if (ctx0.recv_port == 0) + { + // In case of 1:1 mapping, we don't care about the port + if (!(d0->ea_bits_len == 0 && d0->rules)) + { + error0 = MAP_ERROR_ICMP; + goto err0; + } + } + + if (icmp_to_icmp6 + (p0, ip4_to_ip6_set_icmp_cb, &ctx0, + ip4_to_ip6_set_inner_icmp_cb, &ctx0)) + { + error0 = MAP_ERROR_ICMP; + goto err0; + } if (vnet_buffer (p0)->map_t.mtu < p0->current_length) { @@ -517,12 +209,14 @@ ip4_map_t_icmp (vlib_main_t * vm, vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; next0 = IP4_MAPT_ICMP_NEXT_IP6_FRAG; } + err0: if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, thread_index, - vnet_buffer (p0)->map_t. - map_domain_index, 1, len0); + vnet_buffer (p0)-> + map_t.map_domain_index, 1, + len0); } else { @@ -538,6 +232,19 @@ ip4_map_t_icmp (vlib_main_t * vm, return frame->n_vectors; } +static int +ip4_to_ip6_set_cb (ip4_header_t * ip4, ip6_header_t * ip6, void *ctx) +{ + ip4_mapt_pseudo_header_t *pheader = ctx; + + ip6->dst_address.as_u64[0] = pheader->daddr.as_u64[0]; + ip6->dst_address.as_u64[1] = pheader->daddr.as_u64[1]; + ip6->src_address.as_u64[0] = pheader->saddr.as_u64[0]; + ip6->src_address.as_u64[1] = pheader->saddr.as_u64[1]; + + return 0; +} + static uword ip4_map_t_fragmented (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -546,6 +253,8 @@ ip4_map_t_fragmented (vlib_main_t * vm, from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_map_t_fragmented_node.index); while (n_left_from > 0) { @@ -555,9 +264,6 @@ ip4_map_t_fragmented (vlib_main_t * vm, { u32 pi0; vlib_buffer_t *p0; - ip4_header_t *ip40; - ip6_header_t *ip60; - ip6_frag_hdr_t *frag0; ip4_mapt_pseudo_header_t *pheader0; ip4_mapt_fragmented_next_t next0; @@ -574,50 +280,21 @@ ip4_map_t_fragmented (vlib_main_t * vm, pheader0 = vlib_buffer_get_current (p0); vlib_buffer_advance (p0, sizeof (*pheader0)); - //Accessing ip4 header - ip40 = vlib_buffer_get_current (p0); - frag0 = - (ip6_frag_hdr_t *) u8_ptr_add (ip40, - sizeof (*ip40) - sizeof (*frag0)); - ip60 = - (ip6_header_t *) u8_ptr_add (ip40, - sizeof (*ip40) - sizeof (*frag0) - - sizeof (*ip60)); - vlib_buffer_advance (p0, - sizeof (*ip40) - sizeof (*ip60) - - sizeof (*frag0)); - - //We know that the protocol was one of ICMP, TCP or UDP - //because the first fragment was found and cached - frag0->next_hdr = - (ip40->protocol == - IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip40->protocol; - frag0->identification = frag_id_4to6 (ip40->fragment_id); - frag0->rsv = 0; - frag0->fragment_offset_and_more = - ip6_frag_hdr_offset_and_more (ip4_get_fragment_offset (ip40), - clib_net_to_host_u16 - (ip40->flags_and_fragment_offset) & - IP4_HEADER_FLAG_MORE_FRAGMENTS); - - ip60->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 ((6 << 28) + (ip40->tos << 20)); - ip60->payload_length = - clib_host_to_net_u16 (clib_net_to_host_u16 (ip40->length) - - sizeof (*ip40) + sizeof (*frag0)); - ip60->hop_limit = ip40->ttl; - ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; - ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0]; - ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1]; - ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0]; - ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1]; - - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + if (ip4_to_ip6_fragmented (p0, ip4_to_ip6_set_cb, pheader0)) { - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; - next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG; + p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; + next0 = IP4_MAPT_FRAGMENTED_NEXT_DROP; + } + else + { + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.next_index = + IP6_FRAG_NEXT_IP6_LOOKUP; + next0 = IP4_MAPT_FRAGMENTED_NEXT_IP6_FRAG; + } } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, @@ -637,6 +314,9 @@ ip4_map_t_tcp_udp (vlib_main_t * vm, from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_map_t_tcp_udp_node.index); + while (n_left_from > 0) { @@ -647,12 +327,6 @@ ip4_map_t_tcp_udp (vlib_main_t * vm, { u32 pi0, pi1; vlib_buffer_t *p0, *p1; - ip4_header_t *ip40, *ip41; - ip6_header_t *ip60, *ip61; - ip_csum_t csum0, csum1; - u16 *checksum0, *checksum1; - ip6_frag_hdr_t *frag0, *frag1; - u32 frag_id0, frag_id1; ip4_mapt_pseudo_header_t *pheader0, *pheader1; ip4_mapt_tcp_udp_next_t next0, next1; @@ -674,183 +348,40 @@ ip4_map_t_tcp_udp (vlib_main_t * vm, vlib_buffer_advance (p0, sizeof (*pheader0)); vlib_buffer_advance (p1, sizeof (*pheader1)); - //Accessing ip4 header - ip40 = vlib_buffer_get_current (p0); - ip41 = vlib_buffer_get_current (p1); - checksum0 = - (u16 *) u8_ptr_add (ip40, - vnet_buffer (p0)->map_t.checksum_offset); - checksum1 = - (u16 *) u8_ptr_add (ip41, - vnet_buffer (p1)->map_t.checksum_offset); - - //UDP checksum is optional over IPv4 but mandatory for IPv6 - //We do not check udp->length sanity but use our safe computed value instead - if (PREDICT_FALSE - (!*checksum0 && ip40->protocol == IP_PROTOCOL_UDP)) + if (ip4_to_ip6_tcp_udp (p0, ip4_to_ip6_set_cb, pheader0)) { - u16 udp_len = - clib_host_to_net_u16 (ip40->length) - sizeof (*ip40); - udp_header_t *udp = - (udp_header_t *) u8_ptr_add (ip40, sizeof (*ip40)); - ip_csum_t csum; - csum = ip_incremental_checksum (0, udp, udp_len); - csum = - ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len)); - csum = - ip_csum_with_carry (csum, - clib_host_to_net_u16 (IP_PROTOCOL_UDP)); - csum = - ip_csum_with_carry (csum, *((u64 *) (&ip40->src_address))); - *checksum0 = ~ip_csum_fold (csum); - } - if (PREDICT_FALSE - (!*checksum1 && ip41->protocol == IP_PROTOCOL_UDP)) - { - u16 udp_len = - clib_host_to_net_u16 (ip41->length) - sizeof (*ip40); - udp_header_t *udp = - (udp_header_t *) u8_ptr_add (ip41, sizeof (*ip40)); - ip_csum_t csum; - csum = ip_incremental_checksum (0, udp, udp_len); - csum = - ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len)); - csum = - ip_csum_with_carry (csum, - clib_host_to_net_u16 (IP_PROTOCOL_UDP)); - csum = - ip_csum_with_carry (csum, *((u64 *) (&ip41->src_address))); - *checksum1 = ~ip_csum_fold (csum); - } - - csum0 = ip_csum_sub_even (*checksum0, ip40->src_address.as_u32); - csum1 = ip_csum_sub_even (*checksum1, ip41->src_address.as_u32); - csum0 = ip_csum_sub_even (csum0, ip40->dst_address.as_u32); - csum1 = ip_csum_sub_even (csum1, ip41->dst_address.as_u32); - - // Deal with fragmented packets - if (PREDICT_FALSE (ip40->flags_and_fragment_offset & - clib_host_to_net_u16 - (IP4_HEADER_FLAG_MORE_FRAGMENTS))) - { - ip60 = - (ip6_header_t *) u8_ptr_add (ip40, - sizeof (*ip40) - sizeof (*ip60) - - sizeof (*frag0)); - frag0 = - (ip6_frag_hdr_t *) u8_ptr_add (ip40, - sizeof (*ip40) - - sizeof (*frag0)); - frag_id0 = frag_id_4to6 (ip40->fragment_id); - vlib_buffer_advance (p0, - sizeof (*ip40) - sizeof (*ip60) - - sizeof (*frag0)); + p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; + next0 = IP4_MAPT_TCP_UDP_NEXT_DROP; } else { - ip60 = - (ip6_header_t *) (((u8 *) ip40) + sizeof (*ip40) - - sizeof (*ip60)); - vlib_buffer_advance (p0, sizeof (*ip40) - sizeof (*ip60)); - frag0 = NULL; + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.next_index = + IP6_FRAG_NEXT_IP6_LOOKUP; + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; + } } - if (PREDICT_FALSE (ip41->flags_and_fragment_offset & - clib_host_to_net_u16 - (IP4_HEADER_FLAG_MORE_FRAGMENTS))) + if (ip4_to_ip6_tcp_udp (p1, ip4_to_ip6_set_cb, pheader1)) { - ip61 = - (ip6_header_t *) u8_ptr_add (ip41, - sizeof (*ip40) - sizeof (*ip60) - - sizeof (*frag0)); - frag1 = - (ip6_frag_hdr_t *) u8_ptr_add (ip41, - sizeof (*ip40) - - sizeof (*frag0)); - frag_id1 = frag_id_4to6 (ip41->fragment_id); - vlib_buffer_advance (p1, - sizeof (*ip40) - sizeof (*ip60) - - sizeof (*frag0)); + p1->error = error_node->errors[MAP_ERROR_UNKNOWN]; + next1 = IP4_MAPT_TCP_UDP_NEXT_DROP; } else { - ip61 = - (ip6_header_t *) (((u8 *) ip41) + sizeof (*ip40) - - sizeof (*ip60)); - vlib_buffer_advance (p1, sizeof (*ip40) - sizeof (*ip60)); - frag1 = NULL; - } - - ip60->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 ((6 << 28) + (ip40->tos << 20)); - ip61->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 ((6 << 28) + (ip41->tos << 20)); - ip60->payload_length = u16_net_add (ip40->length, -sizeof (*ip40)); - ip61->payload_length = u16_net_add (ip41->length, -sizeof (*ip40)); - ip60->hop_limit = ip40->ttl; - ip61->hop_limit = ip41->ttl; - ip60->protocol = ip40->protocol; - ip61->protocol = ip41->protocol; - - if (PREDICT_FALSE (frag0 != NULL)) - { - frag0->next_hdr = ip60->protocol; - frag0->identification = frag_id0; - frag0->rsv = 0; - frag0->fragment_offset_and_more = - ip6_frag_hdr_offset_and_more (0, 1); - ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; - ip60->payload_length = - u16_net_add (ip60->payload_length, sizeof (*frag0)); - } - - if (PREDICT_FALSE (frag1 != NULL)) - { - frag1->next_hdr = ip61->protocol; - frag1->identification = frag_id1; - frag1->rsv = 0; - frag1->fragment_offset_and_more = - ip6_frag_hdr_offset_and_more (0, 1); - ip61->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; - ip61->payload_length = - u16_net_add (ip61->payload_length, sizeof (*frag0)); - } - - //Finally copying the address - ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0]; - ip61->dst_address.as_u64[0] = pheader1->daddr.as_u64[0]; - ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1]; - ip61->dst_address.as_u64[1] = pheader1->daddr.as_u64[1]; - ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0]; - ip61->src_address.as_u64[0] = pheader1->saddr.as_u64[0]; - ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1]; - ip61->src_address.as_u64[1] = pheader1->saddr.as_u64[1]; - - csum0 = ip_csum_add_even (csum0, ip60->src_address.as_u64[0]); - csum1 = ip_csum_add_even (csum1, ip61->src_address.as_u64[0]); - csum0 = ip_csum_add_even (csum0, ip60->src_address.as_u64[1]); - csum1 = ip_csum_add_even (csum1, ip61->src_address.as_u64[1]); - csum0 = ip_csum_add_even (csum0, ip60->dst_address.as_u64[0]); - csum1 = ip_csum_add_even (csum1, ip61->dst_address.as_u64[0]); - csum0 = ip_csum_add_even (csum0, ip60->dst_address.as_u64[1]); - csum1 = ip_csum_add_even (csum1, ip61->dst_address.as_u64[1]); - *checksum0 = ip_csum_fold (csum0); - *checksum1 = ip_csum_fold (csum1); - - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) - { - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; - next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; - } - - if (vnet_buffer (p1)->map_t.mtu < p1->current_length) - { - vnet_buffer (p1)->ip_frag.header_offset = 0; - vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu; - vnet_buffer (p1)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; - next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; + if (vnet_buffer (p1)->map_t.mtu < p1->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p1)->ip_frag.header_offset = 0; + vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu; + vnet_buffer (p1)->ip_frag.next_index = + IP6_FRAG_NEXT_IP6_LOOKUP; + next1 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; + } } vlib_validate_buffer_enqueue_x2 (vm, node, next_index, @@ -863,12 +394,6 @@ ip4_map_t_tcp_udp (vlib_main_t * vm, { u32 pi0; vlib_buffer_t *p0; - ip4_header_t *ip40; - ip6_header_t *ip60; - ip_csum_t csum0; - u16 *checksum0; - ip6_frag_hdr_t *frag0; - u32 frag_id0; ip4_mapt_pseudo_header_t *pheader0; ip4_mapt_tcp_udp_next_t next0; @@ -885,102 +410,23 @@ ip4_map_t_tcp_udp (vlib_main_t * vm, pheader0 = vlib_buffer_get_current (p0); vlib_buffer_advance (p0, sizeof (*pheader0)); - //Accessing ip4 header - ip40 = vlib_buffer_get_current (p0); - checksum0 = - (u16 *) u8_ptr_add (ip40, - vnet_buffer (p0)->map_t.checksum_offset); - - //UDP checksum is optional over IPv4 but mandatory for IPv6 - //We do not check udp->length sanity but use our safe computed value instead - if (PREDICT_FALSE - (!*checksum0 && ip40->protocol == IP_PROTOCOL_UDP)) - { - u16 udp_len = - clib_host_to_net_u16 (ip40->length) - sizeof (*ip40); - udp_header_t *udp = - (udp_header_t *) u8_ptr_add (ip40, sizeof (*ip40)); - ip_csum_t csum; - csum = ip_incremental_checksum (0, udp, udp_len); - csum = - ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len)); - csum = - ip_csum_with_carry (csum, - clib_host_to_net_u16 (IP_PROTOCOL_UDP)); - csum = - ip_csum_with_carry (csum, *((u64 *) (&ip40->src_address))); - *checksum0 = ~ip_csum_fold (csum); - } - - csum0 = ip_csum_sub_even (*checksum0, ip40->src_address.as_u32); - csum0 = ip_csum_sub_even (csum0, ip40->dst_address.as_u32); - - // Deal with fragmented packets - if (PREDICT_FALSE (ip40->flags_and_fragment_offset & - clib_host_to_net_u16 - (IP4_HEADER_FLAG_MORE_FRAGMENTS))) + if (ip4_to_ip6_tcp_udp (p0, ip4_to_ip6_set_cb, pheader0)) { - ip60 = - (ip6_header_t *) u8_ptr_add (ip40, - sizeof (*ip40) - sizeof (*ip60) - - sizeof (*frag0)); - frag0 = - (ip6_frag_hdr_t *) u8_ptr_add (ip40, - sizeof (*ip40) - - sizeof (*frag0)); - frag_id0 = frag_id_4to6 (ip40->fragment_id); - vlib_buffer_advance (p0, - sizeof (*ip40) - sizeof (*ip60) - - sizeof (*frag0)); + p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; + next0 = IP4_MAPT_TCP_UDP_NEXT_DROP; } else { - ip60 = - (ip6_header_t *) (((u8 *) ip40) + sizeof (*ip40) - - sizeof (*ip60)); - vlib_buffer_advance (p0, sizeof (*ip40) - sizeof (*ip60)); - frag0 = NULL; + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.next_index = + IP6_FRAG_NEXT_IP6_LOOKUP; + next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; + } } - - ip60->ip_version_traffic_class_and_flow_label = - clib_host_to_net_u32 ((6 << 28) + (ip40->tos << 20)); - ip60->payload_length = u16_net_add (ip40->length, -sizeof (*ip40)); - ip60->hop_limit = ip40->ttl; - ip60->protocol = ip40->protocol; - - if (PREDICT_FALSE (frag0 != NULL)) - { - frag0->next_hdr = ip60->protocol; - frag0->identification = frag_id0; - frag0->rsv = 0; - frag0->fragment_offset_and_more = - ip6_frag_hdr_offset_and_more (0, 1); - ip60->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; - ip60->payload_length = - u16_net_add (ip60->payload_length, sizeof (*frag0)); - } - - //Finally copying the address - ip60->dst_address.as_u64[0] = pheader0->daddr.as_u64[0]; - ip60->dst_address.as_u64[1] = pheader0->daddr.as_u64[1]; - ip60->src_address.as_u64[0] = pheader0->saddr.as_u64[0]; - ip60->src_address.as_u64[1] = pheader0->saddr.as_u64[1]; - - csum0 = ip_csum_add_even (csum0, ip60->src_address.as_u64[0]); - csum0 = ip_csum_add_even (csum0, ip60->src_address.as_u64[1]); - csum0 = ip_csum_add_even (csum0, ip60->dst_address.as_u64[0]); - csum0 = ip_csum_add_even (csum0, ip60->dst_address.as_u64[1]); - *checksum0 = ip_csum_fold (csum0); - - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.next_index = IP6_FRAG_NEXT_IP6_LOOKUP; - next0 = IP4_MAPT_TCP_UDP_NEXT_IP6_FRAG; - } - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, n_left_to_next, pi0, next0); @@ -1159,10 +605,10 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, thread_index, - vnet_buffer (p0)->map_t. - map_domain_index, 1, - clib_net_to_host_u16 (ip40-> - length)); + vnet_buffer (p0)-> + map_t.map_domain_index, 1, + clib_net_to_host_u16 + (ip40->length)); } if (PREDICT_TRUE @@ -1170,10 +616,10 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, thread_index, - vnet_buffer (p1)->map_t. - map_domain_index, 1, - clib_net_to_host_u16 (ip41-> - length)); + vnet_buffer (p1)-> + map_t.map_domain_index, 1, + clib_net_to_host_u16 + (ip41->length)); } next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0; @@ -1253,10 +699,10 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_TX, thread_index, - vnet_buffer (p0)->map_t. - map_domain_index, 1, - clib_net_to_host_u16 (ip40-> - length)); + vnet_buffer (p0)-> + map_t.map_domain_index, 1, + clib_net_to_host_u16 + (ip40->length)); } next0 = (error0 != MAP_ERROR_NONE) ? IP4_MAPT_NEXT_DROP : next0; diff --git a/src/vnet/map/ip6_map.c b/src/vnet/map/ip6_map.c index 63ada962..720d13c2 100644 --- a/src/vnet/map/ip6_map.c +++ b/src/vnet/map/ip6_map.c @@ -15,6 +15,8 @@ #include "map.h" #include "../ip/ip_frag.h" +#include +#include enum ip6_map_next_e { @@ -125,7 +127,7 @@ ip6_map_security_check (map_domain_t * d, ip4_header_t * ip4, { if (!ip4_is_fragment (ip4)) { - u16 port = ip4_map_get_port (ip4, MAP_SENDER); + u16 port = ip4_get_port (ip4, 1); if (port) { if (mm->sec_check) @@ -243,8 +245,9 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { d0 = ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & ip40->src_address. - as_u32, &map_domain_index0, &error0); + (ip4_address_t *) & ip40-> + src_address.as_u32, &map_domain_index0, + &error0); } else if (ip60->protocol == IP_PROTOCOL_ICMP6 && clib_net_to_host_u16 (ip60->payload_length) > @@ -270,8 +273,9 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { d1 = ip6_map_get_domain (vnet_buffer (p1)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & ip41->src_address. - as_u32, &map_domain_index1, &error1); + (ip4_address_t *) & ip41-> + src_address.as_u32, &map_domain_index1, + &error1); } else if (ip61->protocol == IP_PROTOCOL_ICMP6 && clib_net_to_host_u16 (ip61->payload_length) > @@ -454,8 +458,9 @@ ip6_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { d0 = ip6_map_get_domain (vnet_buffer (p0)->ip.adj_index[VLIB_TX], - (ip4_address_t *) & ip40->src_address. - as_u32, &map_domain_index0, &error0); + (ip4_address_t *) & ip40-> + src_address.as_u32, &map_domain_index0, + &error0); } else if (ip60->protocol == IP_PROTOCOL_ICMP6 && clib_net_to_host_u16 (ip60->payload_length) > @@ -891,9 +896,7 @@ ip6_map_ip4_reass (vlib_main_t * vm, cached = 1; } } - else - if ((port0 = - ip4_get_port (ip40, MAP_SENDER, p0->current_length)) < 0) + else if ((port0 = ip4_get_port (ip40, 1)) == 0) { // Could not find port from first fragment. Stop reassembling. error0 = MAP_ERROR_BAD_PROTOCOL; diff --git a/src/vnet/map/ip6_map_t.c b/src/vnet/map/ip6_map_t.c index 99151678..b173bb2a 100644 --- a/src/vnet/map/ip6_map_t.c +++ b/src/vnet/map/ip6_map_t.c @@ -15,6 +15,8 @@ #include "map.h" #include "../ip/ip_frag.h" +#include +#include #define IP6_MAP_T_DUAL_LOOP @@ -94,347 +96,55 @@ ip6_map_fragment_get (ip6_header_t * ip6, ip6_frag_hdr_t * frag, return ret; } -static_always_inline u8 -ip6_translate_tos (const ip6_header_t * ip6) +typedef struct { -#ifdef IP6_MAP_T_OVERRIDE_TOS - return IP6_MAP_T_OVERRIDE_TOS; -#else - return (clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label) - & 0x0ff00000) >> 20; -#endif -} - -//TODO: Find right place in memory for that -/* *INDENT-OFF* */ -static u8 icmp6_to_icmp_updater_pointer_table[] = - { 0, 1, ~0, ~0, - 2, 2, 9, 8, - 12, 12, 12, 12, - 12, 12, 12, 12, - 12, 12, 12, 12, - 12, 12, 12, 12, - 24, 24, 24, 24, - 24, 24, 24, 24, - 24, 24, 24, 24, - 24, 24, 24, 24 - }; -/* *INDENT-ON* */ - -static_always_inline int -ip6_icmp_to_icmp6_in_place (icmp46_header_t * icmp, u32 icmp_len, - i32 * sender_port, ip6_header_t ** inner_ip6) -{ - *inner_ip6 = NULL; - switch (icmp->type) - { - case ICMP6_echo_request: - *sender_port = ((u16 *) icmp)[2]; - icmp->type = ICMP4_echo_request; - break; - case ICMP6_echo_reply: - *sender_port = ((u16 *) icmp)[2]; - icmp->type = ICMP4_echo_reply; - break; - case ICMP6_destination_unreachable: - *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); - *sender_port = ip6_get_port (*inner_ip6, MAP_RECEIVER, icmp_len); - - switch (icmp->code) - { - case ICMP6_destination_unreachable_no_route_to_destination: //0 - case ICMP6_destination_unreachable_beyond_scope_of_source_address: //2 - case ICMP6_destination_unreachable_address_unreachable: //3 - icmp->type = ICMP4_destination_unreachable; - icmp->code = - ICMP4_destination_unreachable_destination_unreachable_host; - break; - case ICMP6_destination_unreachable_destination_administratively_prohibited: //1 - icmp->type = - ICMP4_destination_unreachable; - icmp->code = - ICMP4_destination_unreachable_communication_administratively_prohibited; - break; - case ICMP6_destination_unreachable_port_unreachable: - icmp->type = ICMP4_destination_unreachable; - icmp->code = ICMP4_destination_unreachable_port_unreachable; - break; - default: - return -1; - } - break; - case ICMP6_packet_too_big: - *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); - *sender_port = ip6_get_port (*inner_ip6, MAP_RECEIVER, icmp_len); - - icmp->type = ICMP4_destination_unreachable; - icmp->code = 4; - { - u32 advertised_mtu = clib_net_to_host_u32 (*((u32 *) (icmp + 1))); - advertised_mtu -= 20; - //FIXME: = minimum(advertised MTU-20, MTU_of_IPv4_nexthop, (MTU_of_IPv6_nexthop)-20) - ((u16 *) (icmp))[3] = clib_host_to_net_u16 (advertised_mtu); - } - break; - - case ICMP6_time_exceeded: - *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); - *sender_port = ip6_get_port (*inner_ip6, MAP_RECEIVER, icmp_len); - - icmp->type = ICMP4_time_exceeded; - break; - - case ICMP6_parameter_problem: - *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); - *sender_port = ip6_get_port (*inner_ip6, MAP_RECEIVER, icmp_len); - - switch (icmp->code) - { - case ICMP6_parameter_problem_erroneous_header_field: - icmp->type = ICMP4_parameter_problem; - icmp->code = ICMP4_parameter_problem_pointer_indicates_error; - u32 pointer = clib_net_to_host_u32 (*((u32 *) (icmp + 1))); - if (pointer >= 40) - return -1; - - ((u8 *) (icmp + 1))[0] = - icmp6_to_icmp_updater_pointer_table[pointer]; - break; - case ICMP6_parameter_problem_unrecognized_next_header: - icmp->type = ICMP4_destination_unreachable; - icmp->code = ICMP4_destination_unreachable_port_unreachable; - break; - case ICMP6_parameter_problem_unrecognized_option: - default: - return -1; - } - break; - default: - return -1; - break; - } - return 0; -} + map_domain_t *d; + u16 sender_port; +} icmp6_to_icmp_ctx_t; -static_always_inline void -_ip6_map_t_icmp (map_domain_t * d, vlib_buffer_t * p, u8 * error) +static int +ip6_to_ip4_set_icmp_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *arg) { - ip6_header_t *ip6, *inner_ip6; - ip4_header_t *ip4, *inner_ip4; - u32 ip6_pay_len; - icmp46_header_t *icmp; - i32 sender_port; - ip_csum_t csum; - u32 ip4_sadr, inner_ip4_dadr; - - ip6 = vlib_buffer_get_current (p); - ip6_pay_len = clib_net_to_host_u16 (ip6->payload_length); - icmp = (icmp46_header_t *) (ip6 + 1); - ASSERT (ip6_pay_len + sizeof (*ip6) <= p->current_length); - - if (ip6->protocol != IP_PROTOCOL_ICMP6) - { - //No extensions headers allowed here - //TODO: SR header - *error = MAP_ERROR_MALFORMED; - return; - } - - //There are no fragmented ICMP messages, so no extension header for now - - if (ip6_icmp_to_icmp6_in_place - (icmp, ip6_pay_len, &sender_port, &inner_ip6)) - { - //TODO: In case of 1:1 mapping it is not necessary to have the sender port - *error = MAP_ERROR_ICMP; - return; - } - - if (sender_port < 0) - { - // In case of 1:1 mapping, we don't care about the port - if (d->ea_bits_len == 0 && d->rules) - { - sender_port = 0; - } - else - { - *error = MAP_ERROR_ICMP; - return; - } - } + icmp6_to_icmp_ctx_t *ctx = arg; + u32 ip4_sadr; //Security check //Note that this prevents an intermediate IPv6 router from answering the request ip4_sadr = map_get_ip4 (&ip6->src_address); - if (ip6->src_address.as_u64[0] != map_get_pfx_net (d, ip4_sadr, sender_port) - || ip6->src_address.as_u64[1] != map_get_sfx_net (d, ip4_sadr, - sender_port)) - { - *error = MAP_ERROR_SEC_CHECK; - return; - } - - if (inner_ip6) - { - u16 *inner_L4_checksum, inner_l4_offset, inner_frag_offset, - inner_frag_id; - u8 *inner_l4, inner_protocol; - - //We have two headers to translate - // FROM - // [ IPv6 ]<- ext ->[IC][ IPv6 ]<- ext ->[L4 header ... - // Handled cases: - // [ IPv6 ][IC][ IPv6 ][L4 header ... - // [ IPv6 ][IC][ IPv6 ][Fr][L4 header ... - // TO - // [ IPv4][IC][ IPv4][L4 header ... - - //TODO: This was already done deep in ip6_icmp_to_icmp6_in_place - //We shouldn't have to do it again - if (ip6_parse (inner_ip6, ip6_pay_len - 8, - &inner_protocol, &inner_l4_offset, &inner_frag_offset)) - { - *error = MAP_ERROR_MALFORMED; - return; - } - - inner_l4 = u8_ptr_add (inner_ip6, inner_l4_offset); - inner_ip4 = - (ip4_header_t *) u8_ptr_add (inner_l4, -sizeof (*inner_ip4)); - if (inner_frag_offset) - { - ip6_frag_hdr_t *inner_frag = - (ip6_frag_hdr_t *) u8_ptr_add (inner_ip6, inner_frag_offset); - inner_frag_id = frag_id_6to4 (inner_frag->identification); - } - else - { - inner_frag_id = 0; - } - - //Do the translation of the inner packet - if (inner_protocol == IP_PROTOCOL_TCP) - { - inner_L4_checksum = (u16 *) u8_ptr_add (inner_l4, 16); - } - else if (inner_protocol == IP_PROTOCOL_UDP) - { - inner_L4_checksum = (u16 *) u8_ptr_add (inner_l4, 6); - } - else if (inner_protocol == IP_PROTOCOL_ICMP6) - { - icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4; - csum = inner_icmp->checksum; - csum = ip_csum_sub_even (csum, *((u16 *) inner_icmp)); - //It cannot be of a different type as ip6_icmp_to_icmp6_in_place succeeded - inner_icmp->type = (inner_icmp->type == ICMP6_echo_request) ? - ICMP4_echo_request : ICMP4_echo_reply; - csum = ip_csum_add_even (csum, *((u16 *) inner_icmp)); - inner_icmp->checksum = ip_csum_fold (csum); - inner_protocol = IP_PROTOCOL_ICMP; //Will be copied to ip6 later - inner_L4_checksum = &inner_icmp->checksum; - } - else - { - *error = MAP_ERROR_BAD_PROTOCOL; - return; - } - - csum = *inner_L4_checksum; - csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[0]); - csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]); - csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]); - csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]); - - //Sanity check of the outer destination address - if (ip6->dst_address.as_u64[0] != inner_ip6->src_address.as_u64[0] && - ip6->dst_address.as_u64[1] != inner_ip6->src_address.as_u64[1]) - { - *error = MAP_ERROR_SEC_CHECK; - return; - } - - //Security check of inner packet - inner_ip4_dadr = map_get_ip4 (&inner_ip6->dst_address); - if (inner_ip6->dst_address.as_u64[0] != - map_get_pfx_net (d, inner_ip4_dadr, sender_port) - || inner_ip6->dst_address.as_u64[1] != map_get_sfx_net (d, - inner_ip4_dadr, - sender_port)) - { - *error = MAP_ERROR_SEC_CHECK; - return; - } + if (ip6->src_address.as_u64[0] != + map_get_pfx_net (ctx->d, ip4_sadr, ctx->sender_port) + || ip6->src_address.as_u64[1] != map_get_sfx_net (ctx->d, ip4_sadr, + ctx->sender_port)) + return -1; + + ip4->dst_address.as_u32 = + ip6_map_t_embedded_address (ctx->d, &ip6->dst_address); + ip4->src_address.as_u32 = ip4_sadr; - inner_ip4->dst_address.as_u32 = inner_ip4_dadr; - inner_ip4->src_address.as_u32 = - ip6_map_t_embedded_address (d, &inner_ip6->src_address); - inner_ip4->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - inner_ip4->tos = ip6_translate_tos (inner_ip6); - inner_ip4->length = - u16_net_add (inner_ip6->payload_length, - sizeof (*ip4) + sizeof (*ip6) - inner_l4_offset); - inner_ip4->fragment_id = inner_frag_id; - inner_ip4->flags_and_fragment_offset = - clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); - inner_ip4->ttl = inner_ip6->hop_limit; - inner_ip4->protocol = inner_protocol; - inner_ip4->checksum = ip4_header_checksum (inner_ip4); - - if (inner_ip4->protocol == IP_PROTOCOL_ICMP) - { - //Remove remainings of the pseudo-header in the csum - csum = - ip_csum_sub_even (csum, clib_host_to_net_u16 (IP_PROTOCOL_ICMP6)); - csum = - ip_csum_sub_even (csum, inner_ip4->length - sizeof (*inner_ip4)); - } - else - { - //Update to new pseudo-header - csum = ip_csum_add_even (csum, inner_ip4->src_address.as_u32); - csum = ip_csum_add_even (csum, inner_ip4->dst_address.as_u32); - } - *inner_L4_checksum = ip_csum_fold (csum); + return 0; +} - //Move up icmp header - ip4 = (ip4_header_t *) u8_ptr_add (inner_l4, -2 * sizeof (*ip4) - 8); - clib_memcpy (u8_ptr_add (inner_l4, -sizeof (*ip4) - 8), icmp, 8); - icmp = (icmp46_header_t *) u8_ptr_add (inner_l4, -sizeof (*ip4) - 8); - } - else - { - //Only one header to translate - ip4 = (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)); - } - vlib_buffer_advance (p, (u32) (((u8 *) ip4) - ((u8 *) ip6))); +static int +ip6_to_ip4_set_inner_icmp_cb (ip6_header_t * ip6, ip4_header_t * ip4, + void *arg) +{ + icmp6_to_icmp_ctx_t *ctx = arg; + u32 inner_ip4_dadr; + + //Security check of inner packet + inner_ip4_dadr = map_get_ip4 (&ip6->dst_address); + if (ip6->dst_address.as_u64[0] != + map_get_pfx_net (ctx->d, inner_ip4_dadr, ctx->sender_port) + || ip6->dst_address.as_u64[1] != map_get_sfx_net (ctx->d, + inner_ip4_dadr, + ctx->sender_port)) + return -1; + + ip4->dst_address.as_u32 = inner_ip4_dadr; + ip4->src_address.as_u32 = + ip6_map_t_embedded_address (ctx->d, &ip6->src_address); - ip4->dst_address.as_u32 = ip6_map_t_embedded_address (d, &ip6->dst_address); - ip4->src_address.as_u32 = ip4_sadr; - ip4->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip4->tos = ip6_translate_tos (ip6); - ip4->fragment_id = 0; - ip4->flags_and_fragment_offset = 0; - ip4->ttl = ip6->hop_limit; - ip4->protocol = IP_PROTOCOL_ICMP; - //TODO fix the length depending on offset length - ip4->length = u16_net_add (ip6->payload_length, - (inner_ip6 == - NULL) ? sizeof (*ip4) : (2 * sizeof (*ip4) - - sizeof (*ip6))); - ip4->checksum = ip4_header_checksum (ip4); - - //TODO: We could do an easy diff-checksum for echo requests/replies - //Recompute ICMP checksum - icmp->checksum = 0; - csum = - ip_incremental_checksum (0, icmp, - clib_net_to_host_u16 (ip4->length) - - sizeof (*ip4)); - icmp->checksum = ~ip_csum_fold (csum); + return 0; } static uword @@ -462,6 +172,8 @@ ip6_map_t_icmp (vlib_main_t * vm, ip6_mapt_icmp_next_t next0; map_domain_t *d0; u16 len0; + icmp6_to_icmp_ctx_t ctx0; + ip6_header_t *ip60; pi0 = to_next[0] = from[0]; from += 1; @@ -472,14 +184,30 @@ ip6_map_t_icmp (vlib_main_t * vm, next0 = IP6_MAPT_ICMP_NEXT_IP4_LOOKUP; p0 = vlib_get_buffer (vm, pi0); - len0 = - clib_net_to_host_u16 (((ip6_header_t *) - vlib_buffer_get_current - (p0))->payload_length); + ip60 = vlib_buffer_get_current (p0); + len0 = clib_net_to_host_u16 (ip60->payload_length); d0 = pool_elt_at_index (map_main.domains, vnet_buffer (p0)->map_t.map_domain_index); - _ip6_map_t_icmp (d0, p0, &error0); + ctx0.sender_port = ip6_get_port (ip60, 0, len0); + ctx0.d = d0; + if (ctx0.sender_port == 0) + { + // In case of 1:1 mapping, we don't care about the port + if (!(d0->ea_bits_len == 0 && d0->rules)) + { + error0 = MAP_ERROR_ICMP; + goto err0; + } + } + + if (icmp6_to_icmp + (p0, ip6_to_ip4_set_icmp_cb, d0, ip6_to_ip4_set_inner_icmp_cb, + d0)) + { + error0 = MAP_ERROR_ICMP; + goto err0; + } if (vnet_buffer (p0)->map_t.mtu < p0->current_length) { @@ -489,7 +217,7 @@ ip6_map_t_icmp (vlib_main_t * vm, vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; next0 = IP6_MAPT_ICMP_NEXT_IP4_FRAG; } - + err0: if (PREDICT_TRUE (error0 == MAP_ERROR_NONE)) { vlib_increment_combined_counter (cm + MAP_DOMAIN_COUNTER_RX, @@ -513,6 +241,17 @@ ip6_map_t_icmp (vlib_main_t * vm, return frame->n_vectors; } +static int +ip6_to_ip4_set_cb (ip6_header_t * ip6, ip4_header_t * ip4, void *ctx) +{ + vlib_buffer_t *p = ctx; + + ip4->dst_address.as_u32 = vnet_buffer (p)->map_t.v6.daddr; + ip4->src_address.as_u32 = vnet_buffer (p)->map_t.v6.saddr; + + return 0; +} + static uword ip6_map_t_fragmented (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -521,6 +260,8 @@ ip6_map_t_fragmented (vlib_main_t * vm, from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_map_t_fragmented_node.index); while (n_left_from > 0) { @@ -531,11 +272,6 @@ ip6_map_t_fragmented (vlib_main_t * vm, { u32 pi0, pi1; vlib_buffer_t *p0, *p1; - ip6_header_t *ip60, *ip61; - ip6_frag_hdr_t *frag0, *frag1; - ip4_header_t *ip40, *ip41; - u16 frag_id0, frag_offset0, frag_id1, frag_offset1; - u8 frag_more0, frag_more1; u32 next0, next1; pi0 = to_next[0] = from[0]; @@ -549,94 +285,41 @@ ip6_map_t_fragmented (vlib_main_t * vm, next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; p0 = vlib_get_buffer (vm, pi0); p1 = vlib_get_buffer (vm, pi1); - ip60 = vlib_buffer_get_current (p0); - ip61 = vlib_buffer_get_current (p1); - frag0 = - (ip6_frag_hdr_t *) u8_ptr_add (ip60, - vnet_buffer (p0)->map_t. - v6.frag_offset); - frag1 = - (ip6_frag_hdr_t *) u8_ptr_add (ip61, - vnet_buffer (p1)->map_t. - v6.frag_offset); - ip40 = - (ip4_header_t *) u8_ptr_add (ip60, - vnet_buffer (p0)->map_t. - v6.l4_offset - sizeof (*ip40)); - ip41 = - (ip4_header_t *) u8_ptr_add (ip61, - vnet_buffer (p1)->map_t. - v6.l4_offset - sizeof (*ip40)); - vlib_buffer_advance (p0, - vnet_buffer (p0)->map_t.v6.l4_offset - - sizeof (*ip40)); - vlib_buffer_advance (p1, - vnet_buffer (p1)->map_t.v6.l4_offset - - sizeof (*ip40)); - - frag_id0 = frag_id_6to4 (frag0->identification); - frag_id1 = frag_id_6to4 (frag1->identification); - frag_more0 = ip6_frag_hdr_more (frag0); - frag_more1 = ip6_frag_hdr_more (frag1); - frag_offset0 = ip6_frag_hdr_offset (frag0); - frag_offset1 = ip6_frag_hdr_offset (frag1); - - ip40->dst_address.as_u32 = vnet_buffer (p0)->map_t.v6.daddr; - ip41->dst_address.as_u32 = vnet_buffer (p1)->map_t.v6.daddr; - ip40->src_address.as_u32 = vnet_buffer (p0)->map_t.v6.saddr; - ip41->src_address.as_u32 = vnet_buffer (p1)->map_t.v6.saddr; - ip40->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip41->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip40->tos = ip6_translate_tos (ip60); - ip41->tos = ip6_translate_tos (ip61); - ip40->length = u16_net_add (ip60->payload_length, - sizeof (*ip40) - - vnet_buffer (p0)->map_t.v6.l4_offset + - sizeof (*ip60)); - ip41->length = - u16_net_add (ip61->payload_length, - sizeof (*ip40) - - vnet_buffer (p1)->map_t.v6.l4_offset + - sizeof (*ip60)); - ip40->fragment_id = frag_id0; - ip41->fragment_id = frag_id1; - ip40->flags_and_fragment_offset = - clib_host_to_net_u16 (frag_offset0 | - (frag_more0 ? IP4_HEADER_FLAG_MORE_FRAGMENTS - : 0)); - ip41->flags_and_fragment_offset = - clib_host_to_net_u16 (frag_offset1 | - (frag_more1 ? IP4_HEADER_FLAG_MORE_FRAGMENTS - : 0)); - ip40->ttl = ip60->hop_limit; - ip41->ttl = ip61->hop_limit; - ip40->protocol = - (vnet_buffer (p0)->map_t.v6.l4_protocol == - IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : vnet_buffer (p0)-> - map_t.v6.l4_protocol; - ip41->protocol = - (vnet_buffer (p1)->map_t.v6.l4_protocol == - IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : vnet_buffer (p1)-> - map_t.v6.l4_protocol; - ip40->checksum = ip4_header_checksum (ip40); - ip41->checksum = ip4_header_checksum (ip41); - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + if (ip6_to_ip4_fragmented (p0, ip6_to_ip4_set_cb, p0)) { - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; - next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; + next0 = IP6_MAPT_FRAGMENTED_NEXT_DROP; + } + else + { + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + } } - if (vnet_buffer (p1)->map_t.mtu < p1->current_length) + if (ip6_to_ip4_fragmented (p1, ip6_to_ip4_set_cb, p1)) { - vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu; - vnet_buffer (p1)->ip_frag.header_offset = 0; - vnet_buffer (p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; - next1 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + p1->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; + next1 = IP6_MAPT_FRAGMENTED_NEXT_DROP; + } + else + { + if (vnet_buffer (p1)->map_t.mtu < p1->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu; + vnet_buffer (p1)->ip_frag.header_offset = 0; + vnet_buffer (p1)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + } } vlib_validate_buffer_enqueue_x2 (vm, node, next_index, @@ -649,12 +332,6 @@ ip6_map_t_fragmented (vlib_main_t * vm, { u32 pi0; vlib_buffer_t *p0; - ip6_header_t *ip60; - ip6_frag_hdr_t *frag0; - ip4_header_t *ip40; - u16 frag_id0; - u8 frag_more0; - u16 frag_offset0; u32 next0; pi0 = to_next[0] = from[0]; @@ -665,51 +342,23 @@ ip6_map_t_fragmented (vlib_main_t * vm, next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; p0 = vlib_get_buffer (vm, pi0); - ip60 = vlib_buffer_get_current (p0); - frag0 = - (ip6_frag_hdr_t *) u8_ptr_add (ip60, - vnet_buffer (p0)->map_t. - v6.frag_offset); - ip40 = - (ip4_header_t *) u8_ptr_add (ip60, - vnet_buffer (p0)->map_t. - v6.l4_offset - sizeof (*ip40)); - vlib_buffer_advance (p0, - vnet_buffer (p0)->map_t.v6.l4_offset - - sizeof (*ip40)); - - frag_id0 = frag_id_6to4 (frag0->identification); - frag_more0 = ip6_frag_hdr_more (frag0); - frag_offset0 = ip6_frag_hdr_offset (frag0); - - ip40->dst_address.as_u32 = vnet_buffer (p0)->map_t.v6.daddr; - ip40->src_address.as_u32 = vnet_buffer (p0)->map_t.v6.saddr; - ip40->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip40->tos = ip6_translate_tos (ip60); - ip40->length = u16_net_add (ip60->payload_length, - sizeof (*ip40) - - vnet_buffer (p0)->map_t.v6.l4_offset + - sizeof (*ip60)); - ip40->fragment_id = frag_id0; - ip40->flags_and_fragment_offset = - clib_host_to_net_u16 (frag_offset0 | - (frag_more0 ? IP4_HEADER_FLAG_MORE_FRAGMENTS - : 0)); - ip40->ttl = ip60->hop_limit; - ip40->protocol = - (vnet_buffer (p0)->map_t.v6.l4_protocol == - IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : vnet_buffer (p0)-> - map_t.v6.l4_protocol; - ip40->checksum = ip4_header_checksum (ip40); - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + if (ip6_to_ip4_fragmented (p0, ip6_to_ip4_set_cb, p0)) { - //Send to fragmentation node if necessary - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; - next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + p0->error = error_node->errors[MAP_ERROR_FRAGMENT_DROPPED]; + next0 = IP6_MAPT_FRAGMENTED_NEXT_DROP; + } + else + { + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_FRAGMENTED_NEXT_IP4_FRAG; + } } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, @@ -726,6 +375,9 @@ ip6_map_t_tcp_udp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_map_t_tcp_udp_node.index); + from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; next_index = node->cached_next_index; @@ -738,11 +390,6 @@ ip6_map_t_tcp_udp (vlib_main_t * vm, { u32 pi0, pi1; vlib_buffer_t *p0, *p1; - ip6_header_t *ip60, *ip61; - ip_csum_t csum0, csum1; - ip4_header_t *ip40, *ip41; - u16 fragment_id0, flags0, *checksum0, - fragment_id1, flags1, *checksum1; ip6_mapt_tcp_udp_next_t next0, next1; pi0 = to_next[0] = from[0]; @@ -756,118 +403,41 @@ ip6_map_t_tcp_udp (vlib_main_t * vm, p0 = vlib_get_buffer (vm, pi0); p1 = vlib_get_buffer (vm, pi1); - ip60 = vlib_buffer_get_current (p0); - ip61 = vlib_buffer_get_current (p1); - ip40 = - (ip4_header_t *) u8_ptr_add (ip60, - vnet_buffer (p0)->map_t. - v6.l4_offset - sizeof (*ip40)); - ip41 = - (ip4_header_t *) u8_ptr_add (ip61, - vnet_buffer (p1)->map_t. - v6.l4_offset - sizeof (*ip40)); - vlib_buffer_advance (p0, - vnet_buffer (p0)->map_t.v6.l4_offset - - sizeof (*ip40)); - vlib_buffer_advance (p1, - vnet_buffer (p1)->map_t.v6.l4_offset - - sizeof (*ip40)); - checksum0 = - (u16 *) u8_ptr_add (ip60, - vnet_buffer (p0)->map_t.checksum_offset); - checksum1 = - (u16 *) u8_ptr_add (ip61, - vnet_buffer (p1)->map_t.checksum_offset); - - csum0 = ip_csum_sub_even (*checksum0, ip60->src_address.as_u64[0]); - csum1 = ip_csum_sub_even (*checksum1, ip61->src_address.as_u64[0]); - csum0 = ip_csum_sub_even (csum0, ip60->src_address.as_u64[1]); - csum1 = ip_csum_sub_even (csum1, ip61->src_address.as_u64[1]); - csum0 = ip_csum_sub_even (csum0, ip60->dst_address.as_u64[0]); - csum1 = ip_csum_sub_even (csum0, ip61->dst_address.as_u64[0]); - csum0 = ip_csum_sub_even (csum0, ip60->dst_address.as_u64[1]); - csum1 = ip_csum_sub_even (csum1, ip61->dst_address.as_u64[1]); - csum0 = ip_csum_add_even (csum0, vnet_buffer (p0)->map_t.v6.daddr); - csum1 = ip_csum_add_even (csum1, vnet_buffer (p1)->map_t.v6.daddr); - csum0 = ip_csum_add_even (csum0, vnet_buffer (p0)->map_t.v6.saddr); - csum1 = ip_csum_add_even (csum1, vnet_buffer (p1)->map_t.v6.saddr); - *checksum0 = ip_csum_fold (csum0); - *checksum1 = ip_csum_fold (csum1); - - if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset)) - { - ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip60, - vnet_buffer - (p0)-> - map_t. - v6.frag_offset); - fragment_id0 = frag_id_6to4 (hdr->identification); - flags0 = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); - } - else - { - fragment_id0 = 0; - flags0 = 0; - } - if (PREDICT_FALSE (vnet_buffer (p1)->map_t.v6.frag_offset)) + if (ip6_to_ip4_tcp_udp (p0, ip6_to_ip4_set_cb, p0, 1)) { - ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip61, - vnet_buffer - (p1)-> - map_t. - v6.frag_offset); - fragment_id1 = frag_id_6to4 (hdr->identification); - flags1 = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); + p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; + next0 = IP6_MAPT_TCP_UDP_NEXT_DROP; } else { - fragment_id1 = 0; - flags1 = 0; + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + } } - ip40->dst_address.as_u32 = vnet_buffer (p0)->map_t.v6.daddr; - ip41->dst_address.as_u32 = vnet_buffer (p1)->map_t.v6.daddr; - ip40->src_address.as_u32 = vnet_buffer (p0)->map_t.v6.saddr; - ip41->src_address.as_u32 = vnet_buffer (p1)->map_t.v6.saddr; - ip40->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip41->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip40->tos = ip6_translate_tos (ip60); - ip41->tos = ip6_translate_tos (ip61); - ip40->length = u16_net_add (ip60->payload_length, - sizeof (*ip40) + sizeof (*ip60) - - vnet_buffer (p0)->map_t.v6.l4_offset); - ip41->length = - u16_net_add (ip61->payload_length, - sizeof (*ip40) + sizeof (*ip60) - - vnet_buffer (p1)->map_t.v6.l4_offset); - ip40->fragment_id = fragment_id0; - ip41->fragment_id = fragment_id1; - ip40->flags_and_fragment_offset = flags0; - ip41->flags_and_fragment_offset = flags1; - ip40->ttl = ip60->hop_limit; - ip41->ttl = ip61->hop_limit; - ip40->protocol = vnet_buffer (p0)->map_t.v6.l4_protocol; - ip41->protocol = vnet_buffer (p1)->map_t.v6.l4_protocol; - ip40->checksum = ip4_header_checksum (ip40); - ip41->checksum = ip4_header_checksum (ip41); - - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + if (ip6_to_ip4_tcp_udp (p1, ip6_to_ip4_set_cb, p1, 1)) { - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; - next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + p1->error = error_node->errors[MAP_ERROR_UNKNOWN]; + next1 = IP6_MAPT_TCP_UDP_NEXT_DROP; } - - if (vnet_buffer (p1)->map_t.mtu < p1->current_length) + else { - vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu; - vnet_buffer (p1)->ip_frag.header_offset = 0; - vnet_buffer (p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; - next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + if (vnet_buffer (p1)->map_t.mtu < p1->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p1)->ip_frag.mtu = vnet_buffer (p1)->map_t.mtu; + vnet_buffer (p1)->ip_frag.header_offset = 0; + vnet_buffer (p1)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + next1 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + } } vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, @@ -880,12 +450,6 @@ ip6_map_t_tcp_udp (vlib_main_t * vm, { u32 pi0; vlib_buffer_t *p0; - ip6_header_t *ip60; - u16 *checksum0; - ip_csum_t csum0; - ip4_header_t *ip40; - u16 fragment_id0; - u16 flags0; ip6_mapt_tcp_udp_next_t next0; pi0 = to_next[0] = from[0]; @@ -896,65 +460,23 @@ ip6_map_t_tcp_udp (vlib_main_t * vm, next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_LOOKUP; p0 = vlib_get_buffer (vm, pi0); - ip60 = vlib_buffer_get_current (p0); - ip40 = - (ip4_header_t *) u8_ptr_add (ip60, - vnet_buffer (p0)->map_t. - v6.l4_offset - sizeof (*ip40)); - vlib_buffer_advance (p0, - vnet_buffer (p0)->map_t.v6.l4_offset - - sizeof (*ip40)); - checksum0 = - (u16 *) u8_ptr_add (ip60, - vnet_buffer (p0)->map_t.checksum_offset); - - //TODO: This can probably be optimized - csum0 = ip_csum_sub_even (*checksum0, ip60->src_address.as_u64[0]); - csum0 = ip_csum_sub_even (csum0, ip60->src_address.as_u64[1]); - csum0 = ip_csum_sub_even (csum0, ip60->dst_address.as_u64[0]); - csum0 = ip_csum_sub_even (csum0, ip60->dst_address.as_u64[1]); - csum0 = ip_csum_add_even (csum0, vnet_buffer (p0)->map_t.v6.daddr); - csum0 = ip_csum_add_even (csum0, vnet_buffer (p0)->map_t.v6.saddr); - *checksum0 = ip_csum_fold (csum0); - - if (PREDICT_FALSE (vnet_buffer (p0)->map_t.v6.frag_offset)) + + if (ip6_to_ip4_tcp_udp (p0, ip6_to_ip4_set_cb, p0, 1)) { - //Only the first fragment - ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip60, - vnet_buffer - (p0)-> - map_t. - v6.frag_offset); - fragment_id0 = frag_id_6to4 (hdr->identification); - flags0 = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); + p0->error = error_node->errors[MAP_ERROR_UNKNOWN]; + next0 = IP6_MAPT_TCP_UDP_NEXT_DROP; } else { - fragment_id0 = 0; - flags0 = 0; - } - - ip40->dst_address.as_u32 = vnet_buffer (p0)->map_t.v6.daddr; - ip40->src_address.as_u32 = vnet_buffer (p0)->map_t.v6.saddr; - ip40->ip_version_and_header_length = - IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; - ip40->tos = ip6_translate_tos (ip60); - ip40->length = u16_net_add (ip60->payload_length, - sizeof (*ip40) + sizeof (*ip60) - - vnet_buffer (p0)->map_t.v6.l4_offset); - ip40->fragment_id = fragment_id0; - ip40->flags_and_fragment_offset = flags0; - ip40->ttl = ip60->hop_limit; - ip40->protocol = vnet_buffer (p0)->map_t.v6.l4_protocol; - ip40->checksum = ip4_header_checksum (ip40); - - if (vnet_buffer (p0)->map_t.mtu < p0->current_length) - { - //Send to fragmentation node if necessary - vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; - vnet_buffer (p0)->ip_frag.header_offset = 0; - vnet_buffer (p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP4_LOOKUP; - next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + if (vnet_buffer (p0)->map_t.mtu < p0->current_length) + { + //Send to fragmentation node if necessary + vnet_buffer (p0)->ip_frag.mtu = vnet_buffer (p0)->map_t.mtu; + vnet_buffer (p0)->ip_frag.header_offset = 0; + vnet_buffer (p0)->ip_frag.next_index = + IP4_FRAG_NEXT_IP4_LOOKUP; + next0 = IP6_MAPT_TCP_UDP_NEXT_IP4_FRAG; + } } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, diff --git a/src/vnet/map/map.c b/src/vnet/map/map.c index 6a707df1..8579cdf6 100644 --- a/src/vnet/map/map.c +++ b/src/vnet/map/map.c @@ -76,91 +76,6 @@ crc_u32 (u32 data, u32 value) */ -i32 -ip4_get_port (ip4_header_t * ip, map_dir_e dir, u16 buffer_len) -{ - //TODO: use buffer length - if (ip->ip_version_and_header_length != 0x45 || - ip4_get_fragment_offset (ip)) - return -1; - - if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || - (ip->protocol == IP_PROTOCOL_UDP))) - { - udp_header_t *udp = (void *) (ip + 1); - return (dir == MAP_SENDER) ? udp->src_port : udp->dst_port; - } - else if (ip->protocol == IP_PROTOCOL_ICMP) - { - icmp46_header_t *icmp = (void *) (ip + 1); - if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply) - { - return *((u16 *) (icmp + 1)); - } - else if (clib_net_to_host_u16 (ip->length) >= 64) - { - ip = (ip4_header_t *) (icmp + 2); - if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || - (ip->protocol == IP_PROTOCOL_UDP))) - { - udp_header_t *udp = (void *) (ip + 1); - return (dir == MAP_SENDER) ? udp->dst_port : udp->src_port; - } - else if (ip->protocol == IP_PROTOCOL_ICMP) - { - icmp46_header_t *icmp = (void *) (ip + 1); - if (icmp->type == ICMP4_echo_request || - icmp->type == ICMP4_echo_reply) - { - return *((u16 *) (icmp + 1)); - } - } - } - } - return -1; -} - -i32 -ip6_get_port (ip6_header_t * ip6, map_dir_e dir, u16 buffer_len) -{ - u8 l4_protocol; - u16 l4_offset; - u16 frag_offset; - u8 *l4; - - if (ip6_parse (ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset)) - return -1; - - //TODO: Use buffer length - - if (frag_offset && - ip6_frag_hdr_offset (((ip6_frag_hdr_t *) - u8_ptr_add (ip6, frag_offset)))) - return -1; //Can't deal with non-first fragment for now - - l4 = u8_ptr_add (ip6, l4_offset); - if (l4_protocol == IP_PROTOCOL_TCP || l4_protocol == IP_PROTOCOL_UDP) - { - return (dir == - MAP_SENDER) ? ((udp_header_t *) (l4))->src_port : ((udp_header_t - *) - (l4))->dst_port; - } - else if (l4_protocol == IP_PROTOCOL_ICMP6) - { - icmp46_header_t *icmp = (icmp46_header_t *) (l4); - if (icmp->type == ICMP6_echo_request) - { - return (dir == MAP_SENDER) ? ((u16 *) (icmp))[2] : -1; - } - else if (icmp->type == ICMP6_echo_reply) - { - return (dir == MAP_SENDER) ? -1 : ((u16 *) (icmp))[2]; - } - } - return -1; -} - int map_create_domain (ip4_address_t * ip4_prefix, diff --git a/src/vnet/map/map.h b/src/vnet/map/map.h index 644e80f5..208a58ef 100644 --- a/src/vnet/map/map.h +++ b/src/vnet/map/map.h @@ -25,12 +25,6 @@ #define MAP_SKIP_IP6_LOOKUP 1 -typedef enum -{ - MAP_SENDER, - MAP_RECEIVER -} map_dir_e; - int map_create_domain (ip4_address_t * ip4_prefix, u8 ip4_prefix_len, ip6_address_t * ip6_prefix, u8 ip6_prefix_len, ip6_address_t * ip6_src, u8 ip6_src_len, @@ -40,9 +34,6 @@ int map_delete_domain (u32 map_domain_index); int map_add_del_psid (u32 map_domain_index, u16 psid, ip6_address_t * tep, u8 is_add); u8 *format_map_trace (u8 * s, va_list * args); -i32 ip4_get_port (ip4_header_t * ip, map_dir_e dir, u16 buffer_len); -i32 ip6_get_port (ip6_header_t * ip6, map_dir_e dir, u16 buffer_len); -u16 ip4_map_get_port (ip4_header_t * ip, map_dir_e dir); typedef enum __attribute__ ((__packed__)) { @@ -518,30 +509,10 @@ int map_ip6_reass_conf_lifetime(u16 lifetime_ms); int map_ip6_reass_conf_buffers(u32 buffers); #define MAP_IP6_REASS_CONF_BUFFERS_MAX (0xffffffff) -static_always_inline -int ip6_parse(const ip6_header_t *ip6, u32 buff_len, - u8 *l4_protocol, u16 *l4_offset, u16 *frag_hdr_offset) -{ - if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) { - *l4_protocol = ((ip6_frag_hdr_t *)(ip6 + 1))->next_hdr; - *frag_hdr_offset = sizeof(*ip6); - *l4_offset = sizeof(*ip6) + sizeof(ip6_frag_hdr_t); - } else { - *l4_protocol = ip6->protocol; - *frag_hdr_offset = 0; - *l4_offset = sizeof(*ip6); - } - - return (buff_len < (*l4_offset + 4)) || - (clib_net_to_host_u16(ip6->payload_length) < (*l4_offset + 4 - sizeof(*ip6))); -} - #define u8_ptr_add(ptr, index) (((u8 *)ptr) + index) #define u16_net_add(u, val) clib_host_to_net_u16(clib_net_to_host_u16(u) + (val)) -#define frag_id_6to4(id) ((id) ^ ((id) >> 16)) - static_always_inline void ip4_map_t_embedded_address (map_domain_t *d, ip6_address_t *ip6, const ip4_address_t *ip4) -- cgit 1.2.3-korg