diff options
Diffstat (limited to 'vnet')
31 files changed, 801 insertions, 288 deletions
diff --git a/vnet/Makefile.am b/vnet/Makefile.am index 1bde1e8082c..a0c20e9eb37 100644 --- a/vnet/Makefile.am +++ b/vnet/Makefile.am @@ -252,6 +252,7 @@ libvnet_la_SOURCES += \ nobase_include_HEADERS += \ vnet/ip/format.h \ vnet/ip/icmp46_packet.h \ + vnet/ip/icmp4.h \ vnet/ip/icmp6.h \ vnet/ip/igmp_packet.h \ vnet/ip/ip.h \ diff --git a/vnet/vnet/buffer.h b/vnet/vnet/buffer.h index 9cbb402bd60..f6c0023222e 100644 --- a/vnet/vnet/buffer.h +++ b/vnet/vnet/buffer.h @@ -66,7 +66,10 @@ _(gre) \ _(l2_classify) \ _(io_handoff) \ _(policer) \ -_(output_features) +_(output_features) \ +_(map) \ +_(map_t) \ +_(ip_frag) /* * vnet stack buffer opaque array overlay structure. @@ -117,6 +120,13 @@ typedef struct { u32 mini_connection_index; } tcp; + + /* ICMP */ + struct { + u8 type; + u8 code; + u32 data; + } icmp; }; } ip; diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c index fde0eb23e14..f05c37fed52 100644 --- a/vnet/vnet/devices/dpdk/node.c +++ b/vnet/vnet/devices/dpdk/node.c @@ -932,7 +932,7 @@ vlib_get_handoff_queue_elt (u32 vlib_worker_index) return elt; } -inline vlib_frame_queue_elt_t * +static inline vlib_frame_queue_elt_t * dpdk_get_handoff_queue_elt ( u32 vlib_worker_index, vlib_frame_queue_elt_t ** handoff_queue_elt_by_worker_index) diff --git a/vnet/vnet/devices/virtio/vhost-user.c b/vnet/vnet/devices/virtio/vhost-user.c index 20a4c021c6a..8dca33b82af 100644 --- a/vnet/vnet/devices/virtio/vhost-user.c +++ b/vnet/vnet/devices/virtio/vhost-user.c @@ -771,6 +771,9 @@ static u32 vhost_user_if_input ( vlib_main_t * vm, if (PREDICT_FALSE(!txvq->desc)) return 0; + if (PREDICT_FALSE(!txvq->avail)) + return 0; + /* do we have pending intterupts ? */ if ((txvq->n_since_last_int) && (txvq->int_deadline < now)) vhost_user_send_call(vm, txvq); @@ -1037,6 +1040,9 @@ vhost_user_intfc_tx (vlib_main_t * vm, if (PREDICT_FALSE(!rxvq->desc)) goto done2; + if (PREDICT_FALSE(!rxvq->avail)) + goto done2; + if (PREDICT_FALSE(vui->lockp != 0)) { while (__sync_lock_test_and_set (vui->lockp, 1)) diff --git a/vnet/vnet/ip/icmp4.c b/vnet/vnet/ip/icmp4.c index 4ee8f15df1f..abad5bd9efc 100644 --- a/vnet/vnet/ip/icmp4.c +++ b/vnet/vnet/ip/icmp4.c @@ -41,6 +41,13 @@ #include <vnet/ip/ip.h> #include <vnet/pg/pg.h> + +static char * icmp_error_strings[] = { +#define _(f,s) s, + foreach_icmp4_error +#undef _ +}; + static u8 * format_ip4_icmp_type_and_code (u8 * s, va_list * args) { icmp4_type_t type = va_arg (*args, int); @@ -96,10 +103,6 @@ static u8 * format_ip4_icmp_header (u8 * s, va_list * args) return s; } -typedef struct { - u8 packet_data[64]; -} icmp_input_trace_t; - static u8 * format_icmp_input_trace (u8 * s, va_list * va) { CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); @@ -114,20 +117,6 @@ static u8 * format_icmp_input_trace (u8 * s, va_list * va) } typedef enum { - ICMP4_ERROR_UNKNOWN_TYPE, - ICMP4_ERROR_ECHO_REPLIES_SENT, - ICMP4_ERROR_TTL_EXPIRE_RESP_SENT, - ICMP4_ERROR_TTL_EXPIRE_RESP_DROP, -} icmp_error_t; - -static char * icmp_error_strings[] = { - [ICMP4_ERROR_UNKNOWN_TYPE] = "unknown type", - [ICMP4_ERROR_ECHO_REPLIES_SENT] = "echo replies sent", - [ICMP4_ERROR_TTL_EXPIRE_RESP_SENT] = "TTL time exceeded response sent", - [ICMP4_ERROR_TTL_EXPIRE_RESP_DROP] = "TTL time exceeded response dropped", -}; - -typedef enum { ICMP_INPUT_NEXT_ERROR, ICMP_INPUT_N_NEXT, } icmp_input_next_t; @@ -418,19 +407,42 @@ VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = { }; typedef enum { - ICMP4_TTL_EXPIRE_NEXT_DROP, - ICMP4_TTL_EXPIRE_NEXT_LOOKUP, - ICMP4_TTL_EXPIRE_N_NEXT, -} icmp_ttl_expire_next_t; + IP4_ICMP_ERROR_NEXT_DROP, + IP4_ICMP_ERROR_NEXT_LOOKUP, + IP4_ICMP_ERROR_N_NEXT, +} ip4_icmp_error_next_t; + +void +icmp4_error_set_vnet_buffer (vlib_buffer_t *b, u8 type, u8 code, u32 data) +{ + vnet_buffer(b)->ip.icmp.type = type; + vnet_buffer(b)->ip.icmp.code = code; + vnet_buffer(b)->ip.icmp.data = data; +} + +static u8 +icmp4_icmp_type_to_error (u8 type) +{ + switch (type) { + case ICMP4_destination_unreachable: + return ICMP4_ERROR_DEST_UNREACH_SENT; + case ICMP4_time_exceeded: + return ICMP4_ERROR_TTL_EXPIRE_SENT; + case ICMP4_parameter_problem: + return ICMP4_ERROR_PARAM_PROBLEM_SENT; + default: + return ICMP4_ERROR_DROP; + } +} static uword -ip4_icmp_ttl_expire (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +ip4_icmp_error (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { u32 * from, * to_next; uword n_left_from, n_left_to_next; - icmp_ttl_expire_next_t next_index; + ip4_icmp_error_next_t next_index; ip4_main_t *im = &ip4_main; ip_lookup_main_t * lm = &im->lookup_main; @@ -442,117 +454,113 @@ ip4_icmp_ttl_expire (vlib_main_t * vm, vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, /* stride */ 1, sizeof (icmp_input_trace_t)); - while (n_left_from > 0) - { - vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from > 0 && n_left_to_next > 0) - { - u32 pi0 = from[0]; - u32 next0 = ICMP4_TTL_EXPIRE_NEXT_LOOKUP; - u8 error0 = ICMP4_ERROR_TTL_EXPIRE_RESP_SENT; - u32 len0, new_len0; - vlib_buffer_t * p0; - ip4_header_t * ip0, * out_ip0; - icmp46_header_t * icmp0; - ip_csum_t sum; - u32 sw_if_index0, if_add_index0; - - /* Speculatively enqueue p0 to the current next frame */ - to_next[0] = pi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; - - p0 = vlib_get_buffer(vm, pi0); - ip0 = vlib_buffer_get_current(p0); - len0 = vlib_buffer_length_in_chain (vm, p0); - sw_if_index0 = vnet_buffer(p0)->sw_if_index[VLIB_RX]; - - /* Cut payload to just IP header plus first 8 bytes */ - new_len0 = (ip0->ip_version_and_header_length &0xf)*4 + 8; - if (len0 > new_len0) - { - p0->current_length = new_len0; /* should fit in 1st buffer */ - if (PREDICT_FALSE(p0->total_length_not_including_first_buffer)) - { /* clear current_length of all other buffers in chain */ - vlib_buffer_t *b = p0; - p0->total_length_not_including_first_buffer = 0; - while (b->flags & VLIB_BUFFER_NEXT_PRESENT) - { - b = vlib_get_buffer (vm, b->next_buffer); - b->current_length = 0; - } - } - } + while (n_left_from > 0) { + vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next); - /* Add IP header and ICMP header including a 4 byte unused field */ - vlib_buffer_advance(p0, - -sizeof(ip4_header_t)-sizeof(icmp46_header_t)-4); - out_ip0 = vlib_buffer_get_current(p0); - icmp0 = (icmp46_header_t *) &out_ip0[1]; - - /* Fill ip header fields */ - out_ip0->ip_version_and_header_length = 0x45; - out_ip0->tos = 0; - out_ip0->length = clib_host_to_net_u16(p0->current_length); - out_ip0->fragment_id = 0; - out_ip0->ttl = 0xff; - out_ip0->protocol = IP_PROTOCOL_ICMP; - out_ip0->dst_address = ip0->src_address; - if_add_index0 = - lm->if_address_pool_index_by_sw_if_index[sw_if_index0]; - if (PREDICT_TRUE(if_add_index0 != ~0)) - { - ip_interface_address_t *if_add = - pool_elt_at_index(lm->if_address_pool, if_add_index0); - ip4_address_t *if_ip = - ip_interface_address_get_address(lm, if_add); - out_ip0->src_address = *if_ip; - vlib_error_count (vm, node->node_index, error0, 1); - } - else /* interface has no IP4 address - should not happen */ - { - next0 = ICMP4_TTL_EXPIRE_NEXT_DROP; - error0 = ICMP4_ERROR_TTL_EXPIRE_RESP_DROP; - } - out_ip0->checksum = ip4_header_checksum(out_ip0); - - /* Fill icmp header fields */ - icmp0->type = ICMP4_time_exceeded; - icmp0->code = ICMP4_time_exceeded_ttl_exceeded_in_transit; - icmp0->checksum = 0; - sum = ip_incremental_checksum( - 0, icmp0, p0->current_length - sizeof(ip4_header_t)); - icmp0->checksum = ~ip_csum_fold(sum); - - /* Update error status */ - p0->error = node->errors[error0]; - - /* Verify speculative enqueue, maybe switch current next frame */ - vlib_validate_buffer_enqueue_x1(vm, node, next_index, - to_next, n_left_to_next, - pi0, next0); - } - vlib_put_next_frame(vm, node, next_index, n_left_to_next); + while (n_left_from > 0 && n_left_to_next > 0) { + u32 pi0 = from[0]; + u32 next0 = IP4_ICMP_ERROR_NEXT_LOOKUP; + u8 error0 = ICMP4_ERROR_NONE; + vlib_buffer_t * p0; + ip4_header_t * ip0, * out_ip0; + icmp46_header_t * icmp0; + u32 sw_if_index0, if_add_index0; + ip_csum_t sum; + + /* Speculatively enqueue p0 to the current next frame */ + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer(vm, pi0); + ip0 = vlib_buffer_get_current(p0); + sw_if_index0 = vnet_buffer(p0)->sw_if_index[VLIB_RX]; + + /* + * RFC1812 says to keep as much of the original packet as + * possible within the minimum MTU (576). We cheat "a little" + * here by keeping whatever fits in the first buffer, to be more + * efficient + */ + if (PREDICT_FALSE(p0->total_length_not_including_first_buffer)) { + /* clear current_length of all other buffers in chain */ + vlib_buffer_t *b = p0; + p0->total_length_not_including_first_buffer = 0; + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) { + b = vlib_get_buffer (vm, b->next_buffer); + b->current_length = 0; + } + } + p0->current_length = p0->current_length > 576 ? 576 : p0->current_length; + + /* Add IP header and ICMPv4 header including a 4 byte data field */ + vlib_buffer_advance(p0, + -sizeof(ip4_header_t)-sizeof(icmp46_header_t)-4); + out_ip0 = vlib_buffer_get_current(p0); + icmp0 = (icmp46_header_t *) &out_ip0[1]; + + /* Fill ip header fields */ + out_ip0->ip_version_and_header_length = 0x45; + out_ip0->tos = 0; + out_ip0->length = clib_host_to_net_u16(p0->current_length); + out_ip0->fragment_id = 0; + out_ip0->flags_and_fragment_offset = 0; + out_ip0->ttl = 0xff; + out_ip0->protocol = IP_PROTOCOL_ICMP; + out_ip0->dst_address = ip0->src_address; + if_add_index0 = + lm->if_address_pool_index_by_sw_if_index[sw_if_index0]; + if (PREDICT_TRUE(if_add_index0 != ~0)) { + ip_interface_address_t *if_add = + pool_elt_at_index(lm->if_address_pool, if_add_index0); + ip4_address_t *if_ip = + ip_interface_address_get_address(lm, if_add); + out_ip0->src_address = *if_ip; + } else { + /* interface has no IP4 address - should not happen */ + next0 = IP4_ICMP_ERROR_NEXT_DROP; + error0 = ICMP4_ERROR_DROP; + } + out_ip0->checksum = ip4_header_checksum(out_ip0); + + /* Fill icmp header fields */ + icmp0->type = vnet_buffer(p0)->ip.icmp.type; + icmp0->code = vnet_buffer(p0)->ip.icmp.code; + *((u32 *)(icmp0 + 1)) = clib_host_to_net_u32(vnet_buffer(p0)->ip.icmp.data); + icmp0->checksum = 0; + sum = ip_incremental_checksum(0, icmp0, p0->current_length - sizeof(ip4_header_t)); + icmp0->checksum = ~ip_csum_fold(sum); + + /* Update error status */ + if (error0 == ICMP4_ERROR_NONE) + error0 = icmp4_icmp_type_to_error(icmp0->type); + vlib_error_count(vm, node->node_index, error0, 1); + + /* Verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1(vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); } + vlib_put_next_frame(vm, node, next_index, n_left_to_next); + } return frame->n_vectors; } -VLIB_REGISTER_NODE (ip4_icmp_ttl_expire_node) = { - .function = ip4_icmp_ttl_expire, - .name = "ip4-icmp-ttl-expire", +VLIB_REGISTER_NODE (ip4_icmp_error_node) = { + .function = ip4_icmp_error, + .name = "ip4-icmp-error", .vector_size = sizeof (u32), .n_errors = ARRAY_LEN (icmp_error_strings), .error_strings = icmp_error_strings, - .n_next_nodes = ICMP4_TTL_EXPIRE_N_NEXT, + .n_next_nodes = IP4_ICMP_ERROR_N_NEXT, .next_nodes = { - [ICMP4_TTL_EXPIRE_NEXT_DROP] = "error-drop", - [ICMP4_TTL_EXPIRE_NEXT_LOOKUP] = "ip4-lookup", + [IP4_ICMP_ERROR_NEXT_DROP] = "error-drop", + [IP4_ICMP_ERROR_NEXT_LOOKUP] = "ip4-lookup", }, .format_trace = format_icmp_input_trace, diff --git a/vnet/vnet/ip/icmp4.h b/vnet/vnet/ip/icmp4.h new file mode 100644 index 00000000000..f99bf2dafc3 --- /dev/null +++ b/vnet/vnet/ip/icmp4.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vnet_icmp4_h +#define included_vnet_icmp4_h + +#define foreach_icmp4_error \ + _ (NONE, "valid packets") \ + _ (UNKNOWN_TYPE, "unknown type") \ + _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \ + _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \ + _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \ + _ (OPTIONS_WITH_ODD_LENGTH, \ + "total option length not multiple of 8 bytes") \ + _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \ + _ (ECHO_REPLIES_SENT, "echo replies sent") \ + _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \ + _ (DEST_UNREACH_SENT, "destination unreachable response sent") \ + _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \ + _ (PARAM_PROBLEM_SENT, "parameter Pproblem response sent") \ + _ (DROP, "error message dropped") + +typedef enum { +#define _(f,s) ICMP4_ERROR_##f, + foreach_icmp4_error +#undef _ +} icmp4_error_t; + +typedef struct { + u8 packet_data[64]; +} icmp_input_trace_t; + +format_function_t format_icmp4_input_trace; +void ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type, u32 node_index); +void icmp4_error_set_vnet_buffer (vlib_buffer_t *b, u8 type, u8 code, u32 data); + +#endif /* included_vnet_icmp4_h */ diff --git a/vnet/vnet/ip/icmp6.c b/vnet/vnet/ip/icmp6.c index 4e4bb8ece81..e6022adfc49 100644 --- a/vnet/vnet/ip/icmp6.c +++ b/vnet/vnet/ip/icmp6.c @@ -483,19 +483,44 @@ VLIB_REGISTER_NODE (ip6_icmp_echo_request_node,static) = { }; typedef enum { - ICMP6_TTL_EXPIRE_NEXT_DROP, - ICMP6_TTL_EXPIRE_NEXT_LOOKUP, - ICMP6_TTL_EXPIRE_N_NEXT, -} icmp_ttl_expire_next_t; + IP6_ICMP_ERROR_NEXT_DROP, + IP6_ICMP_ERROR_NEXT_LOOKUP, + IP6_ICMP_ERROR_N_NEXT, +} ip6_icmp_error_next_t; + +void +icmp6_error_set_vnet_buffer (vlib_buffer_t *b, u8 type, u8 code, u32 data) +{ + vnet_buffer(b)->ip.icmp.type = type; + vnet_buffer(b)->ip.icmp.code = code; + vnet_buffer(b)->ip.icmp.data = data; +} + +static u8 +icmp6_icmp_type_to_error (u8 type) +{ + switch (type) { + case ICMP6_destination_unreachable: + return ICMP6_ERROR_DEST_UNREACH_SENT; + case ICMP6_packet_too_big: + return ICMP6_ERROR_PACKET_TOO_BIG_SENT; + case ICMP6_time_exceeded: + return ICMP6_ERROR_TTL_EXPIRE_SENT; + case ICMP6_parameter_problem: + return ICMP6_ERROR_PARAM_PROBLEM_SENT; + default: + return ICMP6_ERROR_DROP; + } +} static uword -ip6_icmp_ttl_expire (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) +ip6_icmp_error (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) { u32 * from, * to_next; uword n_left_from, n_left_to_next; - icmp_ttl_expire_next_t next_index; + ip6_icmp_error_next_t next_index; ip6_main_t *im = &ip6_main; ip_lookup_main_t * lm = &im->lookup_main; @@ -514,8 +539,8 @@ ip6_icmp_ttl_expire (vlib_main_t * vm, while (n_left_from > 0 && n_left_to_next > 0) { u32 pi0 = from[0]; - u32 next0 = ICMP6_TTL_EXPIRE_NEXT_LOOKUP; - u8 error0 = ICMP6_ERROR_TTL_EXPIRE_RESP_SENT; + u32 next0 = IP6_ICMP_ERROR_NEXT_LOOKUP; + u8 error0 = ICMP6_ERROR_NONE; vlib_buffer_t * p0; ip6_header_t * ip0, * out_ip0; icmp46_header_t * icmp0; @@ -533,8 +558,8 @@ ip6_icmp_ttl_expire (vlib_main_t * vm, ip0 = vlib_buffer_get_current(p0); sw_if_index0 = vnet_buffer(p0)->sw_if_index[VLIB_RX]; - /* RFC2463 says to keep as much of the original packet as possible - * within the MTU. We cheat "a little" here by keeping whatever fits + /* RFC4443 says to keep as much of the original packet as possible + * within the minimum MTU. We cheat "a little" here by keeping whatever fits * in the first buffer, to be more efficient */ if (PREDICT_FALSE(p0->total_length_not_including_first_buffer)) { /* clear current_length of all other buffers in chain */ @@ -546,8 +571,9 @@ ip6_icmp_ttl_expire (vlib_main_t * vm, b->current_length = 0; } } + p0->current_length = p0->current_length > 1280 ? 1280 : p0->current_length; - /* Add IP header and ICMPv6 header including a 4 byte ununsed field */ + /* Add IP header and ICMPv6 header including a 4 byte data field */ vlib_buffer_advance(p0, -sizeof(ip6_header_t)-sizeof(icmp46_header_t)-4); out_ip0 = vlib_buffer_get_current(p0); @@ -556,8 +582,8 @@ ip6_icmp_ttl_expire (vlib_main_t * vm, /* Fill ip header fields */ out_ip0->ip_version_traffic_class_and_flow_label = clib_host_to_net_u32(0x6<<28); - out_ip0->payload_length = - clib_host_to_net_u16(p0->current_length - sizeof(ip6_header_t)); + + out_ip0->payload_length = clib_host_to_net_u16(p0->current_length - sizeof(ip6_header_t)); out_ip0->protocol = IP_PROTOCOL_ICMP6; out_ip0->hop_limit = 0xff; out_ip0->dst_address = ip0->src_address; @@ -570,23 +596,27 @@ ip6_icmp_ttl_expire (vlib_main_t * vm, ip6_address_t *if_ip = ip_interface_address_get_address(lm, if_add); out_ip0->src_address = *if_ip; - vlib_error_count (vm, node->node_index, error0, 1); } else /* interface has no IP6 address - should not happen */ { - next0 = ICMP6_TTL_EXPIRE_NEXT_DROP; - error0 = ICMP6_ERROR_TTL_EXPIRE_RESP_DROP; + next0 = IP6_ICMP_ERROR_NEXT_DROP; + error0 = ICMP6_ERROR_DROP; } /* Fill icmp header fields */ - icmp0->type = ICMP6_time_exceeded; - icmp0->code = ICMP6_time_exceeded_ttl_exceeded_in_transit; + icmp0->type = vnet_buffer(p0)->ip.icmp.type; + icmp0->code = vnet_buffer(p0)->ip.icmp.code; + *((u32 *)(icmp0 + 1)) = clib_host_to_net_u32(vnet_buffer(p0)->ip.icmp.data); icmp0->checksum = 0; icmp0->checksum = ip6_tcp_udp_icmp_compute_checksum( vm, p0, out_ip0, &bogus_length); + + /* Update error status */ - p0->error = node->errors[error0]; + if (error0 == ICMP6_ERROR_NONE) + error0 = icmp6_icmp_type_to_error(icmp0->type); + vlib_error_count(vm, node->node_index, error0, 1); /* Verify speculative enqueue, maybe switch current next frame */ vlib_validate_buffer_enqueue_x1(vm, node, next_index, @@ -599,18 +629,18 @@ ip6_icmp_ttl_expire (vlib_main_t * vm, return frame->n_vectors; } -VLIB_REGISTER_NODE (ip6_icmp_ttl_expire_node) = { - .function = ip6_icmp_ttl_expire, - .name = "ip6-icmp-ttl-expire", +VLIB_REGISTER_NODE (ip6_icmp_error_node) = { + .function = ip6_icmp_error, + .name = "ip6-icmp-error", .vector_size = sizeof (u32), .n_errors = ARRAY_LEN (icmp_error_strings), .error_strings = icmp_error_strings, - .n_next_nodes = ICMP6_TTL_EXPIRE_N_NEXT, + .n_next_nodes = IP6_ICMP_ERROR_N_NEXT, .next_nodes = { - [ICMP6_TTL_EXPIRE_NEXT_DROP] = "error-drop", - [ICMP6_TTL_EXPIRE_NEXT_LOOKUP] = "ip6-lookup", + [IP6_ICMP_ERROR_NEXT_DROP] = "error-drop", + [IP6_ICMP_ERROR_NEXT_LOOKUP] = "ip6-lookup", }, .format_trace = format_icmp6_input_trace, diff --git a/vnet/vnet/ip/icmp6.h b/vnet/vnet/ip/icmp6.h index 92f6913a454..d44d0c3dbc9 100644 --- a/vnet/vnet/ip/icmp6.h +++ b/vnet/vnet/ip/icmp6.h @@ -46,8 +46,11 @@ _ (ROUTER_ADVERTISEMENTS_TX, "router advertisements sent") \ _ (ROUTER_ADVERTISEMENTS_RX, "router advertisements received") \ _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \ - _ (TTL_EXPIRE_RESP_SENT, "TTL time exceeded response sent") \ - _ (TTL_EXPIRE_RESP_DROP, "TTL time exceeded response dropped") + _ (DEST_UNREACH_SENT, "destination unreachable response sent") \ + _ (PACKET_TOO_BIG_SENT, "packet too big response sent") \ + _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \ + _ (PARAM_PROBLEM_SENT, "parameter Pproblem response sent") \ + _ (DROP, "error message dropped") typedef enum { @@ -62,6 +65,7 @@ typedef struct { format_function_t format_icmp6_input_trace; void icmp6_register_type (vlib_main_t * vm, icmp6_type_t type, u32 node_index); +void icmp6_error_set_vnet_buffer (vlib_buffer_t *b, u8 type, u8 code, u32 data); extern vlib_node_registration_t ip6_icmp_input_node; diff --git a/vnet/vnet/ip/ip.h b/vnet/vnet/ip/ip.h index e47512a960d..a0b4ea6d5cb 100644 --- a/vnet/vnet/ip/ip.h +++ b/vnet/vnet/ip/ip.h @@ -56,6 +56,7 @@ #include <vnet/ip/ip4.h> #include <vnet/ip/ip4_error.h> #include <vnet/ip/ip4_packet.h> +#include <vnet/ip/icmp4.h> #include <vnet/ip/ip6.h> #include <vnet/ip/ip6_packet.h> diff --git a/vnet/vnet/ip/ip4_input.c b/vnet/vnet/ip/ip4_input.c index 68edc0fa918..f31df0f6eb9 100644 --- a/vnet/vnet/ip/ip4_input.c +++ b/vnet/vnet/ip/ip4_input.c @@ -64,7 +64,7 @@ typedef enum { IP4_INPUT_NEXT_PUNT, IP4_INPUT_NEXT_LOOKUP, IP4_INPUT_NEXT_LOOKUP_MULTICAST, - IP4_INPUT_NEXT_TTL_EXPIRE, + IP4_INPUT_NEXT_ICMP_ERROR, IP4_INPUT_N_NEXT, } ip4_input_next_t; @@ -220,19 +220,21 @@ ip4_input_inline (vlib_main_t * vm, if (PREDICT_FALSE(error0 != IP4_ERROR_NONE)) { - next0 = (error0 != IP4_ERROR_OPTIONS - ? (error0 == IP4_ERROR_TIME_EXPIRED - ? IP4_INPUT_NEXT_TTL_EXPIRE - : IP4_INPUT_NEXT_DROP) - : IP4_INPUT_NEXT_PUNT); + if (error0 == IP4_ERROR_TIME_EXPIRED) { + icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); + next0 = IP4_INPUT_NEXT_ICMP_ERROR; + } else + next0 = error0 != IP4_ERROR_OPTIONS ? IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT; } if (PREDICT_FALSE(error1 != IP4_ERROR_NONE)) { - next1 = (error1 != IP4_ERROR_OPTIONS - ? (error1 == IP4_ERROR_TIME_EXPIRED - ? IP4_INPUT_NEXT_TTL_EXPIRE - : IP4_INPUT_NEXT_DROP) - : IP4_INPUT_NEXT_PUNT); + if (error1 == IP4_ERROR_TIME_EXPIRED) { + icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); + next1 = IP4_INPUT_NEXT_ICMP_ERROR; + } else + next1 = error1 != IP4_ERROR_OPTIONS ? IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT; } vlib_validate_buffer_enqueue_x2 (vm, node, next_index, @@ -307,11 +309,12 @@ ip4_input_inline (vlib_main_t * vm, p0->error = error_node->errors[error0]; if (PREDICT_FALSE(error0 != IP4_ERROR_NONE)) { - next0 = (error0 != IP4_ERROR_OPTIONS - ? (error0 == IP4_ERROR_TIME_EXPIRED - ? IP4_INPUT_NEXT_TTL_EXPIRE - : IP4_INPUT_NEXT_DROP) - : IP4_INPUT_NEXT_PUNT); + if (error0 == IP4_ERROR_TIME_EXPIRED) { + icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, 0); + next0 = IP4_INPUT_NEXT_ICMP_ERROR; + } else + next0 = error0 != IP4_ERROR_OPTIONS ? IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT; } vlib_validate_buffer_enqueue_x1 (vm, node, next_index, @@ -361,7 +364,7 @@ VLIB_REGISTER_NODE (ip4_input_node) = { [IP4_INPUT_NEXT_PUNT] = "error-punt", [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup", [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-lookup-multicast", - [IP4_INPUT_NEXT_TTL_EXPIRE] = "ip4-icmp-ttl-expire", + [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", }, .format_buffer = format_ip4_header, @@ -379,7 +382,7 @@ VLIB_REGISTER_NODE (ip4_input_no_checksum_node,static) = { [IP4_INPUT_NEXT_PUNT] = "error-punt", [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup", [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-lookup-multicast", - [IP4_INPUT_NEXT_TTL_EXPIRE] = "ip4-icmp-ttl-expire", + [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", }, .format_buffer = format_ip4_header, diff --git a/vnet/vnet/ip/ip6_input.c b/vnet/vnet/ip/ip6_input.c index ef8c7762625..f96a1cfb5a5 100644 --- a/vnet/vnet/ip/ip6_input.c +++ b/vnet/vnet/ip/ip6_input.c @@ -62,7 +62,7 @@ static u8 * format_ip6_input_trace (u8 * s, va_list * va) typedef enum { IP6_INPUT_NEXT_DROP, IP6_INPUT_NEXT_LOOKUP, - IP6_INPUT_NEXT_TTL_EXPIRE, + IP6_INPUT_NEXT_ICMP_ERROR, IP6_INPUT_N_NEXT, } ip6_input_next_t; @@ -186,13 +186,23 @@ ip6_input (vlib_main_t * vm, if (PREDICT_FALSE(error0 != IP6_ERROR_NONE)) { - next0 = (error0 == IP6_ERROR_TIME_EXPIRED) ? - IP6_INPUT_NEXT_TTL_EXPIRE : IP6_INPUT_NEXT_DROP; + if (error0 == IP6_ERROR_TIME_EXPIRED) { + icmp6_error_set_vnet_buffer(p0, ICMP6_time_exceeded, + ICMP6_time_exceeded_ttl_exceeded_in_transit, 0); + next0 = IP6_INPUT_NEXT_ICMP_ERROR; + } else { + next0 = IP6_INPUT_NEXT_DROP; + } } if (PREDICT_FALSE(error1 != IP6_ERROR_NONE)) { - next1 = (error1 == IP6_ERROR_TIME_EXPIRED) ? - IP6_INPUT_NEXT_TTL_EXPIRE : IP6_INPUT_NEXT_DROP; + if (error1 == IP6_ERROR_TIME_EXPIRED) { + icmp6_error_set_vnet_buffer(p1, ICMP6_time_exceeded, + ICMP6_time_exceeded_ttl_exceeded_in_transit, 0); + next1 = IP6_INPUT_NEXT_ICMP_ERROR; + } else { + next1 = IP6_INPUT_NEXT_DROP; + } } p0->error = error_node->errors[error0]; @@ -249,8 +259,13 @@ ip6_input (vlib_main_t * vm, if (PREDICT_FALSE(error0 != IP6_ERROR_NONE)) { - next0 = (error0 == IP6_ERROR_TIME_EXPIRED) ? - IP6_INPUT_NEXT_TTL_EXPIRE : IP6_INPUT_NEXT_DROP; + if (error0 == IP6_ERROR_TIME_EXPIRED) { + icmp6_error_set_vnet_buffer(p0, ICMP6_time_exceeded, + ICMP6_time_exceeded_ttl_exceeded_in_transit, 0); + next0 = IP6_INPUT_NEXT_ICMP_ERROR; + } else { + next0 = IP6_INPUT_NEXT_DROP; + } } p0->error = error_node->errors[error0]; @@ -283,7 +298,7 @@ VLIB_REGISTER_NODE (ip6_input_node) = { .next_nodes = { [IP6_INPUT_NEXT_DROP] = "error-drop", [IP6_INPUT_NEXT_LOOKUP] = "ip6-lookup", - [IP6_INPUT_NEXT_TTL_EXPIRE] = "ip6-icmp-ttl-expire", + [IP6_INPUT_NEXT_ICMP_ERROR] = "ip6-icmp-error", }, .format_buffer = format_ip6_header, diff --git a/vnet/vnet/ip/ip_frag.c b/vnet/vnet/ip/ip_frag.c index 22176187a9c..3436090a426 100644 --- a/vnet/vnet/ip/ip_frag.c +++ b/vnet/vnet/ip/ip_frag.c @@ -37,9 +37,8 @@ static u8 * format_ip_frag_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip_frag_trace_t * t = va_arg (*args, ip_frag_trace_t *); - s = format(s, "IPv%s offset: %u mtu: %u fragments: %u next: %s", - t->ipv6?"6":"4", - t->header_offset, t->mtu, t->n_fragments, node->next_node_names[t->next]); + s = format(s, "IPv%s offset: %u mtu: %u fragments: %u", + t->ipv6?"6":"4", t->header_offset, t->mtu, t->n_fragments); return s; } @@ -146,6 +145,14 @@ ip4_frag_do_fragment(vlib_main_t *vm, u32 pi, u32 **buffer, ip_frag_error_t *err } } +void +ip_frag_set_vnet_buffer (vlib_buffer_t *b, u16 offset, u16 mtu, u8 next_index, u8 flags) +{ + vnet_buffer(b)->ip_frag.header_offset = offset; + vnet_buffer(b)->ip_frag.mtu = mtu; + vnet_buffer(b)->ip_frag.next_index = next_index; + vnet_buffer(b)->ip_frag.flags = flags; +} static uword ip4_frag (vlib_main_t *vm, @@ -189,13 +196,25 @@ ip4_frag (vlib_main_t *vm, tr->next = vnet_buffer(p0)->ip_frag.next_index; } - next0 = (error0 == IP_FRAG_ERROR_NONE) ? vnet_buffer(p0)->ip_frag.next_index : IP4_FRAG_NEXT_DROP; - frag_sent += vec_len(buffer); - small_packets += (vec_len(buffer) == 1); + if (error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET) { + icmp4_error_set_vnet_buffer(p0, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, + vnet_buffer(p0)->ip_frag.mtu); + vlib_buffer_advance(p0, vnet_buffer(p0)->ip_frag.header_offset); + next0 = IP4_FRAG_NEXT_ICMP_ERROR; + } else + next0 = (error0 == IP_FRAG_ERROR_NONE) ? vnet_buffer(p0)->ip_frag.next_index : IP4_FRAG_NEXT_DROP; + + if (error0 == IP_FRAG_ERROR_NONE) { + frag_sent += vec_len(buffer); + small_packets += (vec_len(buffer) == 1); + } else + vlib_error_count(vm, ip4_frag_node.index, error0, 1); //Send fragments that were added in the frame frag_from = buffer; frag_left = vec_len(buffer); + while (frag_left > 0) { while (frag_left > 0 && n_left_to_next > 0) { u32 i; @@ -218,6 +237,7 @@ ip4_frag (vlib_main_t *vm, vlib_put_next_frame(vm, node, next_index, n_left_to_next); } vec_free(buffer); + vlib_node_increment_counter(vm, ip4_frag_node.index, IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent); vlib_node_increment_counter(vm, ip4_frag_node.index, IP_FRAG_ERROR_SMALL_PACKET, small_packets); @@ -426,6 +446,7 @@ VLIB_REGISTER_NODE (ip4_frag_node) = { .next_nodes = { [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error", [IP4_FRAG_NEXT_DROP] = "error-drop" }, }; diff --git a/vnet/vnet/ip/ip_frag.h b/vnet/vnet/ip/ip_frag.h index 04566904e5f..76238655af9 100644 --- a/vnet/vnet/ip/ip_frag.h +++ b/vnet/vnet/ip/ip_frag.h @@ -49,6 +49,7 @@ vlib_node_registration_t ip6_frag_node; typedef enum { IP4_FRAG_NEXT_IP4_LOOKUP, IP4_FRAG_NEXT_IP6_LOOKUP, + IP4_FRAG_NEXT_ICMP_ERROR, IP4_FRAG_NEXT_DROP, IP4_FRAG_N_NEXT } ip4_frag_next_t; @@ -65,8 +66,8 @@ typedef enum { _(NONE, "packet fragmented") \ _(SMALL_PACKET, "packet smaller than MTU") \ _(FRAGMENT_SENT, "number of sent fragments") \ - _(CANT_FRAGMENT_HEADER, "can't fragment header'") \ - _(DONT_FRAGMENT_SET, "can't fragment this packet'") \ + _(CANT_FRAGMENT_HEADER, "can't fragment header") \ + _(DONT_FRAGMENT_SET, "can't fragment this packet") \ _(MALFORMED, "malformed packet") \ _(MEMORY, "could not allocate buffer") \ _(UNKNOWN, "unknown error") @@ -78,4 +79,6 @@ typedef enum { IP_FRAG_N_ERROR, } ip_frag_error_t; +void ip_frag_set_vnet_buffer(vlib_buffer_t *b, u16 offset, u16 mtu, u8 next_index, u8 flags); + #endif /* ifndef IP_FRAG_H */ diff --git a/vnet/vnet/l2/l2_output.c b/vnet/vnet/l2/l2_output.c index 02862138928..31287645b14 100644 --- a/vnet/vnet/l2/l2_output.c +++ b/vnet/vnet/l2/l2_output.c @@ -474,11 +474,12 @@ u32 l2output_create_output_node_mapping ( u32 next; // index of next graph node vnet_hw_interface_t *hw0; u32 *node; -#if DPDK > 0 - uword cpu_number; hw0 = vnet_get_sup_hw_interface (vnet_main, sw_if_index); +#if DPDK > 0 + uword cpu_number; + cpu_number = os_get_cpu_number(); if (cpu_number) diff --git a/vnet/vnet/map/examples/gen-rules.py b/vnet/vnet/map/examples/gen-rules.py index ba6dde09aa8..7964aa9a359 100755 --- a/vnet/vnet/map/examples/gen-rules.py +++ b/vnet/vnet/map/examples/gen-rules.py @@ -25,11 +25,11 @@ def_ip6_pfx = '2001:db8::/32' def_ip6_src = '2001:db8::1' def_psid_offset = 6 def_psid_len = 6 -def_ea_bits_len = 14 - +def_ea_bits_len = 0 parser = argparse.ArgumentParser(description='MAP VPP configuration generator') parser.add_argument('-t', action="store", dest="mapmode") +parser.add_argument('-f', action="store", dest="format", default="vpp") parser.add_argument('--ip4-prefix', action="store", dest="ip4_pfx", default=def_ip4_pfx) parser.add_argument('--ip6-prefix', action="store", dest="ip6_pfx", default=def_ip6_pfx) parser.add_argument('--ip6-src', action="store", dest="ip6_src", default=def_ip6_src) @@ -38,18 +38,53 @@ parser.add_argument('--psid-offset', action="store", dest="psid_offset", default parser.add_argument('--ea-bits-len', action="store", dest="ea_bits_len", default=def_ea_bits_len) args = parser.parse_args() +# +# Print domain +# +def domain_print(i, ip4_pfx, ip6_pfx, ip6_src, eabits_len, psid_offset, psid_len): + if format == 'vpp': + print("map add domain ip4-pfx " + ip4_pfx + " ip6-pfx", ip6_pfx, "ip6-src " + ip6_src + + " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len) + if format == 'confd': + print("vpp softwire softwire-instances softwire-instance", i, "br-ipv6 " + ip6_src + + " ipv6-prefix " + ip6_pfx + " ipv4-prefix " + ip4_pfx + + " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len) + if format == 'xml': + print("<softwire-instance>") + print("<id>", i, "</id>"); + print(" <br-ipv6>" + ip6_src + "</br-ipv6>") + print(" <ipv6-prefix>" + ip6_pfx + "</ipv6-prefix>") + print(" <ipv4-prefix>" + ip4_pfx + "</ipv4-prefix>") + print(" <ea-len>", eabits_len, "</ea-len>") + print(" <psid-len>", psid_len, "</psid-len>") + print(" <psid-offset>", psid_offset, "</psid-offset>") + +def domain_print_end(): + if format == 'xml': + print("</softwire-instance>") + +def rule_print(i, psid, dst): + if format == 'vpp': + print("map add rule index", i, "psid", psid, "ip6-dst", dst) + if format == 'confd': + print("binding", psid, "ipv6-addr", dst) + if format == 'xml': + print(" <binding>") + print(" <psid>", psid, "</psid>") + print(" <ipv6-addr>", dst, "</ipv6-addr>") + print(" </binding>") # # Algorithmic mapping Shared IPv4 address # -def algo(ip4_pfx_str, ip6_pfx_str, ip6_src_str, psid_len, ea_bits_len, ip6_src_ecmp = False): - print("map add domain ip4-pfx " + ip4_pfx_str + " ip6-pfx " + ip6_pfx_str + " ip6-src " + ip6_src_str + - " ea-bits-len " + str(ea_bits_len) + " psid-offset 6 psid-len " + str(psid_len)) +def algo(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): + domain_print(0, ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len) + domain_print_end() # # 1:1 Full IPv4 address # -def lw46(ip4_pfx_str, ip6_pfx_str, ip6_src_str, psid_len, ea_bits_len, ip6_src_ecmp = False): +def lw46(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): ip4_pfx = ipaddress.ip_network(ip4_pfx_str) ip6_src = ipaddress.ip_address(ip6_src_str) ip6_dst = ipaddress.ip_network(ip6_pfx_str) @@ -57,46 +92,95 @@ def lw46(ip4_pfx_str, ip6_pfx_str, ip6_src_str, psid_len, ea_bits_len, ip6_src_e mod = ip4_pfx.num_addresses / 1024 for i in range(ip4_pfx.num_addresses): - print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-src", - ip6_src, "ea-bits-len 0 psid-offset 0 psid-len 0") + domain_print(i, str(ip4_pfx[i]) + "/32", str(ip6_dst[i]) + "/128", str(ip6_src), 0, 0, 0) + domain_print_end() if ip6_src_ecmp and not i % mod: ip6_src = ip6_src + 1 # # 1:1 Shared IPv4 address, shared BR (16) VPP CLI # -def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, psid_len, ea_bits_len, ip6_src_ecmp = False): +def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): ip4_pfx = ipaddress.ip_network(ip4_pfx_str) ip6_src = ipaddress.ip_address(ip6_src_str) ip6_dst = ipaddress.ip_network(ip6_pfx_str) - mod = ip4_pfx.num_addresses / 1024 for i in range(ip4_pfx.num_addresses): - print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src " + str(ip6_src) + - " ea-bits-len 0 psid-offset 0 psid-len", psid_len) - for psid in range(0x1 << psid_len): - print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid]) + domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len) + for psid in range(0x1 << int(psid_len)): + rule_print(i, psid, str(ip6_dst[(i * (0x1<<int(psid_len))) + psid])) + domain_print_end() if ip6_src_ecmp and not i % mod: ip6_src = ip6_src + 1 + # # 1:1 Shared IPv4 address, shared BR # -def lw46_shared_b(ip4_pfx_str, ip6_pfx_str, ip6_src_str, psid_len, ea_bits_len, ip6_src_ecmp = False): +def lw46_shared_b(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False): ip4_pfx = ipaddress.ip_network(ip4_pfx_str) ip6_src = ipaddress.ip_address(ip6_src_str) ip6_dst = list(ipaddress.ip_network(ip6_pfx_str).subnets(new_prefix=56)) - psid_len = 6 + mod = ip4_pfx.num_addresses / 1024 for i in range(ip4_pfx.num_addresses): - if not i % 64: - ip6_src = ip6_src + 1 - print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src " + str(ip6_src) + - " ea-bits-len 0 psid-offset 6 psid-len", psid_len) + domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len) for psid in range(0x1 << psid_len): enduserprefix = list(ip6_dst.pop(0).subnets(new_prefix=64))[255-1] - print("map add rule index", i, "psid", psid, "ip6-dst", enduserprefix[(i * (0x1<<psid_len)) + psid]) + rule_print(i, psid, enduserprefix[(i * (0x1<<psid_len)) + psid]) + domain_print_end() + if ip6_src_ecmp and not i % mod: + ip6_src = ip6_src + 1 + -globals()[args.mapmode](args.ip4_pfx, args.ip6_pfx, args.ip6_src, args.psid_len, args.psid_offset, - args.ea_bits_len) +def xml_header_print(): + print(''' +<?xml version="1.0" encoding="UTF-8"?> + <hello xmlns="urn:ietf:params:xml:ns:netconf:base:1.0"> + <capabilities> + <capability>urn:ietf:params:netconf:base:1.0</capability> + </capabilities> + </hello> +]]>]]> + +<?xml version="1.0" encoding="UTF-8"?> + <rpc xmlns="urn:ietf:params:xml:ns:netconf:base:1.0" message-id="1"> + <edit-config> + <target> + <candidate/> + </target> + <config> + + <vpp xmlns="http://www.cisco.com/yang/cisco-vpp"> + <softwire> + <softwire-instances> + + ''') + +def xml_footer_print(): + print(''' +</softwire-instances> +</softwire> +</vpp> + </config> + </edit-config> + </rpc> + +]]>]]> + +<?xml version="1.0" encoding="UTF-8"?> + <rpc xmlns="urn:ietf:params:xml:ns:netconf:base:1.0" message-id="2"> + <close-session/> + </rpc> + +]]>]]> + ''') + + +format = args.format +if format == 'xml': + xml_header_print() +globals()[args.mapmode](args.ip4_pfx, args.ip6_pfx, args.ip6_src, args.ea_bits_len, args.psid_offset, args.psid_len) +if format == 'xml': + xml_footer_print() diff --git a/vnet/vnet/map/examples/health_check.c b/vnet/vnet/map/examples/health_check.c new file mode 100644 index 00000000000..5f0d85fec08 --- /dev/null +++ b/vnet/vnet/map/examples/health_check.c @@ -0,0 +1,109 @@ +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip_icmp.h> +#include <arpa/inet.h> +#include <net/if.h> +#include <stdbool.h> +#include <errno.h> + +static void +usage (void) { + fprintf(stderr, + "Usage: health_check" + " -d debug" + " -I interface" + "\n"); + exit(2); +} + +int +main (int argc, char **argv) +{ + int sd, ch; + uint8_t *opt, *pkt; + struct ifreq ifr; + char *interface = NULL; + bool debug = false; + + while ((ch = getopt(argc, argv, "h?" "I:" "d")) != EOF) { + switch(ch) { + case 'I': + interface = optarg; + break; + case 'd': + debug = true; + break; + default: + usage(); + break; + } + } + + argc -= optind; + argv += optind; + + if (!interface) + usage(); + + /* Request a socket descriptor sd. */ + if ((sd = socket (AF_INET6, SOCK_RAW, IPPROTO_IPIP)) < 0) { + perror ("Failed to get socket descriptor "); + exit (EXIT_FAILURE); + } + + memset(&ifr, 0, sizeof(ifr)); + snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", interface); + + /* Bind socket to interface of this node. */ + if (setsockopt (sd, SOL_SOCKET, SO_BINDTODEVICE, (void *) &ifr, sizeof (ifr)) < 0) { + perror ("SO_BINDTODEVICE failed"); + exit (EXIT_FAILURE); + } + if (debug) printf("Binding to interface %s\n", interface); + + while (1) { + struct sockaddr_in6 src_addr; + socklen_t addrlen = sizeof(src_addr); + char source[INET6_ADDRSTRLEN+1]; + int len; + uint8_t inpack[IP_MAXPACKET]; + + if ((len = recvfrom(sd, inpack, sizeof(inpack), 0, (struct sockaddr *)&src_addr, &addrlen)) < 0) { + perror("recvfrom failed "); + } + if (inet_ntop(AF_INET6, &src_addr.sin6_addr, source, INET6_ADDRSTRLEN) == NULL) { + perror("inet_ntop() failed."); + exit(EXIT_FAILURE); + } + + /* Reply */ + struct iphdr *ip = (struct iphdr *)inpack; + uint32_t saddr; + struct icmphdr *icmp; + + saddr = ip->saddr; + ip->saddr = ip->daddr; + ip->daddr = saddr; + + switch (ip->protocol) { + case 1: + if (debug) printf ("ICMP Echo request from %s\n", source); + icmp = (struct icmphdr *)&ip[1]; + icmp->type = ICMP_ECHOREPLY; + break; + default: + fprintf(stderr, "Unsupported protocol %d", ip->protocol); + } + if (len = sendto(sd, inpack, len, 0, (struct sockaddr *)&src_addr, addrlen) < 0) { + perror("sendto failed "); + } + } + + close (sd); + + return (EXIT_SUCCESS); +} diff --git a/vnet/vnet/map/ip4_map.c b/vnet/vnet/map/ip4_map.c index c8ee2764f23..7b9b3ed9d89 100644 --- a/vnet/vnet/map/ip4_map.c +++ b/vnet/vnet/map/ip4_map.c @@ -27,8 +27,10 @@ enum ip4_map_next_e { #ifdef MAP_SKIP_IP6_LOOKUP IP4_MAP_NEXT_IP6_REWRITE, #endif - IP4_MAP_NEXT_FRAGMENT, + IP4_MAP_NEXT_IP4_FRAGMENT, + IP4_MAP_NEXT_IP6_FRAGMENT, IP4_MAP_NEXT_REASS, + IP4_MAP_NEXT_ICMP_ERROR, IP4_MAP_NEXT_DROP, IP4_MAP_N_NEXT, }; @@ -76,7 +78,7 @@ ip4_map_get_port (ip4_header_t *ip, map_dir_e dir) icmp46_header_t *icmp = (void *)(ip + 1); if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply) { return *((u16 *)(icmp + 1)); - } else if (clib_net_to_host_u16(ip->length) >= 64) { // IP + ICMP + IP + L4 header + } else if (clib_net_to_host_u16(ip->length) >= 56) { // IP + ICMP + IP + L4 header ip4_header_t *icmp_ip = (ip4_header_t *)(icmp + 2); if (PREDICT_TRUE((icmp_ip->protocol == IP_PROTOCOL_TCP) || (icmp_ip->protocol == IP_PROTOCOL_UDP))) { @@ -177,6 +179,27 @@ ip4_map_decrement_ttl (ip4_header_t *ip, u8 *error) ASSERT (ip->checksum == ip4_header_checksum(ip)); } +static u32 +ip4_map_fragment (vlib_buffer_t *b, u16 mtu, bool df, u8 *error) +{ + map_main_t *mm = &map_main; + + if (mm->frag_inner) { + ip_frag_set_vnet_buffer(b, sizeof(ip6_header_t), mtu, IP4_FRAG_NEXT_IP6_LOOKUP, IP_FRAG_FLAG_IP6_HEADER); + return (IP4_MAP_NEXT_IP4_FRAGMENT); + } else { + if (df && !mm->frag_ignore_df) { + icmp4_error_set_vnet_buffer(b, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, mtu); + vlib_buffer_advance(b, sizeof(ip6_header_t)); + *error = MAP_ERROR_DF_SET; + return (IP4_MAP_NEXT_ICMP_ERROR); + } + ip_frag_set_vnet_buffer(b, 0, mtu, IP6_FRAG_NEXT_IP6_LOOKUP, IP_FRAG_FLAG_IP6_HEADER); + return (IP4_MAP_NEXT_IP6_FRAGMENT); + } +} + /* * ip4_map */ @@ -247,6 +270,12 @@ ip4_map (vlib_main_t *vm, port0 = ip4_map_port_and_security_check(d0, ip40, &next0, &error0); port1 = ip4_map_port_and_security_check(d1, ip41, &next1, &error1); + /* Decrement IPv4 TTL */ + ip4_map_decrement_ttl(ip40, &error0); + ip4_map_decrement_ttl(ip41, &error1); + bool df0 = ip40->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT); + bool df1 = ip41->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT); + /* MAP calc */ u32 da40 = clib_net_to_host_u32(ip40->dst_address.as_u32); u32 da41 = clib_net_to_host_u32(ip41->dst_address.as_u32); @@ -256,8 +285,8 @@ ip4_map (vlib_main_t *vm, u64 dal61 = map_get_pfx(d1, da41, dp41); u64 dar60 = map_get_sfx(d0, da40, dp40); u64 dar61 = map_get_sfx(d1, da41, dp41); - if (dal60 == 0 && dar60 == 0) error0 = MAP_ERROR_UNKNOWN; - if (dal61 == 0 && dar61 == 0) error1 = MAP_ERROR_UNKNOWN; + if (dal60 == 0 && dar60 == 0) error0 = MAP_ERROR_NO_BINDING; + if (dal61 == 0 && dar61 == 0) error1 = MAP_ERROR_NO_BINDING; /* construct ipv6 header */ vlib_buffer_advance(p0, - sizeof(ip6_header_t)); @@ -288,11 +317,7 @@ ip4_map (vlib_main_t *vm, */ if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) { if (PREDICT_FALSE(d0->mtu && (clib_net_to_host_u16(ip6h0->payload_length) + sizeof(*ip6h0) > d0->mtu))) { - vnet_buffer(p0)->ip_frag.header_offset = sizeof(*ip6h0); - vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; - vnet_buffer(p0)->ip_frag.mtu = d0->mtu; - vnet_buffer(p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; - next0 = IP4_MAP_NEXT_FRAGMENT; + next0 = ip4_map_fragment(p0, d0->mtu, df0, &error0); } else { next0 = ip4_map_ip6_lookup_bypass(p0, ip40) ? IP4_MAP_NEXT_IP6_REWRITE : next0; vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index0, 1, @@ -308,11 +333,7 @@ ip4_map (vlib_main_t *vm, */ if (PREDICT_TRUE(error1 == MAP_ERROR_NONE)) { if (PREDICT_FALSE(d1->mtu && (clib_net_to_host_u16(ip6h1->payload_length) + sizeof(*ip6h1) > d1->mtu))) { - vnet_buffer(p1)->ip_frag.header_offset = sizeof(*ip6h1); - vnet_buffer(p1)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; - vnet_buffer(p1)->ip_frag.mtu = d1->mtu; - vnet_buffer(p1)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; - next1 = IP4_MAP_NEXT_FRAGMENT; + next1 = ip4_map_fragment(p1, d1->mtu, df1, &error1); } else { next1 = ip4_map_ip6_lookup_bypass(p1, ip41) ? IP4_MAP_NEXT_IP6_REWRITE : next1; vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index1, 1, @@ -369,13 +390,14 @@ ip4_map (vlib_main_t *vm, /* Decrement IPv4 TTL */ ip4_map_decrement_ttl(ip40, &error0); + bool df0 = ip40->flags_and_fragment_offset & clib_host_to_net_u16(IP4_HEADER_FLAG_DONT_FRAGMENT); /* MAP calc */ u32 da40 = clib_net_to_host_u32(ip40->dst_address.as_u32); u16 dp40 = clib_net_to_host_u16(port0); u64 dal60 = map_get_pfx(d0, da40, dp40); u64 dar60 = map_get_sfx(d0, da40, dp40); - if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE) error0 = MAP_ERROR_UNKNOWN; + if (dal60 == 0 && dar60 == 0 && error0 == MAP_ERROR_NONE) error0 = MAP_ERROR_NO_BINDING; /* construct ipv6 header */ vlib_buffer_advance(p0, - (sizeof(ip6_header_t))); @@ -396,11 +418,7 @@ ip4_map (vlib_main_t *vm, */ if (PREDICT_TRUE(error0 == MAP_ERROR_NONE)) { if (PREDICT_FALSE(d0->mtu && (clib_net_to_host_u16(ip6h0->payload_length) + sizeof(*ip6h0) > d0->mtu))) { - vnet_buffer(p0)->ip_frag.header_offset = sizeof(*ip6h0); - vnet_buffer(p0)->ip_frag.next_index = IP4_FRAG_NEXT_IP6_LOOKUP; - vnet_buffer(p0)->ip_frag.mtu = d0->mtu; - vnet_buffer(p0)->ip_frag.flags = IP_FRAG_FLAG_IP6_HEADER; - next0 = IP4_MAP_NEXT_FRAGMENT; + next0 = ip4_map_fragment(p0, d0->mtu, df0, &error0); } else { next0 = ip4_map_ip6_lookup_bypass(p0, ip40) ? IP4_MAP_NEXT_IP6_REWRITE : next0; vlib_increment_combined_counter(cm + MAP_DOMAIN_COUNTER_TX, cpu_index, map_domain_index0, 1, @@ -591,8 +609,10 @@ VLIB_REGISTER_NODE(ip4_map_node) = { #ifdef MAP_SKIP_IP6_LOOKUP [IP4_MAP_NEXT_IP6_REWRITE] = "ip6-rewrite", #endif - [IP4_MAP_NEXT_FRAGMENT] = "ip4-frag", + [IP4_MAP_NEXT_IP4_FRAGMENT] = "ip4-frag", + [IP4_MAP_NEXT_IP6_FRAGMENT] = "ip6-frag", [IP4_MAP_NEXT_REASS] = "ip4-map-reass", + [IP4_MAP_NEXT_ICMP_ERROR] = "ip4-icmp-error", [IP4_MAP_NEXT_DROP] = "error-drop", }, }; diff --git a/vnet/vnet/map/ip6_map.c b/vnet/vnet/map/ip6_map.c index 7ef85e791f5..208f45f521c 100644 --- a/vnet/vnet/map/ip6_map.c +++ b/vnet/vnet/map/ip6_map.c @@ -27,6 +27,7 @@ enum ip6_map_next_e { IP6_MAP_NEXT_IP6_ICMP_RELAY, IP6_MAP_NEXT_IP6_LOCAL, IP6_MAP_NEXT_DROP, + IP6_MAP_NEXT_ICMP, IP6_MAP_N_NEXT, }; @@ -142,7 +143,6 @@ ip6_map_ip4_lookup_bypass (vlib_buffer_t *p0, ip4_header_t *ip) return (false); } - /* * ip6_map */ @@ -230,7 +230,6 @@ ip6_map (vlib_main_t *vm, next0 = IP6_MAP_NEXT_IP6_REASS; } else { error0 = MAP_ERROR_BAD_PROTOCOL; - next0 = IP6_MAP_NEXT_DROP; } if (PREDICT_TRUE(ip61->protocol == IP_PROTOCOL_IP_IN_IP && clib_net_to_host_u16(ip61->payload_length) > 20)) { d1 = ip6_map_get_domain(vnet_buffer(p1)->ip.adj_index[VLIB_TX], (ip4_address_t *)&ip41->src_address.as_u32, @@ -244,7 +243,6 @@ ip6_map (vlib_main_t *vm, next1 = IP6_MAP_NEXT_IP6_REASS; } else { error1 = MAP_ERROR_BAD_PROTOCOL; - next1 = IP6_MAP_NEXT_DROP; } if (d0) { @@ -298,6 +296,32 @@ ip6_map (vlib_main_t *vm, tr->port = port1; } + if (error0 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled) { + /* Set ICMP parameters */ + vlib_buffer_advance(p0, -sizeof(ip6_header_t)); + icmp6_error_set_vnet_buffer(p0, ICMP6_destination_unreachable, + ICMP6_destination_unreachable_source_address_failed_policy, 0); + next0 = IP6_MAP_NEXT_ICMP; + } else { + next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP; + } + + if (error1 == MAP_ERROR_DECAP_SEC_CHECK && mm->icmp6_enabled) { + /* Set ICMP parameters */ + vlib_buffer_advance(p1, -sizeof(ip6_header_t)); + icmp6_error_set_vnet_buffer(p1, ICMP6_destination_unreachable, + ICMP6_destination_unreachable_source_address_failed_policy, 0); + next1 = IP6_MAP_NEXT_ICMP; + } else { + next1 = (error1 == MAP_ERROR_NONE) ? next1 : IP6_MAP_NEXT_DROP; + } + + /* Reset packet */ + if (next0 == IP6_MAP_NEXT_IP6_LOCAL) + vlib_buffer_advance(p0, -sizeof(ip6_header_t)); + if (next1 == IP6_MAP_NEXT_IP6_LOCAL) + vlib_buffer_advance(p1, -sizeof(ip6_header_t)); + p0->error = error_node->errors[error0]; p1->error = error_node->errors[error1]; vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next, n_left_to_next, pi0, pi1, next0, next1); @@ -377,7 +401,21 @@ ip6_map (vlib_main_t *vm, tr->port = (u16)port0; } - next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP; + if (mm->icmp6_enabled && + (error0 == MAP_ERROR_DECAP_SEC_CHECK || error0 == MAP_ERROR_NO_DOMAIN)) { + /* Set ICMP parameters */ + vlib_buffer_advance(p0, -sizeof(ip6_header_t)); + icmp6_error_set_vnet_buffer(p0, ICMP6_destination_unreachable, + ICMP6_destination_unreachable_source_address_failed_policy, 0); + next0 = IP6_MAP_NEXT_ICMP; + } else { + next0 = (error0 == MAP_ERROR_NONE) ? next0 : IP6_MAP_NEXT_DROP; + } + + /* Reset packet */ + if (next0 == IP6_MAP_NEXT_IP6_LOCAL) + vlib_buffer_advance(p0, -sizeof(ip6_header_t)); + p0->error = error_node->errors[error0]; vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, pi0, next0); } @@ -626,7 +664,7 @@ ip6_map_ip4_reass (vlib_main_t *vm, ip4_header_t *ip40; ip6_header_t *ip60; i32 port0 = 0; - u32 map_domain_index0; + u32 map_domain_index0 = ~0; u32 next0 = IP6_MAP_IP4_REASS_NEXT_IP4_LOOKUP; u8 cached = 0; @@ -860,7 +898,7 @@ ip6_map_icmp_relay (vlib_main_t *vm, new_ip40->fragment_id = fid[0]; fid++; new_ip40->ttl = 64; new_ip40->protocol = IP_PROTOCOL_ICMP; - new_ip40->src_address = mm->icmp_src_address; + new_ip40->src_address = mm->icmp4_src_address; new_ip40->dst_address = inner_ip40->src_address; new_ip40->checksum = ip4_header_checksum(new_ip40); @@ -916,6 +954,7 @@ VLIB_REGISTER_NODE(ip6_map_node) = { [IP6_MAP_NEXT_IP6_ICMP_RELAY] = "ip6-map-icmp-relay", [IP6_MAP_NEXT_IP6_LOCAL] = "ip6-local", [IP6_MAP_NEXT_DROP] = "error-drop", + [IP6_MAP_NEXT_ICMP] = "ip6-icmp-error", }, }; diff --git a/vnet/vnet/map/map.c b/vnet/vnet/map/map.c index b7eb1f14839..a63122b58f3 100644 --- a/vnet/vnet/map/map.c +++ b/vnet/vnet/map/map.c @@ -602,8 +602,7 @@ map_icmp_relay_source_address_command_fn (vlib_main_t *vm, ip4_address_t icmp_src_address; map_main_t *mm = &map_main; - memset(&icmp_src_address, 0, sizeof(icmp_src_address)); - + mm->icmp4_src_address.as_u32 = 0; /* Get a line of input. */ if (!unformat_user(input, unformat_line_input, line_input)) @@ -611,7 +610,91 @@ map_icmp_relay_source_address_command_fn (vlib_main_t *vm, while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { if (unformat(line_input, "%U", unformat_ip4_address, &icmp_src_address)) - mm->icmp_src_address = icmp_src_address; + mm->icmp4_src_address = icmp_src_address; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + return 0; +} + +static clib_error_t * +map_icmp_unreachables_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + int num_m_args = 0; + + /* Get a line of input. */ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + num_m_args++; + if (unformat(line_input, "on")) + mm->icmp6_enabled = true; + else if (unformat(line_input, "off")) + mm->icmp6_enabled = false; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + + if (num_m_args != 1) + return clib_error_return(0, "mandatory argument(s) missing"); + + return 0; +} + +static clib_error_t * +map_fragment_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + + /* Get a line of input. */ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "inner")) + mm->frag_inner = true; + else if (unformat(line_input, "outer")) + mm->frag_inner = false; + else + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + unformat_free(line_input); + + return 0; +} + +static clib_error_t * +map_fragment_df_command_fn (vlib_main_t *vm, + unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + map_main_t *mm = &map_main; + + /* Get a line of input. */ + if (!unformat_user(input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input(line_input) != UNFORMAT_END_OF_INPUT) { + if (unformat(line_input, "on")) + mm->frag_ignore_df = true; + else if (unformat(line_input, "off")) + mm->frag_ignore_df = false; else return clib_error_return(0, "unknown input `%U'", format_unformat_error, input); @@ -833,9 +916,13 @@ show_map_stats_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_co else vlib_cli_output(vm, "MAP traffic-class: %x", mm->tc); - vlib_cli_output(vm, "MAP IPv6 inbound security check: %s Fragments: %s", mm->sec_check ? "enabled" : "disabled", + vlib_cli_output(vm, "MAP IPv6 inbound security check: %s, fragmented packet security check: %s", mm->sec_check ? "enabled" : "disabled", mm->sec_check_frag ? "enabled" : "disabled"); + vlib_cli_output(vm, "ICMP-relay IPv4 source address: %U\n", format_ip4_address, &mm->icmp4_src_address); + vlib_cli_output(vm, "ICMP6 unreachables sent for unmatched packets: %s\n", mm->icmp6_enabled ? "enabled" : "disabled"); + vlib_cli_output(vm, "Inner fragmentation: %s\n", mm->frag_inner ? "enabled" : "disabled"); + vlib_cli_output(vm, "Fragment packets regardless of DF flag: %s\n", mm->frag_ignore_df ? "enabled" : "disabled"); /* * Counters @@ -861,9 +948,9 @@ show_map_stats_command_fn (vlib_main_t *vm, unformat_input_t *input, vlib_cli_co } map_domain_counter_unlock (mm); - vlib_cli_output(vm, "Encapsulated packets: %d bytes: %d\n", total_pkts[MAP_DOMAIN_COUNTER_TX], + vlib_cli_output(vm, "Encapsulated packets: %lld bytes: %lld\n", total_pkts[MAP_DOMAIN_COUNTER_TX], total_bytes[MAP_DOMAIN_COUNTER_TX]); - vlib_cli_output(vm, "Decapsulated packets: %d bytes: %d\n", total_pkts[MAP_DOMAIN_COUNTER_RX], + vlib_cli_output(vm, "Decapsulated packets: %lld bytes: %lld\n", total_pkts[MAP_DOMAIN_COUNTER_RX], total_bytes[MAP_DOMAIN_COUNTER_RX]); vlib_cli_output(vm, "ICMP relayed packets: %d\n", vlib_get_simple_counter(&mm->icmp_relayed, 0)); @@ -1524,12 +1611,29 @@ VLIB_CLI_COMMAND(map_security_check_command, static) = { }; VLIB_CLI_COMMAND(map_icmp_relay_source_address_command, static) = { - .path = "map params icmp-source-address", - .short_help = - "icmp-source-address <ip4-address>", + .path = "map params icmp source-address", + .short_help = "source-address <ip4-address>", .function = map_icmp_relay_source_address_command_fn, }; +VLIB_CLI_COMMAND(map_icmp_unreachables_command, static) = { + .path = "map params icmp6 unreachables", + .short_help = "unreachables {on|off}", + .function = map_icmp_unreachables_command_fn, +}; + +VLIB_CLI_COMMAND(map_fragment_command, static) = { + .path = "map params fragment", + .short_help = "[inner|outer] [ignore-df [on|off]]", + .function = map_fragment_command_fn, +}; + +VLIB_CLI_COMMAND(map_fragment_df_command, static) = { + .path = "map params fragment ignore-df", + .short_help = "on|off", + .function = map_fragment_df_command_fn, +}; + VLIB_CLI_COMMAND(map_security_check_frag_command, static) = { .path = "map params security-check fragments", .short_help = @@ -1598,6 +1702,13 @@ clib_error_t *map_init (vlib_main_t *vm) mm->sec_check = true; mm->sec_check_frag = false; + /* ICMP6 Type 1, Code 5 for security check failure */ + mm->icmp6_enabled = false; + + /* Inner or outer fragmentation */ + mm->frag_inner = false; + mm->frag_ignore_df = false; + vec_validate(mm->domain_counters, MAP_N_DOMAIN_COUNTER - 1); mm->domain_counters[MAP_DOMAIN_COUNTER_RX].name = "rx"; mm->domain_counters[MAP_DOMAIN_COUNTER_TX].name = "tx"; diff --git a/vnet/vnet/map/map.h b/vnet/vnet/map/map.h index 95b842d4782..d38d7f47241 100644 --- a/vnet/vnet/map/map.h +++ b/vnet/vnet/map/map.h @@ -190,9 +190,6 @@ typedef struct { vlib_combined_counter_main_t *domain_counters; volatile u32 *counter_lock; - /* Global counters */ - vlib_simple_counter_main_t icmp_relayed; - #ifdef MAP_SKIP_IP6_LOOKUP /* pre-presolve */ u32 adj6_index, adj4_index; @@ -203,11 +200,14 @@ typedef struct { /* Traffic class: zero, copy (~0) or fixed value */ u8 tc; bool tc_copy; - bool sec_check; - bool sec_check_frag; + + bool sec_check; /* Inbound security check */ + bool sec_check_frag; /* Inbound security check for (subsequent) fragments */ + bool icmp6_enabled; /* Send destination unreachable for security check failure */ /* ICMPv6 -> ICMPv4 relay parameters */ - ip4_address_t icmp_src_address; + ip4_address_t icmp4_src_address; + vlib_simple_counter_main_t icmp_relayed; /* convenience */ vlib_main_t *vlib_main; @@ -216,13 +216,13 @@ typedef struct { /* * IPv4 encap and decap reassembly */ - //Conf + /* Configuration */ f32 ip4_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size)) u16 ip4_reass_conf_pool_size; //Max number of allocated reass structures u16 ip4_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms u32 ip4_reass_conf_buffers; //Maximum number of buffers used by ip4 reassembly - //Runtime + /* Runtime */ map_ip4_reass_t *ip4_reass_pool; u8 ip4_reass_ht_log2len; //Hash table size is 2^log2len u16 ip4_reass_allocated; @@ -230,19 +230,22 @@ typedef struct { u16 ip4_reass_fifo_last; volatile u32 *ip4_reass_lock; - //Counters + /* Counters */ u32 ip4_reass_buffered_counter; + bool frag_inner; /* Inner or outer fragmentation */ + bool frag_ignore_df; /* Fragment (outer) packet even if DF is set */ + /* * IPv6 decap reassembly */ - //Conf + /* Configuration */ f32 ip6_reass_conf_ht_ratio; //Size of ht is 2^ceil(log2(ratio*pool_size)) u16 ip6_reass_conf_pool_size; //Max number of allocated reass structures u16 ip6_reass_conf_lifetime_ms; //Time a reassembly struct is considered valid in ms u32 ip6_reass_conf_buffers; //Maximum number of buffers used by ip6 reassembly - //Runtime + /* Runtime */ map_ip6_reass_t *ip6_reass_pool; u8 ip6_reass_ht_log2len; //Hash table size is 2^log2len u16 ip6_reass_allocated; @@ -250,32 +253,32 @@ typedef struct { u16 ip6_reass_fifo_last; volatile u32 *ip6_reass_lock; - //Counters + /* Counters */ u32 ip6_reass_buffered_counter; } map_main_t; /* - * TODO: Remove SEC_CHECK / TRANSLATED_4TO6 / TRANSLATED_6TO4 + * MAP Error counters/messages */ #define foreach_map_error \ /* Must be first. */ \ _(NONE, "valid MAP packets") \ _(BAD_PROTOCOL, "bad protocol") \ - _(WRONG_ICMP_TYPE, "wrong icmp type") \ _(SEC_CHECK, "security check failed") \ _(ENCAP_SEC_CHECK, "encap security check failed") \ _(DECAP_SEC_CHECK, "decap security check failed") \ _(ICMP, "unable to translate ICMP") \ _(ICMP_RELAY, "unable to relay ICMP") \ _(UNKNOWN, "unknown") \ + _(NO_BINDING, "no binding") \ _(NO_DOMAIN, "no domain") \ _(FRAGMENTED, "packet is a fragment") \ _(FRAGMENT_MEMORY, "could not cache fragment") \ _(FRAGMENT_MALFORMED, "fragment has unexpected format")\ _(FRAGMENT_DROPPED, "dropped cached fragment") \ _(MALFORMED, "malformed packet") \ - _(IP4_ERROR_TIME_EXPIRED, "time expired") + _(DF_SET, "can't fragment, DF set") typedef enum { #define _(sym,str) MAP_ERROR_##sym, diff --git a/vnet/vnet/vcgn/cnat_db_v2.c b/vnet/vnet/vcgn/cnat_db_v2.c index c09a73ebb15..5a26fd99d6c 100644 --- a/vnet/vnet/vcgn/cnat_db_v2.c +++ b/vnet/vnet/vcgn/cnat_db_v2.c @@ -176,7 +176,7 @@ void cnat_table_entry_fill_map(u32 start_addr, u32 end_addr, void cnat_delete_session_db_entry (cnat_session_entry_t *ep, u8 log); -inline void handle_cnat_port_exceeded_logging( +void handle_cnat_port_exceeded_logging( cnat_user_db_entry_t *udb, cnat_key_t * key, cnat_vrfmap_t *vrfmap); @@ -3025,7 +3025,7 @@ static inline void handle_dslite_port_exceeded_logging( } #endif -inline void handle_cnat_port_exceeded_logging( +void handle_cnat_port_exceeded_logging( cnat_user_db_entry_t *udb, cnat_key_t * key, cnat_vrfmap_t *vrfmap) diff --git a/vnet/vnet/vcgn/cnat_ipv4_icmp_error_inside_input.c b/vnet/vnet/vcgn/cnat_ipv4_icmp_error_inside_input.c index 218d7e538fa..998663d73cb 100644 --- a/vnet/vnet/vcgn/cnat_ipv4_icmp_error_inside_input.c +++ b/vnet/vnet/vcgn/cnat_ipv4_icmp_error_inside_input.c @@ -62,7 +62,7 @@ vlib_node_registration_t cnat_ipv4_icmp_e_inside_input_node; #define NSTAGES 5 -inline void swap_ip_src_emip_dst(ipv4_header *ip, +void swap_ip_src_emip_dst(ipv4_header *ip, icmp_em_ip_info *icmp_info, cnat_main_db_entry_t *db, u16 vrf) { diff --git a/vnet/vnet/vcgn/cnat_ipv4_icmp_error_outside_input.c b/vnet/vnet/vcgn/cnat_ipv4_icmp_error_outside_input.c index f25f4d022c7..ae856da1a4e 100644 --- a/vnet/vnet/vcgn/cnat_ipv4_icmp_error_outside_input.c +++ b/vnet/vnet/vcgn/cnat_ipv4_icmp_error_outside_input.c @@ -62,7 +62,7 @@ vlib_node_registration_t cnat_ipv4_icmp_e_outside_input_node; #define NSTAGES 5 -inline void swap_ip_dst_emip_src(ipv4_header *ip, +void swap_ip_dst_emip_src(ipv4_header *ip, icmp_em_ip_info *icmp_info, cnat_main_db_entry_t *db, u16 vrf) { diff --git a/vnet/vnet/vcgn/cnat_ipv4_icmp_query_inside_input.c b/vnet/vnet/vcgn/cnat_ipv4_icmp_query_inside_input.c index 1b9f0266d71..0d261bf030b 100644 --- a/vnet/vnet/vcgn/cnat_ipv4_icmp_query_inside_input.c +++ b/vnet/vnet/vcgn/cnat_ipv4_icmp_query_inside_input.c @@ -65,7 +65,7 @@ vlib_node_registration_t cnat_ipv4_icmp_q_inside_input_node; #define NSTAGES 5 -inline void swap_ip_src_icmp_id(ipv4_header *ip, +void swap_ip_src_icmp_id(ipv4_header *ip, icmp_v4_t *icmp, cnat_main_db_entry_t *db, u16 vrf) { diff --git a/vnet/vnet/vcgn/cnat_ipv4_icmp_query_outside_input.c b/vnet/vnet/vcgn/cnat_ipv4_icmp_query_outside_input.c index 2c05e0b400e..62b95f7ccc0 100644 --- a/vnet/vnet/vcgn/cnat_ipv4_icmp_query_outside_input.c +++ b/vnet/vnet/vcgn/cnat_ipv4_icmp_query_outside_input.c @@ -64,7 +64,7 @@ vlib_node_registration_t cnat_ipv4_icmp_q_outside_input_node; #define NSTAGES 5 -inline void swap_ip_dst_icmp_id(ipv4_header *ip, +void swap_ip_dst_icmp_id(ipv4_header *ip, icmp_v4_t *icmp, cnat_main_db_entry_t *db, u16 vrf) { diff --git a/vnet/vnet/vcgn/cnat_ipv4_udp_inside_input.c b/vnet/vnet/vcgn/cnat_ipv4_udp_inside_input.c index 657c5f1e64e..246bec1d02f 100644 --- a/vnet/vnet/vcgn/cnat_ipv4_udp_inside_input.c +++ b/vnet/vnet/vcgn/cnat_ipv4_udp_inside_input.c @@ -111,7 +111,7 @@ is_pcp_pkt(spp_ctx_t *ctx, u32 addr, u16 port) } #endif -inline void swap_ip_src_udp_port(ipv4_header *ip, +void swap_ip_src_udp_port(ipv4_header *ip, udp_hdr_type_t *udp, cnat_main_db_entry_t *db) { diff --git a/vnet/vnet/vcgn/cnat_ipv4_udp_outside_input.c b/vnet/vnet/vcgn/cnat_ipv4_udp_outside_input.c index 203568c8e0d..5783314a7ea 100644 --- a/vnet/vnet/vcgn/cnat_ipv4_udp_outside_input.c +++ b/vnet/vnet/vcgn/cnat_ipv4_udp_outside_input.c @@ -177,7 +177,7 @@ inline void swap_ip_dst(ipv4_header *ip, #endif } -inline void swap_ip_dst_udp_port(ipv4_header *ip, +void swap_ip_dst_udp_port(ipv4_header *ip, udp_hdr_type_t *udp, cnat_main_db_entry_t *db, u16 vrf) { diff --git a/vnet/vnet/vcgn/cnat_logging.c b/vnet/vnet/vcgn/cnat_logging.c index eace942c657..f46dd0ac95a 100644 --- a/vnet/vnet/vcgn/cnat_logging.c +++ b/vnet/vnet/vcgn/cnat_logging.c @@ -310,7 +310,7 @@ cnat_nfv9_dump_logging_context (u32 value1, * Argument: u8 *new_addr, u8 *org_addr * returns the difference */ -inline + int cnat_nfv9_pad_added_to_an_addr(u8 *new_addr, u8 *org_addr) { uword addr1 = (uword) new_addr; @@ -326,7 +326,7 @@ int cnat_nfv9_pad_added_to_an_addr(u8 *new_addr, u8 *org_addr) * Argument: u8 * data_ptr * pointer to the data pointer */ -inline + u8 *cnat_nfv9_add_end_of_record_padding (u8 *data_ptr) { uword tmp = (uword) data_ptr; @@ -345,7 +345,7 @@ u8 *cnat_nfv9_add_end_of_record_padding (u8 *data_ptr) * Argument: u8 * data_ptr * pointer to the data pointer */ -inline + u16 cnat_nfv9_pad_end_of_record_length (u16 record_length) { u16 pad_value = NFV9_PAD_VALUE; @@ -419,7 +419,7 @@ void fill_ip_n_udp_hdr (u32 ipv4_addr, u16 port, * Argument: cnat_nfv9_logging_info_t *nfv9_logging_info * structure that contains the packet context */ -inline + void cnat_nfv9_fill_nfv9_ip_header (cnat_nfv9_logging_info_t *nfv9_logging_info) { u16 new_record_length = 0; @@ -488,7 +488,7 @@ void cnat_nfv9_fill_nfv9_ip_header (cnat_nfv9_logging_info_t *nfv9_logging_info) * Argument: cnat_nfv9_logging_info_t *nfv9_logging_info * structure that contains the packet context */ -inline + void cnat_nfv9_send_queued_pkt (cnat_nfv9_logging_info_t *nfv9_logging_info) { } @@ -503,7 +503,7 @@ void cnat_nfv9_send_queued_pkt (cnat_nfv9_logging_info_t *nfv9_logging_info) * Argument: cnat_nfv9_logging_info_t *nfv9_logging_info * structure that contains the packet context */ -inline + void cnat_nfv9_send_pkt (cnat_nfv9_logging_info_t *nfv9_logging_info) { cnat_nfv9_fill_nfv9_ip_header(nfv9_logging_info); @@ -606,7 +606,7 @@ inline void send_vpp3_nfv9_pkt (cnat_nfv9_logging_info_t *nfv9_logging_info) /* * send_vpp3_nfv9_pkt: to send one b0 in a frame */ -inline void send_vpp3_nfv9_pkt (cnat_nfv9_logging_info_t *nfv9_logging_info) +void send_vpp3_nfv9_pkt (cnat_nfv9_logging_info_t *nfv9_logging_info) { vlib_node_t *output_node; vlib_main_t *vm = vlib_get_main(); @@ -647,7 +647,7 @@ inline void send_vpp3_nfv9_pkt (cnat_nfv9_logging_info_t *nfv9_logging_info) * Argument: vlib_node_t *output_node * vlib_node_t structure for rewrite_output node */ -inline + void cnat_nfv9_send_pkt_always_success ( cnat_nfv9_logging_info_t *nfv9_logging_info, vlib_node_t *output_node) @@ -722,7 +722,7 @@ void cnat_nfv9_send_pkt_always_success ( * structure that contains the nfv9 logging info and will store * the packet context as well. */ -inline + void cnat_nfv9_create_logging_context ( cnat_nfv9_logging_info_t *nfv9_logging_info, cnat_nfv9_template_add_flag_t template_flag) @@ -851,7 +851,6 @@ void cnat_nfv9_create_logging_context ( } } -inline void cnat_nfv9_record_create ( cnat_nfv9_logging_info_t *nfv9_logging_info, u16 cur_record) { diff --git a/vnet/vnet/vcgn/cnat_util.c b/vnet/vnet/vcgn/cnat_util.c index 2415c5437fd..c3697b6be2f 100644 --- a/vnet/vnet/vcgn/cnat_util.c +++ b/vnet/vnet/vcgn/cnat_util.c @@ -2132,7 +2132,6 @@ int icmpv6_generate_with_throttling_v1 (spp_ctx_t *ctx, ipv6_header_t *ipv6, } #endif -inline void calculate_window_scale(tcp_hdr_type *tcp_header, u8 *scale) { u8 check_options = 0; diff --git a/vnet/vnet/vcgn/cnat_v4_functions.c b/vnet/vnet/vcgn/cnat_v4_functions.c index d3051fba5a7..bb25f5ef9bd 100644 --- a/vnet/vnet/vcgn/cnat_v4_functions.c +++ b/vnet/vnet/vcgn/cnat_v4_functions.c @@ -264,7 +264,7 @@ print_tcp_pkt (ipv4_header *ip) * ip & tcp checksum update (incremental) */ -inline void tcp_in2out_nat_mss_n_checksum (ipv4_header * ip, +void tcp_in2out_nat_mss_n_checksum (ipv4_header * ip, tcp_hdr_type * tcp, u32 ipv4_addr, u16 port, diff --git a/vnet/vnet/vcgn/spp_platform_trace_log.c b/vnet/vnet/vcgn/spp_platform_trace_log.c index 2c119f06ba2..8d1f49bf3ac 100644 --- a/vnet/vnet/vcgn/spp_platform_trace_log.c +++ b/vnet/vnet/vcgn/spp_platform_trace_log.c @@ -397,7 +397,6 @@ u32 spp_trace_log_get_sys_up_time_in_ms (void) return (sys_up_time); } -inline u32 spp_trace_log_get_unix_time_in_seconds (void) { spp_node_main_vector_t *nmv; @@ -978,7 +977,6 @@ void spp_printf(u16 error_code, u16 num_args, u32 *arg) /* To be filled */ } -inline u32 spp_trace_log_get_unix_time_in_seconds (void) { vlib_main_t *vlib_main; |