diff options
Diffstat (limited to 'src/vnet/ip')
55 files changed, 33459 insertions, 0 deletions
diff --git a/src/vnet/ip/dir.dox b/src/vnet/ip/dir.dox new file mode 100644 index 00000000..a4eb7337 --- /dev/null +++ b/src/vnet/ip/dir.dox @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Doxygen directory documentation */ + +/** +@dir +@brief Layer 3 IP Code. + +This directory contains the source code for IP routing. + +*/ +/*? %%clicmd:group_label Layer 3 IP CLI %% ?*/ diff --git a/src/vnet/ip/format.c b/src/vnet/ip/format.c new file mode 100644 index 00000000..be1c4fd3 --- /dev/null +++ b/src/vnet/ip/format.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/ip_format.c: ip generic (4 or 6) formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> + +/* Format IP protocol. 
*/ +u8 * +format_ip_protocol (u8 * s, va_list * args) +{ + ip_protocol_t protocol = va_arg (*args, ip_protocol_t); + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi = ip_get_protocol_info (im, protocol); + + if (pi) + return format (s, "%s", pi->name); + else + return format (s, "unknown %d", protocol); +} + +uword +unformat_ip_protocol (unformat_input_t * input, va_list * args) +{ + u8 *result = va_arg (*args, u8 *); + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi; + int i; + + if (!unformat_user (input, unformat_vlib_number_by_name, + im->protocol_info_by_name, &i)) + return 0; + + pi = vec_elt_at_index (im->protocol_infos, i); + *result = pi->protocol; + return 1; +} + +u8 * +format_tcp_udp_port (u8 * s, va_list * args) +{ + int port = va_arg (*args, int); + ip_main_t *im = &ip_main; + tcp_udp_port_info_t *pi; + + pi = ip_get_tcp_udp_port_info (im, port); + if (pi) + s = format (s, "%s", pi->name); + else + s = format (s, "%d", clib_net_to_host_u16 (port)); + + return s; +} + +uword +unformat_tcp_udp_port (unformat_input_t * input, va_list * args) +{ + u16 *result = va_arg (*args, u16 *); + ip_main_t *im = &ip_main; + tcp_udp_port_info_t *pi; + u32 i, port; + + + if (unformat_user (input, unformat_vlib_number_by_name, + im->port_info_by_name, &i)) + { + pi = vec_elt_at_index (im->port_infos, i); + port = pi->port; + } + else if (unformat_user (input, unformat_vlib_number, &port) + && port < (1 << 16)) + port = clib_host_to_net_u16 (port); + + else + return 0; + + *result = port; + return 1; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/format.h b/src/vnet/ip/format.h new file mode 100644 index 00000000..c35f0f4b --- /dev/null +++ b/src/vnet/ip/format.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/format.h: ip 4 and/or 6 formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip_format_h +#define included_ip_format_h + +/* IP4 or IP6. 
*/

/* IP protocol number <-> name.  format takes an ip_protocol_t;
 * unformat takes a u8 * that receives the protocol number. */
format_function_t format_ip_protocol;
unformat_function_t unformat_ip_protocol;

/* TCP/UDP port <-> name or number.  format takes an int;
 * unformat takes a u16 * that receives the port. */
format_function_t format_tcp_udp_port;
unformat_function_t unformat_tcp_udp_port;

/* Flags selecting how much detail format_ip_adjacency prints.
 * BRIEF is an alias for NONE (value 0); DETAIL is bit 0. */
typedef enum format_ip_adjacency_flags_t_
{
  FORMAT_IP_ADJACENCY_NONE,
  FORMAT_IP_ADJACENCY_BRIEF = FORMAT_IP_ADJACENCY_NONE,
  FORMAT_IP_ADJACENCY_DETAIL = (1 << 0),
} format_ip_adjacency_flags_t;

format_function_t format_ip_adjacency;
format_function_t format_ip_adjacency_packet_data;

format_function_t format_ip46_address;

/* Address family selector passed to unformat_ip46_address. */
typedef enum
{
  IP46_TYPE_ANY,
  IP46_TYPE_IP4,
  IP46_TYPE_IP6
} ip46_type_t;
/* unformat_ip46_address expects arguments (ip46_address_t *, ip46_type_t)
 * The type argument is used to enforce a particular IP version. */
unformat_function_t unformat_ip46_address;

/* IP4 */

/* Parse an IP4 address %d.%d.%d.%d. */
unformat_function_t unformat_ip4_address;

/* Format an IP4 address. */
format_function_t format_ip4_address;
format_function_t format_ip4_address_and_length;

/* Parse an IP4 header. */
unformat_function_t unformat_ip4_header;

/* Format an IP4 header. */
format_function_t format_ip4_header;

/* Parse an IP packet matching pattern. */
unformat_function_t unformat_ip4_match;

unformat_function_t unformat_pg_ip4_header;

/* IP6 */
unformat_function_t unformat_ip6_address;
format_function_t format_ip6_address;
format_function_t format_ip6_address_and_length;
unformat_function_t unformat_ip6_header;
format_function_t format_ip6_header;
unformat_function_t unformat_pg_ip6_header;

/* Format TCP/UDP headers. */
format_function_t format_tcp_header, format_udp_header;

unformat_function_t unformat_pg_tcp_header, unformat_pg_udp_header;

#endif /* included_ip_format_h */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c new file mode 100644 index 00000000..bbeab32b --- /dev/null +++ b/src/vnet/ip/icmp4.c @@ -0,0 +1,784 @@
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * ip/icmp4.c: ipv4 icmp
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <vlib/vlib.h>
#include <vnet/ip/ip.h>
#include <vnet/pg/pg.h>


/* Counter description strings: the string argument of every
 * foreach_icmp4_error entry, in declaration order.  Used as
 * .error_strings by the node registrations in this file. */
static char *icmp_error_strings[] = {
#define _(f,s) s,
  foreach_icmp4_error
#undef _
};

/*
 * format() callback printing an ICMPv4 type and, when known, its code.
 * va_list: type (read as int), code (read as int, kept in a u8).
 * Unknown types print as "unknown 0x<type>" and the code is not examined.
 */
static u8 *
format_ip4_icmp_type_and_code (u8 * s, va_list * args)
{
  icmp4_type_t type = va_arg (*args, int);
  u8 code = va_arg (*args, int);
  char *t = 0;

  /* Expand the type table into "case <number>: t = "<name>"". */
#define _(n,f) case n: t = #f; break;

  switch (type)
    {
      foreach_icmp4_type;

    default:
      break;
    }

#undef _

  if (!t)
    return format (s, "unknown 0x%x", type);

  s = format (s, "%s", t);

  /* Look the code up under the key (type << 8) | code; a pair that is
   * not listed in foreach_icmp4_code simply prints no code name. */
  t = 0;
  switch ((type << 8) | code)
    {
#define _(a,n,f) case (ICMP4_##a << 8) | (n): t = #f; break;

      foreach_icmp4_code;

#undef _
    }

  if (t)
    s = format (s, " %s", t);

  return s;
}

/*
 * format() callback for an ICMPv4 header.
 * va_list: icmp46_header_t *icmp, u32 max_header_bytes.
 * Prints "ICMP header truncated" when fewer than sizeof (icmp[0]) bytes
 * are available, otherwise the type/code names and the checksum.
 */
static u8 *
format_ip4_icmp_header (u8 * s, va_list * args)
{
  icmp46_header_t *icmp = va_arg (*args, icmp46_header_t *);
  u32 max_header_bytes = va_arg (*args, u32);

  /* Nothing to do. 
*/
  if (max_header_bytes < sizeof (icmp[0]))
    return format (s, "ICMP header truncated");

  s = format (s, "ICMP %U checksum 0x%x",
	      format_ip4_icmp_type_and_code, icmp->type, icmp->code,
	      clib_net_to_host_u16 (icmp->checksum));

  return s;
}

/*
 * Trace formatter shared by the ICMP nodes in this file.
 * va_list: vlib_main_t * (unused), vlib_node_t * (unused),
 * icmp_input_trace_t *.  The captured bytes are printed with
 * format_ip4_header.
 */
static u8 *
format_icmp_input_trace (u8 * s, va_list * va)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
  icmp_input_trace_t *t = va_arg (*va, icmp_input_trace_t *);

  s = format (s, "%U",
	      format_ip4_header, t->packet_data, sizeof (t->packet_data));

  return s;
}

/* Next-node slots declared statically by ip4-icmp-input. */
typedef enum
{
  ICMP_INPUT_NEXT_ERROR,
  ICMP_INPUT_N_NEXT,
} icmp_input_next_t;

/* Per-process ICMPv4 state. */
typedef struct
{
  /* Name hash consulted first by unformat_icmp_type_and_code; values are
   * decoded there as (type << 8) | code. */
  uword *type_and_code_by_name;

  /* Name hash consulted second; values are decoded as a bare type. */
  uword *type_by_name;

  /* Vector dispatch table indexed by [icmp type]. */
  u8 ip4_input_next_index_by_type[256];
} icmp4_main_t;

icmp4_main_t icmp4_main;

/*
 * ip4-icmp-input node function: dispatches each buffer to the next node
 * recorded for its ICMP type in ip4_input_next_index_by_type.
 * Returns the number of buffers in the frame.
 */
static uword
ip4_icmp_input (vlib_main_t * vm,
		vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  icmp4_main_t *im = &icmp4_main;
  uword n_packets = frame->n_vectors;
  u32 *from, *to_next;
  u32 n_left_from, n_left_to_next, next;

  from = vlib_frame_vector_args (frame);
  n_left_from = n_packets;
  next = node->cached_next_index;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
				   /* stride */ 1,
				   sizeof (icmp_input_trace_t));

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  vlib_buffer_t *p0;
	  ip4_header_t *ip0;
	  icmp46_header_t *icmp0;
	  icmp4_type_t type0;
	  u32 bi0, next0;

	  /* Prefetch the buffer two ahead and the header of the next one;
	   * p0/ip0 are only scratch here and are reloaded below. */
	  if (PREDICT_TRUE (n_left_from > 2))
	    {
	      vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
	      p0 = vlib_get_buffer (vm, from[1]);
	      ip0 = vlib_buffer_get_current (p0);
	      CLIB_PREFETCH (ip0, CLIB_CACHE_LINE_BYTES, LOAD);
	    }

	  /* Speculatively enqueue to the current next frame. */
	  bi0 = to_next[0] = from[0];

	  from += 1;
	  n_left_from -= 1;
	  to_next += 1;
	  n_left_to_next -= 1;

	  p0 = vlib_get_buffer (vm, bi0);
	  ip0 = vlib_buffer_get_current (p0);
	  icmp0 = ip4_next_header (ip0);
	  type0 = icmp0->type;
	  next0 = im->ip4_input_next_index_by_type[type0];

	  /* The error is set on every buffer, whichever next node is
	   * chosen. */
	  p0->error = node->errors[ICMP4_ERROR_UNKNOWN_TYPE];
	  if (PREDICT_FALSE (next0 != next))
	    {
	      /* Wrong guess: hand back the slot just taken (+ 1), switch
	       * to next0's frame and enqueue the buffer there. */
	      vlib_put_next_frame (vm, node, next, n_left_to_next + 1);
	      next = next0;
	      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
	      to_next[0] = bi0;
	      to_next += 1;
	      n_left_to_next -= 1;
	    }
	}

      vlib_put_next_frame (vm, node, next, n_left_to_next);
    }

  return frame->n_vectors;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_icmp_input_node,static) = {
  .function = ip4_icmp_input,
  .name = "ip4-icmp-input",

  .vector_size = sizeof (u32),

  .format_trace = format_icmp_input_trace,

  .n_errors = ARRAY_LEN (icmp_error_strings),
  .error_strings = icmp_error_strings,

  .n_next_nodes = 1,
  .next_nodes = {
    [ICMP_INPUT_NEXT_ERROR] = "error-punt",
  },
};
/* *INDENT-ON* */

/*
 * ip4-icmp-echo-request node function: rewrites each echo request in
 * place into an echo reply (type, swapped addresses, TTL, fragment id,
 * incrementally updated checksums) and enqueues it to the next node.
 * Returns the number of buffers in the frame.
 */
static uword
ip4_icmp_echo_request (vlib_main_t * vm,
		       vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  uword n_packets = frame->n_vectors;
  u32 *from, *to_next;
  u32 n_left_from, n_left_to_next, next;
  ip4_main_t *i4m = &ip4_main;
  u16 *fragment_ids, *fid;
  u8 host_config_ttl = i4m->host_config.ttl;

  from = vlib_frame_vector_args (frame);
  n_left_from = n_packets;
  next = node->cached_next_index;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
				   /* stride */ 1,
				   sizeof (icmp_input_trace_t));

  /* Get random fragment IDs for replies. 
*/
  /* One u16 per packet in the frame; fid walks the array as replies
   * are built. */
  fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer,
						    n_packets *
						    sizeof (fragment_ids[0]));

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);

      /* Dual loop: two packets per iteration. */
      while (n_left_from > 2 && n_left_to_next > 2)
	{
	  vlib_buffer_t *p0, *p1;
	  ip4_header_t *ip0, *ip1;
	  icmp46_header_t *icmp0, *icmp1;
	  u32 bi0, src0, dst0;
	  u32 bi1, src1, dst1;
	  ip_csum_t sum0, sum1;

	  bi0 = to_next[0] = from[0];
	  bi1 = to_next[1] = from[1];

	  from += 2;
	  n_left_from -= 2;
	  to_next += 2;
	  n_left_to_next -= 2;

	  p0 = vlib_get_buffer (vm, bi0);
	  p1 = vlib_get_buffer (vm, bi1);
	  ip0 = vlib_buffer_get_current (p0);
	  ip1 = vlib_buffer_get_current (p1);
	  icmp0 = ip4_next_header (ip0);
	  icmp1 = ip4_next_header (ip1);

	  /* Replies are attributed to the local interface on RX. */
	  vnet_buffer (p0)->sw_if_index[VLIB_RX] =
	    vnet_main.local_interface_sw_if_index;
	  vnet_buffer (p1)->sw_if_index[VLIB_RX] =
	    vnet_main.local_interface_sw_if_index;

	  /* Update ICMP checksum. */
	  sum0 = icmp0->checksum;
	  sum1 = icmp1->checksum;

	  ASSERT (icmp0->type == ICMP4_echo_request);
	  ASSERT (icmp1->type == ICMP4_echo_request);
	  /* Only the type byte changes, so the checksum is patched
	   * incrementally instead of being recomputed. */
	  sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
				 icmp46_header_t, type);
	  sum1 = ip_csum_update (sum1, ICMP4_echo_request, ICMP4_echo_reply,
				 icmp46_header_t, type);
	  icmp0->type = ICMP4_echo_reply;
	  icmp1->type = ICMP4_echo_reply;

	  icmp0->checksum = ip_csum_fold (sum0);
	  icmp1->checksum = ip_csum_fold (sum1);

	  src0 = ip0->src_address.data_u32;
	  src1 = ip1->src_address.data_u32;
	  dst0 = ip0->dst_address.data_u32;
	  dst1 = ip1->dst_address.data_u32;

	  /* Swap source and destination address.
	     Does not change checksum. */
	  ip0->src_address.data_u32 = dst0;
	  ip1->src_address.data_u32 = dst1;
	  ip0->dst_address.data_u32 = src0;
	  ip1->dst_address.data_u32 = src1;

	  /* Update IP checksum. */
	  sum0 = ip0->checksum;
	  sum1 = ip1->checksum;

	  /* The old field value must be read before it is overwritten. */
	  sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
				 ip4_header_t, ttl);
	  sum1 = ip_csum_update (sum1, ip1->ttl, host_config_ttl,
				 ip4_header_t, ttl);
	  ip0->ttl = host_config_ttl;
	  ip1->ttl = host_config_ttl;

	  /* New fragment id. */
	  sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
				 ip4_header_t, fragment_id);
	  sum1 = ip_csum_update (sum1, ip1->fragment_id, fid[1],
				 ip4_header_t, fragment_id);
	  ip0->fragment_id = fid[0];
	  ip1->fragment_id = fid[1];
	  fid += 2;

	  ip0->checksum = ip_csum_fold (sum0);
	  ip1->checksum = ip_csum_fold (sum1);

	  /* Cross-check the incremental result against a full
	   * recomputation. */
	  ASSERT (ip0->checksum == ip4_header_checksum (ip0));
	  ASSERT (ip1->checksum == ip4_header_checksum (ip1));

	  p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
	  p1->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
	}

      /* Single loop: same steps, one packet per iteration. */
      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  vlib_buffer_t *p0;
	  ip4_header_t *ip0;
	  icmp46_header_t *icmp0;
	  u32 bi0, src0, dst0;
	  ip_csum_t sum0;

	  bi0 = to_next[0] = from[0];

	  from += 1;
	  n_left_from -= 1;
	  to_next += 1;
	  n_left_to_next -= 1;

	  p0 = vlib_get_buffer (vm, bi0);
	  ip0 = vlib_buffer_get_current (p0);
	  icmp0 = ip4_next_header (ip0);

	  vnet_buffer (p0)->sw_if_index[VLIB_RX] =
	    vnet_main.local_interface_sw_if_index;

	  /* Update ICMP checksum. */
	  sum0 = icmp0->checksum;

	  ASSERT (icmp0->type == ICMP4_echo_request);
	  sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
				 icmp46_header_t, type);
	  icmp0->type = ICMP4_echo_reply;
	  icmp0->checksum = ip_csum_fold (sum0);

	  /* Swap source and destination address. */
	  src0 = ip0->src_address.data_u32;
	  dst0 = ip0->dst_address.data_u32;
	  ip0->src_address.data_u32 = dst0;
	  ip0->dst_address.data_u32 = src0;

	  /* Update IP checksum. 
*/
	  sum0 = ip0->checksum;

	  sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
				 ip4_header_t, ttl);
	  ip0->ttl = host_config_ttl;

	  /* New fragment id. */
	  sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
				 ip4_header_t, fragment_id);
	  ip0->fragment_id = fid[0];
	  fid += 1;

	  ip0->checksum = ip_csum_fold (sum0);

	  ASSERT (ip0->checksum == ip4_header_checksum (ip0));

	  p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
	}

      vlib_put_next_frame (vm, node, next, n_left_to_next);
    }

  /* The counter is charged to ip4-icmp-input; this node's registration
   * sets no error strings of its own. */
  vlib_error_count (vm, ip4_icmp_input_node.index,
		    ICMP4_ERROR_ECHO_REPLIES_SENT, frame->n_vectors);

  return frame->n_vectors;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = {
  .function = ip4_icmp_echo_request,
  .name = "ip4-icmp-echo-request",

  .vector_size = sizeof (u32),

  .format_trace = format_icmp_input_trace,

  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "ip4-load-balance",
  },
};
/* *INDENT-ON* */

/* Next-node slots of ip4-icmp-error. */
typedef enum
{
  IP4_ICMP_ERROR_NEXT_DROP,
  IP4_ICMP_ERROR_NEXT_LOOKUP,
  IP4_ICMP_ERROR_N_NEXT,
} ip4_icmp_error_next_t;

/*
 * Record the ICMP type, code and 32-bit data word in the buffer's
 * vnet_buffer (b)->ip.icmp fields; ip4_icmp_error reads them back when
 * it builds the error message.
 */
void
icmp4_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code, u32 data)
{
  vnet_buffer (b)->ip.icmp.type = type;
  vnet_buffer (b)->ip.icmp.code = code;
  vnet_buffer (b)->ip.icmp.data = data;
}

/*
 * Map the ICMP type of a generated error message to the counter that
 * records it.  Types other than the three listed count as
 * ICMP4_ERROR_DROP.
 */
static u8
icmp4_icmp_type_to_error (u8 type)
{
  switch (type)
    {
    case ICMP4_destination_unreachable:
      return ICMP4_ERROR_DEST_UNREACH_SENT;
    case ICMP4_time_exceeded:
      return ICMP4_ERROR_TTL_EXPIRE_SENT;
    case ICMP4_parameter_problem:
      return ICMP4_ERROR_PARAM_PROBLEM_SENT;
    default:
      return ICMP4_ERROR_DROP;
    }
}

/*
 * ip4-icmp-error node function: turns each offending packet into an
 * ICMPv4 error message in place.  New IP and ICMP headers plus a 4-byte
 * data word are prepended to what is kept of the original packet;
 * type/code/data come from vnet_buffer (p0)->ip.icmp.  The result goes to
 * ip4-lookup, or to error-drop when the RX interface has no address to
 * use as source.
 * Returns the number of buffers in the frame.
 */
static uword
ip4_icmp_error (vlib_main_t * vm,
		vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  u32 *from, *to_next;
  uword n_left_from, n_left_to_next;
  ip4_icmp_error_next_t next_index;
  ip4_main_t *im = &ip4_main;
  ip_lookup_main_t *lm = &im->lookup_main;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
				   /* stride */ 1,
				   sizeof (icmp_input_trace_t));

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 pi0 = from[0];
	  u32 next0 = IP4_ICMP_ERROR_NEXT_LOOKUP;
	  u8 error0 = ICMP4_ERROR_NONE;
	  vlib_buffer_t *p0;
	  ip4_header_t *ip0, *out_ip0;
	  icmp46_header_t *icmp0;
	  u32 sw_if_index0, if_add_index0;
	  ip_csum_t sum;

	  /* Speculatively enqueue p0 to the current next frame */
	  to_next[0] = pi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  p0 = vlib_get_buffer (vm, pi0);
	  ip0 = vlib_buffer_get_current (p0);
	  sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];

	  /*
	   * RFC1812 says to keep as much of the original packet as
	   * possible within the minimum MTU (576). We cheat "a little"
	   * here by keeping whatever fits in the first buffer, to be more
	   * efficient
	   */
	  if (PREDICT_FALSE (p0->total_length_not_including_first_buffer))
	    {
	      /* clear current_length of all other buffers in chain */
	      vlib_buffer_t *b = p0;
	      p0->total_length_not_including_first_buffer = 0;
	      while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
		{
		  b = vlib_get_buffer (vm, b->next_buffer);
		  b->current_length = 0;
		}
	    }
	  /* NOTE(review): the 576-byte cap is applied before the new
	   * headers are prepended, so the finished datagram can exceed
	   * 576 bytes -- confirm this is intended. */
	  p0->current_length =
	    p0->current_length > 576 ? 576 : p0->current_length;

	  /* Add IP header and ICMPv4 header including a 4 byte data field */
	  vlib_buffer_advance (p0,
			       -sizeof (ip4_header_t) -
			       sizeof (icmp46_header_t) - 4);
	  out_ip0 = vlib_buffer_get_current (p0);
	  icmp0 = (icmp46_header_t *) & out_ip0[1];

	  /* Fill ip header fields */
	  out_ip0->ip_version_and_header_length = 0x45;
	  out_ip0->tos = 0;
	  out_ip0->length = clib_host_to_net_u16 (p0->current_length);
	  out_ip0->fragment_id = 0;
	  out_ip0->flags_and_fragment_offset = 0;
	  out_ip0->ttl = 0xff;
	  out_ip0->protocol = IP_PROTOCOL_ICMP;
	  /* ip0 still points at the original header: reply to its source. */
	  out_ip0->dst_address = ip0->src_address;
	  /* Source address: the address-pool entry recorded for the RX
	   * interface, when there is one. */
	  if_add_index0 = ~0;
	  if (PREDICT_TRUE (vec_len (lm->if_address_pool_index_by_sw_if_index)
			    > sw_if_index0))
	    if_add_index0 =
	      lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
	  if (PREDICT_TRUE (if_add_index0 != ~0))
	    {
	      ip_interface_address_t *if_add =
		pool_elt_at_index (lm->if_address_pool, if_add_index0);
	      ip4_address_t *if_ip =
		ip_interface_address_get_address (lm, if_add);
	      out_ip0->src_address = *if_ip;
	    }
	  else
	    {
	      /* interface has no IP4 address - should not happen */
	      next0 = IP4_ICMP_ERROR_NEXT_DROP;
	      error0 = ICMP4_ERROR_DROP;
	    }
	  out_ip0->checksum = ip4_header_checksum (out_ip0);

	  /* Fill icmp header fields */
	  icmp0->type = vnet_buffer (p0)->ip.icmp.type;
	  icmp0->code = vnet_buffer (p0)->ip.icmp.code;
	  /* The 4-byte data word sits right after the ICMP header. */
	  *((u32 *) (icmp0 + 1)) =
	    clib_host_to_net_u32 (vnet_buffer (p0)->ip.icmp.data);
	  /* Checksum covers everything after the new IP header and is
	   * computed with the checksum field zeroed. */
	  icmp0->checksum = 0;
	  sum =
	    ip_incremental_checksum (0, icmp0,
				     p0->current_length -
				     sizeof (ip4_header_t));
	  icmp0->checksum = ~ip_csum_fold (sum);

	  /* Update error status */
	  if (error0 == ICMP4_ERROR_NONE)
	    error0 = icmp4_icmp_type_to_error (icmp0->type);
	  vlib_error_count (vm, node->node_index, error0, 1);

	  /* Verify speculative enqueue, maybe switch current next frame */
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   pi0, next0);
	}
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
  .function = ip4_icmp_error,
  .name = "ip4-icmp-error",
  .vector_size = sizeof (u32),

  .n_errors = ARRAY_LEN (icmp_error_strings),
  .error_strings = icmp_error_strings,

  .n_next_nodes = IP4_ICMP_ERROR_N_NEXT,
  .next_nodes = {
    [IP4_ICMP_ERROR_NEXT_DROP] = "error-drop",
    [IP4_ICMP_ERROR_NEXT_LOOKUP] = "ip4-lookup",
  },

  .format_trace = format_icmp_input_trace,
};
/* *INDENT-ON* */


/*
 * unformat() callback that parses an ICMP type, optionally with code, by
 * name.  va_list: icmp46_header_t *h; only h->type and h->code are
 * written.  The type+code table is tried first (value decoded as
 * (type << 8) | code), then the type-only table (code set to 0).
 * Returns 1 on a match, 0 otherwise.
 */
static uword
unformat_icmp_type_and_code (unformat_input_t * input, va_list * args)
{
  icmp46_header_t *h = va_arg (*args, icmp46_header_t *);
  icmp4_main_t *cm = &icmp4_main;
  u32 i;

  if (unformat_user (input, unformat_vlib_number_by_name,
		     cm->type_and_code_by_name, &i))
    {
      h->type = (i >> 8) & 0xff;
      h->code = (i >> 0) & 0xff;
    }
  else if (unformat_user (input, unformat_vlib_number_by_name,
			  cm->type_by_name, &i))
    {
      h->type = i;
      h->code = 0;
    }
  else
    return 0;

  return 1;
}

/*
 * Packet-generator edit function: writes the ICMP checksum into each
 * generated packet.  The ICMP length is the IP total length minus the IP
 * header length.
 */
static void
icmp4_pg_edit_function (pg_main_t * pg,
			pg_stream_t * s,
			pg_edit_group_t * g, u32 * packets, u32 n_packets)
{
  vlib_main_t *vm = vlib_get_main ();
  u32 ip_offset, icmp_offset;

  /* The group immediately before this one is taken to be the IP
   * header's. */
  icmp_offset = g->start_byte_offset;
  ip_offset = (g - 1)->start_byte_offset;

  while (n_packets >= 1)
    {
      vlib_buffer_t *p0;
      ip4_header_t *ip0;
      icmp46_header_t *icmp0;
      u32 len0;

      p0 = vlib_get_buffer (vm, packets[0]);
      n_packets -= 1;
      packets += 1;

      ASSERT (p0->current_data == 0);
      ip0 = (void *) (p0->data + ip_offset);
      icmp0 = (void *) (p0->data + icmp_offset);
      len0 = clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
      /* NOTE(review): the checksum field is not zeroed before summing;
       * presumably it is still 0 when the checksum edit is unspecified
       * -- confirm. */
      icmp0->checksum =
	~ip_csum_fold (ip_incremental_checksum (0, icmp0, len0));
    }
}

/* Packet-generator edits for the fields of icmp46_header_t. */
typedef struct
{
  pg_edit_t type, code;
  pg_edit_t checksum;
} pg_icmp46_header_t;

always_inline void
pg_icmp_header_init (pg_icmp46_header_t * p)
{
  /* Initialize fields that are not bit fields in 
the ICMP header. */
#define _(f) pg_edit_init (&p->f, icmp46_header_t, f);
  _(type);
  _(code);
  _(checksum);
#undef _
}

/*
 * Packet-generator parser for an ICMP header: "ICMP <type-or-code name>"
 * optionally followed by "checksum <edit>", then the payload.
 * va_list: pg_stream_t *s.
 * When no checksum is given, icmp4_pg_edit_function is installed on the
 * edit group so the checksum is computed per packet.
 * Returns 1 on success; on failure the edit group is freed and 0 is
 * returned.
 */
static uword
unformat_pg_icmp_header (unformat_input_t * input, va_list * args)
{
  pg_stream_t *s = va_arg (*args, pg_stream_t *);
  pg_icmp46_header_t *p;
  u32 group_index;

  p = pg_create_edit_group (s, sizeof (p[0]), sizeof (icmp46_header_t),
			    &group_index);
  pg_icmp_header_init (p);

  /* Marker tested below to tell whether the user supplied a checksum. */
  p->checksum.type = PG_EDIT_UNSPECIFIED;

  {
    icmp46_header_t tmp;

    if (!unformat (input, "ICMP %U", unformat_icmp_type_and_code, &tmp))
      goto error;

    pg_edit_set_fixed (&p->type, tmp.type);
    pg_edit_set_fixed (&p->code, tmp.code);
  }

  /* Parse options. */
  while (1)
    {
      if (unformat (input, "checksum %U",
		    unformat_pg_edit, unformat_pg_number, &p->checksum))
	;

      /* Can't parse input: try next protocol level. */
      else
	break;
    }

  if (!unformat_user (input, unformat_pg_payload, s))
    goto error;

  if (p->checksum.type == PG_EDIT_UNSPECIFIED)
    {
      /* The group is looked up again by index rather than through a
       * pointer held across the payload parse. */
      pg_edit_group_t *g = pg_stream_get_group (s, group_index);
      g->edit_function = icmp4_pg_edit_function;
      g->edit_function_opaque = 0;
    }

  return 1;

error:
  /* Free up any edits we may have added. 
*/ + pg_free_edit_group (s); + return 0; +} + +void +ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type, u32 node_index) +{ + icmp4_main_t *im = &icmp4_main; + + ASSERT ((int) type < ARRAY_LEN (im->ip4_input_next_index_by_type)); + im->ip4_input_next_index_by_type[type] + = vlib_node_add_next (vm, ip4_icmp_input_node.index, node_index); +} + +static clib_error_t * +icmp4_init (vlib_main_t * vm) +{ + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi; + icmp4_main_t *cm = &icmp4_main; + clib_error_t *error; + + error = vlib_call_init_function (vm, ip_main_init); + + if (error) + return error; + + pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP); + pi->format_header = format_ip4_icmp_header; + pi->unformat_pg_edit = unformat_pg_icmp_header; + + cm->type_by_name = hash_create_string (0, sizeof (uword)); +#define _(n,t) hash_set_mem (cm->type_by_name, #t, (n)); + foreach_icmp4_type; +#undef _ + + cm->type_and_code_by_name = hash_create_string (0, sizeof (uword)); +#define _(a,n,t) hash_set_mem (cm->type_by_name, #t, (n) | (ICMP4_##a << 8)); + foreach_icmp4_code; +#undef _ + + memset (cm->ip4_input_next_index_by_type, + ICMP_INPUT_NEXT_ERROR, sizeof (cm->ip4_input_next_index_by_type)); + + ip4_icmp_register_type (vm, ICMP4_echo_request, + ip4_icmp_echo_request_node.index); + + return 0; +} + +VLIB_INIT_FUNCTION (icmp4_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/icmp4.h b/src/vnet/ip/icmp4.h new file mode 100644 index 00000000..ae805148 --- /dev/null +++ b/src/vnet/ip/icmp4.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef included_vnet_icmp4_h
#define included_vnet_icmp4_h

/*
 * X-macro table of ICMPv4 counters: _(SYMBOL, "description").
 * Expanded below into the icmp4_error_t enumerators ICMP4_ERROR_<SYMBOL>,
 * and in icmp4.c into the matching array of description strings.
 * NOTE(review): the DST_LOOKUP_MISS description says "icmp6"; it looks
 * copied from the ICMPv6 table -- confirm before changing, since the
 * string is emitted at runtime.
 */
#define foreach_icmp4_error                                             \
  _ (NONE, "valid packets")                                             \
  _ (UNKNOWN_TYPE, "unknown type")                                      \
  _ (INVALID_CODE_FOR_TYPE, "invalid code for type")                    \
  _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255")                    \
  _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type")    \
  _ (OPTIONS_WITH_ODD_LENGTH,                                           \
     "total option length not multiple of 8 bytes")                     \
  _ (OPTION_WITH_ZERO_LENGTH, "option has zero length")                 \
  _ (ECHO_REPLIES_SENT, "echo replies sent")                            \
  _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses")                \
  _ (DEST_UNREACH_SENT, "destination unreachable response sent")        \
  _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent")               \
  _ (PARAM_PROBLEM_SENT, "parameter problem response sent")             \
  _ (DROP, "error message dropped")

/* Counter indices, one per foreach_icmp4_error entry, in table order. */
typedef enum
{
#define _(f,s) ICMP4_ERROR_##f,
  foreach_icmp4_error
#undef _
} icmp4_error_t;

/* Trace record: the first bytes of the packet, formatted as an IPv4
 * header by the trace formatter in icmp4.c. */
typedef struct
{
  u8 packet_data[64];
} icmp_input_trace_t;

/* NOTE(review): the trace formatter defined in icmp4.c is the static
 * format_icmp_input_trace; no definition named format_icmp4_input_trace
 * is visible there -- confirm this declaration is still needed. */
format_function_t format_icmp4_input_trace;

/* Route ICMPv4 messages of the given type from ip4-icmp-input to the
 * node identified by node_index. */
void ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type,
			     u32 node_index);

/* Store the type, code and data word that ip4-icmp-error will place in
 * the error message it builds from buffer b. */
void icmp4_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code,
				  u32 data);

#endif /* included_vnet_icmp4_h */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
diff --git a/src/vnet/ip/icmp46_packet.h b/src/vnet/ip/icmp46_packet.h new file mode 100644 index 00000000..a86cbd57 --- /dev/null +++ b/src/vnet/ip/icmp46_packet.h @@ -0,0 +1,398 @@
/*
 * Copyright (c) 
2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * icmp46_packet.h: ip4/ip6 icmp packet format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef included_vnet_icmp46_packet_h +#define included_vnet_icmp46_packet_h + +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip6_packet.h> + +#define foreach_icmp4_type \ + _ (0, echo_reply) \ + _ (3, destination_unreachable) \ + _ (4, source_quench) \ + _ (5, redirect) \ + _ (6, alternate_host_address) \ + _ (8, echo_request) \ + _ (9, router_advertisement) \ + _ (10, router_solicitation) \ + _ (11, time_exceeded) \ + _ (12, parameter_problem) \ + _ (13, timestamp_request) \ + _ (14, timestamp_reply) \ + _ (15, information_request) \ + _ (16, information_reply) \ + _ (17, address_mask_request) \ + _ (18, address_mask_reply) \ + _ (30, traceroute) \ + _ (31, datagram_conversion_error) \ + _ (32, mobile_host_redirect) \ + _ (33, ip6_where_are_you) \ + _ (34, ip6_i_am_here) \ + _ (35, mobile_registration_request) \ + _ (36, mobile_registration_reply) \ + _ (37, domain_name_request) \ + _ (38, domain_name_reply) \ + _ (39, skip) \ + _ (40, photuris) + +#define icmp_no_code 0 + +#define foreach_icmp4_code \ + _ (destination_unreachable, 0, destination_unreachable_net) \ + _ (destination_unreachable, 1, destination_unreachable_host) \ + _ (destination_unreachable, 2, protocol_unreachable) \ + _ (destination_unreachable, 3, port_unreachable) \ + _ (destination_unreachable, 4, fragmentation_needed_and_dont_fragment_set) \ + _ (destination_unreachable, 5, source_route_failed) \ + _ (destination_unreachable, 6, destination_network_unknown) \ + _ (destination_unreachable, 7, destination_host_unknown) \ + _ (destination_unreachable, 8, source_host_isolated) \ + _ (destination_unreachable, 9, network_administratively_prohibited) \ + _ (destination_unreachable, 10, host_administratively_prohibited) \ + _ (destination_unreachable, 11, network_unreachable_for_type_of_service) \ + _ (destination_unreachable, 12, host_unreachable_for_type_of_service) \ + _ (destination_unreachable, 13, communication_administratively_prohibited) \ + _ (destination_unreachable, 14, 
host_precedence_violation) \ + _ (destination_unreachable, 15, precedence_cutoff_in_effect) \ + _ (redirect, 0, network_redirect) \ + _ (redirect, 1, host_redirect) \ + _ (redirect, 2, type_of_service_and_network_redirect) \ + _ (redirect, 3, type_of_service_and_host_redirect) \ + _ (router_advertisement, 0, normal_router_advertisement) \ + _ (router_advertisement, 16, does_not_route_common_traffic) \ + _ (time_exceeded, 0, ttl_exceeded_in_transit) \ + _ (time_exceeded, 1, fragment_reassembly_time_exceeded) \ + _ (parameter_problem, 0, pointer_indicates_error) \ + _ (parameter_problem, 1, missing_required_option) \ + _ (parameter_problem, 2, bad_length) + +/* ICMPv6 */ +#define foreach_icmp6_type \ + _ (1, destination_unreachable) \ + _ (2, packet_too_big) \ + _ (3, time_exceeded) \ + _ (4, parameter_problem) \ + _ (128, echo_request) \ + _ (129, echo_reply) \ + _ (130, multicast_listener_request) \ + _ (131, multicast_listener_report) \ + _ (132, multicast_listener_done) \ + _ (133, router_solicitation) \ + _ (134, router_advertisement) \ + _ (135, neighbor_solicitation) \ + _ (136, neighbor_advertisement) \ + _ (137, redirect) \ + _ (138, router_renumbering) \ + _ (139, node_information_request) \ + _ (140, node_information_response) \ + _ (141, inverse_neighbor_solicitation) \ + _ (142, inverse_neighbor_advertisement) \ + _ (143, multicast_listener_report_v2) \ + _ (144, home_agent_address_discovery_request) \ + _ (145, home_agent_address_discovery_reply) \ + _ (146, mobile_prefix_solicitation) \ + _ (147, mobile_prefix_advertisement) \ + _ (148, certification_path_solicitation) \ + _ (149, certification_path_advertisement) \ + _ (151, multicast_router_advertisement) \ + _ (152, multicast_router_solicitation) \ + _ (153, multicast_router_termination) \ + _ (154, fmipv6_messages) + +#define foreach_icmp6_code \ + _ (destination_unreachable, 0, no_route_to_destination) \ + _ (destination_unreachable, 1, destination_administratively_prohibited) \ + _ 
(destination_unreachable, 2, beyond_scope_of_source_address) \ + _ (destination_unreachable, 3, address_unreachable) \ + _ (destination_unreachable, 4, port_unreachable) \ + _ (destination_unreachable, 5, source_address_failed_policy) \ + _ (destination_unreachable, 6, reject_route_to_destination) \ + _ (time_exceeded, 0, ttl_exceeded_in_transit) \ + _ (time_exceeded, 1, fragment_reassembly_time_exceeded) \ + _ (parameter_problem, 0, erroneous_header_field) \ + _ (parameter_problem, 1, unrecognized_next_header) \ + _ (parameter_problem, 2, unrecognized_option) \ + _ (router_renumbering, 0, command) \ + _ (router_renumbering, 1, result) \ + _ (node_information_request, 0, data_contains_ip6_address) \ + _ (node_information_request, 1, data_contains_name) \ + _ (node_information_request, 2, data_contains_ip4_address) \ + _ (node_information_response, 0, success) \ + _ (node_information_response, 1, failed) \ + _ (node_information_response, 2, unknown_request) + +typedef enum +{ +#define _(n,f) ICMP4_##f = n, + foreach_icmp4_type +#undef _ +} icmp4_type_t; + +typedef enum +{ +#define _(t,n,f) ICMP4_##t##_##f = n, + foreach_icmp4_code +#undef _ +} icmp4_code_t; + +typedef enum +{ +#define _(n,f) ICMP6_##f = n, + foreach_icmp6_type +#undef _ +} icmp6_type_t; + +typedef enum +{ +#define _(t,n,f) ICMP6_##t##_##f = n, + foreach_icmp6_code +#undef _ +} icmp6_code_t; + +typedef CLIB_PACKED (struct + { + u8 type; + u8 code; + /* IP checksum of icmp header plus data which follows. 
*/ + u16 checksum; + }) icmp46_header_t; + +/* ip6 neighbor discovery */ +#define foreach_icmp6_neighbor_discovery_option \ + _ (1, source_link_layer_address) \ + _ (2, target_link_layer_address) \ + _ (3, prefix_information) \ + _ (4, redirected_header) \ + _ (5, mtu) \ + _ (6, nbma_shortcut_limit) \ + _ (7, advertisement_interval) \ + _ (8, home_agent_information) \ + _ (9, source_address_list) \ + _ (10, target_address_list) \ + _ (11, cryptographically_generated_address) \ + _ (12, rsa_signature) \ + _ (13, timestamp) \ + _ (14, nonce) \ + _ (15, trust_anchor) \ + _ (16, certificate) \ + _ (17, ip_address_and_prefix) \ + _ (18, new_router_prefix_information) \ + _ (19, mobile_link_layer_address) \ + _ (20, neighbor_advertisement_acknowledgment) \ + _ (23, map) \ + _ (24, route_information) \ + _ (25, recursive_dns_server) \ + _ (26, ra_flags_extension) \ + _ (27, handover_key_request) \ + _ (28, handover_key_reply) \ + _ (29, handover_assist_information) \ + _ (30, mobile_node_identifier) \ + _ (31, dns_search_list) \ + _ (138, card_request) \ + _ (139, card_reply) + +typedef enum icmp6_neighbor_discovery_option_type +{ +#define _(n,f) ICMP6_NEIGHBOR_DISCOVERY_OPTION_##f = n, + foreach_icmp6_neighbor_discovery_option +#undef _ +} icmp6_neighbor_discovery_option_type_t; + +typedef CLIB_PACKED (struct + { + /* Option type. */ + u8 type; + /* Length of this header plus option data in 8 byte units. */ + u8 n_data_u64s; + /* Option data follows. 
*/ + u8 data[0]; + }) icmp6_neighbor_discovery_option_header_t; + +typedef CLIB_PACKED (struct + { + icmp6_neighbor_discovery_option_header_t header; + u8 dst_address_length; + u8 flags; +#define ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_FLAG_ON_LINK (1 << 7) +#define ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_AUTO (1 << 6) + u32 valid_time; + u32 preferred_time; + u32 unused; ip6_address_t dst_address; + }) icmp6_neighbor_discovery_prefix_information_option_t; + +typedef CLIB_PACKED (struct + { + u8 type; + u8 aux_data_len_u32s; + u16 num_sources; + ip6_address_t mcast_addr; ip6_address_t source_addr[0]; + }) icmp6_multicast_address_record_t; + +typedef CLIB_PACKED (struct + { + ip6_hop_by_hop_ext_t ext_hdr; + ip6_router_alert_option_t alert; + ip6_padN_option_t pad; + icmp46_header_t icmp; + u16 rsvd; + u16 num_addr_records; + icmp6_multicast_address_record_t records[0]; + }) icmp6_multicast_listener_report_header_t; + +typedef CLIB_PACKED (struct + { + icmp6_neighbor_discovery_option_header_t header; + u8 reserved[6]; + /* IP6 header plus payload follows. */ + u8 data[0]; + }) icmp6_neighbor_discovery_redirected_header_option_t; + +typedef CLIB_PACKED (struct + { + icmp6_neighbor_discovery_option_header_t header; + u16 unused; u32 mtu; + }) icmp6_neighbor_discovery_mtu_option_t; + +typedef CLIB_PACKED (struct + { + icmp6_neighbor_discovery_option_header_t header; + u8 ethernet_address[6]; + }) + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t; + +typedef CLIB_PACKED (struct + { + icmp6_neighbor_discovery_option_header_t header; + u8 max_l2_address[6 + 8]; + }) + icmp6_neighbor_discovery_max_link_layer_address_option_t; + +/* Generic neighbor discover header. Used for router solicitations, + etc. */ +typedef CLIB_PACKED (struct + { + icmp46_header_t icmp; u32 reserved_must_be_zero; + }) icmp6_neighbor_discovery_header_t; + +/* Router advertisement packet formats. 
*/ +typedef CLIB_PACKED (struct + { + icmp46_header_t icmp; + /* Current hop limit to use for outgoing packets. */ + u8 current_hop_limit; + u8 flags; +#define ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP (1 << 7) +#define ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP (1 << 6) + /* Zero means unspecified. */ + u16 router_lifetime_in_sec; + /* Zero means unspecified. */ + u32 neighbor_reachable_time_in_msec; + /* Zero means unspecified. */ + u32 + time_in_msec_between_retransmitted_neighbor_solicitations; + /* Options that may follow: source_link_layer_address, mtu, prefix_information. */ + }) icmp6_router_advertisement_header_t; + +/* Neighbor solicitation/advertisement header. */ +typedef CLIB_PACKED (struct + { + icmp46_header_t icmp; + /* Zero for solicitation; flags for advertisement. */ + u32 advertisement_flags; + /* Set when sent by a router. */ +#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_ROUTER (1 << 31) + /* Set when response to solicitation. */ +#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED (1 << 30) +#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE (1 << 29) + ip6_address_t target_address; + /* Options that may follow: source_link_layer_address + (for solicitation) target_link_layer_address (for advertisement). */ + }) icmp6_neighbor_solicitation_or_advertisement_header_t; + +typedef CLIB_PACKED (struct + { + icmp46_header_t icmp; + u32 reserved_must_be_zero; + /* Better next hop to use for given destination. */ + ip6_address_t better_next_hop_address; + ip6_address_t dst_address; + /* Options that may follow: target_link_layer_address, + redirected_header. */ + }) icmp6_redirect_header_t; + +/* Solicitation/advertisement packet format for ethernet. 
*/ +typedef CLIB_PACKED (struct + { + ip6_header_t ip; + icmp6_neighbor_solicitation_or_advertisement_header_t + neighbor; + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t + link_layer_option; + }) icmp6_neighbor_solicitation_header_t; + +/* Router solicitation packet format for ethernet. */ +typedef CLIB_PACKED (struct + { + ip6_header_t ip; + icmp6_neighbor_discovery_header_t neighbor; + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t + link_layer_option; + }) icmp6_router_solicitation_header_t; + +/* router advertisement packet format for ethernet. */ +typedef CLIB_PACKED (struct + { + ip6_header_t ip; + icmp6_router_advertisement_header_t router; + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t + link_layer_option; + icmp6_neighbor_discovery_mtu_option_t mtu_option; + icmp6_neighbor_discovery_prefix_information_option_t + prefix[0]; + }) icmp6_router_advertisement_packet_t; + +/* multicast listener report packet format for ethernet. */ +typedef CLIB_PACKED (struct + { + ip6_header_t ip; + icmp6_multicast_listener_report_header_t report_hdr; + }) icmp6_multicast_listener_report_packet_t; + +#endif /* included_vnet_icmp46_packet_h */ diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c new file mode 100644 index 00000000..70696d0c --- /dev/null +++ b/src/vnet/ip/icmp6.c @@ -0,0 +1,882 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/icmp6.c: ip6 icmp + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> +#include <vnet/ip/ip.h> +#include <vnet/pg/pg.h> + +static u8 * +format_ip6_icmp_type_and_code (u8 * s, va_list * args) +{ + icmp6_type_t type = va_arg (*args, int); + u8 code = va_arg (*args, int); + char *t = 0; + +#define _(n,f) case n: t = #f; break; + + switch (type) + { + foreach_icmp6_type; + + default: + break; + } + +#undef _ + + if (!t) + return format (s, "unknown 0x%x", type); + + s = format (s, "%s", t); + + t = 0; + switch ((type << 8) | code) + { +#define _(a,n,f) case (ICMP6_##a << 8) | (n): t = #f; break; + + foreach_icmp6_code; + +#undef _ + } + + if (t) + s = format (s, " %s", t); + + return s; +} + +static u8 * +format_icmp6_header (u8 * s, va_list * args) +{ + icmp46_header_t *icmp = va_arg (*args, icmp46_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + + /* Nothing to do. */ + if (max_header_bytes < sizeof (icmp[0])) + return format (s, "ICMP header truncated"); + + s = format (s, "ICMP %U checksum 0x%x", + format_ip6_icmp_type_and_code, icmp->type, icmp->code, + clib_net_to_host_u16 (icmp->checksum)); + + if (max_header_bytes >= + sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t) && + (icmp->type == ICMP6_neighbor_solicitation || + icmp->type == ICMP6_neighbor_advertisement)) + { + icmp6_neighbor_solicitation_or_advertisement_header_t *icmp6_nd = + (icmp6_neighbor_solicitation_or_advertisement_header_t *) icmp; + s = format (s, "\n target address %U", + format_ip6_address, &icmp6_nd->target_address); + } + + return s; +} + +u8 * +format_icmp6_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + icmp6_input_trace_t *t = va_arg (*va, icmp6_input_trace_t *); + + s = format (s, "%U", + format_ip6_header, t->packet_data, sizeof (t->packet_data)); + + return s; +} + +static char *icmp_error_strings[] = { +#define _(f,s) s, + foreach_icmp6_error +#undef _ +}; + +typedef enum +{ 
+ ICMP_INPUT_NEXT_DROP, + ICMP_INPUT_N_NEXT, +} icmp_input_next_t; + +typedef struct +{ + uword *type_and_code_by_name; + + uword *type_by_name; + + /* Vector dispatch table indexed by [icmp type]. */ + u8 input_next_index_by_type[256]; + + /* Max valid code indexed by icmp type. */ + u8 max_valid_code_by_type[256]; + + /* hop_limit must be >= this value for this icmp type. */ + u8 min_valid_hop_limit_by_type[256]; + + u8 min_valid_length_by_type[256]; +} icmp6_main_t; + +icmp6_main_t icmp6_main; + +static uword +ip6_icmp_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + icmp6_main_t *im = &icmp6_main; + u32 *from, *to_next; + u32 n_left_from, n_left_to_next, next_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (icmp6_input_trace_t)); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *b0; + ip6_header_t *ip0; + icmp46_header_t *icmp0; + icmp6_type_t type0; + u32 bi0, next0, error0, len0; + + bi0 = to_next[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (b0); + icmp0 = ip6_next_header (ip0); + type0 = icmp0->type; + + error0 = ICMP6_ERROR_NONE; + + next0 = im->input_next_index_by_type[type0]; + error0 = + next0 == ICMP_INPUT_NEXT_DROP ? ICMP6_ERROR_UNKNOWN_TYPE : error0; + + /* Check code is valid for type. */ + error0 = + icmp0->code > + im->max_valid_code_by_type[type0] ? + ICMP6_ERROR_INVALID_CODE_FOR_TYPE : error0; + + /* Checksum is already validated by ip6_local node so we don't need to check that. */ + + /* Check that hop limit == 255 for certain types. 
*/ + error0 = + ip0->hop_limit < + im->min_valid_hop_limit_by_type[type0] ? + ICMP6_ERROR_INVALID_HOP_LIMIT_FOR_TYPE : error0; + + len0 = clib_net_to_host_u16 (ip0->payload_length); + error0 = + len0 < + im->min_valid_length_by_type[type0] ? + ICMP6_ERROR_LENGTH_TOO_SMALL_FOR_TYPE : error0; + + b0->error = node->errors[error0]; + + next0 = error0 != ICMP6_ERROR_NONE ? ICMP_INPUT_NEXT_DROP : next0; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_icmp_input_node) = { + .function = ip6_icmp_input, + .name = "ip6-icmp-input", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp6_input_trace, + + .n_errors = ARRAY_LEN (icmp_error_strings), + .error_strings = icmp_error_strings, + + .n_next_nodes = 1, + .next_nodes = { + [ICMP_INPUT_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +typedef enum +{ + ICMP6_ECHO_REQUEST_NEXT_LOOKUP, + ICMP6_ECHO_REQUEST_NEXT_OUTPUT, + ICMP6_ECHO_REQUEST_N_NEXT, +} icmp6_echo_request_next_t; + +static uword +ip6_icmp_echo_request (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 *from, *to_next; + u32 n_left_from, n_left_to_next, next_index; + ip6_main_t *im = &ip6_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (icmp6_input_trace_t)); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 2 && n_left_to_next > 2) + { + vlib_buffer_t *p0, *p1; + ip6_header_t *ip0, *ip1; + icmp46_header_t *icmp0, *icmp1; + ip6_address_t tmp0, tmp1; + ip_csum_t sum0, sum1; + u32 bi0, bi1; + u32 fib_index0, fib_index1; + u32 next0 
= ICMP6_ECHO_REQUEST_NEXT_LOOKUP; + u32 next1 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP; + + bi0 = to_next[0] = from[0]; + bi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, bi0); + p1 = vlib_get_buffer (vm, bi1); + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + icmp0 = ip6_next_header (ip0); + icmp1 = ip6_next_header (ip1); + + /* Check icmp type to echo reply and update icmp checksum. */ + sum0 = icmp0->checksum; + sum1 = icmp1->checksum; + + ASSERT (icmp0->type == ICMP6_echo_request); + ASSERT (icmp1->type == ICMP6_echo_request); + sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply, + icmp46_header_t, type); + sum1 = ip_csum_update (sum1, ICMP6_echo_request, ICMP6_echo_reply, + icmp46_header_t, type); + + icmp0->checksum = ip_csum_fold (sum0); + icmp1->checksum = ip_csum_fold (sum1); + + icmp0->type = ICMP6_echo_reply; + icmp1->type = ICMP6_echo_reply; + + /* Swap source and destination address. */ + tmp0 = ip0->src_address; + tmp1 = ip1->src_address; + + ip0->src_address = ip0->dst_address; + ip1->src_address = ip1->dst_address; + + ip0->dst_address = tmp0; + ip1->dst_address = tmp1; + + /* New hop count. */ + ip0->hop_limit = im->host_config.ttl; + ip1->hop_limit = im->host_config.ttl; + + if (ip6_address_is_link_local_unicast (&ip0->dst_address)) + { + ethernet_header_t *eth0; + u8 tmp_mac[6]; + /* For link local, reuse current MAC header by sawpping + * SMAC to DMAC instead of IP6 lookup since link local + * is not in the IP6 FIB */ + vlib_buffer_reset (p0); + eth0 = vlib_buffer_get_current (p0); + clib_memcpy (tmp_mac, eth0->dst_address, 6); + clib_memcpy (eth0->dst_address, eth0->src_address, 6); + clib_memcpy (eth0->src_address, tmp_mac, 6); + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + vnet_buffer (p0)->sw_if_index[VLIB_RX]; + next0 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT; + } + else + { + /* Determine the correct lookup fib indices... 
*/ + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (p0)->sw_if_index[VLIB_RX]); + vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0; + } + + if (ip6_address_is_link_local_unicast (&ip1->dst_address)) + { + ethernet_header_t *eth1; + u8 tmp_mac[6]; + /* For link local, reuse current MAC header by sawpping + * SMAC to DMAC instead of IP6 lookup since link local + * is not in the IP6 FIB */ + vlib_buffer_reset (p1); + eth1 = vlib_buffer_get_current (p1); + clib_memcpy (tmp_mac, eth1->dst_address, 6); + clib_memcpy (eth1->dst_address, eth1->src_address, 6); + clib_memcpy (eth1->src_address, tmp_mac, 6); + vnet_buffer (p1)->sw_if_index[VLIB_TX] = + vnet_buffer (p1)->sw_if_index[VLIB_RX]; + next1 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT; + } + else + { + /* Determine the correct lookup fib indices... */ + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (p1)->sw_if_index[VLIB_RX]); + vnet_buffer (p1)->sw_if_index[VLIB_TX] = fib_index1; + } + + vnet_buffer (p0)->sw_if_index[VLIB_RX] + = vnet_main.local_interface_sw_if_index; + vnet_buffer (p1)->sw_if_index[VLIB_RX] + = vnet_main.local_interface_sw_if_index; + + /* verify speculative enqueues, maybe switch current next frame */ + /* if next0==next1==next_index then nothing special needs to be done */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *p0; + ip6_header_t *ip0; + icmp46_header_t *icmp0; + u32 bi0; + ip6_address_t tmp0; + ip_csum_t sum0; + u32 fib_index0; + u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP; + + bi0 = to_next[0] = from[0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + icmp0 = ip6_next_header (ip0); + + /* Check icmp type to echo reply and update icmp checksum. 
*/ + sum0 = icmp0->checksum; + + ASSERT (icmp0->type == ICMP6_echo_request); + sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply, + icmp46_header_t, type); + + icmp0->checksum = ip_csum_fold (sum0); + + icmp0->type = ICMP6_echo_reply; + + /* Swap source and destination address. */ + tmp0 = ip0->src_address; + ip0->src_address = ip0->dst_address; + ip0->dst_address = tmp0; + + ip0->hop_limit = im->host_config.ttl; + + if (ip6_address_is_link_local_unicast (&ip0->dst_address)) + { + ethernet_header_t *eth0; + u8 tmp_mac[6]; + /* For link local, reuse current MAC header by sawpping + * SMAC to DMAC instead of IP6 lookup since link local + * is not in the IP6 FIB */ + vlib_buffer_reset (p0); + eth0 = vlib_buffer_get_current (p0); + clib_memcpy (tmp_mac, eth0->dst_address, 6); + clib_memcpy (eth0->dst_address, eth0->src_address, 6); + clib_memcpy (eth0->src_address, tmp_mac, 6); + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + vnet_buffer (p0)->sw_if_index[VLIB_RX]; + next0 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT; + } + else + { + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (p0)->sw_if_index[VLIB_RX]); + vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0; + } + vnet_buffer (p0)->sw_if_index[VLIB_RX] + = vnet_main.local_interface_sw_if_index; + + /* Verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_error_count (vm, ip6_icmp_input_node.index, + ICMP6_ERROR_ECHO_REPLIES_SENT, frame->n_vectors); + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_icmp_echo_request_node,static) = { + .function = ip6_icmp_echo_request, + .name = "ip6-icmp-echo-request", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp6_input_trace, + + .n_next_nodes = ICMP6_ECHO_REQUEST_N_NEXT, + .next_nodes = { + [ICMP6_ECHO_REQUEST_NEXT_LOOKUP] = 
"ip6-lookup", + [ICMP6_ECHO_REQUEST_NEXT_OUTPUT] = "interface-output", + }, +}; +/* *INDENT-ON* */ + +typedef enum +{ + IP6_ICMP_ERROR_NEXT_DROP, + IP6_ICMP_ERROR_NEXT_LOOKUP, + IP6_ICMP_ERROR_N_NEXT, +} ip6_icmp_error_next_t; + +void +icmp6_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code, u32 data) +{ + vnet_buffer (b)->ip.icmp.type = type; + vnet_buffer (b)->ip.icmp.code = code; + vnet_buffer (b)->ip.icmp.data = data; +} + +static u8 +icmp6_icmp_type_to_error (u8 type) +{ + switch (type) + { + case ICMP6_destination_unreachable: + return ICMP6_ERROR_DEST_UNREACH_SENT; + case ICMP6_packet_too_big: + return ICMP6_ERROR_PACKET_TOO_BIG_SENT; + case ICMP6_time_exceeded: + return ICMP6_ERROR_TTL_EXPIRE_SENT; + case ICMP6_parameter_problem: + return ICMP6_ERROR_PARAM_PROBLEM_SENT; + default: + return ICMP6_ERROR_DROP; + } +} + +static uword +ip6_icmp_error (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 *from, *to_next; + uword n_left_from, n_left_to_next; + ip6_icmp_error_next_t next_index; + ip6_main_t *im = &ip6_main; + ip_lookup_main_t *lm = &im->lookup_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (icmp6_input_trace_t)); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0 = from[0]; + u32 next0 = IP6_ICMP_ERROR_NEXT_LOOKUP; + u8 error0 = ICMP6_ERROR_NONE; + vlib_buffer_t *p0; + ip6_header_t *ip0, *out_ip0; + icmp46_header_t *icmp0; + u32 sw_if_index0, if_add_index0; + int bogus_length; + + /* Speculatively enqueue p0 to the current next frame */ + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + ip0 = 
vlib_buffer_get_current (p0); + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + /* RFC4443 says to keep as much of the original packet as possible + * within the minimum MTU. We cheat "a little" here by keeping whatever fits + * in the first buffer, to be more efficient */ + if (PREDICT_FALSE (p0->total_length_not_including_first_buffer)) + { /* clear current_length of all other buffers in chain */ + vlib_buffer_t *b = p0; + p0->total_length_not_including_first_buffer = 0; + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b = vlib_get_buffer (vm, b->next_buffer); + b->current_length = 0; + } + } + p0->current_length = + p0->current_length > 1280 ? 1280 : p0->current_length; + + /* Add IP header and ICMPv6 header including a 4 byte data field */ + vlib_buffer_advance (p0, + -sizeof (ip6_header_t) - + sizeof (icmp46_header_t) - 4); + out_ip0 = vlib_buffer_get_current (p0); + icmp0 = (icmp46_header_t *) & out_ip0[1]; + + /* Fill ip header fields */ + out_ip0->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + + out_ip0->payload_length = + clib_host_to_net_u16 (p0->current_length - sizeof (ip6_header_t)); + out_ip0->protocol = IP_PROTOCOL_ICMP6; + out_ip0->hop_limit = 0xff; + out_ip0->dst_address = ip0->src_address; + if_add_index0 = + lm->if_address_pool_index_by_sw_if_index[sw_if_index0]; + if (PREDICT_TRUE (if_add_index0 != ~0)) + { + ip_interface_address_t *if_add = + pool_elt_at_index (lm->if_address_pool, if_add_index0); + ip6_address_t *if_ip = + ip_interface_address_get_address (lm, if_add); + out_ip0->src_address = *if_ip; + } + else /* interface has no IP6 address - should not happen */ + { + next0 = IP6_ICMP_ERROR_NEXT_DROP; + error0 = ICMP6_ERROR_DROP; + } + + /* Fill icmp header fields */ + icmp0->type = vnet_buffer (p0)->ip.icmp.type; + icmp0->code = vnet_buffer (p0)->ip.icmp.code; + *((u32 *) (icmp0 + 1)) = + clib_host_to_net_u32 (vnet_buffer (p0)->ip.icmp.data); + icmp0->checksum = 0; + icmp0->checksum = + 
ip6_tcp_udp_icmp_compute_checksum (vm, p0, out_ip0, + &bogus_length); + + + + /* Update error status */ + if (error0 == ICMP6_ERROR_NONE) + error0 = icmp6_icmp_type_to_error (icmp0->type); + vlib_error_count (vm, node->node_index, error0, 1); + + /* Verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_icmp_error_node) = { + .function = ip6_icmp_error, + .name = "ip6-icmp-error", + .vector_size = sizeof (u32), + + .n_errors = ARRAY_LEN (icmp_error_strings), + .error_strings = icmp_error_strings, + + .n_next_nodes = IP6_ICMP_ERROR_N_NEXT, + .next_nodes = { + [IP6_ICMP_ERROR_NEXT_DROP] = "error-drop", + [IP6_ICMP_ERROR_NEXT_LOOKUP] = "ip6-lookup", + }, + + .format_trace = format_icmp6_input_trace, +}; +/* *INDENT-ON* */ + + +static uword +unformat_icmp_type_and_code (unformat_input_t * input, va_list * args) +{ + icmp46_header_t *h = va_arg (*args, icmp46_header_t *); + icmp6_main_t *cm = &icmp6_main; + u32 i; + + if (unformat_user (input, unformat_vlib_number_by_name, + cm->type_and_code_by_name, &i)) + { + h->type = (i >> 8) & 0xff; + h->code = (i >> 0) & 0xff; + } + else if (unformat_user (input, unformat_vlib_number_by_name, + cm->type_by_name, &i)) + { + h->type = i; + h->code = 0; + } + else + return 0; + + return 1; +} + +static void +icmp6_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, u32 * packets, u32 n_packets) +{ + vlib_main_t *vm = vlib_get_main (); + u32 ip_offset, icmp_offset; + int bogus_length; + + icmp_offset = g->start_byte_offset; + ip_offset = (g - 1)->start_byte_offset; + + while (n_packets >= 1) + { + vlib_buffer_t *p0; + ip6_header_t *ip0; + icmp46_header_t *icmp0; + + p0 = vlib_get_buffer (vm, packets[0]); + n_packets -= 1; + packets += 1; + + ASSERT 
(p0->current_data == 0); + ip0 = (void *) (p0->data + ip_offset); + icmp0 = (void *) (p0->data + icmp_offset); + + icmp0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, + &bogus_length); + ASSERT (bogus_length == 0); + } +} + +typedef struct +{ + pg_edit_t type, code; + pg_edit_t checksum; +} pg_icmp46_header_t; + +always_inline void +pg_icmp_header_init (pg_icmp46_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, icmp46_header_t, f); + _(type); + _(code); + _(checksum); +#undef _ +} + +static uword +unformat_pg_icmp_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t *s = va_arg (*args, pg_stream_t *); + pg_icmp46_header_t *p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (icmp46_header_t), + &group_index); + pg_icmp_header_init (p); + + p->checksum.type = PG_EDIT_UNSPECIFIED; + + { + icmp46_header_t tmp; + + if (!unformat (input, "ICMP %U", unformat_icmp_type_and_code, &tmp)) + goto error; + + pg_edit_set_fixed (&p->type, tmp.type); + pg_edit_set_fixed (&p->code, tmp.code); + } + + /* Parse options. */ + while (1) + { + if (unformat (input, "checksum %U", + unformat_pg_edit, unformat_pg_number, &p->checksum)) + ; + + /* Can't parse input: try next protocol level. */ + else + break; + } + + if (!unformat_user (input, unformat_pg_payload, s)) + goto error; + + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t *g = pg_stream_get_group (s, group_index); + g->edit_function = icmp6_pg_edit_function; + g->edit_function_opaque = 0; + } + + return 1; + +error: + /* Free up any edits we may have added. 
*/ + pg_free_edit_group (s); + return 0; +} + +void +icmp6_register_type (vlib_main_t * vm, icmp6_type_t type, u32 node_index) +{ + icmp6_main_t *im = &icmp6_main; + + ASSERT ((int) type < ARRAY_LEN (im->input_next_index_by_type)); + im->input_next_index_by_type[type] + = vlib_node_add_next (vm, ip6_icmp_input_node.index, node_index); +} + +static clib_error_t * +icmp6_init (vlib_main_t * vm) +{ + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi; + icmp6_main_t *cm = &icmp6_main; + clib_error_t *error; + + error = vlib_call_init_function (vm, ip_main_init); + + if (error) + return error; + + pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP6); + pi->format_header = format_icmp6_header; + pi->unformat_pg_edit = unformat_pg_icmp_header; + + cm->type_by_name = hash_create_string (0, sizeof (uword)); +#define _(n,t) hash_set_mem (cm->type_by_name, #t, (n)); + foreach_icmp6_type; +#undef _ + + cm->type_and_code_by_name = hash_create_string (0, sizeof (uword)); +#define _(a,n,t) hash_set_mem (cm->type_by_name, #t, (n) | (ICMP6_##a << 8)); + foreach_icmp6_code; +#undef _ + + memset (cm->input_next_index_by_type, + ICMP_INPUT_NEXT_DROP, sizeof (cm->input_next_index_by_type)); + memset (cm->max_valid_code_by_type, 0, sizeof (cm->max_valid_code_by_type)); + +#define _(a,n,t) cm->max_valid_code_by_type[ICMP6_##a] = clib_max (cm->max_valid_code_by_type[ICMP6_##a], n); + foreach_icmp6_code; +#undef _ + + memset (cm->min_valid_hop_limit_by_type, 0, + sizeof (cm->min_valid_hop_limit_by_type)); + cm->min_valid_hop_limit_by_type[ICMP6_router_solicitation] = 255; + cm->min_valid_hop_limit_by_type[ICMP6_router_advertisement] = 255; + cm->min_valid_hop_limit_by_type[ICMP6_neighbor_solicitation] = 255; + cm->min_valid_hop_limit_by_type[ICMP6_neighbor_advertisement] = 255; + cm->min_valid_hop_limit_by_type[ICMP6_redirect] = 255; + + memset (cm->min_valid_length_by_type, sizeof (icmp46_header_t), + sizeof (cm->min_valid_length_by_type)); + 
cm->min_valid_length_by_type[ICMP6_router_solicitation] = + sizeof (icmp6_neighbor_discovery_header_t); + cm->min_valid_length_by_type[ICMP6_router_advertisement] = + sizeof (icmp6_router_advertisement_header_t); + cm->min_valid_length_by_type[ICMP6_neighbor_solicitation] = + sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t); + cm->min_valid_length_by_type[ICMP6_neighbor_advertisement] = + sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t); + cm->min_valid_length_by_type[ICMP6_redirect] = + sizeof (icmp6_redirect_header_t); + + icmp6_register_type (vm, ICMP6_echo_request, + ip6_icmp_echo_request_node.index); + + return vlib_call_init_function (vm, ip6_neighbor_init); +} + +VLIB_INIT_FUNCTION (icmp6_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/icmp6.h b/src/vnet/ip/icmp6.h new file mode 100644 index 00000000..9a3487b1 --- /dev/null +++ b/src/vnet/ip/icmp6.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vnet_icmp6_h +#define included_vnet_icmp6_h + +#define foreach_icmp6_error \ + _ (NONE, "valid packets") \ + _ (UNKNOWN_TYPE, "unknown type") \ + _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \ + _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \ + _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \ + _ (OPTIONS_WITH_ODD_LENGTH, \ + "total option length not multiple of 8 bytes") \ + _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \ + _ (ECHO_REPLIES_SENT, "echo replies sent") \ + _ (NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK, \ + "neighbor solicitations from source not on link") \ + _ (NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN, \ + "neighbor solicitations for unknown targets") \ + _ (NEIGHBOR_ADVERTISEMENTS_TX, "neighbor advertisements sent") \ + _ (NEIGHBOR_ADVERTISEMENTS_RX, "neighbor advertisements received") \ + _ (ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK, \ + "router solicitations from source not on link") \ + _ (ROUTER_SOLICITATION_UNSUPPORTED_INTF, \ + "neighbor discovery unsupported interface") \ + _ (ROUTER_SOLICITATION_RADV_NOT_CONFIG, \ + "neighbor discovery not configured") \ + _ (ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL, \ + "router advertisement source not link local") \ + _ (ROUTER_ADVERTISEMENTS_TX, "router advertisements sent") \ + _ (ROUTER_ADVERTISEMENTS_RX, "router advertisements received") \ + _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \ + _ (DEST_UNREACH_SENT, "destination unreachable response sent") \ + _ (PACKET_TOO_BIG_SENT, "packet too big response sent") \ + _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \ + _ (PARAM_PROBLEM_SENT, "parameter Pproblem response sent") \ + _ (DROP, "error message dropped") + + +typedef enum +{ +#define _(f,s) ICMP6_ERROR_##f, + foreach_icmp6_error +#undef _ +} icmp6_error_t; + +typedef struct +{ + u8 packet_data[64]; +} icmp6_input_trace_t; + +format_function_t format_icmp6_input_trace; +void icmp6_register_type (vlib_main_t * vm, 
icmp6_type_t type, + u32 node_index); +void icmp6_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code, + u32 data); + +extern vlib_node_registration_t ip6_icmp_input_node; + +#endif /* included_vnet_icmp6_h */ + + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/igmp_packet.h b/src/vnet/ip/igmp_packet.h new file mode 100644 index 00000000..503259ec --- /dev/null +++ b/src/vnet/ip/igmp_packet.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * igmp_packet.h: igmp packet format + * + * Copyright (c) 2011 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vnet_igmp_packet_h +#define included_vnet_igmp_packet_h + +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> + +#define foreach_igmp_type \ + _ (0x11, membership_query) \ + _ (0x12, membership_report_v1) \ + _ (0x13, dvmrp) \ + _ (0x14, pim_v1) \ + _ (0x15, cisco_trace) \ + _ (0x16, membership_report_v2) \ + _ (0x17, leave_group_v2) \ + _ (0x1e, traceroute_response) \ + _ (0x1f, traceroute_request) \ + _ (0x22, membership_report_v3) \ + _ (0x30, router_advertisement) \ + _ (0x31, router_solicitation) \ + _ (0x32, router_termination) + +typedef enum +{ +#define _(n,f) IGMP_TYPE_##f = n, + foreach_igmp_type +#undef _ +} igmp_type_t; + +typedef struct +{ + igmp_type_t type:8; + + u8 code; + + u16 checksum; +} igmp_header_t; + +typedef struct +{ + /* membership_query, version <= 2 reports. */ + igmp_header_t header; + + /* Multicast destination address. */ + ip4_address_t dst; +} igmp_message_t; + +#define foreach_igmp_membership_group_v3_type \ + _ (1, mode_is_filter_include) \ + _ (2, mode_is_filter_exclude) \ + _ (3, change_to_filter_include) \ + _ (4, change_to_filter_exclude) \ + _ (5, allow_new_sources) \ + _ (6, block_old_sources) + +typedef enum +{ +#define _(n,f) IGMP_MEMBERSHIP_GROUP_##f = n, + foreach_igmp_membership_group_v3_type +#undef _ +} igmp_membership_group_v3_type_t; + +typedef struct +{ + igmp_membership_group_v3_type_t type:8; + + /* Number of 32 bit words of aux data after source addresses. 
*/ + u8 n_aux_u32s; + + /* Number of source addresses that follow. */ + u16 n_src_addresses; + + /* Destination multicast address. */ + ip4_address_t dst_address; + + ip4_address_t src_addresses[0]; +} igmp_membership_group_v3_t; + +/* Returns g advanced by the size of its source addresses and aux words. NOTE(review): the offset does not include sizeof (g[0]) and n_src_addresses is read without a byte-order conversion - confirm both are intended. */ always_inline igmp_membership_group_v3_t * +igmp_membership_group_v3_next (igmp_membership_group_v3_t * g) +{ + return ((void *) g + + g->n_src_addresses * sizeof (g->src_addresses[0]) + + g->n_aux_u32s * sizeof (u32)); +} + +typedef struct +{ + /* Type 0x22. */ + igmp_header_t header; + + u16 unused; + + /* Number of groups which follow. */ + u16 n_groups; + + igmp_membership_group_v3_t groups[0]; +} igmp_membership_report_v3_t; + +/* IP6 flavor of IGMP is called MLD which is embedded in ICMP6. */ +typedef struct +{ + /* Preceded by ICMP v6 header. */ + u16 max_response_delay_in_milliseconds; + u16 reserved; + ip6_address_t dst; +} mld_header_t; + +#endif /* included_vnet_igmp_packet_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api new file mode 100644 index 00000000..f26d7943 --- /dev/null +++ b/src/vnet/ip/ip.api @@ -0,0 +1,551 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** \file + + This file defines vpp IP control-plane API messages which are generally + called through a shared memory interface. 
+*/ + +/** \brief Add / del table request + A table can be added multiple times, but need be deleted only once. + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_ipv6 - V4 or V6 table + @param table_id - table ID associated with the route + This table ID will apply to both the unicast + and multicast FIBs + @param name - A client provided name/tag for the table. If this is + not set by the client, then VPP will generate something + meaningful. +*/ +autoreply define ip_table_add_del +{ + u32 client_index; + u32 context; + u32 table_id; + u8 is_ipv6; + u8 is_add; + u8 name[64]; +}; + +/** \brief Dump IP fib table + @param client_index - opaque cookie to identify the sender +*/ +define ip_fib_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief FIB path + @param sw_if_index - index of the interface + @param weight - The weight, for UCMP + @param preference - The preference of the path. The lowest preference is preferred + @param is_local - local if non-zero, else remote + @param is_drop - Drop the packet + @param is_unreach - Drop the packet and rate limit send ICMP unreachable + @param is_prohibit - Drop the packet and rate limit send ICMP prohibited + @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2 + @param next_hop[16] - the next hop address + + WARNING: this type is replicated, pending cleanup completion +*/ +typeonly manual_print manual_endian define fib_path +{ + u32 sw_if_index; + u8 weight; + u8 preference; + u8 is_local; + u8 is_drop; + u8 is_unreach; + u8 is_prohibit; + u8 afi; + u8 next_hop[16]; +}; + +/** \brief IP FIB table response + @param table_id - IP fib table id + @param address_length - mask length + @param address - ip4 prefix + @param count - the number of fib_path in path + @param path - array of fib_path structures +*/ +manual_endian manual_print define ip_fib_details +{ + u32 context; + u32 table_id; + u8 table_name[64]; + u8 address_length; + 
u8 address[4]; + u32 count; + vl_api_fib_path_t path[count]; +}; + +/** \brief Dump IP6 fib table + @param client_index - opaque cookie to identify the sender +*/ +define ip6_fib_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief IP6 FIB table entry response + @param table_id - IP6 fib table id + @param address_length - mask length + @param address - ip6 prefix + @param count - the number of fib_path in path + @param path - array of fib_path structures +*/ +manual_endian manual_print define ip6_fib_details +{ + u32 context; + u32 table_id; + u8 table_name[64]; + u8 address_length; + u8 address[16]; + u32 count; + vl_api_fib_path_t path[count]; +}; + +/** \brief Dump IP neighbors + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - the interface to dump neighbors + @param is_ipv6 - [1|0] to indicate if address family is ipv[6|4] +*/ +define ip_neighbor_dump +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 is_ipv6; +}; + +/** \brief IP neighbors dump response + @param context - sender context which was passed in the request + @param is_static - [1|0] to indicate if neighbor is statically configured + @param is_ipv6 - [1|0] to indicate if address family is ipv[6|4] +*/ +define ip_neighbor_details { + u32 context; + u8 is_static; + u8 is_ipv6; + u8 mac_address[6]; + u8 ip_address[16]; +}; + +/** \brief IP neighbor add / del request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface used to reach neighbor + @param is_add - 1 to add neighbor, 0 to delete + @param is_ipv6 - 1 for IPv6 neighbor, 0 for IPv4 + @param is_static - A static neighbor entry - these are not flushed + if the interface goes down. + @param is_no_adj_fib - Do not create a corresponding entry in the FIB + table for the neighbor. 
+ @param mac_address - l2 address of the neighbor + @param dst_address - ip4 or ip6 address of the neighbor +*/ +autoreply define ip_neighbor_add_del +{ + u32 client_index; + u32 context; + u32 sw_if_index; + /* 1 = add, 0 = delete */ + u8 is_add; + u8 is_ipv6; + u8 is_static; + u8 is_no_adj_fib; + u8 mac_address[6]; + u8 dst_address[16]; +}; + +/** \brief Set the ip flow hash config for a fib request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param vrf_id - vrf/fib id + @param is_ipv6 - if non-zero the fib is ip6, else ip4 + @param src - if non-zero include src in flow hash + @param dst - if non-zero include dst in flow hash + @param sport - if non-zero include sport in flow hash + @param dport - if non-zero include dport in flow hash + @param proto -if non-zero include proto in flow hash + @param reverse - if non-zero include reverse in flow hash +*/ +autoreply define set_ip_flow_hash +{ + u32 client_index; + u32 context; + u32 vrf_id; + u8 is_ipv6; + u8 src; + u8 dst; + u8 sport; + u8 dport; + u8 proto; + u8 reverse; +}; + +/** \brief IPv6 router advertisement config request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param suppress - + @param managed - + @param other - + @param ll_option - + @param send_unicast - + @param cease - + @param is_no - + @param default_router - + @param max_interval - + @param min_interval - + @param lifetime - + @param initial_count - + @param initial_interval - +*/ +autoreply define sw_interface_ip6nd_ra_config +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 suppress; + u8 managed; + u8 other; + u8 ll_option; + u8 send_unicast; + u8 cease; + u8 is_no; + u8 default_router; + u32 max_interval; + u32 min_interval; + u32 lifetime; + u32 initial_count; + u32 initial_interval; +}; + +/** \brief IPv6 router advertisement prefix config request + @param client_index 
- opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - The interface the RA prefix information is for + @param address[] - The prefix to advertise + @param address_length - the prefix length + @param use_default - Revert to default settings + @param no_advertise - Do not advertise this prefix + @param off_link - The prefix is off link (it is not configured on the interface) + Configures the L-flag, When set, indicates that this + prefix can be used for on-link determination. + @param no_autoconfig - Setting for the A-flag. When + set indicates that this prefix can be used for + stateless address configuration. + @param no_onlink - The prefix is not on link. Make sure this is consistent + with the off_link parameter else YMMV + @param is_no - add/delete + @param val_lifetime - The length of time in + seconds (relative to the time the packet is sent) + that the prefix is valid for the purpose of on-link + determination. A value of all one bits + (0xffffffff) represents infinity + @param pref_lifetime - The length of time in + seconds (relative to the time the packet is sent) + that addresses generated from the prefix via + stateless address autoconfiguration remain + preferred [ADDRCONF]. A value of all one bits + (0xffffffff) represents infinity. 
+*/ +autoreply define sw_interface_ip6nd_ra_prefix +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 address[16]; + u8 address_length; + u8 use_default; + u8 no_advertise; + u8 off_link; + u8 no_autoconfig; + u8 no_onlink; + u8 is_no; + u32 val_lifetime; + u32 pref_lifetime; +}; + +/** \brief IPv6 ND proxy config + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - The interface the host is on + @param address - The address of the host to proxy for + @param is_del - Deleting or adding (presumably non-zero deletes - confirm against the handler) +*/ +autoreply define ip6nd_proxy_add_del +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 is_del; + u8 address[16]; +}; + +/** \brief IPv6 ND proxy details returned after request + @param context - sender context, to match reply w/ request + @param sw_if_index - The interface the host is on + @param address - The address of the host to proxy for +*/ +define ip6nd_proxy_details +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 address[16]; +}; + +/** \brief IPv6 ND proxy dump request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request +*/ +define ip6nd_proxy_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief IPv6 interface enable / disable request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - interface used to reach neighbor + @param enable - if non-zero enable ip6 on interface, else disable +*/ +autoreply define sw_interface_ip6_enable_disable +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 enable; /* set to true if enable */ +}; + +/** \brief IPv6 set link local address on interface request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + 
@param sw_if_index - interface to set link local on + @param address[] - the new link local address +*/ +autoreply define sw_interface_ip6_set_link_local_address +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 address[16]; +}; + +/** \brief Add / del route request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - software index of the new vlan's parent interface + @param vrf_id - fib table /vrf associated with the route + @param lookup_in_vrf - + @param classify_table_index - + @param create_vrf_if_needed - + @param is_add - 1 if adding the route, 0 if deleting + @param is_drop - Drop the packet + @param is_unreach - Drop the packet and rate limit send ICMP unreachable + @param is_prohibit - Drop the packet and rate limit send ICMP prohibited + @param is_ipv6 - 0 if an ip4 route, else ip6 + @param is_local - + @param is_classify - + @param is_multipath - Set to 1 if this is a multipath route, else 0 + @param not_last - Is last or not last msg in group of multiple add/del msgs + @param next_hop_weight - + @param dst_address_length - + @param dst_address[16] - + @param next_hop_address[16] - + @param next_hop_n_out_labels - the number of labels in the label stack + @param next_hop_out_label_stack - the next-hop output label stack, outer most first + @param next_hop_via_label - The next-hop is a resolved via a local label +*/ +autoreply define ip_add_del_route +{ + u32 client_index; + u32 context; + u32 next_hop_sw_if_index; + u32 table_id; + u32 classify_table_index; + u32 next_hop_table_id; + u8 create_vrf_if_needed; + u8 is_add; + u8 is_drop; + u8 is_unreach; + u8 is_prohibit; + u8 is_ipv6; + u8 is_local; + u8 is_classify; + u8 is_multipath; + u8 is_resolve_host; + u8 is_resolve_attached; + /* Is last/not-last message in group of multiple add/del messages. 
*/ + u8 not_last; + u8 next_hop_weight; + u8 next_hop_preference; + u8 dst_address_length; + u8 dst_address[16]; + u8 next_hop_address[16]; + u8 next_hop_n_out_labels; + u32 next_hop_via_label; + u32 next_hop_out_label_stack[next_hop_n_out_labels]; +}; + +/** \brief Add / del multicast route request + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param sw_if_index - software index of the new vlan's parent interface + @param vrf_id - fib table /vrf associated with the route + + FIXME +*/ +autoreply define ip_mroute_add_del +{ + u32 client_index; + u32 context; + u32 next_hop_sw_if_index; + u32 table_id; + u32 entry_flags; + u32 itf_flags; + u32 rpf_id; + u16 grp_address_length; + u8 create_vrf_if_needed; + u8 is_add; + u8 is_ipv6; + u8 is_local; + u8 grp_address[16]; + u8 src_address[16]; +}; + +/** \brief Dump IP multicast fib table + @param client_index - opaque cookie to identify the sender +*/ +define ip_mfib_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief IP Multicast FIB table response + @param table_id - IP fib table id + @param address_length - mask length + @param grp_address - Group address/prefix + @param src_address - Source address + @param count - the number of fib_path in path + @param path - array of fib_path structures +*/ +manual_endian manual_print define ip_mfib_details +{ + u32 context; + u32 table_id; + u32 entry_flags; + u32 rpf_id; + u8 address_length; + u8 grp_address[4]; + u8 src_address[4]; + u32 count; + vl_api_fib_path_t path[count]; +}; + +/** \brief Dump IP6 multicast fib table + @param client_index - opaque cookie to identify the sender +*/ +define ip6_mfib_dump +{ + u32 client_index; + u32 context; +}; + +/** \brief IP6 Multicast FIB table response + @param table_id - IP fib table id + @param address_length - mask length + @param grp_address - Group address/prefix + @param src_address - Source address + @param count - the number of fib_path in path + @param path - array of fib_path 
structures +*/ +manual_endian manual_print define ip6_mfib_details +{ + u32 context; + u32 table_id; + u8 address_length; + u8 grp_address[16]; + u8 src_address[16]; + u32 count; + vl_api_fib_path_t path[count]; +}; + +define ip_address_details +{ + u32 client_index; + u32 context; + u8 ip[16]; + u8 prefix_length; + u32 sw_if_index; + u8 is_ipv6; +}; + +define ip_address_dump +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u8 is_ipv6; +}; + +define ip_details +{ + u32 sw_if_index; + u32 context; + u8 is_ipv6; +}; + +define ip_dump +{ + u32 client_index; + u32 context; + u8 is_ipv6; +}; + +define mfib_signal_dump +{ + u32 client_index; + u32 context; +}; + +define mfib_signal_details +{ + u32 client_index; + u32 context; + u32 sw_if_index; + u32 table_id; + u16 grp_address_len; + u8 grp_address[16]; + u8 src_address[16]; + u16 ip_packet_len; + u8 ip_packet_data[256]; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h new file mode 100644 index 00000000..7e26bc6c --- /dev/null +++ b/src/vnet/ip/ip.h @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/ip.h: ip generic (4 or 6) main + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip_main_h +#define included_ip_main_h + +#include <vppinfra/hash.h> +#include <vppinfra/heap.h> /* adjacency heap */ +#include <vppinfra/ptclosure.h> + +#include <vnet/vnet.h> + +#include <vnet/ip/format.h> +#include <vnet/ip/ip_packet.h> +#include <vnet/ip/lookup.h> + +#include <vnet/tcp/tcp_packet.h> +#include <vnet/udp/udp_packet.h> +#include <vnet/ip/icmp46_packet.h> + +#include <vnet/ip/ip4.h> +#include <vnet/ip/ip4_error.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/icmp4.h> + +#include <vnet/ip/ip6.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/ip/ip6_error.h> +#include <vnet/ip/icmp6.h> +#include <vnet/classify/vnet_classify.h> + +/* Per protocol info. */ +typedef struct +{ + /* Protocol name (also used as hash key). 
*/ + u8 *name; + + /* Protocol number. */ + ip_protocol_t protocol; + + /* Format function for this IP protocol. */ + format_function_t *format_header; + + /* Parser for header. */ + unformat_function_t *unformat_header; + + /* Parser for per-protocol matches. */ + unformat_function_t *unformat_match; + + /* Parser for packet generator edits for this protocol. */ + unformat_function_t *unformat_pg_edit; +} ip_protocol_info_t; + +/* Per TCP/UDP port info. */ +typedef struct +{ + /* Port name (used as hash key). */ + u8 *name; + + /* UDP/TCP port number in network byte order. */ + u16 port; + + /* Port specific format function. */ + format_function_t *format_header; + + /* Parser for packet generator edits for this protocol. */ + unformat_function_t *unformat_pg_edit; +} tcp_udp_port_info_t; + +typedef struct +{ + /* Per IP protocol info. */ + ip_protocol_info_t *protocol_infos; + + /* Protocol info index hashed by 8 bit IP protocol. */ + uword *protocol_info_by_protocol; + + /* Hash table mapping IP protocol name (see protocols.def) + to protocol number. */ + uword *protocol_info_by_name; + + /* Per TCP/UDP port info. */ + tcp_udp_port_info_t *port_infos; + + /* Hash table from network-byte-order port to port info index. */ + uword *port_info_by_port; + + /* Hash table mapping TCP/UDP name to port info index. */ + uword *port_info_by_name; +} ip_main_t; + +extern ip_main_t ip_main; + +clib_error_t *ip_main_init (vlib_main_t * vm); + +static inline ip_protocol_info_t * +ip_get_protocol_info (ip_main_t * im, u32 protocol) +{ + uword *p; + + p = hash_get (im->protocol_info_by_protocol, protocol); + return p ? vec_elt_at_index (im->protocol_infos, p[0]) : 0; +} + +static inline tcp_udp_port_info_t * +ip_get_tcp_udp_port_info (ip_main_t * im, u32 port) +{ + uword *p; + + p = hash_get (im->port_info_by_port, port); + return p ? 
vec_elt_at_index (im->port_infos, p[0]) : 0; +} + +/* Fold n_bytes_to_checksum bytes into sum, starting first_buffer_offset + bytes into the current data of first_buffer and continuing through + chained buffers. Returns the updated sum. */ +always_inline ip_csum_t +ip_incremental_checksum_buffer (vlib_main_t * vm, + vlib_buffer_t * first_buffer, + u32 first_buffer_offset, + u32 n_bytes_to_checksum, ip_csum_t sum) +{ + vlib_buffer_t *b = first_buffer; + u32 n_bytes_left = n_bytes_to_checksum; + ASSERT (b->current_length >= first_buffer_offset); + void *h; + u32 n; + + /* Only the bytes after the offset are available in the first buffer; + the ASSERT above keeps the subtraction from wrapping. */ + n = clib_min (n_bytes_left, b->current_length - first_buffer_offset); + h = vlib_buffer_get_current (b) + first_buffer_offset; + sum = ip_incremental_checksum (sum, h, n); + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + while (1) + { + n_bytes_left -= n; + if (n_bytes_left == 0) + break; + b = vlib_get_buffer (vm, b->next_buffer); + n = clib_min (n_bytes_left, b->current_length); + h = vlib_buffer_get_current (b); + sum = ip_incremental_checksum (sum, h, n); + } + } + + return sum; +} + +void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index); + +extern vlib_node_registration_t ip4_inacl_node; +extern vlib_node_registration_t ip6_inacl_node; + +void ip_table_create (fib_protocol_t fproto, u32 table_id, u8 is_api, + const u8 * name); + +void ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api); + +int ip_table_bind (fib_protocol_t fproto, u32 sw_if_index, + u32 table_id, u8 is_api); + +#endif /* included_ip_main_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h new file mode 100644 index 00000000..af0e6b9a --- /dev/null +++ b/src/vnet/ip/ip4.h @@ -0,0 +1,387 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4.h: ip4 main include file + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip_ip4_h +#define included_ip_ip4_h + +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/lookup.h> +#include <vnet/buffer.h> +#include <vnet/feature/feature.h> +#include <vnet/ip/icmp46_packet.h> + +typedef struct ip4_mfib_t +{ + /* Hash table for each prefix length mapping. */ + uword *fib_entry_by_dst_address[65]; + + /* Table ID (hash key) for this FIB. 
/**
 * @brief IPv4 main type.
 *
 * State of IPv4 VPP processing including:
 * - FIBs
 * - Feature indices used in feature topological sort
 * - Feature node run time references
 */

typedef struct ip4_main_t
{
  /** Generic IP lookup state; also passed to the interface address helpers. */
  ip_lookup_main_t lookup_main;

  /** Vector of FIBs. */
  struct fib_table_t_ *fibs;

  /** Vector of MTries. */
  struct ip4_fib_t_ *v4_fibs;

  /** Vector of MFIBs. */
  struct mfib_table_t_ *mfibs;

  /** Address masks indexed by prefix length (0..32); see
      ip4_destination_matches_route (). */
  u32 fib_masks[33];

  /** Table index indexed by software interface. */
  u32 *fib_index_by_sw_if_index;

  /** Multicast table index indexed by software interface. */
  u32 *mfib_index_by_sw_if_index;

  /** IP4 enabled count by software interface. */
  u8 *ip_enabled_by_sw_if_index;

  /** Hash table mapping table id to fib index.
     ID space is not necessarily dense; index space is dense. */
  uword *fib_index_by_table_id;

  /** Hash table mapping table id to multicast fib index.
     ID space is not necessarily dense; index space is dense. */
  uword *mfib_index_by_table_id;

  /** Functions to call when interface address changes. */
    ip4_add_del_interface_address_callback_t
    * add_del_interface_address_callbacks;

  /** Functions to call when interface to table binding changes. */
  ip4_table_bind_callback_t *table_bind_callbacks;

  /** Template used to generate IP4 ARP packets. */
  vlib_packet_template_t ip4_arp_request_packet_template;

  /** Seed for Jenkins hash used to compute ip4 flow hash. */
  u32 flow_hash_seed;

  /** @brief Template information for VPP generated packets */
  struct
  {
    /** TTL to use for host generated packets. */
    u8 ttl;

    /** TOS byte to use for host generated packets. */
    u8 tos;

    /** Explicit padding after the two one-byte fields. */
    u8 pad[2];
  } host_config;
} ip4_main_t;
*/ +always_inline uword +ip4_unaligned_destination_matches_route (ip4_main_t * im, + ip4_address_t * key, + ip4_address_t * dest, + uword dest_length) +{ + return 0 == + ((clib_mem_unaligned (&key->data_u32, u32) ^ dest-> + data_u32) & im->fib_masks[dest_length]); +} + +always_inline int +ip4_src_address_for_packet (ip_lookup_main_t * lm, + u32 sw_if_index, ip4_address_t * src) +{ + u32 if_add_index = lm->if_address_pool_index_by_sw_if_index[sw_if_index]; + if (PREDICT_TRUE (if_add_index != ~0)) + { + ip_interface_address_t *if_add = + pool_elt_at_index (lm->if_address_pool, if_add_index); + ip4_address_t *if_ip = ip_interface_address_get_address (lm, if_add); + *src = *if_ip; + return 0; + } + else + { + src->as_u32 = 0; + } + return (!0); +} + +/* Find interface address which matches destination. */ +always_inline ip4_address_t * +ip4_interface_address_matching_destination (ip4_main_t * im, + ip4_address_t * dst, + u32 sw_if_index, + ip_interface_address_t ** + result_ia) +{ + ip_lookup_main_t *lm = &im->lookup_main; + ip_interface_address_t *ia; + ip4_address_t *result = 0; + + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip4_address_t * a = ip_interface_address_get_address (lm, ia); + if (ip4_destination_matches_route (im, dst, a, ia->address_length)) + { + result = a; + break; + } + })); + /* *INDENT-ON* */ + if (result_ia) + *result_ia = result ? ia : 0; + return result; +} + +ip4_address_t *ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, + ip_interface_address_t ** + result_ia); + +clib_error_t *ip4_add_del_interface_address (vlib_main_t * vm, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, u32 is_del); + +void ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable); + +int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2); + +/* Send an ARP request to see if given destination is reachable on given interface. 
*/ +clib_error_t *ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, + u32 sw_if_index); + +clib_error_t *ip4_set_arp_limit (u32 arp_limit); + +uword +ip4_udp_register_listener (vlib_main_t * vm, + u16 dst_port, u32 next_node_index); + +void +ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type, u32 node_index); + +u16 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, + ip4_header_t * ip0); + +void ip4_register_protocol (u32 protocol, u32 node_index); + +serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main; + +int vnet_set_ip4_flow_hash (u32 table_id, + flow_hash_config_t flow_hash_config); + +int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, + u32 table_index); + +/* Compute flow hash. We'll use it to select which adjacency to use for this + flow. And other things. */ +always_inline u32 +ip4_compute_flow_hash (const ip4_header_t * ip, + flow_hash_config_t flow_hash_config) +{ + tcp_header_t *tcp = (void *) (ip + 1); + u32 a, b, c, t1, t2; + uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP + || ip->protocol == IP_PROTOCOL_UDP); + + t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR) + ? ip->src_address.data_u32 : 0; + t2 = (flow_hash_config & IP_FLOW_HASH_DST_ADDR) + ? ip->dst_address.data_u32 : 0; + + a = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t2 : t1; + b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2; + b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0; + + t1 = is_tcp_udp ? tcp->src : 0; + t2 = is_tcp_udp ? tcp->dst : 0; + + t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0; + t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0; + + c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? 
/**
 * Push IPv4 header to buffer
 *
 * This does not support fragmentation: the header is written with the
 * don't-fragment flag set, no options (version/length byte 0x45), TOS 0
 * and TTL 255.
 *
 * @param vm - vlib_main
 * @param b - buffer to write the header to
 * @param src - source IP
 * @param dst - destination IP
 * @param proto - payload proto
 * @param csum_offload - when non-zero, leave the header checksum at 0 and
 *                       flag the buffer for IP checksum offload instead of
 *                       computing the checksum here
 *
 * @return - pointer to start of IP header
 */
always_inline void *
vlib_buffer_push_ip4 (vlib_main_t * vm, vlib_buffer_t * b,
                      ip4_address_t * src, ip4_address_t * dst, int proto,
                      u8 csum_offload)
{
  ip4_header_t *ih;

  /* make some room */
  ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t));

  ih->ip_version_and_header_length = 0x45;
  ih->tos = 0;
  /* length is taken after the push, so it is computed with the new header
     already in place */
  ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));

  /* No fragments */
  ih->flags_and_fragment_offset = clib_host_to_net_u16 (IP_DF);
  ih->ttl = 255;
  ih->protocol = proto;
  ih->src_address.as_u32 = src->as_u32;
  ih->dst_address.as_u32 = dst->as_u32;

  /* Offload ip4 header checksum generation */
  if (csum_offload)
    {
      ih->checksum = 0;
      b->flags |= VNET_BUFFER_F_OFFLOAD_IP_CKSUM | VNET_BUFFER_F_IS_IP4;
      /* record where the L3 header starts and, right behind it, the L4
         header, both as offsets from b->data */
      vnet_buffer (b)->l3_hdr_offset = (u8 *) ih - b->data;
      vnet_buffer (b)->l4_hdr_offset = vnet_buffer (b)->l3_hdr_offset +
        sizeof (*ih);
    }
  else
    ih->checksum = ip4_header_checksum (ih);

  return ih;
}
+++ b/src/vnet/ip/ip46_cli.c @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_cli.c: ip4 commands + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> + +/** + * @file + * @brief Set IP Address. + * + * Configure an IPv4 or IPv6 address for on an interface. 
+ */ + + +int +ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2) +{ + return clib_net_to_host_u32 (a1->data_u32) - + clib_net_to_host_u32 (a2->data_u32); +} + +int +ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2) +{ + int i; + for (i = 0; i < ARRAY_LEN (a1->as_u16); i++) + { + int cmp = + clib_net_to_host_u16 (a1->as_u16[i]) - + clib_net_to_host_u16 (a2->as_u16[i]); + if (cmp != 0) + return cmp; + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip_command, static) = { + .path = "set interface ip", + .short_help = "IP4/IP6 commands", +}; +/* *INDENT-ON* */ + +void +ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index) +{ + ip4_main_t *im4 = &ip4_main; + ip4_address_t *ip4_addrs = 0; + u32 *ip4_masks = 0; + ip6_main_t *im6 = &ip6_main; + ip6_address_t *ip6_addrs = 0; + u32 *ip6_masks = 0; + ip_interface_address_t *ia; + int i; + + /* *INDENT-OFF* */ + foreach_ip_interface_address (&im4->lookup_main, ia, sw_if_index, + 0 /* honor unnumbered */, + ({ + ip4_address_t * x = (ip4_address_t *) + ip_interface_address_get_address (&im4->lookup_main, ia); + vec_add1 (ip4_addrs, x[0]); + vec_add1 (ip4_masks, ia->address_length); + })); + /* *INDENT-ON* */ + + /* *INDENT-OFF* */ + foreach_ip_interface_address (&im6->lookup_main, ia, sw_if_index, + 0 /* honor unnumbered */, + ({ + ip6_address_t * x = (ip6_address_t *) + ip_interface_address_get_address (&im6->lookup_main, ia); + vec_add1 (ip6_addrs, x[0]); + vec_add1 (ip6_masks, ia->address_length); + })); + /* *INDENT-ON* */ + + for (i = 0; i < vec_len (ip4_addrs); i++) + ip4_add_del_interface_address (vm, sw_if_index, &ip4_addrs[i], + ip4_masks[i], 1 /* is_del */ ); + for (i = 0; i < vec_len (ip6_addrs); i++) + ip6_add_del_interface_address (vm, sw_if_index, &ip6_addrs[i], + ip6_masks[i], 1 /* is_del */ ); + + vec_free (ip4_addrs); + vec_free (ip4_masks); + vec_free (ip6_addrs); + vec_free (ip6_masks); +} + +static clib_error_t * +ip_address_delete_cleanup 
(vnet_main_t * vnm, u32 hw_if_index, u32 is_create) +{ + vlib_main_t *vm = vlib_get_main (); + vnet_hw_interface_t *hw; + + if (is_create) + return 0; + + hw = vnet_get_hw_interface (vnm, hw_if_index); + + ip_del_all_interface_addresses (vm, hw->sw_if_index); + return 0; +} + +VNET_HW_INTERFACE_ADD_DEL_FUNCTION (ip_address_delete_cleanup); + +static clib_error_t * +add_del_ip_address (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ip4_address_t a4; + ip6_address_t a6; + clib_error_t *error = 0; + u32 sw_if_index, length, is_del; + + sw_if_index = ~0; + is_del = 0; + + if (unformat (input, "del")) + is_del = 1; + + if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (is_del && unformat (input, "all")) + ip_del_all_interface_addresses (vm, sw_if_index); + else if (unformat (input, "%U/%d", unformat_ip4_address, &a4, &length)) + error = ip4_add_del_interface_address (vm, sw_if_index, &a4, length, + is_del); + else if (unformat (input, "%U/%d", unformat_ip6_address, &a6, &length)) + error = ip6_add_del_interface_address (vm, sw_if_index, &a6, length, + is_del); + else + { + error = clib_error_return (0, "expected IP4/IP6 address/length `%U'", + format_unformat_error, input); + goto done; + } + + +done: + return error; +} + +/*? + * Add an IP Address to an interface or remove and IP Address from an interface. + * The IP Address can be an IPv4 or an IPv6 address. Interfaces may have multiple + * IPv4 and IPv6 addresses. There is no concept of primary vs. secondary + * interface addresses; they're just addresses. + * + * To display the addresses associated with a given interface, use the command + * '<em>show interface address <interface></em>'. + * + * Note that the debug CLI does not enforce classful mask-width / addressing + * constraints. 
+ * + * @cliexpar + * @parblock + * An example of how to add an IPv4 address to an interface: + * @cliexcmd{set interface ip address GigabitEthernet2/0/0 172.16.2.12/24} + * + * An example of how to add an IPv6 address to an interface: + * @cliexcmd{set interface ip address GigabitEthernet2/0/0 @::a:1:1:0:7/126} + * + * To delete a specific interface ip address: + * @cliexcmd{set interface ip address GigabitEthernet2/0/0 172.16.2.12/24 del} + * + * To delete all interfaces addresses (IPv4 and IPv6): + * @cliexcmd{set interface ip address GigabitEthernet2/0/0 del all} + * @endparblock + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip_address_command, static) = { + .path = "set interface ip address", + .function = add_del_ip_address, + .short_help = "set interface ip address [del] <interface> <ip-addr>/<mask> | [all]", +}; +/* *INDENT-ON* */ + +/* Dummy init function to get us linked in. */ +static clib_error_t * +ip4_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (ip4_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip4_error.h b/src/vnet/ip/ip4_error.h new file mode 100644 index 00000000..95d12ec2 --- /dev/null +++ b/src/vnet/ip/ip4_error.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/* List of ip4 error counters as _ (SYMBOL, "description") pairs.
   The user defines `_' before expanding the list and undefines it
   afterwards; NONE must stay the first entry. */
#define foreach_ip4_error                                               \
  /* Must be first. */                                                  \
  _ (NONE, "valid ip4 packets")                                         \
                                                                        \
  /* Errors signalled by ip4-input */                                   \
  _ (TOO_SHORT, "ip4 length < 20 bytes")                                \
  _ (BAD_LENGTH, "ip4 length > l2 length")                              \
  _ (BAD_CHECKSUM, "bad ip4 checksum")                                  \
  _ (VERSION, "ip4 version != 4")                                       \
  _ (OPTIONS, "ip4 options present")                                    \
  _ (FRAGMENT_OFFSET_ONE, "ip4 fragment offset == 1")                   \
  _ (TIME_EXPIRED, "ip4 ttl <= 1")                                      \
                                                                        \
  /* Errors signalled by ip4-rewrite. */                                \
  _ (MTU_EXCEEDED, "ip4 MTU exceeded and DF set")                       \
  _ (DST_LOOKUP_MISS, "ip4 destination lookup miss")                    \
  _ (SRC_LOOKUP_MISS, "ip4 source lookup miss")                         \
  _ (ADJACENCY_DROP, "ip4 adjacency drop")                              \
  _ (ADJACENCY_PUNT, "ip4 adjacency punt")                              \
                                                                        \
  /* Errors signalled by ip4-local. */                                  \
  _ (UNKNOWN_PROTOCOL, "unknown ip protocol")                           \
  _ (TCP_CHECKSUM, "bad tcp checksum")                                  \
  _ (UDP_CHECKSUM, "bad udp checksum")                                  \
  _ (UDP_LENGTH, "inconsistent udp/ip lengths")                         \
                                                                        \
  /* Errors signalled by ip4-source-check. */                           \
  _ (UNICAST_SOURCE_CHECK_FAILS, "ip4 unicast source check fails")      \
                                                                        \
  /* Spoofed packets in ip4-rewrite-local */                            \
  _(SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops")    \
                                                                        \
  /* Errors signalled by ip4-inacl */                                   \
  _ (INACL_TABLE_MISS, "input ACL table-miss drops")                    \
  _ (INACL_SESSION_DENY, "input ACL session deny drops")
+ */ +/* + * ip/ip4_format.c: ip4 formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> + +/* Format an IP4 address. */ +u8 * +format_ip4_address (u8 * s, va_list * args) +{ + u8 *a = va_arg (*args, u8 *); + return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]); +} + +/* Format an IP4 route destination and length. */ +u8 * +format_ip4_address_and_length (u8 * s, va_list * args) +{ + u8 *a = va_arg (*args, u8 *); + u8 l = va_arg (*args, u32); + return format (s, "%U/%d", format_ip4_address, a, l); +} + +/* Parse an IP4 address %d.%d.%d.%d. 
*/ +uword +unformat_ip4_address (unformat_input_t * input, va_list * args) +{ + u8 *result = va_arg (*args, u8 *); + unsigned a[4]; + + if (!unformat (input, "%d.%d.%d.%d", &a[0], &a[1], &a[2], &a[3])) + return 0; + + if (a[0] >= 256 || a[1] >= 256 || a[2] >= 256 || a[3] >= 256) + return 0; + + result[0] = a[0]; + result[1] = a[1]; + result[2] = a[2]; + result[3] = a[3]; + + return 1; +} + +/* Format an IP4 header. */ +u8 * +format_ip4_header (u8 * s, va_list * args) +{ + ip4_header_t *ip = va_arg (*args, ip4_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 ip_version, header_bytes; + uword indent; + + /* Nothing to do. */ + if (max_header_bytes < sizeof (ip[0])) + return format (s, "IP header truncated"); + + indent = format_get_indent (s); + indent += 2; + + ip_version = (ip->ip_version_and_header_length >> 4); + header_bytes = (ip->ip_version_and_header_length & 0xf) * sizeof (u32); + + s = format (s, "%U: %U -> %U", + format_ip_protocol, ip->protocol, + format_ip4_address, ip->src_address.data, + format_ip4_address, ip->dst_address.data); + + /* Show IP version and header length only with unexpected values. */ + if (ip_version != 4 || header_bytes != sizeof (ip4_header_t)) + s = format (s, "\n%Uversion %d, header length %d", + format_white_space, indent, ip_version, header_bytes); + + s = format (s, "\n%Utos 0x%02x, ttl %d, length %d, checksum 0x%04x", + format_white_space, indent, + ip->tos, ip->ttl, + clib_net_to_host_u16 (ip->length), + clib_net_to_host_u16 (ip->checksum)); + + /* Check and report invalid checksums. */ + { + u16 c = ip4_header_checksum (ip); + if (c != ip->checksum) + s = format (s, " (should be 0x%04x)", clib_net_to_host_u16 (c)); + } + + { + u32 f = clib_net_to_host_u16 (ip->flags_and_fragment_offset); + u32 o; + + s = format (s, "\n%Ufragment id 0x%04x", + format_white_space, indent, + clib_net_to_host_u16 (ip->fragment_id)); + + /* Fragment offset. 
*/ + o = 8 * (f & 0x1fff); + f ^= o; + if (o != 0) + s = format (s, " offset %d", o); + + if (f != 0) + { + s = format (s, ", flags "); +#define _(l) if (f & IP4_HEADER_FLAG_##l) s = format (s, #l); + _(MORE_FRAGMENTS); + _(DONT_FRAGMENT); + _(CONGESTION); +#undef _ + } + } + + /* Recurse into next protocol layer. */ + if (max_header_bytes != 0 && header_bytes < max_header_bytes) + { + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol); + + if (pi && pi->format_header) + s = format (s, "\n%U%U", + format_white_space, indent - 2, pi->format_header, + /* next protocol header */ (void *) ip + header_bytes, + max_header_bytes - header_bytes); + } + + return s; +} + +/* Parse an IP4 header. */ +uword +unformat_ip4_header (unformat_input_t * input, va_list * args) +{ + u8 **result = va_arg (*args, u8 **); + ip4_header_t *ip; + int old_length; + + /* Allocate space for IP header. */ + { + void *p; + + old_length = vec_len (*result); + vec_add2 (*result, p, sizeof (ip4_header_t)); + ip = p; + } + + memset (ip, 0, sizeof (ip[0])); + ip->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + + if (!unformat (input, "%U: %U -> %U", + unformat_ip_protocol, &ip->protocol, + unformat_ip4_address, &ip->src_address, + unformat_ip4_address, &ip->dst_address)) + return 0; + + /* Parse options. */ + while (1) + { + int i, j; + + if (unformat (input, "tos %U", unformat_vlib_number, &i)) + ip->tos = i; + + else if (unformat (input, "ttl %U", unformat_vlib_number, &i)) + ip->ttl = i; + + else if (unformat (input, "fragment id %U offset %U", + unformat_vlib_number, &i, unformat_vlib_number, &j)) + { + ip->fragment_id = clib_host_to_net_u16 (i); + ip->flags_and_fragment_offset |= + clib_host_to_net_u16 ((i / 8) & 0x1fff); + } + + /* Flags. 
*/ + else if (unformat (input, "mf") || unformat (input, "MF")) + ip->flags_and_fragment_offset |= + clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); + + else if (unformat (input, "df") || unformat (input, "DF")) + ip->flags_and_fragment_offset |= + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT); + + else if (unformat (input, "ce") || unformat (input, "CE")) + ip->flags_and_fragment_offset |= + clib_host_to_net_u16 (IP4_HEADER_FLAG_CONGESTION); + + /* Can't parse input: try next protocol level. */ + else + break; + } + + /* Fill in checksum. */ + ip->checksum = ip4_header_checksum (ip); + + /* Recurse into next protocol layer. */ + { + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol); + + if (pi && pi->unformat_header) + { + if (!unformat_user (input, pi->unformat_header, result)) + return 0; + + /* Result may have moved. */ + ip = (void *) *result + old_length; + } + } + + /* Fill in IP length. */ + ip->length = clib_host_to_net_u16 (vec_len (*result) - old_length); + + return 1; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c new file mode 100755 index 00000000..6b3453b5 --- /dev/null +++ b/src/vnet/ip/ip4_forward.c @@ -0,0 +1,3197 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/ip4_forward.c: IP v4 forwarding + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */ +#include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */ +#include <vnet/ppp/ppp.h> +#include <vnet/srp/srp.h> /* for srp_hw_interface_class */ +#include <vnet/api_errno.h> /* for API error numbers */ +#include <vnet/fib/fib_table.h> /* for FIB table and entry creation */ +#include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */ +#include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */ +#include <vnet/fib/ip4_fib.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/dpo/load_balance_map.h> +#include <vnet/dpo/classify_dpo.h> +#include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */ + +/** + * @file + * @brief IPv4 Forwarding. 
+ * + * This file contains the source code for IPv4 forwarding. + */ + +void +ip4_forward_next_trace (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + vlib_rx_or_tx_t which_adj_index); + +always_inline uword +ip4_lookup_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int lookup_for_responses_to_locally_received_packets) +{ + ip4_main_t *im = &ip4_main; + vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters; + u32 n_left_from, n_left_to_next, *from, *to_next; + ip_lookup_next_t next; + u32 thread_index = vlib_get_thread_index (); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + while (n_left_from >= 8 && n_left_to_next >= 4) + { + vlib_buffer_t *p0, *p1, *p2, *p3; + ip4_header_t *ip0, *ip1, *ip2, *ip3; + ip_lookup_next_t next0, next1, next2, next3; + const load_balance_t *lb0, *lb1, *lb2, *lb3; + ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3; + ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3; + ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3; + u32 pi0, fib_index0, lb_index0; + u32 pi1, fib_index1, lb_index1; + u32 pi2, fib_index2, lb_index2; + u32 pi3, fib_index3, lb_index3; + flow_hash_config_t flow_hash_config0, flow_hash_config1; + flow_hash_config_t flow_hash_config2, flow_hash_config3; + u32 hash_c0, hash_c1, hash_c2, hash_c3; + const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p4, *p5, *p6, *p7; + + p4 = vlib_get_buffer (vm, from[4]); + p5 = vlib_get_buffer (vm, from[5]); + p6 = vlib_get_buffer (vm, from[6]); + p7 = vlib_get_buffer (vm, from[7]); + + vlib_prefetch_buffer_header (p4, LOAD); + vlib_prefetch_buffer_header (p5, LOAD); + vlib_prefetch_buffer_header (p6, LOAD); + vlib_prefetch_buffer_header (p7, LOAD); + + CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD); + CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD); + CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD); + CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + pi2 = to_next[2] = from[2]; + pi3 = to_next[3] = from[3]; + + from += 4; + to_next += 4; + n_left_to_next -= 4; + n_left_from -= 4; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + p2 = vlib_get_buffer (vm, pi2); + p3 = vlib_get_buffer (vm, pi3); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + ip2 = vlib_buffer_get_current (p2); + ip3 = vlib_buffer_get_current (p3); + + dst_addr0 = &ip0->dst_address; + dst_addr1 = &ip1->dst_address; + dst_addr2 = &ip2->dst_address; + dst_addr3 = &ip3->dst_address; + + fib_index0 = + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (p0)->sw_if_index[VLIB_RX]); + fib_index1 = + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (p1)->sw_if_index[VLIB_RX]); + fib_index2 = + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (p2)->sw_if_index[VLIB_RX]); + fib_index3 = + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (p3)->sw_if_index[VLIB_RX]); + fib_index0 = + (vnet_buffer (p0)->sw_if_index[VLIB_TX] == + (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; + fib_index1 = + (vnet_buffer (p1)->sw_if_index[VLIB_TX] == + (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; + fib_index2 = + (vnet_buffer (p2)->sw_if_index[VLIB_TX] == + (u32) ~ 0) ? 
fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX]; + fib_index3 = + (vnet_buffer (p3)->sw_if_index[VLIB_TX] == + (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX]; + + + if (!lookup_for_responses_to_locally_received_packets) + { + mtrie0 = &ip4_fib_get (fib_index0)->mtrie; + mtrie1 = &ip4_fib_get (fib_index1)->mtrie; + mtrie2 = &ip4_fib_get (fib_index2)->mtrie; + mtrie3 = &ip4_fib_get (fib_index3)->mtrie; + + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); + leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1); + leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2); + leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3); + } + + if (!lookup_for_responses_to_locally_received_packets) + { + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2); + leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2); + leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2); + } + + if (!lookup_for_responses_to_locally_received_packets) + { + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3); + leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3); + leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3); + } + + if (lookup_for_responses_to_locally_received_packets) + { + lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; + lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX]; + lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX]; + lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX]; + } + else + { + lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); + lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2); + lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3); + } + + ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3); + lb0 = load_balance_get (lb_index0); + lb1 = load_balance_get 
(lb_index1); + lb2 = load_balance_get (lb_index2); + lb3 = load_balance_get (lb_index3); + + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + ASSERT (lb1->lb_n_buckets > 0); + ASSERT (is_pow2 (lb1->lb_n_buckets)); + ASSERT (lb2->lb_n_buckets > 0); + ASSERT (is_pow2 (lb2->lb_n_buckets)); + ASSERT (lb3->lb_n_buckets > 0); + ASSERT (is_pow2 (lb3->lb_n_buckets)); + + /* Use flow hash to compute multipath adjacency. */ + hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; + hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0; + hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0; + hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0; + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + flow_hash_config0 = lb0->lb_hash_config; + hash_c0 = vnet_buffer (p0)->ip.flow_hash = + ip4_compute_flow_hash (ip0, flow_hash_config0); + dpo0 = + load_balance_get_fwd_bucket (lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); + } + if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) + { + flow_hash_config1 = lb1->lb_hash_config; + hash_c1 = vnet_buffer (p1)->ip.flow_hash = + ip4_compute_flow_hash (ip1, flow_hash_config1); + dpo1 = + load_balance_get_fwd_bucket (lb1, + (hash_c1 & + (lb1->lb_n_buckets_minus_1))); + } + else + { + dpo1 = load_balance_get_bucket_i (lb1, 0); + } + if (PREDICT_FALSE (lb2->lb_n_buckets > 1)) + { + flow_hash_config2 = lb2->lb_hash_config; + hash_c2 = vnet_buffer (p2)->ip.flow_hash = + ip4_compute_flow_hash (ip2, flow_hash_config2); + dpo2 = + load_balance_get_fwd_bucket (lb2, + (hash_c2 & + (lb2->lb_n_buckets_minus_1))); + } + else + { + dpo2 = load_balance_get_bucket_i (lb2, 0); + } + if (PREDICT_FALSE (lb3->lb_n_buckets > 1)) + { + flow_hash_config3 = lb3->lb_hash_config; + hash_c3 = vnet_buffer (p3)->ip.flow_hash = + ip4_compute_flow_hash (ip3, flow_hash_config3); + dpo3 = + load_balance_get_fwd_bucket (lb3, + (hash_c3 & + (lb3->lb_n_buckets_minus_1))); + } + else + { + dpo3 = load_balance_get_bucket_i (lb3, 0); 
+ } + + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + next1 = dpo1->dpoi_next_node; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + next2 = dpo2->dpoi_next_node; + vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; + next3 = dpo3->dpoi_next_node; + vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lb_index0, 1, + vlib_buffer_length_in_chain (vm, p0)); + vlib_increment_combined_counter + (cm, thread_index, lb_index1, 1, + vlib_buffer_length_in_chain (vm, p1)); + vlib_increment_combined_counter + (cm, thread_index, lb_index2, 1, + vlib_buffer_length_in_chain (vm, p2)); + vlib_increment_combined_counter + (cm, thread_index, lb_index3, 1, + vlib_buffer_length_in_chain (vm, p3)); + + vlib_validate_buffer_enqueue_x4 (vm, node, next, + to_next, n_left_to_next, + pi0, pi1, pi2, pi3, + next0, next1, next2, next3); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *p0; + ip4_header_t *ip0; + ip_lookup_next_t next0; + const load_balance_t *lb0; + ip4_fib_mtrie_t *mtrie0; + ip4_fib_mtrie_leaf_t leaf0; + ip4_address_t *dst_addr0; + u32 pi0, fib_index0, lbi0; + flow_hash_config_t flow_hash_config0; + const dpo_id_t *dpo0; + u32 hash_c0; + + pi0 = from[0]; + to_next[0] = pi0; + + p0 = vlib_get_buffer (vm, pi0); + + ip0 = vlib_buffer_get_current (p0); + + dst_addr0 = &ip0->dst_address; + + fib_index0 = + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (p0)->sw_if_index[VLIB_RX]); + fib_index0 = + (vnet_buffer (p0)->sw_if_index[VLIB_TX] == + (u32) ~ 0) ? 
fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; + + if (!lookup_for_responses_to_locally_received_packets) + { + mtrie0 = &ip4_fib_get (fib_index0)->mtrie; + + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); + } + + if (!lookup_for_responses_to_locally_received_packets) + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); + + if (!lookup_for_responses_to_locally_received_packets) + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); + + if (lookup_for_responses_to_locally_received_packets) + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; + else + { + /* Handle default route. */ + lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + } + + ASSERT (lbi0); + lb0 = load_balance_get (lbi0); + + ASSERT (lb0->lb_n_buckets > 0); + ASSERT (is_pow2 (lb0->lb_n_buckets)); + + /* Use flow hash to compute multipath adjacency. */ + hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + flow_hash_config0 = lb0->lb_hash_config; + + hash_c0 = vnet_buffer (p0)->ip.flow_hash = + ip4_compute_flow_hash (ip0, flow_hash_config0); + dpo0 = + load_balance_get_fwd_bucket (lb0, + (hash_c0 & + (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); + } + + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter (cm, thread_index, lbi0, 1, + vlib_buffer_length_in_chain (vm, + p0)); + + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + if (PREDICT_FALSE (next0 != next)) + { + n_left_to_next += 1; + vlib_put_next_frame (vm, node, next, n_left_to_next); + next = next0; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + to_next[0] = pi0; + to_next += 1; + n_left_to_next -= 1; + } + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace (vm, node, frame, VLIB_TX); + + return frame->n_vectors; +} + 
+/** @brief IPv4 lookup node. + @node ip4-lookup + + This is the main IPv4 lookup dispatch node. + + @param vm vlib_main_t corresponding to the current thread + @param node vlib_node_runtime_t + @param frame vlib_frame_t whose contents should be dispatched + + @par Graph mechanics: buffer metadata, next index usage + + @em Uses: + - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code> + - Indicates the @c sw_if_index value of the interface that the + packet was received on. + - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code> + - When the value is @c ~0 then the node performs a longest prefix + match (LPM) for the packet destination address in the FIB attached + to the receive interface. + - Otherwise perform LPM for the packet destination address in the + indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index + value (0, 1, ...) and not a VRF id. + + @em Sets: + - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code> + - The lookup result adjacency index. + + <em>Next Index:</em> + - Dispatches the packet to the node index found in + ip_adjacency_t @c adj->lookup_next_index + (where @c adj is the lookup result adjacency). 
+*/ +static uword +ip4_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_lookup_inline (vm, node, frame, + /* lookup_for_responses_to_locally_received_packets */ + 0); + +} + +static u8 *format_ip4_lookup_trace (u8 * s, va_list * args); + +VLIB_REGISTER_NODE (ip4_lookup_node) = +{ +.function = ip4_lookup,.name = "ip4-lookup",.vector_size = + sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes = + IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup); + +always_inline uword +ip4_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters; + u32 n_left_from, n_left_to_next, *from, *to_next; + ip_lookup_next_t next; + u32 thread_index = vlib_get_thread_index (); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace (vm, node, frame, VLIB_TX); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + ip_lookup_next_t next0, next1; + const load_balance_t *lb0, *lb1; + vlib_buffer_t *p0, *p1; + u32 pi0, lbi0, hc0, pi1, lbi1, hc1; + const ip4_header_t *ip0, *ip1; + const dpo_id_t *dpo0, *dpo1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get (lbi0); + lb1 = load_balance_get (lbi1); + + /* + * this node is for via FIBs we can re-use the hash value from the + * to node if present. + * We don't want to use the same hash value at each level in the recursion + * graph as that would lead to polarisation + */ + hc0 = hc1 = 0; + + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash)) + { + hc0 = vnet_buffer (p0)->ip.flow_hash = + vnet_buffer (p0)->ip.flow_hash >> 1; + } + else + { + hc0 = vnet_buffer (p0)->ip.flow_hash = + ip4_compute_flow_hash (ip0, lb0->lb_hash_config); + } + dpo0 = load_balance_get_fwd_bucket + (lb0, (hc0 & (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); + } + if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash)) + { + hc1 = vnet_buffer (p1)->ip.flow_hash = + vnet_buffer (p1)->ip.flow_hash >> 1; + } + else + { + hc1 = vnet_buffer (p1)->ip.flow_hash = + ip4_compute_flow_hash (ip1, lb1->lb_hash_config); + } + dpo1 = load_balance_get_fwd_bucket + (lb1, (hc1 & (lb1->lb_n_buckets_minus_1))); + } + else + { + dpo1 = load_balance_get_bucket_i (lb1, 0); + } + + next0 = dpo0->dpoi_next_node; + next1 = dpo1->dpoi_next_node; + + vnet_buffer 
(p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + vlib_increment_combined_counter + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); + + vlib_validate_buffer_enqueue_x2 (vm, node, next, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_lookup_next_t next0; + const load_balance_t *lb0; + vlib_buffer_t *p0; + u32 pi0, lbi0, hc0; + const ip4_header_t *ip0; + const dpo_id_t *dpo0; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + p0 = vlib_get_buffer (vm, pi0); + + ip0 = vlib_buffer_get_current (p0); + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get (lbi0); + + hc0 = 0; + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash)) + { + hc0 = vnet_buffer (p0)->ip.flow_hash = + vnet_buffer (p0)->ip.flow_hash >> 1; + } + else + { + hc0 = vnet_buffer (p0)->ip.flow_hash = + ip4_compute_flow_hash (ip0, lb0->lb_hash_config); + } + dpo0 = load_balance_get_fwd_bucket + (lb0, (hc0 & (lb0->lb_n_buckets_minus_1))); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); + } + + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (ip4_load_balance_node) = +{ +.function = ip4_load_balance,.name = "ip4-load-balance",.vector_size = + sizeof (u32),.sibling_of = "ip4-lookup",.format_trace = + format_ip4_lookup_trace,}; + +VLIB_NODE_FUNCTION_MULTIARCH 
(ip4_load_balance_node, ip4_load_balance); + +/* get first interface address */ +ip4_address_t * +ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, + ip_interface_address_t ** result_ia) +{ + ip_lookup_main_t *lm = &im->lookup_main; + ip_interface_address_t *ia = 0; + ip4_address_t *result = 0; + + /* *INDENT-OFF* */ + foreach_ip_interface_address + (lm, ia, sw_if_index, + 1 /* honor unnumbered */ , + ({ + ip4_address_t * a = + ip_interface_address_get_address (lm, ia); + result = a; + break; + })); + /* *INDENT-OFF* */ + if (result_ia) + *result_ia = result ? ia : 0; + return result; +} + +static void +ip4_add_interface_routes (u32 sw_if_index, + ip4_main_t * im, u32 fib_index, + ip_interface_address_t * a) +{ + ip_lookup_main_t *lm = &im->lookup_main; + ip4_address_t *address = ip_interface_address_get_address (lm, a); + fib_prefix_t pfx = { + .fp_len = a->address_length, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = *address, + }; + + if (pfx.fp_len <= 30) + { + /* a /30 or shorter - add a glean for the network address */ + fib_table_entry_update_one_path (fib_index, &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + DPO_PROTO_IP4, + /* No next-hop address */ + NULL, + sw_if_index, + // invalid FIB index + ~0, + 1, + // no out-label stack + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + + /* Add the two broadcast addresses as drop */ + fib_prefix_t net_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len], + }; + if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32) + fib_table_entry_special_add(fib_index, + &net_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_DROP | + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT)); + net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len]; + if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32) + fib_table_entry_special_add(fib_index, + &net_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_DROP | + 
FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT)); + } + else if (pfx.fp_len == 31) + { + u32 mask = clib_host_to_net_u32(1); + fib_prefix_t net_pfx = pfx; + + net_pfx.fp_len = 32; + net_pfx.fp_addr.ip4.as_u32 ^= mask; + + /* a /31 - add the other end as an attached host */ + fib_table_entry_update_one_path (fib_index, &net_pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_ATTACHED), + DPO_PROTO_IP4, + &net_pfx.fp_addr, + sw_if_index, + // invalid FIB index + ~0, + 1, + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + } + pfx.fp_len = 32; + + if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index)) + { + u32 classify_table_index = + lm->classify_table_index_by_sw_if_index[sw_if_index]; + if (classify_table_index != (u32) ~ 0) + { + dpo_id_t dpo = DPO_INVALID; + + dpo_set (&dpo, + DPO_CLASSIFY, + DPO_PROTO_IP4, + classify_dpo_create (DPO_PROTO_IP4, classify_table_index)); + + fib_table_entry_special_dpo_add (fib_index, + &pfx, + FIB_SOURCE_CLASSIFY, + FIB_ENTRY_FLAG_NONE, &dpo); + dpo_reset (&dpo); + } + } + + fib_table_entry_update_one_path (fib_index, &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + DPO_PROTO_IP4, + &pfx.fp_addr, + sw_if_index, + // invalid FIB index + ~0, + 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); +} + +static void +ip4_del_interface_routes (ip4_main_t * im, + u32 fib_index, + ip4_address_t * address, u32 address_length) +{ + fib_prefix_t pfx = { + .fp_len = address_length, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = *address, + }; + + if (pfx.fp_len <= 30) + { + fib_prefix_t net_pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len], + }; + if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32) + fib_table_entry_special_remove(fib_index, + &net_pfx, + FIB_SOURCE_INTERFACE); + net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len]; + if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32) + fib_table_entry_special_remove(fib_index, + &net_pfx, + 
FIB_SOURCE_INTERFACE); + fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE); + } + else if (pfx.fp_len == 31) + { + u32 mask = clib_host_to_net_u32(1); + fib_prefix_t net_pfx = pfx; + + net_pfx.fp_len = 32; + net_pfx.fp_addr.ip4.as_u32 ^= mask; + + fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE); + } + + pfx.fp_len = 32; + fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE); +} + +void +ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) +{ + ip4_main_t *im = &ip4_main; + + vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0); + + /* + * enable/disable only on the 1<->0 transition + */ + if (is_enable) + { + if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index]) + return; + } + else + { + ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0); + if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index]) + return; + } + vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index, + !is_enable, 0, 0); + + + vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", + sw_if_index, !is_enable, 0, 0); +} + +static clib_error_t * +ip4_add_del_interface_address_internal (vlib_main_t * vm, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, u32 is_del) +{ + vnet_main_t *vnm = vnet_get_main (); + ip4_main_t *im = &ip4_main; + ip_lookup_main_t *lm = &im->lookup_main; + clib_error_t *error = 0; + u32 if_address_index, elts_before; + ip4_address_fib_t ip4_af, *addr_fib = 0; + + /* local0 interface doesn't support IP addressing */ + if (sw_if_index == 0) + { + return + clib_error_create ("local0 interface doesn't support IP addressing"); + } + + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + ip4_addr_fib_init (&ip4_af, address, + vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); + vec_add1 (addr_fib, ip4_af); + + /* FIXME-LATER + * there is no support for adj-fib handling in the presence of overlapping + * subnets on interfaces. 
Easy fix - disallow overlapping subnets, like + * most routers do. + */ + /* *INDENT-OFF* */ + if (!is_del) + { + /* When adding an address check that it does not conflict + with an existing address. */ + ip_interface_address_t *ia; + foreach_ip_interface_address + (&im->lookup_main, ia, sw_if_index, + 0 /* honor unnumbered */ , + ({ + ip4_address_t * x = + ip_interface_address_get_address + (&im->lookup_main, ia); + if (ip4_destination_matches_route + (im, address, x, ia->address_length) || + ip4_destination_matches_route (im, + x, + address, + address_length)) + return + clib_error_create + ("failed to add %U which conflicts with %U for interface %U", + format_ip4_address_and_length, address, + address_length, + format_ip4_address_and_length, x, + ia->address_length, + format_vnet_sw_if_index_name, vnm, + sw_if_index); + })); + } + /* *INDENT-ON* */ + + elts_before = pool_elts (lm->if_address_pool); + + error = ip_interface_address_add_del + (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index); + if (error) + goto done; + + ip4_sw_interface_enable_disable (sw_if_index, !is_del); + + if (is_del) + ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length); + else + ip4_add_interface_routes (sw_if_index, + im, ip4_af.fib_index, + pool_elt_at_index + (lm->if_address_pool, if_address_index)); + + /* If pool did not grow/shrink: add duplicate address. 
*/ + if (elts_before != pool_elts (lm->if_address_pool)) + { + ip4_add_del_interface_address_callback_t *cb; + vec_foreach (cb, im->add_del_interface_address_callbacks) + cb->function (im, cb->function_opaque, sw_if_index, + address, address_length, if_address_index, is_del); + } + +done: + vec_free (addr_fib); + return error; +} + +clib_error_t * +ip4_add_del_interface_address (vlib_main_t * vm, + u32 sw_if_index, + ip4_address_t * address, + u32 address_length, u32 is_del) +{ + return ip4_add_del_interface_address_internal + (vm, sw_if_index, address, address_length, is_del); +} + +/* Built-in ip4 unicast rx feature path definition */ +/* *INDENT-OFF* */ +VNET_FEATURE_ARC_INIT (ip4_unicast, static) = +{ + .arc_name = "ip4-unicast", + .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"), + .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index, +}; + +VNET_FEATURE_INIT (ip4_flow_classify, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "ip4-flow-classify", + .runs_before = VNET_FEATURES ("ip4-inacl"), +}; + +VNET_FEATURE_INIT (ip4_inacl, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "ip4-inacl", + .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"), +}; + +VNET_FEATURE_INIT (ip4_source_check_1, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "ip4-source-check-via-rx", + .runs_before = VNET_FEATURES ("ip4-source-check-via-any"), +}; + +VNET_FEATURE_INIT (ip4_source_check_2, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "ip4-source-check-via-any", + .runs_before = VNET_FEATURES ("ip4-policer-classify"), +}; + +VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "ip4-source-and-port-range-check-rx", + .runs_before = VNET_FEATURES ("ip4-policer-classify"), +}; + +VNET_FEATURE_INIT (ip4_policer_classify, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "ip4-policer-classify", + .runs_before = VNET_FEATURES ("ipsec-input-ip4"), +}; + 
+VNET_FEATURE_INIT (ip4_ipsec, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "ipsec-input-ip4", + .runs_before = VNET_FEATURES ("vpath-input-ip4"), +}; + +VNET_FEATURE_INIT (ip4_vpath, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "vpath-input-ip4", + .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"), +}; + +VNET_FEATURE_INIT (ip4_vxlan_bypass, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "ip4-vxlan-bypass", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; + +VNET_FEATURE_INIT (ip4_drop, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "ip4-drop", + .runs_before = VNET_FEATURES ("ip4-lookup"), +}; + +VNET_FEATURE_INIT (ip4_lookup, static) = +{ + .arc_name = "ip4-unicast", + .node_name = "ip4-lookup", + .runs_before = 0, /* not before any other features */ +}; + +/* Built-in ip4 multicast rx feature path definition */ +VNET_FEATURE_ARC_INIT (ip4_multicast, static) = +{ + .arc_name = "ip4-multicast", + .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"), + .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index, +}; + +VNET_FEATURE_INIT (ip4_vpath_mc, static) = +{ + .arc_name = "ip4-multicast", + .node_name = "vpath-input-ip4", + .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"), +}; + +VNET_FEATURE_INIT (ip4_mc_drop, static) = +{ + .arc_name = "ip4-multicast", + .node_name = "ip4-drop", + .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"), +}; + +VNET_FEATURE_INIT (ip4_lookup_mc, static) = +{ + .arc_name = "ip4-multicast", + .node_name = "ip4-mfib-forward-lookup", + .runs_before = 0, /* last feature */ +}; + +/* Source and port-range check ip4 tx feature path definition */ +VNET_FEATURE_ARC_INIT (ip4_output, static) = +{ + .arc_name = "ip4-output", + .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"), + .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index, +}; + +VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = +{ + .arc_name = "ip4-output", + 
.node_name = "ip4-source-and-port-range-check-tx", + .runs_before = VNET_FEATURES ("ipsec-output-ip4"), +}; + +VNET_FEATURE_INIT (ip4_ipsec_output, static) = +{ + .arc_name = "ip4-output", + .node_name = "ipsec-output-ip4", + .runs_before = VNET_FEATURES ("interface-output"), +}; + +/* Built-in ip4 tx feature path definition */ +VNET_FEATURE_INIT (ip4_interface_output, static) = +{ + .arc_name = "ip4-output", + .node_name = "interface-output", + .runs_before = 0, /* not before any other features */ +}; +/* *INDENT-ON* */ + +static clib_error_t * +ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) +{ + ip4_main_t *im = &ip4_main; + + /* Fill in lookup tables with default table (0). */ + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + vec_validate (im->mfib_index_by_sw_if_index, sw_if_index); + + if (!is_add) + { + ip4_main_t *im4 = &ip4_main; + ip_lookup_main_t *lm4 = &im4->lookup_main; + ip_interface_address_t *ia = 0; + ip4_address_t *address; + vlib_main_t *vm = vlib_get_main (); + + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */, + ({ + address = ip_interface_address_get_address (lm4, ia); + ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1); + })); + /* *INDENT-ON* */ + } + + vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index, + is_add, 0, 0); + + vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index, + is_add, 0, 0); + + return /* no error */ 0; +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del); + +/* Global IP4 main. 
*/ +ip4_main_t ip4_main; + +clib_error_t * +ip4_lookup_init (vlib_main_t * vm) +{ + ip4_main_t *im = &ip4_main; + clib_error_t *error; + uword i; + + if ((error = vlib_call_init_function (vm, vnet_feature_init))) + return error; + + for (i = 0; i < ARRAY_LEN (im->fib_masks); i++) + { + u32 m; + + if (i < 32) + m = pow2_mask (i) << (32 - i); + else + m = ~0; + im->fib_masks[i] = clib_host_to_net_u32 (m); + } + + ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0); + + /* Create FIB with index 0 and table id of 0. */ + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0, + FIB_SOURCE_DEFAULT_ROUTE); + mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0, + MFIB_SOURCE_DEFAULT_ROUTE); + + { + pg_node_t *pn; + pn = pg_get_node (ip4_lookup_node.index); + pn->unformat_edit = unformat_pg_ip4_header; + } + + { + ethernet_arp_header_t h; + + memset (&h, 0, sizeof (h)); + + /* Set target ethernet address to all zeros. */ + memset (h.ip4_over_ethernet[1].ethernet, 0, + sizeof (h.ip4_over_ethernet[1].ethernet)); + +#define _16(f,v) h.f = clib_host_to_net_u16 (v); +#define _8(f,v) h.f = v; + _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet); + _16 (l3_type, ETHERNET_TYPE_IP4); + _8 (n_l2_address_bytes, 6); + _8 (n_l3_address_bytes, 4); + _16 (opcode, ETHERNET_ARP_OPCODE_request); +#undef _16 +#undef _8 + + vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template, + /* data */ &h, + sizeof (h), + /* alloc chunk size */ 8, + "ip4 arp"); + } + + return error; +} + +VLIB_INIT_FUNCTION (ip4_lookup_init); + +typedef struct +{ + /* Adjacency taken. */ + u32 dpo_index; + u32 flow_hash; + u32 fib_index; + + /* Packet data, possibly *after* rewrite. 
*/ + u8 packet_data[64 - 1 * sizeof (u32)]; +} +ip4_forward_next_trace_t; + +u8 * +format_ip4_forward_next_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *); + uword indent = format_get_indent (s); + s = format (s, "%U%U", + format_white_space, indent, + format_ip4_header, t->packet_data, sizeof (t->packet_data)); + return s; +} + +static u8 * +format_ip4_lookup_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *); + uword indent = format_get_indent (s); + + s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x", + t->fib_index, t->dpo_index, t->flow_hash); + s = format (s, "\n%U%U", + format_white_space, indent, + format_ip4_header, t->packet_data, sizeof (t->packet_data)); + return s; +} + +static u8 * +format_ip4_rewrite_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *); + uword indent = format_get_indent (s); + + s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x", + t->fib_index, t->dpo_index, format_ip_adjacency, + t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash); + s = format (s, "\n%U%U", + format_white_space, indent, + format_ip_adjacency_packet_data, + t->dpo_index, t->packet_data, sizeof (t->packet_data)); + return s; +} + +/* Common trace function for all ip4-forward next nodes. 
*/ +void +ip4_forward_next_trace (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index) +{ + /* For every buffer in frame that has VLIB_BUFFER_IS_TRACED set, add an ip4_forward_next_trace_t record holding the adjacency/dpo index selected by which_adj_index, the flow hash, a fib index and the leading bytes of the packet. */ u32 *from, n_left; + ip4_main_t *im = &ip4_main; + + n_left = frame->n_vectors; + from = vlib_frame_vector_args (frame); + + /* Dual loop: two buffers are handled per pass, but four must remain because from[2] and from[3] are prefetched. */ while (n_left >= 4) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + ip4_forward_next_trace_t *t0, *t1; + + /* Prefetch next iteration. */ + vlib_prefetch_buffer_with_index (vm, from[2], LOAD); + vlib_prefetch_buffer_with_index (vm, from[3], LOAD); + + bi0 = from[0]; + bi1 = from[1]; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index]; + t0->flow_hash = vnet_buffer (b0)->ip.flow_hash; + /* Record sw_if_index[VLIB_TX] as the fib index when it is set (not ~0), otherwise the fib index bound to the RX interface. */ t0->fib_index = + (vnet_buffer (b0)->sw_if_index[VLIB_TX] != + (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] : + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX]); + + clib_memcpy (t0->packet_data, + vlib_buffer_get_current (b0), + sizeof (t0->packet_data)); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); + t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index]; + t1->flow_hash = vnet_buffer (b1)->ip.flow_hash; + t1->fib_index = + (vnet_buffer (b1)->sw_if_index[VLIB_TX] != + (u32) ~ 0) ? 
vnet_buffer (b1)->sw_if_index[VLIB_TX] : + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (b1)->sw_if_index[VLIB_RX]); + clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1), + sizeof (t1->packet_data)); + } + from += 2; + n_left -= 2; + } + + /* Single loop: same recording logic for the remaining buffers, one at a time. */ while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t *b0; + ip4_forward_next_trace_t *t0; + + bi0 = from[0]; + + b0 = vlib_get_buffer (vm, bi0); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index]; + t0->flow_hash = vnet_buffer (b0)->ip.flow_hash; + t0->fib_index = + (vnet_buffer (b0)->sw_if_index[VLIB_TX] != + (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] : + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX]); + clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0), + sizeof (t0->packet_data)); + } + from += 1; + n_left -= 1; + } +} + /* Shared body of ip4-drop and ip4-punt: hands every buffer of the frame to vlib_error_drop_buffers with error_code (taken from the ip4-input node error table) towards next index 0, traces the frame when node tracing is enabled, and returns the number of packets processed. */ +static uword +ip4_drop_or_punt (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, ip4_error_t error_code) +{ + u32 *buffers = vlib_frame_vector_args (frame); + uword n_packets = frame->n_vectors; + + vlib_error_drop_buffers (vm, node, buffers, + /* stride */ 1, + n_packets, + /* next */ 0, + ip4_input_node.index, error_code); + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace (vm, node, frame, VLIB_TX); + + return n_packets; +} + /* Node function for ip4-drop: counts the packets against IP4_ERROR_ADJACENCY_DROP. */ +static uword +ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); +} + /* Node function for ip4-punt: counts the packets against IP4_ERROR_ADJACENCY_PUNT. */ +static uword +ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_drop_node, static) = +{ + .function = ip4_drop, + .name = "ip4-drop", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_next_nodes = 1, + 
.next_nodes = { + [0] = "error-drop", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop); + +VLIB_REGISTER_NODE (ip4_punt_node, static) = +{ + .function = ip4_punt, + .name = "ip4-punt", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-punt", + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt); +/* *INDENT-ON* */ + +/* Compute TCP/UDP/ICMP4 checksum in software. The sum covers the pseudo-header fields (L4 length, protocol, source and destination address) plus the L4 payload, which may continue in chained buffers after p0. Returns the one's-complement of the folded sum. */ +u16 +ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, + ip4_header_t * ip0) +{ + ip_csum_t sum0; + u32 ip_header_length, payload_length_host_byte_order; + u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer; + u16 sum16; + void *data_this_buffer; + + /* Initialize checksum with ip header. */ + ip_header_length = ip4_header_bytes (ip0); + /* L4 length is the IP total length minus the IP header length. NOTE(review): both are unsigned, so a total length smaller than the header length would wrap - presumably rejected by earlier input checks; confirm. */ payload_length_host_byte_order = + clib_net_to_host_u16 (ip0->length) - ip_header_length; + sum0 = + clib_host_to_net_u32 (payload_length_host_byte_order + + (ip0->protocol << 16)); + + /* Add the addresses: two 32-bit loads on 32-bit targets, otherwise one 64-bit unaligned load starting at src_address (presumably covering dst_address as well, which the 32-bit path adds separately - confirm header layout). */ if (BITS (uword) == 32) + { + sum0 = + ip_csum_with_carry (sum0, + clib_mem_unaligned (&ip0->src_address, u32)); + sum0 = + ip_csum_with_carry (sum0, + clib_mem_unaligned (&ip0->dst_address, u32)); + } + else + sum0 = + ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64)); + + n_bytes_left = n_this_buffer = payload_length_host_byte_order; + data_this_buffer = (void *) ip0 + ip_header_length; + /* Bytes held by this buffer from ip0 onward; when the whole L4 payload does not fit, clamp n_this_buffer to what the first buffer actually contains. */ n_ip_bytes_this_buffer = p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data); + if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer) + { + n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ? 
+ n_ip_bytes_this_buffer - ip_header_length : 0; + } + /* Accumulate over the current buffer, then follow the buffer chain until all payload bytes are consumed. NOTE(review): the only guard against a chain that ends early is the ASSERT below; if ASSERT is compiled out in production builds, next_buffer is followed unchecked. n_bytes_left is unsigned, so a chained buffer longer than the remaining payload would wrap it - confirm callers guarantee consistent lengths. */ while (1) + { + sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer); + n_bytes_left -= n_this_buffer; + if (n_bytes_left == 0) + break; + + ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT); + p0 = vlib_get_buffer (vm, p0->next_buffer); + data_this_buffer = vlib_buffer_get_current (p0); + n_this_buffer = p0->current_length; + } + + sum16 = ~ip_csum_fold (sum0); + + return sum16; +} + /* Validate the TCP or UDP checksum of the IPv4 packet at the current position of p0. Always sets VNET_BUFFER_F_L4_CHECKSUM_COMPUTED in p0->flags, and sets VNET_BUFFER_F_L4_CHECKSUM_CORRECT when the computed sum is zero or when a UDP packet carries a zero checksum field. Returns the updated flags. */ +u32 +ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) +{ + ip4_header_t *ip0 = vlib_buffer_get_current (p0); + udp_header_t *udp0; + u16 sum16; + + ASSERT (ip0->protocol == IP_PROTOCOL_TCP + || ip0->protocol == IP_PROTOCOL_UDP); + + /* NOTE(review): ip0 + 1 skips only the fixed-size header, whereas ip4_local_validate_l4 locates the UDP header with ip4_next_header - confirm packets with IP options cannot reach this point. */ udp0 = (void *) (ip0 + 1); + /* A zero UDP checksum field is accepted without computing anything. */ if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0) + { + p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED + | VNET_BUFFER_F_L4_CHECKSUM_CORRECT); + return p0->flags; + } + + sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0); + + /* (sum16 == 0) is shifted into the CORRECT bit position, so the bit is set only for a verifying checksum. */ p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED + | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT)); + + return p0->flags; +} + +/* *INDENT-OFF* */ +VNET_FEATURE_ARC_INIT (ip4_local) = +{ + .arc_name = "ip4-local", + .start_nodes = VNET_FEATURES ("ip4-local"), +}; +/* *INDENT-ON* */ + /* Run software L4 validation for one packet: stores the checksum verdict in *good_tcp_udp and, for UDP, additionally fails the packet when the UDP length exceeds the IP total length (setting *error to IP4_ERROR_UDP_LENGTH). NOTE(review): the length failure also clears *good_tcp_udp, and ip4_local_inline then replaces the error with the checksum error code whenever good_tcp_udp is zero - confirm whether IP4_ERROR_UDP_LENGTH is meant to survive. */ +static inline void +ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip, + u8 is_udp, u8 * error, u8 * good_tcp_udp) +{ + u32 flags0; + flags0 = ip4_tcp_udp_validate_checksum (vm, p); + *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + if (is_udp) + { + udp_header_t *udp; + u32 ip_len, udp_len; + i32 len_diff; + udp = ip4_next_header (ip); + /* Verify UDP length. */ + ip_len = clib_net_to_host_u16 (ip->length); + udp_len = clib_net_to_host_u16 (udp->length); + + len_diff = ip_len - udp_len; + *good_tcp_udp &= len_diff >= 0; + *error = len_diff < 0 ? 
IP4_ERROR_UDP_LENGTH : *error; + } +} + /* True when the packet is TCP or UDP and its buffer flags do not yet carry VNET_BUFFER_F_L4_CHECKSUM_COMPUTED, i.e. the checksum still has to be validated in software. */ +#define ip4_local_do_l4_check(is_tcp_udp, flags) \ + (is_tcp_udp && !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)) + /* Shared worker for the ip4-local and ip4-local-end-of-arc nodes. With head_of_feature_arc non-zero each packet gets L4 checksum/length validation and a source-address lookup before the next node is chosen from lm->local_next_by_ip_protocol, and the ip4-local feature arc is started for packets without error. With head_of_feature_arc zero those checks are skipped and only the protocol dispatch runs. Returns the number of vectors in frame. */ +static inline uword +ip4_local_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int head_of_feature_arc) +{ + ip4_main_t *im = &ip4_main; + ip_lookup_main_t *lm = &im->lookup_main; + ip_local_next_t next_index; + u32 *from, *to_next, n_left_from, n_left_to_next; + /* Error counters are charged to the ip4-input node rather than to this node. */ vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_input_node.index); + u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace (vm, node, frame, VLIB_TX); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + /* Dual loop: processes two packets per pass. */ while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t *p0, *p1; + ip4_header_t *ip0, *ip1; + ip4_fib_mtrie_t *mtrie0, *mtrie1; + ip4_fib_mtrie_leaf_t leaf0, leaf1; + const dpo_id_t *dpo0, *dpo1; + const load_balance_t *lb0, *lb1; + u32 pi0, next0, fib_index0, lbi0; + u32 pi1, next1, fib_index1, lbi1; + u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; + u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1; + u32 sw_if_index0, sw_if_index1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + /* IP4_ERROR_UNKNOWN_PROTOCOL doubles as the no-error-so-far value: the next-node selection further down drops a packet only when its error differs from it. */ next0 = next1 = IP_LOCAL_NEXT_DROP; + error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + vnet_buffer (p0)->l3_hdr_offset = p0->current_data; + vnet_buffer (p1)->l3_hdr_offset = p1->current_data; + + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; + + /* Treat IP 
frag packets as "experimental" protocol for now + until support of IP frag reassembly is implemented */ + proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; + proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol; + + if (head_of_feature_arc == 0) + goto skip_checks; + + is_udp0 = proto0 == IP_PROTOCOL_UDP; + is_udp1 = proto1 == IP_PROTOCOL_UDP; + is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; + is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP; + + good_tcp_udp0 = + (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + good_tcp_udp1 = + (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + + if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags) + || ip4_local_do_l4_check (is_tcp_udp1, + p1->flags))) + { + if (is_tcp_udp0) + ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0, + &good_tcp_udp0); + if (is_tcp_udp1) + ip4_local_validate_l4 (vm, p1, ip1, is_udp1, &error1, + &good_tcp_udp1); + } + + ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM); + error0 = (is_tcp_udp0 && !good_tcp_udp0 + ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0); + error1 = (is_tcp_udp1 && !good_tcp_udp1 + ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); + fib_index0 = + (vnet_buffer (p0)->sw_if_index[VLIB_TX] == + (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; + + fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1); + fib_index1 = + (vnet_buffer (p1)->sw_if_index[VLIB_TX] == + (u32) ~ 0) ? 
fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; + + /* Look up each packet source address in the mtrie of its selected FIB; the resulting load-balance index is stored as both the RX and TX adj_index of the buffer. */ mtrie0 = &ip4_fib_get (fib_index0)->mtrie; + mtrie1 = &ip4_fib_get (fib_index1)->mtrie; + + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); + leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, + 2); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, + 2); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, + 3); + leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, + 3); + + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = + ip4_fib_mtrie_leaf_get_adj_index (leaf0); + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0; + + vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = + ip4_fib_mtrie_leaf_get_adj_index (leaf1); + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1; + + lb0 = load_balance_get (lbi0); + lb1 = load_balance_get (lbi1); + dpo0 = load_balance_get_bucket_i (lb0, 0); + dpo1 = load_balance_get_bucket_i (lb1, 0); + + /* + * Must have a route to source otherwise we drop the packet. + * ip4 broadcasts are accepted, e.g. to make dhcp client work + * + * The checks are: + * - the source is a receive => it's from us => bogus, do this + * first since it sets a different error code. + * - uRPF check for any route to source - accept if passes. + * - allow packets destined to the broadcast address from unknown sources + * + * Each check only fires while the error is still + * IP4_ERROR_UNKNOWN_PROTOCOL, so an earlier L4 error is never overwritten. + */ + error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL && + dpo0->dpoi_type == DPO_RECEIVE) ? + IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0); + error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL && + !fib_urpf_check_size (lb0->lb_urpf) && + ip0->dst_address.as_u32 != 0xFFFFFFFF) + ? IP4_ERROR_SRC_LOOKUP_MISS : error0); + error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL && + dpo1->dpoi_type == DPO_RECEIVE) ? 
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1); + error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL && + !fib_urpf_check_size (lb1->lb_urpf) && + ip1->dst_address.as_u32 != 0xFFFFFFFF) + ? IP4_ERROR_SRC_LOOKUP_MISS : error1); + + skip_checks: + + next0 = lm->local_next_by_ip_protocol[proto0]; + next1 = lm->local_next_by_ip_protocol[proto1]; + + next0 = + error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; + next1 = + error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1; + + p0->error = error0 ? error_node->errors[error0] : 0; + p1->error = error1 ? error_node->errors[error1] : 0; + + if (head_of_feature_arc) + { + if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); + if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, pi0, pi1, + next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *p0; + ip4_header_t *ip0; + ip4_fib_mtrie_t *mtrie0; + ip4_fib_mtrie_leaf_t leaf0; + u32 pi0, next0, fib_index0, lbi0; + u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; + load_balance_t *lb0; + const dpo_id_t *dpo0; + u32 sw_if_index0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + next0 = IP_LOCAL_NEXT_DROP; + error0 = IP4_ERROR_UNKNOWN_PROTOCOL; + + p0 = vlib_get_buffer (vm, pi0); + ip0 = vlib_buffer_get_current (p0); + vnet_buffer (p0)->l3_hdr_offset = p0->current_data; + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + /* Treat IP frag packets as "experimental" protocol for now + until support of IP frag reassembly is implemented */ + proto0 = ip4_is_fragment (ip0) ? 
0xfe : ip0->protocol; + + if (head_of_feature_arc == 0) + goto skip_check; + + is_udp0 = proto0 == IP_PROTOCOL_UDP; + is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; + good_tcp_udp0 = + (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + + if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags))) + { + ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0, + &good_tcp_udp0); + } + + ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM); + error0 = (is_tcp_udp0 && !good_tcp_udp0 + ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0); + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); + fib_index0 = + (vnet_buffer (p0)->sw_if_index[VLIB_TX] == + (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; + mtrie0 = &ip4_fib_get (fib_index0)->mtrie; + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, + 2); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, + 3); + lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0; + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0; + + lb0 = load_balance_get (lbi0); + dpo0 = load_balance_get_bucket_i (lb0, 0); + + error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL && + dpo0->dpoi_type == DPO_RECEIVE) ? + IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0); + error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL && + !fib_urpf_check_size (lb0->lb_urpf) && + ip0->dst_address.as_u32 != 0xFFFFFFFF) + ? IP4_ERROR_SRC_LOOKUP_MISS : error0); + + skip_check: + next0 = lm->local_next_by_ip_protocol[proto0]; + next0 = + error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; + + p0->error = error0 ? 
error_node->errors[error0] : 0; + + /* Only the head-of-arc node starts the ip4-local feature arc, and only for packets that are still error free. */ if (head_of_feature_arc) + { + if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, pi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + /* Node function for ip4-local: runs ip4_local_inline as the head of the ip4-local feature arc, i.e. with all validation enabled. */ +static uword +ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_local_node) = +{ + .function = ip4_local, + .name = "ip4-local", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_next_nodes = IP_LOCAL_N_NEXT, + .next_nodes = + { + [IP_LOCAL_NEXT_DROP] = "error-drop", + [IP_LOCAL_NEXT_PUNT] = "error-punt", + [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup", + [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local); + /* Node function for ip4-local-end-of-arc: runs ip4_local_inline with head_of_feature_arc set to 0, so the validation and source lookup are skipped and packets are only dispatched by protocol. */ +static uword +ip4_local_end_of_arc (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_local_inline (vm, node, frame, 0 /* not head of feature arc */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = { + .function = ip4_local_end_of_arc, + .name = "ip4-local-end-of-arc", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_forward_next_trace, + .sibling_of = "ip4-local", +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc) + +VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = { + .arc_name = "ip4-local", + .node_name = "ip4-local-end-of-arc", + .runs_before = 0, /* not before any other features */ +}; +/* *INDENT-ON* */ + /* Register node_index as the handler for locally delivered packets of the given IP protocol: the node is added as a next of ip4-local and the resulting next index is stored in local_next_by_ip_protocol[protocol]. protocol must be within the bounds of that table (checked by ASSERT only). */ +void +ip4_register_protocol (u32 protocol, u32 node_index) +{ + vlib_main_t *vm = vlib_get_main (); + ip4_main_t *im = &ip4_main; + ip_lookup_main_t *lm = &im->lookup_main; + + 
ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol)); + lm->local_next_by_ip_protocol[protocol] = + vlib_node_add_next (vm, ip4_local_node.index, node_index); +} + /* CLI handler for show ip local: prints a heading, then one line per IP protocol whose entry in local_next_by_ip_protocol is not IP_LOCAL_NEXT_PUNT, formatted as protocol number, colon, name of the graph node reached through that next index of ip4-local. The input and cmd arguments are not used. Always returns 0 (no error). */ +static clib_error_t * +show_ip_local_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + ip4_main_t *im = &ip4_main; + ip_lookup_main_t *lm = &im->lookup_main; + int i; + + vlib_cli_output (vm, "Protocols handled by ip4_local"); + for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++) + { + if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT) + { + /* Translate the stored next index into the index of the node it leads to. */ u32 node_index = vlib_get_node (vm, + ip4_local_node.index)-> + next_nodes[lm->local_next_by_ip_protocol[i]]; + vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm, + node_index); + } + } + return 0; +} + /* NOTE(review): the example in the CLI documentation below lists bare protocol numbers, but the command prints each entry as number, colon, node name - the example should be refreshed from real output. */ + + +/*? + * Display the set of protocols handled by the local IPv4 stack. + * + * @cliexpar + * Example of how to display local protocol table: + * @cliexstart{show ip local} + * Protocols handled by ip4_local + * 1 + * 17 + * 47 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_ip_local, static) = +{ + .path = "show ip local", + .function = show_ip_local_command_fn, + .short_help = "show ip local", +}; +/* *INDENT-ON* */ + /* Shared worker for the ip4-arp (is_glean 0) and ip4-glean (is_glean 1) nodes. Every input buffer is enqueued to the drop next; in addition, unless the request is throttled by the seen-bitmap or the adjacency is not of the expected kind, an ARP request is built from the ARP packet template and sent towards the adjacency interface. Returns the number of vectors in frame. */ +always_inline uword +ip4_arp_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int is_glean) +{ + vnet_main_t *vnm = vnet_get_main (); + ip4_main_t *im = &ip4_main; + ip_lookup_main_t *lm = &im->lookup_main; + u32 *from, *to_next_drop; + uword n_left_from, n_left_to_next_drop, next_index; + /* Throttle state kept across calls: random hash seeds and a 256-bit bitmap of recently requested keys. */ static f64 time_last_seed_change = -1e100; + static u32 hash_seeds[3]; + static uword hash_bitmap[256 / BITS (uword)]; + f64 time_now; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace (vm, node, frame, VLIB_TX); + + time_now = vlib_time_now (vm); + /* Pick new seeds and clear the bitmap once more than 1e-3 (in the units returned by vlib_time_now) has elapsed since the last change. */ if (time_now - time_last_seed_change > 1e-3) + { + uword i; + u32 *r = clib_random_buffer_get_data (&vm->random_buffer, + sizeof (hash_seeds)); + for (i = 0; i < ARRAY_LEN 
(hash_seeds); i++) + hash_seeds[i] = r[i]; + + /* Mark all hash keys as not seen before. */ + for (i = 0; i < ARRAY_LEN (hash_bitmap); i++) + hash_bitmap[i] = 0; + + time_last_seed_change = time_now; + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + if (next_index == IP4_ARP_NEXT_DROP) + next_index = IP4_ARP_N_NEXT; /* point to first interface */ + + while (n_left_from > 0) + { + /* Input buffers always go to the drop next; generated ARP requests are sent separately. */ vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP, + to_next_drop, n_left_to_next_drop); + + while (n_left_from > 0 && n_left_to_next_drop > 0) + { + u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0; + ip_adjacency_t *adj0; + vlib_buffer_t *p0; + ip4_header_t *ip0; + uword bm0; + + pi0 = from[0]; + + p0 = vlib_get_buffer (vm, pi0); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + adj0 = adj_get (adj_index0); + ip0 = vlib_buffer_get_current (p0); + + a0 = hash_seeds[0]; + b0 = hash_seeds[1]; + c0 = hash_seeds[2]; + + sw_if_index0 = adj0->rewrite_header.sw_if_index; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; + + /* Throttle key: the address being resolved mixed with the seeds, plus the outgoing interface. */ if (is_glean) + { + /* + * this is the Glean case, so we are ARPing for the + * packet's destination + */ + a0 ^= ip0->dst_address.data_u32; + } + else + { + a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32; + } + b0 ^= sw_if_index0; + + hash_v3_mix32 (a0, b0, c0); + hash_v3_finalize32 (a0, b0, c0); + + /* Reduce the hash to one bit of hash_bitmap: c0 becomes the word index, m0 the bit mask within that word. A bit that is already set means the same key was seen since the last reseed, so no new request is sent for it. */ c0 &= BITS (hash_bitmap) - 1; + m0 = (uword) 1 << (c0 % BITS (uword)); + c0 = c0 / BITS (uword); + + bm0 = hash_bitmap[c0]; + drop0 = (bm0 & m0) != 0; + + /* Mark it as seen. */ + hash_bitmap[c0] = bm0 | m0; + + from += 1; + n_left_from -= 1; + to_next_drop[0] = pi0; + to_next_drop += 1; + n_left_to_next_drop -= 1; + + p0->error = + node->errors[drop0 ? IP4_ARP_ERROR_DROP : + IP4_ARP_ERROR_REQUEST_SENT]; + + /* + * the adj has been updated to a rewrite but the node the DPO that got + * us here hasn't - yet. no big deal. we'll drop while we wait. 
+ */ + if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index) + continue; + + if (drop0) + continue; + + /* + * Can happen if the control-plane is programming tables + * with traffic flowing; at least that's today's lame excuse. + */ + if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) + || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP)) + { + p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ]; + } + else + /* Send ARP request. */ + { + u32 bi0 = 0; + vlib_buffer_t *b0; + ethernet_arp_header_t *h0; + vnet_hw_interface_t *hw_if0; + + h0 = + vlib_packet_template_get_packet (vm, + &im->ip4_arp_request_packet_template, + &bi0); + + /* Seems we're out of buffers */ + if (PREDICT_FALSE (!h0)) + continue; + + /* Add rewrite/encap string for ARP packet. */ + vnet_rewrite_one_header (adj0[0], h0, + sizeof (ethernet_header_t)); + + hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + /* Src ethernet address in ARP header. */ + clib_memcpy (h0->ip4_over_ethernet[0].ethernet, + hw_if0->hw_address, + sizeof (h0->ip4_over_ethernet[0].ethernet)); + + if (is_glean) + { + /* The interface's source address is stashed in the Glean Adj */ + h0->ip4_over_ethernet[0].ip4 = + adj0->sub_type.glean.receive_addr.ip4; + + /* Copy in destination address we are requesting. This is the + * glean case, so it's the packet's destination.*/ + h0->ip4_over_ethernet[1].ip4.data_u32 = + ip0->dst_address.data_u32; + } + else + { + /* Src IP address in ARP header. 
*/ + if (ip4_src_address_for_packet (lm, sw_if_index0, + &h0-> + ip4_over_ethernet[0].ip4)) + { + /* No source address available */ + p0->error = + node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS]; + vlib_buffer_free (vm, &bi0, 1); + continue; + } + + /* Copy in destination address we are requesting from the + incomplete adj */ + h0->ip4_over_ethernet[1].ip4.data_u32 = + adj0->sub_type.nbr.next_hop.ip4.as_u32; + } + + vlib_buffer_copy_trace_flag (vm, p0, bi0); + b0 = vlib_get_buffer (vm, bi0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0; + + vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); + + vlib_set_next_frame_buffer (vm, node, + adj0->rewrite_header.next_index, + bi0); + } + } + + vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop); + } + + return frame->n_vectors; +} + +static uword +ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return (ip4_arp_inline (vm, node, frame, 0)); +} + +static uword +ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return (ip4_arp_inline (vm, node, frame, 1)); +} + +static char *ip4_arp_error_strings[] = { + [IP4_ARP_ERROR_DROP] = "address overflow drops", + [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent", + [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies", + [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed", + [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed", + [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request", +}; + +VLIB_REGISTER_NODE (ip4_arp_node) = +{ + .function = ip4_arp,.name = "ip4-arp",.vector_size = + sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors = + ARRAY_LEN (ip4_arp_error_strings),.error_strings = + ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes = + { + [IP4_ARP_NEXT_DROP] = "error-drop",} +,}; + +VLIB_REGISTER_NODE (ip4_glean_node) = +{ + .function = ip4_glean,.name = 
"ip4-glean",.vector_size = + sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors = + ARRAY_LEN (ip4_arp_error_strings),.error_strings = + ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes = + { + [IP4_ARP_NEXT_DROP] = "error-drop",} +,}; + +#define foreach_notrace_ip4_arp_error \ +_(DROP) \ +_(REQUEST_SENT) \ +_(REPLICATE_DROP) \ +_(REPLICATE_FAIL) + +clib_error_t * +arp_notrace_init (vlib_main_t * vm) +{ + vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index); + + /* don't trace ARP request packets */ +#define _(a) \ + vnet_pcap_drop_trace_filter_add_del \ + (rt->errors[IP4_ARP_ERROR_##a], \ + 1 /* is_add */); + foreach_notrace_ip4_arp_error; +#undef _ + return 0; +} + +VLIB_INIT_FUNCTION (arp_notrace_init); + + +/* Send an ARP request to see if given destination is reachable on given interface. */ +clib_error_t * +ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + ip4_main_t *im = &ip4_main; + ethernet_arp_header_t *h; + ip4_address_t *src; + ip_interface_address_t *ia; + ip_adjacency_t *adj; + vnet_hw_interface_t *hi; + vnet_sw_interface_t *si; + vlib_buffer_t *b; + adj_index_t ai; + u32 bi = 0; + + si = vnet_get_sw_interface (vnm, sw_if_index); + + if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + return clib_error_return (0, "%U: interface %U down", + format_ip4_address, dst, + format_vnet_sw_if_index_name, vnm, + sw_if_index); + } + + src = + ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia); + if (!src) + { + vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE; + return clib_error_return + (0, + "no matching interface address for destination %U (interface %U)", + format_ip4_address, dst, format_vnet_sw_if_index_name, vnm, + sw_if_index); + } + + h = vlib_packet_template_get_packet (vm, + &im->ip4_arp_request_packet_template, + &bi); + + hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + if (PREDICT_FALSE 
(!hi->hw_address)) + { + return clib_error_return (0, "%U: interface %U do not support ip probe", + format_ip4_address, dst, + format_vnet_sw_if_index_name, vnm, + sw_if_index); + } + + clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, + sizeof (h->ip4_over_ethernet[0].ethernet)); + + h->ip4_over_ethernet[0].ip4 = src[0]; + h->ip4_over_ethernet[1].ip4 = dst[0]; + + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; + + ip46_address_t nh = { + .ip4 = *dst, + }; + + ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, + VNET_LINK_IP4, &nh, sw_if_index); + adj = adj_get (ai); + + /* Peer has been previously resolved, retrieve glean adj instead */ + if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) + { + adj_unlock (ai); + ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh); + adj = adj_get (ai); + } + + /* Add encapsulation string for software interface (e.g. ethernet header). */ + vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); + vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); + + { + vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index); + u32 *to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); + } + + adj_unlock (ai); + return /* no error */ 0; +} + +typedef enum +{ + IP4_REWRITE_NEXT_DROP, + IP4_REWRITE_NEXT_ICMP_ERROR, +} ip4_rewrite_next_t; + +always_inline uword +ip4_rewrite_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int do_counters, int is_midchain, int is_mcast) +{ + ip_lookup_main_t *lm = &ip4_main.lookup_main; + u32 *from = vlib_frame_vector_args (frame); + u32 n_left_from, n_left_to_next, *to_next, next_index; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_input_node.index); + + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + u32 thread_index = 
vlib_get_thread_index (); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + ip_adjacency_t *adj0, *adj1; + vlib_buffer_t *p0, *p1; + ip4_header_t *ip0, *ip1; + u32 pi0, rw_len0, next0, error0, checksum0, adj_index0; + u32 pi1, rw_len1, next1, error1, checksum1, adj_index1; + u32 tx_sw_if_index0, tx_sw_if_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + + /* + * pre-fetch the per-adjacency counters + */ + if (do_counters) + { + vlib_prefetch_combined_counter (&adjacency_counters, + thread_index, adj_index0); + vlib_prefetch_combined_counter (&adjacency_counters, + thread_index, adj_index1); + } + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + error0 = error1 = IP4_ERROR_NONE; + next0 = next1 = IP4_REWRITE_NEXT_DROP; + + /* Decrement TTL & update checksum. + Works either endian, so no need for byte swap. */ + if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) + { + i32 ttl0 = ip0->ttl; + + /* Input node should have reject packets with ttl 0. 
*/ + ASSERT (ip0->ttl > 0); + + checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100); + checksum0 += checksum0 >= 0xffff; + + ip0->checksum = checksum0; + ttl0 -= 1; + ip0->ttl = ttl0; + + /* + * If the ttl drops below 1 when forwarding, generate + * an ICMP response. + */ + if (PREDICT_FALSE (ttl0 <= 0)) + { + error0 = IP4_ERROR_TIME_EXPIRED; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = IP4_REWRITE_NEXT_ICMP_ERROR; + } + + /* Verify checksum. */ + ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) || + (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM)); + } + else + { + p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; + } + if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) + { + i32 ttl1 = ip1->ttl; + + /* Input node should have reject packets with ttl 0. */ + ASSERT (ip1->ttl > 0); + + checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100); + checksum1 += checksum1 >= 0xffff; + + ip1->checksum = checksum1; + ttl1 -= 1; + ip1->ttl = ttl1; + + /* + * If the ttl drops below 1 when forwarding, generate + * an ICMP response. + */ + if (PREDICT_FALSE (ttl1 <= 0)) + { + error1 = IP4_ERROR_TIME_EXPIRED; + vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next1 = IP4_REWRITE_NEXT_ICMP_ERROR; + } + + /* Verify checksum. */ + ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) || + (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM)); + } + else + { + p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; + } + + /* Rewrite packet header and updates lengths. */ + adj0 = adj_get (adj_index0); + adj1 = adj_get (adj_index1); + + /* Worth pipelining. No guarantee that adj0,1 are hot... 
*/ + rw_len0 = adj0[0].rewrite_header.data_bytes; + rw_len1 = adj1[0].rewrite_header.data_bytes; + vnet_buffer (p0)->ip.save_rewrite_length = rw_len0; + vnet_buffer (p1)->ip.save_rewrite_length = rw_len1; + + /* Check MTU of outgoing interface. */ + error0 = + (vlib_buffer_length_in_chain (vm, p0) > + adj0[0]. + rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED : + error0); + error1 = + (vlib_buffer_length_in_chain (vm, p1) > + adj1[0]. + rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED : + error1); + + /* Don't adjust the buffer for ttl issue; icmp-error node wants + * to see the IP headerr */ + if (PREDICT_TRUE (error0 == IP4_ERROR_NONE)) + { + next0 = adj0[0].rewrite_header.next_index; + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0; + + if (PREDICT_FALSE + (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) + vnet_feature_arc_start (lm->output_feature_arc_index, + tx_sw_if_index0, &next0, p0); + } + if (PREDICT_TRUE (error1 == IP4_ERROR_NONE)) + { + next1 = adj1[0].rewrite_header.next_index; + p1->current_data -= rw_len1; + p1->current_length += rw_len1; + + tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index; + vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1; + + if (PREDICT_FALSE + (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) + vnet_feature_arc_start (lm->output_feature_arc_index, + tx_sw_if_index1, &next1, p1); + } + + /* Guess we are only writing on simple Ethernet header. 
*/ + vnet_rewrite_two_headers (adj0[0], adj1[0], + ip0, ip1, sizeof (ethernet_header_t)); + + /* + * Bump the per-adjacency counters + */ + if (do_counters) + { + vlib_increment_combined_counter + (&adjacency_counters, + thread_index, + adj_index0, 1, + vlib_buffer_length_in_chain (vm, p0) + rw_len0); + + vlib_increment_combined_counter + (&adjacency_counters, + thread_index, + adj_index1, 1, + vlib_buffer_length_in_chain (vm, p1) + rw_len1); + } + + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func (vm, adj0, p0); + adj1->sub_type.midchain.fixup_func (vm, adj1, p1); + } + if (is_mcast) + { + /* + * copy bytes from the IP address into the MAC rewrite + */ + vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); + vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_adjacency_t *adj0; + vlib_buffer_t *p0; + ip4_header_t *ip0; + u32 pi0, rw_len0, adj_index0, next0, error0, checksum0; + u32 tx_sw_if_index0; + + pi0 = to_next[0] = from[0]; + + p0 = vlib_get_buffer (vm, pi0); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + adj0 = adj_get (adj_index0); + + ip0 = vlib_buffer_get_current (p0); + + error0 = IP4_ERROR_NONE; + next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */ + + /* Decrement TTL & update checksum. */ + if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) + { + i32 ttl0 = ip0->ttl; + + checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100); + + checksum0 += checksum0 >= 0xffff; + + ip0->checksum = checksum0; + + ASSERT (ip0->ttl > 0); + + ttl0 -= 1; + + ip0->ttl = ttl0; + + ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) || + (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM)); + + if (PREDICT_FALSE (ttl0 <= 0)) + { + /* + * If the ttl drops below 1 when forwarding, generate + * an ICMP response. 
+ */ + error0 = IP4_ERROR_TIME_EXPIRED; + next0 = IP4_REWRITE_NEXT_ICMP_ERROR; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + } + } + else + { + p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; + } + + if (do_counters) + vlib_prefetch_combined_counter (&adjacency_counters, + thread_index, adj_index0); + + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); + if (is_mcast) + { + /* + * copy bytes from the IP address into the MAC rewrite + */ + vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); + } + + /* Update packet buffer attributes/set output interface. */ + rw_len0 = adj0[0].rewrite_header.data_bytes; + vnet_buffer (p0)->ip.save_rewrite_length = rw_len0; + + if (do_counters) + vlib_increment_combined_counter + (&adjacency_counters, + thread_index, adj_index0, 1, + vlib_buffer_length_in_chain (vm, p0) + rw_len0); + + /* Check MTU of outgoing interface. */ + error0 = (vlib_buffer_length_in_chain (vm, p0) + > adj0[0].rewrite_header.max_l3_packet_bytes + ? 
IP4_ERROR_MTU_EXCEEDED : error0); + + p0->error = error_node->errors[error0]; + + /* Don't adjust the buffer for ttl issue; icmp-error node wants + * to see the IP headerr */ + if (PREDICT_TRUE (error0 == IP4_ERROR_NONE)) + { + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index; + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0; + next0 = adj0[0].rewrite_header.next_index; + + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func (vm, adj0, p0); + } + + if (PREDICT_FALSE + (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) + vnet_feature_arc_start (lm->output_feature_arc_index, + tx_sw_if_index0, &next0, p0); + + } + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Need to do trace after rewrites to pick up new packet data. */ + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace (vm, node, frame, VLIB_TX); + + return frame->n_vectors; +} + + +/** @brief IPv4 rewrite node. + @node ip4-rewrite + + This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4 + header checksum, fetch the ip adjacency, check the outbound mtu, + apply the adjacency rewrite, and send pkts to the adjacency + rewrite header's rewrite_next_index. + + @param vm vlib_main_t corresponding to the current thread + @param node vlib_node_runtime_t + @param frame vlib_frame_t whose contents should be dispatched + + @par Graph mechanics: buffer metadata, next index usage + + @em Uses: + - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code> + - the rewrite adjacency index + - <code>adj->lookup_next_index</code> + - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise + the packet will be dropped. 
+ - <code>adj->rewrite_header</code> + - Rewrite string length, rewrite string, next_index + + @em Sets: + - <code>b->current_data, b->current_length</code> + - Updated net of applying the rewrite string + + <em>Next Indices:</em> + - <code> adj->rewrite_header.next_index </code> + or @c error-drop +*/ +static uword +ip4_rewrite (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip4_rewrite_inline (vm, node, frame, 1, 0, 0); + else + return ip4_rewrite_inline (vm, node, frame, 0, 0, 0); +} + +static uword +ip4_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip4_rewrite_inline (vm, node, frame, 1, 1, 0); + else + return ip4_rewrite_inline (vm, node, frame, 0, 1, 0); +} + +static uword +ip4_rewrite_mcast (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip4_rewrite_inline (vm, node, frame, 1, 0, 1); + else + return ip4_rewrite_inline (vm, node, frame, 0, 0, 1); +} + +static uword +ip4_mcast_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip4_rewrite_inline (vm, node, frame, 1, 1, 1); + else + return ip4_rewrite_inline (vm, node, frame, 0, 1, 1); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_rewrite_node) = { + .function = ip4_rewrite, + .name = "ip4-rewrite", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_rewrite_trace, + + .n_next_nodes = 2, + .next_nodes = { + [IP4_REWRITE_NEXT_DROP] = "error-drop", + [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite) + +VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = { + .function = ip4_rewrite_mcast, + .name = "ip4-rewrite-mcast", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_rewrite_trace, + .sibling_of = "ip4-rewrite", +}; 
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast) + +VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = { + .function = ip4_mcast_midchain, + .name = "ip4-mcast-midchain", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_rewrite_trace, + .sibling_of = "ip4-rewrite", +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain) + +VLIB_REGISTER_NODE (ip4_midchain_node) = { + .function = ip4_midchain, + .name = "ip4-midchain", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .sibling_of = "ip4-rewrite", +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain); +/* *INDENT-ON */ + +int +ip4_lookup_validate (ip4_address_t * a, u32 fib_index0) +{ + ip4_fib_mtrie_t *mtrie0; + ip4_fib_mtrie_leaf_t leaf0; + u32 lbi0; + + mtrie0 = &ip4_fib_get (fib_index0)->mtrie; + + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3); + + lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + + return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a); +} + +static clib_error_t * +test_lookup_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + ip4_fib_t *fib; + u32 table_id = 0; + f64 count = 1; + u32 n; + int i; + ip4_address_t ip4_base_address; + u64 errors = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "table %d", &table_id)) + { + /* Make sure the entry exists. 
*/ + fib = ip4_fib_get (table_id); + if ((fib) && (fib->index != table_id)) + return clib_error_return (0, "<fib-index> %d does not exist", + table_id); + } + else if (unformat (input, "count %f", &count)) + ; + + else if (unformat (input, "%U", + unformat_ip4_address, &ip4_base_address)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + n = count; + + for (i = 0; i < n; i++) + { + if (!ip4_lookup_validate (&ip4_base_address, table_id)) + errors++; + + ip4_base_address.as_u32 = + clib_host_to_net_u32 (1 + + clib_net_to_host_u32 (ip4_base_address.as_u32)); + } + + if (errors) + vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n); + else + vlib_cli_output (vm, "No errors in %d lookups\n", n); + + return 0; +} + +/*? + * Perform a lookup of an IPv4 Address (or range of addresses) in the + * given FIB table to determine if there is a conflict with the + * adjacency table. The fib-id can be determined by using the + * '<em>show ip fib</em>' command. If fib-id is not entered, default value + * of 0 is used. + * + * @todo This command uses fib-id, other commands use table-id (not + * just a name, they are different indexes). Would like to change this + * to table-id for consistency. 
+ * + * @cliexpar + * Example of how to run the test lookup command: + * @cliexstart{test lookup 172.16.1.1 table 1 count 2} + * No errors in 2 lookups + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (lookup_test_command, static) = +{ + .path = "test lookup", + .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]", + .function = test_lookup_command_fn, +}; +/* *INDENT-ON* */ + +int +vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config) +{ + u32 fib_index; + + fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id); + + if (~0 == fib_index) + return VNET_API_ERROR_NO_SUCH_FIB; + + fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4, + flow_hash_config); + + return 0; +} + +static clib_error_t * +set_ip_flow_hash_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int matched = 0; + u32 table_id = 0; + u32 flow_hash_config = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "table %d", &table_id)) + matched = 1; +#define _(a,v) \ + else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;} + foreach_flow_hash_bit +#undef _ + else + break; + } + + if (matched == 0) + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config); + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_SUCH_FIB: + return clib_error_return (0, "no such FIB table %d", table_id); + + default: + clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config); + break; + } + + return 0; +} + +/*? + * Configure the set of IPv4 fields used by the flow hash. 
+ * + * @cliexpar + * Example of how to set the flow hash on a given table: + * @cliexcmd{set ip flow-hash table 7 dst sport dport proto} + * Example of display the configured flow hash: + * @cliexstart{show ip fib} + * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto + * 0.0.0.0/0 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 0.0.0.0/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 224.0.0.0/8 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 6.0.1.2/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]] + * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0 + * 7.0.0.1/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]] + * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0 + * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0 + * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0 + * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0 + * 240.0.0.0/8 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 255.255.255.255/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto + * 0.0.0.0/0 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 0.0.0.0/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 172.16.1.0/24 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]] + * [0] [@4]: ipv4-glean: af_packet0 + * 172.16.1.1/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]] + * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0 + * 
172.16.1.2/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]] + * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36 + * 172.16.2.0/24 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]] + * [0] [@4]: ipv4-glean: af_packet1 + * 172.16.2.1/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]] + * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1 + * 224.0.0.0/8 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 240.0.0.0/8 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * 255.255.255.255/32 + * unicast-ip4-chain + * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = +{ + .path = "set ip flow-hash", + .short_help = + "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]", + .function = set_ip_flow_hash_command_fn, +}; +/* *INDENT-ON* */ + +int +vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index, + u32 table_index) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + ip4_main_t *ipm = &ip4_main; + ip_lookup_main_t *lm = &ipm->lookup_main; + vnet_classify_main_t *cm = &vnet_classify_main; + ip4_address_t *if_addr; + + if (pool_is_free_index (im->sw_interfaces, sw_if_index)) + return VNET_API_ERROR_NO_MATCHING_INTERFACE; + + if (table_index != ~0 && pool_is_free_index (cm->tables, table_index)) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index); + lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index; + + if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL); + + if (NULL != if_addr) + { + fib_prefix_t 
pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr.ip4 = *if_addr, + }; + u32 fib_index; + + fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, + sw_if_index); + + + if (table_index != (u32) ~ 0) + { + dpo_id_t dpo = DPO_INVALID; + + dpo_set (&dpo, + DPO_CLASSIFY, + DPO_PROTO_IP4, + classify_dpo_create (DPO_PROTO_IP4, table_index)); + + fib_table_entry_special_dpo_add (fib_index, + &pfx, + FIB_SOURCE_CLASSIFY, + FIB_ENTRY_FLAG_NONE, &dpo); + dpo_reset (&dpo); + } + else + { + fib_table_entry_special_remove (fib_index, + &pfx, FIB_SOURCE_CLASSIFY); + } + } + + return 0; +} + +static clib_error_t * +set_ip_classify_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 table_index = ~0; + int table_index_set = 0; + u32 sw_if_index = ~0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "table-index %d", &table_index)) + table_index_set = 1; + else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, + vnet_get_main (), &sw_if_index)) + ; + else + break; + } + + if (table_index_set == 0) + return clib_error_return (0, "classify table-index must be specified"); + + if (sw_if_index == ~0) + return clib_error_return (0, "interface / subif must be specified"); + + rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_MATCHING_INTERFACE: + return clib_error_return (0, "No such interface"); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "No such classifier table"); + } + return 0; +} + +/*? + * Assign a classification table to an interface. The classification + * table is created using the '<em>classify table</em>' and '<em>classify session</em>' + * commands. Once the table is create, use this command to filter packets + * on an interface. 
+ * + * @cliexpar + * Example of how to assign a classification table to an interface: + * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_ip_classify_command, static) = +{ + .path = "set ip classify", + .short_help = + "set ip classify intfc <interface> table-index <classify-idx>", + .function = set_ip_classify_command_fn, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c new file mode 100644 index 00000000..3b08f4b0 --- /dev/null +++ b/src/vnet/ip/ip4_input.c @@ -0,0 +1,507 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/ip4_input.c: IP v4 input node + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ppp/ppp.h> +#include <vnet/hdlc/hdlc.h> + +typedef struct +{ + u8 packet_data[64]; +} ip4_input_trace_t; + +static u8 * +format_ip4_input_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + ip4_input_trace_t *t = va_arg (*va, ip4_input_trace_t *); + + s = format (s, "%U", + format_ip4_header, t->packet_data, sizeof (t->packet_data)); + + return s; +} + +typedef enum +{ + IP4_INPUT_NEXT_DROP, + IP4_INPUT_NEXT_PUNT, + IP4_INPUT_NEXT_LOOKUP, + IP4_INPUT_NEXT_LOOKUP_MULTICAST, + IP4_INPUT_NEXT_ICMP_ERROR, + IP4_INPUT_N_NEXT, +} ip4_input_next_t; + +/* Validate IP v4 packets and pass them either to forwarding code + or drop/punt exception packets. */ +always_inline uword +ip4_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int verify_checksum) +{ + ip4_main_t *im = &ip4_main; + vnet_main_t *vnm = vnet_get_main (); + ip_lookup_main_t *lm = &im->lookup_main; + u32 n_left_from, *from, *to_next; + ip4_input_next_t next_index; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_input_node.index); + vlib_simple_counter_main_t *cm; + u32 thread_index = vlib_get_thread_index (); + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (ip4_input_trace_t)); + + cm = vec_elt_at_index (vnm->interface_main.sw_if_counters, + VNET_INTERFACE_COUNTER_IP4); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t *p0, *p1; + ip4_header_t *ip0, *ip1; + u32 sw_if_index0, pi0, ip_len0, cur_len0, next0; + u32 
sw_if_index1, pi1, ip_len1, cur_len1, next1; + i32 len_diff0, len_diff1; + u8 error0, error1, arc0, arc1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); + CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD); + } + + to_next[0] = pi0 = from[0]; + to_next[1] = pi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; + + error0 = error1 = IP4_ERROR_NONE; + + if (PREDICT_FALSE (ip4_address_is_multicast (&ip0->dst_address))) + { + arc0 = lm->mcast_feature_arc_index; + next0 = IP4_INPUT_NEXT_LOOKUP_MULTICAST; + } + else + { + arc0 = lm->ucast_feature_arc_index; + next0 = IP4_INPUT_NEXT_LOOKUP; + if (PREDICT_FALSE (ip0->ttl < 1)) + error0 = IP4_ERROR_TIME_EXPIRED; + } + + if (PREDICT_FALSE (ip4_address_is_multicast (&ip1->dst_address))) + { + arc1 = lm->mcast_feature_arc_index; + next1 = IP4_INPUT_NEXT_LOOKUP_MULTICAST; + } + else + { + arc1 = lm->ucast_feature_arc_index; + next1 = IP4_INPUT_NEXT_LOOKUP; + if (PREDICT_FALSE (ip1->ttl < 1)) + error1 = IP4_ERROR_TIME_EXPIRED; + } + + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; + vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0; + + vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0); + vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1); + + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); + vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1); + + /* Punt packets with options or wrong version. 
*/ + if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45)) + error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ? + IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; + + if (PREDICT_FALSE (ip1->ip_version_and_header_length != 0x45)) + error1 = (ip1->ip_version_and_header_length & 0xf) != 5 ? + IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; + + /* Verify header checksum. */ + if (verify_checksum) + { + ip_csum_t sum0, sum1; + + ip4_partial_header_checksum_x1 (ip0, sum0); + ip4_partial_header_checksum_x1 (ip1, sum1); + + error0 = 0xffff != ip_csum_fold (sum0) ? + IP4_ERROR_BAD_CHECKSUM : error0; + error1 = 0xffff != ip_csum_fold (sum1) ? + IP4_ERROR_BAD_CHECKSUM : error1; + } + + /* Drop fragmentation offset 1 packets. */ + error0 = ip4_get_fragment_offset (ip0) == 1 ? + IP4_ERROR_FRAGMENT_OFFSET_ONE : error0; + error1 = ip4_get_fragment_offset (ip1) == 1 ? + IP4_ERROR_FRAGMENT_OFFSET_ONE : error1; + + /* Verify lengths. */ + ip_len0 = clib_net_to_host_u16 (ip0->length); + ip_len1 = clib_net_to_host_u16 (ip1->length); + + /* IP length must be at least minimal IP header. */ + error0 = ip_len0 < sizeof (ip0[0]) ? IP4_ERROR_TOO_SHORT : error0; + error1 = ip_len1 < sizeof (ip1[0]) ? IP4_ERROR_TOO_SHORT : error1; + + cur_len0 = vlib_buffer_length_in_chain (vm, p0); + cur_len1 = vlib_buffer_length_in_chain (vm, p1); + + len_diff0 = cur_len0 - ip_len0; + len_diff1 = cur_len1 - ip_len1; + + error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0; + error1 = len_diff1 < 0 ? IP4_ERROR_BAD_LENGTH : error1; + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + + if (PREDICT_FALSE (error0 != IP4_ERROR_NONE)) + { + if (error0 == IP4_ERROR_TIME_EXPIRED) + { + icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = IP4_INPUT_NEXT_ICMP_ERROR; + } + else + next0 = error0 != IP4_ERROR_OPTIONS ? 
+ IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT; + } + if (PREDICT_FALSE (error1 != IP4_ERROR_NONE)) + { + if (error1 == IP4_ERROR_TIME_EXPIRED) + { + icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next1 = IP4_INPUT_NEXT_ICMP_ERROR; + } + else + next1 = error1 != IP4_ERROR_OPTIONS ? + IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT; + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *p0; + ip4_header_t *ip0; + u32 sw_if_index0, pi0, ip_len0, cur_len0, next0; + i32 len_diff0; + u8 error0, arc0; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + ip0 = vlib_buffer_get_current (p0); + + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + error0 = IP4_ERROR_NONE; + + if (PREDICT_FALSE (ip4_address_is_multicast (&ip0->dst_address))) + { + arc0 = lm->mcast_feature_arc_index; + next0 = IP4_INPUT_NEXT_LOOKUP_MULTICAST; + } + else + { + arc0 = lm->ucast_feature_arc_index; + next0 = IP4_INPUT_NEXT_LOOKUP; + if (PREDICT_FALSE (ip0->ttl < 1)) + error0 = IP4_ERROR_TIME_EXPIRED; + } + + vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0; + vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0); + + vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1); + + /* Punt packets with options or wrong version. */ + if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45)) + error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ? + IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; + + /* Verify header checksum. */ + if (verify_checksum) + { + ip_csum_t sum0; + + ip4_partial_header_checksum_x1 (ip0, sum0); + error0 = + 0xffff != + ip_csum_fold (sum0) ? IP4_ERROR_BAD_CHECKSUM : error0; + } + + /* Drop fragmentation offset 1 packets. */ + error0 = + ip4_get_fragment_offset (ip0) == + 1 ? 
IP4_ERROR_FRAGMENT_OFFSET_ONE : error0; + + /* Verify lengths. */ + ip_len0 = clib_net_to_host_u16 (ip0->length); + + /* IP length must be at least minimal IP header. */ + error0 = ip_len0 < sizeof (ip0[0]) ? IP4_ERROR_TOO_SHORT : error0; + + cur_len0 = vlib_buffer_length_in_chain (vm, p0); + len_diff0 = cur_len0 - ip_len0; + error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0; + + p0->error = error_node->errors[error0]; + if (PREDICT_FALSE (error0 != IP4_ERROR_NONE)) + { + if (error0 == IP4_ERROR_TIME_EXPIRED) + { + icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded, + ICMP4_time_exceeded_ttl_exceeded_in_transit, + 0); + next0 = IP4_INPUT_NEXT_ICMP_ERROR; + } + else + next0 = error0 != IP4_ERROR_OPTIONS ? + IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT; + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +/** \brief IPv4 input node. + @node ip4-input + + This is the IPv4 input node: validates ip4 header checksums, + verifies ip header lengths, discards pkts with expired TTLs, + and sends pkts to the set of ip feature nodes configured on + the rx interface. + + @param vm vlib_main_t corresponding to the current thread + @param node vlib_node_runtime_t + @param frame vlib_frame_t whose contents should be dispatched + + @par Graph mechanics: buffer metadata, next index usage + + @em Uses: + - vnet_feature_config_main_t cm corresponding to each pkt's dst address unicast / + multicast status. + - <code>b->current_config_index</code> corresponding to each pkt's + rx sw_if_index. + - This sets the per-packet graph trajectory, ensuring that + each packet visits the per-interface features in order. + + - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code> + - Indicates the @c sw_if_index value of the interface that the + packet was received on. 
+ + @em Sets: + - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code> + - The lookup result adjacency index. + + <em>Next Indices:</em> + - Dispatches pkts to the (first) feature node: + <code> vnet_get_config_data (... &next0 ...); </code> + or @c error-drop +*/ +static uword +ip4_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_input_inline (vm, node, frame, /* verify_checksum */ 1); +} + +static uword +ip4_input_no_checksum (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip4_input_inline (vm, node, frame, /* verify_checksum */ 0); +} + +static char *ip4_error_strings[] = { +#define _(sym,string) string, + foreach_ip4_error +#undef _ +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_input_node) = { + .function = ip4_input, + .name = "ip4-input", + .vector_size = sizeof (u32), + + .n_errors = IP4_N_ERROR, + .error_strings = ip4_error_strings, + + .n_next_nodes = IP4_INPUT_N_NEXT, + .next_nodes = { + [IP4_INPUT_NEXT_DROP] = "error-drop", + [IP4_INPUT_NEXT_PUNT] = "error-punt", + [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup", + [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup", + [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, + + .format_buffer = format_ip4_header, + .format_trace = format_ip4_input_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_input_node, ip4_input); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_input_no_checksum_node,static) = { + .function = ip4_input_no_checksum, + .name = "ip4-input-no-checksum", + .vector_size = sizeof (u32), + + .n_next_nodes = IP4_INPUT_N_NEXT, + .next_nodes = { + [IP4_INPUT_NEXT_DROP] = "error-drop", + [IP4_INPUT_NEXT_PUNT] = "error-punt", + [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup", + [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup", + [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error", + }, + + .format_buffer = format_ip4_header, + .format_trace = format_ip4_input_trace, +}; +/* *INDENT-ON* */ + 
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_input_no_checksum_node, + ip4_input_no_checksum); + +static clib_error_t * +ip4_init (vlib_main_t * vm) +{ + clib_error_t *error; + + ethernet_register_input_type (vm, ETHERNET_TYPE_IP4, ip4_input_node.index); + ppp_register_input_protocol (vm, PPP_PROTOCOL_ip4, ip4_input_node.index); + hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip4, ip4_input_node.index); + + { + pg_node_t *pn; + pn = pg_get_node (ip4_input_node.index); + pn->unformat_edit = unformat_pg_ip4_header; + pn = pg_get_node (ip4_input_no_checksum_node.index); + pn->unformat_edit = unformat_pg_ip4_header; + } + + if ((error = vlib_call_init_function (vm, ip4_cli_init))) + return error; + + if ((error = vlib_call_init_function (vm, ip4_source_check_init))) + return error; + + if ((error = vlib_call_init_function + (vm, ip4_source_and_port_range_check_init))) + return error; + + /* Set flow hash to something non-zero. */ + ip4_main.flow_hash_seed = 0xdeadbeef; + + /* Default TTL for packets we generate. */ + ip4_main.host_config.ttl = 64; + + return error; +} + +VLIB_INIT_FUNCTION (ip4_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c new file mode 100644 index 00000000..cc82384d --- /dev/null +++ b/src/vnet/ip/ip4_mtrie.c @@ -0,0 +1,811 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_fib.h: ip4 mtrie fib + * + * Copyright (c) 2012 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> +#include <vnet/ip/ip4_mtrie.h> +#include <vnet/fib/ip4_fib.h> + + +/** + * Global pool of IPv4 8bit PLYs + */ +ip4_fib_mtrie_8_ply_t *ip4_ply_pool; + +always_inline u32 +ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte) +{ + /* + * It's 'non-empty' if the length of the leaf stored is greater than the + * length of a leaf in the covering ply. i.e. 
the leaf is more specific + * than it's would be cover in the covering ply + */ + if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base) + return (1); + return (0); +} + +always_inline ip4_fib_mtrie_leaf_t +ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index) +{ + ip4_fib_mtrie_leaf_t l; + l = 1 + 2 * adj_index; + ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index); + return l; +} + +always_inline u32 +ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n) +{ + return (n & 1) == 0; +} + +always_inline u32 +ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n) +{ + ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n)); + return n >> 1; +} + +always_inline ip4_fib_mtrie_leaf_t +ip4_fib_mtrie_leaf_set_next_ply_index (u32 i) +{ + ip4_fib_mtrie_leaf_t l; + l = 0 + 2 * i; + ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i); + return l; +} + +#ifndef __ALTIVEC__ +#define PLY_X4_SPLAT_INIT(init_x4, init) \ + init_x4 = u32x4_splat (init); +#else +#define PLY_X4_SPLAT_INIT(init_x4, init) \ +{ \ + u32x4_union_t y; \ + y.as_u32[0] = init; \ + y.as_u32[1] = init; \ + y.as_u32[2] = init; \ + y.as_u32[3] = init; \ + init_x4 = y.as_u32x4; \ +} +#endif + +#ifdef CLIB_HAVE_VEC128 +#define PLY_INIT_LEAVES(p) \ +{ \ + u32x4 *l, init_x4; \ + \ + PLY_X4_SPLAT_INIT(init_x4, init); \ + for (l = p->leaves_as_u32x4; \ + l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); \ + l += 4) \ + { \ + l[0] = init_x4; \ + l[1] = init_x4; \ + l[2] = init_x4; \ + l[3] = init_x4; \ + } \ +} +#else +#define PLY_INIT_LEAVES(p) \ +{ \ + u32 *l; \ + \ + for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) \ + { \ + l[0] = init; \ + l[1] = init; \ + l[2] = init; \ + l[3] = init; \ + } \ +} +#endif + +#define PLY_INIT(p, init, prefix_len, ply_base_len) \ +{ \ + /* \ + * A leaf is 'empty' if it represents a leaf from the covering PLY \ + * i.e. 
if the prefix length of the leaf is less than or equal to \ + * the prefix length of the PLY \ + */ \ + p->n_non_empty_leafs = (prefix_len > ply_base_len ? \ + ARRAY_LEN (p->leaves) : 0); \ + memset (p->dst_address_bits_of_leaves, prefix_len, \ + sizeof (p->dst_address_bits_of_leaves)); \ + p->dst_address_bits_base = ply_base_len; \ + \ + /* Initialize leaves. */ \ + PLY_INIT_LEAVES(p); \ +} + +static void +ply_8_init (ip4_fib_mtrie_8_ply_t * p, + ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len) +{ + PLY_INIT (p, init, prefix_len, ply_base_len); +} + +static void +ply_16_init (ip4_fib_mtrie_16_ply_t * p, + ip4_fib_mtrie_leaf_t init, uword prefix_len) +{ + memset (p->dst_address_bits_of_leaves, prefix_len, + sizeof (p->dst_address_bits_of_leaves)); + PLY_INIT_LEAVES (p); +} + +static ip4_fib_mtrie_leaf_t +ply_create (ip4_fib_mtrie_t * m, + ip4_fib_mtrie_leaf_t init_leaf, + u32 leaf_prefix_len, u32 ply_base_len) +{ + ip4_fib_mtrie_8_ply_t *p; + + /* Get cache aligned ply. */ + pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES); + + ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len); + return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool); +} + +always_inline ip4_fib_mtrie_8_ply_t * +get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l) +{ + uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l); + + return pool_elt_at_index (ip4_ply_pool, n); +} + +void +ip4_mtrie_free (ip4_fib_mtrie_t * m) +{ + /* the root ply is embedded so the is nothing to do, + * the assumption being that the IP4 FIB table has emptied the trie + * before deletion. 
+ */ +#if CLIB_DEBUG > 0 + int i; + for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++) + { + ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i])); + } +#endif +} + +void +ip4_mtrie_init (ip4_fib_mtrie_t * m) +{ + ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0); +} + +typedef struct +{ + ip4_address_t dst_address; + u32 dst_address_length; + u32 adj_index; + u32 cover_address_length; + u32 cover_adj_index; +} ip4_fib_mtrie_set_unset_leaf_args_t; + +static void +set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m, + ip4_fib_mtrie_8_ply_t * ply, + ip4_fib_mtrie_leaf_t new_leaf, + uword new_leaf_dst_address_bits) +{ + ip4_fib_mtrie_leaf_t old_leaf; + uword i; + + ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf)); + + for (i = 0; i < ARRAY_LEN (ply->leaves); i++) + { + old_leaf = ply->leaves[i]; + + /* Recurse into sub plies. */ + if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf)) + { + ip4_fib_mtrie_8_ply_t *sub_ply = + get_next_ply_for_leaf (m, old_leaf); + set_ply_with_more_specific_leaf (m, sub_ply, new_leaf, + new_leaf_dst_address_bits); + } + + /* Replace less specific terminal leaves with new leaf. 
*/ + else if (new_leaf_dst_address_bits >= + ply->dst_address_bits_of_leaves[i]) + { + __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf); + ASSERT (ply->leaves[i] == new_leaf); + ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits; + ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i); + } + } +} + +static void +set_leaf (ip4_fib_mtrie_t * m, + const ip4_fib_mtrie_set_unset_leaf_args_t * a, + u32 old_ply_index, u32 dst_address_byte_index) +{ + ip4_fib_mtrie_leaf_t old_leaf, new_leaf; + i32 n_dst_bits_next_plies; + u8 dst_byte; + ip4_fib_mtrie_8_ply_t *old_ply; + + old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index); + + ASSERT (a->dst_address_length <= 32); + ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); + + /* how many bits of the destination address are in the next PLY */ + n_dst_bits_next_plies = + a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1); + + dst_byte = a->dst_address.as_u8[dst_address_byte_index]; + + /* Number of bits next plies <= 0 => insert leaves this ply. 
*/ + if (n_dst_bits_next_plies <= 0) + { + /* The mask length of the address to insert maps to this ply */ + uword old_leaf_is_terminal; + u32 i, n_dst_bits_this_ply; + + /* The number of bits, and hence slots/buckets, we will fill */ + n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies); + ASSERT ((a->dst_address.as_u8[dst_address_byte_index] & + pow2_mask (n_dst_bits_this_ply)) == 0); + + /* Starting at the value of the byte at this section of the v4 address + * fill the buckets/slots of the ply */ + for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++) + { + ip4_fib_mtrie_8_ply_t *new_ply; + + old_leaf = old_ply->leaves[i]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i]) + { + /* The new leaf is more or equally specific than the one currently + * occupying the slot */ + new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + if (old_leaf_is_terminal) + { + /* The current leaf is terminal, we can replace it with + * the new one */ + old_ply->n_non_empty_leafs -= + ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); + + old_ply->dst_address_bits_of_leaves[i] = + a->dst_address_length; + __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf, + new_leaf); + ASSERT (old_ply->leaves[i] == new_leaf); + + old_ply->n_non_empty_leafs += + ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); + ASSERT (old_ply->n_non_empty_leafs <= + ARRAY_LEN (old_ply->leaves)); + } + else + { + /* Existing leaf points to another ply. We need to place + * new_leaf into all more specific slots. */ + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_ply_with_more_specific_leaf (m, new_ply, new_leaf, + a->dst_address_length); + } + } + else if (!old_leaf_is_terminal) + { + /* The current leaf is less specific and not termial (i.e. 
a ply), + * recurse on down the trie */ + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_leaf (m, a, new_ply - ip4_ply_pool, + dst_address_byte_index + 1); + } + /* + * else + * the route we are adding is less specific than the leaf currently + * occupying this slot. leave it there + */ + } + } + else + { + /* The address to insert requires us to move down at a lower level of + * the trie - recurse on down */ + ip4_fib_mtrie_8_ply_t *new_ply; + u8 ply_base_len; + + ply_base_len = 8 * (dst_address_byte_index + 1); + + old_leaf = old_ply->leaves[dst_byte]; + + if (ip4_fib_mtrie_leaf_is_terminal (old_leaf)) + { + /* There is a leaf occupying the slot. Replace it with a new ply */ + old_ply->n_non_empty_leafs -= + ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte); + + new_leaf = ply_create (m, old_leaf, + clib_max (old_ply->dst_address_bits_of_leaves + [dst_byte], ply_base_len), + ply_base_len); + new_ply = get_next_ply_for_leaf (m, new_leaf); + + /* Refetch since ply_create may move pool. 
*/ + old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index); + + __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf, + new_leaf); + ASSERT (old_ply->leaves[dst_byte] == new_leaf); + old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len; + + old_ply->n_non_empty_leafs += + ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte); + ASSERT (old_ply->n_non_empty_leafs >= 0); + } + else + new_ply = get_next_ply_for_leaf (m, old_leaf); + + set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1); + } +} + +static void +set_root_leaf (ip4_fib_mtrie_t * m, + const ip4_fib_mtrie_set_unset_leaf_args_t * a) +{ + ip4_fib_mtrie_leaf_t old_leaf, new_leaf; + ip4_fib_mtrie_16_ply_t *old_ply; + i32 n_dst_bits_next_plies; + u16 dst_byte; + + old_ply = &m->root_ply; + + ASSERT (a->dst_address_length <= 32); + + /* how many bits of the destination address are in the next PLY */ + n_dst_bits_next_plies = a->dst_address_length - BITS (u16); + + dst_byte = a->dst_address.as_u16[0]; + + /* Number of bits next plies <= 0 => insert leaves this ply. 
*/ + if (n_dst_bits_next_plies <= 0) + { + /* The mask length of the address to insert maps to this ply */ + uword old_leaf_is_terminal; + u32 i, n_dst_bits_this_ply; + + /* The number of bits, and hence slots/buckets, we will fill */ + n_dst_bits_this_ply = 16 - a->dst_address_length; + ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) & + pow2_mask (n_dst_bits_this_ply)) == 0); + + /* Starting at the value of the byte at this section of the v4 address + * fill the buckets/slots of the ply */ + for (i = 0; i < (1 << n_dst_bits_this_ply); i++) + { + ip4_fib_mtrie_8_ply_t *new_ply; + u16 slot; + + slot = clib_net_to_host_u16 (dst_byte); + slot += i; + slot = clib_host_to_net_u16 (slot); + + old_leaf = old_ply->leaves[slot]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + if (a->dst_address_length >= + old_ply->dst_address_bits_of_leaves[slot]) + { + /* The new leaf is more or equally specific than the one currently + * occupying the slot */ + new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + if (old_leaf_is_terminal) + { + /* The current leaf is terminal, we can replace it with + * the new one */ + old_ply->dst_address_bits_of_leaves[slot] = + a->dst_address_length; + __sync_val_compare_and_swap (&old_ply->leaves[slot], + old_leaf, new_leaf); + ASSERT (old_ply->leaves[slot] == new_leaf); + } + else + { + /* Existing leaf points to another ply. We need to place + * new_leaf into all more specific slots. */ + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_ply_with_more_specific_leaf (m, new_ply, new_leaf, + a->dst_address_length); + } + } + else if (!old_leaf_is_terminal) + { + /* The current leaf is less specific and not termial (i.e. a ply), + * recurse on down the trie */ + new_ply = get_next_ply_for_leaf (m, old_leaf); + set_leaf (m, a, new_ply - ip4_ply_pool, 2); + } + /* + * else + * the route we are adding is less specific than the leaf currently + * occupying this slot. 
leave it there + */ + } + } + else + { + /* The address to insert requires us to move down at a lower level of + * the trie - recurse on down */ + ip4_fib_mtrie_8_ply_t *new_ply; + u8 ply_base_len; + + ply_base_len = 16; + + old_leaf = old_ply->leaves[dst_byte]; + + if (ip4_fib_mtrie_leaf_is_terminal (old_leaf)) + { + /* There is a leaf occupying the slot. Replace it with a new ply */ + new_leaf = ply_create (m, old_leaf, + clib_max (old_ply->dst_address_bits_of_leaves + [dst_byte], ply_base_len), + ply_base_len); + new_ply = get_next_ply_for_leaf (m, new_leaf); + + __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf, + new_leaf); + ASSERT (old_ply->leaves[dst_byte] == new_leaf); + old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len; + } + else + new_ply = get_next_ply_for_leaf (m, old_leaf); + + set_leaf (m, a, new_ply - ip4_ply_pool, 2); + } +} + +static uword +unset_leaf (ip4_fib_mtrie_t * m, + const ip4_fib_mtrie_set_unset_leaf_args_t * a, + ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index) +{ + ip4_fib_mtrie_leaf_t old_leaf, del_leaf; + i32 n_dst_bits_next_plies; + i32 i, n_dst_bits_this_ply, old_leaf_is_terminal; + u8 dst_byte; + + ASSERT (a->dst_address_length <= 32); + ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8)); + + n_dst_bits_next_plies = + a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1); + + dst_byte = a->dst_address.as_u8[dst_address_byte_index]; + if (n_dst_bits_next_plies < 0) + dst_byte &= ~pow2_mask (-n_dst_bits_next_plies); + + n_dst_bits_this_ply = + n_dst_bits_next_plies <= 0 ? 
-n_dst_bits_next_plies : 0; + n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply); + + del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++) + { + old_leaf = old_ply->leaves[i]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + if (old_leaf == del_leaf + || (!old_leaf_is_terminal + && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), + dst_address_byte_index + 1))) + { + old_ply->n_non_empty_leafs -= + ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); + + old_ply->leaves[i] = + ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index); + old_ply->dst_address_bits_of_leaves[i] = + clib_max (old_ply->dst_address_bits_base, + a->cover_address_length); + + old_ply->n_non_empty_leafs += + ip4_fib_mtrie_leaf_is_non_empty (old_ply, i); + + ASSERT (old_ply->n_non_empty_leafs >= 0); + if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0) + { + pool_put (ip4_ply_pool, old_ply); + /* Old ply was deleted. */ + return 1; + } +#if CLIB_DEBUG > 0 + else if (dst_address_byte_index) + { + int ii, count = 0; + for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++) + { + count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii); + } + ASSERT (count); + } +#endif + } + } + + /* Old ply was not deleted. */ + return 0; +} + +static void +unset_root_leaf (ip4_fib_mtrie_t * m, + const ip4_fib_mtrie_set_unset_leaf_args_t * a) +{ + ip4_fib_mtrie_leaf_t old_leaf, del_leaf; + i32 n_dst_bits_next_plies; + i32 i, n_dst_bits_this_ply, old_leaf_is_terminal; + u16 dst_byte; + ip4_fib_mtrie_16_ply_t *old_ply; + + ASSERT (a->dst_address_length <= 32); + + old_ply = &m->root_ply; + n_dst_bits_next_plies = a->dst_address_length - BITS (u16); + + dst_byte = a->dst_address.as_u16[0]; + + n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ? 
+ (16 - a->dst_address_length) : 0); + + del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index); + + /* Starting at the value of the byte at this section of the v4 address + * fill the buckets/slots of the ply */ + for (i = 0; i < (1 << n_dst_bits_this_ply); i++) + { + u16 slot; + + slot = clib_net_to_host_u16 (dst_byte); + slot += i; + slot = clib_host_to_net_u16 (slot); + + old_leaf = old_ply->leaves[slot]; + old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf); + + if (old_leaf == del_leaf + || (!old_leaf_is_terminal + && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2))) + { + old_ply->leaves[slot] = + ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index); + old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length; + } + } +} + +void +ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, u32 adj_index) +{ + ip4_fib_mtrie_set_unset_leaf_args_t a; + ip4_main_t *im = &ip4_main; + + /* Honor dst_address_length. Fib masks are in network byte order */ + a.dst_address.as_u32 = (dst_address->as_u32 & + im->fib_masks[dst_address_length]); + a.dst_address_length = dst_address_length; + a.adj_index = adj_index; + + set_root_leaf (m, &a); +} + +void +ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, + u32 adj_index, + u32 cover_address_length, u32 cover_adj_index) +{ + ip4_fib_mtrie_set_unset_leaf_args_t a; + ip4_main_t *im = &ip4_main; + + /* Honor dst_address_length. Fib masks are in network byte order */ + a.dst_address.as_u32 = (dst_address->as_u32 & + im->fib_masks[dst_address_length]); + a.dst_address_length = dst_address_length; + a.adj_index = adj_index; + a.cover_adj_index = cover_adj_index; + a.cover_address_length = cover_address_length; + + /* the top level ply is never removed */ + unset_root_leaf (m, &a); +} + +/* Returns number of bytes of memory used by mtrie. 
*/ +static uword +mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p) +{ + uword bytes, i; + + bytes = sizeof (p[0]); + for (i = 0; i < ARRAY_LEN (p->leaves); i++) + { + ip4_fib_mtrie_leaf_t l = p->leaves[i]; + if (ip4_fib_mtrie_leaf_is_next_ply (l)) + bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l)); + } + + return bytes; +} + +/* Returns number of bytes of memory used by mtrie. */ +static uword +mtrie_memory_usage (ip4_fib_mtrie_t * m) +{ + uword bytes, i; + + bytes = sizeof (*m); + for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++) + { + ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i]; + if (ip4_fib_mtrie_leaf_is_next_ply (l)) + bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l)); + } + + return bytes; +} + +static u8 * +format_ip4_fib_mtrie_leaf (u8 * s, va_list * va) +{ + ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t); + + if (ip4_fib_mtrie_leaf_is_terminal (l)) + s = format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (l)); + else + s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l)); + return s; +} + +#define FORMAT_PLY(s, _p, _i, _base_address, _ply_max_len, _indent) \ +({ \ + u32 a, ia_length; \ + ip4_address_t ia; \ + ip4_fib_mtrie_leaf_t _l = p->leaves[(_i)]; \ + \ + a = (_base_address) + ((_i) << (32 - (_ply_max_len))); \ + ia.as_u32 = clib_host_to_net_u32 (a); \ + ia_length = (_p)->dst_address_bits_of_leaves[(_i)]; \ + s = format (s, "\n%U%20U %U", \ + format_white_space, (_indent) + 2, \ + format_ip4_address_and_length, &ia, ia_length, \ + format_ip4_fib_mtrie_leaf, _l); \ + \ + if (ip4_fib_mtrie_leaf_is_next_ply (_l)) \ + s = format (s, "\n%U%U", \ + format_white_space, (_indent) + 2, \ + format_ip4_fib_mtrie_ply, m, a, \ + ip4_fib_mtrie_leaf_get_next_ply_index (_l)); \ + s; \ +}) + +static u8 * +format_ip4_fib_mtrie_ply (u8 * s, va_list * va) +{ + ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *); + u32 base_address = va_arg (*va, u32); + u32 ply_index = 
va_arg (*va, u32); + ip4_fib_mtrie_8_ply_t *p; + uword indent; + int i; + + p = pool_elt_at_index (ip4_ply_pool, ply_index); + indent = format_get_indent (s); + s = format (s, "ply index %d, %d non-empty leaves", ply_index, + p->n_non_empty_leafs); + + for (i = 0; i < ARRAY_LEN (p->leaves); i++) + { + if (ip4_fib_mtrie_leaf_is_non_empty (p, i)) + { + FORMAT_PLY (s, p, i, base_address, + p->dst_address_bits_base + 8, indent); + } + } + + return s; +} + +u8 * +format_ip4_fib_mtrie (u8 * s, va_list * va) +{ + ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *); + ip4_fib_mtrie_16_ply_t *p; + u32 base_address = 0; + int i; + + s = format (s, "%d plies, memory usage %U\n", + pool_elts (ip4_ply_pool), + format_memory_size, mtrie_memory_usage (m)); + s = format (s, "root-ply"); + p = &m->root_ply; + + for (i = 0; i < ARRAY_LEN (p->leaves); i++) + { + u16 slot; + + slot = clib_host_to_net_u16 (i); + + if (p->dst_address_bits_of_leaves[slot] > 0) + { + FORMAT_PLY (s, p, slot, base_address, 16, 2); + } + } + + return s; +} + +static clib_error_t * +ip4_mtrie_module_init (vlib_main_t * vm) +{ + /* Burn one ply so index 0 is taken */ + CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p); + + pool_get (ip4_ply_pool, p); + + return (NULL); +} + +VLIB_INIT_FUNCTION (ip4_mtrie_module_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h new file mode 100644 index 00000000..be262c2c --- /dev/null +++ b/src/vnet/ip/ip4_mtrie.h @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_fib.h: ip4 mtrie fib + * + * Copyright (c) 2012 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip_ip4_fib_h +#define included_ip_ip4_fib_h + +#include <vppinfra/cache.h> +#include <vppinfra/vector.h> +#include <vnet/ip/lookup.h> +#include <vnet/ip/ip4_packet.h> /* for ip4_address_t */ + +/* ip4 fib leafs: 4 ply 8-8-8-8 mtrie. + 1 + 2*adj_index for terminal leaves. + 0 + 2*next_ply_index for non-terminals, i.e. 
PLYs + 1 => empty (adjacency index of zero is special miss adjacency). */ +typedef u32 ip4_fib_mtrie_leaf_t; + +#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0) + +/** + * @brief the 16 way stride that is the top PLY of the mtrie + * We do not maintain the count of 'real' leaves in this PLY, since + * it is never removed. The FIB will destroy the mtrie and the ply once + * the FIB is destroyed. + */ +#define PLY_16_SIZE (1<<16) +typedef struct ip4_fib_mtrie_16_ply_t_ +{ + /** + * The leaves/slots/buckets to be filed with leafs + */ + union + { + ip4_fib_mtrie_leaf_t leaves[PLY_16_SIZE]; + +#ifdef CLIB_HAVE_VEC128 + u32x4 leaves_as_u32x4[PLY_16_SIZE / 4]; +#endif + }; + + /** + * Prefix length for terminal leaves. + */ + u8 dst_address_bits_of_leaves[PLY_16_SIZE]; +} ip4_fib_mtrie_16_ply_t; + +/** + * @brief One ply of the 4 ply mtrie fib. + */ +typedef struct ip4_fib_mtrie_8_ply_t_ +{ + /** + * The leaves/slots/buckets to be filed with leafs + */ + union + { + ip4_fib_mtrie_leaf_t leaves[256]; + +#ifdef CLIB_HAVE_VEC128 + u32x4 leaves_as_u32x4[256 / 4]; +#endif + }; + + /** + * Prefix length for leaves/ply. + */ + u8 dst_address_bits_of_leaves[256]; + + /** + * Number of non-empty leafs (whether terminal or not). + */ + i32 n_non_empty_leafs; + + /** + * The length of the ply's coviering prefix. Also a measure of its depth + * If a leaf in a slot has a mask length longer than this then it is + * 'non-empty'. Otherwise it is the value of the cover. + */ + i32 dst_address_bits_base; + + /* Pad to cache line boundary. */ + u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)]; +} +ip4_fib_mtrie_8_ply_t; + +STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES, + "IP4 Mtrie ply cache line"); + +/** + * @brief The mutiway-TRIE. + * There is no data associated with the mtrie apart from the top PLY + */ +typedef struct +{ + /** + * Embed the PLY with the mtrie struct. 
This means that the Data-plane + * 'get me the mtrie' returns the first ply, and not an indirect 'pointer' + * to it. therefore no cachline misses in the data-path. + */ + ip4_fib_mtrie_16_ply_t root_ply; +} ip4_fib_mtrie_t; + +/** + * @brief Initialise an mtrie + */ +void ip4_mtrie_init (ip4_fib_mtrie_t * m); + +/** + * @brief Free an mtrie, It must be emty when free'd + */ +void ip4_mtrie_free (ip4_fib_mtrie_t * m); + +/** + * @brief Add a route/rntry to the mtrie + */ +void ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, u32 adj_index); +/** + * @brief remove a route/rntry to the mtrie + */ +void ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address, + u32 dst_address_length, + u32 adj_index, + u32 cover_address_length, u32 cover_adj_index); + +/** + * @brief Format/display the contents of the mtrie + */ +format_function_t format_ip4_fib_mtrie; + +/** + * @brief A global pool of 8bit stride plys + */ +extern ip4_fib_mtrie_8_ply_t *ip4_ply_pool; + +/** + * Is the leaf terminal (i.e. an LB index) or non-terminak (i.e. a PLY index) + */ +always_inline u32 +ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n) +{ + return n & 1; +} + +/** + * From the stored slot value extract the LB index value + */ +always_inline u32 +ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n) +{ + ASSERT (ip4_fib_mtrie_leaf_is_terminal (n)); + return n >> 1; +} + +/** + * @brief Lookup step. Processes 1 byte of 4 byte ip4 address. 
+ */ +always_inline ip4_fib_mtrie_leaf_t +ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m, + ip4_fib_mtrie_leaf_t current_leaf, + const ip4_address_t * dst_address, + u32 dst_address_byte_index) +{ + ip4_fib_mtrie_8_ply_t *ply; + + uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf); + + if (!current_is_terminal) + { + ply = ip4_ply_pool + (current_leaf >> 1); + return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]); + } + + return current_leaf; +} + +/** + * @brief Lookup step number 1. Processes 2 bytes of 4 byte ip4 address. + */ +always_inline ip4_fib_mtrie_leaf_t +ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m, + const ip4_address_t * dst_address) +{ + ip4_fib_mtrie_leaf_t next_leaf; + + next_leaf = m->root_ply.leaves[dst_address->as_u16[0]]; + + return next_leaf; +} + +#endif /* included_ip_ip4_fib_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h new file mode 100644 index 00000000..1ff9fbdb --- /dev/null +++ b/src/vnet/ip/ip4_packet.h @@ -0,0 +1,385 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip4/packet.h: ip4 packet format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip4_packet_h +#define included_ip4_packet_h + +#include <vnet/ip/ip_packet.h> /* for ip_csum_t */ +#include <vnet/tcp/tcp_packet.h> /* for tcp_header_t */ +#include <vppinfra/byte_order.h> /* for clib_net_to_host_u16 */ + +/* IP4 address which can be accessed either as 4 bytes + or as a 32-bit number. */ +typedef union +{ + u8 data[4]; + u32 data_u32; + /* Aliases. 
*/ + u8 as_u8[4]; + u16 as_u16[2]; + u32 as_u32; +} ip4_address_t; + +typedef struct +{ + /* IP address must be first for ip_interface_address_get_address() to work */ + ip4_address_t ip4_addr; + u32 fib_index; +} ip4_address_fib_t; + +always_inline void +ip4_addr_fib_init (ip4_address_fib_t * addr_fib, ip4_address_t * address, + u32 fib_index) +{ + clib_memcpy (&addr_fib->ip4_addr, address, sizeof (addr_fib->ip4_addr)); + addr_fib->fib_index = fib_index; +} + +/* (src,dst) pair of addresses as found in packet header. */ +typedef struct +{ + ip4_address_t src, dst; +} ip4_address_pair_t; + +/* If address is a valid netmask, return length of mask. */ +always_inline uword +ip4_address_netmask_length (ip4_address_t * a) +{ + uword result = 0; + uword i; + for (i = 0; i < ARRAY_LEN (a->as_u8); i++) + { + switch (a->as_u8[i]) + { + case 0xff: + result += 8; + break; + case 0xfe: + result += 7; + goto done; + case 0xfc: + result += 6; + goto done; + case 0xf8: + result += 5; + goto done; + case 0xf0: + result += 4; + goto done; + case 0xe0: + result += 3; + goto done; + case 0xc0: + result += 2; + goto done; + case 0x80: + result += 1; + goto done; + case 0x00: + result += 0; + goto done; + default: + /* Not a valid netmask mask. */ + return ~0; + } + } +done: + return result; +} + +typedef union +{ + struct + { + /* 4 bit packet length (in 32bit units) and version VVVVLLLL. + e.g. for packets w/ no options ip_version_and_header_length == 0x45. */ + u8 ip_version_and_header_length; + + /* Type of service. */ + u8 tos; + + /* Total layer 3 packet length including this header. */ + u16 length; + + /* Fragmentation ID. */ + u16 fragment_id; + + /* 3 bits of flags and 13 bits of fragment offset (in units + of 8 byte quantities). */ + u16 flags_and_fragment_offset; +#define IP4_HEADER_FLAG_MORE_FRAGMENTS (1 << 13) +#define IP4_HEADER_FLAG_DONT_FRAGMENT (1 << 14) +#define IP4_HEADER_FLAG_CONGESTION (1 << 15) + + /* Time to live decremented by router at each hop. 
*/
+    u8 ttl;
+
+    /* Next level protocol packet. */
+    u8 protocol;
+
+    /* Checksum. */
+    u16 checksum;
+
+    /* Source and destination address. */
+    union
+    {
+      struct
+      {
+        ip4_address_t src_address, dst_address;
+      };
+      ip4_address_pair_t address_pair;
+    };
+  };
+
+  /* For checksumming we'll want to access IP header in word sized chunks. */
+  /* For 64 bit machines. */
+  /* *INDENT-OFF* */
+  CLIB_PACKED (struct {
+    u64 checksum_data_64[2];
+    u32 checksum_data_64_32[1];
+  });
+  /* *INDENT-ON* */
+
+  /* For 32 bit machines. */
+  /* *INDENT-OFF* */
+  CLIB_PACKED (struct {
+    u32 checksum_data_32[5];
+  });
+  /* *INDENT-ON* */
+} ip4_header_t;
+
+/* Value of ip_version_and_header_length for packets w/o options. */
+#define IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS \
+  ((4 << 4) | (sizeof (ip4_header_t) / sizeof (u32)))
+/* Return the fragment offset field: the low 13 bits of
+   flags_and_fragment_offset after conversion to host byte order. */
+always_inline int
+ip4_get_fragment_offset (ip4_header_t * i)
+{
+  return clib_net_to_host_u16 (i->flags_and_fragment_offset) & 0x1fff;
+}
+/* Return the More Fragments bit of flags_and_fragment_offset (host
+   byte order).  The result is the masked bit itself, i.e. either
+   IP4_HEADER_FLAG_MORE_FRAGMENTS or 0, not a 0/1 boolean. */
+always_inline int
+ip4_get_fragment_more (ip4_header_t * i)
+{
+  return clib_net_to_host_u16 (i->flags_and_fragment_offset) &
+    IP4_HEADER_FLAG_MORE_FRAGMENTS;
+}
+/* Nonzero when the header belongs to a fragment: the fragment offset
+   is nonzero or More Fragments is set.  The field is tested as stored
+   in the header; the byte swap is applied to the constant mask.
+   NOTE(review): the mask is swapped with clib_net_to_host_u16 although
+   the direction is host to net; presumably both are the same 16-bit
+   swap -- confirm. */
+always_inline int
+ip4_is_fragment (ip4_header_t * i)
+{
+  return (i->flags_and_fragment_offset &
+          clib_net_to_host_u16 (0x1fff | IP4_HEADER_FLAG_MORE_FRAGMENTS));
+}
+/* Nonzero when More Fragments is set and the fragment offset is zero,
+   i.e. of the offset and MF bits only MF is set. */
+always_inline int
+ip4_is_first_fragment (ip4_header_t * i)
+{
+  return (i->flags_and_fragment_offset &
+          clib_net_to_host_u16 (0x1fff | IP4_HEADER_FLAG_MORE_FRAGMENTS)) ==
+    clib_net_to_host_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+}
+
+/* Fragment offset in bytes. 
*/
+always_inline int
+ip4_get_fragment_offset_bytes (ip4_header_t * i)
+{
+  return 8 * ip4_get_fragment_offset (i);
+}
+/* Header length in bytes: the low 4 bits of
+   ip_version_and_header_length times sizeof (u32). */
+always_inline int
+ip4_header_bytes (ip4_header_t * i)
+{
+  return sizeof (u32) * (i->ip_version_and_header_length & 0xf);
+}
+/* Pointer to the first byte after the IP4 header, using the header
+   length stored in the header itself.  Relies on arithmetic on
+   void *, which is a compiler extension rather than standard C. */
+always_inline void *
+ip4_next_header (ip4_header_t * i)
+{
+  return (void *) i + ip4_header_bytes (i);
+}
+/* Compute the checksum over ip4_header_bytes (i) bytes of the header.
+   The checksum field is zeroed for the computation and restored
+   before returning, so the header is written to but left unchanged. */
+always_inline u16
+ip4_header_checksum (ip4_header_t * i)
+{
+  u16 save, csum;
+  ip_csum_t sum;
+
+  save = i->checksum;
+  i->checksum = 0;
+  sum = ip_incremental_checksum (0, i, ip4_header_bytes (i));
+  csum = ~ip_csum_fold (sum);
+
+  i->checksum = save;
+
+  /* Make checksum agree for special case where either
+     0 or 0xffff would give same 1s complement sum. */
+  if (csum == 0 && save == 0xffff)
+    csum = save;
+
+  return csum;
+}
+/* Nonzero when the stored checksum equals the one recomputed by
+   ip4_header_checksum (). */
+static inline uword
+ip4_header_checksum_is_valid (ip4_header_t * i)
+{
+  return i->checksum == ip4_header_checksum (i);
+}
+/* Sum the header words of ip0 into sum0 with ip_csum_with_carry.
+   When ip_csum_t is wider than 32 bits the header is read as two
+   64-bit words plus one 32-bit word, otherwise as five 32-bit words;
+   either way exactly 20 bytes are read, so options are not covered.
+   The result is left unfolded.  Arguments are expanded more than
+   once. */
+#define ip4_partial_header_checksum_x1(ip0,sum0) \
+do { \
+  if (BITS (ip_csum_t) > 32) \
+    { \
+      sum0 = ip0->checksum_data_64[0]; \
+      sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64[1]); \
+      sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64_32[0]); \
+    } \
+  else \
+    { \
+      sum0 = ip0->checksum_data_32[0]; \
+      sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[1]); \
+      sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[2]); \
+      sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[3]); \
+      sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[4]); \
+    } \
+} while (0)
+/* Two-header form of ip4_partial_header_checksum_x1: the same sums for
+   ip0 into sum0 and ip1 into sum1, with the two sequences
+   interleaved. */
+#define ip4_partial_header_checksum_x2(ip0,ip1,sum0,sum1) \
+do { \
+  if (BITS (ip_csum_t) > 32) \
+    { \
+      sum0 = ip0->checksum_data_64[0]; \
+      sum1 = ip1->checksum_data_64[0]; \
+      sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64[1]); \
+      sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_64[1]); \
+      sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_64_32[0]); \
+      sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_64_32[0]); \
+    } \
+  else \
+    { \
+      sum0 = 
ip0->checksum_data_32[0]; \ + sum1 = ip1->checksum_data_32[0]; \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[1]); \ + sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[1]); \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[2]); \ + sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[2]); \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[3]); \ + sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[3]); \ + sum0 = ip_csum_with_carry (sum0, ip0->checksum_data_32[4]); \ + sum1 = ip_csum_with_carry (sum1, ip1->checksum_data_32[4]); \ + } \ +} while (0) + +always_inline uword +ip4_address_is_multicast (ip4_address_t * a) +{ + return (a->data[0] & 0xf0) == 0xe0; +} + +always_inline void +ip4_multicast_address_set_for_group (ip4_address_t * a, + ip_multicast_group_t g) +{ + ASSERT ((u32) g < (1 << 28)); + a->as_u32 = clib_host_to_net_u32 ((0xe << 28) + g); +} + +always_inline void +ip4_multicast_ethernet_address (u8 * ethernet_address, ip4_address_t * a) +{ + u8 *d = a->as_u8; + + ethernet_address[0] = 0x01; + ethernet_address[1] = 0x00; + ethernet_address[2] = 0x5e; + ethernet_address[3] = d[1] & 0x7f; + ethernet_address[4] = d[2]; + ethernet_address[5] = d[3]; +} + +always_inline void +ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0) +{ + u32 src0, dst0; + + src0 = ip0->src_address.data_u32; + dst0 = ip0->dst_address.data_u32; + ip0->src_address.data_u32 = dst0; + ip0->dst_address.data_u32 = src0; + + src0 = tcp0->src; + dst0 = tcp0->dst; + tcp0->src = dst0; + tcp0->dst = src0; +} + +always_inline void +ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1, + tcp_header_t * tcp0, tcp_header_t * tcp1) +{ + u32 src0, dst0, src1, dst1; + + src0 = ip0->src_address.data_u32; + src1 = ip1->src_address.data_u32; + dst0 = ip0->dst_address.data_u32; + dst1 = ip1->dst_address.data_u32; + ip0->src_address.data_u32 = dst0; + ip1->src_address.data_u32 = dst1; + ip0->dst_address.data_u32 = src0; + ip1->dst_address.data_u32 = src1; 
+ + src0 = tcp0->src; + src1 = tcp1->src; + dst0 = tcp0->dst; + dst1 = tcp1->dst; + tcp0->src = dst0; + tcp1->src = dst1; + tcp0->dst = src0; + tcp1->dst = src1; +} + +#endif /* included_ip4_packet_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip4_pg.c b/src/vnet/ip/ip4_pg.c new file mode 100644 index 00000000..9697a3b9 --- /dev/null +++ b/src/vnet/ip/ip4_pg.c @@ -0,0 +1,387 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip4_pg: IP v4 packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> +#include <vnet/pg/pg.h> + +#define IP4_PG_EDIT_CHECKSUM (1 << 0) +#define IP4_PG_EDIT_LENGTH (1 << 1) + +static_always_inline void +compute_length_and_or_checksum (vlib_main_t * vm, + u32 * packets, + u32 n_packets, + u32 ip_header_offset, u32 flags) +{ + ASSERT (flags != 0); + + while (n_packets >= 2) + { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip4_header_t *ip0, *ip1; + ip_csum_t sum0, sum1; + + pi0 = packets[0]; + pi1 = packets[1]; + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + n_packets -= 2; + packets += 2; + + ip0 = (void *) (p0->data + ip_header_offset); + ip1 = (void *) (p1->data + ip_header_offset); + + if (flags & IP4_PG_EDIT_LENGTH) + { + ip0->length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) - + ip_header_offset); + ip1->length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p1) - + ip_header_offset); + } + + if (flags & IP4_PG_EDIT_CHECKSUM) + { + ASSERT (ip4_header_bytes (ip0) == sizeof (ip0[0])); + ASSERT (ip4_header_bytes (ip1) == sizeof (ip1[0])); + + ip0->checksum = 0; + ip1->checksum = 0; + + ip4_partial_header_checksum_x2 (ip0, ip1, sum0, sum1); + ip0->checksum = ~ip_csum_fold (sum0); + ip1->checksum = ~ip_csum_fold (sum1); + + ASSERT (ip0->checksum == ip4_header_checksum (ip0)); + ASSERT (ip1->checksum == ip4_header_checksum (ip1)); + } + } + + while (n_packets >= 1) + { + u32 pi0; + vlib_buffer_t *p0; + ip4_header_t *ip0; + ip_csum_t sum0; + + pi0 = packets[0]; + p0 = 
vlib_get_buffer (vm, pi0);
+      n_packets -= 1;
+      packets += 1;
+
+      ip0 = (void *) (p0->data + ip_header_offset);
+
+      if (flags & IP4_PG_EDIT_LENGTH)
+        ip0->length =
+          clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) -
+                                ip_header_offset);
+
+      if (flags & IP4_PG_EDIT_CHECKSUM)
+        {
+          ASSERT (ip4_header_bytes (ip0) == sizeof (ip0[0]));
+
+          ip0->checksum = 0;
+
+          ip4_partial_header_checksum_x1 (ip0, sum0);
+          ip0->checksum = ~ip_csum_fold (sum0);
+
+          ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+        }
+    }
+}
+/* Packet-generator edit callback for an IP4 edit group.
+   g->edit_function_opaque holds the IP4_PG_EDIT_* bits that select
+   whether the length, the checksum, or both are rewritten in each of
+   the n_packets buffers; g->start_byte_offset is passed on as the
+   offset of the IP header within the buffer data.  pg and s are not
+   used.  Every case passes a literal flag constant, presumably so the
+   always-inline worker is specialised per combination.  Any other
+   opaque value trips ASSERT (0). */
+static void
+ip4_pg_edit_function (pg_main_t * pg,
+                      pg_stream_t * s,
+                      pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  u32 ip_offset;
+
+  ip_offset = g->start_byte_offset;
+
+  switch (g->edit_function_opaque)
+    {
+    case IP4_PG_EDIT_LENGTH:
+      compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+                                      IP4_PG_EDIT_LENGTH);
+      break;
+
+    case IP4_PG_EDIT_CHECKSUM:
+      compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+                                      IP4_PG_EDIT_CHECKSUM);
+      break;
+
+    case IP4_PG_EDIT_LENGTH | IP4_PG_EDIT_CHECKSUM:
+      compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+                                      IP4_PG_EDIT_LENGTH
+                                      | IP4_PG_EDIT_CHECKSUM);
+      break;
+
+    default:
+      ASSERT (0);
+      break;
+    }
+}
+/* One pg_edit_t per editable IP4 header field.  ip_version and
+   header_length share ip_version_and_header_length in the header;
+   fragment_offset and the three flag edits share
+   flags_and_fragment_offset. */
+typedef struct
+{
+  pg_edit_t ip_version, header_length;
+  pg_edit_t tos;
+  pg_edit_t length;
+
+  pg_edit_t fragment_id, fragment_offset;
+
+  /* Flags together with fragment offset. */
+  pg_edit_t mf_flag, df_flag, ce_flag;
+
+  pg_edit_t ttl;
+
+  pg_edit_t protocol;
+
+  pg_edit_t checksum;
+
+  pg_edit_t src_address, dst_address;
+} pg_ip4_header_t;
+/* Bind each pg_edit_t in *p to the matching ip4_header_t field. */
+static inline void
+pg_ip4_header_init (pg_ip4_header_t * p)
+{
+  /* Initialize fields that are not bit fields in the IP header. */
+#define _(f) pg_edit_init (&p->f, ip4_header_t, f);
+  _(tos);
+  _(length);
+  _(fragment_id);
+  _(ttl);
+  _(protocol);
+  _(checksum);
+  _(src_address);
+  _(dst_address);
+#undef _
+
+  /* Initialize bit fields. 
*/ + pg_edit_init_bitfield (&p->header_length, ip4_header_t, + ip_version_and_header_length, 0, 4); + pg_edit_init_bitfield (&p->ip_version, ip4_header_t, + ip_version_and_header_length, 4, 4); + + pg_edit_init_bitfield (&p->fragment_offset, ip4_header_t, + flags_and_fragment_offset, 0, 13); + pg_edit_init_bitfield (&p->mf_flag, ip4_header_t, + flags_and_fragment_offset, 13, 1); + pg_edit_init_bitfield (&p->df_flag, ip4_header_t, + flags_and_fragment_offset, 14, 1); + pg_edit_init_bitfield (&p->ce_flag, ip4_header_t, + flags_and_fragment_offset, 15, 1); +} + +uword +unformat_pg_ip4_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t *s = va_arg (*args, pg_stream_t *); + pg_ip4_header_t *p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ip4_header_t), + &group_index); + pg_ip4_header_init (p); + + /* Defaults. */ + pg_edit_set_fixed (&p->ip_version, 4); + pg_edit_set_fixed (&p->header_length, sizeof (ip4_header_t) / sizeof (u32)); + + pg_edit_set_fixed (&p->tos, 0); + pg_edit_set_fixed (&p->ttl, 64); + + pg_edit_set_fixed (&p->fragment_id, 0); + pg_edit_set_fixed (&p->fragment_offset, 0); + pg_edit_set_fixed (&p->mf_flag, 0); + pg_edit_set_fixed (&p->df_flag, 0); + pg_edit_set_fixed (&p->ce_flag, 0); + + p->length.type = PG_EDIT_UNSPECIFIED; + p->checksum.type = PG_EDIT_UNSPECIFIED; + + if (unformat (input, "%U: %U -> %U", + unformat_pg_edit, + unformat_ip_protocol, &p->protocol, + unformat_pg_edit, + unformat_ip4_address, &p->src_address, + unformat_pg_edit, unformat_ip4_address, &p->dst_address)) + goto found; + + if (!unformat (input, "%U:", + unformat_pg_edit, unformat_ip_protocol, &p->protocol)) + goto error; + +found: + /* Parse options. 
*/ + while (1) + { + if (unformat (input, "version %U", + unformat_pg_edit, unformat_pg_number, &p->ip_version)) + ; + + else if (unformat (input, "header-length %U", + unformat_pg_edit, + unformat_pg_number, &p->header_length)) + ; + + else if (unformat (input, "tos %U", + unformat_pg_edit, unformat_pg_number, &p->tos)) + ; + + else if (unformat (input, "length %U", + unformat_pg_edit, unformat_pg_number, &p->length)) + ; + + else if (unformat (input, "checksum %U", + unformat_pg_edit, unformat_pg_number, &p->checksum)) + ; + + else if (unformat (input, "ttl %U", + unformat_pg_edit, unformat_pg_number, &p->ttl)) + ; + + else if (unformat (input, "fragment id %U offset %U", + unformat_pg_edit, + unformat_pg_number, &p->fragment_id, + unformat_pg_edit, + unformat_pg_number, &p->fragment_offset)) + { + int i; + for (i = 0; i < ARRAY_LEN (p->fragment_offset.values); i++) + pg_edit_set_value (&p->fragment_offset, i, + pg_edit_get_value (&p->fragment_offset, + i) / 8); + + } + + /* Flags. */ + else if (unformat (input, "mf") || unformat (input, "MF")) + pg_edit_set_fixed (&p->mf_flag, 1); + + else if (unformat (input, "df") || unformat (input, "DF")) + pg_edit_set_fixed (&p->df_flag, 1); + + else if (unformat (input, "ce") || unformat (input, "CE")) + pg_edit_set_fixed (&p->ce_flag, 1); + + /* Can't parse input: try next protocol level. 
*/ + else + break; + } + + { + ip_main_t *im = &ip_main; + ip_protocol_t protocol; + ip_protocol_info_t *pi; + + pi = 0; + if (p->protocol.type == PG_EDIT_FIXED) + { + protocol = pg_edit_get_value (&p->protocol, PG_EDIT_LO); + pi = ip_get_protocol_info (im, protocol); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (!unformat_user (input, unformat_pg_payload, s)) + goto error; + + if (p->length.type == PG_EDIT_UNSPECIFIED + && s->min_packet_bytes == s->max_packet_bytes + && group_index + 1 < vec_len (s->edit_groups)) + { + pg_edit_set_fixed (&p->length, + pg_edit_group_n_bytes (s, group_index)); + } + + /* Compute IP header checksum if all edits are fixed. */ + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + { + ip4_header_t fixed_header, fixed_mask, cmp_mask; + + /* See if header is all fixed and specified except for + checksum field. */ + memset (&cmp_mask, ~0, sizeof (cmp_mask)); + cmp_mask.checksum = 0; + + pg_edit_group_get_fixed_packet_data (s, group_index, + &fixed_header, &fixed_mask); + if (!memcmp (&fixed_mask, &cmp_mask, sizeof (cmp_mask))) + pg_edit_set_fixed (&p->checksum, + clib_net_to_host_u16 (ip4_header_checksum + (&fixed_header))); + } + + p = pg_get_edit_group (s, group_index); + if (p->length.type == PG_EDIT_UNSPECIFIED + || p->checksum.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t *g = pg_stream_get_group (s, group_index); + g->edit_function = ip4_pg_edit_function; + g->edit_function_opaque = 0; + if (p->length.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= IP4_PG_EDIT_LENGTH; + if (p->checksum.type == PG_EDIT_UNSPECIFIED) + g->edit_function_opaque |= IP4_PG_EDIT_CHECKSUM; + } + + return 1; + } + +error: + /* Free up any edits we may have added. 
*/ + pg_free_edit_group (s); + return 0; +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c new file mode 100644 index 00000000..4829079b --- /dev/null +++ b/src/vnet/ip/ip4_source_and_port_range_check.c @@ -0,0 +1,1424 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/ip/ip.h> +#include <vnet/ip/ip_source_and_port_range_check.h> +#include <vnet/dpo/load_balance.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip4_fib.h> + +/** + * @file + * @brief IPv4 Source and Port Range Checking. + * + * This file contains the source code for IPv4 source and port range + * checking. 
+ */
+
+
+/**
+ * @brief The pool of range check DPOs
+ */
+static protocol_port_range_dpo_t *ppr_dpo_pool;
+
+/**
+ * @brief Dynamically registered DPO type
+ */
+static dpo_type_t ppr_dpo_type;
+
+vlib_node_registration_t ip4_source_port_and_range_check_rx;
+vlib_node_registration_t ip4_source_port_and_range_check_tx;
+
+#define foreach_ip4_source_and_port_range_check_error \
+  _(CHECK_FAIL, "ip4 source and port range check bad packets") \
+  _(CHECK_OK, "ip4 source and port range check good packets")
+
+typedef enum
+{
+#define _(sym,str) IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_##sym,
+  foreach_ip4_source_and_port_range_check_error
+#undef _
+    IP4_SOURCE_AND_PORT_RANGE_CHECK_N_ERROR,
+} ip4_source_and_port_range_check_error_t;
+
+static char *ip4_source_and_port_range_check_error_strings[] = {
+#define _(sym,string) string,
+  foreach_ip4_source_and_port_range_check_error
+#undef _
+};
+/* Per-packet trace record.  bypass is set when the packet was not
+   subject to the port check at all; otherwise pass records whether
+   the check left the next node unchanged. */
+typedef struct
+{
+  u32 pass;
+  u32 bypass;
+  u32 is_tcp;
+  ip4_address_t src_addr;
+  u16 port;
+  u32 fib_index;
+} ip4_source_and_port_range_check_trace_t;
+/* Trace formatter: appends "PASS (bypass case)" for bypassed packets,
+   otherwise the FIB index, source address, TCP/UDP, destination port
+   and PASS or FAIL.  The vm and node arguments are consumed from the
+   va_list but not used. */
+static u8 *
+format_ip4_source_and_port_range_check_trace (u8 * s, va_list * va)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+  ip4_source_and_port_range_check_trace_t *t =
+    va_arg (*va, ip4_source_and_port_range_check_trace_t *);
+
+  if (t->bypass)
+    s = format (s, "PASS (bypass case)");
+  else
+    s = format (s, "fib %d src ip %U %s dst port %d: %s",
+                t->fib_index, format_ip4_address, &t->src_addr,
+                t->is_tcp ? "TCP" : "UDP", (u32) t->port,
+                (t->pass == 1) ? "PASS" : "FAIL");
+  return s;
+}
+/* Next-node indices owned by the range-check nodes; drop is the only
+   one. */
+typedef enum
+{
+  IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP,
+  IP4_SOURCE_AND_PORT_RANGE_CHECK_N_NEXT,
+} ip4_source_and_port_range_check_next_t;
+
+/* Decide the next node for one packet from its destination port.
+   Returns next when dst_port falls inside one of the ranges held in
+   ppr_dpo, and IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP otherwise.
+   A NULL ppr_dpo or a dst_port of 0 always drops.
+
+   Each block holds eight (low, hi) pairs compared in one vector step.
+   Assuming u16x8_sub_saturate is an unsigned saturating subtract, a
+   lane matches when low - port saturates to 0 and hi - port does not,
+   i.e. low <= port < hi, so hi is exclusive.
+
+   Port 65535 is handled separately by scanning the low values for
+   65535 -- presumably because an exclusive hi of 65536 does not fit in
+   16 bits; confirm.
+   NOTE(review): that scan is bounded by VLIB_BUFFER_PRE_DATA_SIZE /
+   sizeof (protocol_port_range_t) rather than n_used_blocks as in the
+   main loop; verify this matches the size of blocks[]. */
+static inline u32
+check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo,
+                         u16 dst_port, u32 next)
+{
+  u16x8vec_t key;
+  u16x8vec_t diff1;
+  u16x8vec_t diff2;
+  u16x8vec_t sum, sum_equal_diff2;
+  u16 sum_nonzero, sum_equal, winner_mask;
+  int i;
+
+  if (NULL == ppr_dpo || dst_port == 0)
+    return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
+
+  /* Make the obvious screw-case work. A variant also works w/ no MMX */
+  if (PREDICT_FALSE (dst_port == 65535))
+    {
+      int j;
+
+      for (i = 0;
+           i < VLIB_BUFFER_PRE_DATA_SIZE / sizeof (protocol_port_range_t);
+           i++)
+        {
+          for (j = 0; j < 8; j++)
+            if (ppr_dpo->blocks[i].low.as_u16[j] == 65535)
+              return next;
+        }
+      return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
+    }
+
+  key.as_u16x8 = u16x8_splat (dst_port);
+
+  for (i = 0; i < ppr_dpo->n_used_blocks; i++)
+    {
+      diff1.as_u16x8 =
+        u16x8_sub_saturate (ppr_dpo->blocks[i].low.as_u16x8, key.as_u16x8);
+      diff2.as_u16x8 =
+        u16x8_sub_saturate (ppr_dpo->blocks[i].hi.as_u16x8, key.as_u16x8);
+      sum.as_u16x8 = u16x8_add (diff1.as_u16x8, diff2.as_u16x8);
+      sum_equal_diff2.as_u16x8 =
+        u16x8_is_equal (sum.as_u16x8, diff2.as_u16x8);
+      sum_nonzero = ~u16x8_zero_byte_mask (sum.as_u16x8);
+      sum_equal = ~u16x8_zero_byte_mask (sum_equal_diff2.as_u16x8);
+      winner_mask = sum_nonzero & sum_equal;
+      if (winner_mask)
+        return next;
+    }
+  return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
+}
+/* Return the range-check DPO stored at index in ppr_dpo_pool. */
+always_inline protocol_port_range_dpo_t *
+protocol_port_range_dpo_get (index_t index)
+{
+  return (pool_elt_at_index (ppr_dpo_pool, index));
+}
+/* Shared worker for the rx and tx range-check nodes; is_tx selects the
+   direction. */
+always_inline uword
+ip4_source_and_port_range_check_inline (vlib_main_t * vm,
+                                        vlib_node_runtime_t * node,
+                                        vlib_frame_t * frame, int is_tx)
+{
+  ip4_main_t *im = &ip4_main;
+  u32 n_left_from, *from, *to_next;
+  u32 next_index;
+  vlib_node_runtime_t *error_node = 
node; + u32 good_packets = 0; + int i; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + + /* while (n_left_from >= 4 && n_left_to_next >= 2) */ + /* { */ + /* vlib_buffer_t *b0, *b1; */ + /* ip4_header_t *ip0, *ip1; */ + /* ip4_fib_mtrie_t *mtrie0, *mtrie1; */ + /* ip4_fib_mtrie_leaf_t leaf0, leaf1; */ + /* ip_source_and_port_range_check_config_t *c0, *c1; */ + /* ip_adjacency_t *adj0 = 0, *adj1 = 0; */ + /* u32 bi0, next0, adj_index0, pass0, save_next0, fib_index0; */ + /* u32 bi1, next1, adj_index1, pass1, save_next1, fib_index1; */ + /* udp_header_t *udp0, *udp1; */ + + /* /\* Prefetch next iteration. *\/ */ + /* { */ + /* vlib_buffer_t *p2, *p3; */ + + /* p2 = vlib_get_buffer (vm, from[2]); */ + /* p3 = vlib_get_buffer (vm, from[3]); */ + + /* vlib_prefetch_buffer_header (p2, LOAD); */ + /* vlib_prefetch_buffer_header (p3, LOAD); */ + + /* CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD); */ + /* CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD); */ + /* } */ + + /* bi0 = to_next[0] = from[0]; */ + /* bi1 = to_next[1] = from[1]; */ + /* from += 2; */ + /* to_next += 2; */ + /* n_left_from -= 2; */ + /* n_left_to_next -= 2; */ + + /* b0 = vlib_get_buffer (vm, bi0); */ + /* b1 = vlib_get_buffer (vm, bi1); */ + + /* fib_index0 = */ + /* vec_elt (im->fib_index_by_sw_if_index, */ + /* vnet_buffer (b0)->sw_if_index[VLIB_RX]); */ + /* fib_index1 = */ + /* vec_elt (im->fib_index_by_sw_if_index, */ + /* vnet_buffer (b1)->sw_if_index[VLIB_RX]); */ + + /* ip0 = vlib_buffer_get_current (b0); */ + /* ip1 = vlib_buffer_get_current (b1); */ + + /* if (is_tx) */ + /* { */ + /* c0 = vnet_get_config_data (&tx_cm->config_main, */ + /* &b0->current_config_index, */ + /* &next0, sizeof (c0[0])); */ + /* c1 = vnet_get_config_data (&tx_cm->config_main, */ + /* &b1->current_config_index, */ + 
/* &next1, sizeof (c1[0])); */ + /* } */ + /* else */ + /* { */ + /* c0 = vnet_get_config_data (&rx_cm->config_main, */ + /* &b0->current_config_index, */ + /* &next0, sizeof (c0[0])); */ + /* c1 = vnet_get_config_data (&rx_cm->config_main, */ + /* &b1->current_config_index, */ + /* &next1, sizeof (c1[0])); */ + /* } */ + + /* /\* we can't use the default VRF here... *\/ */ + /* for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++) */ + /* { */ + /* ASSERT (c0->fib_index[i] && c1->fib_index[i]); */ + /* } */ + + + /* if (is_tx) */ + /* { */ + /* if (ip0->protocol == IP_PROTOCOL_UDP) */ + /* fib_index0 = */ + /* c0->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; */ + /* if (ip0->protocol == IP_PROTOCOL_TCP) */ + /* fib_index0 = */ + /* c0->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; */ + /* } */ + /* else */ + /* { */ + /* if (ip0->protocol == IP_PROTOCOL_UDP) */ + /* fib_index0 = */ + /* c0->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; */ + /* if (ip0->protocol == IP_PROTOCOL_TCP) */ + /* fib_index0 = */ + /* c0->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; */ + /* } */ + + /* if (PREDICT_TRUE (fib_index0 != ~0)) */ + /* { */ + + /* mtrie0 = &vec_elt_at_index (im->fibs, fib_index0)->mtrie; */ + + /* leaf0 = IP4_FIB_MTRIE_LEAF_ROOT; */ + + /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */ + /* &ip0->src_address, 0); */ + + /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */ + /* &ip0->src_address, 1); */ + + /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */ + /* &ip0->src_address, 2); */ + + /* leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, */ + /* &ip0->src_address, 3); */ + + /* adj_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); */ + + /* ASSERT (adj_index0 == ip4_fib_lookup_with_table (im, fib_index0, */ + /* &ip0->src_address, */ + /* 0 */ + /* /\* use dflt rt *\/ */ + /* )); */ + /* adj0 = ip_get_adjacency (lm, adj_index0); */ + /* } */ 
+ + /* if (is_tx) */ + /* { */ + /* if (ip1->protocol == IP_PROTOCOL_UDP) */ + /* fib_index1 = */ + /* c1->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; */ + /* if (ip1->protocol == IP_PROTOCOL_TCP) */ + /* fib_index1 = */ + /* c1->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; */ + /* } */ + /* else */ + /* { */ + /* if (ip1->protocol == IP_PROTOCOL_UDP) */ + /* fib_index1 = */ + /* c1->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; */ + /* if (ip1->protocol == IP_PROTOCOL_TCP) */ + /* fib_index1 = */ + /* c1->fib_index */ + /* [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; */ + /* } */ + + /* if (PREDICT_TRUE (fib_index1 != ~0)) */ + /* { */ + + /* mtrie1 = &vec_elt_at_index (im->fibs, fib_index1)->mtrie; */ + + /* leaf1 = IP4_FIB_MTRIE_LEAF_ROOT; */ + + /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */ + /* &ip1->src_address, 0); */ + + /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */ + /* &ip1->src_address, 1); */ + + /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */ + /* &ip1->src_address, 2); */ + + /* leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, */ + /* &ip1->src_address, 3); */ + + /* adj_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); */ + + /* ASSERT (adj_index1 == ip4_fib_lookup_with_table (im, fib_index1, */ + /* &ip1->src_address, */ + /* 0)); */ + /* adj1 = ip_get_adjacency (lm, adj_index1); */ + /* } */ + + /* pass0 = 0; */ + /* pass0 |= adj0 == 0; */ + /* pass0 |= ip4_address_is_multicast (&ip0->src_address); */ + /* pass0 |= */ + /* ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); */ + /* pass0 |= (ip0->protocol != IP_PROTOCOL_UDP) */ + /* && (ip0->protocol != IP_PROTOCOL_TCP); */ + + /* pass1 = 0; */ + /* pass1 |= adj1 == 0; */ + /* pass1 |= ip4_address_is_multicast (&ip1->src_address); */ + /* pass1 |= */ + /* ip1->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); */ + /* pass1 |= (ip1->protocol != IP_PROTOCOL_UDP) */ + /* && 
(ip1->protocol != IP_PROTOCOL_TCP); */ + + /* save_next0 = next0; */ + /* udp0 = ip4_next_header (ip0); */ + /* save_next1 = next1; */ + /* udp1 = ip4_next_header (ip1); */ + + /* if (PREDICT_TRUE (pass0 == 0)) */ + /* { */ + /* good_packets++; */ + /* next0 = check_adj_port_range_x1 */ + /* (adj0, clib_net_to_host_u16 (udp0->dst_port), next0); */ + /* good_packets -= (save_next0 != next0); */ + /* b0->error = error_node->errors */ + /* [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; */ + /* } */ + + /* if (PREDICT_TRUE (pass1 == 0)) */ + /* { */ + /* good_packets++; */ + /* next1 = check_adj_port_range_x1 */ + /* (adj1, clib_net_to_host_u16 (udp1->dst_port), next1); */ + /* good_packets -= (save_next1 != next1); */ + /* b1->error = error_node->errors */ + /* [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; */ + /* } */ + + /* if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) */ + /* && (b0->flags & VLIB_BUFFER_IS_TRACED))) */ + /* { */ + /* ip4_source_and_port_range_check_trace_t *t = */ + /* vlib_add_trace (vm, node, b0, sizeof (*t)); */ + /* t->pass = next0 == save_next0; */ + /* t->bypass = pass0; */ + /* t->fib_index = fib_index0; */ + /* t->src_addr.as_u32 = ip0->src_address.as_u32; */ + /* t->port = (pass0 == 0) ? */ + /* clib_net_to_host_u16 (udp0->dst_port) : 0; */ + /* t->is_tcp = ip0->protocol == IP_PROTOCOL_TCP; */ + /* } */ + + /* if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) */ + /* && (b1->flags & VLIB_BUFFER_IS_TRACED))) */ + /* { */ + /* ip4_source_and_port_range_check_trace_t *t = */ + /* vlib_add_trace (vm, node, b1, sizeof (*t)); */ + /* t->pass = next1 == save_next1; */ + /* t->bypass = pass1; */ + /* t->fib_index = fib_index1; */ + /* t->src_addr.as_u32 = ip1->src_address.as_u32; */ + /* t->port = (pass1 == 0) ? 
*/ + /* clib_net_to_host_u16 (udp1->dst_port) : 0; */ + /* t->is_tcp = ip1->protocol == IP_PROTOCOL_TCP; */ + /* } */ + + /* vlib_validate_buffer_enqueue_x2 (vm, node, next_index, */ + /* to_next, n_left_to_next, */ + /* bi0, bi1, next0, next1); */ + /* } */ + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *b0; + ip4_header_t *ip0; + ip_source_and_port_range_check_config_t *c0; + u32 bi0, next0, lb_index0, pass0, save_next0, fib_index0; + udp_header_t *udp0; + const protocol_port_range_dpo_t *ppr_dpo0 = NULL; + const dpo_id_t *dpo; + u32 sw_if_index0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX]; + + fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); + + if (is_tx) + vlib_buffer_advance (b0, sizeof (ethernet_header_t)); + + ip0 = vlib_buffer_get_current (b0); + + c0 = vnet_feature_next_with_data (sw_if_index0, &next0, + b0, sizeof (c0[0])); + + /* we can't use the default VRF here... 
*/ + for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++) + { + ASSERT (c0->fib_index[i]); + } + + + if (is_tx) + { + if (ip0->protocol == IP_PROTOCOL_UDP) + fib_index0 = + c0->fib_index + [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]; + if (ip0->protocol == IP_PROTOCOL_TCP) + fib_index0 = + c0->fib_index + [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN]; + } + else + { + if (ip0->protocol == IP_PROTOCOL_UDP) + fib_index0 = + c0->fib_index + [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT]; + if (ip0->protocol == IP_PROTOCOL_TCP) + fib_index0 = + c0->fib_index + [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT]; + } + + if (fib_index0 != ~0) + { + lb_index0 = ip4_fib_forwarding_lookup (fib_index0, + &ip0->src_address); + + dpo = + load_balance_get_bucket_i (load_balance_get (lb_index0), 0); + + if (ppr_dpo_type == dpo->dpoi_type) + { + ppr_dpo0 = protocol_port_range_dpo_get (dpo->dpoi_index); + } + /* + * else the lookup hit an enty that was no inserted + * by this range checker, which is the default route + */ + } + /* + * $$$ which (src,dst) categories should we always pass? 
+ */ + pass0 = 0; + pass0 |= ip4_address_is_multicast (&ip0->src_address); + pass0 |= + ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF); + pass0 |= (ip0->protocol != IP_PROTOCOL_UDP) + && (ip0->protocol != IP_PROTOCOL_TCP); + + save_next0 = next0; + udp0 = ip4_next_header (ip0); + + if (PREDICT_TRUE (pass0 == 0)) + { + good_packets++; + next0 = check_adj_port_range_x1 + (ppr_dpo0, clib_net_to_host_u16 (udp0->dst_port), next0); + good_packets -= (save_next0 != next0); + b0->error = error_node->errors + [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL]; + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + ip4_source_and_port_range_check_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->pass = next0 == save_next0; + t->bypass = pass0; + t->fib_index = fib_index0; + t->src_addr.as_u32 = ip0->src_address.as_u32; + t->port = (pass0 == 0) ? + clib_net_to_host_u16 (udp0->dst_port) : 0; + t->is_tcp = ip0->protocol == IP_PROTOCOL_TCP; + } + + if (is_tx) + vlib_buffer_advance (b0, -sizeof (ethernet_header_t)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + if (is_tx) + vlib_node_increment_counter (vm, ip4_source_port_and_range_check_tx.index, + IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_OK, + good_packets); + else + vlib_node_increment_counter (vm, ip4_source_port_and_range_check_rx.index, + IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_OK, + good_packets); + return frame->n_vectors; +} + +static uword +ip4_source_and_port_range_check_rx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return ip4_source_and_port_range_check_inline (vm, node, frame, + 0 /* !is_tx */ ); +} + +static uword +ip4_source_and_port_range_check_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return 
ip4_source_and_port_range_check_inline (vm, node, frame, + 1 /* is_tx */ ); +} + +/* Note: Calling same function for both RX and TX nodes + as always checking dst_port, although + if this changes can easily make new function +*/ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = { + .function = ip4_source_and_port_range_check_rx, + .name = "ip4-source-and-port-range-check-rx", + .vector_size = sizeof (u32), + + .n_errors = ARRAY_LEN(ip4_source_and_port_range_check_error_strings), + .error_strings = ip4_source_and_port_range_check_error_strings, + + .n_next_nodes = IP4_SOURCE_AND_PORT_RANGE_CHECK_N_NEXT, + .next_nodes = { + [IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP] = "error-drop", + }, + + .format_buffer = format_ip4_header, + .format_trace = format_ip4_source_and_port_range_check_trace, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = { + .function = ip4_source_and_port_range_check_tx, + .name = "ip4-source-and-port-range-check-tx", + .vector_size = sizeof (u32), + + .n_errors = ARRAY_LEN(ip4_source_and_port_range_check_error_strings), + .error_strings = ip4_source_and_port_range_check_error_strings, + + .n_next_nodes = IP4_SOURCE_AND_PORT_RANGE_CHECK_N_NEXT, + .next_nodes = { + [IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP] = "error-drop", + }, + + .format_buffer = format_ip4_header, + .format_trace = format_ip4_source_and_port_range_check_trace, +}; +/* *INDENT-ON* */ + +int +set_ip_source_and_port_range_check (vlib_main_t * vm, + u32 * fib_index, + u32 sw_if_index, u32 is_add) +{ + ip_source_and_port_range_check_config_t config; + int rv = 0; + int i; + + for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++) + { + config.fib_index[i] = fib_index[i]; + } + + /* For OUT we are in the RX path */ + if ((fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT] != ~0) || + (fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT] != ~0)) + { + vnet_feature_enable_disable 
("ip4-unicast", + "ip4-source-and-port-range-check-rx", + sw_if_index, is_add, &config, + sizeof (config)); + } + + /* For IN we are in the TX path */ + if ((fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN] != ~0) || + (fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN] != ~0)) + { + vnet_feature_enable_disable ("ip4-output", + "ip4-source-and-port-range-check-tx", + sw_if_index, is_add, &config, + sizeof (config)); + } + return rv; +} + +static clib_error_t * +set_ip_source_and_port_range_check_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ip4_main_t *im = &ip4_main; + clib_error_t *error = 0; + u8 is_add = 1; + u32 sw_if_index = ~0; + u32 vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS]; + u32 fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS]; + int vrf_set = 0; + uword *p; + int rv = 0; + int i; + + sw_if_index = ~0; + for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++) + { + fib_index[i] = ~0; + vrf_id[i] = ~0; + } + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, + &sw_if_index)) + ; + else + if (unformat + (input, "tcp-out-vrf %d", + &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT])) + vrf_set = 1; + else + if (unformat + (input, "udp-out-vrf %d", + &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT])) + vrf_set = 1; + else + if (unformat + (input, "tcp-in-vrf %d", + &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN])) + vrf_set = 1; + else + if (unformat + (input, "udp-in-vrf %d", + &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN])) + vrf_set = 1; + else if (unformat (input, "del")) + is_add = 0; + else + break; + } + + if (sw_if_index == ~0) + return clib_error_return (0, "Interface required but not specified"); + + if (!vrf_set) + return clib_error_return (0, + "TCP or UDP VRF ID required but not specified"); + + for (i = 0; i < 
IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++) + { + + if (vrf_id[i] == 0) + return clib_error_return (0, + "TCP, UDP VRF ID should not be 0 (default). Should be distinct VRF for this purpose. "); + + if (vrf_id[i] != ~0) + { + p = hash_get (im->fib_index_by_table_id, vrf_id[i]); + + if (p == 0) + return clib_error_return (0, "Invalid VRF ID %d", vrf_id[i]); + + fib_index[i] = p[0]; + } + } + rv = + set_ip_source_and_port_range_check (vm, fib_index, sw_if_index, is_add); + + switch (rv) + { + case 0: + break; + + default: + return clib_error_return + (0, + "set source and port-range on interface returned an unexpected value: %d", + rv); + } + return error; +} + +/*? + * Add the 'ip4-source-and-port-range-check-rx' or + * 'ip4-source-and-port-range-check-tx' graph node for a given + * interface. 'tcp-out-vrf' and 'udp-out-vrf' will add to + * the RX path. 'tcp-in-vrf' and 'udp-in-vrf' will add to + * the TX path. A graph node will be inserted into the chain when + * the range check is added to the first interface. It will not + * be removed from when range check is removed from the last + * interface. + * + * By adding the range check graph node to the interface, incoming + * or outgoing TCP/UDP packets will be validated using the + * provided IPv4 FIB table (VRF). + * + * @note 'ip4-source-and-port-range-check-rx' and + * 'ip4-source-and-port-range-check-tx' strings are too long, so + * they are truncated on the 'show vlib graph' output. + * + * @todo This content needs to be validated and potentially more detail added. 
+ * + * @cliexpar + * @parblock + * Example of graph node before range checking is enabled: + * @cliexstart{show vlib graph ip4-source-and-port-range-check-tx} + * Name Next Previous + * ip4-source-and-port-range- error-drop [0] + * @cliexend + * + * Example of how to enable range checking on TX: + * @cliexcmd{set interface ip source-and-port-range-check GigabitEthernet2/0/0 udp-in-vrf 7} + * + * Example of graph node after range checking is enabled: + * @cliexstart{show vlib graph ip4-source-and-port-range-check-tx} + * Name Next Previous + * ip4-source-and-port-range- error-drop [0] ip4-rewrite + * interface-output [1] + * @cliexend + * + * Example of how to display the features enabed on an interface: + * @cliexstart{show ip interface features GigabitEthernet2/0/0} + * IP feature paths configured on GigabitEthernet2/0/0... + * + * ipv4 unicast: + * ip4-source-and-port-range-check-rx + * ip4-lookup + * + * ipv4 multicast: + * ip4-lookup-multicast + * + * ipv4 multicast: + * interface-output + * + * ipv6 unicast: + * ip6-lookup + * + * ipv6 multicast: + * ip6-lookup + * + * ipv6 multicast: + * interface-output + * @cliexend + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip_source_and_port_range_check_command, static) = { + .path = "set interface ip source-and-port-range-check", + .function = set_ip_source_and_port_range_check_fn, + .short_help = "set interface ip source-and-port-range-check <interface> [tcp-out-vrf <table-id>] [udp-out-vrf <table-id>] [tcp-in-vrf <table-id>] [udp-in-vrf <table-id>] [del]", +}; +/* *INDENT-ON* */ + +static u8 * +format_ppr_dpo (u8 * s, va_list * args) +{ + index_t index = va_arg (*args, index_t); + CLIB_UNUSED (u32 indent) = va_arg (*args, u32); + + protocol_port_range_dpo_t *ppr_dpo; + int i, j; + int printed = 0; + + ppr_dpo = protocol_port_range_dpo_get (index); + + s = format (s, "allow "); + + for (i = 0; i < ppr_dpo->n_used_blocks; i++) + { + for (j = 0; j < 8; j++) + { + if 
(ppr_dpo->blocks[i].low.as_u16[j]) + { + if (printed) + s = format (s, ", "); + if (ppr_dpo->blocks[i].hi.as_u16[j] > + (ppr_dpo->blocks[i].low.as_u16[j] + 1)) + s = + format (s, "%d-%d", (u32) ppr_dpo->blocks[i].low.as_u16[j], + (u32) ppr_dpo->blocks[i].hi.as_u16[j] - 1); + else + s = format (s, "%d", ppr_dpo->blocks[i].low.as_u16[j]); + printed = 1; + } + } + } + return s; +} + +static void +ppr_dpo_lock (dpo_id_t * dpo) +{ +} + +static void +ppr_dpo_unlock (dpo_id_t * dpo) +{ +} + +const static dpo_vft_t ppr_vft = { + .dv_lock = ppr_dpo_lock, + .dv_unlock = ppr_dpo_unlock, + .dv_format = format_ppr_dpo, +}; + +const static char *const ppr_ip4_nodes[] = { + "ip4-source-and-port-range-check-rx", + NULL, +}; + +const static char *const *const ppr_nodes[DPO_PROTO_NUM] = { + [DPO_PROTO_IP4] = ppr_ip4_nodes, +}; + +clib_error_t * +ip4_source_and_port_range_check_init (vlib_main_t * vm) +{ + source_range_check_main_t *srm = &source_range_check_main; + + srm->vlib_main = vm; + srm->vnet_main = vnet_get_main (); + + ppr_dpo_type = dpo_register_new_type (&ppr_vft, ppr_nodes); + + return 0; +} + +VLIB_INIT_FUNCTION (ip4_source_and_port_range_check_init); + +protocol_port_range_dpo_t * +protocol_port_range_dpo_alloc (void) +{ + protocol_port_range_dpo_t *ppr_dpo; + + pool_get_aligned (ppr_dpo_pool, ppr_dpo, CLIB_CACHE_LINE_BYTES); + memset (ppr_dpo, 0, sizeof (*ppr_dpo)); + + ppr_dpo->n_free_ranges = N_PORT_RANGES_PER_DPO; + + return (ppr_dpo); +} + + +static int +add_port_range_adjacency (u32 fib_index, + ip4_address_t * address, + u32 length, u16 * low_ports, u16 * high_ports) +{ + protocol_port_range_dpo_t *ppr_dpo; + dpo_id_t dpop = DPO_INVALID; + int i, j, k; + + fib_node_index_t fei; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = length, + .fp_addr = { + .ip4 = *address, + }, + }; + + /* + * check to see if we have already sourced this prefix + */ + fei = fib_table_lookup_exact_match (fib_index, &pfx); + + if (FIB_NODE_INDEX_INVALID == fei) + { + 
/* + * this is a first time add for this prefix. + */ + ppr_dpo = protocol_port_range_dpo_alloc (); + } + else + { + /* + * the prefix is already there. + * check it was sourced by us, and if so get the ragne DPO from it. + */ + dpo_id_t dpo = DPO_INVALID; + const dpo_id_t *bucket; + + if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo)) + { + /* + * there is existing state. we'll want to add the new ranges to it + */ + bucket = + load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0); + ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index); + dpo_reset (&dpo); + } + else + { + /* + * there is no PPR state associated with this prefix, + * so we'll need a new DPO + */ + ppr_dpo = protocol_port_range_dpo_alloc (); + } + } + + if (vec_len (low_ports) > ppr_dpo->n_free_ranges) + return VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY; + + j = k = 0; + + for (i = 0; i < vec_len (low_ports); i++) + { + for (; j < N_BLOCKS_PER_DPO; j++) + { + for (; k < 8; k++) + { + if (ppr_dpo->blocks[j].low.as_u16[k] == 0) + { + ppr_dpo->blocks[j].low.as_u16[k] = low_ports[i]; + ppr_dpo->blocks[j].hi.as_u16[k] = high_ports[i]; + goto doublebreak; + } + } + } + doublebreak:; + } + ppr_dpo->n_used_blocks = j + 1; + + /* + * add or update the entry in the FIB + */ + dpo_set (&dpop, ppr_dpo_type, DPO_PROTO_IP4, (ppr_dpo - ppr_dpo_pool)); + + if (FIB_NODE_INDEX_INVALID == fei) + { + fib_table_entry_special_dpo_add (fib_index, + &pfx, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_NONE, &dpop); + } + else + { + fib_entry_special_update (fei, + FIB_SOURCE_SPECIAL, + FIB_ENTRY_FLAG_NONE, &dpop); + } + + return 0; +} + +static int +remove_port_range_adjacency (u32 fib_index, + ip4_address_t * address, + u32 length, u16 * low_ports, u16 * high_ports) +{ + protocol_port_range_dpo_t *ppr_dpo; + fib_node_index_t fei; + int i, j, k; + + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = length, + .fp_addr = { + .ip4 = *address, + }, + }; + + /* + * check to see if 
we have sourced this prefix + */ + fei = fib_table_lookup_exact_match (fib_index, &pfx); + + if (FIB_NODE_INDEX_INVALID == fei) + { + /* + * not one of ours + */ + return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; + } + else + { + /* + * the prefix is already there. + * check it was sourced by us + */ + dpo_id_t dpo = DPO_INVALID; + const dpo_id_t *bucket; + + if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo)) + { + /* + * there is existing state. we'll want to add the new ranges to it + */ + bucket = + load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0); + ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index); + dpo_reset (&dpo); + } + else + { + /* + * not one of ours + */ + return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE; + } + } + + for (i = 0; i < vec_len (low_ports); i++) + { + for (j = 0; j < N_BLOCKS_PER_DPO; j++) + { + for (k = 0; k < 8; k++) + { + if (low_ports[i] == ppr_dpo->blocks[j].low.as_u16[k] && + high_ports[i] == ppr_dpo->blocks[j].hi.as_u16[k]) + { + ppr_dpo->blocks[j].low.as_u16[k] = + ppr_dpo->blocks[j].hi.as_u16[k] = 0; + goto doublebreak; + } + } + } + doublebreak:; + } + + ppr_dpo->n_free_ranges = 0; + + /* Have we deleted all ranges yet? */ + for (i = 0; i < N_BLOCKS_PER_DPO; i++) + { + for (j = 0; j < 8; j++) + { + if (ppr_dpo->blocks[j].low.as_u16[i] == 0) + ppr_dpo->n_free_ranges++; + } + } + + if (N_PORT_RANGES_PER_DPO == ppr_dpo->n_free_ranges) + { + /* Yes, lose the adjacency... */ + fib_table_entry_special_remove (fib_index, &pfx, FIB_SOURCE_SPECIAL); + } + else + { + /* + * compact the ranges down to a contiguous block + */ + // FIXME. TODO. + } + + return 0; +} + +// This will be moved to another file and implemented post API freeze. 
+int +ip6_source_and_port_range_check_add_del (ip6_address_t * address, + u32 length, + u32 vrf_id, + u16 * low_ports, + u16 * high_ports, int is_add) +{ + u32 fib_index; + + fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id); + + ASSERT (~0 != fib_index); + + fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY); + + return 0; +} + +int +ip4_source_and_port_range_check_add_del (ip4_address_t * address, + u32 length, + u32 vrf_id, + u16 * low_ports, + u16 * high_ports, int is_add) +{ + u32 fib_index; + + fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id, + FIB_SOURCE_CLASSIFY); + + if (is_add == 0) + { + remove_port_range_adjacency (fib_index, address, length, + low_ports, high_ports); + } + else + { + add_port_range_adjacency (fib_index, address, length, + low_ports, high_ports); + } + + return 0; +} + +static clib_error_t * +ip_source_and_port_range_check_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u16 *low_ports = 0; + u16 *high_ports = 0; + u16 this_low; + u16 this_hi; + ip4_address_t ip4_addr; + ip6_address_t ip6_addr; //This function will be moved to generic impl when v6 done. 
+ u32 length; + u32 tmp, tmp2; + u32 vrf_id = ~0; + int is_add = 1, ip_ver = ~0; + int rv; + + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U/%d", unformat_ip4_address, &ip4_addr, &length)) + ip_ver = 4; + else + if (unformat + (input, "%U/%d", unformat_ip6_address, &ip6_addr, &length)) + ip_ver = 6; + else if (unformat (input, "vrf %d", &vrf_id)) + ; + else if (unformat (input, "del")) + is_add = 0; + else if (unformat (input, "port %d", &tmp)) + { + if (tmp == 0 || tmp > 65535) + return clib_error_return (0, "port %d out of range", tmp); + this_low = tmp; + this_hi = this_low + 1; + vec_add1 (low_ports, this_low); + vec_add1 (high_ports, this_hi); + } + else if (unformat (input, "range %d - %d", &tmp, &tmp2)) + { + if (tmp > tmp2) + return clib_error_return (0, "ports %d and %d out of order", + tmp, tmp2); + if (tmp == 0 || tmp > 65535) + return clib_error_return (0, "low port %d out of range", tmp); + if (tmp2 == 0 || tmp2 > 65535) + return clib_error_return (0, "high port %d out of range", tmp2); + this_low = tmp; + this_hi = tmp2 + 1; + vec_add1 (low_ports, this_low); + vec_add1 (high_ports, this_hi); + } + else + break; + } + + if (ip_ver == ~0) + return clib_error_return (0, " <address>/<mask> not specified"); + + if (vrf_id == ~0) + return clib_error_return (0, " VRF ID required, not specified"); + + if (vec_len (low_ports) == 0) + return clib_error_return (0, + " Both VRF ID and range/port must be set for a protocol."); + + if (vrf_id == 0) + return clib_error_return (0, " VRF ID can not be 0 (default)."); + + + if (ip_ver == 4) + rv = ip4_source_and_port_range_check_add_del + (&ip4_addr, length, vrf_id, low_ports, high_ports, is_add); + else + return clib_error_return (0, " IPv6 in subsequent patch"); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE: + return clib_error_return + (0, " Incorrect adjacency for add/del operation"); + + case 
VNET_API_ERROR_EXCEEDED_NUMBER_OF_PORTS_CAPACITY: + return clib_error_return (0, " Too many ports in add/del operation"); + + case VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY: + return clib_error_return + (0, " Too many ranges requested for add operation"); + + default: + return clib_error_return (0, " returned an unexpected value: %d", rv); + } + + return 0; +} + +/*? + * This command adds an IP Subnet and range of ports to be validated + * by an IP FIB table (VRF). + * + * @todo This is incomplete. This needs a detailed description and a + * practical example. + * + * @cliexpar + * Example of how to add an IPv4 subnet and single port to an IPv4 FIB table: + * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 port 23} + * Example of how to add an IPv4 subnet and range of ports to an IPv4 FIB table: + * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100} + * Example of how to delete an IPv4 subnet and single port from an IPv4 FIB table: + * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 port 23 del} + * Example of how to delete an IPv4 subnet and range of ports from an IPv4 FIB table: + * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100 del} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip_source_and_port_range_check_command, static) = { + .path = "set ip source-and-port-range-check", + .function = ip_source_and_port_range_check_command_fn, + .short_help = + "set ip source-and-port-range-check vrf <table-id> <ip-addr>/<mask> {port nn | range <nn> - <nn>} [del]", +}; +/* *INDENT-ON* */ + + +static clib_error_t * +show_source_and_port_range_check_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + protocol_port_range_dpo_t *ppr_dpo; + u32 fib_index; + u8 addr_set = 0; + u32 vrf_id = ~0; + int rv, i, j; + u32 port = 0; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + .fp_len = 32, + }; + + while (unformat_check_input (input) != 
UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_ip4_address, &pfx.fp_addr.ip4)) + addr_set = 1; + else if (unformat (input, "vrf %d", &vrf_id)) + ; + else if (unformat (input, "port %d", &port)) + ; + else + break; + } + + if (addr_set == 0) + return clib_error_return (0, "<address> not specified"); + + if (vrf_id == ~0) + return clib_error_return (0, "VRF ID required, not specified"); + + fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id); + if (~0 == fib_index) + return clib_error_return (0, "VRF %d not found", vrf_id); + + /* + * find the longest prefix match on the address requested, + * check it was sourced by us + */ + dpo_id_t dpo = DPO_INVALID; + const dpo_id_t *bucket; + + if (!fib_entry_get_dpo_for_source (fib_table_lookup (fib_index, &pfx), + FIB_SOURCE_SPECIAL, &dpo)) + { + /* + * not one of ours + */ + vlib_cli_output (vm, "%U: src address drop", format_ip4_address, + &pfx.fp_addr.ip4); + return 0; + } + + bucket = load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0); + ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index); + dpo_reset (&dpo); + + if (port) + { + rv = check_adj_port_range_x1 (ppr_dpo, (u16) port, 1234); + if (rv == 1234) + vlib_cli_output (vm, "%U port %d PASS", format_ip4_address, + &pfx.fp_addr.ip4, port); + else + vlib_cli_output (vm, "%U port %d FAIL", format_ip4_address, + &pfx.fp_addr.ip4, port); + return 0; + } + else + { + u8 *s; + + s = format (0, "%U: ", format_ip4_address, &pfx.fp_addr.ip4); + + for (i = 0; i < N_BLOCKS_PER_DPO; i++) + { + for (j = 0; j < 8; j++) + { + if (ppr_dpo->blocks[i].low.as_u16[j]) + s = format (s, "%d - %d ", + (u32) ppr_dpo->blocks[i].low.as_u16[j], + (u32) ppr_dpo->blocks[i].hi.as_u16[j]); + } + } + vlib_cli_output (vm, "%s", s); + vec_free (s); + } + + return 0; +} + +/*? + * Display the range of ports being validated by an IPv4 FIB for a given + * IP or subnet, or test if a given IP and port are being validated. + * + * @todo This is incomplete. 
This needs a detailed description and a + * practical example. + * + * @cliexpar + * Example of how to display the set of ports being validated for a given + * IPv4 subnet: + * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.0} + * 172.16.2.0: 23 - 101 + * @cliexend + * Example of how to test whether a given IPv4 address and port + * are being validated: + * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.2 port 23} + * 172.16.2.2 port 23 PASS + * @cliexend + * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.2 port 250} + * 172.16.2.2 port 250 FAIL + * @cliexend + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_source_and_port_range_check, static) = { + .path = "show ip source-and-port-range-check", + .function = show_source_and_port_range_check_fn, + .short_help = + "show ip source-and-port-range-check vrf <table-id> <ip-addr> [port <n>]", +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c new file mode 100644 index 00000000..17a1cb1b --- /dev/null +++ b/src/vnet/ip/ip4_source_check.c @@ -0,0 +1,562 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/ip4_source_check.c: IP v4 check source address (unicast RPF check) + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/fib_urpf_list.h> +#include <vnet/dpo/load_balance.h> + +/** + * @file + * @brief IPv4 Unicast Source Check. + * + * This file contains the IPv4 interface unicast source check. 
 */


/* Per-packet trace record for the source-check nodes. */
typedef struct
{
  u8 packet_data[64];
  index_t urpf;
} ip4_source_check_trace_t;

/*
 * Trace formatter: prints the captured packet bytes as an IPv4 header.
 * The vm and node arguments are consumed from the va_list but unused.
 */
static u8 *
format_ip4_source_check_trace (u8 * s, va_list * va)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
  ip4_source_check_trace_t *t = va_arg (*va, ip4_source_check_trace_t *);

  s = format (s, "%U",
              format_ip4_header, t->packet_data, sizeof (t->packet_data));

  return s;
}

/* Next nodes owned by the source-check nodes: only the drop path. */
typedef enum
{
  IP4_SOURCE_CHECK_NEXT_DROP,
  IP4_SOURCE_CHECK_N_NEXT,
} ip4_source_check_next_t;

/* Which flavour of check ip4_source_check_inline performs. */
typedef enum
{
  IP4_SOURCE_CHECK_REACHABLE_VIA_RX,
  IP4_SOURCE_CHECK_REACHABLE_VIA_ANY,
} ip4_source_check_type_t;

/* Per-interface feature configuration: the FIB used for the lookup. */
typedef union
{
  u32 fib_index;
} ip4_source_check_config_t;

/*
 * Shared worker for both source-check nodes.
 *
 * For every packet the source address is looked up in the mtrie of the
 * FIB named by the feature configuration, and the resulting load-balance
 * object's uRPF list is consulted:
 *  - VIA_RX:  fib_urpf_check() against the RX interface;
 *  - VIA_ANY: fib_urpf_check_size().
 * Multicast and 255.255.255.255 sources always pass. Packets that do
 * not pass go to IP4_SOURCE_CHECK_NEXT_DROP; the others continue to the
 * next feature. Returns the number of vectors in the frame.
 */
always_inline uword
ip4_source_check_inline (vlib_main_t * vm,
                         vlib_node_runtime_t * node,
                         vlib_frame_t * frame,
                         ip4_source_check_type_t source_check_type)
{
  u32 n_left_from, *from, *to_next;
  u32 next_index;
  /* Errors are accounted against the ip4-input node's counters. */
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_input_node.index);

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
                                   /* stride */ 1,
                                   sizeof (ip4_source_check_trace_t));

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /*
       * Dual loop: two packets per iteration. Needs 4 packets left
       * because packets 2 and 3 are prefetched below.
       */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          vlib_buffer_t *p0, *p1;
          ip4_header_t *ip0, *ip1;
          ip4_fib_mtrie_t *mtrie0, *mtrie1;
          ip4_fib_mtrie_leaf_t leaf0, leaf1;
          ip4_source_check_config_t *c0, *c1;
          const load_balance_t *lb0, *lb1;
          u32 pi0, next0, pass0, lb_index0;
          u32 pi1, next1, pass1, lb_index1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
            CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
          }

          /* Speculatively enqueue both packets to the current next frame. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);

          /* Fetch this feature's config and the default next node. */
          c0 =
            vnet_feature_next_with_data (vnet_buffer (p0)->sw_if_index
                                         [VLIB_RX], &next0, p0,
                                         sizeof (c0[0]));
          c1 =
            vnet_feature_next_with_data (vnet_buffer (p1)->sw_if_index
                                         [VLIB_RX], &next1, p1,
                                         sizeof (c1[0]));

          mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
          mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie;

          /* Three-step mtrie lookup keyed on the *source* address. */
          leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
          leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);

          leaf0 =
            ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
          leaf1 =
            ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);

          leaf0 =
            ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
          leaf1 =
            ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);

          lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
          lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);

          lb0 = load_balance_get (lb_index0);
          lb1 = load_balance_get (lb_index1);

          /* Pass multicast and 255.255.255.255 source addresses. */
          pass0 = ip4_address_is_multicast (&ip0->src_address)
            || ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
          pass1 = ip4_address_is_multicast (&ip1->src_address)
            || ip1->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);

          if (IP4_SOURCE_CHECK_REACHABLE_VIA_RX == source_check_type)
            {
              pass0 |= fib_urpf_check (lb0->lb_urpf,
                                       vnet_buffer (p0)->sw_if_index
                                       [VLIB_RX]);
              pass1 |=
                fib_urpf_check (lb1->lb_urpf,
                                vnet_buffer (p1)->sw_if_index[VLIB_RX]);
            }
          else
            {
              pass0 |= fib_urpf_check_size (lb0->lb_urpf);
              pass1 |= fib_urpf_check_size (lb1->lb_urpf);
            }
          next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP);
          next1 = (pass1 ? next1 : IP4_SOURCE_CHECK_NEXT_DROP);

          /*
           * The error is set unconditionally; presumably it is only
           * consulted when the packet ends up in error-drop.
           */
          p0->error =
            error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
          p1->error =
            error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];

          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, pi1, next0, next1);
        }

      /* Single loop: same processing, one packet at a time. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t *p0;
          ip4_header_t *ip0;
          ip4_fib_mtrie_t *mtrie0;
          ip4_fib_mtrie_leaf_t leaf0;
          ip4_source_check_config_t *c0;
          u32 pi0, next0, pass0, lb_index0;
          const load_balance_t *lb0;

          pi0 = from[0];
          to_next[0] = pi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          p0 = vlib_get_buffer (vm, pi0);
          ip0 = vlib_buffer_get_current (p0);

          c0 =
            vnet_feature_next_with_data (vnet_buffer (p0)->sw_if_index
                                         [VLIB_RX], &next0, p0,
                                         sizeof (c0[0]));

          mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;

          leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);

          leaf0 =
            ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);

          leaf0 =
            ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);

          lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);

          lb0 = load_balance_get (lb_index0);

          /* Pass multicast and 255.255.255.255 source addresses. */
          pass0 = ip4_address_is_multicast (&ip0->src_address)
            || ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);

          if (IP4_SOURCE_CHECK_REACHABLE_VIA_RX == source_check_type)
            {
              pass0 |= fib_urpf_check (lb0->lb_urpf,
                                       vnet_buffer (p0)->sw_if_index
                                       [VLIB_RX]);
            }
          else
            {
              pass0 |= fib_urpf_check_size (lb0->lb_urpf);
            }

          next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP);
          p0->error =
            error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           pi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}

/* Node function for the "via any" (loose) check. */
static uword
ip4_source_check_reachable_via_any (vlib_main_t * vm,
                                    vlib_node_runtime_t * node,
                                    vlib_frame_t * frame)
{
  return ip4_source_check_inline (vm, node, frame,
                                  IP4_SOURCE_CHECK_REACHABLE_VIA_ANY);
}

/* Node function for the "via rx" (strict) check. */
static uword
ip4_source_check_reachable_via_rx (vlib_main_t * vm,
                                   vlib_node_runtime_t * node,
                                   vlib_frame_t * frame)
{
  return ip4_source_check_inline (vm, node, frame,
                                  IP4_SOURCE_CHECK_REACHABLE_VIA_RX);
}

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_check_source_reachable_via_any) = {
  .function = ip4_source_check_reachable_via_any,
  .name = "ip4-source-check-via-any",
  .vector_size = sizeof (u32),

  .n_next_nodes = IP4_SOURCE_CHECK_N_NEXT,
  .next_nodes = {
    [IP4_SOURCE_CHECK_NEXT_DROP] = "error-drop",
  },

  .format_buffer = format_ip4_header,
  .format_trace = format_ip4_source_check_trace,
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (ip4_check_source_reachable_via_any,
                              ip4_source_check_reachable_via_any);

/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_check_source_reachable_via_rx) = {
  .function = ip4_source_check_reachable_via_rx,
  .name = "ip4-source-check-via-rx",
  .vector_size = sizeof (u32),

  .n_next_nodes = IP4_SOURCE_CHECK_N_NEXT,
  .next_nodes = {
    [IP4_SOURCE_CHECK_NEXT_DROP] = "error-drop",
  },

  .format_buffer =
format_ip4_header, + .format_trace = format_ip4_source_check_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip4_check_source_reachable_via_rx, + ip4_source_check_reachable_via_rx); + +static clib_error_t * +set_ip_source_check (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + vnet_main_t *vnm = vnet_get_main (); + ip4_main_t *im = &ip4_main; + clib_error_t *error = 0; + u32 sw_if_index, is_del; + ip4_source_check_config_t config; + char *feature_name = "ip4-source-check-via-rx"; + + sw_if_index = ~0; + is_del = 0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat_user + (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + ; + else if (unformat (line_input, "del")) + is_del = 1; + else if (unformat (line_input, "loose")) + feature_name = "ip4-source-check-via-any"; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (~0 == sw_if_index) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, line_input); + goto done; + } + + config.fib_index = im->fib_index_by_sw_if_index[sw_if_index]; + vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index, + is_del == 0, &config, sizeof (config)); +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command adds the 'ip4-source-check-via-rx' graph node for + * a given interface. By adding the IPv4 source check graph node to + * an interface, the code verifies that the source address of incoming + * unicast packets are reachable over the incoming interface. Two flavours + * are supported (the default is strict): + * - loose: accept ingress packet if there is a route to reach the source + * - strict: accept ingress packet if it arrived on an interface which + * the route to the source uses. i.e. 
an interface that the source + * is reachable via. + * + * @cliexpar + * @parblock + * Example of graph node before range checking is enabled: + * @cliexstart{show vlib graph ip4-source-check-via-rx} + * Name Next Previous + * ip4-source-check-via-rx error-drop [0] + * @cliexend + * + * Example of how to enable unicast source checking on an interface: + * @cliexcmd{set interface ip source-check GigabitEthernet2/0/0 loose} + * + * Example of graph node after range checking is enabled: + * @cliexstart{show vlib graph ip4-source-check-via-rx} + * Name Next Previous + * ip4-source-check-via-rx error-drop [0] ip4-input-no-checksum + * ip4-source-and-port-range- ip4-input + * @cliexend + * + * Example of how to display the feature enabed on an interface: + * @cliexstart{show ip interface features GigabitEthernet2/0/0} + * IP feature paths configured on GigabitEthernet2/0/0... + * + * ipv4 unicast: + * ip4-source-check-via-rx + * ip4-lookup + * + * ipv4 multicast: + * ip4-lookup-multicast + * + * ipv4 multicast: + * interface-output + * + * ipv6 unicast: + * ip6-lookup + * + * ipv6 multicast: + * ip6-lookup + * + * ipv6 multicast: + * interface-output + * @cliexend + * + * Example of how to disable unicast source checking on an interface: + * @cliexcmd{set interface ip source-check GigabitEthernet2/0/0 del} + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip_source_check_command, static) = { + .path = "set interface ip source-check", + .function = set_ip_source_check, + .short_help = "set interface ip source-check <interface> [strict|loose] [del]", +}; +/* *INDENT-ON* */ + +static clib_error_t * +ip_source_check_accept (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + fib_prefix_t pfx = { + .fp_proto = FIB_PROTOCOL_IP4, + }; + clib_error_t *error = NULL; + u32 table_id, is_add, fib_index; + + is_add = 1; + table_id = ~0; + + /* Get a line of input. 
*/ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "table %d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "add")) + is_add = 1; + else if (unformat (line_input, "%U/%d", + unformat_ip4_address, &pfx.fp_addr.ip4, &pfx.fp_len)) + pfx.fp_proto = FIB_PROTOCOL_IP4; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (~0 != table_id) + { + fib_index = fib_table_find (pfx.fp_proto, table_id); + if (~0 == fib_index) + { + error = clib_error_return (0, "Nonexistent table id %d", table_id); + goto done; + } + } + else + { + fib_index = 0; + } + + if (is_add) + { + fib_table_entry_special_add (fib_index, + &pfx, + FIB_SOURCE_URPF_EXEMPT, + FIB_ENTRY_FLAG_DROP); + } + else + { + fib_table_entry_special_remove (fib_index, + &pfx, FIB_SOURCE_URPF_EXEMPT); + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * Add an exemption for a prefix to pass the Unicast Reverse Path + * Forwarding (uRPF) loose check. This is for testing purposes only. + * If the '<em>table</em>' is not enter it is defaulted to 0. Default + * is to '<em>add</em>'. VPP always performs a loose uRPF check for + * for-us traffic. + * + * @cliexpar + * Example of how to add a uRPF exception to a FIB table to pass the + * loose RPF tests: + * @cliexcmd{ip urpf-accept table 7 add} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip_source_check_accept_command, static) = { + .path = "ip urpf-accept", + .function = ip_source_check_accept, + .short_help = "ip urpf-accept [table <table-id>] [add|del]", +}; +/* *INDENT-ON* */ + + +/* Dummy init function to get us linked in. 
*/ +clib_error_t * +ip4_source_check_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (ip4_source_check_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip4_test.c b/src/vnet/ip/ip4_test.c new file mode 100644 index 00000000..73dabfdc --- /dev/null +++ b/src/vnet/ip/ip4_test.c @@ -0,0 +1,347 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> + +/** + * @file + * @brief IPv4 FIB Tester. + * + * Not compiled in by default. IPv4 FIB tester. Add, probe, delete a bunch of + * random routes / masks and make sure that the mtrie agrees with + * the hash-table FIB. + * + * Manipulate the FIB by means of the debug CLI commands, to minimize + * the chances of doing something idiotic. + */ + +/* + * These routines need to be redeclared non-static elsewhere. 
+ * + * Also: rename ip_route() -> vnet_ip_route_cmd() and add the usual + * test_route_init() call to main.c + */ +clib_error_t *vnet_ip_route_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd_arg); + +int ip4_lookup_validate (ip4_address_t * a, u32 fib_index0); + +ip4_fib_t *find_fib_by_table_index_or_id (ip4_main_t * im, + u32 table_index_or_id, u32 flags); + +/* Routes to insert/delete/probe in FIB */ +typedef struct +{ + ip4_address_t address; + u32 mask_width; + u32 interface_id; /* not an xx_if_index */ +} test_route_t; + +typedef struct +{ + /* Test routes in use */ + test_route_t *route_pool; + + /* Number of fake ethernets created */ + u32 test_interfaces_created; +} test_main_t; + +test_main_t test_main; + +/* fake ethernet device class, distinct from "fake-ethX" */ +static u8 * +format_test_interface_name (u8 * s, va_list * args) +{ + u32 dev_instance = va_arg (*args, u32); + return format (s, "test-eth%d", dev_instance); +} + +static uword +dummy_interface_tx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + clib_warning ("you shouldn't be here, leaking buffers..."); + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VNET_DEVICE_CLASS (test_interface_device_class,static) = { + .name = "Test interface", + .format_device_name = format_test_interface_name, + .tx_function = dummy_interface_tx, +}; +/* *INDENT-ON* */ + +static clib_error_t * +thrash (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd_arg) +{ + u32 seed = 0xdeaddabe; + u32 niter = 10; + u32 nroutes = 10; + u32 ninterfaces = 4; + f64 min_mask_bits = 7.0; + f64 max_mask_bits = 32.0; + u32 table_id = 11; /* my amp goes to 11 (use fib 11) */ + u32 table_index; + int iter, i; + u8 *cmd; + test_route_t *tr; + test_main_t *tm = &test_main; + ip4_main_t *im = &ip4_main; + vnet_main_t *vnm = vnet_get_main (); + unformat_input_t cmd_input; + f64 rf; + u32 *masks = 0; + u32 tmp; + u32 hw_if_index; + clib_error_t *error = 
0; + uword *p; + unformat_input_t _line_input, *line_input = &_line_input; + u8 hw_address[6]; + ip4_fib_t *fib; + int verbose = 0; + + /* Precompute mask width -> mask vector */ + tmp = (u32) ~ 0; + vec_validate (masks, 32); + for (i = 32; i > 0; i--) + { + masks[i] = tmp; + tmp <<= 1; + } + + if (unformat_user (main_input, unformat_line_input, line_input)) + { + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "seed %d", &seed)) + ; + else if (unformat (line_input, "niter %d", &niter)) + ; + else if (unformat (line_input, "nroutes %d", &nroutes)) + ; + else if (unformat (line_input, "ninterfaces %d", &ninterfaces)) + ; + else if (unformat (line_input, "min-mask-bits %d", &tmp)) + min_mask_bits = (f64) tmp; + else if (unformat (line_input, "max-mask-bits %d", &tmp)) + max_mask_bits = (f64) tmp; + else if (unformat (line_input, "verbose")) + verbose = 1; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + goto done; + } + } + } + + /* Find or create FIB table 11 */ + fib = ip4_fib_find_or_create_fib_by_table_id (table_id); + + for (i = tm->test_interfaces_created; i < ninterfaces; i++) + { + vnet_hw_interface_t *hw; + memset (hw_address, 0, sizeof (hw_address)); + hw_address[0] = 0xd0; + hw_address[1] = 0x0f; + hw_address[5] = i; + + error = ethernet_register_interface + (vnm, test_interface_device_class.index, i /* instance */ , + hw_address, &hw_if_index, + /* flag change */ 0); + + /* Fake interfaces use FIB table 11 */ + hw = vnet_get_hw_interface (vnm, hw_if_index); + vec_validate (im->fib_index_by_sw_if_index, hw->sw_if_index); + im->fib_index_by_sw_if_index[hw->sw_if_index] = fib->index; + ip4_sw_interface_enable_disable (sw_if_index, 1); + } + + tm->test_interfaces_created = ninterfaces; + + /* Find fib index corresponding to FIB id 11 */ + p = hash_get (im->fib_index_by_table_id, table_id); + if (p == 0) + { + vlib_cli_output (vm, "Couldn't map fib id %d 
to fib index\n", table_id); + goto done; + } + table_index = p[0]; + + for (iter = 0; iter < niter; iter++) + { + /* Pick random routes to install */ + for (i = 0; i < nroutes; i++) + { + int j; + + pool_get (tm->route_pool, tr); + memset (tr, 0, sizeof (*tr)); + + again: + rf = random_f64 (&seed); + tr->mask_width = (u32) (min_mask_bits + + rf * (max_mask_bits - min_mask_bits)); + tmp = random_u32 (&seed); + tmp &= masks[tr->mask_width]; + tr->address.as_u32 = clib_host_to_net_u32 (tmp); + + /* We can't add the same address/mask twice... */ + for (j = 0; j < i; j++) + { + test_route_t *prev; + prev = pool_elt_at_index (tm->route_pool, j); + if ((prev->address.as_u32 == tr->address.as_u32) + && (prev->mask_width == tr->mask_width)) + goto again; + } + + rf = random_f64 (&seed); + tr->interface_id = (u32) (rf * ninterfaces); + } + + /* Add them */ + for (i = 0; i < nroutes; i++) + { + tr = pool_elt_at_index (tm->route_pool, i); + cmd = format (0, "add table %d %U/%d via test-eth%d", + table_id, + format_ip4_address, &tr->address, + tr->mask_width, tr->interface_id); + vec_add1 (cmd, 0); + if (verbose) + fformat (stderr, "ip route %s\n", cmd); + unformat_init_string (&cmd_input, (char *) cmd, vec_len (cmd) - 1); + error = vnet_ip_route_cmd (vm, &cmd_input, cmd_arg); + if (error) + clib_error_report (error); + unformat_free (&cmd_input); + vec_free (cmd); + } + /* Probe them */ + for (i = 0; i < nroutes; i++) + { + tr = pool_elt_at_index (tm->route_pool, i); + if (!ip4_lookup_validate (&tr->address, table_index)) + { + if (verbose) + fformat (stderr, "test lookup table %d %U\n", + table_index, format_ip4_address, &tr->address); + + fformat (stderr, "FAIL-after-insert: %U/%d\n", + format_ip4_address, &tr->address, tr->mask_width); + } + } + + /* Delete them */ + for (i = 0; i < nroutes; i++) + { + int j; + tr = pool_elt_at_index (tm->route_pool, i); + if (0) + cmd = format (0, "del table %d %U/%d via test-eth%d", + table_id, + format_ip4_address, &tr->address, + 
tr->mask_width, tr->interface_id); + else + cmd = format (0, "del table %d %U/%d", + table_id, + format_ip4_address, &tr->address, tr->mask_width); + vec_add1 (cmd, 0); + if (verbose) + fformat (stderr, "ip route %s\n", cmd); + unformat_init_string (&cmd_input, (char *) cmd, vec_len (cmd) - 1); + error = vnet_ip_route_cmd (vm, &cmd_input, cmd_arg); + if (error) + clib_error_report (error); + unformat_free (&cmd_input); + vec_free (cmd); + + /* Make sure all undeleted routes still work */ + for (j = i + 1; j < nroutes; j++) + { + test_route_t *rr; /* remaining route */ + rr = pool_elt_at_index (tm->route_pool, j); + if (!ip4_lookup_validate (&rr->address, table_index)) + { + if (verbose) + fformat (stderr, "test lookup table %d %U\n", + table_index, format_ip4_address, &rr->address); + + fformat (stderr, "FAIL: %U/%d AWOL\n", + format_ip4_address, &rr->address, rr->mask_width); + fformat (stderr, " iter %d after %d of %d deletes\n", + iter, i, nroutes); + fformat (stderr, " last route deleted %U/%d\n", + format_ip4_address, &tr->address, tr->mask_width); + } + } + } + + pool_free (tm->route_pool); + } + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command in not in the build by default. It is an internal + * command used to test the route functonality. + * + * Create test routes on IPv4 FIB table 11. Table will be created if it + * does not exist. + * + * There are several optional attributes: + * - If not provided, <seed> defaults to 0xdeaddabe. + * - If not provided, <num-iter> defaults to 10. + * - If not provided, <num-iface> defaults to 4. + * - If not provided, <min-mask> defaults to 7.0. + * - If not provided, <max-mask> defaults to 32.0. 
+ * + * @cliexpar + * Example of how to run: + * @cliexcmd{test route} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (test_route_command, static) = { + .path = "test route", + .short_help = "test route [seed <seed-num>] [niter <num-iter>] [ninterfaces <num-iface>] [min-mask-bits <min-mask>] [max-mask-bits <max-mask>] [verbose]", .function = thrash, + .function = thrash, +}; +/* *INDENT-ON* */ + +clib_error_t * +test_route_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (test_route_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h new file mode 100644 index 00000000..6ffc562c --- /dev/null +++ b/src/vnet/ip/ip4_to_ip6.h @@ -0,0 +1,659 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file + * @brief IPv4 to IPv6 translation + */ +#ifndef __included_ip4_to_ip6_h__ +#define __included_ip4_to_ip6_h__ + +#include <vnet/ip/ip.h> + + +/** + * IPv4 to IPv6 set call back function type + */ +typedef int (*ip4_to_ip6_set_fn_t) (ip4_header_t * ip4, ip6_header_t * ip6, + void *ctx); + +/* *INDENT-OFF* */ +static u8 icmp_to_icmp6_updater_pointer_table[] = + { 0, 1, 4, 4, ~0, + ~0, ~0, ~0, 7, 6, + ~0, ~0, 8, 8, 8, + 8, 24, 24, 24, 24 + }; +/* *INDENT-ON* */ + +#define frag_id_4to6(id) (id) + +/** + * @brief Get TCP/UDP port number or ICMP id from IPv4 packet. + * + * @param ip4 IPv4 header. + * @param sender 1 get sender port, 0 get receiver port. + * + * @returns Port number on success, 0 otherwise. + */ +always_inline u16 +ip4_get_port (ip4_header_t * ip, u8 sender) +{ + if (ip->ip_version_and_header_length != 0x45 || + ip4_get_fragment_offset (ip)) + return 0; + + if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || + (ip->protocol == IP_PROTOCOL_UDP))) + { + udp_header_t *udp = (void *) (ip + 1); + return (sender) ? udp->src_port : udp->dst_port; + } + else if (ip->protocol == IP_PROTOCOL_ICMP) + { + icmp46_header_t *icmp = (void *) (ip + 1); + if (icmp->type == ICMP4_echo_request || icmp->type == ICMP4_echo_reply) + { + return *((u16 *) (icmp + 1)); + } + else if (clib_net_to_host_u16 (ip->length) >= 64) + { + ip = (ip4_header_t *) (icmp + 2); + if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) || + (ip->protocol == IP_PROTOCOL_UDP))) + { + udp_header_t *udp = (void *) (ip + 1); + return (sender) ? udp->dst_port : udp->src_port; + } + else if (ip->protocol == IP_PROTOCOL_ICMP) + { + icmp46_header_t *icmp = (void *) (ip + 1); + if (icmp->type == ICMP4_echo_request || + icmp->type == ICMP4_echo_reply) + { + return *((u16 *) (icmp + 1)); + } + } + } + } + return 0; +} + +/** + * @brief Convert type and code value from ICMP4 to ICMP6. + * + * @param icmp ICMP header. + * @param inner_ip4 Inner IPv4 header if present, 0 otherwise. 
+ * + * @returns 0 on success, non-zero value otherwise. + */ +always_inline int +icmp_to_icmp6_header (icmp46_header_t * icmp, ip4_header_t ** inner_ip4) +{ + *inner_ip4 = NULL; + switch (icmp->type) + { + case ICMP4_echo_reply: + icmp->type = ICMP6_echo_reply; + break; + case ICMP4_echo_request: + icmp->type = ICMP6_echo_request; + break; + case ICMP4_destination_unreachable: + *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8); + + switch (icmp->code) + { + case ICMP4_destination_unreachable_destination_unreachable_net: //0 + case ICMP4_destination_unreachable_destination_unreachable_host: //1 + icmp->type = ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_no_route_to_destination; + break; + case ICMP4_destination_unreachable_protocol_unreachable: //2 + icmp->type = ICMP6_parameter_problem; + icmp->code = ICMP6_parameter_problem_unrecognized_next_header; + break; + case ICMP4_destination_unreachable_port_unreachable: //3 + icmp->type = ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_port_unreachable; + break; + case ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set: //4 + icmp->type = + ICMP6_packet_too_big; + icmp->code = 0; + { + u32 advertised_mtu = clib_net_to_host_u32 (*((u32 *) (icmp + 1))); + if (advertised_mtu) + advertised_mtu += 20; + else + advertised_mtu = 1000; //FIXME ! 
(RFC 1191 - plateau value) + + //FIXME: = minimum(advertised MTU+20, MTU_of_IPv6_nexthop, (MTU_of_IPv4_nexthop)+20) + *((u32 *) (icmp + 1)) = clib_host_to_net_u32 (advertised_mtu); + } + break; + + case ICMP4_destination_unreachable_source_route_failed: //5 + case ICMP4_destination_unreachable_destination_network_unknown: //6 + case ICMP4_destination_unreachable_destination_host_unknown: //7 + case ICMP4_destination_unreachable_source_host_isolated: //8 + case ICMP4_destination_unreachable_network_unreachable_for_type_of_service: //11 + case ICMP4_destination_unreachable_host_unreachable_for_type_of_service: //12 + icmp->type = + ICMP6_destination_unreachable; + icmp->code = ICMP6_destination_unreachable_no_route_to_destination; + break; + case ICMP4_destination_unreachable_network_administratively_prohibited: //9 + case ICMP4_destination_unreachable_host_administratively_prohibited: //10 + case ICMP4_destination_unreachable_communication_administratively_prohibited: //13 + case ICMP4_destination_unreachable_precedence_cutoff_in_effect: //15 + icmp->type = ICMP6_destination_unreachable; + icmp->code = + ICMP6_destination_unreachable_destination_administratively_prohibited; + break; + case ICMP4_destination_unreachable_host_precedence_violation: //14 + default: + return -1; + } + break; + + case ICMP4_time_exceeded: //11 + *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8); + icmp->type = ICMP6_time_exceeded; + break; + + case ICMP4_parameter_problem: + *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8); + + switch (icmp->code) + { + case ICMP4_parameter_problem_pointer_indicates_error: + case ICMP4_parameter_problem_bad_length: + icmp->type = ICMP6_parameter_problem; + icmp->code = ICMP6_parameter_problem_erroneous_header_field; + { + u8 ptr = + icmp_to_icmp6_updater_pointer_table[*((u8 *) (icmp + 1))]; + if (ptr == 0xff) + return -1; + + *((u32 *) (icmp + 1)) = clib_host_to_net_u32 (ptr); + } + break; + default: + //All other codes cause error + return -1; + } + 
break; + + default: + //All other types cause error + return -1; + break; + } + return 0; +} + +/** + * @brief Translate ICMP4 packet to ICMP6. + * + * @param p Buffer to translate. + * @param fn The function to translate outer header. + * @param ctx A context passed in the outer header translate function. + * @param inner_fn The function to translate inner header. + * @param inner_ctx A context passed in the inner header translate function. + * + * @returns 0 on success, non-zero value otherwise. + */ +always_inline int +icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx, + ip4_to_ip6_set_fn_t inner_fn, void *inner_ctx) +{ + ip4_header_t *ip4, *inner_ip4; + ip6_header_t *ip6, *inner_ip6; + u32 ip_len; + icmp46_header_t *icmp; + ip_csum_t csum; + ip6_frag_hdr_t *inner_frag; + u32 inner_frag_id; + u32 inner_frag_offset; + u8 inner_frag_more; + u16 *inner_L4_checksum = 0; + int rv; + + ip4 = vlib_buffer_get_current (p); + ip_len = clib_net_to_host_u16 (ip4->length); + ASSERT (ip_len <= p->current_length); + + icmp = (icmp46_header_t *) (ip4 + 1); + if (icmp_to_icmp6_header (icmp, &inner_ip4)) + return -1; + + if (inner_ip4) + { + //We have 2 headers to translate. 
+ //We need to make some room in the middle of the packet + if (PREDICT_FALSE (ip4_is_fragment (inner_ip4))) + { + //Here it starts getting really tricky + //We will add a fragmentation header in the inner packet + + if (!ip4_is_first_fragment (inner_ip4)) + { + //For now we do not handle unless it is the first fragment + //Ideally we should handle the case as we are in slow path already + return -1; + } + + vlib_buffer_advance (p, + -2 * (sizeof (*ip6) - sizeof (*ip4)) - + sizeof (*inner_frag)); + ip6 = vlib_buffer_get_current (p); + clib_memcpy (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4, + 20 + 8); + ip4 = + (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)); + icmp = (icmp46_header_t *) (ip4 + 1); + + inner_ip6 = + (ip6_header_t *) u8_ptr_add (inner_ip4, + sizeof (*ip4) - sizeof (*ip6) - + sizeof (*inner_frag)); + inner_frag = + (ip6_frag_hdr_t *) u8_ptr_add (inner_ip6, sizeof (*inner_ip6)); + ip6->payload_length = + u16_net_add (ip4->length, + sizeof (*ip6) - 2 * sizeof (*ip4) + + sizeof (*inner_frag)); + inner_frag_id = frag_id_4to6 (inner_ip4->fragment_id); + inner_frag_offset = ip4_get_fragment_offset (inner_ip4); + inner_frag_more = + ! 
!(inner_ip4->flags_and_fragment_offset & + clib_net_to_host_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)); + } + else + { + vlib_buffer_advance (p, -2 * (sizeof (*ip6) - sizeof (*ip4))); + ip6 = vlib_buffer_get_current (p); + clib_memcpy (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4, + 20 + 8); + ip4 = + (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)); + icmp = (icmp46_header_t *) u8_ptr_add (ip4, sizeof (*ip4)); + inner_ip6 = + (ip6_header_t *) u8_ptr_add (inner_ip4, + sizeof (*ip4) - sizeof (*ip6)); + ip6->payload_length = + u16_net_add (ip4->length, sizeof (*ip6) - 2 * sizeof (*ip4)); + inner_frag = NULL; + } + + if (PREDICT_TRUE (inner_ip4->protocol == IP_PROTOCOL_TCP)) + { + inner_L4_checksum = &((tcp_header_t *) (inner_ip4 + 1))->checksum; + *inner_L4_checksum = + ip_csum_fold (ip_csum_sub_even + (*inner_L4_checksum, + *((u64 *) (&inner_ip4->src_address)))); + } + else if (PREDICT_TRUE (inner_ip4->protocol == IP_PROTOCOL_UDP)) + { + inner_L4_checksum = &((udp_header_t *) (inner_ip4 + 1))->checksum; + if (*inner_L4_checksum) + *inner_L4_checksum = + ip_csum_fold (ip_csum_sub_even + (*inner_L4_checksum, + *((u64 *) (&inner_ip4->src_address)))); + } + else if (inner_ip4->protocol == IP_PROTOCOL_ICMP) + { + //We have an ICMP inside an ICMP + //It needs to be translated, but not for error ICMP messages + icmp46_header_t *inner_icmp = (icmp46_header_t *) (inner_ip4 + 1); + //Only types ICMP4_echo_request and ICMP4_echo_reply are handled by icmp_to_icmp6_header + inner_icmp->type = (inner_icmp->type == ICMP4_echo_request) ? 
+ ICMP6_echo_request : ICMP6_echo_reply; + inner_L4_checksum = &inner_icmp->checksum; + inner_ip4->protocol = IP_PROTOCOL_ICMP6; + } + else + { + /* To shut up Coverity */ + os_panic (); + } + + inner_ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (inner_ip4->tos << 20)); + inner_ip6->payload_length = + u16_net_add (inner_ip4->length, -sizeof (*inner_ip4)); + inner_ip6->hop_limit = inner_ip4->ttl; + inner_ip6->protocol = inner_ip4->protocol; + + if ((rv = inner_fn (inner_ip4, inner_ip6, inner_ctx)) != 0) + return rv; + + if (PREDICT_FALSE (inner_frag != NULL)) + { + inner_frag->next_hdr = inner_ip6->protocol; + inner_frag->identification = inner_frag_id; + inner_frag->rsv = 0; + inner_frag->fragment_offset_and_more = + ip6_frag_hdr_offset_and_more (inner_frag_offset, inner_frag_more); + inner_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + inner_ip6->payload_length = + clib_host_to_net_u16 (clib_net_to_host_u16 + (inner_ip6->payload_length) + + sizeof (*inner_frag)); + } + + csum = *inner_L4_checksum; + if (inner_ip6->protocol == IP_PROTOCOL_ICMP6) + { + //Recompute ICMP checksum + icmp46_header_t *inner_icmp = (icmp46_header_t *) (inner_ip4 + 1); + + inner_icmp->checksum = 0; + csum = ip_csum_with_carry (0, inner_ip6->payload_length); + csum = + ip_csum_with_carry (csum, + clib_host_to_net_u16 (inner_ip6->protocol)); + csum = ip_csum_with_carry (csum, inner_ip6->src_address.as_u64[0]); + csum = ip_csum_with_carry (csum, inner_ip6->src_address.as_u64[1]); + csum = ip_csum_with_carry (csum, inner_ip6->dst_address.as_u64[0]); + csum = ip_csum_with_carry (csum, inner_ip6->dst_address.as_u64[1]); + csum = + ip_incremental_checksum (csum, inner_icmp, + clib_net_to_host_u16 + (inner_ip6->payload_length)); + inner_icmp->checksum = ~ip_csum_fold (csum); + } + else + { + /* UDP checksum is optional */ + if (csum) + { + csum = + ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]); + csum = + ip_csum_add_even (csum, 
inner_ip6->src_address.as_u64[1]); + csum = + ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]); + csum = + ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]); + *inner_L4_checksum = ip_csum_fold (csum); + } + } + } + else + { + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); + ip6 = vlib_buffer_get_current (p); + ip6->payload_length = + clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - + sizeof (*ip4)); + } + + //Translate outer IPv6 + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + + ip6->hop_limit = ip4->ttl; + ip6->protocol = IP_PROTOCOL_ICMP6; + + if ((rv = fn (ip4, ip6, ctx)) != 0) + return rv; + + //Truncate when the packet exceeds the minimal IPv6 MTU + if (p->current_length > 1280) + { + ip6->payload_length = clib_host_to_net_u16 (1280 - sizeof (*ip6)); + p->current_length = 1280; //Looks too simple to be correct... + } + + //Recompute ICMP checksum + icmp->checksum = 0; + csum = ip_csum_with_carry (0, ip6->payload_length); + csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol)); + csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]); + csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]); + csum = + ip_incremental_checksum (csum, icmp, + clib_net_to_host_u16 (ip6->payload_length)); + icmp->checksum = ~ip_csum_fold (csum); + + return 0; +} + +/** + * @brief Translate IPv4 fragmented packet to IPv6. + * + * @param p Buffer to translate. + * @param fn The function to translate header. + * @param ctx A context passed in the header translate function. + * + * @returns 0 on success, non-zero value otherwise. 
+ */ +always_inline int +ip4_to_ip6_fragmented (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx) +{ + ip4_header_t *ip4; + ip6_header_t *ip6; + ip6_frag_hdr_t *frag; + int rv; + + ip4 = vlib_buffer_get_current (p); + frag = (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); + ip6 = + (ip6_header_t *) u8_ptr_add (ip4, + sizeof (*ip4) - sizeof (*frag) - + sizeof (*ip6)); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); + + //We know that the protocol was one of ICMP, TCP or UDP + //because the first fragment was found and cached + frag->next_hdr = + (ip4->protocol == IP_PROTOCOL_ICMP) ? IP_PROTOCOL_ICMP6 : ip4->protocol; + frag->identification = frag_id_4to6 (ip4->fragment_id); + frag->rsv = 0; + frag->fragment_offset_and_more = + ip6_frag_hdr_offset_and_more (ip4_get_fragment_offset (ip4), + clib_net_to_host_u16 + (ip4->flags_and_fragment_offset) & + IP4_HEADER_FLAG_MORE_FRAGMENTS); + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + ip6->payload_length = + clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) - + sizeof (*ip4) + sizeof (*frag)); + ip6->hop_limit = ip4->ttl; + ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + + if ((rv = fn (ip4, ip6, ctx)) != 0) + return rv; + + return 0; +} + +/** + * @brief Translate IPv4 UDP/TCP packet to IPv6. + * + * @param p Buffer to translate. + * @param fn The function to translate header. + * @param ctx A context passed in the header translate function. + * + * @returns 0 on success, non-zero value otherwise. 
+ */ +always_inline int +ip4_to_ip6_tcp_udp (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx) +{ + ip4_header_t *ip4; + ip6_header_t *ip6; + ip_csum_t csum; + u16 *checksum; + ip6_frag_hdr_t *frag; + u32 frag_id; + int rv; + + ip4 = vlib_buffer_get_current (p); + + if (ip4->protocol == IP_PROTOCOL_UDP) + { + udp_header_t *udp = ip4_next_header (ip4); + checksum = &udp->checksum; + + //UDP checksum is optional over IPv4 but mandatory for IPv6 + //We do not check udp->length sanity but use our safe computed value instead + if (PREDICT_FALSE (!checksum)) + { + u16 udp_len = clib_host_to_net_u16 (ip4->length) - sizeof (*ip4); + csum = ip_incremental_checksum (0, udp, udp_len); + csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len)); + csum = + ip_csum_with_carry (csum, clib_host_to_net_u16 (IP_PROTOCOL_UDP)); + csum = ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address))); + *checksum = ~ip_csum_fold (csum); + } + } + else + { + tcp_header_t *tcp = ip4_next_header (ip4); + checksum = &tcp->checksum; + } + + csum = ip_csum_sub_even (*checksum, ip4->src_address.as_u32); + csum = ip_csum_sub_even (csum, ip4->dst_address.as_u32); + *checksum = ip_csum_fold (csum); + + // Deal with fragmented packets + if (PREDICT_FALSE (ip4->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS))) + { + ip6 = + (ip6_header_t *) u8_ptr_add (ip4, + sizeof (*ip4) - sizeof (*ip6) - + sizeof (*frag)); + frag = + (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); + frag_id = frag_id_4to6 (ip4->fragment_id); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); + } + else + { + ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6)); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); + frag = NULL; + } + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4)); + ip6->hop_limit = 
ip4->ttl; + ip6->protocol = ip4->protocol; + + if (PREDICT_FALSE (frag != NULL)) + { + frag->next_hdr = ip6->protocol; + frag->identification = frag_id; + frag->rsv = 0; + frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1); + ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag)); + } + + if ((rv = fn (ip4, ip6, ctx)) != 0) + return rv; + + csum = ip_csum_add_even (*checksum, ip6->src_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]); + *checksum = ip_csum_fold (csum); + + return 0; +} + +/** + * @brief Translate IPv4 packet to IPv6 (IP header only). + * + * @param p Buffer to translate. + * @param fn The function to translate header. + * @param ctx A context passed in the header translate function. + * + * @returns 0 on success, non-zero value otherwise. 
+ */ +always_inline int +ip4_to_ip6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx) +{ + ip4_header_t *ip4; + ip6_header_t *ip6; + ip6_frag_hdr_t *frag; + u32 frag_id; + int rv; + + ip4 = vlib_buffer_get_current (p); + + // Deal with fragmented packets + if (PREDICT_FALSE (ip4->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS))) + { + ip6 = + (ip6_header_t *) u8_ptr_add (ip4, + sizeof (*ip4) - sizeof (*ip6) - + sizeof (*frag)); + frag = + (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag)); + frag_id = frag_id_4to6 (ip4->fragment_id); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag)); + } + else + { + ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6)); + vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6)); + frag = NULL; + } + + ip6->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20)); + ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4)); + ip6->hop_limit = ip4->ttl; + ip6->protocol = ip4->protocol; + + if (PREDICT_FALSE (frag != NULL)) + { + frag->next_hdr = ip6->protocol; + frag->identification = frag_id; + frag->rsv = 0; + frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1); + ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION; + ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag)); + } + + if ((rv = fn (ip4, ip6, ctx)) != 0) + return rv; + + return 0; +} + +#endif /* __included_ip4_to_ip6_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h new file mode 100644 index 00000000..8aef53a9 --- /dev/null +++ b/src/vnet/ip/ip6.h @@ -0,0 +1,605 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip6.h: ip6 main include file + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef included_ip_ip6_h +#define included_ip_ip6_h + +#include <vlib/mc.h> +#include <vlib/buffer.h> +#include <vnet/ethernet/packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/ip/ip6_hop_by_hop_packet.h> +#include <vnet/ip/lookup.h> +#include <stdbool.h> +#include <vppinfra/bihash_24_8.h> +#include <vppinfra/bihash_template.h> +#include <vnet/util/radix.h> + +/* + * Default size of the ip6 fib hash table + */ +#define IP6_FIB_DEFAULT_HASH_NUM_BUCKETS (64 * 1024) +#define IP6_FIB_DEFAULT_HASH_MEMORY_SIZE (32<<20) + +typedef struct +{ + ip6_address_t addr; + u32 dst_address_length; + u32 vrf_index; +} ip6_fib_key_t; + +typedef struct +{ + /* Table ID (hash key) for this FIB. */ + u32 table_id; + + /* Index into FIB vector. */ + u32 index; +} ip6_fib_t; + +typedef struct ip6_mfib_t +{ + /* Table ID (hash key) for this FIB. */ + u32 table_id; + + /* Index into FIB vector. */ + u32 index; + + /* + * Pointer to the top of a radix tree. + * This cannot be realloc'd, hence it cannot be inlined with this table + */ + struct radix_node_head *rhead; +} ip6_mfib_t; + +struct ip6_main_t; + +typedef void (ip6_add_del_interface_address_function_t) + (struct ip6_main_t * im, + uword opaque, + u32 sw_if_index, + ip6_address_t * address, + u32 address_length, u32 if_address_index, u32 is_del); + +typedef struct +{ + ip6_add_del_interface_address_function_t *function; + uword function_opaque; +} ip6_add_del_interface_address_callback_t; + +typedef void (ip6_table_bind_function_t) + (struct ip6_main_t * im, + uword opaque, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index); + +typedef struct +{ + ip6_table_bind_function_t *function; + uword function_opaque; +} ip6_table_bind_callback_t; + +/** + * Enumeration of the FIB table instance types + */ +typedef enum ip6_fib_table_instance_type_t_ +{ + /** + * This table stores the routes that are used to forward traffic. + * The key is the prefix, the result the adjacnecy to forward on. 
+ */ + IP6_FIB_TABLE_FWDING, + /** + * The table that stores ALL routes learned by the DP. + * Some of these routes may not be ready to install in forwarding + * at a given time. + * The key in this table is the prefix, the result is the fib_entry_t + */ + IP6_FIB_TABLE_NON_FWDING, +} ip6_fib_table_instance_type_t; + +#define IP6_FIB_NUM_TABLES (IP6_FIB_TABLE_NON_FWDING+1) + +/** + * A represenation of a single IP6 table + */ +typedef struct ip6_fib_table_instance_t_ +{ + /* The hash table */ + BVT (clib_bihash) ip6_hash; + + /* bitmap / refcounts / vector of mask widths to search */ + uword *non_empty_dst_address_length_bitmap; + u8 *prefix_lengths_in_search_order; + i32 dst_address_length_refcounts[129]; +} ip6_fib_table_instance_t; + +typedef struct ip6_main_t +{ + /** + * The two FIB tables; fwding and non-fwding + */ + ip6_fib_table_instance_t ip6_table[IP6_FIB_NUM_TABLES]; + + ip_lookup_main_t lookup_main; + + /* Pool of FIBs. */ + struct fib_table_t_ *fibs; + + /* Pool of V6 FIBs. */ + ip6_fib_t *v6_fibs; + + /** Vector of MFIBs. */ + struct mfib_table_t_ *mfibs; + + /* Network byte orders subnet mask for each prefix length */ + ip6_address_t fib_masks[129]; + + /* Table index indexed by software interface. */ + u32 *fib_index_by_sw_if_index; + + /** Table index indexed by software interface. */ + u32 *mfib_index_by_sw_if_index; + + /* IP6 enabled count by software interface */ + u8 *ip_enabled_by_sw_if_index; + + /* Hash table mapping table id to fib index. + ID space is not necessarily dense; index space is dense. */ + uword *fib_index_by_table_id; + + /** Hash table mapping table id to multicast fib index. + ID space is not necessarily dense; index space is dense. */ + uword *mfib_index_by_table_id; + + /* Hash table mapping interface rewrite adjacency index by sw if index. */ + uword *interface_route_adj_index_by_sw_if_index; + + /* Functions to call when interface address changes. 
*/ + ip6_add_del_interface_address_callback_t + * add_del_interface_address_callbacks; + + /** Functions to call when interface to table biding changes. */ + ip6_table_bind_callback_t *table_bind_callbacks; + + /* Template used to generate IP6 neighbor solicitation packets. */ + vlib_packet_template_t discover_neighbor_packet_template; + + /* ip6 lookup table config parameters */ + u32 lookup_table_nbuckets; + uword lookup_table_size; + + /* Seed for Jenkins hash used to compute ip6 flow hash. */ + u32 flow_hash_seed; + + struct + { + /* TTL to use for host generated packets. */ + u8 ttl; + + u8 pad[3]; + } host_config; + + /* HBH processing enabled? */ + u8 hbh_enabled; +} ip6_main_t; + +/* Global ip6 main structure. */ +extern ip6_main_t ip6_main; + +/* Global ip6 input node. Errors get attached to ip6 input node. */ +extern vlib_node_registration_t ip6_input_node; +extern vlib_node_registration_t ip6_rewrite_node; +extern vlib_node_registration_t ip6_rewrite_mcast_node; +extern vlib_node_registration_t ip6_rewrite_local_node; +extern vlib_node_registration_t ip6_discover_neighbor_node; +extern vlib_node_registration_t ip6_glean_node; +extern vlib_node_registration_t ip6_midchain_node; + +always_inline uword +ip6_destination_matches_route (const ip6_main_t * im, + const ip6_address_t * key, + const ip6_address_t * dest, uword dest_length) +{ + int i; + for (i = 0; i < ARRAY_LEN (key->as_uword); i++) + { + if ((key->as_uword[i] ^ dest->as_uword[i]) & im-> + fib_masks[dest_length].as_uword[i]) + return 0; + } + return 1; +} + +always_inline uword +ip6_destination_matches_interface (ip6_main_t * im, + ip6_address_t * key, + ip_interface_address_t * ia) +{ + ip6_address_t *a = ip_interface_address_get_address (&im->lookup_main, ia); + return ip6_destination_matches_route (im, key, a, ia->address_length); +} + +/* As above but allows for unaligned destinations (e.g. works right from IP header of packet). 
*/ +always_inline uword +ip6_unaligned_destination_matches_route (ip6_main_t * im, + ip6_address_t * key, + ip6_address_t * dest, + uword dest_length) +{ + int i; + for (i = 0; i < ARRAY_LEN (key->as_uword); i++) + { + if ((clib_mem_unaligned (&key->as_uword[i], uword) ^ dest->as_uword[i]) + & im->fib_masks[dest_length].as_uword[i]) + return 0; + } + return 1; +} + +always_inline int +ip6_src_address_for_packet (ip_lookup_main_t * lm, + u32 sw_if_index, ip6_address_t * src) +{ + u32 if_add_index = lm->if_address_pool_index_by_sw_if_index[sw_if_index]; + if (PREDICT_TRUE (if_add_index != ~0)) + { + ip_interface_address_t *if_add = + pool_elt_at_index (lm->if_address_pool, if_add_index); + ip6_address_t *if_ip = ip_interface_address_get_address (lm, if_add); + *src = *if_ip; + return (0); + } + else + { + src->as_u64[0] = 0; + src->as_u64[1] = 0; + } + return (!0); +} + +/* Find interface address which matches destination. */ +always_inline ip6_address_t * +ip6_interface_address_matching_destination (ip6_main_t * im, + ip6_address_t * dst, + u32 sw_if_index, + ip_interface_address_t ** + result_ia) +{ + ip_lookup_main_t *lm = &im->lookup_main; + ip_interface_address_t *ia; + ip6_address_t *result = 0; + + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip6_address_t * a = ip_interface_address_get_address (lm, ia); + if (ip6_destination_matches_route (im, dst, a, ia->address_length)) + { + result = a; + break; + } + })); + /* *INDENT-ON* */ + if (result_ia) + *result_ia = result ? 
ia : 0; + return result; +} + +clib_error_t *ip6_add_del_interface_address (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * address, + u32 address_length, u32 is_del); +void ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable); + +/** + * @brie get first IPv6 interface address + */ +ip6_address_t *ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index); + +int ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2); + +clib_error_t *ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, + u32 sw_if_index); + +uword +ip6_udp_register_listener (vlib_main_t * vm, + u16 dst_port, u32 next_node_index); + +u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, + ip6_header_t * ip0, + int *bogus_lengthp); + +void ip6_register_protocol (u32 protocol, u32 node_index); + +serialize_function_t serialize_vnet_ip6_main, unserialize_vnet_ip6_main; + +void ip6_ethernet_update_adjacency (vnet_main_t * vnm, + u32 sw_if_index, u32 ai); + + +void +ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip, + u8 * mac); + +void +ip6_ethernet_mac_address_from_link_local_address (u8 * mac, + ip6_address_t * ip); + +int vnet_set_ip6_flow_hash (u32 table_id, + flow_hash_config_t flow_hash_config); + +clib_error_t *enable_ip6_interface (vlib_main_t * vm, u32 sw_if_index); + +clib_error_t *disable_ip6_interface (vlib_main_t * vm, u32 sw_if_index); + +int ip6_interface_enabled (vlib_main_t * vm, u32 sw_if_index); + +clib_error_t *set_ip6_link_local_address (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * address); + +int vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm, + void *data_callback, + u32 pid, + void *address_arg, + uword node_index, + uword type_opaque, + uword data, int is_add); + +int vnet_ip6_nd_term (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_buffer_t * p0, + ethernet_header_t * eth, + ip6_header_t * ip, u32 sw_if_index, u16 bd_index); + +void send_ip6_na (vlib_main_t * vm, 
vnet_hw_interface_t * hi); + +u8 *format_ip6_forward_next_trace (u8 * s, va_list * args); + +u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0); + +int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, + u32 table_index); +extern vlib_node_registration_t ip6_lookup_node; + +/* Compute flow hash. We'll use it to select which Sponge to use for this + flow. And other things. */ +always_inline u32 +ip6_compute_flow_hash (const ip6_header_t * ip, + flow_hash_config_t flow_hash_config) +{ + tcp_header_t *tcp; + u64 a, b, c; + u64 t1, t2; + uword is_tcp_udp = 0; + u8 protocol = ip->protocol; + + if (PREDICT_TRUE + ((ip->protocol == IP_PROTOCOL_TCP) + || (ip->protocol == IP_PROTOCOL_UDP))) + { + is_tcp_udp = 1; + tcp = (void *) (ip + 1); + } + else if (ip->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + { + ip6_hop_by_hop_header_t *hbh = (ip6_hop_by_hop_header_t *) (ip + 1); + if ((hbh->protocol == IP_PROTOCOL_TCP) || + (hbh->protocol == IP_PROTOCOL_UDP)) + { + is_tcp_udp = 1; + tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3)); + } + protocol = hbh->protocol; + } + + t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]); + t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR) ? t1 : 0; + + t2 = (ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1]); + t2 = (flow_hash_config & IP_FLOW_HASH_DST_ADDR) ? t2 : 0; + + a = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t2 : t1; + b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2; + b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? protocol : 0; + + t1 = is_tcp_udp ? tcp->src : 0; + t2 = is_tcp_udp ? tcp->dst : 0; + + t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0; + t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0; + + c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? 
+ ((t1 << 16) | t2) : ((t2 << 16) | t1); + + hash_mix64 (a, b, c); + return (u32) c; +} + +/* ip6_locate_header + * + * This function is to search for the header specified by the protocol number + * in find_hdr_type. + * This is used to locate a specific IPv6 extension header + * or to find transport layer header. + * 1. If the find_hdr_type < 0 then it finds and returns the protocol number and + * offset stored in *offset of the transport or ESP header in the chain if + * found. + * 2. If a header with find_hdr_type > 0 protocol number is found then the + * offset is stored in *offset and protocol number of the header is + * returned. + * 3. If find_hdr_type is not found or packet is malformed or + * it is a non-first fragment -1 is returned. + */ +always_inline int +ip6_locate_header (vlib_buffer_t * p0, + ip6_header_t * ip0, int find_hdr_type, u32 * offset) +{ + u8 next_proto = ip0->protocol; + u8 *next_header; + u8 done = 0; + u32 cur_offset; + u8 *temp_nxthdr = 0; + u32 exthdr_len = 0; + + next_header = ip6_next_header (ip0); + cur_offset = sizeof (ip6_header_t); + while (1) + { + done = (next_proto == find_hdr_type); + if (PREDICT_FALSE + (next_header >= + (u8 *) vlib_buffer_get_current (p0) + p0->current_length)) + { + //A malicious packet could set an extension header with a too big size + return (-1); + } + if (done) + break; + if ((!ip6_ext_hdr (next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT) + { + if (find_hdr_type < 0) + break; + return -1; + } + if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION) + { + ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) next_header; + u16 frag_off = ip6_frag_hdr_offset (frag_hdr); + /* Non first fragment return -1 */ + if (frag_off) + return (-1); + exthdr_len = sizeof (ip6_frag_hdr_t); + temp_nxthdr = next_header + exthdr_len; + } + else if (next_proto == IP_PROTOCOL_IPSEC_AH) + { + exthdr_len = + ip6_ext_authhdr_len (((ip6_ext_header_t *) next_header)); + temp_nxthdr = next_header + exthdr_len; + } + else + { + 
exthdr_len = + ip6_ext_header_len (((ip6_ext_header_t *) next_header)); + temp_nxthdr = next_header + exthdr_len; + } + next_proto = ((ip6_ext_header_t *) next_header)->next_hdr; + next_header = temp_nxthdr; + cur_offset += exthdr_len; + } + + *offset = cur_offset; + return (next_proto); +} + +u8 *format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args); +/* + * Hop-by-Hop handling + */ +typedef struct +{ + /* Array of function pointers to HBH option handling routines */ + int (*options[256]) (vlib_buffer_t * b, ip6_header_t * ip, + ip6_hop_by_hop_option_t * opt); + u8 *(*trace[256]) (u8 * s, ip6_hop_by_hop_option_t * opt); + uword next_override; +} ip6_hop_by_hop_main_t; + +extern ip6_hop_by_hop_main_t ip6_hop_by_hop_main; + +int ip6_hbh_register_option (u8 option, + int options (vlib_buffer_t * b, + ip6_header_t * ip, + ip6_hop_by_hop_option_t * opt), + u8 * trace (u8 * s, + ip6_hop_by_hop_option_t * opt)); +int ip6_hbh_unregister_option (u8 option); +void ip6_hbh_set_next_override (uword next); + +/** + * Push IPv6 header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param src - source IP + * @param dst - destination IP + * @param prot - payload proto + * + * @return - pointer to start of IP header + */ +always_inline void * +vlib_buffer_push_ip6 (vlib_main_t * vm, vlib_buffer_t * b, + ip6_address_t * src, ip6_address_t * dst, int proto) +{ + ip6_header_t *ip6h; + u16 payload_length; + + /* make some room */ + ip6h = vlib_buffer_push_uninit (b, sizeof (ip6_header_t)); + + ip6h->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + + /* calculate ip6 payload length */ + payload_length = vlib_buffer_length_in_chain (vm, b); + payload_length -= sizeof (*ip6h); + + ip6h->payload_length = clib_host_to_net_u16 (payload_length); + + ip6h->hop_limit = 0xff; + ip6h->protocol = proto; + clib_memcpy (ip6h->src_address.as_u8, src->as_u8, + sizeof (ip6h->src_address)); + clib_memcpy (ip6h->dst_address.as_u8, 
dst->as_u8, + sizeof (ip6h->src_address)); + b->flags |= VNET_BUFFER_F_IS_IP6; + + return ip6h; +} + +#endif /* included_ip_ip6_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip6_error.h b/src/vnet/ip/ip6_error.h new file mode 100644 index 00000000..a2807169 --- /dev/null +++ b/src/vnet/ip/ip6_error.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip6_error.h: ip6 fast path errors + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip_ip6_error_h +#define included_ip_ip6_error_h + +#define foreach_ip6_error \ + /* Must be first. */ \ + _ (NONE, "valid ip6 packets") \ + \ + /* Errors signalled by ip6-input */ \ + _ (TOO_SHORT, "ip6 length < 40 bytes") \ + _ (BAD_LENGTH, "ip6 length > l2 length") \ + _ (VERSION, "ip6 version != 6") \ + _ (TIME_EXPIRED, "ip6 ttl <= 1") \ + \ + /* Errors signalled by ip6-rewrite. */ \ + _ (MTU_EXCEEDED, "ip6 MTU exceeded") \ + _ (DST_LOOKUP_MISS, "ip6 destination lookup miss") \ + _ (SRC_LOOKUP_MISS, "ip6 source lookup miss") \ + _ (ADJACENCY_DROP, "ip6 adjacency drop") \ + _ (ADJACENCY_PUNT, "ip6 adjacency punt") \ + \ + /* Errors signalled by ip6-local. */ \ + _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \ + _ (UDP_CHECKSUM, "bad udp checksum") \ + _ (ICMP_CHECKSUM, "bad icmp checksum") \ + _ (UDP_LENGTH, "inconsistent udp/ip lengths") \ + \ + /* Errors signalled by udp6-lookup. 
*/ \ + _ (UNKNOWN_UDP_PORT, "no listener for udp port") \ + \ + /* Spoofed packets in ip6-rewrite-local */ \ + _(SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \ + \ + /* Erros singalled by ip6-inacl */ \ + _ (INACL_TABLE_MISS, "input ACL table-miss drops") \ + _ (INACL_SESSION_DENY, "input ACL session deny drops") + +typedef enum +{ +#define _(sym,str) IP6_ERROR_##sym, + foreach_ip6_error +#undef _ + IP6_N_ERROR, +} ip6_error_t; + +#endif /* included_ip_ip6_error_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip6_format.c b/src/vnet/ip/ip6_format.c new file mode 100644 index 00000000..56899b73 --- /dev/null +++ b/src/vnet/ip/ip6_format.c @@ -0,0 +1,383 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/ip6_format.c: ip6 formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> + +/* Format an IP6 address. */ +u8 * +format_ip6_address (u8 * s, va_list * args) +{ + ip6_address_t *a = va_arg (*args, ip6_address_t *); + u32 max_zero_run = 0, this_zero_run = 0; + int max_zero_run_index = -1, this_zero_run_index = 0; + int in_zero_run = 0, i; + int last_double_colon = 0; + + /* Ugh, this is a pain. 
Scan forward looking for runs of 0's */ + for (i = 0; i < ARRAY_LEN (a->as_u16); i++) + { + if (a->as_u16[i] == 0) + { + if (in_zero_run) + this_zero_run++; + else + { + in_zero_run = 1; + this_zero_run = 1; + this_zero_run_index = i; + } + } + else + { + if (in_zero_run) + { + /* offer to compress the biggest run of > 1 zero */ + if (this_zero_run > max_zero_run && this_zero_run > 1) + { + max_zero_run_index = this_zero_run_index; + max_zero_run = this_zero_run; + } + } + in_zero_run = 0; + this_zero_run = 0; + } + } + + if (in_zero_run) + { + if (this_zero_run > max_zero_run && this_zero_run > 1) + { + max_zero_run_index = this_zero_run_index; + max_zero_run = this_zero_run; + } + } + + for (i = 0; i < ARRAY_LEN (a->as_u16); i++) + { + if (i == max_zero_run_index) + { + s = format (s, "::"); + i += max_zero_run - 1; + last_double_colon = 1; + } + else + { + s = format (s, "%s%x", + (last_double_colon || i == 0) ? "" : ":", + clib_net_to_host_u16 (a->as_u16[i])); + last_double_colon = 0; + } + } + + return s; +} + +/* Format an IP6 route destination and length. */ +u8 * +format_ip6_address_and_length (u8 * s, va_list * args) +{ + ip6_address_t *a = va_arg (*args, ip6_address_t *); + u8 l = va_arg (*args, u32); + return format (s, "%U/%d", format_ip6_address, a, l); +} + +/* Parse an IP6 address. 
*/ +uword +unformat_ip6_address (unformat_input_t * input, va_list * args) +{ + ip6_address_t *result = va_arg (*args, ip6_address_t *); + u16 hex_quads[8]; + uword hex_quad, n_hex_quads, hex_digit, n_hex_digits; + uword c, n_colon, double_colon_index; + + n_hex_quads = hex_quad = n_hex_digits = n_colon = 0; + double_colon_index = ARRAY_LEN (hex_quads); + while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT) + { + hex_digit = 16; + if (c >= '0' && c <= '9') + hex_digit = c - '0'; + else if (c >= 'a' && c <= 'f') + hex_digit = c + 10 - 'a'; + else if (c >= 'A' && c <= 'F') + hex_digit = c + 10 - 'A'; + else if (c == ':' && n_colon < 2) + n_colon++; + else + { + unformat_put_input (input); + break; + } + + /* Too many hex quads. */ + if (n_hex_quads >= ARRAY_LEN (hex_quads)) + return 0; + + if (hex_digit < 16) + { + hex_quad = (hex_quad << 4) | hex_digit; + + /* Hex quad must fit in 16 bits. */ + if (n_hex_digits >= 4) + return 0; + + n_colon = 0; + n_hex_digits++; + } + + /* Save position of :: */ + if (n_colon == 2) + { + /* More than one :: ? */ + if (double_colon_index < ARRAY_LEN (hex_quads)) + return 0; + double_colon_index = n_hex_quads; + } + + if (n_colon > 0 && n_hex_digits > 0) + { + hex_quads[n_hex_quads++] = hex_quad; + hex_quad = 0; + n_hex_digits = 0; + } + } + + if (n_hex_digits > 0) + hex_quads[n_hex_quads++] = hex_quad; + + { + word i; + + /* Expand :: to appropriate number of zero hex quads. */ + if (double_colon_index < ARRAY_LEN (hex_quads)) + { + word n_zero = ARRAY_LEN (hex_quads) - n_hex_quads; + + for (i = n_hex_quads - 1; i >= (signed) double_colon_index; i--) + hex_quads[n_zero + i] = hex_quads[i]; + + for (i = 0; i < n_zero; i++) + { + ASSERT ((double_colon_index + i) < ARRAY_LEN (hex_quads)); + hex_quads[double_colon_index + i] = 0; + } + + n_hex_quads = ARRAY_LEN (hex_quads); + } + + /* Too few hex quads given. 
*/ + if (n_hex_quads < ARRAY_LEN (hex_quads)) + return 0; + + for (i = 0; i < ARRAY_LEN (hex_quads); i++) + result->as_u16[i] = clib_host_to_net_u16 (hex_quads[i]); + + return 1; + } +} + +/* Format an IP6 header. */ +u8 * +format_ip6_header (u8 * s, va_list * args) +{ + ip6_header_t *ip = va_arg (*args, ip6_header_t *); + u32 max_header_bytes = va_arg (*args, u32); + u32 i, ip_version, traffic_class, flow_label; + uword indent; + + /* Nothing to do. */ + if (max_header_bytes < sizeof (ip[0])) + return format (s, "IP header truncated"); + + indent = format_get_indent (s); + indent += 2; + + s = format (s, "%U: %U -> %U", + format_ip_protocol, ip->protocol, + format_ip6_address, &ip->src_address, + format_ip6_address, &ip->dst_address); + + i = clib_net_to_host_u32 (ip->ip_version_traffic_class_and_flow_label); + ip_version = (i >> 28); + traffic_class = (i >> 20) & 0xff; + flow_label = i & pow2_mask (20); + + if (ip_version != 6) + s = format (s, "\n%Uversion %d", format_white_space, indent, ip_version); + + s = + format (s, + "\n%Utos 0x%02x, flow label 0x%x, hop limit %d, payload length %d", + format_white_space, indent, traffic_class, flow_label, + ip->hop_limit, clib_net_to_host_u16 (ip->payload_length)); + + /* Recurse into next protocol layer. */ + if (max_header_bytes != 0 && sizeof (ip[0]) < max_header_bytes) + { + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol); + + if (pi && pi->format_header) + s = format (s, "\n%U%U", + format_white_space, indent - 2, pi->format_header, + /* next protocol header */ (void *) (ip + 1), + max_header_bytes - sizeof (ip[0])); + } + + return s; +} + +/* Parse an IP6 header. */ +uword +unformat_ip6_header (unformat_input_t * input, va_list * args) +{ + u8 **result = va_arg (*args, u8 **); + ip6_header_t *ip; + int old_length; + + /* Allocate space for IP header. 
*/ + { + void *p; + + old_length = vec_len (*result); + vec_add2 (*result, p, sizeof (ip[0])); + ip = p; + } + + memset (ip, 0, sizeof (ip[0])); + ip->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (6 << 28); + + if (!unformat (input, "%U: %U -> %U", + unformat_ip_protocol, &ip->protocol, + unformat_ip6_address, &ip->src_address, + unformat_ip6_address, &ip->dst_address)) + return 0; + + /* Parse options. */ + while (1) + { + int i; + + if (unformat (input, "tos %U", unformat_vlib_number, &i)) + ip->ip_version_traffic_class_and_flow_label |= + clib_host_to_net_u32 ((i & 0xff) << 20); + + else if (unformat (input, "hop-limit %U", unformat_vlib_number, &i)) + ip->hop_limit = i; + + /* Can't parse input: try next protocol level. */ + else + break; + } + + /* Recurse into next protocol layer. */ + { + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol); + + if (pi && pi->unformat_header) + { + if (!unformat_user (input, pi->unformat_header, result)) + return 0; + + /* Result may have moved. */ + ip = (void *) *result + old_length; + } + } + + ip->payload_length = + clib_host_to_net_u16 (vec_len (*result) - (old_length + sizeof (ip[0]))); + + return 1; +} + +/* Parse an IP46 address. */ +uword +unformat_ip46_address (unformat_input_t * input, va_list * args) +{ + ip46_address_t *ip46 = va_arg (*args, ip46_address_t *); + ip46_type_t type = va_arg (*args, ip46_type_t); + if ((type != IP46_TYPE_IP6) && + unformat (input, "%U", unformat_ip4_address, &ip46->ip4)) + { + ip46_address_mask_ip4 (ip46); + return 1; + } + else if ((type != IP46_TYPE_IP4) && + unformat (input, "%U", unformat_ip6_address, &ip46->ip6)) + { + return 1; + } + return 0; +} + +/* Format an IP46 address. 
*/ +u8 * +format_ip46_address (u8 * s, va_list * args) +{ + ip46_address_t *ip46 = va_arg (*args, ip46_address_t *); + ip46_type_t type = va_arg (*args, ip46_type_t); + int is_ip4 = 1; + + switch (type) + { + case IP46_TYPE_ANY: + is_ip4 = ip46_address_is_ip4 (ip46); + break; + case IP46_TYPE_IP4: + is_ip4 = 1; + break; + case IP46_TYPE_IP6: + is_ip4 = 0; + break; + } + + return is_ip4 ? + format (s, "%U", format_ip4_address, &ip46->ip4) : + format (s, "%U", format_ip6_address, &ip46->ip6); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c new file mode 100644 index 00000000..54582d38 --- /dev/null +++ b/src/vnet/ip/ip6_forward.c @@ -0,0 +1,3558 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/ip6_forward.c: IP v6 forwarding + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/ip6_neighbor.h> +#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */ +#include <vnet/srp/srp.h> /* for srp_hw_interface_class */ +#include <vppinfra/cache.h> +#include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */ +#include <vnet/fib/ip6_fib.h> +#include <vnet/mfib/ip6_mfib.h> +#include <vnet/dpo/load_balance_map.h> +#include <vnet/dpo/classify_dpo.h> + +#include <vppinfra/bihash_template.c> + +/* Flag used by IOAM code. Classifier sets it pop-hop-by-hop checks it */ +#define OI_DECAP 0x80000000 + +/** + * @file + * @brief IPv6 Forwarding. + * + * This file contains the source code for IPv6 forwarding. 
+ */ + +void +ip6_forward_next_trace (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + vlib_rx_or_tx_t which_adj_index); +

/**
 * @brief Per-frame IPv6 forwarding lookup.
 *
 * For each buffer: the FIB index is taken from the RX interface, or from
 * sw_if_index[VLIB_TX] when that field is not ~0; the destination address
 * is looked up; a load-balance bucket is chosen (by flow hash when the
 * load-balance has more than one bucket, bucket 0 otherwise); the chosen
 * DPO index is stored in ip.adj_index[VLIB_TX]; the per-load-balance
 * combined counter is incremented; and the buffer is enqueued to the DPO's
 * next node.  Packets whose IPv6 next header is hop-by-hop options are
 * steered to IP6_LOOKUP_NEXT_HOP_BY_HOP when the DPO is an adjacency and
 * im->hbh_enabled is set.
 *
 * @return frame->n_vectors.
 */
always_inline uword
ip6_lookup_inline (vlib_main_t * vm,
                   vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  ip6_main_t *im = &ip6_main;
  vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
  u32 n_left_from, n_left_to_next, *from, *to_next;
  ip_lookup_next_t next;
  u32 thread_index = vlib_get_thread_index ();

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next = node->cached_next_index;

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);

      /* Dual loop: two buffers per iteration, prefetching the next two. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          vlib_buffer_t *p0, *p1;
          u32 pi0, pi1, lbi0, lbi1, wrong_next;
          ip_lookup_next_t next0, next1;
          ip6_header_t *ip0, *ip1;
          ip6_address_t *dst_addr0, *dst_addr1;
          u32 fib_index0, fib_index1;
          u32 flow_hash_config0, flow_hash_config1;
          const dpo_id_t *dpo0, *dpo1;
          const load_balance_t *lb0, *lb1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);
            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
          }

          /* Speculatively enqueue both buffers to the current next frame. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);

          dst_addr0 = &ip0->dst_address;
          dst_addr1 = &ip1->dst_address;

          fib_index0 =
            vec_elt (im->fib_index_by_sw_if_index,
                     vnet_buffer (p0)->sw_if_index[VLIB_RX]);
          fib_index1 =
            vec_elt (im->fib_index_by_sw_if_index,
                     vnet_buffer (p1)->sw_if_index[VLIB_RX]);

          /* A value other than ~0 in sw_if_index[VLIB_TX] overrides the
           * FIB index derived from the RX interface. */
          fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
            fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
          fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
            fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];

          lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
          lbi1 = ip6_fib_table_fwding_lookup (im, fib_index1, dst_addr1);

          lb0 = load_balance_get (lbi0);
          lb1 = load_balance_get (lbi1);
          ASSERT (lb0->lb_n_buckets > 0);
          ASSERT (lb1->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb0->lb_n_buckets));
          ASSERT (is_pow2 (lb1->lb_n_buckets));

          vnet_buffer (p0)->ip.flow_hash = vnet_buffer (p1)->ip.flow_hash = 0;

          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
            {
              flow_hash_config0 = lb0->lb_hash_config;
              vnet_buffer (p0)->ip.flow_hash =
                ip6_compute_flow_hash (ip0, flow_hash_config0);
              dpo0 =
                load_balance_get_fwd_bucket (lb0,
                                             (vnet_buffer (p0)->ip.flow_hash &
                                              (lb0->lb_n_buckets_minus_1)));
            }
          else
            {
              dpo0 = load_balance_get_bucket_i (lb0, 0);
            }
          if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
            {
              flow_hash_config1 = lb1->lb_hash_config;
              vnet_buffer (p1)->ip.flow_hash =
                ip6_compute_flow_hash (ip1, flow_hash_config1);
              dpo1 =
                load_balance_get_fwd_bucket (lb1,
                                             (vnet_buffer (p1)->ip.flow_hash &
                                              (lb1->lb_n_buckets_minus_1)));
            }
          else
            {
              dpo1 = load_balance_get_bucket_i (lb1, 0);
            }
          next0 = dpo0->dpoi_next_node;
          next1 = dpo1->dpoi_next_node;

          /* Only process the HBH Option Header if explicitly configured to do so */
          if (PREDICT_FALSE
              (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
            {
              next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
                (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
            }
          if (PREDICT_FALSE
              (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
            {
              next1 = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
                (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next1;
            }
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;

          vlib_increment_combined_counter
            (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
          vlib_increment_combined_counter
            (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));

          from += 2;
          to_next += 2;
          n_left_to_next -= 2;
          n_left_from -= 2;

          /* Bit 0: buffer 0 mis-speculated; bit 1: buffer 1 did. */
          wrong_next = (next0 != next) + 2 * (next1 != next);
          if (PREDICT_FALSE (wrong_next != 0))
            {
              switch (wrong_next)
                {
                case 1:
                  /* A B A */
                  to_next[-2] = pi1;
                  to_next -= 1;
                  n_left_to_next += 1;
                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
                  break;

                case 2:
                  /* A A B */
                  to_next -= 1;
                  n_left_to_next += 1;
                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
                  break;

                case 3:
                  /* A B C */
                  to_next -= 2;
                  n_left_to_next += 2;
                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
                  if (next0 == next1)
                    {
                      /* A B B */
                      vlib_put_next_frame (vm, node, next, n_left_to_next);
                      next = next1;
                      vlib_get_next_frame (vm, node, next, to_next,
                                           n_left_to_next);
                    }
                }
            }
        }

      /* Single loop: remaining buffers, one per iteration. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t *p0;
          ip6_header_t *ip0;
          u32 pi0, lbi0;
          ip_lookup_next_t next0;
          const load_balance_t *lb0;
          ip6_address_t *dst_addr0;
          u32 fib_index0, flow_hash_config0;
          const dpo_id_t *dpo0;

          pi0 = from[0];
          to_next[0] = pi0;

          p0 = vlib_get_buffer (vm, pi0);

          ip0 = vlib_buffer_get_current (p0);

          dst_addr0 = &ip0->dst_address;

          fib_index0 =
            vec_elt (im->fib_index_by_sw_if_index,
                     vnet_buffer (p0)->sw_if_index[VLIB_RX]);
          fib_index0 =
            (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
             (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];

          lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);

          lb0 = load_balance_get (lbi0);

          vnet_buffer (p0)->ip.flow_hash = 0;
          ASSERT (lb0->lb_n_buckets > 0);
          ASSERT (is_pow2 (lb0->lb_n_buckets));

          /*
           * Bucket selection mirrors the dual loop.  The DPO chosen here
           * is final: it must not be re-derived afterwards with
           * load_balance_get_bucket_i(), which would discard the result of
           * load_balance_get_fwd_bucket().
           */
          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
            {
              flow_hash_config0 = lb0->lb_hash_config;
              vnet_buffer (p0)->ip.flow_hash =
                ip6_compute_flow_hash (ip0, flow_hash_config0);
              dpo0 =
                load_balance_get_fwd_bucket (lb0,
                                             (vnet_buffer (p0)->ip.flow_hash &
                                              (lb0->lb_n_buckets_minus_1)));
            }
          else
            {
              dpo0 = load_balance_get_bucket_i (lb0, 0);
            }

          next0 = dpo0->dpoi_next_node;

          /* Only process the HBH Option Header if explicitly configured to do so */
          if (PREDICT_FALSE
              (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
            {
              next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
                (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
            }
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;

          vlib_increment_combined_counter
            (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));

          from += 1;
          to_next += 1;
          n_left_to_next -= 1;
          n_left_from -= 1;

          /* Mis-speculation: take the buffer back and switch next frame. */
          if (PREDICT_FALSE (next0 != next))
            {
              n_left_to_next += 1;
              vlib_put_next_frame (vm, node, next, n_left_to_next);
              next = next0;
              vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
              to_next[0] = pi0;
              to_next += 1;
              n_left_to_next -= 1;
            }
        }

      vlib_put_next_frame (vm, node, next, n_left_to_next);
    }

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip6_forward_next_trace (vm, node, frame, VLIB_TX);

  return frame->n_vectors;
}

+static void +ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index, + ip6_main_t * im, u32 fib_index, + ip_interface_address_t * a) +{ + ip_lookup_main_t *lm = &im->lookup_main; + ip6_address_t *address = ip_interface_address_get_address (lm, a); + fib_prefix_t pfx = { + .fp_len = a->address_length, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = *address, + }; + + if (a->address_length < 128) + { + fib_table_entry_update_one_path (fib_index, + &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_ATTACHED), + DPO_PROTO_IP6, + /* No next-hop address */ + NULL, sw_if_index, + /* invalid FIB index */ + ~0, 1, + /* no label stack */ + NULL, FIB_ROUTE_PATH_FLAG_NONE); + } + + pfx.fp_len = 128; + if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index)) + { + u32 classify_table_index = + lm->classify_table_index_by_sw_if_index[sw_if_index]; + if (classify_table_index != (u32) ~ 0) + { + dpo_id_t dpo = DPO_INVALID; + + dpo_set (&dpo, + DPO_CLASSIFY, + DPO_PROTO_IP6, + classify_dpo_create (DPO_PROTO_IP6, classify_table_index)); + + fib_table_entry_special_dpo_add (fib_index, + &pfx, + FIB_SOURCE_CLASSIFY, + FIB_ENTRY_FLAG_NONE, &dpo); + dpo_reset (&dpo); + } + } + + 
fib_table_entry_update_one_path (fib_index, &pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_CONNECTED | + FIB_ENTRY_FLAG_LOCAL), + DPO_PROTO_IP6, + &pfx.fp_addr, + sw_if_index, ~0, + 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); +} + +static void +ip6_del_interface_routes (ip6_main_t * im, + u32 fib_index, + ip6_address_t * address, u32 address_length) +{ + fib_prefix_t pfx = { + .fp_len = address_length, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = *address, + }; + + if (pfx.fp_len < 128) + { + fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE); + + } + + pfx.fp_len = 128; + fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE); +} + +void +ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) +{ + ip6_main_t *im = &ip6_main; + + vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0); + + /* + * enable/disable only on the 1<->0 transition + */ + if (is_enable) + { + if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index]) + return; + } + else + { + /* The ref count is 0 when an address is removed from an interface that has + * no address - this is not a ciritical error */ + if (0 == im->ip_enabled_by_sw_if_index[sw_if_index] || + 0 != --im->ip_enabled_by_sw_if_index[sw_if_index]) + return; + } + + vnet_feature_enable_disable ("ip6-unicast", "ip6-drop", sw_if_index, + !is_enable, 0, 0); + + vnet_feature_enable_disable ("ip6-multicast", "ip6-drop", sw_if_index, + !is_enable, 0, 0); +} + +/* get first interface address */ +ip6_address_t * +ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index) +{ + ip_lookup_main_t *lm = &im->lookup_main; + ip_interface_address_t *ia = 0; + ip6_address_t *result = 0; + + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + ip6_address_t * a = ip_interface_address_get_address (lm, ia); + result = a; + break; + })); + /* *INDENT-ON* */ + return result; +} + +clib_error_t * +ip6_add_del_interface_address (vlib_main_t * vm, + u32 
sw_if_index, + ip6_address_t * address, + u32 address_length, u32 is_del) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_main_t *im = &ip6_main; + ip_lookup_main_t *lm = &im->lookup_main; + clib_error_t *error; + u32 if_address_index; + ip6_address_fib_t ip6_af, *addr_fib = 0; + + /* local0 interface doesn't support IP addressing */ + if (sw_if_index == 0) + { + return + clib_error_create ("local0 interface doesn't support IP addressing"); + } + + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + vec_validate (im->mfib_index_by_sw_if_index, sw_if_index); + + ip6_addr_fib_init (&ip6_af, address, + vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); + vec_add1 (addr_fib, ip6_af); + + { + uword elts_before = pool_elts (lm->if_address_pool); + + error = ip_interface_address_add_del + (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index); + if (error) + goto done; + + /* Pool did not grow: add duplicate address. */ + if (elts_before == pool_elts (lm->if_address_pool)) + goto done; + } + + ip6_sw_interface_enable_disable (sw_if_index, !is_del); + + if (is_del) + ip6_del_interface_routes (im, ip6_af.fib_index, address, address_length); + else + ip6_add_interface_routes (vnm, sw_if_index, + im, ip6_af.fib_index, + pool_elt_at_index (lm->if_address_pool, + if_address_index)); + + { + ip6_add_del_interface_address_callback_t *cb; + vec_foreach (cb, im->add_del_interface_address_callbacks) + cb->function (im, cb->function_opaque, sw_if_index, + address, address_length, if_address_index, is_del); + } + +done: + vec_free (addr_fib); + return error; +} + +clib_error_t * +ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags) +{ + ip6_main_t *im = &ip6_main; + ip_interface_address_t *ia; + ip6_address_t *a; + u32 is_admin_up, fib_index; + + /* Fill in lookup tables with default table (0). 
*/ + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + + vec_validate_init_empty (im-> + lookup_main.if_address_pool_index_by_sw_if_index, + sw_if_index, ~0); + + is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0; + + fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index); + + /* *INDENT-OFF* */ + foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index, + 0 /* honor unnumbered */, + ({ + a = ip_interface_address_get_address (&im->lookup_main, ia); + if (is_admin_up) + ip6_add_interface_routes (vnm, sw_if_index, + im, fib_index, + ia); + else + ip6_del_interface_routes (im, fib_index, + a, ia->address_length); + })); + /* *INDENT-ON* */ + + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down); + +/* Built-in ip6 unicast rx feature path definition */ +/* *INDENT-OFF* */ +VNET_FEATURE_ARC_INIT (ip6_unicast, static) = +{ + .arc_name = "ip6-unicast", + .start_nodes = VNET_FEATURES ("ip6-input"), + .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index, +}; + +VNET_FEATURE_INIT (ip6_flow_classify, static) = +{ + .arc_name = "ip6-unicast", + .node_name = "ip6-flow-classify", + .runs_before = VNET_FEATURES ("ip6-inacl"), +}; + +VNET_FEATURE_INIT (ip6_inacl, static) = +{ + .arc_name = "ip6-unicast", + .node_name = "ip6-inacl", + .runs_before = VNET_FEATURES ("ip6-policer-classify"), +}; + +VNET_FEATURE_INIT (ip6_policer_classify, static) = +{ + .arc_name = "ip6-unicast", + .node_name = "ip6-policer-classify", + .runs_before = VNET_FEATURES ("ipsec-input-ip6"), +}; + +VNET_FEATURE_INIT (ip6_ipsec, static) = +{ + .arc_name = "ip6-unicast", + .node_name = "ipsec-input-ip6", + .runs_before = VNET_FEATURES ("l2tp-decap"), +}; + +VNET_FEATURE_INIT (ip6_l2tp, static) = +{ + .arc_name = "ip6-unicast", + .node_name = "l2tp-decap", + .runs_before = VNET_FEATURES ("vpath-input-ip6"), +}; + +VNET_FEATURE_INIT (ip6_vpath, static) = +{ + .arc_name = "ip6-unicast", + .node_name = "vpath-input-ip6", + 
.runs_before = VNET_FEATURES ("ip6-vxlan-bypass"), +}; + +VNET_FEATURE_INIT (ip6_vxlan_bypass, static) = +{ + .arc_name = "ip6-unicast", + .node_name = "ip6-vxlan-bypass", + .runs_before = VNET_FEATURES ("ip6-lookup"), +}; + +VNET_FEATURE_INIT (ip6_drop, static) = +{ + .arc_name = "ip6-unicast", + .node_name = "ip6-drop", + .runs_before = VNET_FEATURES ("ip6-lookup"), +}; + +VNET_FEATURE_INIT (ip6_lookup, static) = +{ + .arc_name = "ip6-unicast", + .node_name = "ip6-lookup", + .runs_before = 0, /*last feature*/ +}; + +/* Built-in ip6 multicast rx feature path definition (none now) */ +VNET_FEATURE_ARC_INIT (ip6_multicast, static) = +{ + .arc_name = "ip6-multicast", + .start_nodes = VNET_FEATURES ("ip6-input"), + .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index, +}; + +VNET_FEATURE_INIT (ip6_vpath_mc, static) = { + .arc_name = "ip6-multicast", + .node_name = "vpath-input-ip6", + .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"), +}; + +VNET_FEATURE_INIT (ip6_drop_mc, static) = { + .arc_name = "ip6-multicast", + .node_name = "ip6-drop", + .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"), +}; + +VNET_FEATURE_INIT (ip6_mc_lookup, static) = { + .arc_name = "ip6-multicast", + .node_name = "ip6-mfib-forward-lookup", + .runs_before = 0, /* last feature */ +}; + +/* Built-in ip6 tx feature path definition */ +VNET_FEATURE_ARC_INIT (ip6_output, static) = +{ + .arc_name = "ip6-output", + .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain"), + .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index, +}; + +VNET_FEATURE_INIT (ip6_ipsec_output, static) = { + .arc_name = "ip6-output", + .node_name = "ipsec-output-ip6", + .runs_before = VNET_FEATURES ("interface-output"), +}; + +VNET_FEATURE_INIT (ip6_interface_output, static) = { + .arc_name = "ip6-output", + .node_name = "interface-output", + .runs_before = 0, /* not before any other features */ +}; +/* *INDENT-ON* */ + +clib_error_t * +ip6_sw_interface_add_del (vnet_main_t * vnm, u32 
sw_if_index, u32 is_add) +{ + ip6_main_t *im = &ip6_main; + + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); + vec_validate (im->mfib_index_by_sw_if_index, sw_if_index); + + if (!is_add) + { + /* Ensure that IPv6 is disabled */ + ip6_main_t *im6 = &ip6_main; + ip_lookup_main_t *lm6 = &im6->lookup_main; + ip_interface_address_t *ia = 0; + ip6_address_t *address; + vlib_main_t *vm = vlib_get_main (); + + ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0 /* is_add */ ); + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* honor unnumbered */, + ({ + address = ip_interface_address_get_address (lm6, ia); + ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1); + })); + /* *INDENT-ON* */ + ip6_mfib_interface_enable_disable (sw_if_index, 0); + } + + vnet_feature_enable_disable ("ip6-unicast", "ip6-drop", sw_if_index, + is_add, 0, 0); + + vnet_feature_enable_disable ("ip6-multicast", "ip6-drop", sw_if_index, + is_add, 0, 0); + + return /* no error */ 0; +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del); + +static uword +ip6_lookup (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip6_lookup_inline (vm, node, frame); +} + +static u8 *format_ip6_lookup_trace (u8 * s, va_list * args); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_lookup_node) = +{ + .function = ip6_lookup, + .name = "ip6-lookup", + .vector_size = sizeof (u32), + .format_trace = format_ip6_lookup_trace, + .n_next_nodes = IP6_LOOKUP_N_NEXT, + .next_nodes = IP6_LOOKUP_NEXT_NODES, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup); + +always_inline uword +ip6_load_balance (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters; + u32 n_left_from, n_left_to_next, *from, *to_next; + ip_lookup_next_t next; + u32 thread_index = vlib_get_thread_index (); + ip6_main_t *im = 
&ip6_main; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace (vm, node, frame, VLIB_TX); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + ip_lookup_next_t next0, next1; + const load_balance_t *lb0, *lb1; + vlib_buffer_t *p0, *p1; + u32 pi0, lbi0, hc0, pi1, lbi1, hc1; + const ip6_header_t *ip0, *ip1; + const dpo_id_t *dpo0, *dpo1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, STORE); + vlib_prefetch_buffer_header (p3, STORE); + + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get (lbi0); + lb1 = load_balance_get (lbi1); + + /* + * this node is for via FIBs we can re-use the hash value from the + * to node if present. 
+ * We don't want to use the same hash value at each level in the recursion + * graph as that would lead to polarisation + */ + hc0 = hc1 = 0; + + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash)) + { + hc0 = vnet_buffer (p0)->ip.flow_hash = + vnet_buffer (p0)->ip.flow_hash >> 1; + } + else + { + hc0 = vnet_buffer (p0)->ip.flow_hash = + ip6_compute_flow_hash (ip0, lb0->lb_hash_config); + } + dpo0 = + load_balance_get_fwd_bucket (lb0, + (hc0 & + lb0->lb_n_buckets_minus_1)); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); + } + if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash)) + { + hc1 = vnet_buffer (p1)->ip.flow_hash = + vnet_buffer (p1)->ip.flow_hash >> 1; + } + else + { + hc1 = vnet_buffer (p1)->ip.flow_hash = + ip6_compute_flow_hash (ip1, lb1->lb_hash_config); + } + dpo1 = + load_balance_get_fwd_bucket (lb1, + (hc1 & + lb1->lb_n_buckets_minus_1)); + } + else + { + dpo1 = load_balance_get_bucket_i (lb1, 0); + } + + next0 = dpo0->dpoi_next_node; + next1 = dpo1->dpoi_next_node; + + /* Only process the HBH Option Header if explicitly configured to do so */ + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ? + (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0; + } + /* Only process the HBH Option Header if explicitly configured to do so */ + if (PREDICT_FALSE + (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + next1 = (dpo_is_adj (dpo1) && im->hbh_enabled) ? 
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next1; + } + + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + vlib_increment_combined_counter + (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1)); + + vlib_validate_buffer_enqueue_x2 (vm, node, next, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_lookup_next_t next0; + const load_balance_t *lb0; + vlib_buffer_t *p0; + u32 pi0, lbi0, hc0; + const ip6_header_t *ip0; + const dpo_id_t *dpo0; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_to_next -= 1; + n_left_from -= 1; + + p0 = vlib_get_buffer (vm, pi0); + + ip0 = vlib_buffer_get_current (p0); + lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + lb0 = load_balance_get (lbi0); + + hc0 = 0; + if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) + { + if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash)) + { + hc0 = vnet_buffer (p0)->ip.flow_hash = + vnet_buffer (p0)->ip.flow_hash >> 1; + } + else + { + hc0 = vnet_buffer (p0)->ip.flow_hash = + ip6_compute_flow_hash (ip0, lb0->lb_hash_config); + } + dpo0 = + load_balance_get_fwd_bucket (lb0, + (hc0 & + lb0->lb_n_buckets_minus_1)); + } + else + { + dpo0 = load_balance_get_bucket_i (lb0, 0); + } + + next0 = dpo0->dpoi_next_node; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; + + /* Only process the HBH Option Header if explicitly configured to do so */ + if (PREDICT_FALSE + (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ? 
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0; + } + + vlib_increment_combined_counter + (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0)); + + vlib_validate_buffer_enqueue_x1 (vm, node, next, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next, n_left_to_next); + } + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_load_balance_node) = +{ + .function = ip6_load_balance, + .name = "ip6-load-balance", + .vector_size = sizeof (u32), + .sibling_of = "ip6-lookup", + .format_trace = format_ip6_lookup_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance); + +typedef struct +{ + /* Adjacency taken. */ + u32 adj_index; + u32 flow_hash; + u32 fib_index; + + /* Packet data, possibly *after* rewrite. */ + u8 packet_data[128 - 1 * sizeof (u32)]; +} +ip6_forward_next_trace_t; + +u8 * +format_ip6_forward_next_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *); + uword indent = format_get_indent (s); + + s = format (s, "%U%U", + format_white_space, indent, + format_ip6_header, t->packet_data, sizeof (t->packet_data)); + return s; +} + +static u8 * +format_ip6_lookup_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *); + uword indent = format_get_indent (s); + + s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x", + t->fib_index, t->adj_index, t->flow_hash); + s = format (s, "\n%U%U", + format_white_space, indent, + format_ip6_header, t->packet_data, sizeof (t->packet_data)); + return s; +} + + +static u8 * +format_ip6_rewrite_trace (u8 * s, va_list * args) +{ + 
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *); + uword indent = format_get_indent (s); + + s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x", + t->fib_index, t->adj_index, format_ip_adjacency, + t->adj_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash); + s = format (s, "\n%U%U", + format_white_space, indent, + format_ip_adjacency_packet_data, + t->adj_index, t->packet_data, sizeof (t->packet_data)); + return s; +} + +/* Common trace function for all ip6-forward next nodes. */ +void +ip6_forward_next_trace (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index) +{ + u32 *from, n_left; + ip6_main_t *im = &ip6_main; + + n_left = frame->n_vectors; + from = vlib_frame_vector_args (frame); + + while (n_left >= 4) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + ip6_forward_next_trace_t *t0, *t1; + + /* Prefetch next iteration. */ + vlib_prefetch_buffer_with_index (vm, from[2], LOAD); + vlib_prefetch_buffer_with_index (vm, from[3], LOAD); + + bi0 = from[0]; + bi1 = from[1]; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index]; + t0->flow_hash = vnet_buffer (b0)->ip.flow_hash; + t0->fib_index = + (vnet_buffer (b0)->sw_if_index[VLIB_TX] != + (u32) ~ 0) ? 
vnet_buffer (b0)->sw_if_index[VLIB_TX] : + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX]); + + clib_memcpy (t0->packet_data, + vlib_buffer_get_current (b0), + sizeof (t0->packet_data)); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0])); + t1->adj_index = vnet_buffer (b1)->ip.adj_index[which_adj_index]; + t1->flow_hash = vnet_buffer (b1)->ip.flow_hash; + t1->fib_index = + (vnet_buffer (b1)->sw_if_index[VLIB_TX] != + (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] : + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (b1)->sw_if_index[VLIB_RX]); + + clib_memcpy (t1->packet_data, + vlib_buffer_get_current (b1), + sizeof (t1->packet_data)); + } + from += 2; + n_left -= 2; + } + + while (n_left >= 1) + { + u32 bi0; + vlib_buffer_t *b0; + ip6_forward_next_trace_t *t0; + + bi0 = from[0]; + + b0 = vlib_get_buffer (vm, bi0); + + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0])); + t0->adj_index = vnet_buffer (b0)->ip.adj_index[which_adj_index]; + t0->flow_hash = vnet_buffer (b0)->ip.flow_hash; + t0->fib_index = + (vnet_buffer (b0)->sw_if_index[VLIB_TX] != + (u32) ~ 0) ? 
vnet_buffer (b0)->sw_if_index[VLIB_TX] : + vec_elt (im->fib_index_by_sw_if_index, + vnet_buffer (b0)->sw_if_index[VLIB_RX]); + + clib_memcpy (t0->packet_data, + vlib_buffer_get_current (b0), + sizeof (t0->packet_data)); + } + from += 1; + n_left -= 1; + } +} + +static uword +ip6_drop_or_punt (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, ip6_error_t error_code) +{ + u32 *buffers = vlib_frame_vector_args (frame); + uword n_packets = frame->n_vectors; + + vlib_error_drop_buffers (vm, node, buffers, + /* stride */ 1, + n_packets, + /* next */ 0, + ip6_input_node.index, error_code); + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace (vm, node, frame, VLIB_TX); + + return n_packets; +} + +static uword +ip6_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_DROP); +} + +static uword +ip6_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip6_drop_or_punt (vm, node, frame, IP6_ERROR_ADJACENCY_PUNT); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_drop_node, static) = +{ + .function = ip6_drop, + .name = "ip6-drop", + .vector_size = sizeof (u32), + .format_trace = format_ip6_forward_next_trace, + .n_next_nodes = 1, + .next_nodes = + { + [0] = "error-drop",}, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_punt_node, static) = +{ + .function = ip6_punt, + .name = "ip6-punt", + .vector_size = sizeof (u32), + .format_trace = format_ip6_forward_next_trace, + .n_next_nodes = 1, + .next_nodes = + { + [0] = "error-punt",}, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt); + +/* Compute TCP/UDP/ICMP6 checksum in software. 
*/ +u16 +ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, + ip6_header_t * ip0, int *bogus_lengthp) +{ + ip_csum_t sum0; + u16 sum16, payload_length_host_byte_order; + u32 i, n_this_buffer, n_bytes_left; + u32 headers_size = sizeof (ip0[0]); + void *data_this_buffer; + + ASSERT (bogus_lengthp); + *bogus_lengthp = 0; + + /* Initialize checksum with ip header. */ + sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol); + payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length); + data_this_buffer = (void *) (ip0 + 1); + + for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++) + { + sum0 = ip_csum_with_carry (sum0, + clib_mem_unaligned (&ip0-> + src_address.as_uword[i], + uword)); + sum0 = + ip_csum_with_carry (sum0, + clib_mem_unaligned (&ip0->dst_address.as_uword[i], + uword)); + } + + /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) + * or UDP-Ping packets */ + if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + u32 skip_bytes; + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) data_this_buffer; + + /* validate really icmp6 next */ + ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_ICMP6) + || (ext_hdr->next_hdr == IP_PROTOCOL_UDP)); + + skip_bytes = 8 * (1 + ext_hdr->n_data_u64s); + data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes); + + payload_length_host_byte_order -= skip_bytes; + headers_size += skip_bytes; + } + + n_bytes_left = n_this_buffer = payload_length_host_byte_order; + if (p0 && n_this_buffer + headers_size > p0->current_length) + n_this_buffer = + p0->current_length > + headers_size ? 
p0->current_length - headers_size : 0; + while (1) + { + sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer); + n_bytes_left -= n_this_buffer; + if (n_bytes_left == 0) + break; + + if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + *bogus_lengthp = 1; + return 0xfefe; + } + p0 = vlib_get_buffer (vm, p0->next_buffer); + data_this_buffer = vlib_buffer_get_current (p0); + n_this_buffer = p0->current_length; + } + + sum16 = ~ip_csum_fold (sum0); + + return sum16; +} + +u32 +ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) +{ + ip6_header_t *ip0 = vlib_buffer_get_current (p0); + udp_header_t *udp0; + u16 sum16; + int bogus_length; + + /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) */ + ASSERT (ip0->protocol == IP_PROTOCOL_TCP + || ip0->protocol == IP_PROTOCOL_ICMP6 + || ip0->protocol == IP_PROTOCOL_UDP + || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS); + + udp0 = (void *) (ip0 + 1); + if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0) + { + p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED + | VNET_BUFFER_F_L4_CHECKSUM_CORRECT); + return p0->flags; + } + + sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length); + + p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED + | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT)); + + return p0->flags; +} + +/** + * @brief returns number of links on which src is reachable. 
+ */ +always_inline int +ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i) +{ + const load_balance_t *lb0; + index_t lbi; + + lbi = ip6_fib_table_fwding_lookup_with_if_index (im, + vnet_buffer + (b)->sw_if_index[VLIB_RX], + &i->src_address); + + lb0 = load_balance_get (lbi); + + return (fib_urpf_check_size (lb0->lb_urpf)); +} + +always_inline u8 +ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0, + u32 * udp_offset0) +{ + u32 proto0; + proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_UDP, udp_offset0); + if (proto0 != IP_PROTOCOL_UDP) + { + proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, udp_offset0); + proto0 = (proto0 == IP_PROTOCOL_TCP) ? proto0 : 0; + } + return proto0; +} + +/* *INDENT-OFF* */ +VNET_FEATURE_ARC_INIT (ip6_local) = +{ + .arc_name = "ip6-local", + .start_nodes = VNET_FEATURES ("ip6-local"), +}; +/* *INDENT-ON* */ + +static uword +ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, int head_of_feature_arc) +{ + ip6_main_t *im = &ip6_main; + ip_lookup_main_t *lm = &im->lookup_main; + ip_local_next_t next_index; + u32 *from, *to_next, n_left_from, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_input_node.index); + u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace (vm, node, frame, VLIB_TX); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t *p0, *p1; + ip6_header_t *ip0, *ip1; + udp_header_t *udp0, *udp1; + u32 pi0, ip_len0, udp_len0, flags0, next0; + u32 pi1, ip_len1, udp_len1, flags1, next1; + i32 len_diff0, len_diff1; + u8 error0, type0, good_l4_csum0, is_tcp_udp0; + u8 error1, type1, good_l4_csum1, is_tcp_udp1; + 
u32 udp_offset0, udp_offset1; + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + if (head_of_feature_arc == 0) + goto skip_checks; + + vnet_buffer (p0)->l3_hdr_offset = p0->current_data; + vnet_buffer (p1)->l3_hdr_offset = p1->current_data; + + type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol]; + type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol]; + + flags0 = p0->flags; + flags1 = p1->flags; + + is_tcp_udp0 = ip6_next_proto_is_tcp_udp (p0, ip0, &udp_offset0); + is_tcp_udp1 = ip6_next_proto_is_tcp_udp (p1, ip1, &udp_offset1); + + good_l4_csum0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + good_l4_csum1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + len_diff0 = 0; + len_diff1 = 0; + + if (PREDICT_TRUE (is_tcp_udp0)) + { + udp0 = (udp_header_t *) ((u8 *) ip0 + udp_offset0); + /* Don't verify UDP checksum for packets with explicit zero checksum. */ + good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP + && udp0->checksum == 0; + /* Verify UDP length. */ + if (is_tcp_udp0 == IP_PROTOCOL_UDP) + { + ip_len0 = clib_net_to_host_u16 (ip0->payload_length); + udp_len0 = clib_net_to_host_u16 (udp0->length); + len_diff0 = ip_len0 - udp_len0; + } + } + if (PREDICT_TRUE (is_tcp_udp1)) + { + udp1 = (udp_header_t *) ((u8 *) ip1 + udp_offset1); + /* Don't verify UDP checksum for packets with explicit zero checksum. */ + good_l4_csum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP + && udp1->checksum == 0; + /* Verify UDP length. 
*/ + if (is_tcp_udp1 == IP_PROTOCOL_UDP) + { + ip_len1 = clib_net_to_host_u16 (ip1->payload_length); + udp_len1 = clib_net_to_host_u16 (udp1->length); + len_diff1 = ip_len1 - udp_len1; + } + } + + good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN; + good_l4_csum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN; + + len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0; + len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0; + + if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN + && !good_l4_csum0 + && !(flags0 & + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))) + { + flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0); + good_l4_csum0 = + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + } + if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN + && !good_l4_csum1 + && !(flags1 & + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))) + { + flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1); + good_l4_csum1 = + (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + } + + error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL; + error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0; + error1 = len_diff1 < 0 ? IP6_ERROR_UDP_LENGTH : error1; + + ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == + IP6_ERROR_UDP_CHECKSUM); + ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == + IP6_ERROR_ICMP_CHECKSUM); + error0 = (!good_l4_csum0 ? IP6_ERROR_UDP_CHECKSUM + type0 : error0); + error1 = (!good_l4_csum1 ? IP6_ERROR_UDP_CHECKSUM + type1 : error1); + + /* Drop packets from unroutable hosts. */ + /* If this is a neighbor solicitation (ICMP), skip source RPF check */ + if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && + type0 != IP_BUILTIN_PROTOCOL_ICMP && + !ip6_address_is_link_local_unicast (&ip0->src_address)) + { + error0 = (!ip6_urpf_loose_check (im, p0, ip0) + ? 
IP6_ERROR_SRC_LOOKUP_MISS : error0); + } + if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL && + type1 != IP_BUILTIN_PROTOCOL_ICMP && + !ip6_address_is_link_local_unicast (&ip1->src_address)) + { + error1 = (!ip6_urpf_loose_check (im, p1, ip1) + ? IP6_ERROR_SRC_LOOKUP_MISS : error1); + } + + skip_checks: + + next0 = lm->local_next_by_ip_protocol[ip0->protocol]; + next1 = lm->local_next_by_ip_protocol[ip1->protocol]; + + next0 = + error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; + next1 = + error1 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1; + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; + + if (head_of_feature_arc) + { + if (PREDICT_TRUE (error0 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, + vnet_buffer (p0)->sw_if_index + [VLIB_RX], &next0, p0); + if (PREDICT_TRUE (error1 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, + vnet_buffer (p1)->sw_if_index + [VLIB_RX], &next1, p1); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *p0; + ip6_header_t *ip0; + udp_header_t *udp0; + u32 pi0, ip_len0, udp_len0, flags0, next0; + i32 len_diff0; + u8 error0, type0, good_l4_csum0; + u32 udp_offset0; + u8 is_tcp_udp0; + + pi0 = to_next[0] = from[0]; + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + error0 = IP6_ERROR_UNKNOWN_PROTOCOL; + + p0 = vlib_get_buffer (vm, pi0); + ip0 = vlib_buffer_get_current (p0); + + if (head_of_feature_arc == 0) + goto skip_check; + + vnet_buffer (p0)->l3_hdr_offset = p0->current_data; + + type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol]; + flags0 = p0->flags; + is_tcp_udp0 = ip6_next_proto_is_tcp_udp (p0, ip0, &udp_offset0); + good_l4_csum0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + + len_diff0 = 0; + if (PREDICT_TRUE (is_tcp_udp0)) + { + udp0 = 
(udp_header_t *) ((u8 *) ip0 + udp_offset0); + /* Don't verify UDP checksum for packets with explicit zero + * checksum. */ + good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP + && udp0->checksum == 0; + /* Verify UDP length. */ + if (is_tcp_udp0 == IP_PROTOCOL_UDP) + { + ip_len0 = clib_net_to_host_u16 (ip0->payload_length); + udp_len0 = clib_net_to_host_u16 (udp0->length); + len_diff0 = ip_len0 - udp_len0; + } + } + + good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN; + len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0; + + if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN + && !good_l4_csum0 + && !(flags0 & + VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))) + { + flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0); + good_l4_csum0 = + (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + } + + error0 = IP6_ERROR_UNKNOWN_PROTOCOL; + error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0; + + ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP == + IP6_ERROR_UDP_CHECKSUM); + ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP == + IP6_ERROR_ICMP_CHECKSUM); + error0 = (!good_l4_csum0 ? IP6_ERROR_UDP_CHECKSUM + type0 : error0); + + /* If this is a neighbor solicitation (ICMP), skip src RPF check */ + if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL && + type0 != IP_BUILTIN_PROTOCOL_ICMP && + !ip6_address_is_link_local_unicast (&ip0->src_address)) + { + error0 = (!ip6_urpf_loose_check (im, p0, ip0) + ? IP6_ERROR_SRC_LOOKUP_MISS : error0); + } + + skip_check: + + next0 = lm->local_next_by_ip_protocol[ip0->protocol]; + next0 = + error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? 
IP_LOCAL_NEXT_DROP : next0; + p0->error = error_node->errors[error0]; + + if (head_of_feature_arc) + { + if (PREDICT_TRUE (error0 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL)) + vnet_feature_arc_start (arc_index, + vnet_buffer (p0)->sw_if_index + [VLIB_RX], &next0, p0); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +static uword +ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_local_node, static) = +{ + .function = ip6_local, + .name = "ip6-local", + .vector_size = sizeof (u32), + .format_trace = format_ip6_forward_next_trace, + .n_next_nodes = IP_LOCAL_N_NEXT, + .next_nodes = + { + [IP_LOCAL_NEXT_DROP] = "error-drop", + [IP_LOCAL_NEXT_PUNT] = "error-punt", + [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup", + [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local); + + +static uword +ip6_local_end_of_arc (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_local_end_of_arc_node,static) = { + .function = ip6_local_end_of_arc, + .name = "ip6-local-end-of-arc", + .vector_size = sizeof (u32), + + .format_trace = format_ip6_forward_next_trace, + .sibling_of = "ip6-local", +}; + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_end_of_arc_node, ip6_local_end_of_arc) + +VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = { + .arc_name = "ip6-local", + .node_name = "ip6-local-end-of-arc", + .runs_before = 0, /* not before any other features */ +}; +/* *INDENT-ON* */ + +void +ip6_register_protocol (u32 protocol, u32 node_index) +{ + vlib_main_t *vm = 
vlib_get_main (); + ip6_main_t *im = &ip6_main; + ip_lookup_main_t *lm = &im->lookup_main; + + ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol)); + lm->local_next_by_ip_protocol[protocol] = + vlib_node_add_next (vm, ip6_local_node.index, node_index); +} + +typedef enum +{ + IP6_DISCOVER_NEIGHBOR_NEXT_DROP, + IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX, + IP6_DISCOVER_NEIGHBOR_N_NEXT, +} ip6_discover_neighbor_next_t; + +typedef enum +{ + IP6_DISCOVER_NEIGHBOR_ERROR_DROP, + IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT, + IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS, +} ip6_discover_neighbor_error_t; + +static uword +ip6_discover_neighbor_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int is_glean) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_main_t *im = &ip6_main; + ip_lookup_main_t *lm = &im->lookup_main; + u32 *from, *to_next_drop; + uword n_left_from, n_left_to_next_drop; + static f64 time_last_seed_change = -1e100; + static u32 hash_seeds[3]; + static uword hash_bitmap[256 / BITS (uword)]; + f64 time_now; + int bogus_length; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace (vm, node, frame, VLIB_TX); + + time_now = vlib_time_now (vm); + if (time_now - time_last_seed_change > 1e-3) + { + uword i; + u32 *r = clib_random_buffer_get_data (&vm->random_buffer, + sizeof (hash_seeds)); + for (i = 0; i < ARRAY_LEN (hash_seeds); i++) + hash_seeds[i] = r[i]; + + /* Mark all hash keys as been not-seen before. 
*/ + for (i = 0; i < ARRAY_LEN (hash_bitmap); i++) + hash_bitmap[i] = 0; + + time_last_seed_change = time_now; + } + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP, + to_next_drop, n_left_to_next_drop); + + while (n_left_from > 0 && n_left_to_next_drop > 0) + { + vlib_buffer_t *p0; + ip6_header_t *ip0; + u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0; + uword bm0; + ip_adjacency_t *adj0; + vnet_hw_interface_t *hw_if0; + u32 next0; + + pi0 = from[0]; + + p0 = vlib_get_buffer (vm, pi0); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + ip0 = vlib_buffer_get_current (p0); + + adj0 = adj_get (adj_index0); + + if (!is_glean) + { + ip0->dst_address.as_u64[0] = + adj0->sub_type.nbr.next_hop.ip6.as_u64[0]; + ip0->dst_address.as_u64[1] = + adj0->sub_type.nbr.next_hop.ip6.as_u64[1]; + } + + a0 = hash_seeds[0]; + b0 = hash_seeds[1]; + c0 = hash_seeds[2]; + + sw_if_index0 = adj0->rewrite_header.sw_if_index; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; + + a0 ^= sw_if_index0; + b0 ^= ip0->dst_address.as_u32[0]; + c0 ^= ip0->dst_address.as_u32[1]; + + hash_v3_mix32 (a0, b0, c0); + + b0 ^= ip0->dst_address.as_u32[2]; + c0 ^= ip0->dst_address.as_u32[3]; + + hash_v3_finalize32 (a0, b0, c0); + + c0 &= BITS (hash_bitmap) - 1; + c0 = c0 / BITS (uword); + m0 = (uword) 1 << (c0 % BITS (uword)); + + bm0 = hash_bitmap[c0]; + drop0 = (bm0 & m0) != 0; + + /* Mark it as seen. */ + hash_bitmap[c0] = bm0 | m0; + + from += 1; + n_left_from -= 1; + to_next_drop[0] = pi0; + to_next_drop += 1; + n_left_to_next_drop -= 1; + + hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0); + + /* If the interface is link-down, drop the pkt */ + if (!(hw_if0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP)) + drop0 = 1; + + p0->error = + node->errors[drop0 ? 
IP6_DISCOVER_NEIGHBOR_ERROR_DROP + : IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT]; + if (drop0) + continue; + + /* + * the adj has been updated to a rewrite but the node the DPO that got + * us here hasn't - yet. no big deal. we'll drop while we wait. + */ + if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index) + continue; + + { + u32 bi0 = 0; + icmp6_neighbor_solicitation_header_t *h0; + vlib_buffer_t *b0; + + h0 = vlib_packet_template_get_packet + (vm, &im->discover_neighbor_packet_template, &bi0); + + /* + * Build ethernet header. + * Choose source address based on destination lookup + * adjacency. + */ + if (ip6_src_address_for_packet (lm, + sw_if_index0, + &h0->ip.src_address)) + { + /* There is no address on the interface */ + p0->error = + node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS]; + vlib_buffer_free (vm, &bi0, 1); + continue; + } + + /* + * Destination address is a solicited node multicast address. + * We need to fill in + * the low 24 bits with low 24 bits of target's address. + */ + h0->ip.dst_address.as_u8[13] = ip0->dst_address.as_u8[13]; + h0->ip.dst_address.as_u8[14] = ip0->dst_address.as_u8[14]; + h0->ip.dst_address.as_u8[15] = ip0->dst_address.as_u8[15]; + + h0->neighbor.target_address = ip0->dst_address; + + clib_memcpy (h0->link_layer_option.ethernet_address, + hw_if0->hw_address, vec_len (hw_if0->hw_address)); + + /* $$$$ appears we need this; why is the checksum non-zero? */ + h0->neighbor.icmp.checksum = 0; + h0->neighbor.icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip, + &bogus_length); + + ASSERT (bogus_length == 0); + + vlib_buffer_copy_trace_flag (vm, p0, bi0); + b0 = vlib_get_buffer (vm, bi0); + vnet_buffer (b0)->sw_if_index[VLIB_TX] + = vnet_buffer (p0)->sw_if_index[VLIB_TX]; + + /* Add rewrite/encap string. 
*/ + vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t)); + vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); + + next0 = IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX; + + vlib_set_next_frame_buffer (vm, node, next0, bi0); + } + } + + vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP, + n_left_to_next_drop); + } + + return frame->n_vectors; +} + +static uword +ip6_discover_neighbor (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return (ip6_discover_neighbor_inline (vm, node, frame, 0)); +} + +static uword +ip6_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return (ip6_discover_neighbor_inline (vm, node, frame, 1)); +} + +static char *ip6_discover_neighbor_error_strings[] = { + [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops", + [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT] = "neighbor solicitations sent", + [IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS] + = "no source address for ND solicitation", +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_discover_neighbor_node) = +{ + .function = ip6_discover_neighbor, + .name = "ip6-discover-neighbor", + .vector_size = sizeof (u32), + .format_trace = format_ip6_forward_next_trace, + .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings), + .error_strings = ip6_discover_neighbor_error_strings, + .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT, + .next_nodes = + { + [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop", + [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_glean_node) = +{ + .function = ip6_glean, + .name = "ip6-glean", + .vector_size = sizeof (u32), + .format_trace = format_ip6_forward_next_trace, + .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings), + .error_strings = ip6_discover_neighbor_error_strings, + .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT, + .next_nodes = + { + [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = 
"error-drop", + [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output", + }, +}; +/* *INDENT-ON* */ + +clib_error_t * +ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_main_t *im = &ip6_main; + icmp6_neighbor_solicitation_header_t *h; + ip6_address_t *src; + ip_interface_address_t *ia; + ip_adjacency_t *adj; + vnet_hw_interface_t *hi; + vnet_sw_interface_t *si; + vlib_buffer_t *b; + adj_index_t ai; + u32 bi = 0; + int bogus_length; + + si = vnet_get_sw_interface (vnm, sw_if_index); + + if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + return clib_error_return (0, "%U: interface %U down", + format_ip6_address, dst, + format_vnet_sw_if_index_name, vnm, + sw_if_index); + } + + src = + ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia); + if (!src) + { + vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE; + return clib_error_return + (0, "no matching interface address for destination %U (interface %U)", + format_ip6_address, dst, + format_vnet_sw_if_index_name, vnm, sw_if_index); + } + + h = + vlib_packet_template_get_packet (vm, + &im->discover_neighbor_packet_template, + &bi); + + hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + + /* Destination address is a solicited node multicast address. We need to fill in + the low 24 bits with low 24 bits of target's address. 
*/ + h->ip.dst_address.as_u8[13] = dst->as_u8[13]; + h->ip.dst_address.as_u8[14] = dst->as_u8[14]; + h->ip.dst_address.as_u8[15] = dst->as_u8[15]; + + h->ip.src_address = src[0]; + h->neighbor.target_address = dst[0]; + + if (PREDICT_FALSE (!hi->hw_address)) + { + return clib_error_return (0, "%U: interface %U do not support ip probe", + format_ip6_address, dst, + format_vnet_sw_if_index_name, vnm, + sw_if_index); + } + + clib_memcpy (h->link_layer_option.ethernet_address, hi->hw_address, + vec_len (hi->hw_address)); + + h->neighbor.icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length); + ASSERT (bogus_length == 0); + + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; + + /* Add encapsulation string for software interface (e.g. ethernet header). */ + ip46_address_t nh = { + .ip6 = *dst, + }; + + ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, + VNET_LINK_IP6, &nh, sw_if_index); + adj = adj_get (ai); + + /* Peer has been previously resolved, retrieve glean adj instead */ + if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) + { + adj_unlock (ai); + ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP6, sw_if_index, &nh); + adj = adj_get (ai); + } + + vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); + vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); + + { + vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index); + u32 *to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); + } + + adj_unlock (ai); + return /* no error */ 0; +} + +typedef enum +{ + IP6_REWRITE_NEXT_DROP, + IP6_REWRITE_NEXT_ICMP_ERROR, +} ip6_rewrite_next_t; + +always_inline uword +ip6_rewrite_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + int do_counters, int is_midchain, int is_mcast) +{ + ip_lookup_main_t *lm = &ip6_main.lookup_main; + u32 *from = 
vlib_frame_vector_args (frame); + u32 n_left_from, n_left_to_next, *to_next, next_index; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_input_node.index); + + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + u32 thread_index = vlib_get_thread_index (); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + ip_adjacency_t *adj0, *adj1; + vlib_buffer_t *p0, *p1; + ip6_header_t *ip0, *ip1; + u32 pi0, rw_len0, next0, error0, adj_index0; + u32 pi1, rw_len1, next1, error1, adj_index1; + u32 tx_sw_if_index0, tx_sw_if_index1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->pre_data, 32, STORE); + CLIB_PREFETCH (p3->pre_data, 32, STORE); + + CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE); + CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + + from += 2; + n_left_from -= 2; + to_next += 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX]; + + ip0 = vlib_buffer_get_current (p0); + ip1 = vlib_buffer_get_current (p1); + + error0 = error1 = IP6_ERROR_NONE; + next0 = next1 = IP6_REWRITE_NEXT_DROP; + + if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) + { + i32 hop_limit0 = ip0->hop_limit; + + /* Input node should have reject packets with hop limit 0. */ + ASSERT (ip0->hop_limit > 0); + + hop_limit0 -= 1; + + ip0->hop_limit = hop_limit0; + + /* + * If the hop count drops below 1 when forwarding, generate + * an ICMP response. 
+ */ + if (PREDICT_FALSE (hop_limit0 <= 0)) + { + error0 = IP6_ERROR_TIME_EXPIRED; + next0 = IP6_REWRITE_NEXT_ICMP_ERROR; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded, + ICMP6_time_exceeded_ttl_exceeded_in_transit, + 0); + } + } + else + { + p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; + } + if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) + { + i32 hop_limit1 = ip1->hop_limit; + + /* Input node should have reject packets with hop limit 0. */ + ASSERT (ip1->hop_limit > 0); + + hop_limit1 -= 1; + + ip1->hop_limit = hop_limit1; + + /* + * If the hop count drops below 1 when forwarding, generate + * an ICMP response. + */ + if (PREDICT_FALSE (hop_limit1 <= 0)) + { + error1 = IP6_ERROR_TIME_EXPIRED; + next1 = IP6_REWRITE_NEXT_ICMP_ERROR; + vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded, + ICMP6_time_exceeded_ttl_exceeded_in_transit, + 0); + } + } + else + { + p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; + } + adj0 = adj_get (adj_index0); + adj1 = adj_get (adj_index1); + + rw_len0 = adj0[0].rewrite_header.data_bytes; + rw_len1 = adj1[0].rewrite_header.data_bytes; + vnet_buffer (p0)->ip.save_rewrite_length = rw_len0; + vnet_buffer (p1)->ip.save_rewrite_length = rw_len1; + + if (do_counters) + { + vlib_increment_combined_counter + (&adjacency_counters, + thread_index, adj_index0, 1, + vlib_buffer_length_in_chain (vm, p0) + rw_len0); + vlib_increment_combined_counter + (&adjacency_counters, + thread_index, adj_index1, 1, + vlib_buffer_length_in_chain (vm, p1) + rw_len1); + } + + /* Check MTU of outgoing interface. */ + error0 = + (vlib_buffer_length_in_chain (vm, p0) > + adj0[0]. + rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED : + error0); + error1 = + (vlib_buffer_length_in_chain (vm, p1) > + adj1[0]. + rewrite_header.max_l3_packet_bytes ? 
IP6_ERROR_MTU_EXCEEDED : + error1); + + /* Don't adjust the buffer for hop count issue; icmp-error node + * wants to see the IP headerr */ + if (PREDICT_TRUE (error0 == IP6_ERROR_NONE)) + { + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + + tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0; + next0 = adj0[0].rewrite_header.next_index; + + if (PREDICT_FALSE + (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) + vnet_feature_arc_start (lm->output_feature_arc_index, + tx_sw_if_index0, &next0, p0); + } + if (PREDICT_TRUE (error1 == IP6_ERROR_NONE)) + { + p1->current_data -= rw_len1; + p1->current_length += rw_len1; + + tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index; + vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1; + next1 = adj1[0].rewrite_header.next_index; + + if (PREDICT_FALSE + (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) + vnet_feature_arc_start (lm->output_feature_arc_index, + tx_sw_if_index1, &next1, p1); + } + + /* Guess we are only writing on simple Ethernet header. 
*/ + vnet_rewrite_two_headers (adj0[0], adj1[0], + ip0, ip1, sizeof (ethernet_header_t)); + + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func (vm, adj0, p0); + adj1->sub_type.midchain.fixup_func (vm, adj1, p1); + } + if (is_mcast) + { + /* + * copy bytes from the IP address into the MAC rewrite + */ + vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); + vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1); + } + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + ip_adjacency_t *adj0; + vlib_buffer_t *p0; + ip6_header_t *ip0; + u32 pi0, rw_len0; + u32 adj_index0, next0, error0; + u32 tx_sw_if_index0; + + pi0 = to_next[0] = from[0]; + + p0 = vlib_get_buffer (vm, pi0); + + adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX]; + + adj0 = adj_get (adj_index0); + + ip0 = vlib_buffer_get_current (p0); + + error0 = IP6_ERROR_NONE; + next0 = IP6_REWRITE_NEXT_DROP; + + /* Check hop limit */ + if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) + { + i32 hop_limit0 = ip0->hop_limit; + + ASSERT (ip0->hop_limit > 0); + + hop_limit0 -= 1; + + ip0->hop_limit = hop_limit0; + + if (PREDICT_FALSE (hop_limit0 <= 0)) + { + /* + * If the hop count drops below 1 when forwarding, generate + * an ICMP response. + */ + error0 = IP6_ERROR_TIME_EXPIRED; + next0 = IP6_REWRITE_NEXT_ICMP_ERROR; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded, + ICMP6_time_exceeded_ttl_exceeded_in_transit, + 0); + } + } + else + { + p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; + } + + /* Guess we are only writing on simple Ethernet header. */ + vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t)); + + /* Update packet buffer attributes/set output interface. 
*/ + rw_len0 = adj0[0].rewrite_header.data_bytes; + vnet_buffer (p0)->ip.save_rewrite_length = rw_len0; + + if (do_counters) + { + vlib_increment_combined_counter + (&adjacency_counters, + thread_index, adj_index0, 1, + vlib_buffer_length_in_chain (vm, p0) + rw_len0); + } + + /* Check MTU of outgoing interface. */ + error0 = + (vlib_buffer_length_in_chain (vm, p0) > + adj0[0]. + rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED : + error0); + + /* Don't adjust the buffer for hop count issue; icmp-error node + * wants to see the IP headerr */ + if (PREDICT_TRUE (error0 == IP6_ERROR_NONE)) + { + p0->current_data -= rw_len0; + p0->current_length += rw_len0; + + tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index; + + vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0; + next0 = adj0[0].rewrite_header.next_index; + + if (PREDICT_FALSE + (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES)) + vnet_feature_arc_start (lm->output_feature_arc_index, + tx_sw_if_index0, &next0, p0); + } + + if (is_midchain) + { + adj0->sub_type.midchain.fixup_func (vm, adj0, p0); + } + if (is_mcast) + { + vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); + } + + p0->error = error_node->errors[error0]; + + from += 1; + n_left_from -= 1; + to_next += 1; + n_left_to_next -= 1; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Need to do trace after rewrites to pick up new packet data. 
*/ + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip6_forward_next_trace (vm, node, frame, VLIB_TX); + + return frame->n_vectors; +} + +static uword +ip6_rewrite (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip6_rewrite_inline (vm, node, frame, 1, 0, 0); + else + return ip6_rewrite_inline (vm, node, frame, 0, 0, 0); +} + +static uword +ip6_rewrite_mcast (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip6_rewrite_inline (vm, node, frame, 1, 0, 1); + else + return ip6_rewrite_inline (vm, node, frame, 0, 0, 1); +} + +static uword +ip6_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip6_rewrite_inline (vm, node, frame, 1, 1, 0); + else + return ip6_rewrite_inline (vm, node, frame, 0, 1, 0); +} + +static uword +ip6_mcast_midchain (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip6_rewrite_inline (vm, node, frame, 1, 1, 1); + else + return ip6_rewrite_inline (vm, node, frame, 0, 1, 1); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_midchain_node) = +{ + .function = ip6_midchain, + .name = "ip6-midchain", + .vector_size = sizeof (u32), + .format_trace = format_ip6_forward_next_trace, + .sibling_of = "ip6-rewrite", + }; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_rewrite_node) = +{ + .function = ip6_rewrite, + .name = "ip6-rewrite", + .vector_size = sizeof (u32), + .format_trace = format_ip6_rewrite_trace, + .n_next_nodes = 2, + .next_nodes = + { + [IP6_REWRITE_NEXT_DROP] = "error-drop", + [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error", + }, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) = +{ 
+ .function = ip6_rewrite_mcast, + .name = "ip6-rewrite-mcast", + .vector_size = sizeof (u32), + .format_trace = format_ip6_rewrite_trace, + .sibling_of = "ip6-rewrite", +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_mcast_node, ip6_rewrite_mcast); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_mcast_midchain_node, static) = +{ + .function = ip6_mcast_midchain, + .name = "ip6-mcast-midchain", + .vector_size = sizeof (u32), + .format_trace = format_ip6_rewrite_trace, + .sibling_of = "ip6-rewrite", +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_mcast_midchain_node, ip6_mcast_midchain); + +/* + * Hop-by-Hop handling + */ +ip6_hop_by_hop_main_t ip6_hop_by_hop_main; + +#define foreach_ip6_hop_by_hop_error \ +_(PROCESSED, "pkts with ip6 hop-by-hop options") \ +_(FORMAT, "incorrectly formatted hop-by-hop options") \ +_(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options") + +/* *INDENT-OFF* */ +typedef enum +{ +#define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym, + foreach_ip6_hop_by_hop_error +#undef _ + IP6_HOP_BY_HOP_N_ERROR, +} ip6_hop_by_hop_error_t; +/* *INDENT-ON* */ + +/* + * Primary h-b-h handler trace support + * We work pretty hard on the problem for obvious reasons + */ +typedef struct +{ + u32 next_index; + u32 trace_len; + u8 option_data[256]; +} ip6_hop_by_hop_trace_t; + +vlib_node_registration_t ip6_hop_by_hop_node; + +static char *ip6_hop_by_hop_error_strings[] = { +#define _(sym,string) string, + foreach_ip6_hop_by_hop_error +#undef _ +}; + +u8 * +format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args) +{ + ip6_hop_by_hop_header_t *hbh0 = va_arg (*args, ip6_hop_by_hop_header_t *); + int total_len = va_arg (*args, int); + ip6_hop_by_hop_option_t *opt0, *limit0; + ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; + u8 type0; + + s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d", + hbh0->protocol, (hbh0->length + 1) << 3, total_len); + + opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1); + limit0 = (ip6_hop_by_hop_option_t 
*) ((u8 *) hbh0 + total_len); + + while (opt0 < limit0) + { + type0 = opt0->type; + switch (type0) + { + case 0: /* Pad, just stop */ + opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1); + break; + + default: + if (hm->trace[type0]) + { + s = (*hm->trace[type0]) (s, opt0); + } + else + { + s = + format (s, "\n unrecognized option %d length %d", type0, + opt0->length); + } + opt0 = + (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length + + sizeof (ip6_hop_by_hop_option_t)); + break; + } + } + return s; +} + +static u8 * +format_ip6_hop_by_hop_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip6_hop_by_hop_trace_t *t = va_arg (*args, ip6_hop_by_hop_trace_t *); + ip6_hop_by_hop_header_t *hbh0; + ip6_hop_by_hop_option_t *opt0, *limit0; + ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; + + u8 type0; + + hbh0 = (ip6_hop_by_hop_header_t *) t->option_data; + + s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d", + t->next_index, (hbh0->length + 1) << 3, t->trace_len); + + opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1); + limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0) + t->trace_len; + + while (opt0 < limit0) + { + type0 = opt0->type; + switch (type0) + { + case 0: /* Pad, just stop */ + opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1; + break; + + default: + if (hm->trace[type0]) + { + s = (*hm->trace[type0]) (s, opt0); + } + else + { + s = + format (s, "\n unrecognized option %d length %d", type0, + opt0->length); + } + opt0 = + (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length + + sizeof (ip6_hop_by_hop_option_t)); + break; + } + } + return s; +} + +always_inline u8 +ip6_scan_hbh_options (vlib_buffer_t * b0, + ip6_header_t * ip0, + ip6_hop_by_hop_header_t * hbh0, + ip6_hop_by_hop_option_t * opt0, + ip6_hop_by_hop_option_t * limit0, u32 * next0) +{ + ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; + u8 type0; + u8 
error0 = 0; + + while (opt0 < limit0) + { + type0 = opt0->type; + switch (type0) + { + case 0: /* Pad1 */ + opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1; + continue; + case 1: /* PadN */ + break; + default: + if (hm->options[type0]) + { + if ((*hm->options[type0]) (b0, ip0, opt0) < 0) + { + error0 = IP6_HOP_BY_HOP_ERROR_FORMAT; + return (error0); + } + } + else + { + /* Unrecognized mandatory option, check the two high order bits */ + switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS) + { + case HBH_OPTION_TYPE_SKIP_UNKNOWN: + break; + case HBH_OPTION_TYPE_DISCARD_UNKNOWN: + error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION; + *next0 = IP_LOOKUP_NEXT_DROP; + break; + case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP: + error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION; + *next0 = IP_LOOKUP_NEXT_ICMP_ERROR; + icmp6_error_set_vnet_buffer (b0, ICMP6_parameter_problem, + ICMP6_parameter_problem_unrecognized_option, + (u8 *) opt0 - (u8 *) ip0); + break; + case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST: + error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION; + if (!ip6_address_is_multicast (&ip0->dst_address)) + { + *next0 = IP_LOOKUP_NEXT_ICMP_ERROR; + icmp6_error_set_vnet_buffer (b0, + ICMP6_parameter_problem, + ICMP6_parameter_problem_unrecognized_option, + (u8 *) opt0 - (u8 *) ip0); + } + else + { + *next0 = IP_LOOKUP_NEXT_DROP; + } + break; + } + return (error0); + } + } + opt0 = + (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length + + sizeof (ip6_hop_by_hop_option_t)); + } + return (error0); +} + +/* + * Process the Hop-by-Hop Options header + */ +static uword +ip6_hop_by_hop (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index); + ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; + u32 n_left_from, *from, *to_next; + ip_lookup_next_t next_index; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = 
node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u32 next0, next1; + ip6_header_t *ip0, *ip1; + ip6_hop_by_hop_header_t *hbh0, *hbh1; + ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1; + u8 error0 = 0, error1 = 0; + + /* Prefetch next iteration. */ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD); + } + + /* Speculatively enqueue b0, b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */ + u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + ip_adjacency_t *adj0 = adj_get (adj_index0); + u32 adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX]; + ip_adjacency_t *adj1 = adj_get (adj_index1); + + /* Default use the next_index from the adjacency. 
A HBH option rarely redirects to a different node */ + next0 = adj0->lookup_next_index; + next1 = adj1->lookup_next_index; + + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1); + hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1); + opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1); + opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1); + limit0 = + (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + + ((hbh0->length + 1) << 3)); + limit1 = + (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 + + ((hbh1->length + 1) << 3)); + + /* + * Basic validity checks + */ + if ((hbh0->length + 1) << 3 > + clib_net_to_host_u16 (ip0->payload_length)) + { + error0 = IP6_HOP_BY_HOP_ERROR_FORMAT; + next0 = IP_LOOKUP_NEXT_DROP; + goto outdual; + } + /* Scan the set of h-b-h options, process ones that we understand */ + error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0); + + if ((hbh1->length + 1) << 3 > + clib_net_to_host_u16 (ip1->payload_length)) + { + error1 = IP6_HOP_BY_HOP_ERROR_FORMAT; + next1 = IP_LOOKUP_NEXT_DROP; + goto outdual; + } + /* Scan the set of h-b-h options, process ones that we understand */ + error1 = ip6_scan_hbh_options (b1, ip1, hbh1, opt1, limit1, &next1); + + outdual: + /* Has the classifier flagged this buffer for special treatment? */ + if (PREDICT_FALSE + ((error0 == 0) + && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP))) + next0 = hm->next_override; + + /* Has the classifier flagged this buffer for special treatment? 
*/ + if (PREDICT_FALSE + ((error1 == 0) + && (vnet_buffer (b1)->l2_classify.opaque_index & OI_DECAP))) + next1 = hm->next_override; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + ip6_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + u32 trace_len = (hbh0->length + 1) << 3; + t->next_index = next0; + /* Capture the h-b-h option verbatim */ + trace_len = + trace_len < + ARRAY_LEN (t->option_data) ? trace_len : + ARRAY_LEN (t->option_data); + t->trace_len = trace_len; + clib_memcpy (t->option_data, hbh0, trace_len); + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + ip6_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + u32 trace_len = (hbh1->length + 1) << 3; + t->next_index = next1; + /* Capture the h-b-h option verbatim */ + trace_len = + trace_len < + ARRAY_LEN (t->option_data) ? trace_len : + ARRAY_LEN (t->option_data); + t->trace_len = trace_len; + clib_memcpy (t->option_data, hbh1, trace_len); + } + + } + + b0->error = error_node->errors[error0]; + b1->error = error_node->errors[error1]; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, + n_left_to_next, bi0, bi1, next0, + next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + ip6_header_t *ip0; + ip6_hop_by_hop_header_t *hbh0; + ip6_hop_by_hop_option_t *opt0, *limit0; + u8 error0 = 0; + + /* Speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + /* + * Default use the next_index from the adjacency. 
+ * A HBH option rarely redirects to a different node + */ + u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + ip_adjacency_t *adj0 = adj_get (adj_index0); + next0 = adj0->lookup_next_index; + + ip0 = vlib_buffer_get_current (b0); + hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1); + opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1); + limit0 = + (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + + ((hbh0->length + 1) << 3)); + + /* + * Basic validity checks + */ + if ((hbh0->length + 1) << 3 > + clib_net_to_host_u16 (ip0->payload_length)) + { + error0 = IP6_HOP_BY_HOP_ERROR_FORMAT; + next0 = IP_LOOKUP_NEXT_DROP; + goto out0; + } + + /* Scan the set of h-b-h options, process ones that we understand */ + error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0); + + out0: + /* Has the classifier flagged this buffer for special treatment? */ + if (PREDICT_FALSE + ((error0 == 0) + && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP))) + next0 = hm->next_override; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip6_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + u32 trace_len = (hbh0->length + 1) << 3; + t->next_index = next0; + /* Capture the h-b-h option verbatim */ + trace_len = + trace_len < + ARRAY_LEN (t->option_data) ? 
trace_len : + ARRAY_LEN (t->option_data); + t->trace_len = trace_len; + clib_memcpy (t->option_data, hbh0, trace_len); + } + + b0->error = error_node->errors[error0]; + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = +{ + .function = ip6_hop_by_hop, + .name = "ip6-hop-by-hop", + .sibling_of = "ip6-lookup", + .vector_size = sizeof (u32), + .format_trace = format_ip6_hop_by_hop_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .n_errors = ARRAY_LEN (ip6_hop_by_hop_error_strings), + .error_strings = ip6_hop_by_hop_error_strings, + .n_next_nodes = 0, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_hop_by_hop_node, ip6_hop_by_hop); + +static clib_error_t * +ip6_hop_by_hop_init (vlib_main_t * vm) +{ + ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; + memset (hm->options, 0, sizeof (hm->options)); + memset (hm->trace, 0, sizeof (hm->trace)); + hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP; + return (0); +} + +VLIB_INIT_FUNCTION (ip6_hop_by_hop_init); + +void +ip6_hbh_set_next_override (uword next) +{ + ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; + + hm->next_override = next; +} + +int +ip6_hbh_register_option (u8 option, + int options (vlib_buffer_t * b, ip6_header_t * ip, + ip6_hop_by_hop_option_t * opt), + u8 * trace (u8 * s, ip6_hop_by_hop_option_t * opt)) +{ + ip6_main_t *im = &ip6_main; + ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; + + ASSERT (option < ARRAY_LEN (hm->options)); + + /* Already registered */ + if (hm->options[option]) + return (-1); + + hm->options[option] = options; + hm->trace[option] = trace; + + /* Set global variable */ + im->hbh_enabled = 1; + + return (0); +} + +int +ip6_hbh_unregister_option (u8 option) +{ + ip6_main_t *im = &ip6_main; + 
ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main; + + ASSERT (option < ARRAY_LEN (hm->options)); + + /* Not registered */ + if (!hm->options[option]) + return (-1); + + hm->options[option] = NULL; + hm->trace[option] = NULL; + + /* Disable global knob if this was the last option configured */ + int i; + bool found = false; + for (i = 0; i < 256; i++) + { + if (hm->options[option]) + { + found = true; + break; + } + } + if (!found) + im->hbh_enabled = 0; + + return (0); +} + +/* Global IP6 main. */ +ip6_main_t ip6_main; + +static clib_error_t * +ip6_lookup_init (vlib_main_t * vm) +{ + ip6_main_t *im = &ip6_main; + clib_error_t *error; + uword i; + + if ((error = vlib_call_init_function (vm, vnet_feature_init))) + return error; + + for (i = 0; i < ARRAY_LEN (im->fib_masks); i++) + { + u32 j, i0, i1; + + i0 = i / 32; + i1 = i % 32; + + for (j = 0; j < i0; j++) + im->fib_masks[i].as_u32[j] = ~0; + + if (i1) + im->fib_masks[i].as_u32[i0] = + clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1)); + } + + ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1); + + if (im->lookup_table_nbuckets == 0) + im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS; + + im->lookup_table_nbuckets = 1 << max_log2 (im->lookup_table_nbuckets); + + if (im->lookup_table_size == 0) + im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE; + + BV (clib_bihash_init) (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash), + "ip6 FIB fwding table", + im->lookup_table_nbuckets, im->lookup_table_size); + BV (clib_bihash_init) (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash, + "ip6 FIB non-fwding table", + im->lookup_table_nbuckets, im->lookup_table_size); + + /* Create FIB with index 0 and table id of 0. 
*/ + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0, + FIB_SOURCE_DEFAULT_ROUTE); + mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0, + MFIB_SOURCE_DEFAULT_ROUTE); + + { + pg_node_t *pn; + pn = pg_get_node (ip6_lookup_node.index); + pn->unformat_edit = unformat_pg_ip6_header; + } + + /* Unless explicitly configured, don't process HBH options */ + im->hbh_enabled = 0; + + { + icmp6_neighbor_solicitation_header_t p; + + memset (&p, 0, sizeof (p)); + + p.ip.ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + p.ip.payload_length = + clib_host_to_net_u16 (sizeof (p) - + STRUCT_OFFSET_OF + (icmp6_neighbor_solicitation_header_t, neighbor)); + p.ip.protocol = IP_PROTOCOL_ICMP6; + p.ip.hop_limit = 255; + ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0); + + p.neighbor.icmp.type = ICMP6_neighbor_solicitation; + + p.link_layer_option.header.type = + ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address; + p.link_layer_option.header.n_data_u64s = + sizeof (p.link_layer_option) / sizeof (u64); + + vlib_packet_template_init (vm, + &im->discover_neighbor_packet_template, + &p, sizeof (p), + /* alloc chunk size */ 8, + "ip6 neighbor discovery"); + } + + return error; +} + +VLIB_INIT_FUNCTION (ip6_lookup_init); + +void +ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip, + u8 * mac) +{ + ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL); + /* Invert the "u" bit */ + ip->as_u8[8] = mac[0] ^ (1 << 1); + ip->as_u8[9] = mac[1]; + ip->as_u8[10] = mac[2]; + ip->as_u8[11] = 0xFF; + ip->as_u8[12] = 0xFE; + ip->as_u8[13] = mac[3]; + ip->as_u8[14] = mac[4]; + ip->as_u8[15] = mac[5]; +} + +void +ip6_ethernet_mac_address_from_link_local_address (u8 * mac, + ip6_address_t * ip) +{ + /* Invert the previously inverted "u" bit */ + mac[0] = ip->as_u8[8] ^ (1 << 1); + mac[1] = ip->as_u8[9]; + mac[2] = ip->as_u8[10]; + mac[3] = ip->as_u8[13]; + mac[4] = ip->as_u8[14]; + mac[5] = ip->as_u8[15]; +} + +static 
clib_error_t * +test_ip6_link_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + u8 mac[6]; + ip6_address_t _a, *a = &_a; + + if (unformat (input, "%U", unformat_ethernet_address, mac)) + { + ip6_link_local_address_from_ethernet_mac_address (a, mac); + vlib_cli_output (vm, "Link local address: %U", format_ip6_address, a); + ip6_ethernet_mac_address_from_link_local_address (mac, a); + vlib_cli_output (vm, "Original MAC address: %U", + format_ethernet_address, mac); + } + + return 0; +} + +/*? + * This command converts the given MAC Address into an IPv6 link-local + * address. + * + * @cliexpar + * Example of how to create an IPv6 link-local address: + * @cliexstart{test ip6 link 16:d9:e0:91:79:86} + * Link local address: fe80::14d9:e0ff:fe91:7986 + * Original MAC address: 16:d9:e0:91:79:86 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (test_link_command, static) = +{ + .path = "test ip6 link", + .function = test_ip6_link_command_fn, + .short_help = "test ip6 link <mac-address>", +}; +/* *INDENT-ON* */ + +int +vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config) +{ + u32 fib_index; + + fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id); + + if (~0 == fib_index) + return VNET_API_ERROR_NO_SUCH_FIB; + + fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP6, + flow_hash_config); + + return 0; +} + +static clib_error_t * +set_ip6_flow_hash_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int matched = 0; + u32 table_id = 0; + u32 flow_hash_config = 0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "table %d", &table_id)) + matched = 1; +#define _(a,v) \ + else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;} + foreach_flow_hash_bit +#undef _ + else + break; + } + + if (matched == 0) + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + + rv = 
vnet_set_ip6_flow_hash (table_id, flow_hash_config); + switch (rv) + { + case 0: + break; + + case -1: + return clib_error_return (0, "no such FIB table %d", table_id); + + default: + clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config); + break; + } + + return 0; +} + +/*? + * Configure the set of IPv6 fields used by the flow hash. + * + * @cliexpar + * @parblock + * Example of how to set the flow hash on a given table: + * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto} + * + * Example of display the configured flow hash: + * @cliexstart{show ip6 fib} + * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto + * @::/0 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * fe80::/10 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::1/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::2/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::16/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::1:ff00:0/104 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]] + * [0] [@2]: dpo-receive + * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto + * @::/0 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]] + * [0] [@0]: dpo-drop ip6 + * @::a:1:1:0:4/126 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]] + * [0] [@4]: ipv6-glean: af_packet0 + * @::a:1:1:0:7/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]] + * [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0 + * fe80::/10 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:26 
buckets:1 uRPF:25 to:[0:0]] + * [0] [@2]: dpo-receive + * fe80::fe:3eff:fe3e:9222/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]] + * [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0 + * ff02::1/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::2/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::16/128 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]] + * [0] [@2]: dpo-receive + * ff02::1:ff00:0/104 + * unicast-ip6-chain + * [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]] + * [0] [@2]: dpo-receive + * @cliexend + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = +{ + .path = "set ip6 flow-hash", + .short_help = + "set ip6 flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]", + .function = set_ip6_flow_hash_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +show_ip6_local_command_fn (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + ip6_main_t *im = &ip6_main; + ip_lookup_main_t *lm = &im->lookup_main; + int i; + + vlib_cli_output (vm, "Protocols handled by ip6_local"); + for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++) + { + if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT) + { + + u32 node_index = vlib_get_node (vm, + ip6_local_node.index)-> + next_nodes[lm->local_next_by_ip_protocol[i]]; + vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm, + node_index); + } + } + return 0; +} + + + +/*? + * Display the set of protocols handled by the local IPv6 stack. 
+ * + * @cliexpar + * Example of how to display local protocol table: + * @cliexstart{show ip6 local} + * Protocols handled by ip6_local + * 17 + * 43 + * 58 + * 115 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_ip6_local, static) = +{ + .path = "show ip6 local", + .function = show_ip6_local_command_fn, + .short_help = "show ip6 local", +}; +/* *INDENT-ON* */ + +int +vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index, + u32 table_index) +{ + vnet_main_t *vnm = vnet_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + ip6_main_t *ipm = &ip6_main; + ip_lookup_main_t *lm = &ipm->lookup_main; + vnet_classify_main_t *cm = &vnet_classify_main; + ip6_address_t *if_addr; + + if (pool_is_free_index (im->sw_interfaces, sw_if_index)) + return VNET_API_ERROR_NO_MATCHING_INTERFACE; + + if (table_index != ~0 && pool_is_free_index (cm->tables, table_index)) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index); + lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index; + + if_addr = ip6_interface_first_address (ipm, sw_if_index); + + if (NULL != if_addr) + { + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = *if_addr, + }; + u32 fib_index; + + fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, + sw_if_index); + + + if (table_index != (u32) ~ 0) + { + dpo_id_t dpo = DPO_INVALID; + + dpo_set (&dpo, + DPO_CLASSIFY, + DPO_PROTO_IP6, + classify_dpo_create (DPO_PROTO_IP6, table_index)); + + fib_table_entry_special_dpo_add (fib_index, + &pfx, + FIB_SOURCE_CLASSIFY, + FIB_ENTRY_FLAG_NONE, &dpo); + dpo_reset (&dpo); + } + else + { + fib_table_entry_special_remove (fib_index, + &pfx, FIB_SOURCE_CLASSIFY); + } + } + + return 0; +} + +static clib_error_t * +set_ip6_classify_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 table_index = ~0; + int table_index_set = 0; + u32 sw_if_index 
= ~0; + int rv; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "table-index %d", &table_index)) + table_index_set = 1; + else if (unformat (input, "intfc %U", unformat_vnet_sw_interface, + vnet_get_main (), &sw_if_index)) + ; + else + break; + } + + if (table_index_set == 0) + return clib_error_return (0, "classify table-index must be specified"); + + if (sw_if_index == ~0) + return clib_error_return (0, "interface / subif must be specified"); + + rv = vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index); + + switch (rv) + { + case 0: + break; + + case VNET_API_ERROR_NO_MATCHING_INTERFACE: + return clib_error_return (0, "No such interface"); + + case VNET_API_ERROR_NO_SUCH_ENTRY: + return clib_error_return (0, "No such classifier table"); + } + return 0; +} + +/*? + * Assign a classification table to an interface. The classification + * table is created using the '<em>classify table</em>' and '<em>classify session</em>' + * commands. Once the table is create, use this command to filter packets + * on an interface. 
+ * + * @cliexpar + * Example of how to assign a classification table to an interface: + * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_ip6_classify_command, static) = +{ + .path = "set ip6 classify", + .short_help = + "set ip6 classify intfc <interface> table-index <classify-idx>", + .function = set_ip6_classify_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +ip6_config (vlib_main_t * vm, unformat_input_t * input) +{ + ip6_main_t *im = &ip6_main; + uword heapsize = 0; + u32 tmp; + u32 nbuckets = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "hash-buckets %d", &tmp)) + nbuckets = tmp; + else if (unformat (input, "heap-size %dm", &tmp)) + heapsize = ((u64) tmp) << 20; + else if (unformat (input, "heap-size %dM", &tmp)) + heapsize = ((u64) tmp) << 20; + else if (unformat (input, "heap-size %dg", &tmp)) + heapsize = ((u64) tmp) << 30; + else if (unformat (input, "heap-size %dG", &tmp)) + heapsize = ((u64) tmp) << 30; + else + return clib_error_return (0, "unknown input '%U'", + format_unformat_error, input); + } + + im->lookup_table_nbuckets = nbuckets; + im->lookup_table_size = heapsize; + + return 0; +} + +VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6"); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip6_hop_by_hop.c b/src/vnet/ip/ip6_hop_by_hop.c new file mode 100644 index 00000000..14fbb392 --- /dev/null +++ b/src/vnet/ip/ip6_hop_by_hop.c @@ -0,0 +1,1166 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vnet/vnet.h> +#include <vnet/pg/pg.h> +#include <vppinfra/error.h> + +#include <vnet/ip/ip.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <vppinfra/elog.h> + +#include <vnet/ip/ip6_hop_by_hop.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/classify/vnet_classify.h> + +/** + * @file + * @brief In-band OAM (iOAM). + * + * In-band OAM (iOAM) is an implementation study to record operational + * information in the packet while the packet traverses a path between + * two points in the network. + * + * VPP can function as in-band OAM encapsulating, transit and + * decapsulating node. In this version of VPP in-band OAM data is + * transported as options in an IPv6 hop-by-hop extension header. Hence + * in-band OAM can be enabled for IPv6 traffic. 
+ */ + +ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main; + +#define foreach_ip6_hbyh_ioam_input_next \ + _(IP6_REWRITE, "ip6-rewrite") \ + _(IP6_LOOKUP, "ip6-lookup") \ + _(DROP, "error-drop") + +typedef enum +{ +#define _(s,n) IP6_HBYH_IOAM_INPUT_NEXT_##s, + foreach_ip6_hbyh_ioam_input_next +#undef _ + IP6_HBYH_IOAM_INPUT_N_NEXT, +} ip6_hbyh_ioam_input_next_t; + +static uword +unformat_opaque_ioam (unformat_input_t * input, va_list * args) +{ + u64 *opaquep = va_arg (*args, u64 *); + u8 *flow_name = NULL; + uword ret = 0; + + if (unformat (input, "ioam-encap %s", &flow_name)) + { + *opaquep = ioam_flow_add (1, flow_name); + ret = 1; + } + else if (unformat (input, "ioam-decap %s", &flow_name)) + { + *opaquep = ioam_flow_add (0, flow_name); + ret = 1; + } + + vec_free (flow_name); + return ret; +} + +u8 * +get_flow_name_from_flow_ctx (u32 flow_ctx) +{ + flow_data_t *flow = NULL; + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + u32 index; + + index = IOAM_MASK_DECAP_BIT (flow_ctx); + + if (pool_is_free_index (hm->flows, index)) + return NULL; + + flow = pool_elt_at_index (hm->flows, index); + return (flow->flow_name); +} + +/* The main h-b-h tracer will be invoked, no need to do much here */ +int +ip6_hbh_add_register_option (u8 option, + u8 size, + int rewrite_options (u8 * rewrite_string, + u8 * rewrite_size)) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + ASSERT (option < ARRAY_LEN (hm->add_options)); + + /* Already registered */ + if (hm->add_options[option]) + return (-1); + + hm->add_options[option] = rewrite_options; + hm->options_size[option] = size; + + return (0); +} + +int +ip6_hbh_add_unregister_option (u8 option) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + ASSERT (option < ARRAY_LEN (hm->add_options)); + + /* Not registered */ + if (!hm->add_options[option]) + return (-1); + + hm->add_options[option] = NULL; + hm->options_size[option] = 0; + return (0); +} + +/* Config handler 
registration */ +int +ip6_hbh_config_handler_register (u8 option, + int config_handler (void *data, u8 disable)) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + ASSERT (option < ARRAY_LEN (hm->config_handler)); + + /* Already registered */ + if (hm->config_handler[option]) + return (VNET_API_ERROR_INVALID_REGISTRATION); + + hm->config_handler[option] = config_handler; + + return (0); +} + +int +ip6_hbh_config_handler_unregister (u8 option) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + ASSERT (option < ARRAY_LEN (hm->config_handler)); + + /* Not registered */ + if (!hm->config_handler[option]) + return (VNET_API_ERROR_INVALID_REGISTRATION); + + hm->config_handler[option] = NULL; + return (0); +} + +/* Flow handler registration */ +int +ip6_hbh_flow_handler_register (u8 option, + u32 ioam_flow_handler (u32 flow_ctx, u8 add)) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + ASSERT (option < ARRAY_LEN (hm->flow_handler)); + + /* Already registered */ + if (hm->flow_handler[option]) + return (VNET_API_ERROR_INVALID_REGISTRATION); + + hm->flow_handler[option] = ioam_flow_handler; + + return (0); +} + +int +ip6_hbh_flow_handler_unregister (u8 option) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + ASSERT (option < ARRAY_LEN (hm->flow_handler)); + + /* Not registered */ + if (!hm->flow_handler[option]) + return (VNET_API_ERROR_INVALID_REGISTRATION); + + hm->flow_handler[option] = NULL; + return (0); +} + +typedef struct +{ + u32 next_index; +} ip6_add_hop_by_hop_trace_t; + +/* packet trace format function */ +static u8 * +format_ip6_add_hop_by_hop_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip6_add_hop_by_hop_trace_t *t = va_arg (*args, + ip6_add_hop_by_hop_trace_t *); + + s = format (s, "IP6_ADD_HOP_BY_HOP: next index %d", t->next_index); + return s; +} + 
/*
 * Node function for "ip6-add-hop-by-hop".
 *
 * For every buffer in the frame, insert the prebuilt hop-by-hop
 * header (hm->rewrite) between the IPv6 header and whatever follows:
 *   - copy the IPv6 header rewrite_length bytes to the left,
 *   - move the buffer's current pointer left by the same amount,
 *   - copy the rewrite into the gap behind the relocated header,
 *   - chain the protocol fields and grow payload_length.
 * Every packet is sent to the IP6_LOOKUP next node.
 *
 * Returns the number of vectors in the incoming frame.
 *
 * NOTE(review): nothing here checks that hm->rewrite is non-empty or
 * that the buffer has rewrite_length bytes of headroom - presumably
 * guaranteed by whoever steers packets to this node; confirm.
 */
static uword
ip6_add_hop_by_hop_node_fn (vlib_main_t * vm,
                            vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
  u32 n_left_from, *from, *to_next;
  ip_lookup_next_t next_index;
  u32 processed = 0;
  /* The rewrite is sampled once per frame, not per packet */
  u8 *rewrite = hm->rewrite;
  u32 rewrite_length = vec_len (rewrite);

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
      /*
       * Dual loop: needs 4 buffers left because from[2] and from[3]
       * are prefetched while from[0] and from[1] are processed.
       */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          u32 bi0, bi1;
          vlib_buffer_t *b0, *b1;
          u32 next0, next1;
          ip6_header_t *ip0, *ip1;
          ip6_hop_by_hop_header_t *hbh0, *hbh1;
          u64 *copy_src0, *copy_dst0, *copy_src1, *copy_dst1;
          u16 new_l0, new_l1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);

            /* Prefetch for store starting rewrite_length bytes before data */
            CLIB_PREFETCH (p2->data - rewrite_length,
                           2 * CLIB_CACHE_LINE_BYTES, STORE);
            CLIB_PREFETCH (p3->data - rewrite_length,
                           2 * CLIB_CACHE_LINE_BYTES, STORE);
          }

          /* speculatively enqueue b0 and b1 to the current next frame */
          to_next[0] = bi0 = from[0];
          to_next[1] = bi1 = from[1];
          from += 2;
          to_next += 2;
          n_left_from -= 2;
          n_left_to_next -= 2;

          b0 = vlib_get_buffer (vm, bi0);
          b1 = vlib_get_buffer (vm, bi1);

          /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
          ip0 = vlib_buffer_get_current (b0);
          ip1 = vlib_buffer_get_current (b1);

          /* Copy the ip header left by the required amount */
          copy_dst0 = (u64 *) (((u8 *) ip0) - rewrite_length);
          copy_dst1 = (u64 *) (((u8 *) ip1) - rewrite_length);
          copy_src0 = (u64 *) ip0;
          copy_src1 = (u64 *) ip1;

          /* Five u64 stores = 40 bytes, copied lowest address first */
          copy_dst0[0] = copy_src0[0];
          copy_dst0[1] = copy_src0[1];
          copy_dst0[2] = copy_src0[2];
          copy_dst0[3] = copy_src0[3];
          copy_dst0[4] = copy_src0[4];

          copy_dst1[0] = copy_src1[0];
          copy_dst1[1] = copy_src1[1];
          copy_dst1[2] = copy_src1[2];
          copy_dst1[3] = copy_src1[3];
          copy_dst1[4] = copy_src1[4];

          /* Negative advance: current now points at the relocated header */
          vlib_buffer_advance (b0, -(word) rewrite_length);
          vlib_buffer_advance (b1, -(word) rewrite_length);
          ip0 = vlib_buffer_get_current (b0);
          ip1 = vlib_buffer_get_current (b1);

          /* The hop-by-hop header goes right behind the IPv6 header */
          hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
          hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
          /* $$$ tune, rewrite_length is a multiple of 8 */
          clib_memcpy (hbh0, rewrite, rewrite_length);
          clib_memcpy (hbh1, rewrite, rewrite_length);
          /* Patch the protocol chain, insert the h-b-h (type 0) header */
          hbh0->protocol = ip0->protocol;
          hbh1->protocol = ip1->protocol;
          ip0->protocol = 0;
          ip1->protocol = 0;
          /* payload_length is kept in network order; grow it by the insert */
          new_l0 =
            clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
          new_l1 =
            clib_net_to_host_u16 (ip1->payload_length) + rewrite_length;
          ip0->payload_length = clib_host_to_net_u16 (new_l0);
          ip1->payload_length = clib_host_to_net_u16 (new_l1);

          /* Populate the (first) h-b-h list elt */
          next0 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;
          next1 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;


          /* $$$$$ End of processing 2 x packets $$$$$ */

          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
            {
              if (b0->flags & VLIB_BUFFER_IS_TRACED)
                {
                  ip6_add_hop_by_hop_trace_t *t =
                    vlib_add_trace (vm, node, b0, sizeof (*t));
                  t->next_index = next0;
                }
              if (b1->flags & VLIB_BUFFER_IS_TRACED)
                {
                  ip6_add_hop_by_hop_trace_t *t =
                    vlib_add_trace (vm, node, b1, sizeof (*t));
                  t->next_index = next1;
                }
            }
          processed += 2;
          /* verify speculative enqueues, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, bi1, next0, next1);
        }
      /* Single loop: same steps as above, one buffer at a time */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          u32 bi0;
          vlib_buffer_t *b0;
          u32 next0;
          ip6_header_t *ip0;
          ip6_hop_by_hop_header_t *hbh0;
          u64 *copy_src0, *copy_dst0;
          u16 new_l0;

          /* speculatively enqueue b0 to the current next frame */
          bi0 = from[0];
          to_next[0] = bi0;
          from += 1;
          to_next += 1;
          n_left_from -= 1;
          n_left_to_next -= 1;

          b0 = vlib_get_buffer (vm, bi0);

          ip0 = vlib_buffer_get_current (b0);

          /* Copy the ip header left by the required amount */
          copy_dst0 = (u64 *) (((u8 *) ip0) - rewrite_length);
          copy_src0 = (u64 *) ip0;

          copy_dst0[0] = copy_src0[0];
          copy_dst0[1] = copy_src0[1];
          copy_dst0[2] = copy_src0[2];
          copy_dst0[3] = copy_src0[3];
          copy_dst0[4] = copy_src0[4];
          vlib_buffer_advance (b0, -(word) rewrite_length);
          ip0 = vlib_buffer_get_current (b0);

          hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
          /* $$$ tune, rewrite_length is a multiple of 8 */
          clib_memcpy (hbh0, rewrite, rewrite_length);
          /* Patch the protocol chain, insert the h-b-h (type 0) header */
          hbh0->protocol = ip0->protocol;
          ip0->protocol = 0;
          new_l0 =
            clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
          ip0->payload_length = clib_host_to_net_u16 (new_l0);

          /* Populate the (first) h-b-h list elt */
          next0 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;

          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
            {
              ip6_add_hop_by_hop_trace_t *t =
                vlib_add_trace (vm, node, b0, sizeof (*t));
              t->next_index = next0;
            }

          processed++;

          /* verify speculative enqueue, maybe switch current next frame */
          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
                                           to_next, n_left_to_next,
                                           bi0, next0);
        }

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  /* One counter update per frame rather than per packet */
  vlib_node_increment_counter (vm, ip6_add_hop_by_hop_node.index,
                               IP6_ADD_HOP_BY_HOP_ERROR_PROCESSED, processed);
  return frame->n_vectors;
}
ip6_pop_hop_by_hop_trace_t *t = + va_arg (*args, ip6_pop_hop_by_hop_trace_t *); + + s = format (s, "IP6_POP_HOP_BY_HOP: next index %d", t->next_index); + return s; +} + +int +ip6_hbh_pop_register_option (u8 option, + int options (vlib_buffer_t * b, + ip6_header_t * ip, + ip6_hop_by_hop_option_t * opt)) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + ASSERT (option < ARRAY_LEN (hm->pop_options)); + + /* Already registered */ + if (hm->pop_options[option]) + return (-1); + + hm->pop_options[option] = options; + + return (0); +} + +int +ip6_hbh_pop_unregister_option (u8 option) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + ASSERT (option < ARRAY_LEN (hm->pop_options)); + + /* Not registered */ + if (!hm->pop_options[option]) + return (-1); + + hm->pop_options[option] = NULL; + return (0); +} + +vlib_node_registration_t ip6_pop_hop_by_hop_node; + +#define foreach_ip6_pop_hop_by_hop_error \ +_(PROCESSED, "Pkts w/ removed ip6 hop-by-hop options") \ +_(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options") \ +_(OPTION_FAILED, "ip6 pop hop-by-hop failed to process") + +typedef enum +{ +#define _(sym,str) IP6_POP_HOP_BY_HOP_ERROR_##sym, + foreach_ip6_pop_hop_by_hop_error +#undef _ + IP6_POP_HOP_BY_HOP_N_ERROR, +} ip6_pop_hop_by_hop_error_t; + +static char *ip6_pop_hop_by_hop_error_strings[] = { +#define _(sym,string) string, + foreach_ip6_pop_hop_by_hop_error +#undef _ +}; + +static inline void +ioam_pop_hop_by_hop_processing (vlib_main_t * vm, + ip6_header_t * ip0, + ip6_hop_by_hop_header_t * hbh0, + vlib_buffer_t * b) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + ip6_hop_by_hop_option_t *opt0, *limit0; + u8 type0; + + if (!hbh0 || !ip0) + return; + + opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1); + limit0 = (ip6_hop_by_hop_option_t *) + ((u8 *) hbh0 + ((hbh0->length + 1) << 3)); + + /* Scan the set of h-b-h options, process ones that we understand */ + while (opt0 < limit0) + { + type0 = opt0->type; + switch (type0) 
+ { + case 0: /* Pad1 */ + opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1; + continue; + case 1: /* PadN */ + break; + default: + if (hm->pop_options[type0]) + { + if ((*hm->pop_options[type0]) (b, ip0, opt0) < 0) + { + vlib_node_increment_counter (vm, + ip6_pop_hop_by_hop_node.index, + IP6_POP_HOP_BY_HOP_ERROR_OPTION_FAILED, + 1); + } + } + } + opt0 = + (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length + + sizeof (ip6_hop_by_hop_option_t)); + } +} + +static uword +ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, *to_next; + ip_lookup_next_t next_index; + u32 processed = 0; + u32 no_header = 0; + + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + u32 bi0, bi1; + vlib_buffer_t *b0, *b1; + u32 next0, next1; + u32 adj_index0, adj_index1; + ip6_header_t *ip0, *ip1; + ip_adjacency_t *adj0, *adj1; + ip6_hop_by_hop_header_t *hbh0, *hbh1; + u64 *copy_dst0, *copy_src0, *copy_dst1, *copy_src1; + u16 new_l0, new_l1; + + /* Prefetch next iteration. 
*/ + { + vlib_buffer_t *p2, *p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE); + } + + /* speculatively enqueue b0 and b1 to the current next frame */ + to_next[0] = bi0 = from[0]; + to_next[1] = bi1 = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + /* $$$$$ Dual loop: process 2 x packets here $$$$$ */ + ip0 = vlib_buffer_get_current (b0); + ip1 = vlib_buffer_get_current (b1); + adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX]; + adj0 = adj_get (adj_index0); + adj1 = adj_get (adj_index1); + + next0 = adj0->lookup_next_index; + next1 = adj1->lookup_next_index; + + hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1); + hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1); + + ioam_pop_hop_by_hop_processing (vm, ip0, hbh0, b0); + ioam_pop_hop_by_hop_processing (vm, ip1, hbh1, b1); + + vlib_buffer_advance (b0, (hbh0->length + 1) << 3); + vlib_buffer_advance (b1, (hbh1->length + 1) << 3); + + new_l0 = clib_net_to_host_u16 (ip0->payload_length) - + ((hbh0->length + 1) << 3); + new_l1 = clib_net_to_host_u16 (ip1->payload_length) - + ((hbh1->length + 1) << 3); + + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip1->payload_length = clib_host_to_net_u16 (new_l1); + + ip0->protocol = hbh0->protocol; + ip1->protocol = hbh1->protocol; + + copy_src0 = (u64 *) ip0; + copy_src1 = (u64 *) ip1; + copy_dst0 = copy_src0 + (hbh0->length + 1); + copy_dst0[4] = copy_src0[4]; + copy_dst0[3] = copy_src0[3]; + copy_dst0[2] = copy_src0[2]; + copy_dst0[1] = copy_src0[1]; + copy_dst0[0] = copy_src0[0]; + copy_dst1 = copy_src1 + (hbh1->length + 1); + copy_dst1[4] = copy_src1[4]; + copy_dst1[3] = 
copy_src1[3]; + copy_dst1[2] = copy_src1[2]; + copy_dst1[1] = copy_src1[1]; + copy_dst1[0] = copy_src1[0]; + processed += 2; + /* $$$$$ End of processing 2 x packets $$$$$ */ + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + if (b0->flags & VLIB_BUFFER_IS_TRACED) + { + ip6_pop_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + } + if (b1->flags & VLIB_BUFFER_IS_TRACED) + { + ip6_pop_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b1, sizeof (*t)); + t->next_index = next1; + } + } + + /* verify speculative enqueues, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + bi0, bi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + u32 adj_index0; + ip6_header_t *ip0; + ip_adjacency_t *adj0; + ip6_hop_by_hop_header_t *hbh0; + u64 *copy_dst0, *copy_src0; + u16 new_l0; + + /* speculatively enqueue b0 to the current next frame */ + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + ip0 = vlib_buffer_get_current (b0); + adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX]; + adj0 = adj_get (adj_index0); + + /* Default use the next_index from the adjacency. */ + next0 = adj0->lookup_next_index; + + /* Perfectly normal to end up here w/ out h-b-h header */ + hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1); + + /* TODO:Temporarily doing it here.. 
do this validation in end_of_path_cb */ + ioam_pop_hop_by_hop_processing (vm, ip0, hbh0, b0); + /* Pop the trace data */ + vlib_buffer_advance (b0, (hbh0->length + 1) << 3); + new_l0 = clib_net_to_host_u16 (ip0->payload_length) - + ((hbh0->length + 1) << 3); + ip0->payload_length = clib_host_to_net_u16 (new_l0); + ip0->protocol = hbh0->protocol; + copy_src0 = (u64 *) ip0; + copy_dst0 = copy_src0 + (hbh0->length + 1); + copy_dst0[4] = copy_src0[4]; + copy_dst0[3] = copy_src0[3]; + copy_dst0[2] = copy_src0[2]; + copy_dst0[1] = copy_src0[1]; + copy_dst0[0] = copy_src0[0]; + processed++; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) + && (b0->flags & VLIB_BUFFER_IS_TRACED))) + { + ip6_pop_hop_by_hop_trace_t *t = + vlib_add_trace (vm, node, b0, sizeof (*t)); + t->next_index = next0; + } + + /* verify speculative enqueue, maybe switch current next frame */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index, + IP6_POP_HOP_BY_HOP_ERROR_PROCESSED, processed); + vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index, + IP6_POP_HOP_BY_HOP_ERROR_NO_HOHO, no_header); + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = +{ + .function = ip6_pop_hop_by_hop_node_fn,.name = + "ip6-pop-hop-by-hop",.vector_size = sizeof (u32),.format_trace = + format_ip6_pop_hop_by_hop_trace,.type = + VLIB_NODE_TYPE_INTERNAL,.sibling_of = "ip6-lookup",.n_errors = + ARRAY_LEN (ip6_pop_hop_by_hop_error_strings),.error_strings = + ip6_pop_hop_by_hop_error_strings, + /* See ip/lookup.h */ +.n_next_nodes = 0,}; + +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (ip6_pop_hop_by_hop_node, + ip6_pop_hop_by_hop_node_fn); +static clib_error_t * +ip6_hop_by_hop_ioam_init (vlib_main_t * vm) +{ + clib_error_t *error; + ip6_hop_by_hop_ioam_main_t *hm = 
&ip6_hop_by_hop_ioam_main; + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return (error); + + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + hm->vlib_main = vm; + hm->vnet_main = vnet_get_main (); + hm->unix_time_0 = (u32) time (0); /* Store starting time */ + hm->vlib_time_0 = vlib_time_now (vm); + hm->ioam_flag = IOAM_HBYH_MOD; + memset (hm->add_options, 0, sizeof (hm->add_options)); + memset (hm->pop_options, 0, sizeof (hm->pop_options)); + memset (hm->options_size, 0, sizeof (hm->options_size)); + + vnet_classify_register_unformat_opaque_index_fn (unformat_opaque_ioam); + + return (0); +} + +VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_init); + +int +ip6_ioam_set_rewrite (u8 ** rwp, int has_trace_option, + int has_pot_option, int has_seqno_option) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + u8 *rewrite = NULL; + u32 size, rnd_size; + ip6_hop_by_hop_header_t *hbh; + u8 *current; + u8 *trace_data_size = NULL; + u8 *pot_data_size = NULL; + + vec_free (*rwp); + + if (has_trace_option == 0 && has_pot_option == 0) + return -1; + + /* Work out how much space we need */ + size = sizeof (ip6_hop_by_hop_header_t); + + //if (has_trace_option && hm->get_sizeof_options[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] != 0) + if (has_trace_option + && hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] != 0) + { + size += hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST]; + } + if (has_pot_option + && hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0) + { + size += hm->options_size[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT]; + } + + if (has_seqno_option) + { + size += hm->options_size[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE]; + } + + /* Round to a multiple of 8 octets */ + rnd_size = (size + 7) & ~7; + + /* allocate it, zero-fill / pad by construction */ + vec_validate (rewrite, rnd_size - 1); + + hbh = (ip6_hop_by_hop_header_t *) rewrite; + /* Length of header in 8 octet units, not incl first 8 octets */ + 
hbh->length = (rnd_size >> 3) - 1; + current = (u8 *) (hbh + 1); + + if (has_trace_option + && hm->add_options[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] != 0) + { + if (0 != (hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST])) + { + trace_data_size = + &hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST]; + if (0 == + hm->add_options[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] (current, + trace_data_size)) + current += *trace_data_size; + } + } + if (has_pot_option + && hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0) + { + pot_data_size = + &hm->options_size[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT]; + if (0 == + hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] (current, + pot_data_size)) + current += *pot_data_size; + } + + if (has_seqno_option && + (hm->add_options[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] != 0)) + { + if (0 == hm->add_options[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] (current, + & + (hm->options_size + [HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE]))) + current += hm->options_size[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE]; + } + + *rwp = rewrite; + return 0; +} + +clib_error_t * +clear_ioam_rewrite_fn (void) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + vec_free (hm->rewrite); + hm->rewrite = 0; + hm->has_trace_option = 0; + hm->has_pot_option = 0; + hm->has_seqno_option = 0; + hm->has_analyse_option = 0; + if (hm->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST]) + hm->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] (NULL, 1); + + if (hm->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT]) + hm->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] (NULL, 1); + + if (hm->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE]) + { + hm->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] ((void *) + &hm->has_analyse_option, + 1); + } + + return 0; +} + +clib_error_t * +clear_ioam_rewrite_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + return (clear_ioam_rewrite_fn ()); +} + +/*? 
+ * This command clears all the In-band OAM (iOAM) features enabled by + * the '<em>set ioam rewrite</em>' command. Use '<em>show ioam summary</em>' to + * verify the configured settings cleared. + * + * @cliexpar + * Example of how to clear iOAM features: + * @cliexcmd{clear ioam rewrite} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_clear_ioam_rewrite_cmd, static) = { + .path = "clear ioam rewrite", + .short_help = "clear ioam rewrite", + .function = clear_ioam_rewrite_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +ip6_ioam_enable (int has_trace_option, int has_pot_option, + int has_seqno_option, int has_analyse_option) +{ + int rv; + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + rv = ip6_ioam_set_rewrite (&hm->rewrite, has_trace_option, + has_pot_option, has_seqno_option); + + switch (rv) + { + case 0: + if (has_trace_option) + { + hm->has_trace_option = has_trace_option; + if (hm->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST]) + hm->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] (NULL, + 0); + } + + if (has_pot_option) + { + hm->has_pot_option = has_pot_option; + if (hm->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT]) + hm->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] (NULL, + 0); + } + hm->has_analyse_option = has_analyse_option; + if (has_seqno_option) + { + hm->has_seqno_option = has_seqno_option; + if (hm->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE]) + { + hm->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] ((void *) + &has_analyse_option, + 0); + } + } + break; + + default: + return clib_error_return_code (0, rv, 0, + "ip6_ioam_set_rewrite returned %d", rv); + } + + return 0; +} + + +static clib_error_t * +ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int has_trace_option = 0; + int has_pot_option = 0; + int has_seqno_option = 0; + int has_analyse_option = 0; + clib_error_t *rv = 0; + + while (unformat_check_input (input) != 
UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "trace")) + has_trace_option = 1; + else if (unformat (input, "pot")) + has_pot_option = 1; + else if (unformat (input, "seqno")) + has_seqno_option = 1; + else if (unformat (input, "analyse")) + has_analyse_option = 1; + else + break; + } + + + rv = ip6_ioam_enable (has_trace_option, has_pot_option, + has_seqno_option, has_analyse_option); + + return rv; +} + +/*? + * This command is used to enable In-band OAM (iOAM) features on IPv6. + * '<em>trace</em>' is used to enable iOAM trace feature. '<em>pot</em>' is used to + * enable the Proof Of Transit feature. '<em>ppc</em>' is used to indicate the + * Per Packet Counter feature for Edge to Edge processing. '<em>ppc</em>' is + * used to indicate if this node is an '<em>encap</em>' node (iOAM edge node + * where packet enters iOAM domain), a '<em>decap</em>' node (iOAM edge node + * where packet leaves iOAM domain) or '<em>none</em>' (iOAM node where packet + * is in-transit through the iOAM domain). '<em>ppc</em>' can only be set if + * '<em>trace</em>' or '<em>pot</em>' is enabled. + * + * Use '<em>clear ioam rewrite</em>' to disable all features enabled by this + * command. Use '<em>show ioam summary</em>' to verify the configured settings. 
+ * + * @cliexpar + * Example of how to enable trace and pot with ppc set to encap: + * @cliexcmd{set ioam rewrite trace pot ppc encap} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = { + .path = "set ioam rewrite", + .short_help = "set ioam [trace] [pot] [seqno] [analyse]", + .function = ip6_set_ioam_rewrite_command_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + u8 *s = 0; + + + if (!is_zero_ip6_address (&hm->adj)) + { + s = format (s, " REWRITE FLOW CONFIGS - \n"); + s = format (s, " Destination Address : %U\n", + format_ip6_address, &hm->adj, sizeof (ip6_address_t)); + s = + format (s, " Flow operation : %d (%s)\n", + hm->ioam_flag, + (hm->ioam_flag == + IOAM_HBYH_ADD) ? "Add" : ((hm->ioam_flag == + IOAM_HBYH_MOD) ? "Mod" : "Pop")); + } + else + { + s = format (s, " REWRITE FLOW CONFIGS - Not configured\n"); + } + + + s = format (s, " TRACE OPTION - %d (%s)\n", + hm->has_trace_option, + (hm->has_trace_option ? "Enabled" : "Disabled")); + if (hm->has_trace_option) + s = + format (s, + "Try 'show ioam trace and show ioam-trace profile' for more information\n"); + + + s = format (s, " POT OPTION - %d (%s)\n", + hm->has_pot_option, + (hm->has_pot_option ? "Enabled" : "Disabled")); + if (hm->has_pot_option) + s = + format (s, + "Try 'show ioam pot and show pot profile' for more information\n"); + + s = format (s, " EDGE TO EDGE - SeqNo OPTION - %d (%s)\n", + hm->has_seqno_option, + hm->has_seqno_option ? "Enabled" : "Disabled"); + if (hm->has_seqno_option) + s = format (s, "Try 'show ioam e2e' for more information\n"); + + s = format (s, " iOAM Analyse OPTION - %d (%s)\n", + hm->has_analyse_option, + hm->has_analyse_option ? "Enabled" : "Disabled"); + + vlib_cli_output (vm, "%v", s); + vec_free (s); + return 0; +} + +/*? 
+ * This command displays the current configuration data for In-band + * OAM (iOAM). + * + * @cliexpar + * Example to show the iOAM configuration: + * @cliexstart{show ioam summary} + * REWRITE FLOW CONFIGS - + * Destination Address : ff02::1 + * Flow operation : 2 (Pop) + * TRACE OPTION - 1 (Enabled) + * Try 'show ioam trace and show ioam-trace profile' for more information + * POT OPTION - 1 (Enabled) + * Try 'show ioam pot and show pot profile' for more information + * EDGE TO EDGE - PPC OPTION - 1 (Encap) + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = { + .path = "show ioam summary", + .short_help = "show ioam summary", + .function = ip6_show_ioam_summary_cmd_fn, +}; +/* *INDENT-ON* */ + +void +vnet_register_ioam_end_of_path_callback (void *cb) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + hm->ioam_end_of_path_cb = cb; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip6_hop_by_hop.h b/src/vnet/ip/ip6_hop_by_hop.h new file mode 100644 index 00000000..5f12f647 --- /dev/null +++ b/src/vnet/ip/ip6_hop_by_hop.h @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/* Number of slots in every per-option table (option type is a u8) */
#define MAX_IP6_HBH_OPTION      256

/* To determine whether a node is decap MS bit is set */
#define IOAM_DECAP_BIT 0x80000000

/*
 * Non-zero when the decap bit is set in opaque_data.
 * The argument is parenthesized so that expressions such as
 * IOAM_DEAP_ENABLED (a | b) test the whole value.
 */
#define IOAM_DEAP_ENABLED(opaque_data) ((opaque_data) & IOAM_DECAP_BIT)

/* Correctly spelled alias; the historical name above is kept for callers */
#define IOAM_DECAP_ENABLED(opaque_data) IOAM_DEAP_ENABLED (opaque_data)

/* Set the decap bit in place; opaque_data must be an lvalue */
#define IOAM_SET_DECAP(opaque_data) \
    ((opaque_data) |= IOAM_DECAP_BIT)

/* Value of x with the decap bit cleared (x itself is not modified) */
#define IOAM_MASK_DECAP_BIT(x) ((x) & ~IOAM_DECAP_BIT)
ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main; + +extern clib_error_t *ip6_ioam_enable (int has_trace_option, + int has_pot_option, + int has_seqno_option, + int has_analyse_option); + +extern int ip6_ioam_set_destination (ip6_address_t * addr, u32 mask_width, + u32 vrf_id, int is_add, int is_pop, + int is_none); + +extern clib_error_t *clear_ioam_rewrite_fn (void); + +static inline u8 +is_zero_ip4_address (ip4_address_t * a) +{ + return (a->as_u32 == 0); +} + +static inline void +copy_ip6_address (ip6_address_t * dst, ip6_address_t * src) +{ + dst->as_u64[0] = src->as_u64[0]; + dst->as_u64[1] = src->as_u64[1]; +} + +static inline void +set_zero_ip6_address (ip6_address_t * a) +{ + a->as_u64[0] = 0; + a->as_u64[1] = 0; +} + +static inline u8 +cmp_ip6_address (ip6_address_t * a1, ip6_address_t * a2) +{ + return ((a1->as_u64[0] == a2->as_u64[0]) + && (a1->as_u64[1] == a2->as_u64[1])); +} + +static inline u8 +is_zero_ip6_address (ip6_address_t * a) +{ + return ((a->as_u64[0] == 0) && (a->as_u64[1] == 0)); +} + +int ip6_hbh_add_register_option (u8 option, + u8 size, + int rewrite_options (u8 * rewrite_string, + u8 * size)); +int ip6_hbh_add_unregister_option (u8 option); + +int ip6_hbh_pop_register_option (u8 option, + int options (vlib_buffer_t * b, + ip6_header_t * ip, + ip6_hop_by_hop_option_t * opt)); +int ip6_hbh_pop_unregister_option (u8 option); + +int +ip6_hbh_get_sizeof_register_option (u8 option, + int get_sizeof_hdr_options (u32 * + rewrite_size)); + +int +ip6_ioam_set_rewrite (u8 ** rwp, int has_trace_option, + int has_pot_option, int has_seq_no); + +int +ip6_hbh_config_handler_register (u8 option, + int config_handler (void *data, u8 disable)); + +int ip6_hbh_config_handler_unregister (u8 option); + +int ip6_hbh_flow_handler_register (u8 option, + u32 ioam_flow_handler (u32 flow_ctx, + u8 add)); + +int ip6_hbh_flow_handler_unregister (u8 option); + +u8 *get_flow_name_from_flow_ctx (u32 flow_ctx); + +static inline flow_data_t * +get_flow (u32 index) 
+{ + flow_data_t *flow = NULL; + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + + if (pool_is_free_index (hm->flows, index)) + return NULL; + + flow = pool_elt_at_index (hm->flows, index); + return flow; +} + +static inline u32 +get_flow_data_from_flow_ctx (u32 flow_ctx, u8 option) +{ + flow_data_t *flow = NULL; + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + u32 index; + + index = IOAM_MASK_DECAP_BIT (flow_ctx); + //flow = pool_elt_at_index (hm->flows, index); + flow = &hm->flows[index]; + return (flow->ctx[option]); +} + +static inline u8 +is_seqno_enabled (void) +{ + return (ip6_hop_by_hop_ioam_main.has_seqno_option); +} + +int ip6_trace_profile_setup (); + +static inline u32 +ioam_flow_add (u8 encap, u8 * flow_name) +{ + ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main; + flow_data_t *flow = 0; + u32 index = 0; + u8 i; + + pool_get (hm->flows, flow); + memset (flow, 0, sizeof (flow_data_t)); + + index = flow - hm->flows; + strncpy ((char *) flow->flow_name, (char *) flow_name, 31); + + if (!encap) + IOAM_SET_DECAP (index); + + for (i = 0; i < 255; i++) + { + if (hm->flow_handler[i]) + flow->ctx[i] = hm->flow_handler[i] (index, 1); + } + return (index); +} + +always_inline ip6_hop_by_hop_option_t * +ip6_hbh_get_option (ip6_hop_by_hop_header_t * hbh0, u8 option_to_search) +{ + ip6_hop_by_hop_option_t *opt0, *limit0; + u8 type0; + + if (!hbh0) + return NULL; + + opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1); + limit0 = (ip6_hop_by_hop_option_t *) + ((u8 *) hbh0 + ((hbh0->length + 1) << 3)); + + /* Scan the set of h-b-h options, process ones that we understand */ + while (opt0 < limit0) + { + type0 = opt0->type; + switch (type0) + { + case 0: /* Pad1 */ + opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0) + 1; + continue; + case 1: /* PadN */ + break; + default: + if (type0 == option_to_search) + return opt0; + break; + } + opt0 = + (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length + + sizeof 
(ip6_hop_by_hop_option_t)); + } + return NULL; +} + +#endif /* __included_ip6_hop_by_hop_ioam_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip6_hop_by_hop_packet.h b/src/vnet/ip/ip6_hop_by_hop_packet.h new file mode 100644 index 00000000..dd8c7d5e --- /dev/null +++ b/src/vnet/ip/ip6_hop_by_hop_packet.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+#ifndef __included_ip6_hop_by_hop_packet_h__
+#define __included_ip6_hop_by_hop_packet_h__
+
+/* Fixed two-octet start of an IPv6 hop-by-hop options extension header. */
+typedef struct
+{
+  /* Protocol for next header */
+  u8 protocol;
+  /*
+   * Length of hop_by_hop header in 8 octet units,
+   * not including the first 8 octets
+   */
+  u8 length;
+} ip6_hop_by_hop_header_t;
+
+/*
+ * Type/length prefix shared by every option that carries data.
+ * The HBH_OPTION_TYPE_* macros below are masks/values for the three
+ * high-order bits of the type octet.
+ */
+typedef struct
+{
+  /* Option Type */
+  /* Two highest bits: what to do with an unrecognised option */
+#define HBH_OPTION_TYPE_SKIP_UNKNOWN (0x00)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN (0x40)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP (0x80)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST (0xc0)
+  /* Mask selecting the two action bits above */
+#define HBH_OPTION_TYPE_HIGH_ORDER_BITS (0xc0)
+  /* Third highest bit: option data may change en route */
+#define HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE (1<<5)
+  u8 type;
+  /* Length in octets of the option data field */
+  u8 length;
+} ip6_hop_by_hop_option_t;
+
+/* Option type values used for iOAM; NOTE(review): not confirmed as IANA assigned */
+/* $$$$ IANA banana constants */
+#define HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST 59        /* Third highest bit set (change en-route) */
+#define HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT 60       /* Third highest bit set (change en-route) */
+#define HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE 29
+
+#endif /* __included_ip6_hop_by_hop_packet_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c
new file mode 100644
index 00000000..ffdc4727
--- /dev/null
+++ b/src/vnet/ip/ip6_input.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +/* + * ip/ip6_input.c: IP v6 input node + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ppp/ppp.h>
+#include <vnet/hdlc/hdlc.h>
+
+/* Per-packet trace record: the first 64 bytes of the packet. */
+typedef struct
+{
+  u8 packet_data[64];
+} ip6_input_trace_t;
+
+/* Trace formatter: prints the captured bytes as an IPv6 header. */
+static u8 *
+format_ip6_input_trace (u8 * s, va_list * va)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+  ip6_input_trace_t *t = va_arg (*va, ip6_input_trace_t *);
+
+  s = format (s, "%U",
+              format_ip6_header, t->packet_data, sizeof (t->packet_data));
+
+  return s;
+}
+
+/* Next-node indices; must match .next_nodes in the node registration. */
+typedef enum
+{
+  IP6_INPUT_NEXT_DROP,
+  IP6_INPUT_NEXT_LOOKUP,
+  IP6_INPUT_NEXT_LOOKUP_MULTICAST,
+  IP6_INPUT_NEXT_ICMP_ERROR,
+  IP6_INPUT_N_NEXT,
+} ip6_input_next_t;
+
+/*
+ * Validate IP v6 packets and pass them either to forwarding code
+ * or drop exception packets.
+ *
+ * For every buffer in the frame: pick the unicast or multicast lookup
+ * next node and feature arc from the destination address, start the
+ * feature arc, bump the per-interface IP6 counter, then check version,
+ * hop limit and length. The checks are applied in that order and each
+ * one overwrites the previous result, so "too short" takes precedence
+ * over "time expired", which takes precedence over "bad version".
+ * Returns the number of buffers in the frame.
+ */
+static uword
+ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip6_main_t *im = &ip6_main;
+  ip_lookup_main_t *lm = &im->lookup_main;
+  u32 n_left_from, *from, *to_next;
+  ip6_input_next_t next_index;
+  vlib_node_runtime_t *error_node =
+    vlib_node_get_runtime (vm, ip6_input_node.index);
+  vlib_simple_counter_main_t *cm;
+  u32 thread_index = vlib_get_thread_index ();
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+                                   /* stride */ 1,
+                                   sizeof (ip6_input_trace_t));
+
+  cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+                         VNET_INTERFACE_COUNTER_IP6);
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Dual loop: two packets per iteration while two more can be prefetched */
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          vlib_buffer_t *p0, *p1;
+          ip6_header_t *ip0, *ip1;
+          u32 pi0, sw_if_index0, next0 = 0;
+          u32 pi1, sw_if_index1, next1 = 0;
+          u8 error0, error1, arc0, arc1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t *p2, *p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, LOAD);
+            vlib_prefetch_buffer_header (p3, LOAD);
+
+            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+            CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
+          }
+
+          pi0 = from[0];
+          pi1 = from[1];
+
+          /* Speculatively enqueue both buffers to the cached next frame */
+          to_next[0] = pi0;
+          to_next[1] = pi1;
+          from += 2;
+          to_next += 2;
+          n_left_from -= 2;
+          n_left_to_next -= 2;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          p1 = vlib_get_buffer (vm, pi1);
+
+          ip0 = vlib_buffer_get_current (p0);
+          ip1 = vlib_buffer_get_current (p1);
+
+          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+          sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
+
+          if (PREDICT_FALSE (ip6_address_is_multicast (&ip0->dst_address)))
+            {
+              arc0 = lm->mcast_feature_arc_index;
+              next0 = IP6_INPUT_NEXT_LOOKUP_MULTICAST;
+            }
+          else
+            {
+              arc0 = lm->ucast_feature_arc_index;
+              next0 = IP6_INPUT_NEXT_LOOKUP;
+            }
+
+          if (PREDICT_FALSE (ip6_address_is_multicast (&ip1->dst_address)))
+            {
+              arc1 = lm->mcast_feature_arc_index;
+              next1 = IP6_INPUT_NEXT_LOOKUP_MULTICAST;
+            }
+          else
+            {
+              arc1 = lm->ucast_feature_arc_index;
+              next1 = IP6_INPUT_NEXT_LOOKUP;
+            }
+
+          vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+          vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0;
+
+          /* May replace next0/next1 with the first feature node on the arc */
+          vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+          vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1);
+
+          vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+          vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
+
+          error0 = error1 = IP6_ERROR_NONE;
+
+          /* Version != 6? Drop it. */
+          error0 =
+            (clib_net_to_host_u32
+             (ip0->ip_version_traffic_class_and_flow_label) >> 28) !=
+            6 ? IP6_ERROR_VERSION : error0;
+          error1 =
+            (clib_net_to_host_u32
+             (ip1->ip_version_traffic_class_and_flow_label) >> 28) !=
+            6 ? IP6_ERROR_VERSION : error1;
+
+          /*
+           * Hop limit < 1 (i.e. zero)? Flag it as expired.
+           * The test is deliberately "< 1" and not "<= 1": packets that
+           * arrive with hop limit 1, such as link-local DHCPv6 packets
+           * from a client, must not be dropped here.
+           */
+          error0 = ip0->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error0;
+          error1 = ip1->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error1;
+
+          /* L2 length must be at least minimal IP header. */
+          error0 =
+            p0->current_length <
+            sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
+          error1 =
+            p1->current_length <
+            sizeof (ip1[0]) ? IP6_ERROR_TOO_SHORT : error1;
+
+          /* Any error overrides the lookup/feature next chosen above */
+          if (PREDICT_FALSE (error0 != IP6_ERROR_NONE))
+            {
+              if (error0 == IP6_ERROR_TIME_EXPIRED)
+                {
+                  icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+                                               ICMP6_time_exceeded_ttl_exceeded_in_transit,
+                                               0);
+                  next0 = IP6_INPUT_NEXT_ICMP_ERROR;
+                }
+              else
+                {
+                  next0 = IP6_INPUT_NEXT_DROP;
+                }
+            }
+          if (PREDICT_FALSE (error1 != IP6_ERROR_NONE))
+            {
+              if (error1 == IP6_ERROR_TIME_EXPIRED)
+                {
+                  icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
+                                               ICMP6_time_exceeded_ttl_exceeded_in_transit,
+                                               0);
+                  next1 = IP6_INPUT_NEXT_ICMP_ERROR;
+                }
+              else
+                {
+                  next1 = IP6_INPUT_NEXT_DROP;
+                }
+            }
+
+          p0->error = error_node->errors[error0];
+          p1->error = error_node->errors[error1];
+
+          /* Fix up the speculative enqueue if next0/next1 differ from next_index */
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, pi1, next0, next1);
+        }
+
+      /* Single loop: same processing, one packet at a time */
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t *p0;
+          ip6_header_t *ip0;
+          u32 pi0, sw_if_index0, next0 = 0;
+          u8 error0, arc0;
+
+          pi0 = from[0];
+          to_next[0] = pi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          ip0 = vlib_buffer_get_current (p0);
+
+          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+          if (PREDICT_FALSE (ip6_address_is_multicast (&ip0->dst_address)))
+            {
+              arc0 = lm->mcast_feature_arc_index;
+              next0 = IP6_INPUT_NEXT_LOOKUP_MULTICAST;
+            }
+          else
+            {
+              arc0 = lm->ucast_feature_arc_index;
+              next0 = IP6_INPUT_NEXT_LOOKUP;
+            }
+
+          vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+          vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+
+          vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+          error0 = IP6_ERROR_NONE;
+
+          /* Version != 6? Drop it. */
+          error0 =
+            (clib_net_to_host_u32
+             (ip0->ip_version_traffic_class_and_flow_label) >> 28) !=
+            6 ? IP6_ERROR_VERSION : error0;
+
+          /*
+           * Hop limit < 1 (i.e. zero)? Flag it as expired.
+           * The test is deliberately "< 1" and not "<= 1": packets that
+           * arrive with hop limit 1, such as link-local DHCPv6 packets
+           * from a client, must not be dropped here.
+           */
+          error0 = ip0->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error0;
+
+          /* L2 length must be at least minimal IP header. */
+          error0 =
+            p0->current_length <
+            sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
+
+          if (PREDICT_FALSE (error0 != IP6_ERROR_NONE))
+            {
+              if (error0 == IP6_ERROR_TIME_EXPIRED)
+                {
+                  icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+                                               ICMP6_time_exceeded_ttl_exceeded_in_transit,
+                                               0);
+                  next0 = IP6_INPUT_NEXT_ICMP_ERROR;
+                }
+              else
+                {
+                  next0 = IP6_INPUT_NEXT_DROP;
+                }
+            }
+          p0->error = error_node->errors[error0];
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/* Error strings, generated from the foreach_ip6_error list */
+static char *ip6_error_strings[] = {
+#define _(sym,string) string,
+  foreach_ip6_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_input_node) = {
+  .function = ip6_input,
+  .name = "ip6-input",
+  .vector_size = sizeof (u32),
+
+  .n_errors = IP6_N_ERROR,
+  .error_strings = ip6_error_strings,
+
+  .n_next_nodes = IP6_INPUT_N_NEXT,
+  .next_nodes = {
+    [IP6_INPUT_NEXT_DROP] = "error-drop",
+    [IP6_INPUT_NEXT_LOOKUP] = "ip6-lookup",
+    [IP6_INPUT_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+    [IP6_INPUT_NEXT_LOOKUP_MULTICAST] = "ip6-mfib-forward-lookup",
+  },
+
+  .format_buffer = format_ip6_header,
+  .format_trace = format_ip6_input_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_input_node, ip6_input)
+/*
+ * Init: register ip6-input as the handler for IPv6 on ethernet, PPP and
+ * HDLC, hook up the packet-generator header parser and set the default
+ * flow hash seed and hop limit.
+ */
+     static clib_error_t *ip6_init (vlib_main_t * vm)
+{
+  ethernet_register_input_type (vm, ETHERNET_TYPE_IP6, ip6_input_node.index);
+  ppp_register_input_protocol (vm, PPP_PROTOCOL_ip6, ip6_input_node.index);
+  hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip6, ip6_input_node.index);
+
+  {
+    pg_node_t *pn;
+    pn = pg_get_node (ip6_input_node.index);
+    pn->unformat_edit = unformat_pg_ip6_header;
+  }
+
+  /* Set flow hash to something non-zero. */
+  ip6_main.flow_hash_seed = 0xdeadbeef;
+
+  /* Default hop limit for packets we generate. */
+  ip6_main.host_config.ttl = 64;
+
+  return /* no error */ 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
new file mode 100644
index 00000000..d549ac37
--- /dev/null
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -0,0 +1,4332 @@
+/*
+ * ip/ip6_neighbor.c: IP6 neighbor handling
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6_neighbor.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/mhash.h>
+#include <vppinfra/md5.h>
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/mfib/ip6_mfib.h>
+
+/**
+ * @file
+ * @brief IPv6 Neighbor Adjacency and Neighbor Discovery.
+ * + * The files contains the API and CLI code for managing IPv6 neighbor + * adjacency tables and neighbor discovery logic. + */ + +/* can't use sizeof link_layer_address, that's 8 */ +#define ETHER_MAC_ADDR_LEN 6 + +/* advertised prefix option */ +typedef struct +{ + /* basic advertised information */ + ip6_address_t prefix; + u8 prefix_len; + int adv_on_link_flag; + int adv_autonomous_flag; + u32 adv_valid_lifetime_in_secs; + u32 adv_pref_lifetime_in_secs; + + /* advertised values are computed from these times if decrementing */ + f64 valid_lifetime_expires; + f64 pref_lifetime_expires; + + /* local information */ + int enabled; + int deprecated_prefix_flag; + int decrement_lifetime_flag; + +#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */ +#define DEF_ADV_VALID_LIFETIME 2592000 +#define DEF_ADV_PREF_LIFETIME 604800 + + /* extensions are added here, mobile, DNS etc.. */ +} ip6_radv_prefix_t; + + +typedef struct +{ + /* group information */ + u8 type; + ip6_address_t mcast_address; + u16 num_sources; + ip6_address_t *mcast_source_address_pool; +} ip6_mldp_group_t; + +/* configured router advertisement information per ipv6 interface */ +typedef struct +{ + + /* advertised config information, zero means unspecified */ + u8 curr_hop_limit; + int adv_managed_flag; + int adv_other_flag; + u16 adv_router_lifetime_in_sec; + u32 adv_neighbor_reachable_time_in_msec; + u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations; + + /* mtu option */ + u32 adv_link_mtu; + + /* source link layer option */ + u8 link_layer_address[8]; + u8 link_layer_addr_len; + + /* prefix option */ + ip6_radv_prefix_t *adv_prefixes_pool; + + /* Hash table mapping address to index in interface advertised prefix pool. */ + mhash_t address_to_prefix_index; + + /* MLDP group information */ + ip6_mldp_group_t *mldp_group_pool; + + /* Hash table mapping address to index in mldp address pool. 
*/ + mhash_t address_to_mldp_index; + + /* local information */ + u32 sw_if_index; + int send_radv; /* radv on/off on this interface - set by config */ + int cease_radv; /* we are ceasing to send - set byf config */ + int send_unicast; + int adv_link_layer_address; + int prefix_option; + int failed_device_check; + int all_routers_mcast; + u32 seed; + u64 randomizer; + int ref_count; + adj_index_t mcast_adj_index; + + /* timing information */ +#define DEF_MAX_RADV_INTERVAL 200 +#define DEF_MIN_RADV_INTERVAL .75 * DEF_MAX_RADV_INTERVAL +#define DEF_CURR_HOP_LIMIT 64 +#define DEF_DEF_RTR_LIFETIME 3 * DEF_MAX_RADV_INTERVAL +#define MAX_DEF_RTR_LIFETIME 9000 + +#define MAX_INITIAL_RTR_ADVERT_INTERVAL 16 /* seconds */ +#define MAX_INITIAL_RTR_ADVERTISEMENTS 3 /*transmissions */ +#define MIN_DELAY_BETWEEN_RAS 3 /* seconds */ +#define MAX_DELAY_BETWEEN_RAS 1800 /* seconds */ +#define MAX_RA_DELAY_TIME .5 /* seconds */ + + f64 max_radv_interval; + f64 min_radv_interval; + f64 min_delay_between_radv; + f64 max_delay_between_radv; + f64 max_rtr_default_lifetime; + + f64 last_radv_time; + f64 last_multicast_time; + f64 next_multicast_time; + + + u32 initial_adverts_count; + f64 initial_adverts_interval; + u32 initial_adverts_sent; + + /* stats */ + u32 n_advertisements_sent; + u32 n_solicitations_rcvd; + u32 n_solicitations_dropped; + + /* Link local address to use (defaults to underlying physical for logical interfaces */ + ip6_address_t link_local_address; +} ip6_radv_t; + +typedef struct +{ + u32 next_index; + uword node_index; + uword type_opaque; + uword data; + /* Used for nd event notification only */ + void *data_callback; + u32 pid; +} pending_resolution_t; + + +typedef struct +{ + /* Hash tables mapping name to opcode. 
*/ + uword *opcode_by_name; + + /* lite beer "glean" adjacency handling */ + mhash_t pending_resolutions_by_address; + pending_resolution_t *pending_resolutions; + + /* Mac address change notification */ + mhash_t mac_changes_by_address; + pending_resolution_t *mac_changes; + + u32 *neighbor_input_next_index_by_hw_if_index; + + ip6_neighbor_t *neighbor_pool; + + mhash_t neighbor_index_by_key; + + u32 *if_radv_pool_index_by_sw_if_index; + + ip6_radv_t *if_radv_pool; + + /* Neighbor attack mitigation */ + u32 limit_neighbor_cache_size; + u32 neighbor_delete_rotor; + + /* Wildcard nd report publisher */ + uword wc_ip6_nd_publisher_node; + uword wc_ip6_nd_publisher_et; +} ip6_neighbor_main_t; + +/* ipv6 neighbor discovery - timer/event types */ +typedef enum +{ + ICMP6_ND_EVENT_INIT, +} ip6_icmp_neighbor_discovery_event_type_t; + +typedef union +{ + u32 add_del_swindex; + struct + { + u32 up_down_swindex; + u32 fib_index; + } up_down_event; +} ip6_icmp_neighbor_discovery_event_data_t; + +static ip6_neighbor_main_t ip6_neighbor_main; +static ip6_address_t ip6a_zero; /* ip6 address 0 */ + +static void wc_nd_signal_report (wc_nd_report_t * r); + +/** + * @brief publish wildcard arp event + * @param sw_if_index The interface on which the ARP entires are acted + */ +static int +vnet_nd_wc_publish (u32 sw_if_index, u8 * mac, ip6_address_t * ip6) +{ + wc_nd_report_t r = { + .sw_if_index = sw_if_index, + .ip6 = *ip6, + }; + memcpy (r.mac, mac, sizeof r.mac); + + void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); + vl_api_rpc_call_main_thread (wc_nd_signal_report, (u8 *) & r, sizeof r); + return 0; +} + +static void +wc_nd_signal_report (wc_nd_report_t * r) +{ + vlib_main_t *vm = vlib_get_main (); + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + uword ni = nm->wc_ip6_nd_publisher_node; + uword et = nm->wc_ip6_nd_publisher_et; + + if (ni == (uword) ~ 0) + return; + wc_nd_report_t *q = + vlib_process_signal_event_data (vm, ni, et, 1, sizeof *q); + + *q = 
*r; +} + +void +wc_nd_set_publisher_node (uword node_index, uword event_type) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + nm->wc_ip6_nd_publisher_node = node_index; + nm->wc_ip6_nd_publisher_et = event_type; +} + +static u8 * +format_ip6_neighbor_ip6_entry (u8 * s, va_list * va) +{ + vlib_main_t *vm = va_arg (*va, vlib_main_t *); + ip6_neighbor_t *n = va_arg (*va, ip6_neighbor_t *); + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *si; + u8 *flags = 0; + + if (!n) + return format (s, "%=12s%=20s%=6s%=20s%=40s", "Time", "Address", "Flags", + "Link layer", "Interface"); + + if (n->flags & IP6_NEIGHBOR_FLAG_DYNAMIC) + flags = format (flags, "D"); + + if (n->flags & IP6_NEIGHBOR_FLAG_STATIC) + flags = format (flags, "S"); + + if (n->flags & IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY) + flags = format (flags, "N"); + + si = vnet_get_sw_interface (vnm, n->key.sw_if_index); + s = format (s, "%=12U%=20U%=6s%=20U%=40U", + format_vlib_cpu_time, vm, n->cpu_time_last_updated, + format_ip6_address, &n->key.ip6_address, + flags ? 
(char *) flags : "", + format_ethernet_address, n->link_layer_address, + format_vnet_sw_interface_name, vnm, si); + + vec_free (flags); + return s; +} + +static void +ip6_neighbor_adj_fib_remove (ip6_neighbor_t * n, u32 fib_index) +{ + if (FIB_NODE_INDEX_INVALID != n->fib_entry_index) + { + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = n->key.ip6_address, + }; + fib_table_entry_path_remove (fib_index, + &pfx, + FIB_SOURCE_ADJ, + DPO_PROTO_IP6, + &pfx.fp_addr, + n->key.sw_if_index, ~0, + 1, FIB_ROUTE_PATH_FLAG_NONE); + } +} + +static clib_error_t * +ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm, + u32 sw_if_index, u32 flags) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_neighbor_t *n; + + if (!(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + u32 i, *to_delete = 0; + + /* *INDENT-OFF* */ + pool_foreach (n, nm->neighbor_pool, + ({ + if (n->key.sw_if_index == sw_if_index) + vec_add1 (to_delete, n - nm->neighbor_pool); + })); + /* *INDENT-ON* */ + + for (i = 0; i < vec_len (to_delete); i++) + { + n = pool_elt_at_index (nm->neighbor_pool, to_delete[i]); + mhash_unset (&nm->neighbor_index_by_key, &n->key, 0); + ip6_neighbor_adj_fib_remove (n, + ip6_fib_table_get_index_for_sw_if_index + (n->key.sw_if_index)); + pool_put (nm->neighbor_pool, n); + } + vec_free (to_delete); + } + + return 0; +} + +VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_neighbor_sw_interface_up_down); + +static void +unset_random_neighbor_entry (void) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vnm->vlib_main; + ip6_neighbor_t *e; + u32 index; + + index = pool_next_index (nm->neighbor_pool, nm->neighbor_delete_rotor); + nm->neighbor_delete_rotor = index; + + /* Try again from elt 0, could happen if an intfc goes down */ + if (index == ~0) + { + index = pool_next_index (nm->neighbor_pool, nm->neighbor_delete_rotor); + nm->neighbor_delete_rotor = index; + } + + /* Nothing left in 
the pool */ + if (index == ~0) + return; + + e = pool_elt_at_index (nm->neighbor_pool, index); + + vnet_unset_ip6_ethernet_neighbor (vm, e->key.sw_if_index, + &e->key.ip6_address, + e->link_layer_address, + ETHER_MAC_ADDR_LEN); +} + +typedef struct +{ + u8 is_add; + u8 is_static; + u8 is_no_fib_entry; + u8 link_layer_address[6]; + u32 sw_if_index; + ip6_address_t addr; +} ip6_neighbor_set_unset_rpc_args_t; + +static void ip6_neighbor_set_unset_rpc_callback + (ip6_neighbor_set_unset_rpc_args_t * a); + +static void set_unset_ip6_neighbor_rpc + (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * a, u8 * link_layer_address, int is_add, int is_static, + int is_no_fib_entry) +{ + ip6_neighbor_set_unset_rpc_args_t args; + void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); + + args.sw_if_index = sw_if_index; + args.is_add = is_add; + args.is_static = is_static; + args.is_no_fib_entry = is_no_fib_entry; + clib_memcpy (&args.addr, a, sizeof (*a)); + if (NULL != link_layer_address) + clib_memcpy (args.link_layer_address, link_layer_address, 6); + + vl_api_rpc_call_main_thread (ip6_neighbor_set_unset_rpc_callback, + (u8 *) & args, sizeof (args)); +} + +static void +ip6_nbr_probe (ip_adjacency_t * adj) +{ + icmp6_neighbor_solicitation_header_t *h; + vnet_main_t *vnm = vnet_get_main (); + ip6_main_t *im = &ip6_main; + ip_interface_address_t *ia; + ip6_address_t *dst, *src; + vnet_hw_interface_t *hi; + vnet_sw_interface_t *si; + vlib_buffer_t *b; + int bogus_length; + vlib_main_t *vm; + u32 bi = 0; + + vm = vlib_get_main (); + + si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index); + dst = &adj->sub_type.nbr.next_hop.ip6; + + if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)) + { + return; + } + src = ip6_interface_address_matching_destination (im, dst, + adj->rewrite_header. 
+ sw_if_index, &ia); + if (!src) + { + return; + } + + h = vlib_packet_template_get_packet (vm, + &im->discover_neighbor_packet_template, + &bi); + + hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index); + + h->ip.dst_address.as_u8[13] = dst->as_u8[13]; + h->ip.dst_address.as_u8[14] = dst->as_u8[14]; + h->ip.dst_address.as_u8[15] = dst->as_u8[15]; + h->ip.src_address = src[0]; + h->neighbor.target_address = dst[0]; + + clib_memcpy (h->link_layer_option.ethernet_address, + hi->hw_address, vec_len (hi->hw_address)); + + h->neighbor.icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length); + ASSERT (bogus_length == 0); + + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index; + + /* Add encapsulation string for software interface (e.g. ethernet header). */ + vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); + vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); + + { + vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index); + u32 *to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + vlib_put_frame_to_node (vm, hi->output_node_index, f); + } +} + +static void +ip6_nd_mk_complete (adj_index_t ai, ip6_neighbor_t * nbr) +{ + adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE, + ethernet_build_rewrite (vnet_get_main (), + nbr->key.sw_if_index, + adj_get_link_type (ai), + nbr->link_layer_address)); +} + +static void +ip6_nd_mk_incomplete (adj_index_t ai) +{ + ip_adjacency_t *adj = adj_get (ai); + + adj_nbr_update_rewrite (ai, + ADJ_NBR_REWRITE_FLAG_INCOMPLETE, + ethernet_build_rewrite (vnet_get_main (), + adj->rewrite_header. 
+ sw_if_index, + adj_get_link_type (ai), + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST)); +} + +#define IP6_NBR_MK_KEY(k, sw_if_index, addr) \ +{ \ + k.sw_if_index = sw_if_index; \ + k.ip6_address = *addr; \ + k.pad = 0; \ +} + +static ip6_neighbor_t * +ip6_nd_find (u32 sw_if_index, const ip6_address_t * addr) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_neighbor_t *n = NULL; + ip6_neighbor_key_t k; + uword *p; + + IP6_NBR_MK_KEY (k, sw_if_index, addr); + + p = mhash_get (&nm->neighbor_index_by_key, &k); + if (p) + { + n = pool_elt_at_index (nm->neighbor_pool, p[0]); + } + + return (n); +} + +static adj_walk_rc_t +ip6_nd_mk_complete_walk (adj_index_t ai, void *ctx) +{ + ip6_neighbor_t *nbr = ctx; + + ip6_nd_mk_complete (ai, nbr); + + return (ADJ_WALK_RC_CONTINUE); +} + +static adj_walk_rc_t +ip6_nd_mk_incomplete_walk (adj_index_t ai, void *ctx) +{ + ip6_nd_mk_incomplete (ai); + + return (ADJ_WALK_RC_CONTINUE); +} + +void +ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai) +{ + ip6_neighbor_t *nbr; + ip_adjacency_t *adj; + + adj = adj_get (ai); + + nbr = ip6_nd_find (sw_if_index, &adj->sub_type.nbr.next_hop.ip6); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_ARP: + case IP_LOOKUP_NEXT_GLEAN: + if (NULL != nbr) + { + adj_nbr_walk_nh6 (sw_if_index, &nbr->key.ip6_address, + ip6_nd_mk_complete_walk, nbr); + } + else + { + /* + * no matching ND entry. + * construct the rewrite required to for an ND packet, and stick + * that in the adj's pipe to smoke. + */ + adj_nbr_update_rewrite (ai, + ADJ_NBR_REWRITE_FLAG_INCOMPLETE, + ethernet_build_rewrite (vnm, + sw_if_index, + VNET_LINK_IP6, + VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST)); + + /* + * since the FIB has added this adj for a route, it makes sense it may + * want to forward traffic sometime soon. Let's send a speculative ND. + * just one. 
If we were to do periodically that wouldn't be bad either, + * but that's more code than i'm prepared to write at this time for + * relatively little reward. + */ + ip6_nbr_probe (adj); + } + break; + case IP_LOOKUP_NEXT_MCAST: + { + /* + * Construct a partial rewrite from the known ethernet mcast dest MAC + */ + u8 *rewrite; + u8 offset; + + rewrite = ethernet_build_rewrite (vnm, + sw_if_index, + adj->ia_link, + ethernet_ip6_mcast_dst_addr ()); + + /* + * Complete the remaining fields of the adj's rewrite to direct the + * complete of the rewrite at switch time by copying in the IP + * dst address's bytes. + * Ofset is 2 bytes into the desintation address. And we write 4 bytes. + */ + offset = vec_len (rewrite) - 2; + adj_mcast_update_rewrite (ai, rewrite, offset, 0xffffffff); + + break; + } + case IP_LOOKUP_NEXT_DROP: + case IP_LOOKUP_NEXT_PUNT: + case IP_LOOKUP_NEXT_LOCAL: + case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_MCAST_MIDCHAIN: + case IP_LOOKUP_NEXT_MIDCHAIN: + case IP_LOOKUP_NEXT_ICMP_ERROR: + case IP_LOOKUP_N_NEXT: + ASSERT (0); + break; + } +} + + +static void +ip6_neighbor_adj_fib_add (ip6_neighbor_t * n, u32 fib_index) +{ + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = n->key.ip6_address, + }; + + n->fib_entry_index = + fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ, + FIB_ENTRY_FLAG_ATTACHED, + DPO_PROTO_IP6, &pfx.fp_addr, + n->key.sw_if_index, ~0, 1, NULL, + FIB_ROUTE_PATH_FLAG_NONE); +} + +int +vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * a, + u8 * link_layer_address, + uword n_bytes_link_layer_address, + int is_static, int is_no_fib_entry) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_neighbor_key_t k; + ip6_neighbor_t *n = 0; + int make_new_nd_cache_entry = 1; + uword *p; + u32 next_index; + pending_resolution_t *pr, *mc; + + if (vlib_get_thread_index ()) + { + set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address, + 1 /* 
set new neighbor */ , is_static, + is_no_fib_entry); + return 0; + } + + k.sw_if_index = sw_if_index; + k.ip6_address = a[0]; + k.pad = 0; + + p = mhash_get (&nm->neighbor_index_by_key, &k); + if (p) + { + n = pool_elt_at_index (nm->neighbor_pool, p[0]); + /* Refuse to over-write static neighbor entry. */ + if (!is_static && (n->flags & IP6_NEIGHBOR_FLAG_STATIC)) + return -2; + make_new_nd_cache_entry = 0; + } + + if (make_new_nd_cache_entry) + { + pool_get (nm->neighbor_pool, n); + mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool, + /* old value */ 0); + n->key = k; + n->fib_entry_index = FIB_NODE_INDEX_INVALID; + + clib_memcpy (n->link_layer_address, + link_layer_address, n_bytes_link_layer_address); + + /* + * create the adj-fib. the entry in the FIB table for and to the peer. + */ + if (!is_no_fib_entry) + { + ip6_neighbor_adj_fib_add (n, + ip6_fib_table_get_index_for_sw_if_index + (n->key.sw_if_index)); + } + else + { + n->flags |= IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY; + } + } + else + { + /* + * prevent a DoS attack from the data-plane that + * spams us with no-op updates to the MAC address + */ + if (0 == memcmp (n->link_layer_address, + link_layer_address, n_bytes_link_layer_address)) + goto check_customers; + + clib_memcpy (n->link_layer_address, + link_layer_address, n_bytes_link_layer_address); + } + + /* Update time stamp and flags. */ + n->cpu_time_last_updated = clib_cpu_time_now (); + if (is_static) + n->flags |= IP6_NEIGHBOR_FLAG_STATIC; + else + n->flags |= IP6_NEIGHBOR_FLAG_DYNAMIC; + + adj_nbr_walk_nh6 (sw_if_index, + &n->key.ip6_address, ip6_nd_mk_complete_walk, n); + +check_customers: + /* Customer(s) waiting for this address to be resolved? 
*/ + p = mhash_get (&nm->pending_resolutions_by_address, a); + if (p) + { + next_index = p[0]; + + while (next_index != (u32) ~ 0) + { + pr = pool_elt_at_index (nm->pending_resolutions, next_index); + vlib_process_signal_event (vm, pr->node_index, + pr->type_opaque, pr->data); + next_index = pr->next_index; + pool_put (nm->pending_resolutions, pr); + } + + mhash_unset (&nm->pending_resolutions_by_address, a, 0); + } + + /* Customer(s) requesting ND event for this address? */ + p = mhash_get (&nm->mac_changes_by_address, a); + if (p) + { + next_index = p[0]; + + while (next_index != (u32) ~ 0) + { + int (*fp) (u32, u8 *, u32, ip6_address_t *); + int rv = 1; + mc = pool_elt_at_index (nm->mac_changes, next_index); + fp = mc->data_callback; + + /* Call the user's data callback, return 1 to suppress dup events */ + if (fp) + rv = + (*fp) (mc->data, link_layer_address, sw_if_index, &ip6a_zero); + /* + * Signal the resolver process, as long as the user + * says they want to be notified + */ + if (rv == 0) + vlib_process_signal_event (vm, mc->node_index, + mc->type_opaque, mc->data); + next_index = mc->next_index; + } + } + + return 0; +} + +int +vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * a, + u8 * link_layer_address, + uword n_bytes_link_layer_address) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_neighbor_key_t k; + ip6_neighbor_t *n; + uword *p; + int rv = 0; + + if (vlib_get_thread_index ()) + { + set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address, + 0 /* unset */ , 0, 0); + return 0; + } + + k.sw_if_index = sw_if_index; + k.ip6_address = a[0]; + k.pad = 0; + + p = mhash_get (&nm->neighbor_index_by_key, &k); + if (p == 0) + { + rv = -1; + goto out; + } + + n = pool_elt_at_index (nm->neighbor_pool, p[0]); + mhash_unset (&nm->neighbor_index_by_key, &n->key, 0); + + adj_nbr_walk_nh6 (sw_if_index, + &n->key.ip6_address, ip6_nd_mk_incomplete_walk, NULL); + + + if (FIB_NODE_INDEX_INVALID != 
n->fib_entry_index) + { + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr.ip6 = n->key.ip6_address, + }; + fib_table_entry_path_remove + (ip6_fib_table_get_index_for_sw_if_index (n->key.sw_if_index), + &pfx, + FIB_SOURCE_ADJ, + DPO_PROTO_IP6, + &pfx.fp_addr, n->key.sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); + } + pool_put (nm->neighbor_pool, n); + +out: + return rv; +} + +static void ip6_neighbor_set_unset_rpc_callback + (ip6_neighbor_set_unset_rpc_args_t * a) +{ + vlib_main_t *vm = vlib_get_main (); + if (a->is_add) + vnet_set_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr, + a->link_layer_address, 6, a->is_static, + a->is_no_fib_entry); + else + vnet_unset_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr, + a->link_layer_address, 6); +} + +static int +ip6_neighbor_sort (void *a1, void *a2) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_neighbor_t *n1 = a1, *n2 = a2; + int cmp; + cmp = vnet_sw_interface_compare (vnm, n1->key.sw_if_index, + n2->key.sw_if_index); + if (!cmp) + cmp = ip6_address_compare (&n1->key.ip6_address, &n2->key.ip6_address); + return cmp; +} + +ip6_neighbor_t * +ip6_neighbors_entries (u32 sw_if_index) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_neighbor_t *n, *ns = 0; + + /* *INDENT-OFF* */ + pool_foreach (n, nm->neighbor_pool, + ({ + if (sw_if_index != ~0 && n->key.sw_if_index != sw_if_index) + continue; + vec_add1 (ns, n[0]); + })); + /* *INDENT-ON* */ + + if (ns) + vec_sort_with_function (ns, ip6_neighbor_sort); + return ns; +} + +static clib_error_t * +show_ip6_neighbors (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_neighbor_t *n, *ns; + clib_error_t *error = 0; + u32 sw_if_index; + + /* Filter entries by interface if given. 
*/ + sw_if_index = ~0; + (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index); + + ns = ip6_neighbors_entries (sw_if_index); + if (ns) + { + vlib_cli_output (vm, "%U", format_ip6_neighbor_ip6_entry, vm, 0); + vec_foreach (n, ns) + { + vlib_cli_output (vm, "%U", format_ip6_neighbor_ip6_entry, vm, n); + } + vec_free (ns); + } + + return error; +} + +/*? + * This command is used to display the adjacent IPv6 hosts found via + * neighbor discovery. Optionally, limit the output to the specified + * interface. + * + * @cliexpar + * Example of how to display the IPv6 neighbor adjacency table: + * @cliexstart{show ip6 neighbors} + * Time Address Flags Link layer Interface + * 34.0910 ::a:1:1:0:7 02:fe:6a:07:39:6f GigabitEthernet2/0/0 + * 173.2916 ::b:5:1:c:2 02:fe:50:62:3a:94 GigabitEthernet2/0/0 + * 886.6654 ::1:1:c:0:9 S 02:fe:e4:45:27:5b GigabitEthernet3/0/0 + * @cliexend + * Example of how to display the IPv6 neighbor adjacency table for given interface: + * @cliexstart{show ip6 neighbors GigabitEthernet2/0/0} + * Time Address Flags Link layer Interface + * 34.0910 ::a:1:1:0:7 02:fe:6a:07:39:6f GigabitEthernet2/0/0 + * 173.2916 ::b:5:1:c:2 02:fe:50:62:3a:94 GigabitEthernet2/0/0 + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_ip6_neighbors_command, static) = { + .path = "show ip6 neighbors", + .function = show_ip6_neighbors, + .short_help = "show ip6 neighbors [<interface>]", +}; +/* *INDENT-ON* */ + +static clib_error_t * +set_ip6_neighbor (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_address_t addr; + u8 mac_address[6]; + int addr_valid = 0; + int is_del = 0; + int is_static = 0; + int is_no_fib_entry = 0; + u32 sw_if_index; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + /* intfc, ip6-address, mac-address */ + if (unformat (input, "%U %U %U", + unformat_vnet_sw_interface, vnm, &sw_if_index, + unformat_ip6_address, &addr, + 
unformat_ethernet_address, mac_address)) + addr_valid = 1; + + else if (unformat (input, "delete") || unformat (input, "del")) + is_del = 1; + else if (unformat (input, "static")) + is_static = 1; + else if (unformat (input, "no-fib-entry")) + is_no_fib_entry = 1; + else + break; + } + + /* sw_if_index, addr and mac_address are only written by the + * <interface> <ip6-address> <mac-address> parse that sets addr_valid */ + if (!addr_valid) + return clib_error_return (0, "Missing interface, ip6 or hw address"); + + if (!is_del) + vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, &addr, + mac_address, sizeof (mac_address), + is_static, is_no_fib_entry); + else + vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index, &addr, + mac_address, sizeof (mac_address)); + return 0; +} + +/*? + * This command is used to manually add an entry to the IPv6 neighbor + * adjacency table. Optionally, the entry can be added as static. It is + * also used to remove an entry from the table. Use the '<em>show ip6 + * neighbors</em>' command to display all learned and manually entered entries. + * The optional '<em>no-fib-entry</em>' keyword is passed through to + * vnet_set_ip6_ethernet_neighbor() as its is_no_fib_entry argument. + * + * @cliexpar + * Example of how to add a static entry to the IPv6 neighbor adjacency table: + * @cliexcmd{set ip6 neighbor GigabitEthernet2/0/0 ::1:1:c:0:9 02:fe:e4:45:27:5b static} + * Example of how to delete an entry from the IPv6 neighbor adjacency table: + * @cliexcmd{set ip6 neighbor del GigabitEthernet2/0/0 ::1:1:c:0:9 02:fe:e4:45:27:5b} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_ip6_neighbor_command, static) = +{ + .path = "set ip6 neighbor", + .function = set_ip6_neighbor, + .short_help = "set ip6 neighbor [del] <interface> <ip6-address> <mac-address> [static] [no-fib-entry]", +}; +/* *INDENT-ON* */ + +typedef enum +{ + ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP, + ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY, + ICMP6_NEIGHBOR_SOLICITATION_N_NEXT, +} icmp6_neighbor_solicitation_or_advertisement_next_t; + +static_always_inline uword +icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + uword is_solicitation) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_main_t *im = &ip6_main; +
uword n_packets = frame->n_vectors; + u32 *from, *to_next; + u32 n_left_from, n_left_to_next, next_index, n_advertisements_sent; + icmp6_neighbor_discovery_option_type_t option_type; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_icmp_input_node.index); + int bogus_length; + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (icmp6_input_trace_t)); + + option_type = + (is_solicitation + ? ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address + : ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address); + n_advertisements_sent = 0; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *p0; + ip6_header_t *ip0; + icmp6_neighbor_solicitation_or_advertisement_header_t *h0; + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *o0; + u32 bi0, options_len0, sw_if_index0, next0, error0; + u32 ip6_sadd_link_local, ip6_sadd_unspecified; + int is_rewrite0; + u32 ni0; + + bi0 = to_next[0] = from[0]; + + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + h0 = ip6_next_header (ip0); + options_len0 = + clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]); + + error0 = ICMP6_ERROR_NONE; + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + ip6_sadd_link_local = + ip6_address_is_link_local_unicast (&ip0->src_address); + ip6_sadd_unspecified = + ip6_address_is_unspecified (&ip0->src_address); + + /* Check that source address is unspecified, link-local or else on-link. 
*/ + if (!ip6_sadd_unspecified && !ip6_sadd_link_local) + { + u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); + + if (ADJ_INDEX_INVALID != src_adj_index0) + { + ip_adjacency_t *adj0 = adj_get (src_adj_index0); + + /* Allow all realistic-looking rewrite adjacencies to pass */ + ni0 = adj0->lookup_next_index; + is_rewrite0 = (ni0 >= IP_LOOKUP_NEXT_ARP) && + (ni0 < IP6_LOOKUP_N_NEXT); + + error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0 + || !is_rewrite0) + ? + ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK + : error0); + } + else + { + error0 = + ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK; + } + } + + o0 = (void *) (h0 + 1); + o0 = ((options_len0 == 8 && o0->header.type == option_type + && o0->header.n_data_u64s == 1) ? o0 : 0); + + /* If src address unspecified or link local, donot learn neighbor MAC */ + if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 && + !ip6_sadd_unspecified)) + { + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + if (nm->limit_neighbor_cache_size && + pool_elts (nm->neighbor_pool) >= + nm->limit_neighbor_cache_size) + unset_random_neighbor_entry (); + vnet_set_ip6_ethernet_neighbor (vm, sw_if_index0, + is_solicitation ? + &ip0->src_address : + &h0->target_address, + o0->ethernet_address, + sizeof (o0->ethernet_address), + 0, ip6_sadd_link_local); + } + + if (is_solicitation && error0 == ICMP6_ERROR_NONE) + { + /* Check that target address is local to this router. 
*/ + fib_node_index_t fei; + u32 fib_index; + + fib_index = + ip6_fib_table_get_index_for_sw_if_index (sw_if_index0); + + if (~0 == fib_index) + { + error0 = ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN; + } + else + { + fei = ip6_fib_table_lookup_exact_match (fib_index, + &h0->target_address, + 128); + + if (FIB_NODE_INDEX_INVALID == fei) + { + /* The target address is not in the FIB */ + error0 = + ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN; + } + else + { + if (FIB_ENTRY_FLAG_LOCAL & + fib_entry_get_flags_for_source (fei, + FIB_SOURCE_INTERFACE)) + { + /* It's an address that belongs to one of our interfaces + * that's good. */ + } + else + if (fib_entry_is_sourced + (fei, FIB_SOURCE_IP6_ND_PROXY)) + { + /* The address was added by IPv6 Proxy ND config. + * We should only respond to these if the NS arrived on + * the link that has a matching covering prefix */ + } + else + { + error0 = + ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN; + } + } + } + } + + if (is_solicitation) + next0 = (error0 != ICMP6_ERROR_NONE + ? ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP + : ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY); + else + { + next0 = 0; + error0 = error0 == ICMP6_ERROR_NONE ? 
+ ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_RX : error0; + } + + if (is_solicitation && error0 == ICMP6_ERROR_NONE) + { + vnet_sw_interface_t *sw_if0; + ethernet_interface_t *eth_if0; + ethernet_header_t *eth0; + + /* dst address is either source address or the all-nodes mcast addr */ + if (!ip6_sadd_unspecified) + ip0->dst_address = ip0->src_address; + else + ip6_set_reserved_multicast_address (&ip0->dst_address, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_hosts); + + ip0->src_address = h0->target_address; + ip0->hop_limit = 255; + h0->icmp.type = ICMP6_neighbor_advertisement; + + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0); + ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + eth_if0 = + ethernet_get_interface (ðernet_main, sw_if0->hw_if_index); + if (eth_if0 && o0) + { + clib_memcpy (o0->ethernet_address, eth_if0->address, 6); + o0->header.type = + ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address; + } + + h0->advertisement_flags = clib_host_to_net_u32 + (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED + | ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE); + + h0->icmp.checksum = 0; + h0->icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, + &bogus_length); + ASSERT (bogus_length == 0); + + /* Reuse current MAC header, copy SMAC to DMAC and + * interface MAC to SMAC */ + vlib_buffer_advance (p0, -ethernet_buffer_header_size (p0)); + eth0 = vlib_buffer_get_current (p0); + clib_memcpy (eth0->dst_address, eth0->src_address, 6); + if (eth_if0) + clib_memcpy (eth0->src_address, eth_if0->address, 6); + + /* Setup input and output sw_if_index for packet */ + ASSERT (vnet_buffer (p0)->sw_if_index[VLIB_RX] == sw_if_index0); + vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0; + vnet_buffer (p0)->sw_if_index[VLIB_RX] = + vnet_main.local_interface_sw_if_index; + + n_advertisements_sent++; + } + + p0->error = error_node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + 
bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Account for advertisements sent. */ + vlib_error_count (vm, error_node->node_index, + ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX, + n_advertisements_sent); + + return frame->n_vectors; +} + +/* for "syslogging" - use elog for now */ +#define foreach_log_level \ + _ (DEBUG, "DEBUG") \ + _ (INFO, "INFORMATION") \ + _ (NOTICE, "NOTICE") \ + _ (WARNING, "WARNING") \ + _ (ERR, "ERROR") \ + _ (CRIT, "CRITICAL") \ + _ (ALERT, "ALERT") \ + _ (EMERG, "EMERGENCY") + +typedef enum +{ +#define _(f,s) LOG_##f, + foreach_log_level +#undef _ +} log_level_t; + +static char *log_level_strings[] = { +#define _(f,s) s, + foreach_log_level +#undef _ +}; + +static int logmask = 1 << LOG_DEBUG; + +static void +ip6_neighbor_syslog (vlib_main_t * vm, int priority, char *fmt, ...) +{ + /* just use elog for now */ + u8 *what; + va_list va; + + if ((priority > LOG_EMERG) || !(logmask & (1 << priority))) + return; + + va_start (va, fmt); + if (fmt) + { + what = va_format (0, fmt, &va); + + ELOG_TYPE_DECLARE (e) = + { + .format = "ip6 nd: (%s): %s",.format_args = "T4T4",}; + struct + { + u32 s[2]; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->s[0] = elog_string (&vm->elog_main, log_level_strings[priority]); + ed->s[1] = elog_string (&vm->elog_main, (char *) what); + } + va_end (va); + return; +} + +/* ipv6 neighbor discovery - router advertisements */ +typedef enum +{ + ICMP6_ROUTER_SOLICITATION_NEXT_DROP, + ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW, + ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX, + ICMP6_ROUTER_SOLICITATION_N_NEXT, +} icmp6_router_solicitation_or_advertisement_next_t; + +static_always_inline uword +icmp6_router_solicitation (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_main_t *im = &ip6_main; + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + uword n_packets = frame->n_vectors; + u32 *from, *to_next; + u32 n_left_from, 
n_left_to_next, next_index; + u32 n_advertisements_sent = 0; + int bogus_length; + + icmp6_neighbor_discovery_option_type_t option_type; + + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_icmp_input_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (icmp6_input_trace_t)); + + /* source may append his LL address */ + option_type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *p0; + ip6_header_t *ip0; + ip6_radv_t *radv_info = 0; + + icmp6_neighbor_discovery_header_t *h0; + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *o0; + + u32 bi0, options_len0, sw_if_index0, next0, error0; + u32 is_solicitation = 1, is_dropped = 0; + u32 is_unspecified, is_link_local; + + bi0 = to_next[0] = from[0]; + + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + h0 = ip6_next_header (ip0); + options_len0 = + clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]); + is_unspecified = ip6_address_is_unspecified (&ip0->src_address); + is_link_local = + ip6_address_is_link_local_unicast (&ip0->src_address); + + error0 = ICMP6_ERROR_NONE; + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + /* check if solicitation (not from nd_timer node) */ + if (ip6_address_is_unspecified (&ip0->dst_address)) + is_solicitation = 0; + + /* Check that source address is unspecified, link-local or else on-link. 
*/ + if (!is_unspecified && !is_link_local) + { + u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0); + + if (ADJ_INDEX_INVALID != src_adj_index0) + { + ip_adjacency_t *adj0 = adj_get (src_adj_index0); + + error0 = (adj0->rewrite_header.sw_if_index != sw_if_index0 + ? + ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK + : error0); + } + else + { + error0 = ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK; + } + } + + /* check for source LL option and process */ + o0 = (void *) (h0 + 1); + o0 = ((options_len0 == 8 + && o0->header.type == option_type + && o0->header.n_data_u64s == 1) ? o0 : 0); + + /* if src address unspecified IGNORE any options */ + if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 && + !is_unspecified && !is_link_local)) + { + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + if (nm->limit_neighbor_cache_size && + pool_elts (nm->neighbor_pool) >= + nm->limit_neighbor_cache_size) + unset_random_neighbor_entry (); + + vnet_set_ip6_ethernet_neighbor (vm, sw_if_index0, + &ip0->src_address, + o0->ethernet_address, + sizeof (o0->ethernet_address), + 0, 0); + } + + /* default is to drop */ + next0 = ICMP6_ROUTER_SOLICITATION_NEXT_DROP; + + if (error0 == ICMP6_ERROR_NONE) + { + vnet_sw_interface_t *sw_if0; + ethernet_interface_t *eth_if0; + u32 adj_index0; + + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0); + ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + eth_if0 = + ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index); + + /* only support ethernet interface type for now */ + error0 = + (!eth_if0) ?
ICMP6_ERROR_ROUTER_SOLICITATION_UNSUPPORTED_INTF + : error0; + + if (error0 == ICMP6_ERROR_NONE) + { + u32 ri; + + /* adjust the sizeof the buffer to just include the ipv6 header */ + p0->current_length -= + (options_len0 + + sizeof (icmp6_neighbor_discovery_header_t)); + + /* look up the radv_t information for this interface */ + vec_validate_init_empty + (nm->if_radv_pool_index_by_sw_if_index, sw_if_index0, ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index0]; + + if (ri != ~0) + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + error0 = + ((!radv_info) ? + ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG : + error0); + + if (error0 == ICMP6_ERROR_NONE) + { + f64 now = vlib_time_now (vm); + + /* for solicited adverts - need to rate limit */ + if (is_solicitation) + { + if (0 != radv_info->last_radv_time && + (now - radv_info->last_radv_time) < + MIN_DELAY_BETWEEN_RAS) + is_dropped = 1; + else + radv_info->last_radv_time = now; + } + + /* send now */ + icmp6_router_advertisement_header_t rh; + + rh.icmp.type = ICMP6_router_advertisement; + rh.icmp.code = 0; + rh.icmp.checksum = 0; + + rh.current_hop_limit = radv_info->curr_hop_limit; + rh.router_lifetime_in_sec = + clib_host_to_net_u16 + (radv_info->adv_router_lifetime_in_sec); + rh. + time_in_msec_between_retransmitted_neighbor_solicitations + = + clib_host_to_net_u32 (radv_info-> + adv_time_in_msec_between_retransmitted_neighbor_solicitations); + rh.neighbor_reachable_time_in_msec = + clib_host_to_net_u32 (radv_info-> + adv_neighbor_reachable_time_in_msec); + + rh.flags = + (radv_info->adv_managed_flag) ? + ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP : + 0; + rh.flags |= + ((radv_info->adv_other_flag) ? 
+ ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP : + 0); + + + u16 payload_length = + sizeof (icmp6_router_advertisement_header_t); + + vlib_buffer_add_data (vm, + vlib_buffer_get_free_list_index + (p0), bi0, (void *) &rh, + sizeof + (icmp6_router_advertisement_header_t)); + + if (radv_info->adv_link_layer_address) + { + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t + h; + + h.header.type = + ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address; + h.header.n_data_u64s = 1; + + /* copy ll address */ + clib_memcpy (&h.ethernet_address[0], + eth_if0->address, 6); + + vlib_buffer_add_data (vm, + vlib_buffer_get_free_list_index + (p0), bi0, (void *) &h, + sizeof + (icmp6_neighbor_discovery_ethernet_link_layer_address_option_t)); + + payload_length += + sizeof + (icmp6_neighbor_discovery_ethernet_link_layer_address_option_t); + } + + /* add MTU option */ + if (radv_info->adv_link_mtu) + { + icmp6_neighbor_discovery_mtu_option_t h; + + h.unused = 0; + h.mtu = + clib_host_to_net_u32 (radv_info->adv_link_mtu); + h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_mtu; + h.header.n_data_u64s = 1; + + payload_length += + sizeof (icmp6_neighbor_discovery_mtu_option_t); + + vlib_buffer_add_data (vm, + vlib_buffer_get_free_list_index + (p0), bi0, (void *) &h, + sizeof + (icmp6_neighbor_discovery_mtu_option_t)); + } + + /* add advertised prefix options */ + ip6_radv_prefix_t *pr_info; + + /* *INDENT-OFF* */ + pool_foreach (pr_info, radv_info->adv_prefixes_pool, + ({ + if(pr_info->enabled && + (!pr_info->decrement_lifetime_flag + || (pr_info->pref_lifetime_expires >0))) + { + /* advertise this prefix */ + icmp6_neighbor_discovery_prefix_information_option_t h; + + h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_prefix_information; + h.header.n_data_u64s = (sizeof(icmp6_neighbor_discovery_prefix_information_option_t) >> 3); + + h.dst_address_length = pr_info->prefix_len; + + h.flags = (pr_info->adv_on_link_flag) ? 
ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_FLAG_ON_LINK : 0; + h.flags |= (pr_info->adv_autonomous_flag) ? ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_AUTO : 0; + + if(radv_info->cease_radv && pr_info->deprecated_prefix_flag) + { + h.valid_time = clib_host_to_net_u32(MIN_ADV_VALID_LIFETIME); + h.preferred_time = 0; + } + else + { + if(pr_info->decrement_lifetime_flag) + { + pr_info->adv_valid_lifetime_in_secs = ((pr_info->valid_lifetime_expires > now)) ? + (pr_info->valid_lifetime_expires - now) : 0; + + pr_info->adv_pref_lifetime_in_secs = ((pr_info->pref_lifetime_expires > now)) ? + (pr_info->pref_lifetime_expires - now) : 0; + } + + h.valid_time = clib_host_to_net_u32(pr_info->adv_valid_lifetime_in_secs); + h.preferred_time = clib_host_to_net_u32(pr_info->adv_pref_lifetime_in_secs) ; + } + h.unused = 0; + + clib_memcpy(&h.dst_address, &pr_info->prefix, sizeof(ip6_address_t)); + + payload_length += sizeof( icmp6_neighbor_discovery_prefix_information_option_t); + + vlib_buffer_add_data (vm, + vlib_buffer_get_free_list_index (p0), + bi0, + (void *)&h, sizeof(icmp6_neighbor_discovery_prefix_information_option_t)); + + } + })); + /* *INDENT-ON* */ + + /* add additional options before here */ + + /* finish building the router advertisement... 
*/ + if (!is_unspecified && radv_info->send_unicast) + { + ip0->dst_address = ip0->src_address; + } + else + { + /* target address is all-nodes mcast addr */ + ip6_set_reserved_multicast_address + (&ip0->dst_address, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_hosts); + } + + /* source address MUST be the link-local address */ + ip0->src_address = radv_info->link_local_address; + + ip0->hop_limit = 255; + ip0->payload_length = + clib_host_to_net_u16 (payload_length); + + icmp6_router_advertisement_header_t *rh0 = + (icmp6_router_advertisement_header_t *) (ip0 + 1); + rh0->icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, + &bogus_length); + ASSERT (bogus_length == 0); + + /* setup output if and adjacency */ + vnet_buffer (p0)->sw_if_index[VLIB_RX] = + vnet_main.local_interface_sw_if_index; + + if (is_solicitation) + { + ethernet_header_t *eth0; + /* Reuse current MAC header, copy SMAC to DMAC and + * interface MAC to SMAC */ + vlib_buffer_reset (p0); + eth0 = vlib_buffer_get_current (p0); + clib_memcpy (eth0->dst_address, eth0->src_address, + 6); + clib_memcpy (eth0->src_address, eth_if0->address, + 6); + next0 = + is_dropped ? next0 : + ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX; + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + sw_if_index0; + } + else + { + adj_index0 = radv_info->mcast_adj_index; + if (adj_index0 == 0) + error0 = ICMP6_ERROR_DST_LOOKUP_MISS; + else + { + next0 = + is_dropped ? 
next0 : + ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW; + vnet_buffer (p0)->ip.adj_index[VLIB_TX] = + adj_index0; + } + } + p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + + radv_info->n_solicitations_dropped += is_dropped; + radv_info->n_solicitations_rcvd += is_solicitation; + + if ((error0 == ICMP6_ERROR_NONE) && !is_dropped) + { + radv_info->n_advertisements_sent++; + n_advertisements_sent++; + } + } + } + } + + p0->error = error_node->errors[error0]; + + if (error0 != ICMP6_ERROR_NONE) + vlib_error_count (vm, error_node->node_index, error0, 1); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Account for router advertisements sent. */ + vlib_error_count (vm, error_node->node_index, + ICMP6_ERROR_ROUTER_ADVERTISEMENTS_TX, + n_advertisements_sent); + + return frame->n_vectors; +} + + /* validate advertised info for consistancy (see RFC-4861 section 6.2.7) - log any inconsistencies, packet will always be dropped */ +static_always_inline uword +icmp6_router_advertisement (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + uword n_packets = frame->n_vectors; + u32 *from, *to_next; + u32 n_left_from, n_left_to_next, next_index; + u32 n_advertisements_rcvd = 0; + + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_icmp_input_node.index); + + from = vlib_frame_vector_args (frame); + n_left_from = n_packets; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, + sizeof (icmp6_input_trace_t)); + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t *p0; + ip6_header_t *ip0; + ip6_radv_t 
*radv_info = 0; + icmp6_router_advertisement_header_t *h0; + u32 bi0, options_len0, sw_if_index0, next0, error0; + + bi0 = to_next[0] = from[0]; + + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, bi0); + ip0 = vlib_buffer_get_current (p0); + h0 = ip6_next_header (ip0); + options_len0 = + clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]); + + error0 = ICMP6_ERROR_NONE; + sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; + + /* Check that source address is link-local */ + error0 = (!ip6_address_is_link_local_unicast (&ip0->src_address)) ? + ICMP6_ERROR_ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL : error0; + + /* default is to drop */ + next0 = ICMP6_ROUTER_SOLICITATION_NEXT_DROP; + + n_advertisements_rcvd++; + + if (error0 == ICMP6_ERROR_NONE) + { + vnet_sw_interface_t *sw_if0; + ethernet_interface_t *eth_if0; + + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0); + ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + eth_if0 = + ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index); + + /* only support ethernet interface type for now */ + error0 = + (!eth_if0) ? ICMP6_ERROR_ROUTER_SOLICITATION_UNSUPPORTED_INTF + : error0; + + if (error0 == ICMP6_ERROR_NONE) + { + u32 ri; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty + (nm->if_radv_pool_index_by_sw_if_index, sw_if_index0, ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index0]; + + if (ri != ~0) + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + error0 = + ((!radv_info) ?
+ ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG : + error0); + + if (error0 == ICMP6_ERROR_NONE) + { + /* validate advertised information */ + if ((h0->current_hop_limit && radv_info->curr_hop_limit) + && (h0->current_hop_limit != + radv_info->curr_hop_limit)) + { + ip6_neighbor_syslog (vm, LOG_WARNING, + "our AdvCurHopLimit on %U doesn't agree with %U", + format_vnet_sw_if_index_name, + vnm, sw_if_index0, + format_ip6_address, + &ip0->src_address); + } + + if ((h0->flags & + ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP) + != radv_info->adv_managed_flag) + { + ip6_neighbor_syslog (vm, LOG_WARNING, + "our AdvManagedFlag on %U doesn't agree with %U", + format_vnet_sw_if_index_name, + vnm, sw_if_index0, + format_ip6_address, + &ip0->src_address); + } + + if ((h0->flags & + ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP) + != radv_info->adv_other_flag) + { + ip6_neighbor_syslog (vm, LOG_WARNING, + "our AdvOtherConfigFlag on %U doesn't agree with %U", + format_vnet_sw_if_index_name, + vnm, sw_if_index0, + format_ip6_address, + &ip0->src_address); + } + + if ((h0-> + time_in_msec_between_retransmitted_neighbor_solicitations + && radv_info-> + adv_time_in_msec_between_retransmitted_neighbor_solicitations) + && (h0-> + time_in_msec_between_retransmitted_neighbor_solicitations + != + clib_host_to_net_u32 (radv_info-> + adv_time_in_msec_between_retransmitted_neighbor_solicitations))) + { + ip6_neighbor_syslog (vm, LOG_WARNING, + "our AdvRetransTimer on %U doesn't agree with %U", + format_vnet_sw_if_index_name, + vnm, sw_if_index0, + format_ip6_address, + &ip0->src_address); + } + + if ((h0->neighbor_reachable_time_in_msec && + radv_info->adv_neighbor_reachable_time_in_msec) && + (h0->neighbor_reachable_time_in_msec != + clib_host_to_net_u32 + (radv_info->adv_neighbor_reachable_time_in_msec))) + { + ip6_neighbor_syslog (vm, LOG_WARNING, + "our AdvReachableTime on %U doesn't agree with %U", + format_vnet_sw_if_index_name, + vnm, sw_if_index0, + format_ip6_address, 
+ &ip0->src_address); + } + + /* check for MTU or prefix options or .. */ + u8 *opt_hdr = (u8 *) (h0 + 1); + while (options_len0 > 0) + { + icmp6_neighbor_discovery_option_header_t *o0 = + (icmp6_neighbor_discovery_option_header_t *) + opt_hdr; + int opt_len = o0->n_data_u64s << 3; + icmp6_neighbor_discovery_option_type_t option_type = + o0->type; + + if (options_len0 < 2) + { + ip6_neighbor_syslog (vm, LOG_ERR, + "malformed RA packet on %U from %U", + format_vnet_sw_if_index_name, + vnm, sw_if_index0, + format_ip6_address, + &ip0->src_address); + break; + } + + if (opt_len == 0) + { + ip6_neighbor_syslog (vm, LOG_ERR, + " zero length option in RA on %U from %U", + format_vnet_sw_if_index_name, + vnm, sw_if_index0, + format_ip6_address, + &ip0->src_address); + break; + } + else if (opt_len > options_len0) + { + ip6_neighbor_syslog (vm, LOG_ERR, + "option length in RA packet greater than total length on %U from %U", + format_vnet_sw_if_index_name, + vnm, sw_if_index0, + format_ip6_address, + &ip0->src_address); + break; + } + + options_len0 -= opt_len; + opt_hdr += opt_len; + + switch (option_type) + { + case ICMP6_NEIGHBOR_DISCOVERY_OPTION_mtu: + { + icmp6_neighbor_discovery_mtu_option_t *h = + (icmp6_neighbor_discovery_mtu_option_t + *) (o0); + + if (opt_len < sizeof (*h)) + break; + + if ((h->mtu && radv_info->adv_link_mtu) && + (h->mtu != + clib_host_to_net_u32 + (radv_info->adv_link_mtu))) + { + ip6_neighbor_syslog (vm, LOG_WARNING, + "our AdvLinkMTU on %U doesn't agree with %U", + format_vnet_sw_if_index_name, + vnm, sw_if_index0, + format_ip6_address, + &ip0->src_address); + } + } + break; + + case ICMP6_NEIGHBOR_DISCOVERY_OPTION_prefix_information: + { + icmp6_neighbor_discovery_prefix_information_option_t + * h = + (icmp6_neighbor_discovery_prefix_information_option_t + *) (o0); + + /* validate advertised prefix options */ + ip6_radv_prefix_t *pr_info; + u32 preferred, valid; + + if (opt_len < sizeof (*h)) + break; + + preferred = + clib_net_to_host_u32 
(h->preferred_time); + valid = clib_net_to_host_u32 (h->valid_time); + + /* look for matching prefix - if we our advertising it, it better be consistant */ + /* *INDENT-OFF* */ + pool_foreach (pr_info, radv_info->adv_prefixes_pool, + ({ + + ip6_address_t mask; + ip6_address_mask_from_width(&mask, pr_info->prefix_len); + + if(pr_info->enabled && + (pr_info->prefix_len == h->dst_address_length) && + ip6_address_is_equal_masked (&pr_info->prefix, &h->dst_address, &mask)) + { + /* found it */ + if(!pr_info->decrement_lifetime_flag && + valid != pr_info->adv_valid_lifetime_in_secs) + { + ip6_neighbor_syslog(vm, LOG_WARNING, + "our ADV validlifetime on %U for %U does not agree with %U", + format_vnet_sw_if_index_name, vnm, sw_if_index0,format_ip6_address, &pr_info->prefix, + format_ip6_address, &h->dst_address); + } + if(!pr_info->decrement_lifetime_flag && + preferred != pr_info->adv_pref_lifetime_in_secs) + { + ip6_neighbor_syslog(vm, LOG_WARNING, + "our ADV preferredlifetime on %U for %U does not agree with %U", + format_vnet_sw_if_index_name, vnm, sw_if_index0,format_ip6_address, &pr_info->prefix, + format_ip6_address, &h->dst_address); + } + } + break; + })); + /* *INDENT-ON* */ + break; + } + default: + /* skip this one */ + break; + } + } + } + } + } + + p0->error = error_node->errors[error0]; + + if (error0 != ICMP6_ERROR_NONE) + vlib_error_count (vm, error_node->node_index, error0, 1); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Account for router advertisements sent. 
*/ + vlib_error_count (vm, error_node->node_index, + ICMP6_ERROR_ROUTER_ADVERTISEMENTS_RX, + n_advertisements_rcvd); + + return frame->n_vectors; +} + +/** + * @brief Add a multicast Address to the advertised MLD set + */ +static void +ip6_neighbor_add_mld_prefix (ip6_radv_t * radv_info, ip6_address_t * addr) +{ + ip6_mldp_group_t *mcast_group_info; + uword *p; + + /* lookup mldp info for this interface */ + p = mhash_get (&radv_info->address_to_mldp_index, &addr); + mcast_group_info = + p ? pool_elt_at_index (radv_info->mldp_group_pool, p[0]) : 0; + + /* add address */ + if (!mcast_group_info) + { + /* add */ + u32 mi; + pool_get (radv_info->mldp_group_pool, mcast_group_info); + + mi = mcast_group_info - radv_info->mldp_group_pool; + mhash_set (&radv_info->address_to_mldp_index, &addr, mi, /* old_value */ + 0); + + mcast_group_info->type = 4; + mcast_group_info->mcast_source_address_pool = 0; + mcast_group_info->num_sources = 0; + clib_memcpy (&mcast_group_info->mcast_address, &addr, + sizeof (ip6_address_t)); + } +} + +/** + * @brief Delete a multicast Address from the advertised MLD set + */ +static void +ip6_neighbor_del_mld_prefix (ip6_radv_t * radv_info, ip6_address_t * addr) +{ + ip6_mldp_group_t *mcast_group_info; + uword *p; + + p = mhash_get (&radv_info->address_to_mldp_index, &addr); + mcast_group_info = + p ? 
pool_elt_at_index (radv_info->mldp_group_pool, p[0]) : 0; + + if (mcast_group_info) + { + mhash_unset (&radv_info->address_to_mldp_index, &addr, + /* old_value */ 0); + pool_put (radv_info->mldp_group_pool, mcast_group_info); + } +} + +/** + * @brief Add a multicast Address to the advertised MLD set + */ +static void +ip6_neighbor_add_mld_grp (ip6_radv_t * a, + ip6_multicast_address_scope_t scope, + ip6_multicast_link_local_group_id_t group) +{ + ip6_address_t addr; + + ip6_set_reserved_multicast_address (&addr, scope, group); + + ip6_neighbor_add_mld_prefix (a, &addr); +} + +/** + * @brief create and initialize router advertisement parameters with default + * values for this intfc + */ +u32 +ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, + u32 sw_if_index, u32 is_add) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_radv_t *a = 0; + u32 ri = ~0; + vnet_sw_interface_t *sw_if0; + ethernet_interface_t *eth_if0 = 0; + + /* lookup radv container - ethernet interfaces only */ + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index); + if (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE) + eth_if0 = ethernet_get_interface (ðernet_main, sw_if0->hw_if_index); + + if (!eth_if0) + return ri; + + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, + ~0); + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + if (ri != ~0) + { + a = pool_elt_at_index (nm->if_radv_pool, ri); + + if (!is_add) + { + ip6_radv_prefix_t *p; + ip6_mldp_group_t *m; + + /* release the lock on the interface's mcast adj */ + adj_unlock (a->mcast_adj_index); + + /* clean up prefix and MDP pools */ + /* *INDENT-OFF* */ + pool_flush(p, a->adv_prefixes_pool, + ({ + mhash_unset (&a->address_to_prefix_index, &p->prefix, 0); + })); + pool_flush (m, a->mldp_group_pool, + ({ + mhash_unset (&a->address_to_mldp_index, &m->mcast_address, 0); + })); + /* *INDENT-ON* */ + + pool_free (a->mldp_group_pool); + pool_free (a->adv_prefixes_pool); + + mhash_free 
(&a->address_to_prefix_index); + mhash_free (&a->address_to_mldp_index); + + pool_put (nm->if_radv_pool, a); + nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ~0; + ri = ~0; + } + } + else + { + if (is_add) + { + vnet_hw_interface_t *hw_if0; + + hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index); + + pool_get (nm->if_radv_pool, a); + + ri = a - nm->if_radv_pool; + nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ri; + + /* initialize default values (most of which are zero) */ + memset (a, 0, sizeof (a[0])); + + a->sw_if_index = sw_if_index; + a->max_radv_interval = DEF_MAX_RADV_INTERVAL; + a->min_radv_interval = DEF_MIN_RADV_INTERVAL; + a->curr_hop_limit = DEF_CURR_HOP_LIMIT; + a->adv_router_lifetime_in_sec = DEF_DEF_RTR_LIFETIME; + + /* send ll address source address option */ + a->adv_link_layer_address = 1; + + a->min_delay_between_radv = MIN_DELAY_BETWEEN_RAS; + a->max_delay_between_radv = MAX_DELAY_BETWEEN_RAS; + a->max_rtr_default_lifetime = MAX_DEF_RTR_LIFETIME; + a->seed = (u32) clib_cpu_time_now (); + (void) random_u32 (&a->seed); + a->randomizer = clib_cpu_time_now (); + (void) random_u64 (&a->randomizer); + + a->initial_adverts_count = MAX_INITIAL_RTR_ADVERTISEMENTS; + a->initial_adverts_sent = a->initial_adverts_count - 1; + a->initial_adverts_interval = MAX_INITIAL_RTR_ADVERT_INTERVAL; + + /* deafult is to send */ + a->send_radv = 1; + + /* fill in radv_info for this interface that will be needed later */ + a->adv_link_mtu = hw_if0->max_l3_packet_bytes[VLIB_RX]; + + clib_memcpy (a->link_layer_address, eth_if0->address, 6); + + /* fill in default link-local address (this may be overridden) */ + ip6_link_local_address_from_ethernet_address + (&a->link_local_address, eth_if0->address); + + mhash_init (&a->address_to_prefix_index, sizeof (uword), + sizeof (ip6_address_t)); + mhash_init (&a->address_to_mldp_index, sizeof (uword), + sizeof (ip6_address_t)); + + a->mcast_adj_index = adj_mcast_add_or_lock (FIB_PROTOCOL_IP6, + VNET_LINK_IP6, + 
sw_if_index); + + /* add multicast groups we will always be reporting */ + ip6_neighbor_add_mld_grp (a, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_hosts); + ip6_neighbor_add_mld_grp (a, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_routers); + ip6_neighbor_add_mld_grp (a, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_mldv2_routers); + } + } + return ri; +} + +/* send an mldpv2 report */ +static void +ip6_neighbor_send_mldpv2_report (u32 sw_if_index) +{ + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vnm->vlib_main; + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + vnet_sw_interface_t *sw_if0; + ethernet_interface_t *eth_if0; + u32 ri; + int bogus_length; + + ip6_radv_t *radv_info; + u16 payload_length; + vlib_buffer_t *b0; + ip6_header_t *ip0; + u32 *to_next; + vlib_frame_t *f; + u32 bo0; + u32 n_to_alloc = 1; + u32 n_allocated; + + icmp6_multicast_listener_report_header_t *rh0; + icmp6_multicast_listener_report_packet_t *rp0; + + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index); + ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE); + eth_if0 = ethernet_get_interface (ðernet_main, sw_if0->hw_if_index); + + if (!eth_if0 || !vnet_sw_interface_is_admin_up (vnm, sw_if_index)) + return; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, + ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + if (ri == ~0) + return; + + /* send report now - build a mldpv2 report packet */ + n_allocated = vlib_buffer_alloc_from_free_list (vm, + &bo0, + n_to_alloc, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + if (PREDICT_FALSE (n_allocated == 0)) + { + clib_warning ("buffer allocation failure"); + return; + } + + b0 = vlib_get_buffer (vm, bo0); + + /* adjust the sizeof the buffer to just include the ipv6 header */ + b0->current_length = sizeof (icmp6_multicast_listener_report_packet_t); + + payload_length = sizeof 
(icmp6_multicast_listener_report_header_t); + + b0->error = ICMP6_ERROR_NONE; + + rp0 = vlib_buffer_get_current (b0); + ip0 = (ip6_header_t *) & rp0->ip; + rh0 = (icmp6_multicast_listener_report_header_t *) & rp0->report_hdr; + + memset (rp0, 0x0, sizeof (icmp6_multicast_listener_report_packet_t)); + + ip0->ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + + ip0->protocol = IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS; + /* for DEBUG - vnet driver won't seem to emit router alerts */ + /* ip0->protocol = IP_PROTOCOL_ICMP6; */ + ip0->hop_limit = 1; + + rh0->icmp.type = ICMP6_multicast_listener_report_v2; + + /* source address MUST be the link-local address */ + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + ip0->src_address = radv_info->link_local_address; + + /* destination is all mldpv2 routers */ + ip6_set_reserved_multicast_address (&ip0->dst_address, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_mldv2_routers); + + /* add reports here */ + ip6_mldp_group_t *m; + int num_addr_records = 0; + icmp6_multicast_address_record_t rr; + + /* fill in the hop-by-hop extension header (router alert) info */ + rh0->ext_hdr.next_hdr = IP_PROTOCOL_ICMP6; + rh0->ext_hdr.n_data_u64s = 0; + + rh0->alert.type = IP6_MLDP_ALERT_TYPE; + rh0->alert.len = 2; + rh0->alert.value = 0; + + rh0->pad.type = 1; + rh0->pad.len = 0; + + rh0->icmp.checksum = 0; + + /* *INDENT-OFF* */ + pool_foreach (m, radv_info->mldp_group_pool, + ({ + rr.type = m->type; + rr.aux_data_len_u32s = 0; + rr.num_sources = clib_host_to_net_u16 (m->num_sources); + clib_memcpy(&rr.mcast_addr, &m->mcast_address, sizeof(ip6_address_t)); + + num_addr_records++; + + vlib_buffer_add_data + (vm, vlib_buffer_get_free_list_index (b0), bo0, + (void *)&rr, sizeof(icmp6_multicast_address_record_t)); + + payload_length += sizeof( icmp6_multicast_address_record_t); + })); + /* *INDENT-ON* */ + + rh0->rsvd = 0; + rh0->num_addr_records = clib_host_to_net_u16 (num_addr_records); + + /* update 
lengths */ + ip0->payload_length = clib_host_to_net_u16 (payload_length); + + rh0->icmp.checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, + &bogus_length); + ASSERT (bogus_length == 0); + + /* + * OK to override w/ no regard for actual FIB, because + * ip6-rewrite only looks at the adjacency. + */ + vnet_buffer (b0)->sw_if_index[VLIB_RX] = + vnet_main.local_interface_sw_if_index; + + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = radv_info->mcast_adj_index; + b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + + vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite-mcast"); + + f = vlib_get_frame_to_node (vm, node->index); + to_next = vlib_frame_vector_args (f); + to_next[0] = bo0; + f->n_vectors = 1; + + vlib_put_frame_to_node (vm, node->index, f); + return; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) = +{ + .function = icmp6_router_solicitation, + .name = "icmp6-router-solicitation", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp6_input_trace, + + .n_next_nodes = ICMP6_ROUTER_SOLICITATION_N_NEXT, + .next_nodes = { + [ICMP6_ROUTER_SOLICITATION_NEXT_DROP] = "error-drop", + [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite-mcast", + [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output", + }, +}; +/* *INDENT-ON* */ + +/* send a RA or update the timer info etc.. 
*/ +static uword +ip6_neighbor_process_timer_event (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_radv_t *radv_info; + vlib_frame_t *f = 0; + u32 n_this_frame = 0; + u32 n_left_to_next = 0; + u32 *to_next = 0; + u32 bo0; + icmp6_router_solicitation_header_t *h0; + vlib_buffer_t *b0; + f64 now = vlib_time_now (vm); + + /* Interface ip6 radv info list */ + /* *INDENT-OFF* */ + pool_foreach (radv_info, nm->if_radv_pool, + ({ + if( !vnet_sw_interface_is_admin_up (vnm, radv_info->sw_if_index)) + { + radv_info->initial_adverts_sent = radv_info->initial_adverts_count-1; + radv_info->next_multicast_time = now; + radv_info->last_multicast_time = now; + radv_info->last_radv_time = 0; + radv_info->all_routers_mcast = 0; + continue; + } + + /* Make sure that we've joined the all-routers multicast group */ + if(!radv_info->all_routers_mcast) + { + /* send MDLP_REPORT_EVENT message */ + ip6_neighbor_send_mldpv2_report(radv_info->sw_if_index); + radv_info->all_routers_mcast = 1; + } + + /* is it time to send a multicast RA on this interface? 
*/ + if(radv_info->send_radv && (now >= radv_info->next_multicast_time)) + { + u32 n_to_alloc = 1; + u32 n_allocated; + + f64 rfn = (radv_info->max_radv_interval - radv_info->min_radv_interval) * + random_f64 (&radv_info->seed) + radv_info->min_radv_interval; + + /* multicast send - compute next multicast send time */ + if( radv_info->initial_adverts_sent > 0) + { + radv_info->initial_adverts_sent--; + if(rfn > radv_info-> initial_adverts_interval) + rfn = radv_info-> initial_adverts_interval; + + /* check to see if we are ceasing to send */ + if( radv_info->initial_adverts_sent == 0) + if(radv_info->cease_radv) + radv_info->send_radv = 0; + } + + radv_info->next_multicast_time = rfn + now; + radv_info->last_multicast_time = now; + + /* send advert now - build a "solicted" router advert with unspecified source address */ + n_allocated = vlib_buffer_alloc_from_free_list + (vm, &bo0, n_to_alloc, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + if (PREDICT_FALSE(n_allocated == 0)) + { + clib_warning ("buffer allocation failure"); + continue; + } + b0 = vlib_get_buffer (vm, bo0); + b0->current_length = sizeof( icmp6_router_solicitation_header_t); + b0->error = ICMP6_ERROR_NONE; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = radv_info->sw_if_index; + + h0 = vlib_buffer_get_current (b0); + + memset (h0, 0, sizeof (icmp6_router_solicitation_header_t)); + + h0->ip.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28); + h0->ip.payload_length = clib_host_to_net_u16 (sizeof (icmp6_router_solicitation_header_t) + - STRUCT_OFFSET_OF (icmp6_router_solicitation_header_t, neighbor)); + h0->ip.protocol = IP_PROTOCOL_ICMP6; + h0->ip.hop_limit = 255; + + /* set src/dst address as "unspecified" this marks this packet as internally generated rather than recieved */ + h0->ip.src_address.as_u64[0] = 0; + h0->ip.src_address.as_u64[1] = 0; + + h0->ip.dst_address.as_u64[0] = 0; + h0->ip.dst_address.as_u64[1] = 0; + + h0->neighbor.icmp.type = ICMP6_router_solicitation; + + if 
(PREDICT_FALSE(f == 0)) + { + f = vlib_get_frame_to_node (vm, ip6_icmp_router_solicitation_node.index); + to_next = vlib_frame_vector_args (f); + n_left_to_next = VLIB_FRAME_SIZE; + n_this_frame = 0; + } + + n_this_frame++; + n_left_to_next--; + to_next[0] = bo0; + to_next += 1; + + if (PREDICT_FALSE(n_left_to_next == 0)) + { + f->n_vectors = n_this_frame; + vlib_put_frame_to_node (vm, ip6_icmp_router_solicitation_node.index, f); + f = 0; + } + } + })); + /* *INDENT-ON* */ + + if (f) + { + ASSERT (n_this_frame); + f->n_vectors = n_this_frame; + vlib_put_frame_to_node (vm, ip6_icmp_router_solicitation_node.index, f); + } + return 0; +} + +static uword +ip6_icmp_neighbor_discovery_event_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + uword event_type; + ip6_icmp_neighbor_discovery_event_data_t *event_data; + + /* init code here */ + + while (1) + { + vlib_process_wait_for_event_or_clock (vm, 1. /* seconds */ ); + + event_data = vlib_process_get_event_data (vm, &event_type); + + if (!event_data) + { + /* No events found: timer expired. 
*/ + /* process interface list and send RAs as appropriate, update timer info */ + ip6_neighbor_process_timer_event (vm, node, frame); + } + else + { + switch (event_type) + { + + case ICMP6_ND_EVENT_INIT: + break; + + case ~0: + break; + + default: + ASSERT (0); + } + + if (event_data) + _vec_len (event_data) = 0; + } + } + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node,static) = +{ + .function = icmp6_router_advertisement, + .name = "icmp6-router-advertisement", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp6_input_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +vlib_node_registration_t ip6_icmp_neighbor_discovery_event_node = { + + .function = ip6_icmp_neighbor_discovery_event_process, + .name = "ip6-icmp-neighbor-discovery-event-process", + .type = VLIB_NODE_TYPE_PROCESS, +}; + +static uword +icmp6_neighbor_solicitation (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + return icmp6_neighbor_solicitation_or_advertisement (vm, node, frame, + /* is_solicitation */ + 1); +} + +static uword +icmp6_neighbor_advertisement (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return icmp6_neighbor_solicitation_or_advertisement (vm, node, frame, + /* is_solicitation */ + 0); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_icmp_neighbor_solicitation_node,static) = +{ + .function = icmp6_neighbor_solicitation, + .name = "icmp6-neighbor-solicitation", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp6_input_trace, + + .n_next_nodes = ICMP6_NEIGHBOR_SOLICITATION_N_NEXT, + .next_nodes = { + [ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP] = "error-drop", + [ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY] = "interface-output", + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_icmp_neighbor_advertisement_node,static) = +{ + .function = icmp6_neighbor_advertisement, + .name = 
"icmp6-neighbor-advertisement", + + .vector_size = sizeof (u32), + + .format_trace = format_icmp6_input_trace, + + .n_next_nodes = 1, + .next_nodes = { + [0] = "error-drop", + }, +}; +/* *INDENT-ON* */ + +/* API support functions */ +int +ip6_neighbor_ra_config (vlib_main_t * vm, u32 sw_if_index, + u8 suppress, u8 managed, u8 other, + u8 ll_option, u8 send_unicast, u8 cease, + u8 use_lifetime, u32 lifetime, + u32 initial_count, u32 initial_interval, + u32 max_interval, u32 min_interval, u8 is_no) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + int error; + u32 ri; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, + ~0); + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + error = (ri != ~0) ? 0 : VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if (!error) + { + + ip6_radv_t *radv_info; + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + if ((max_interval != 0) && (min_interval == 0)) + min_interval = .75 * max_interval; + + max_interval = + (max_interval != + 0) ? ((is_no) ? DEF_MAX_RADV_INTERVAL : max_interval) : + radv_info->max_radv_interval; + min_interval = + (min_interval != + 0) ? ((is_no) ? DEF_MIN_RADV_INTERVAL : min_interval) : + radv_info->min_radv_interval; + lifetime = + (use_lifetime != + 0) ? ((is_no) ? 
DEF_DEF_RTR_LIFETIME : lifetime) : + radv_info->adv_router_lifetime_in_sec; + + if (lifetime) + { + if (lifetime > MAX_DEF_RTR_LIFETIME) + lifetime = MAX_DEF_RTR_LIFETIME; + + if (lifetime <= max_interval) + return VNET_API_ERROR_INVALID_VALUE; + } + + if (min_interval != 0) + { + if ((min_interval > .75 * max_interval) || (min_interval < 3)) + return VNET_API_ERROR_INVALID_VALUE; + } + + if ((initial_count > MAX_INITIAL_RTR_ADVERTISEMENTS) || + (initial_interval > MAX_INITIAL_RTR_ADVERT_INTERVAL)) + return VNET_API_ERROR_INVALID_VALUE; + + /* + if "flag" is set and is_no is true then restore default value else set value corresponding to "flag" + if "flag" is clear don't change corresponding value + */ + radv_info->send_radv = + (suppress != 0) ? ((is_no != 0) ? 1 : 0) : radv_info->send_radv; + radv_info->adv_managed_flag = + (managed != 0) ? ((is_no) ? 0 : 1) : radv_info->adv_managed_flag; + radv_info->adv_other_flag = + (other != 0) ? ((is_no) ? 0 : 1) : radv_info->adv_other_flag; + radv_info->adv_link_layer_address = + (ll_option != + 0) ? ((is_no) ? 1 : 0) : radv_info->adv_link_layer_address; + radv_info->send_unicast = + (send_unicast != 0) ? ((is_no) ? 0 : 1) : radv_info->send_unicast; + radv_info->cease_radv = + (cease != 0) ? ((is_no) ? 0 : 1) : radv_info->cease_radv; + + radv_info->min_radv_interval = min_interval; + radv_info->max_radv_interval = max_interval; + radv_info->adv_router_lifetime_in_sec = lifetime; + + radv_info->initial_adverts_count = + (initial_count != + 0) ? ((is_no) ? MAX_INITIAL_RTR_ADVERTISEMENTS : initial_count) : + radv_info->initial_adverts_count; + radv_info->initial_adverts_interval = + (initial_interval != + 0) ? ((is_no) ? 
MAX_INITIAL_RTR_ADVERT_INTERVAL : initial_interval) : + radv_info->initial_adverts_interval; + + /* restart */ + if ((cease != 0) && (is_no)) + radv_info->send_radv = 1; + + radv_info->initial_adverts_sent = radv_info->initial_adverts_count - 1; + radv_info->next_multicast_time = vlib_time_now (vm); + radv_info->last_multicast_time = vlib_time_now (vm); + radv_info->last_radv_time = 0; + } + return (error); +} + +int +ip6_neighbor_ra_prefix (vlib_main_t * vm, u32 sw_if_index, + ip6_address_t * prefix_addr, u8 prefix_len, + u8 use_default, u32 val_lifetime, u32 pref_lifetime, + u8 no_advertise, u8 off_link, u8 no_autoconfig, + u8 no_onlink, u8 is_no) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + int error; + + u32 ri; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, + ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + error = (ri != ~0) ? 0 : VNET_API_ERROR_INVALID_SW_IF_INDEX; + + if (!error) + { + f64 now = vlib_time_now (vm); + ip6_radv_t *radv_info; + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + /* prefix info add, delete or update */ + ip6_radv_prefix_t *prefix; + + /* lookup prefix info for this address on this interface */ + uword *p = mhash_get (&radv_info->address_to_prefix_index, prefix_addr); + + prefix = p ? pool_elt_at_index (radv_info->adv_prefixes_pool, p[0]) : 0; + + if (is_no) + { + /* delete */ + if (!prefix) + return VNET_API_ERROR_INVALID_VALUE; /* invalid prefix */ + + if (prefix->prefix_len != prefix_len) + return VNET_API_ERROR_INVALID_VALUE_2; + + /* FIXME - Should the DP do this or the CP ? 
*/ + /* do specific delete processing here before returning */ + /* try to remove from routing table */ + + mhash_unset (&radv_info->address_to_prefix_index, prefix_addr, + /* old_value */ 0); + pool_put (radv_info->adv_prefixes_pool, prefix); + + radv_info->initial_adverts_sent = + radv_info->initial_adverts_count - 1; + radv_info->next_multicast_time = vlib_time_now (vm); + radv_info->last_multicast_time = vlib_time_now (vm); + radv_info->last_radv_time = 0; + return (error); + } + + /* adding or changing */ + if (!prefix) + { + /* add */ + u32 pi; + pool_get (radv_info->adv_prefixes_pool, prefix); + pi = prefix - radv_info->adv_prefixes_pool; + mhash_set (&radv_info->address_to_prefix_index, prefix_addr, pi, + /* old_value */ 0); + + memset (prefix, 0x0, sizeof (ip6_radv_prefix_t)); + + prefix->prefix_len = prefix_len; + clib_memcpy (&prefix->prefix, prefix_addr, sizeof (ip6_address_t)); + + /* initialize default values */ + prefix->adv_on_link_flag = 1; /* L bit set */ + prefix->adv_autonomous_flag = 1; /* A bit set */ + prefix->adv_valid_lifetime_in_secs = DEF_ADV_VALID_LIFETIME; + prefix->adv_pref_lifetime_in_secs = DEF_ADV_PREF_LIFETIME; + prefix->enabled = 1; + prefix->decrement_lifetime_flag = 1; + prefix->deprecated_prefix_flag = 1; + + if (off_link == 0) + { + /* FIXME - Should the DP do this or the CP ? */ + /* insert prefix into routing table as a connected prefix */ + } + + if (use_default) + goto restart; + } + else + { + + if (prefix->prefix_len != prefix_len) + return VNET_API_ERROR_INVALID_VALUE_2; + + if (off_link != 0) + { + /* FIXME - Should the DP do this or the CP ? 
*/ + /* remove from routing table if already there */ + } + } + + if ((val_lifetime == ~0) || (pref_lifetime == ~0)) + { + prefix->adv_valid_lifetime_in_secs = ~0; + prefix->adv_pref_lifetime_in_secs = ~0; + prefix->decrement_lifetime_flag = 0; + } + else + { + prefix->adv_valid_lifetime_in_secs = val_lifetime;; + prefix->adv_pref_lifetime_in_secs = pref_lifetime; + } + + /* copy remaining */ + prefix->enabled = !(no_advertise != 0); + prefix->adv_on_link_flag = !((off_link != 0) || (no_onlink != 0)); + prefix->adv_autonomous_flag = !(no_autoconfig != 0); + + restart: + /* restart */ + /* fill in the expiration times */ + prefix->valid_lifetime_expires = + now + prefix->adv_valid_lifetime_in_secs; + prefix->pref_lifetime_expires = now + prefix->adv_pref_lifetime_in_secs; + + radv_info->initial_adverts_sent = radv_info->initial_adverts_count - 1; + radv_info->next_multicast_time = vlib_time_now (vm); + radv_info->last_multicast_time = vlib_time_now (vm); + radv_info->last_radv_time = 0; + } + return (error); +} + +clib_error_t * +ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input, + vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + clib_error_t *error = 0; + u8 is_no = 0; + u8 suppress = 0, managed = 0, other = 0; + u8 suppress_ll_option = 0, send_unicast = 0, cease = 0; + u8 use_lifetime = 0; + u32 sw_if_index, ra_lifetime = 0, ra_initial_count = + 0, ra_initial_interval = 0; + u32 ra_max_interval = 0, ra_min_interval = 0; + + unformat_input_t _line_input, *line_input = &_line_input; + vnet_sw_interface_t *sw_if0; + + int add_radv_info = 1; + __attribute__ ((unused)) ip6_radv_t *radv_info = 0; + ip6_address_t ip6_addr; + u32 addr_len; + + + /* Get a line of input. 
*/ + if (!unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + /* get basic radv info for this interface */ + if (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + + if (unformat_user (line_input, + unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + u32 ri; + ethernet_interface_t *eth_if0 = 0; + + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index); + if (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE) + eth_if0 = + ethernet_get_interface (ðernet_main, sw_if0->hw_if_index); + + if (!eth_if0) + { + error = + clib_error_return (0, "Interface must be of ethernet type"); + goto done; + } + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, + sw_if_index, ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + if (ri != ~0) + { + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + } + else + { + error = clib_error_return (0, "unknown interface %U'", + format_unformat_error, line_input); + goto done; + } + } + else + { + error = clib_error_return (0, "invalid interface name %U'", + format_unformat_error, line_input); + goto done; + } + } + + /* get the rest of the command */ + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "no")) + is_no = 1; + else if (unformat (line_input, "prefix %U/%d", + unformat_ip6_address, &ip6_addr, &addr_len)) + { + add_radv_info = 0; + break; + } + else if (unformat (line_input, "ra-managed-config-flag")) + { + managed = 1; + break; + } + else if (unformat (line_input, "ra-other-config-flag")) + { + other = 1; + break; + } + else if (unformat (line_input, "ra-suppress") || + unformat (line_input, "ra-surpress")) + { + suppress = 1; + break; + } + else if (unformat (line_input, "ra-suppress-link-layer") || + unformat (line_input, "ra-surpress-link-layer")) + { + suppress_ll_option = 1; + break; + } + else if (unformat (line_input, "ra-send-unicast")) + { + 
send_unicast = 1; + break; + } + else if (unformat (line_input, "ra-lifetime")) + { + if (!unformat (line_input, "%d", &ra_lifetime)) + { + error = unformat_parse_error (line_input); + goto done; + } + use_lifetime = 1; + break; + } + else if (unformat (line_input, "ra-initial")) + { + if (!unformat + (line_input, "%d %d", &ra_initial_count, &ra_initial_interval)) + { + error = unformat_parse_error (line_input); + goto done; + } + break; + } + else if (unformat (line_input, "ra-interval")) + { + if (!unformat (line_input, "%d", &ra_max_interval)) + { + error = unformat_parse_error (line_input); + goto done; + } + + if (!unformat (line_input, "%d", &ra_min_interval)) + ra_min_interval = 0; + break; + } + else if (unformat (line_input, "ra-cease")) + { + cease = 1; + break; + } + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (add_radv_info) + { + ip6_neighbor_ra_config (vm, sw_if_index, + suppress, managed, other, + suppress_ll_option, send_unicast, cease, + use_lifetime, ra_lifetime, + ra_initial_count, ra_initial_interval, + ra_max_interval, ra_min_interval, is_no); + } + else + { + u32 valid_lifetime_in_secs = 0; + u32 pref_lifetime_in_secs = 0; + u8 use_prefix_default_values = 0; + u8 no_advertise = 0; + u8 off_link = 0; + u8 no_autoconfig = 0; + u8 no_onlink = 0; + + /* get the rest of the command */ + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "default")) + { + use_prefix_default_values = 1; + break; + } + else if (unformat (line_input, "infinite")) + { + valid_lifetime_in_secs = ~0; + pref_lifetime_in_secs = ~0; + break; + } + else if (unformat (line_input, "%d %d", &valid_lifetime_in_secs, + &pref_lifetime_in_secs)) + break; + else + break; + } + + + /* get the rest of the command */ + while (!use_prefix_default_values && + unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "no-advertise")) + no_advertise = 1; + else if 
(unformat (line_input, "off-link")) + off_link = 1; + else if (unformat (line_input, "no-autoconfig")) + no_autoconfig = 1; + else if (unformat (line_input, "no-onlink")) + no_onlink = 1; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + ip6_neighbor_ra_prefix (vm, sw_if_index, + &ip6_addr, addr_len, + use_prefix_default_values, + valid_lifetime_in_secs, + pref_lifetime_in_secs, + no_advertise, + off_link, no_autoconfig, no_onlink, is_no); + } + +done: + unformat_free (line_input); + + return error; +} + +static void +ip6_print_addrs (vlib_main_t * vm, u32 * addrs) +{ + ip_lookup_main_t *lm = &ip6_main.lookup_main; + u32 i; + + for (i = 0; i < vec_len (addrs); i++) + { + ip_interface_address_t *a = + pool_elt_at_index (lm->if_address_pool, addrs[i]); + ip6_address_t *address = ip_interface_address_get_address (lm, a); + + vlib_cli_output (vm, "\t\t%U/%d", + format_ip6_address, address, a->address_length); + } +} + +static clib_error_t * +show_ip6_interface_cmd (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + clib_error_t *error = 0; + u32 sw_if_index; + + sw_if_index = ~0; + + if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + u32 ri; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, + sw_if_index, ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + if (ri != ~0) + { + ip_lookup_main_t *lm = &ip6_main.lookup_main; + ip6_radv_t *radv_info; + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + vlib_cli_output (vm, "%U is admin %s\n", + format_vnet_sw_interface_name, vnm, + vnet_get_sw_interface (vnm, sw_if_index), + (vnet_sw_interface_is_admin_up (vnm, sw_if_index) ? 
+ "up" : "down")); + + u32 ai; + u32 *link_scope = 0, *global_scope = 0; + u32 *local_scope = 0, *unknown_scope = 0; + ip_interface_address_t *a; + + vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index, + sw_if_index, ~0); + ai = lm->if_address_pool_index_by_sw_if_index[sw_if_index]; + + while (ai != (u32) ~ 0) + { + a = pool_elt_at_index (lm->if_address_pool, ai); + ip6_address_t *address = + ip_interface_address_get_address (lm, a); + + if (ip6_address_is_link_local_unicast (address)) + vec_add1 (link_scope, ai); + else if (ip6_address_is_global_unicast (address)) + vec_add1 (global_scope, ai); + else if (ip6_address_is_local_unicast (address)) + vec_add1 (local_scope, ai); + else + vec_add1 (unknown_scope, ai); + + ai = a->next_this_sw_interface; + } + + if (vec_len (link_scope)) + { + vlib_cli_output (vm, "\tLink-local address(es):\n"); + ip6_print_addrs (vm, link_scope); + vec_free (link_scope); + } + + if (vec_len (local_scope)) + { + vlib_cli_output (vm, "\tLocal unicast address(es):\n"); + ip6_print_addrs (vm, local_scope); + vec_free (local_scope); + } + + if (vec_len (global_scope)) + { + vlib_cli_output (vm, "\tGlobal unicast address(es):\n"); + ip6_print_addrs (vm, global_scope); + vec_free (global_scope); + } + + if (vec_len (unknown_scope)) + { + vlib_cli_output (vm, "\tOther-scope address(es):\n"); + ip6_print_addrs (vm, unknown_scope); + vec_free (unknown_scope); + } + + vlib_cli_output (vm, "\tJoined group address(es):\n"); + ip6_mldp_group_t *m; + /* *INDENT-OFF* */ + pool_foreach (m, radv_info->mldp_group_pool, + ({ + vlib_cli_output (vm, "\t\t%U\n", format_ip6_address, + &m->mcast_address); + })); + /* *INDENT-ON* */ + + vlib_cli_output (vm, "\tAdvertised Prefixes:\n"); + ip6_radv_prefix_t *p; + /* *INDENT-OFF* */ + pool_foreach (p, radv_info->adv_prefixes_pool, + ({ + vlib_cli_output (vm, "\t\tprefix %U, length %d\n", + format_ip6_address, &p->prefix, p->prefix_len); + })); + /* *INDENT-ON* */ + + vlib_cli_output (vm, "\tMTU is 
%d\n", radv_info->adv_link_mtu); + vlib_cli_output (vm, "\tICMP error messages are unlimited\n"); + vlib_cli_output (vm, "\tICMP redirects are disabled\n"); + vlib_cli_output (vm, "\tICMP unreachables are not sent\n"); + vlib_cli_output (vm, "\tND DAD is disabled\n"); + //vlib_cli_output (vm, "\tND reachable time is %d milliseconds\n",); + vlib_cli_output (vm, "\tND advertised reachable time is %d\n", + radv_info->adv_neighbor_reachable_time_in_msec); + vlib_cli_output (vm, + "\tND advertised retransmit interval is %d (msec)\n", + radv_info-> + adv_time_in_msec_between_retransmitted_neighbor_solicitations); + + u32 ra_interval = radv_info->max_radv_interval; + u32 ra_interval_min = radv_info->min_radv_interval; + vlib_cli_output (vm, + "\tND router advertisements are sent every %d seconds (min interval is %d)\n", + ra_interval, ra_interval_min); + vlib_cli_output (vm, + "\tND router advertisements live for %d seconds\n", + radv_info->adv_router_lifetime_in_sec); + vlib_cli_output (vm, + "\tHosts %s stateless autoconfig for addresses\n", + (radv_info->adv_managed_flag) ? "use" : + " don't use"); + vlib_cli_output (vm, "\tND router advertisements sent %d\n", + radv_info->n_advertisements_sent); + vlib_cli_output (vm, "\tND router solicitations received %d\n", + radv_info->n_solicitations_rcvd); + vlib_cli_output (vm, "\tND router solicitations dropped %d\n", + radv_info->n_solicitations_dropped); + } + else + { + error = clib_error_return (0, "IPv6 not enabled on interface", + format_unformat_error, input); + + } + } + return error; +} + +/*? + * This command is used to display various IPv6 attributes on a given + * interface. 
+ * + * @cliexpar + * Example of how to display IPv6 settings: + * @cliexstart{show ip6 interface GigabitEthernet2/0/0} + * GigabitEthernet2/0/0 is admin up + * Link-local address(es): + * fe80::ab8/64 + * Joined group address(es): + * ff02::1 + * ff02::2 + * ff02::16 + * ff02::1:ff00:ab8 + * Advertised Prefixes: + * prefix fe80::fe:28ff:fe9c:75b3, length 64 + * MTU is 1500 + * ICMP error messages are unlimited + * ICMP redirects are disabled + * ICMP unreachables are not sent + * ND DAD is disabled + * ND advertised reachable time is 0 + * ND advertised retransmit interval is 0 (msec) + * ND router advertisements are sent every 200 seconds (min interval is 150) + * ND router advertisements live for 600 seconds + * Hosts use stateless autoconfig for addresses + * ND router advertisements sent 19336 + * ND router solicitations received 0 + * ND router solicitations dropped 0 + * @cliexend + * Example of output if IPv6 is not enabled on the interface: + * @cliexstart{show ip6 interface GigabitEthernet2/0/0} + * show ip6 interface: IPv6 not enabled on interface + * @cliexend +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_ip6_interface_command, static) = +{ + .path = "show ip6 interface", + .function = show_ip6_interface_cmd, + .short_help = "show ip6 interface <interface>", +}; +/* *INDENT-ON* */ + +clib_error_t * +disable_ip6_interface (vlib_main_t * vm, u32 sw_if_index) +{ + clib_error_t *error = 0; + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + u32 ri; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, + ~0); + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + /* if not created - do nothing */ + if (ri != ~0) + { + vnet_main_t *vnm = vnet_get_main (); + ip6_radv_t *radv_info; + + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + /* check radv_info ref count for other ip6 addresses on this interface */ + /* This implicitly excludes the link local address 
*/ + if (radv_info->ref_count == 0) + { + /* essentially "disables" ipv6 on this interface */ + error = ip6_add_del_interface_address (vm, sw_if_index, + &radv_info-> + link_local_address, 128, + 1 /* is_del */ ); + + ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, + 0 /* is_add */ ); + ip6_mfib_interface_enable_disable (sw_if_index, 0); + } + } + return error; +} + +int +ip6_interface_enabled (vlib_main_t * vm, u32 sw_if_index) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + u32 ri = ~0; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, + ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + return ri != ~0; +} + +clib_error_t * +enable_ip6_interface (vlib_main_t * vm, u32 sw_if_index) +{ + clib_error_t *error = 0; + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + u32 ri; + int is_add = 1; + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index, + ~0); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + /* if not created yet */ + if (ri == ~0) + { + vnet_main_t *vnm = vnet_get_main (); + vnet_sw_interface_t *sw_if0; + + sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index); + if (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE) + { + ethernet_interface_t *eth_if0; + + eth_if0 = + ethernet_get_interface (ðernet_main, sw_if0->hw_if_index); + if (eth_if0) + { + /* create radv_info. for this interface. 
This holds all the info needed for router adverts */ + ri = + ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, is_add); + + if (ri != ~0) + { + ip6_radv_t *radv_info; + ip6_address_t link_local_address; + + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + ip6_link_local_address_from_ethernet_mac_address + (&link_local_address, eth_if0->address); + + sw_if0 = vnet_get_sw_interface (vnm, sw_if_index); + if (sw_if0->type == VNET_SW_INTERFACE_TYPE_SUB || + sw_if0->type == VNET_SW_INTERFACE_TYPE_P2P) + { + /* make up an interface id */ + md5_context_t m; + u8 digest[16]; + + link_local_address.as_u64[0] = radv_info->randomizer; + + md5_init (&m); + md5_add (&m, &link_local_address, 16); + md5_finish (&m, digest); + + clib_memcpy (&link_local_address, digest, 16); + + radv_info->randomizer = link_local_address.as_u64[0]; + + link_local_address.as_u64[0] = + clib_host_to_net_u64 (0xFE80000000000000ULL); + /* clear u bit */ + link_local_address.as_u8[8] &= 0xfd; + } + + ip6_mfib_interface_enable_disable (sw_if_index, 1); + + /* essentially "enables" ipv6 on this interface */ + error = ip6_add_del_interface_address (vm, sw_if_index, + &link_local_address, + 128 + /* address width */ , + 0 /* is_del */ ); + + if (error) + ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, + !is_add); + else + { + radv_info->link_local_address = link_local_address; + } + } + } + } + } + return error; +} + +static clib_error_t * +enable_ip6_interface_cmd (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index; + + sw_if_index = ~0; + + if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + enable_ip6_interface (vm, sw_if_index); + } + else + { + error = clib_error_return (0, "unknown interface\n'", + format_unformat_error, input); + + } + return error; +} + +/*? + * This command is used to enable IPv6 on a given interface. 
+ * + * @cliexpar + * Example of how enable IPv6 on a given interface: + * @cliexcmd{enable ip6 interface GigabitEthernet2/0/0} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (enable_ip6_interface_command, static) = +{ + .path = "enable ip6 interface", + .function = enable_ip6_interface_cmd, + .short_help = "enable ip6 interface <interface>", +}; +/* *INDENT-ON* */ + +static clib_error_t * +disable_ip6_interface_cmd (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index; + + sw_if_index = ~0; + + if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = disable_ip6_interface (vm, sw_if_index); + } + else + { + error = clib_error_return (0, "unknown interface\n'", + format_unformat_error, input); + + } + return error; +} + +/*? + * This command is used to disable IPv6 on a given interface. + * + * @cliexpar + * Example of how disable IPv6 on a given interface: + * @cliexcmd{disable ip6 interface GigabitEthernet2/0/0} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (disable_ip6_interface_command, static) = +{ + .path = "disable ip6 interface", + .function = disable_ip6_interface_cmd, + .short_help = "disable ip6 interface <interface>", +}; +/* *INDENT-ON* */ + +/*? + * This command is used to configure the neighbor discovery + * parameters on a given interface. Use the '<em>show ip6 interface</em>' + * command to display some of the current neighbor discovery parameters + * on a given interface. 
This command has three formats: + * + * + * <b>Format 1 - Router Advertisement Options:</b> (Only one can be entered in a single command) + * + * '<em><b>ip6 nd <interface> [no] [ra-managed-config-flag] | [ra-other-config-flag] | [ra-suppress] | [ra-suppress-link-layer] | [ra-send-unicast] | [ra-lifetime <lifetime>] | [ra-initial <cnt> <interval>] | [ra-interval <max-interval> [<min-interval>]] | [ra-cease]</b></em>' + * + * Where: + * + * <em>[no] ra-managed-config-flag</em> - Advertises in ICMPv6 + * router-advertisement messages to use stateful address + * auto-configuration to obtain address information (sets the M-bit). + * Default is the M-bit is not set and the '<em>no</em>' option + * returns it to this default state. + * + * <em>[no] ra-other-config-flag</em> - Indicates in ICMPv6 + * router-advertisement messages that hosts use stateful auto + * configuration to obtain non-address-related information (sets + * the O-bit). Default is the O-bit is not set and the '<em>no</em>' + * option returns it to this default state. + * + * <em>[no] ra-suppress</em> - Disables sending ICMPv6 router-advertisement + * messages. The '<em>no</em>' option implies to enable sending ICMPv6 + * router-advertisement messages. + * + * <em>[no] ra-suppress-link-layer</em> - Indicates not to include the + * optional source link-layer address in the ICMPv6 router-advertisement + * messages. Default is to include the optional source link-layer address + * and the '<em>no</em>' option returns it to this default state. + * + * <em>[no] ra-send-unicast</em> - Use the source address of the + * router-solicitation message if available. The default is to use + * multicast address of all nodes, and the '<em>no</em>' option returns + * it to this default state. + * + * <em>[no] ra-lifetime <lifetime></em> - Advertises the lifetime of a + * default router in ICMPv6 router-advertisement messages. The range is + * from 0 to 9000 seconds. 
'<em><lifetime></em>' must be greater than + * '<em><max-interval></em>'. The default value is 600 seconds and the + * '<em>no</em>' option returns it to this default value. + * + * <em>[no] ra-initial <cnt> <interval></em> - Number of initial ICMPv6 + * router-advertisement messages sent and the interval between each + * message. Range for count is 1 - 3 and default is 3. Range for interval + * is 1 to 16 seconds, and default is 16 seconds. The '<em>no</em>' option + * returns both to their default value. + * + * <em>[no] ra-interval <max-interval> [<min-interval>]</em> - Configures the + * interval between sending ICMPv6 router-advertisement messages. The + * range for max-interval is from 4 to 200 seconds. min-interval cannot + * be more than 75% of max-interval. If not set, min-interval will be + * set to 75% of max-interval. The range for min-interval is from 3 to + * 150 seconds. The '<em>no</em>' option returns both to their default + * value. + * + * <em>[no] ra-cease</em> - Cease sending ICMPv6 router-advertisement messages. + * The '<em>no</em>' option implies to start (or restart) sending + * ICMPv6 router-advertisement messages. + * + * + * <b>Format 2 - Prefix Options:</b> + * + * '<em><b>ip6 nd <interface> [no] prefix <ip6-address>/<width> [<valid-lifetime> <pref-lifetime> | infinite] [no-advertise] [off-link] [no-autoconfig] [no-onlink]</b></em>' + * + * Where: + * + * <em>no</em> - All additional flags are ignored and the prefix is deleted. + * + * <em><valid-lifetime> <pref-lifetime></em> - '<em><valid-lifetime></em>' is the + * length of time in seconds during which the prefix is valid for the purpose of + * on-link determination. Range is 7203 to 2592000 seconds and default is 2592000 + * seconds (30 days). '<em><pref-lifetime></em>' is the preferred-lifetime and is the + * length of time in seconds during which addresses generated from the prefix remain + * preferred. Range is 0 to 604800 seconds and default is 604800 seconds (7 days). 
+ * + * <em>infinite</em> - Both '<em><valid-lifetime></em>' and '<em><pref-lifetime></em>' + * are infinite, no timeout. + * + * <em>no-advertise</em> - Do not send full router address in prefix + * advertisement. Default is to advertise (i.e. - This flag is off by default). + * + * <em>off-link</em> - Prefix is off-link, clear L-bit in packet. Default is on-link + * (i.e. - This flag is off and L-bit in packet is set by default and this prefix can + * be used for on-link determination). '<em>no-onlink</em>' also controls the L-bit. + * + * <em>no-autoconfig</em> - Do not use prefix for autoconfiguration, clear A-bit in packet. + * Default is autoconfig (i.e. - This flag is off and A-bit in packet is set by default). + * + * <em>no-onlink</em> - Do not use prefix for on-link determination, clear L-bit in packet. + * Default is on-link (i.e. - This flag is off and L-bit in packet is set by default and + * this prefix can be used for on-link determination). '<em>off-link</em>' also controls + * the L-bit. + * + * + * <b>Format 3: - Default of Prefix:</b> + * + * '<em><b>ip6 nd <interface> [no] prefix <ip6-address>/<width> default</b></em>' + * + * When a new prefix is added (or existing one is being overwritten) <em>default</em> + * uses default values for the prefix. If <em>no</em> is used, the <em>default</em> + * is ignored and the prefix is deleted. 
+ * + * + * @cliexpar + * Example of how set a router advertisement option: + * @cliexcmd{ip6 nd GigabitEthernet2/0/0 ra-interval 100 20} + * Example of how to add a prefix: + * @cliexcmd{ip6 nd GigabitEthernet2/0/0 prefix fe80::fe:28ff:fe9c:75b3/64 infinite no-advertise} + * Example of how to delete a prefix: + * @cliexcmd{ip6 nd GigabitEthernet2/0/0 no prefix fe80::fe:28ff:fe9c:75b3/64} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_nd_command, static) = +{ + .path = "ip6 nd", + .short_help = "ip6 nd <interface> ...", + .function = ip6_neighbor_cmd, +}; +/* *INDENT-ON* */ + +clib_error_t * +set_ip6_link_local_address (vlib_main_t * vm, + u32 sw_if_index, ip6_address_t * address) +{ + clib_error_t *error = 0; + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + u32 ri; + ip6_radv_t *radv_info; + vnet_main_t *vnm = vnet_get_main (); + + if (!ip6_address_is_link_local_unicast (address)) + { + vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_LINK_LOCAL; + return (error = clib_error_return (0, "address not link-local", + format_unformat_error)); + } + + /* call enable ipv6 */ + enable_ip6_interface (vm, sw_if_index); + + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + if (ri != ~0) + { + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + /* save if link local address (overwrite default) */ + + /* delete the old one */ + error = ip6_add_del_interface_address (vm, sw_if_index, + &radv_info->link_local_address, + 128, 1 /* is_del */ ); + + if (!error) + { + /* add the new one */ + error = ip6_add_del_interface_address (vm, sw_if_index, + address, 128, + 0 /* is_del */ ); + + if (!error) + { + radv_info->link_local_address = *address; + } + } + } + else + { + vnm->api_errno = VNET_API_ERROR_IP6_NOT_ENABLED; + error = clib_error_return (0, "ip6 not enabled for interface", + format_unformat_error); + } + return error; +} + +clib_error_t * +set_ip6_link_local_address_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vnet_main_t 
*vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index; + ip6_address_t ip6_addr; + + if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + /* get the rest of the command */ + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_ip6_address, &ip6_addr)) + break; + else + return (unformat_parse_error (input)); + } + } + error = set_ip6_link_local_address (vm, sw_if_index, &ip6_addr); + return error; +} + +/*? + * This command is used to assign an IPv6 Link-local address to an + * interface. This command will enable IPv6 on an interface if it + * is not already enabled. Use the '<em>show ip6 interface</em>' command + * to display the assigned Link-local address. + * + * @cliexpar + * Example of how to assign an IPv6 Link-local address to an interface: + * @cliexcmd{set ip6 link-local address GigabitEthernet2/0/0 FE80::AB8} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_ip6_link_local_address_command, static) = +{ + .path = "set ip6 link-local address", + .short_help = "set ip6 link-local address <interface> <ip6-address>", + .function = set_ip6_link_local_address_cmd, +}; +/* *INDENT-ON* */ + +/** + * @brief callback when an interface address is added or deleted + */ +static void +ip6_neighbor_add_del_interface_address (ip6_main_t * im, + uword opaque, + u32 sw_if_index, + ip6_address_t * address, + u32 address_length, + u32 if_address_index, u32 is_delete) +{ + vnet_main_t *vnm = vnet_get_main (); + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + u32 ri; + vlib_main_t *vm = vnm->vlib_main; + ip6_radv_t *radv_info; + ip6_address_t a; + + /* create solicited node multicast address for this interface adddress */ + ip6_set_solicited_node_multicast_address (&a, 0); + + a.as_u8[0xd] = address->as_u8[0xd]; + a.as_u8[0xe] = address->as_u8[0xe]; + a.as_u8[0xf] = address->as_u8[0xf]; + + if (!is_delete) + { + /* try to create radv_info - does nothing if ipv6 already enabled */ + 
enable_ip6_interface (vm, sw_if_index); + + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, + sw_if_index, ~0); + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + if (ri != ~0) + { + /* get radv_info */ + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + /* add address */ + if (!ip6_address_is_link_local_unicast (address)) + radv_info->ref_count++; + + ip6_neighbor_add_mld_prefix (radv_info, &a); + } + } + else + { + + /* delete */ + /* look up the radv_t information for this interface */ + vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, + sw_if_index, ~0); + ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index]; + + if (ri != ~0) + { + /* get radv_info */ + radv_info = pool_elt_at_index (nm->if_radv_pool, ri); + + ip6_neighbor_del_mld_prefix (radv_info, &a); + + /* if interface up send MLDP "report" */ + radv_info->all_routers_mcast = 0; + + /* add address */ + if (!ip6_address_is_link_local_unicast (address)) + radv_info->ref_count--; + } + /* Ensure that IPv6 is disabled, and LL removed after ref_count reaches 0 */ + disable_ip6_interface (vm, sw_if_index); + } +} + +clib_error_t * +ip6_set_neighbor_limit (u32 neighbor_limit) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + + nm->limit_neighbor_cache_size = neighbor_limit; + return 0; +} + +static void +ip6_neighbor_table_bind (ip6_main_t * im, + uword opaque, + u32 sw_if_index, + u32 new_fib_index, u32 old_fib_index) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_neighbor_t *n = NULL; + u32 i, *to_re_add = 0; + + /* *INDENT-OFF* */ + pool_foreach (n, nm->neighbor_pool, + ({ + if (n->key.sw_if_index == sw_if_index) + vec_add1 (to_re_add, n - nm->neighbor_pool); + })); + /* *INDENT-ON* */ + + for (i = 0; i < vec_len (to_re_add); i++) + { + n = pool_elt_at_index (nm->neighbor_pool, to_re_add[i]); + ip6_neighbor_adj_fib_remove (n, old_fib_index); + ip6_neighbor_adj_fib_add (n, new_fib_index); 
+ } + vec_free (to_re_add); +} + +static clib_error_t * +ip6_neighbor_init (vlib_main_t * vm) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_main_t *im = &ip6_main; + + mhash_init (&nm->neighbor_index_by_key, + /* value size */ sizeof (uword), + /* key size */ sizeof (ip6_neighbor_key_t)); + + icmp6_register_type (vm, ICMP6_neighbor_solicitation, + ip6_icmp_neighbor_solicitation_node.index); + icmp6_register_type (vm, ICMP6_neighbor_advertisement, + ip6_icmp_neighbor_advertisement_node.index); + icmp6_register_type (vm, ICMP6_router_solicitation, + ip6_icmp_router_solicitation_node.index); + icmp6_register_type (vm, ICMP6_router_advertisement, + ip6_icmp_router_advertisement_node.index); + + /* handler node for ip6 neighbor discovery events and timers */ + vlib_register_node (vm, &ip6_icmp_neighbor_discovery_event_node); + + /* add call backs */ + ip6_add_del_interface_address_callback_t cb; + memset (&cb, 0x0, sizeof (ip6_add_del_interface_address_callback_t)); + + /* when an interface address changes... 
*/ + cb.function = ip6_neighbor_add_del_interface_address; + cb.function_opaque = 0; + vec_add1 (im->add_del_interface_address_callbacks, cb); + + ip6_table_bind_callback_t cbt; + cbt.function = ip6_neighbor_table_bind; + cbt.function_opaque = 0; + vec_add1 (im->table_bind_callbacks, cbt); + + mhash_init (&nm->pending_resolutions_by_address, + /* value size */ sizeof (uword), + /* key size */ sizeof (ip6_address_t)); + + mhash_init (&nm->mac_changes_by_address, + /* value size */ sizeof (uword), + /* key size */ sizeof (ip6_address_t)); + + /* default, configurable */ + nm->limit_neighbor_cache_size = 50000; + + nm->wc_ip6_nd_publisher_node = (uword) ~ 0; + +#if 0 + /* $$$$ Hack fix for today */ + vec_validate_init_empty + (im->discover_neighbor_next_index_by_hw_if_index, 32, 0 /* drop */ ); +#endif + + return 0; +} + +VLIB_INIT_FUNCTION (ip6_neighbor_init); + + +void +vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm, + void *address_arg, + uword node_index, + uword type_opaque, uword data) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_address_t *address = address_arg; + uword *p; + pending_resolution_t *pr; + + pool_get (nm->pending_resolutions, pr); + + pr->next_index = ~0; + pr->node_index = node_index; + pr->type_opaque = type_opaque; + pr->data = data; + + p = mhash_get (&nm->pending_resolutions_by_address, address); + if (p) + { + /* Insert new resolution at the head of the list */ + pr->next_index = p[0]; + mhash_unset (&nm->pending_resolutions_by_address, address, 0); + } + + mhash_set (&nm->pending_resolutions_by_address, address, + pr - nm->pending_resolutions, 0 /* old value */ ); +} + +int +vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm, + void *data_callback, + u32 pid, + void *address_arg, + uword node_index, + uword type_opaque, uword data, int is_add) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_address_t *address = address_arg; + + /* Try to find an existing entry */ + u32 *first = (u32 *) mhash_get 
(&nm->mac_changes_by_address, address); + u32 *p = first; + pending_resolution_t *mc; + while (p && *p != ~0) + { + mc = pool_elt_at_index (nm->mac_changes, *p); + if (mc->node_index == node_index && mc->type_opaque == type_opaque + && mc->pid == pid) + break; + p = &mc->next_index; + } + + int found = p && *p != ~0; + if (is_add) + { + if (found) + return VNET_API_ERROR_ENTRY_ALREADY_EXISTS; + + pool_get (nm->mac_changes, mc); + *mc = (pending_resolution_t) + { + .next_index = ~0,.node_index = node_index,.type_opaque = + type_opaque,.data = data,.data_callback = data_callback,.pid = + pid,}; + + /* Insert new resolution at the end of the list */ + u32 new_idx = mc - nm->mac_changes; + if (p) + p[0] = new_idx; + else + mhash_set (&nm->mac_changes_by_address, address, new_idx, 0); + } + else + { + if (!found) + return VNET_API_ERROR_NO_SUCH_ENTRY; + + /* Clients may need to clean up pool entries, too */ + void (*fp) (u32, u8 *) = data_callback; + if (fp) + (*fp) (mc->data, 0 /* no new mac addrs */ ); + + /* Remove the entry from the list and delete the entry */ + *p = mc->next_index; + pool_put (nm->mac_changes, mc); + + /* Remove from hash if we deleted the last entry */ + if (*p == ~0 && p == first) + mhash_unset (&nm->mac_changes_by_address, address, 0); + } + return 0; +} + +int +vnet_ip6_nd_term (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_buffer_t * p0, + ethernet_header_t * eth, + ip6_header_t * ip, u32 sw_if_index, u16 bd_index) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + icmp6_neighbor_solicitation_or_advertisement_header_t *ndh; + + ndh = ip6_next_header (ip); + if (ndh->icmp.type != ICMP6_neighbor_solicitation && + ndh->icmp.type != ICMP6_neighbor_advertisement) + return 0; + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) && + (p0->flags & VLIB_BUFFER_IS_TRACED))) + { + u8 *t0 = vlib_add_trace (vm, node, p0, + sizeof (icmp6_input_trace_t)); + clib_memcpy (t0, ip, sizeof (icmp6_input_trace_t)); + } + + /* Check if anyone want 
ND events for L2 BDs */ + if (PREDICT_FALSE + (nm->wc_ip6_nd_publisher_node != (uword) ~ 0 + && !ip6_address_is_link_local_unicast (&ip->src_address))) + { + vnet_nd_wc_publish (sw_if_index, eth->src_address, &ip->src_address); + } + + /* Check if MAC entry exsist for solicited target IP */ + if (ndh->icmp.type == ICMP6_neighbor_solicitation) + { + icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *opt; + l2_bridge_domain_t *bd_config; + u8 *macp; + + opt = (void *) (ndh + 1); + if ((opt->header.type != + ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address) || + (opt->header.n_data_u64s != 1)) + return 0; /* source link layer address option not present */ + + bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index); + macp = + (u8 *) hash_get_mem (bd_config->mac_by_ip6, &ndh->target_address); + if (macp) + { /* found ip-mac entry, generate eighbor advertisement response */ + int bogus_length; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_icmp_input_node.index); + ip->dst_address = ip->src_address; + ip->src_address = ndh->target_address; + ip->hop_limit = 255; + opt->header.type = + ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address; + clib_memcpy (opt->ethernet_address, macp, 6); + ndh->icmp.type = ICMP6_neighbor_advertisement; + ndh->advertisement_flags = clib_host_to_net_u32 + (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED | + ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE); + ndh->icmp.checksum = 0; + ndh->icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip, &bogus_length); + clib_memcpy (eth->dst_address, eth->src_address, 6); + clib_memcpy (eth->src_address, macp, 6); + vlib_error_count (vm, error_node->node_index, + ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX, 1); + return 1; + } + } + + return 0; + +} + +int +ip6_neighbor_proxy_add_del (u32 sw_if_index, ip6_address_t * addr, u8 is_del) +{ + u32 fib_index; + + fib_prefix_t pfx = { + .fp_len = 128, + .fp_proto = FIB_PROTOCOL_IP6, + .fp_addr = { + .ip6 = 
*addr, + }, + }; + ip46_address_t nh = { + .ip6 = *addr, + }; + + fib_index = ip6_fib_table_get_index_for_sw_if_index (sw_if_index); + + if (~0 == fib_index) + return VNET_API_ERROR_NO_SUCH_FIB; + + if (is_del) + { + fib_table_entry_path_remove (fib_index, + &pfx, + FIB_SOURCE_IP6_ND_PROXY, + DPO_PROTO_IP6, + &nh, + sw_if_index, + ~0, 1, FIB_ROUTE_PATH_FLAG_NONE); + /* flush the ND cache of this address if it's there */ + vnet_unset_ip6_ethernet_neighbor (vlib_get_main (), + sw_if_index, addr, NULL, 0); + } + else + { + fib_table_entry_path_add (fib_index, + &pfx, + FIB_SOURCE_IP6_ND_PROXY, + FIB_ENTRY_FLAG_NONE, + DPO_PROTO_IP6, + &nh, + sw_if_index, + ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE); + } + return (0); +} + +static clib_error_t * +set_ip6_nd_proxy_cmd (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + ip6_address_t addr; + u32 sw_if_index; + u8 is_del = 0; + + if (unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + /* get the rest of the command */ + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%U", unformat_ip6_address, &addr)) + break; + else if (unformat (input, "delete") || unformat (input, "del")) + is_del = 1; + else + return (unformat_parse_error (input)); + } + } + + ip6_neighbor_proxy_add_del (sw_if_index, &addr, is_del); + + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_ip6_nd_proxy_command, static) = +{ + .path = "set ip6 nd proxy", + .short_help = "set ip6 nd proxy <HOST> <INTERFACE>", + .function = set_ip6_nd_proxy_cmd, +}; +/* *INDENT-ON* */ + +void +ethernet_ndp_change_mac (u32 sw_if_index) +{ + ip6_neighbor_main_t *nm = &ip6_neighbor_main; + ip6_neighbor_t *n; + + /* *INDENT-OFF* */ + pool_foreach (n, nm->neighbor_pool, + ({ + if (n->key.sw_if_index == sw_if_index) + { + adj_nbr_walk_nh6 (sw_if_index, + &n->key.ip6_address, + ip6_nd_mk_complete_walk, n); + } + })); + 
/* *INDENT-ON* */ +} + +void +send_ip6_na (vlib_main_t * vm, vnet_hw_interface_t * hi) +{ + ip6_main_t *i6m = &ip6_main; + u32 sw_if_index = hi->sw_if_index; + ip6_address_t *ip6_addr = ip6_interface_first_address (i6m, sw_if_index); + if (ip6_addr) + { + clib_warning + ("Sending unsolicitated NA IP6 address %U on sw_if_idex %d", + format_ip6_address, ip6_addr, sw_if_index); + + /* Form unsolicited neighbor advertisement packet from NS pkt template */ + int bogus_length; + u32 bi = 0; + icmp6_neighbor_solicitation_header_t *h = + vlib_packet_template_get_packet (vm, + &i6m->discover_neighbor_packet_template, + &bi); + ip6_set_reserved_multicast_address (&h->ip.dst_address, + IP6_MULTICAST_SCOPE_link_local, + IP6_MULTICAST_GROUP_ID_all_hosts); + h->ip.src_address = ip6_addr[0]; + h->neighbor.icmp.type = ICMP6_neighbor_advertisement; + h->neighbor.target_address = ip6_addr[0]; + h->neighbor.advertisement_flags = clib_host_to_net_u32 + (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE); + clib_memcpy (h->link_layer_option.ethernet_address, + hi->hw_address, vec_len (hi->hw_address)); + h->neighbor.icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length); + ASSERT (bogus_length == 0); + + /* Setup MAC header with IP6 Etype and mcast DMAC */ + vlib_buffer_t *b = vlib_get_buffer (vm, bi); + vlib_buffer_advance (b, -sizeof (ethernet_header_t)); + ethernet_header_t *e = vlib_buffer_get_current (b); + e->type = clib_host_to_net_u16 (ETHERNET_TYPE_IP6); + clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address)); + ip6_multicast_ethernet_address (e->dst_address, + IP6_MULTICAST_GROUP_ID_all_hosts); + + /* Send unsolicited ND advertisement packet out the specified interface */ + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; + vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index); + u32 *to_next = vlib_frame_vector_args (f); + to_next[0] = bi; + f->n_vectors = 1; + 
vlib_put_frame_to_node (vm, hi->output_node_index, f); + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip6_neighbor.h b/src/vnet/ip/ip6_neighbor.h new file mode 100644 index 00000000..ed80381b --- /dev/null +++ b/src/vnet/ip/ip6_neighbor.h @@ -0,0 +1,109 @@ +/* + * + * ip6_neighboor.h: ip6 neighbor structures + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef included_ip6_neighbor_h +#define included_ip6_neighbor_h + +#include <vnet/fib/fib_types.h> + +typedef struct +{ + ip6_address_t ip6_address; + u32 sw_if_index; + u32 pad; +} ip6_neighbor_key_t; + +typedef enum ip6_neighbor_flags_t_ +{ + IP6_NEIGHBOR_FLAG_STATIC = (1 << 0), + IP6_NEIGHBOR_FLAG_DYNAMIC = (1 << 1), + IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY = (1 << 2), +} __attribute__ ((packed)) ip6_neighbor_flags_t; + +typedef struct +{ + ip6_neighbor_key_t key; + u8 link_layer_address[8]; + ip6_neighbor_flags_t flags; + u64 cpu_time_last_updated; + fib_node_index_t fib_entry_index; +} ip6_neighbor_t; + +extern ip6_neighbor_t *ip6_neighbors_entries (u32 sw_if_index); + +extern int ip6_neighbor_ra_config (vlib_main_t * vm, u32 sw_if_index, + u8 suppress, u8 managed, u8 other, + u8 ll_option, u8 send_unicast, u8 cease, + u8 use_lifetime, u32 lifetime, + u32 initial_count, u32 initial_interval, + u32 max_interval, u32 min_interval, + u8 is_no); + +extern int ip6_neighbor_ra_prefix (vlib_main_t * vm, u32 sw_if_index, + ip6_address_t * prefix_addr, u8 prefix_len, + u8 use_default, u32 val_lifetime, + u32 pref_lifetime, u8 no_advertise, + u8 off_link, u8 no_autoconfig, + u8 no_onlink, u8 is_no); + +extern clib_error_t *ip6_set_neighbor_limit (u32 neighbor_limit); + +extern void vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm, + void *address_arg, + uword node_index, + uword type_opaque, + uword data); + +extern int vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * a, + u8 * link_layer_address, + uword n_bytes_link_layer_address, + int is_static, + int is_no_fib_entry); + +extern int vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm, + u32 sw_if_index, + ip6_address_t * a, + u8 * link_layer_address, + uword + n_bytes_link_layer_address); + +extern int ip6_neighbor_proxy_add_del (u32 sw_if_index, + ip6_address_t * addr, u8 is_add); + +u32 ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, + u32 
is_add); +typedef struct +{ + u32 sw_if_index; + ip6_address_t ip6; + u8 mac[6]; +} wc_nd_report_t; + +void wc_nd_set_publisher_node (uword node_index, uword event_type); + +#endif /* included_ip6_neighbor_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h new file mode 100644 index 00000000..c0c745e2 --- /dev/null +++ b/src/vnet/ip/ip6_packet.h @@ -0,0 +1,536 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip6/packet.h: ip6 packet format + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_ip6_packet_h +#define included_ip6_packet_h + +#include <vnet/tcp/tcp_packet.h> +#include <vnet/ip/ip4_packet.h> + +typedef union +{ + u8 as_u8[16]; + u16 as_u16[8]; + u32 as_u32[4]; + u64 as_u64[2]; + uword as_uword[16 / sizeof (uword)]; +} +ip6_address_t; + +/* Packed so that the mhash key doesn't include uninitialized pad bytes */ +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + /* IP address must be first for ip_interface_address_get_address() to work */ + ip6_address_t ip6_addr; + u32 fib_index; +}) ip6_address_fib_t; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (union { + struct { + u32 pad[3]; + ip4_address_t ip4; + }; + ip6_address_t ip6; + u8 as_u8[16]; + u64 as_u64[2]; +}) ip46_address_t; +/* *INDENT-ON* */ +#define ip46_address_is_ip4(ip46) (((ip46)->pad[0] | (ip46)->pad[1] | (ip46)->pad[2]) == 0) +#define ip46_address_mask_ip4(ip46) ((ip46)->pad[0] = (ip46)->pad[1] = (ip46)->pad[2] = 0) +#define ip46_address_set_ip4(ip46, ip) (ip46_address_mask_ip4(ip46), (ip46)->ip4 = (ip)[0]) +#define ip46_address_reset(ip46) ((ip46)->as_u64[0] = (ip46)->as_u64[1] = 0) +#define ip46_address_cmp(ip46_1, ip46_2) (memcmp(ip46_1, ip46_2, sizeof(*ip46_1))) +#define ip46_address_is_zero(ip46) (((ip46)->as_u64[0] == 0) && ((ip46)->as_u64[1] == 0)) +#define ip46_address_is_equal(a1, a2) (((a1)->as_u64[0] == (a2)->as_u64[0]) \ + && ((a1)->as_u64[1] == (a2)->as_u64[1])) + +always_inline ip46_address_t +to_ip46 (u32 is_ipv6, u8 * buf) +{ + 
ip46_address_t ip; + if (is_ipv6) + ip.ip6 = *((ip6_address_t *) buf); + else + ip46_address_set_ip4 (&ip, (ip4_address_t *) buf); + return ip; +} + + +always_inline void +ip6_addr_fib_init (ip6_address_fib_t * addr_fib, ip6_address_t * address, + u32 fib_index) +{ + addr_fib->ip6_addr = *address; + addr_fib->fib_index = fib_index; +} + +/* Special addresses: + unspecified ::/128 + loopback ::1/128 + global unicast 2000::/3 + unique local unicast fc00::/7 + link local unicast fe80::/10 + multicast ff00::/8 + ietf reserved everything else. */ + +#define foreach_ip6_multicast_address_scope \ + _ (loopback, 0x1) \ + _ (link_local, 0x2) \ + _ (admin_local, 0x4) \ + _ (site_local, 0x5) \ + _ (organization_local, 0x8) \ + _ (global, 0xe) + +#define foreach_ip6_multicast_link_local_group_id \ + _ (all_hosts, 0x1) \ + _ (all_routers, 0x2) \ + _ (rip_routers, 0x9) \ + _ (eigrp_routers, 0xa) \ + _ (pim_routers, 0xd) \ + _ (mldv2_routers, 0x16) + +typedef enum +{ +#define _(f,n) IP6_MULTICAST_SCOPE_##f = n, + foreach_ip6_multicast_address_scope +#undef _ +} ip6_multicast_address_scope_t; + +typedef enum +{ +#define _(f,n) IP6_MULTICAST_GROUP_ID_##f = n, + foreach_ip6_multicast_link_local_group_id +#undef _ +} ip6_multicast_link_local_group_id_t; + +always_inline uword +ip6_address_is_multicast (ip6_address_t * a) +{ + return a->as_u8[0] == 0xff; +} + +always_inline uword +ip46_address_is_multicast (ip46_address_t * a) +{ + return ip46_address_is_ip4 (a) ? ip4_address_is_multicast (&a->ip4) : + ip6_address_is_multicast (&a->ip6); +} + +always_inline void +ip6_set_reserved_multicast_address (ip6_address_t * a, + ip6_multicast_address_scope_t scope, + u16 id) +{ + a->as_u64[0] = a->as_u64[1] = 0; + a->as_u16[0] = clib_host_to_net_u16 (0xff00 | scope); + a->as_u16[7] = clib_host_to_net_u16 (id); +} + +always_inline void +ip6_set_solicited_node_multicast_address (ip6_address_t * a, u32 id) +{ + /* 0xff02::1:ffXX:XXXX. 
*/ + a->as_u64[0] = a->as_u64[1] = 0; + a->as_u16[0] = clib_host_to_net_u16 (0xff02); + a->as_u8[11] = 1; + ASSERT ((id >> 24) == 0); + id |= 0xff << 24; + a->as_u32[3] = clib_host_to_net_u32 (id); +} + +always_inline void +ip6_link_local_address_from_ethernet_address (ip6_address_t * a, + u8 * ethernet_address) +{ + a->as_u64[0] = a->as_u64[1] = 0; + a->as_u16[0] = clib_host_to_net_u16 (0xfe80); + /* Always set locally administered bit (6). */ + a->as_u8[0x8] = ethernet_address[0] | (1 << 6); + a->as_u8[0x9] = ethernet_address[1]; + a->as_u8[0xa] = ethernet_address[2]; + a->as_u8[0xb] = 0xff; + a->as_u8[0xc] = 0xfe; + a->as_u8[0xd] = ethernet_address[3]; + a->as_u8[0xe] = ethernet_address[4]; + a->as_u8[0xf] = ethernet_address[5]; +} + +always_inline void +ip6_multicast_ethernet_address (u8 * ethernet_address, u32 group_id) +{ + ethernet_address[0] = 0x33; + ethernet_address[1] = 0x33; + ethernet_address[2] = ((group_id >> 24) & 0xff); + ethernet_address[3] = ((group_id >> 16) & 0xff); + ethernet_address[4] = ((group_id >> 8) & 0xff); + ethernet_address[5] = ((group_id >> 0) & 0xff); +} + +always_inline uword +ip6_address_is_equal (ip6_address_t * a, ip6_address_t * b) +{ + int i; + for (i = 0; i < ARRAY_LEN (a->as_uword); i++) + if (a->as_uword[i] != b->as_uword[i]) + return 0; + return 1; +} + +always_inline uword +ip6_address_is_equal_masked (ip6_address_t * a, ip6_address_t * b, + ip6_address_t * mask) +{ + int i; + for (i = 0; i < ARRAY_LEN (a->as_uword); i++) + { + uword a_masked, b_masked; + a_masked = a->as_uword[i] & mask->as_uword[i]; + b_masked = b->as_uword[i] & mask->as_uword[i]; + + if (a_masked != b_masked) + return 0; + } + return 1; +} + +always_inline void +ip6_address_mask (ip6_address_t * a, ip6_address_t * mask) +{ + int i; + for (i = 0; i < ARRAY_LEN (a->as_uword); i++) + a->as_uword[i] &= mask->as_uword[i]; +} + +always_inline void +ip6_address_set_zero (ip6_address_t * a) +{ + int i; + for (i = 0; i < ARRAY_LEN (a->as_uword); i++) + 
a->as_uword[i] = 0; +} + +always_inline void +ip6_address_mask_from_width (ip6_address_t * a, u32 width) +{ + int i, byte, bit, bitnum; + ASSERT (width <= 128); + memset (a, 0, sizeof (a[0])); + for (i = 0; i < width; i++) + { + bitnum = (7 - (i & 7)); + byte = i / 8; + bit = 1 << bitnum; + a->as_u8[byte] |= bit; + } +} + +always_inline uword +ip6_address_is_zero (ip6_address_t * a) +{ + int i; + for (i = 0; i < ARRAY_LEN (a->as_uword); i++) + if (a->as_uword[i] != 0) + return 0; + return 1; +} + +/* Check for unspecified address ::0 */ +always_inline uword +ip6_address_is_unspecified (ip6_address_t * a) +{ + return ip6_address_is_zero (a); +} + +/* Check for loopback address ::1 */ +always_inline uword +ip6_address_is_loopback (ip6_address_t * a) +{ + uword is_loopback; + u8 save = a->as_u8[15]; + a->as_u8[15] = save ^ 1; + is_loopback = ip6_address_is_zero (a); + a->as_u8[15] = save; + return is_loopback; +} + +/* Check for link local unicast fe80::/10. */ +always_inline uword +ip6_address_is_link_local_unicast (ip6_address_t * a) +{ + return a->as_u8[0] == 0xfe && (a->as_u8[1] & 0xc0) == 0x80; +} + +/* Check for unique local unicast fc00::/7. */ +always_inline uword +ip6_address_is_local_unicast (ip6_address_t * a) +{ + return (a->as_u8[0] & 0xfe) == 0xfc; +} + +/* Check for unique global unicast 2000::/3. */ +always_inline uword +ip6_address_is_global_unicast (ip6_address_t * a) +{ + return (a->as_u8[0] & 0xe0) == 0x20; +} + +/* Check for solicited node multicast 0xff02::1:ff00:0/104 */ +always_inline uword +ip6_is_solicited_node_multicast_address (ip6_address_t * a) +{ + return (a->as_u32[0] == clib_host_to_net_u32 (0xff020000) + && a->as_u32[1] == 0 + && a->as_u32[2] == clib_host_to_net_u32 (1) + && a->as_u8[12] == 0xff); +} + +typedef struct +{ + /* 4 bit version, 8 bit traffic class and 20 bit flow label. */ + u32 ip_version_traffic_class_and_flow_label; + + /* Total packet length not including this header (but including + any extension headers if present). 
*/ + u16 payload_length; + + /* Protocol for next header. */ + u8 protocol; + + /* Hop limit decremented by router at each hop. */ + u8 hop_limit; + + /* Source and destination address. */ + ip6_address_t src_address, dst_address; +} ip6_header_t; + +always_inline u8 +ip6_traffic_class (ip6_header_t * i) +{ + return (i->ip_version_traffic_class_and_flow_label & 0x0FF00000) >> 20; +} + +always_inline void * +ip6_next_header (ip6_header_t * i) +{ + return (void *) (i + 1); +} + +always_inline void +ip6_copy_header (ip6_header_t * dst, const ip6_header_t * src) +{ + dst->ip_version_traffic_class_and_flow_label = + src->ip_version_traffic_class_and_flow_label; + dst->payload_length = src->payload_length; + dst->protocol = src->protocol; + dst->hop_limit = src->hop_limit; + + dst->src_address.as_uword[0] = src->src_address.as_uword[0]; + dst->src_address.as_uword[1] = src->src_address.as_uword[1]; + dst->dst_address.as_uword[0] = src->dst_address.as_uword[0]; + dst->dst_address.as_uword[1] = src->dst_address.as_uword[1]; +} + +always_inline void +ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0) +{ + { + ip6_address_t src0, dst0; + + src0 = ip0->src_address; + dst0 = ip0->dst_address; + ip0->src_address = dst0; + ip0->dst_address = src0; + } + + { + u16 src0, dst0; + + src0 = tcp0->src; + dst0 = tcp0->dst; + tcp0->src = dst0; + tcp0->dst = src0; + } +} + +always_inline void +ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1, + tcp_header_t * tcp0, tcp_header_t * tcp1) +{ + { + ip6_address_t src0, dst0, src1, dst1; + + src0 = ip0->src_address; + src1 = ip1->src_address; + dst0 = ip0->dst_address; + dst1 = ip1->dst_address; + ip0->src_address = dst0; + ip1->src_address = dst1; + ip0->dst_address = src0; + ip1->dst_address = src1; + } + + { + u16 src0, dst0, src1, dst1; + + src0 = tcp0->src; + src1 = tcp1->src; + dst0 = tcp0->dst; + dst1 = tcp1->dst; + tcp0->src = dst0; + tcp1->src = dst1; + tcp0->dst = src0; + tcp1->dst = src1; + } +} + + +/* *INDENT-OFF* 
*/ +typedef CLIB_PACKED (struct { + u8 data; +}) ip6_pad1_option_t; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + u8 type; + u8 len; + u8 data[0]; +}) ip6_padN_option_t; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { +#define IP6_MLDP_ALERT_TYPE 0x5 + u8 type; + u8 len; + u16 value; +}) ip6_router_alert_option_t; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + u8 next_hdr; + /* Length of this header plus option data in 8 byte units. */ + u8 n_data_u64s; +}) ip6_ext_header_t; + +always_inline u8 ip6_ext_hdr(u8 nexthdr) +{ + /* + * find out if nexthdr is an extension header or a protocol + */ + return (nexthdr == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) || + (nexthdr == IP_PROTOCOL_IPV6_FRAGMENTATION) || + (nexthdr == IP_PROTOCOL_IPSEC_AH) || + (nexthdr == IP_PROTOCOL_IPV6_ROUTE) || + (nexthdr == IP_PROTOCOL_IP6_DESTINATION_OPTIONS); +} + +#define ip6_ext_header_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+1) << 3) +#define ip6_ext_authhdr_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+2) << 2) + +always_inline void * +ip6_ext_next_header (ip6_ext_header_t *ext_hdr ) +{ return (void *)((u8 *) ext_hdr + ip6_ext_header_len(ext_hdr)); } + +/* + * Macro to find the IPv6 ext header of type t + * I is the IPv6 header + * P is the previous IPv6 ext header (NULL if none) + * M is the matched IPv6 ext header of type t + */ +#define ip6_ext_header_find_t(i, p, m, t) \ +if ((i)->protocol == t) \ +{ \ + (m) = (void *)((i)+1); \ + (p) = NULL; \ +} \ +else \ +{ \ + (m) = NULL; \ + (p) = (void *)((i)+1); \ + while (ip6_ext_hdr((p)->next_hdr) && \ + ((ip6_ext_header_t *)(p))->next_hdr != (t)) \ + { \ + (p) = ip6_ext_next_header((p)); \ + } \ + if ( ((p)->next_hdr) == (t)) \ + { \ + (m) = (void *)(ip6_ext_next_header((p))); \ + } \ +} + + +typedef CLIB_PACKED (struct { + u8 next_hdr; + /* Length of this header plus option data in 8 byte units. 
*/ + u8 n_data_u64s; + u8 data[0]; +}) ip6_hop_by_hop_ext_t; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +typedef CLIB_PACKED (struct { + u8 next_hdr; + u8 rsv; + u16 fragment_offset_and_more; + u32 identification; +}) ip6_frag_hdr_t; +/* *INDENT-ON* */ + +#define ip6_frag_hdr_offset(hdr) \ + (clib_net_to_host_u16((hdr)->fragment_offset_and_more) >> 3) + +#define ip6_frag_hdr_more(hdr) \ + (clib_net_to_host_u16((hdr)->fragment_offset_and_more) & 0x1) + +#define ip6_frag_hdr_offset_and_more(offset, more) \ + clib_host_to_net_u16(((offset) << 3) + !!(more)) + +#endif /* included_ip6_packet_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip6_pg.c b/src/vnet/ip/ip6_pg.c new file mode 100644 index 00000000..ba1e4ad9 --- /dev/null +++ b/src/vnet/ip/ip6_pg.c @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/ip6_pg: IP v6 packet-generator interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vnet/ip/ip.h> +#include <vnet/pg/pg.h> + +static void +ip6_pg_edit_function (pg_main_t * pg, + pg_stream_t * s, + pg_edit_group_t * g, u32 * packets, u32 n_packets) +{ + vlib_main_t *vm = vlib_get_main (); + u32 ip_header_offset = g->start_byte_offset; + + while (n_packets >= 2) + { + u32 pi0, pi1; + vlib_buffer_t *p0, *p1; + ip6_header_t *ip0, *ip1; + + pi0 = packets[0]; + pi1 = packets[1]; + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + n_packets -= 2; + packets += 2; + + ip0 = (void *) (p0->data + ip_header_offset); + ip1 = (void *) (p1->data + ip_header_offset); + + ip0->payload_length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) - + ip_header_offset - sizeof (ip0[0])); + ip1->payload_length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p1) - + ip_header_offset - sizeof (ip1[0])); + } + + while (n_packets >= 1) + { + u32 pi0; + vlib_buffer_t *p0; + ip6_header_t *ip0; + + pi0 = packets[0]; + p0 = vlib_get_buffer (vm, pi0); + n_packets -= 1; + packets += 1; + + ip0 = (void *) (p0->data + ip_header_offset); + + ip0->payload_length = + clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) - + ip_header_offset - sizeof (ip0[0])); + } +} + +typedef struct +{ + pg_edit_t ip_version; + pg_edit_t traffic_class; + pg_edit_t flow_label; + pg_edit_t payload_length; + pg_edit_t protocol; + pg_edit_t hop_limit; + pg_edit_t src_address, dst_address; +} pg_ip6_header_t; + +static inline void +pg_ip6_header_init (pg_ip6_header_t * p) +{ + /* Initialize fields that are not bit fields in the IP header. */ +#define _(f) pg_edit_init (&p->f, ip6_header_t, f); + _(payload_length); + _(hop_limit); + _(protocol); + _(src_address); + _(dst_address); +#undef _ + + /* Initialize bit fields. 
*/ + pg_edit_init_bitfield (&p->ip_version, ip6_header_t, + ip_version_traffic_class_and_flow_label, 28, 4); + pg_edit_init_bitfield (&p->traffic_class, ip6_header_t, + ip_version_traffic_class_and_flow_label, 20, 8); + pg_edit_init_bitfield (&p->flow_label, ip6_header_t, + ip_version_traffic_class_and_flow_label, 0, 20); +} + +uword +unformat_pg_ip6_header (unformat_input_t * input, va_list * args) +{ + pg_stream_t *s = va_arg (*args, pg_stream_t *); + pg_ip6_header_t *p; + u32 group_index; + + p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ip6_header_t), + &group_index); + pg_ip6_header_init (p); + + /* Defaults. */ + pg_edit_set_fixed (&p->ip_version, 6); + pg_edit_set_fixed (&p->traffic_class, 0); + pg_edit_set_fixed (&p->flow_label, 0); + pg_edit_set_fixed (&p->hop_limit, 64); + + p->payload_length.type = PG_EDIT_UNSPECIFIED; + + if (!unformat (input, "%U: %U -> %U", + unformat_pg_edit, + unformat_ip_protocol, &p->protocol, + unformat_pg_edit, + unformat_ip6_address, &p->src_address, + unformat_pg_edit, unformat_ip6_address, &p->dst_address)) + goto error; + + /* Parse options. */ + while (1) + { + if (unformat (input, "version %U", + unformat_pg_edit, unformat_pg_number, &p->ip_version)) + ; + + else if (unformat (input, "traffic-class %U", + unformat_pg_edit, + unformat_pg_number, &p->traffic_class)) + ; + + else if (unformat (input, "length %U", + unformat_pg_edit, + unformat_pg_number, &p->payload_length)) + ; + + else if (unformat (input, "hop-limit %U", + unformat_pg_edit, unformat_pg_number, &p->hop_limit)) + ; + + /* Can't parse input: try next protocol level. 
*/ + else + break; + } + + { + ip_main_t *im = &ip_main; + ip_protocol_t protocol; + ip_protocol_info_t *pi; + + pi = 0; + if (p->protocol.type == PG_EDIT_FIXED) + { + protocol = pg_edit_get_value (&p->protocol, PG_EDIT_LO); + pi = ip_get_protocol_info (im, protocol); + } + + if (pi && pi->unformat_pg_edit + && unformat_user (input, pi->unformat_pg_edit, s)) + ; + + else if (!unformat_user (input, unformat_pg_payload, s)) + goto error; + + if (p->payload_length.type == PG_EDIT_UNSPECIFIED + && s->min_packet_bytes == s->max_packet_bytes + && group_index + 1 < vec_len (s->edit_groups)) + { + pg_edit_set_fixed (&p->payload_length, + pg_edit_group_n_bytes (s, + group_index) - + sizeof (ip6_header_t)); + } + + p = pg_get_edit_group (s, group_index); + if (p->payload_length.type == PG_EDIT_UNSPECIFIED) + { + pg_edit_group_t *g = pg_stream_get_group (s, group_index); + g->edit_function = ip6_pg_edit_function; + } + + return 1; + } + +error: + /* Free up any edits we may have added. */ + pg_free_edit_group (s); + return 0; +} + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h new file mode 100644 index 00000000..c14b46c4 --- /dev/null +++ b/src/vnet/ip/ip6_to_ip4.h @@ -0,0 +1,634 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * @file + * @brief IPv6 to IPv4 translation + */ +#ifndef __included_ip6_to_ip4_h__ +#define __included_ip6_to_ip4_h__ + +#include <vnet/ip/ip.h> + +/** + * IPv6 to IPv4 set call back function type + */ +typedef int (*ip6_to_ip4_set_fn_t) (ip6_header_t * ip6, ip4_header_t * ip4, + void *ctx); + +/* *INDENT-OFF* */ +static u8 icmp6_to_icmp_updater_pointer_table[] = + { 0, 1, ~0, ~0, + 2, 2, 9, 8, + 12, 12, 12, 12, + 12, 12, 12, 12, + 12, 12, 12, 12, + 12, 12, 12, 12, + 24, 24, 24, 24, + 24, 24, 24, 24, + 24, 24, 24, 24, + 24, 24, 24, 24 + }; +/* *INDENT-ON* */ + +#define frag_id_6to4(id) ((id) ^ ((id) >> 16)) + +/** + * @brief Parse some useful information from IPv6 header. + * + * @param ip6 IPv6 header. + * @param buff_len Buffer length. + * @param l4_protocol L4 protocol number. + * @param l4_offset L4 header offset. + * @param frag_hdr_offset Fragment header offset if present, 0 otherwise. + * + * @returns 0 on success, non-zero value otherwise. + */ +static_always_inline int +ip6_parse (const ip6_header_t * ip6, u32 buff_len, + u8 * l4_protocol, u16 * l4_offset, u16 * frag_hdr_offset) +{ + if (ip6->protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) + { + *l4_protocol = ((ip6_frag_hdr_t *) (ip6 + 1))->next_hdr; + *frag_hdr_offset = sizeof (*ip6); + *l4_offset = sizeof (*ip6) + sizeof (ip6_frag_hdr_t); + } + else + { + *l4_protocol = ip6->protocol; + *frag_hdr_offset = 0; + *l4_offset = sizeof (*ip6); + } + + return (buff_len < (*l4_offset + 4)) || + (clib_net_to_host_u16 (ip6->payload_length) < + (*l4_offset + 4 - sizeof (*ip6))); +} + +/** + * @brief Get TCP/UDP port number or ICMP id from IPv6 packet. + * + * @param ip6 IPv6 header. + * @param sender 1 get sender port, 0 get receiver port. + * @param buffer_len Buffer length. + * + * @returns Port number on success, 0 otherwise. 
+ */ +always_inline u16 +ip6_get_port (ip6_header_t * ip6, u8 sender, u16 buffer_len) +{ + u8 l4_protocol; + u16 l4_offset; + u16 frag_offset; + u8 *l4; + + if (ip6_parse (ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset)) + return 0; + + if (frag_offset && + ip6_frag_hdr_offset (((ip6_frag_hdr_t *) + u8_ptr_add (ip6, frag_offset)))) + return 0; //Can't deal with non-first fragment for now + + l4 = u8_ptr_add (ip6, l4_offset); + if (l4_protocol == IP_PROTOCOL_TCP || l4_protocol == IP_PROTOCOL_UDP) + { + return (sender) ? ((udp_header_t *) (l4))->src_port : ((udp_header_t + *) + (l4))->dst_port; + } + else if (l4_protocol == IP_PROTOCOL_ICMP6) + { + icmp46_header_t *icmp = (icmp46_header_t *) (l4); + if (icmp->type == ICMP6_echo_request) + { + return (sender) ? ((u16 *) (icmp))[2] : -1; + } + else if (icmp->type == ICMP6_echo_reply) + { + return (sender) ? -1 : ((u16 *) (icmp))[2]; + } + } + return 0; +} + +/** + * @brief Convert type and code value from ICMP6 to ICMP4. + * + * @param icmp ICMP header. + * @param inner_ip6 Inner IPv6 header if present, 0 otherwise. + * + * @returns 0 on success, non-zero value otherwise. 
+ */ +static_always_inline int +icmp6_to_icmp_header (icmp46_header_t * icmp, ip6_header_t ** inner_ip6) +{ + *inner_ip6 = NULL; + switch (icmp->type) + { + case ICMP6_echo_request: + icmp->type = ICMP4_echo_request; + break; + case ICMP6_echo_reply: + icmp->type = ICMP4_echo_reply; + break; + case ICMP6_destination_unreachable: + *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); + + switch (icmp->code) + { + case ICMP6_destination_unreachable_no_route_to_destination: //0 + case ICMP6_destination_unreachable_beyond_scope_of_source_address: //2 + case ICMP6_destination_unreachable_address_unreachable: //3 + icmp->type = ICMP4_destination_unreachable; + icmp->code = + ICMP4_destination_unreachable_destination_unreachable_host; + break; + case ICMP6_destination_unreachable_destination_administratively_prohibited: //1 + icmp->type = + ICMP4_destination_unreachable; + icmp->code = + ICMP4_destination_unreachable_communication_administratively_prohibited; + break; + case ICMP6_destination_unreachable_port_unreachable: + icmp->type = ICMP4_destination_unreachable; + icmp->code = ICMP4_destination_unreachable_port_unreachable; + break; + default: + return -1; + } + break; + case ICMP6_packet_too_big: + *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); + + icmp->type = ICMP4_destination_unreachable; + icmp->code = 4; + { + u32 advertised_mtu = clib_net_to_host_u32 (*((u32 *) (icmp + 1))); + advertised_mtu -= 20; + //FIXME: = minimum(advertised MTU-20, MTU_of_IPv4_nexthop, (MTU_of_IPv6_nexthop)-20) + ((u16 *) (icmp))[3] = clib_host_to_net_u16 (advertised_mtu); + } + break; + + case ICMP6_time_exceeded: + *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); + + icmp->type = ICMP4_time_exceeded; + break; + + case ICMP6_parameter_problem: + *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8); + + switch (icmp->code) + { + case ICMP6_parameter_problem_erroneous_header_field: + icmp->type = ICMP4_parameter_problem; + icmp->code = ICMP4_parameter_problem_pointer_indicates_error; 
+ u32 pointer = clib_net_to_host_u32 (*((u32 *) (icmp + 1))); + if (pointer >= 40) + return -1; + + ((u8 *) (icmp + 1))[0] = + icmp6_to_icmp_updater_pointer_table[pointer]; + break; + case ICMP6_parameter_problem_unrecognized_next_header: + icmp->type = ICMP4_destination_unreachable; + icmp->code = ICMP4_destination_unreachable_port_unreachable; + break; + case ICMP6_parameter_problem_unrecognized_option: + default: + return -1; + } + break; + default: + return -1; + break; + } + return 0; +} + +/** + * @brief Translate TOS value from IPv6 to IPv4. + * + * @param ip6 IPv6 header. + * + * @returns IPv4 TOS value. + */ +static_always_inline u8 +ip6_translate_tos (const ip6_header_t * ip6) +{ + return (clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label) + & 0x0ff00000) >> 20; +} + +/** + * @brief Translate ICMP6 packet to ICMP4. + * + * @param p Buffer to translate. + * @param fn The function to translate outer header. + * @param ctx A context passed in the outer header translate function. + * @param inner_fn The function to translate inner header. + * @param inner_ctx A context passed in the inner header translate function. + * + * @returns 0 on success, non-zero value otherwise. 
+ */ +always_inline int +icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, + ip6_to_ip4_set_fn_t inner_fn, void *inner_ctx) +{ + ip6_header_t *ip6, *inner_ip6; + ip4_header_t *ip4, *inner_ip4; + u32 ip6_pay_len; + icmp46_header_t *icmp; + ip_csum_t csum; + int rv; + + ip6 = vlib_buffer_get_current (p); + ip6_pay_len = clib_net_to_host_u16 (ip6->payload_length); + icmp = (icmp46_header_t *) (ip6 + 1); + ASSERT (ip6_pay_len + sizeof (*ip6) <= p->current_length); + + //No extensions headers allowed here + if (ip6->protocol != IP_PROTOCOL_ICMP6) + return -1; + + //There are no fragmented ICMP messages, so no extension header for now + if (icmp6_to_icmp_header (icmp, &inner_ip6)) + return -1; + + if (inner_ip6) + { + u16 *inner_L4_checksum, inner_l4_offset, inner_frag_offset, + inner_frag_id; + u8 *inner_l4, inner_protocol; + + //We have two headers to translate + // FROM + // [ IPv6 ]<- ext ->[IC][ IPv6 ]<- ext ->[L4 header ... + // Handled cases: + // [ IPv6 ][IC][ IPv6 ][L4 header ... + // [ IPv6 ][IC][ IPv6 ][Fr][L4 header ... + // TO + // [ IPv4][IC][ IPv4][L4 header ... 
+ + if (ip6_parse (inner_ip6, ip6_pay_len - 8, + &inner_protocol, &inner_l4_offset, &inner_frag_offset)) + return -1; + + inner_l4 = u8_ptr_add (inner_ip6, inner_l4_offset); + inner_ip4 = + (ip4_header_t *) u8_ptr_add (inner_l4, -sizeof (*inner_ip4)); + if (inner_frag_offset) + { + ip6_frag_hdr_t *inner_frag = + (ip6_frag_hdr_t *) u8_ptr_add (inner_ip6, inner_frag_offset); + inner_frag_id = frag_id_6to4 (inner_frag->identification); + } + else + { + inner_frag_id = 0; + } + + //Do the translation of the inner packet + if (inner_protocol == IP_PROTOCOL_TCP) + { + inner_L4_checksum = (u16 *) u8_ptr_add (inner_l4, 16); + } + else if (inner_protocol == IP_PROTOCOL_UDP) + { + inner_L4_checksum = (u16 *) u8_ptr_add (inner_l4, 6); + } + else if (inner_protocol == IP_PROTOCOL_ICMP6) + { + icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4; + //It cannot be of a different type as ip6_icmp_to_icmp6_in_place succeeded + inner_icmp->type = (inner_icmp->type == ICMP6_echo_request) ? + ICMP4_echo_request : ICMP4_echo_reply; + inner_protocol = IP_PROTOCOL_ICMP; //Will be copied to ip6 later + inner_L4_checksum = &inner_icmp->checksum; + } + else + { + return -1; + } + + csum = *inner_L4_checksum; + csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[0]); + csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]); + csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]); + csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]); + *inner_L4_checksum = ip_csum_fold (csum); + + if ((rv = inner_fn (inner_ip6, inner_ip4, inner_ctx)) != 0) + return rv; + + inner_ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + inner_ip4->tos = ip6_translate_tos (inner_ip6); + inner_ip4->length = + u16_net_add (inner_ip6->payload_length, + sizeof (*ip4) + sizeof (*ip6) - inner_l4_offset); + inner_ip4->fragment_id = inner_frag_id; + inner_ip4->flags_and_fragment_offset = + clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); + 
inner_ip4->ttl = inner_ip6->hop_limit; + inner_ip4->protocol = inner_protocol; + inner_ip4->checksum = ip4_header_checksum (inner_ip4); + + if (inner_ip4->protocol == IP_PROTOCOL_ICMP) + { + //Recompute ICMP checksum + icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4; + inner_icmp->checksum = 0; + csum = + ip_incremental_checksum (0, inner_icmp, + clib_net_to_host_u16 (inner_ip4->length) + - sizeof (*inner_ip4)); + inner_icmp->checksum = ~ip_csum_fold (csum); + } + else + { + //Update to new pseudo-header + csum = *inner_L4_checksum; + csum = ip_csum_add_even (csum, inner_ip4->src_address.as_u32); + csum = ip_csum_add_even (csum, inner_ip4->dst_address.as_u32); + *inner_L4_checksum = ip_csum_fold (csum); + } + + //Move up icmp header + ip4 = (ip4_header_t *) u8_ptr_add (inner_l4, -2 * sizeof (*ip4) - 8); + clib_memcpy (u8_ptr_add (inner_l4, -sizeof (*ip4) - 8), icmp, 8); + icmp = (icmp46_header_t *) u8_ptr_add (inner_l4, -sizeof (*ip4) - 8); + } + else + { + //Only one header to translate + ip4 = (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)); + } + + vlib_buffer_advance (p, (u32) (((u8 *) ip4) - ((u8 *) ip6))); + + if ((rv = fn (ip6, ip4, ctx)) != 0) + return rv; + + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip6); + ip4->fragment_id = 0; + ip4->flags_and_fragment_offset = 0; + ip4->ttl = ip6->hop_limit; + ip4->protocol = IP_PROTOCOL_ICMP; + //TODO fix the length depending on offset length + ip4->length = u16_net_add (ip6->payload_length, + (inner_ip6 == + NULL) ? sizeof (*ip4) : (2 * sizeof (*ip4) - + sizeof (*ip6))); + ip4->checksum = ip4_header_checksum (ip4); + + //Recompute ICMP checksum + icmp->checksum = 0; + csum = + ip_incremental_checksum (0, icmp, + clib_net_to_host_u16 (ip4->length) - + sizeof (*ip4)); + icmp->checksum = ~ip_csum_fold (csum); + + return 0; +} + +/** + * @brief Translate IPv6 fragmented packet to IPv4. + * + * @param p Buffer to translate. 
+ * @param fn The function to translate header. + * @param ctx A context passed in the header translate function. + * + * @returns 0 on success, non-zero value otherwise. + */ +always_inline int +ip6_to_ip4_fragmented (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx) +{ + ip6_header_t *ip6; + ip6_frag_hdr_t *frag; + ip4_header_t *ip4; + u16 frag_id; + u8 frag_more; + u16 frag_offset; + u8 l4_protocol; + u16 l4_offset; + int rv; + + ip6 = vlib_buffer_get_current (p); + + if (ip6_parse + (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) + return -1; + + frag = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); + ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); + vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); + + frag_id = frag_id_6to4 (frag->identification); + frag_more = ip6_frag_hdr_more (frag); + frag_offset = ip6_frag_hdr_offset (frag); + + if ((rv = fn (ip6, ip4, ctx)) != 0) + return rv; + + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip6); + ip4->length = u16_net_add (ip6->payload_length, + sizeof (*ip4) - l4_offset + sizeof (*ip6)); + ip4->fragment_id = frag_id; + ip4->flags_and_fragment_offset = + clib_host_to_net_u16 (frag_offset | + (frag_more ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0)); + ip4->ttl = ip6->hop_limit; + ip4->protocol = + (l4_protocol == IP_PROTOCOL_ICMP6) ? IP_PROTOCOL_ICMP : l4_protocol; + ip4->checksum = ip4_header_checksum (ip4); + + return 0; +} + +/** + * @brief Translate IPv6 UDP/TCP packet to IPv4. + * + * @param p Buffer to translate. + * @param fn The function to translate header. + * @param ctx A context passed in the header translate function. + * + * @returns 0 on success, non-zero value otherwise. 
+ */ +always_inline int +ip6_to_ip4_tcp_udp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx, + u8 udp_checksum) +{ + ip6_header_t *ip6; + u16 *checksum; + ip_csum_t csum = 0; + ip4_header_t *ip4; + u16 fragment_id; + u16 flags; + u16 frag_offset; + u8 l4_protocol; + u16 l4_offset; + int rv; + + ip6 = vlib_buffer_get_current (p); + + if (ip6_parse + (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) + return -1; + + if (l4_protocol == IP_PROTOCOL_TCP) + { + tcp_header_t *tcp = ip6_next_header (ip6); + checksum = &tcp->checksum; + } + else + { + udp_header_t *udp = ip6_next_header (ip6); + checksum = &udp->checksum; + //UDP checksum is optional over IPv4 + if (!udp_checksum) + goto no_csum; + } + + csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]); + csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]); + csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]); + csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]); + *checksum = ip_csum_fold (csum); + +no_csum: + ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); + + vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); + + if (PREDICT_FALSE (frag_offset)) + { + //Only the first fragment + ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); + fragment_id = frag_id_6to4 (hdr->identification); + flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); + } + else + { + fragment_id = 0; + flags = 0; + } + + if ((rv = fn (ip6, ip4, ctx)) != 0) + return rv; + + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip6); + ip4->length = u16_net_add (ip6->payload_length, + sizeof (*ip4) + sizeof (*ip6) - l4_offset); + ip4->fragment_id = fragment_id; + ip4->flags_and_fragment_offset = flags; + ip4->ttl = ip6->hop_limit; + ip4->protocol = l4_protocol; + ip4->checksum = ip4_header_checksum (ip4); + + //UDP checksum is optional over IPv4 + if (!udp_checksum && l4_protocol == 
IP_PROTOCOL_UDP) + { + *checksum = 0; + } + else + { + csum = ip_csum_add_even (*checksum, ip4->dst_address.as_u32); + csum = ip_csum_add_even (csum, ip4->src_address.as_u32); + *checksum = ip_csum_fold (csum); + } + + return 0; +} + +/** + * @brief Translate IPv6 packet to IPv4 (IP header only). + * + * @param p Buffer to translate. + * @param fn The function to translate header. + * @param ctx A context passed in the header translate function. + * + * @returns 0 on success, non-zero value otherwise. + */ +always_inline int +ip6_to_ip4 (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx) +{ + ip6_header_t *ip6; + ip4_header_t *ip4; + u16 fragment_id; + u16 flags; + u16 frag_offset; + u8 l4_protocol; + u16 l4_offset; + int rv; + + ip6 = vlib_buffer_get_current (p); + + if (ip6_parse + (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset)) + return -1; + + ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4)); + + vlib_buffer_advance (p, l4_offset - sizeof (*ip4)); + + if (PREDICT_FALSE (frag_offset)) + { + //Only the first fragment + ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset); + fragment_id = frag_id_6to4 (hdr->identification); + flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS); + } + else + { + fragment_id = 0; + flags = 0; + } + + if ((rv = fn (ip6, ip4, ctx)) != 0) + return rv; + + ip4->ip_version_and_header_length = + IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS; + ip4->tos = ip6_translate_tos (ip6); + ip4->length = u16_net_add (ip6->payload_length, + sizeof (*ip4) + sizeof (*ip6) - l4_offset); + ip4->fragment_id = fragment_id; + ip4->flags_and_fragment_offset = flags; + ip4->ttl = ip6->hop_limit; + ip4->protocol = l4_protocol; + ip4->checksum = ip4_header_checksum (ip4); + + return 0; +} + +#endif /* __included_ip6_to_ip4_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip_api.c 
b/src/vnet/ip/ip_api.c new file mode 100644 index 00000000..e13e6e64 --- /dev/null +++ b/src/vnet/ip/ip_api.c @@ -0,0 +1,1825 @@ +/* + *------------------------------------------------------------------ + * ip_api.c - vnet ip api + * + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <vnet/vnet.h> +#include <vlibmemory/api.h> + +#include <vnet/interface.h> +#include <vnet/api_errno.h> +#include <vnet/ethernet/ethernet.h> +#include <vnet/ip/ip.h> +#include <vnet/ip/ip6_neighbor.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/fib_api.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/dpo/receive_dpo.h> +#include <vnet/dpo/lookup_dpo.h> +#include <vnet/dpo/classify_dpo.h> +#include <vnet/dpo/ip_null_dpo.h> +#include <vnet/ethernet/arp_packet.h> +#include <vnet/mfib/ip6_mfib.h> +#include <vnet/mfib/ip4_mfib.h> +#include <vnet/mfib/mfib_signal.h> +#include <vnet/mfib/mfib_entry.h> + +#include <vnet/vnet_msg_enum.h> + +#define vl_typedefs /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include <vnet/vnet_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) 
vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <vnet/vnet_all_api_h.h> +#undef vl_printfun + +#include <vlibapi/api_helper_macros.h> + + +#define foreach_ip_api_msg \ +_(IP_FIB_DUMP, ip_fib_dump) \ +_(IP6_FIB_DUMP, ip6_fib_dump) \ +_(IP_MFIB_DUMP, ip_mfib_dump) \ +_(IP6_MFIB_DUMP, ip6_mfib_dump) \ +_(IP_NEIGHBOR_DUMP, ip_neighbor_dump) \ +_(IP_MROUTE_ADD_DEL, ip_mroute_add_del) \ +_(MFIB_SIGNAL_DUMP, mfib_signal_dump) \ +_(IP_ADDRESS_DUMP, ip_address_dump) \ +_(IP_DUMP, ip_dump) \ +_(IP_NEIGHBOR_ADD_DEL, ip_neighbor_add_del) \ +_(IP_ADD_DEL_ROUTE, ip_add_del_route) \ +_(IP_TABLE_ADD_DEL, ip_table_add_del) \ +_(SET_IP_FLOW_HASH,set_ip_flow_hash) \ +_(SW_INTERFACE_IP6ND_RA_CONFIG, sw_interface_ip6nd_ra_config) \ +_(SW_INTERFACE_IP6ND_RA_PREFIX, sw_interface_ip6nd_ra_prefix) \ +_(IP6ND_PROXY_ADD_DEL, ip6nd_proxy_add_del) \ +_(IP6ND_PROXY_DUMP, ip6nd_proxy_dump) \ +_(SW_INTERFACE_IP6_ENABLE_DISABLE, sw_interface_ip6_enable_disable ) \ +_(SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS, \ + sw_interface_ip6_set_link_local_address) + +extern void stats_dslock_with_hint (int hint, int tag); +extern void stats_dsunlock (void); + +static void +send_ip_neighbor_details (u8 is_ipv6, + u8 is_static, + u8 * mac_address, + u8 * ip_address, + unix_shared_memory_queue_t * q, u32 context) +{ + vl_api_ip_neighbor_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IP_NEIGHBOR_DETAILS); + mp->context = context; + mp->is_ipv6 = is_ipv6; + mp->is_static = is_static; + memcpy (mp->mac_address, mac_address, 6); + memcpy (mp->ip_address, ip_address, (is_ipv6) ? 
16 : 4); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_ip_neighbor_dump_t_handler (vl_api_ip_neighbor_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + u32 sw_if_index = ntohl (mp->sw_if_index); + + if (mp->is_ipv6) + { + ip6_neighbor_t *n, *ns; + + ns = ip6_neighbors_entries (sw_if_index); + /* *INDENT-OFF* */ + vec_foreach (n, ns) + { + send_ip_neighbor_details + (mp->is_ipv6, ((n->flags & IP6_NEIGHBOR_FLAG_STATIC) ? 1 : 0), + (u8 *) n->link_layer_address, + (u8 *) & (n->key.ip6_address.as_u8), + q, mp->context); + } + /* *INDENT-ON* */ + vec_free (ns); + } + else + { + ethernet_arp_ip4_entry_t *n, *ns; + + ns = ip4_neighbor_entries (sw_if_index); + /* *INDENT-OFF* */ + vec_foreach (n, ns) + { + send_ip_neighbor_details (mp->is_ipv6, + ((n->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) ? 1 : 0), + (u8*) n->ethernet_address, + (u8*) & (n->ip4_address.as_u8), + q, mp->context); + } + /* *INDENT-ON* */ + vec_free (ns); + } +} + + +void +copy_fib_next_hop (fib_route_path_encode_t * api_rpath, void *fp_arg) +{ + int is_ip4; + vl_api_fib_path_t *fp = (vl_api_fib_path_t *) fp_arg; + + if (api_rpath->rpath.frp_proto == DPO_PROTO_IP4) + fp->afi = IP46_TYPE_IP4; + else if (api_rpath->rpath.frp_proto == DPO_PROTO_IP6) + fp->afi = IP46_TYPE_IP6; + else + { + is_ip4 = ip46_address_is_ip4 (&api_rpath->rpath.frp_addr); + if (is_ip4) + fp->afi = IP46_TYPE_IP4; + else + fp->afi = IP46_TYPE_IP6; + } + if (fp->afi == IP46_TYPE_IP4) + memcpy (fp->next_hop, &api_rpath->rpath.frp_addr.ip4, + sizeof (api_rpath->rpath.frp_addr.ip4)); + else + memcpy (fp->next_hop, &api_rpath->rpath.frp_addr.ip6, + sizeof (api_rpath->rpath.frp_addr.ip6)); +} + +static void +send_ip_fib_details (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + const fib_table_t * table, + const fib_prefix_t * pfx, + fib_route_path_encode_t * api_rpaths, u32 context) +{ + vl_api_ip_fib_details_t *mp; + 
fib_route_path_encode_t *api_rpath; + vl_api_fib_path_t *fp; + int path_count; + + path_count = vec_len (api_rpaths); + mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp)); + if (!mp) + return; + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IP_FIB_DETAILS); + mp->context = context; + + mp->table_id = htonl (table->ft_table_id); + memcpy (mp->table_name, table->ft_desc, + clib_min (vec_len (table->ft_desc), sizeof (mp->table_name))); + mp->address_length = pfx->fp_len; + memcpy (mp->address, &pfx->fp_addr.ip4, sizeof (pfx->fp_addr.ip4)); + + mp->count = htonl (path_count); + fp = mp->path; + vec_foreach (api_rpath, api_rpaths) + { + memset (fp, 0, sizeof (*fp)); + switch (api_rpath->dpo.dpoi_type) + { + case DPO_RECEIVE: + fp->is_local = true; + break; + case DPO_DROP: + fp->is_drop = true; + break; + case DPO_IP_NULL: + switch (api_rpath->dpo.dpoi_index) + { + case IP_NULL_ACTION_NONE: + fp->is_drop = true; + break; + case IP_NULL_ACTION_SEND_ICMP_UNREACH: + fp->is_unreach = true; + break; + case IP_NULL_ACTION_SEND_ICMP_PROHIBIT: + fp->is_prohibit = true; + break; + default: + break; + } + break; + default: + break; + } + fp->weight = api_rpath->rpath.frp_weight; + fp->preference = api_rpath->rpath.frp_preference; + fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index); + copy_fib_next_hop (api_rpath, fp); + fp++; + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +typedef struct vl_api_ip_fib_dump_walk_ctx_t_ +{ + fib_node_index_t *feis; +} vl_api_ip_fib_dump_walk_ctx_t; + +static int +vl_api_ip_fib_dump_walk (fib_node_index_t fei, void *arg) +{ + vl_api_ip_fib_dump_walk_ctx_t *ctx = arg; + + vec_add1 (ctx->feis, fei); + + return (1); +} + +static void +vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + unix_shared_memory_queue_t *q; + ip4_main_t *im = &ip4_main; + fib_table_t *fib_table; + fib_node_index_t *lfeip; + fib_prefix_t pfx; + u32 fib_index; + fib_route_path_encode_t 
*api_rpaths; + vl_api_ip_fib_dump_walk_ctx_t ctx = { + .feis = NULL, + }; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + /* *INDENT-OFF* */ + pool_foreach (fib_table, im->fibs, + ({ + fib_table_walk(fib_table->ft_index, + FIB_PROTOCOL_IP4, + vl_api_ip_fib_dump_walk, + &ctx); + })); + /* *INDENT-ON* */ + + vec_sort_with_function (ctx.feis, fib_entry_cmp_for_sort); + + vec_foreach (lfeip, ctx.feis) + { + fib_entry_get_prefix (*lfeip, &pfx); + fib_index = fib_entry_get_fib_index (*lfeip); + fib_table = fib_table_get (fib_index, pfx.fp_proto); + api_rpaths = NULL; + fib_entry_encode (*lfeip, &api_rpaths); + send_ip_fib_details (am, q, fib_table, &pfx, api_rpaths, mp->context); + vec_free (api_rpaths); + } + + vec_free (ctx.feis); +} + +static void +send_ip6_fib_details (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + u32 table_id, fib_prefix_t * pfx, + fib_route_path_encode_t * api_rpaths, u32 context) +{ + vl_api_ip6_fib_details_t *mp; + fib_route_path_encode_t *api_rpath; + vl_api_fib_path_t *fp; + int path_count; + + path_count = vec_len (api_rpaths); + mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp)); + if (!mp) + return; + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IP6_FIB_DETAILS); + mp->context = context; + + mp->table_id = htonl (table_id); + mp->address_length = pfx->fp_len; + memcpy (mp->address, &pfx->fp_addr.ip6, sizeof (pfx->fp_addr.ip6)); + + mp->count = htonl (path_count); + fp = mp->path; + vec_foreach (api_rpath, api_rpaths) + { + memset (fp, 0, sizeof (*fp)); + switch (api_rpath->dpo.dpoi_type) + { + case DPO_RECEIVE: + fp->is_local = true; + break; + case DPO_DROP: + fp->is_drop = true; + break; + case DPO_IP_NULL: + switch (api_rpath->dpo.dpoi_index) + { + case IP_NULL_DPO_ACTION_NUM + IP_NULL_ACTION_NONE: + fp->is_drop = true; + break; + case IP_NULL_DPO_ACTION_NUM + IP_NULL_ACTION_SEND_ICMP_UNREACH: + fp->is_unreach = true; + break; + case 
IP_NULL_DPO_ACTION_NUM + IP_NULL_ACTION_SEND_ICMP_PROHIBIT: + fp->is_prohibit = true; + break; + default: + break; + } + break; + default: + break; + } + fp->weight = api_rpath->rpath.frp_weight; + fp->preference = api_rpath->rpath.frp_preference; + fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index); + copy_fib_next_hop (api_rpath, fp); + fp++; + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +typedef struct apt_ip6_fib_show_ctx_t_ +{ + u32 fib_index; + fib_node_index_t *entries; +} api_ip6_fib_show_ctx_t; + +static void +api_ip6_fib_table_put_entries (clib_bihash_kv_24_8_t * kvp, void *arg) +{ + api_ip6_fib_show_ctx_t *ctx = arg; + + if ((kvp->key[2] >> 32) == ctx->fib_index) + { + vec_add1 (ctx->entries, kvp->value); + } +} + +static void +api_ip6_fib_table_get_all (unix_shared_memory_queue_t * q, + vl_api_ip6_fib_dump_t * mp, + fib_table_t * fib_table) +{ + vpe_api_main_t *am = &vpe_api_main; + ip6_main_t *im6 = &ip6_main; + fib_node_index_t *fib_entry_index; + api_ip6_fib_show_ctx_t ctx = { + .fib_index = fib_table->ft_index, + .entries = NULL, + }; + fib_route_path_encode_t *api_rpaths; + fib_prefix_t pfx; + + BV (clib_bihash_foreach_key_value_pair) + ((BVT (clib_bihash) *) & im6->ip6_table[IP6_FIB_TABLE_NON_FWDING]. 
+ ip6_hash, api_ip6_fib_table_put_entries, &ctx); + + vec_sort_with_function (ctx.entries, fib_entry_cmp_for_sort); + + vec_foreach (fib_entry_index, ctx.entries) + { + fib_entry_get_prefix (*fib_entry_index, &pfx); + api_rpaths = NULL; + fib_entry_encode (*fib_entry_index, &api_rpaths); + send_ip6_fib_details (am, q, + fib_table->ft_table_id, + &pfx, api_rpaths, mp->context); + vec_free (api_rpaths); + } + + vec_free (ctx.entries); +} + +static void +vl_api_ip6_fib_dump_t_handler (vl_api_ip6_fib_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + ip6_main_t *im6 = &ip6_main; + fib_table_t *fib_table; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + /* *INDENT-OFF* */ + pool_foreach (fib_table, im6->fibs, + ({ + api_ip6_fib_table_get_all(q, mp, fib_table); + })); + /* *INDENT-ON* */ +} + +static void +send_ip_mfib_details (unix_shared_memory_queue_t * q, + u32 context, u32 table_id, fib_node_index_t mfei) +{ + fib_route_path_encode_t *api_rpath, *api_rpaths = NULL; + vl_api_ip_mfib_details_t *mp; + mfib_entry_t *mfib_entry; + vl_api_fib_path_t *fp; + mfib_prefix_t pfx; + int path_count; + + mfib_entry = mfib_entry_get (mfei); + mfib_entry_get_prefix (mfei, &pfx); + mfib_entry_encode (mfei, &api_rpaths); + + path_count = vec_len (api_rpaths); + mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp)); + if (!mp) + return; + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IP_FIB_DETAILS); + mp->context = context; + + mp->rpf_id = mfib_entry->mfe_rpf_id; + mp->entry_flags = mfib_entry->mfe_flags; + mp->table_id = htonl (table_id); + mp->address_length = pfx.fp_len; + memcpy (mp->grp_address, &pfx.fp_grp_addr.ip4, + sizeof (pfx.fp_grp_addr.ip4)); + memcpy (mp->src_address, &pfx.fp_src_addr.ip4, + sizeof (pfx.fp_src_addr.ip4)); + + mp->count = htonl (path_count); + fp = mp->path; + vec_foreach (api_rpath, api_rpaths) + { + memset (fp, 0, sizeof (*fp)); + + fp->weight = 0; + fp->sw_if_index = htonl 
(api_rpath->rpath.frp_sw_if_index); + copy_fib_next_hop (api_rpath, fp); + fp++; + } + vec_free (api_rpaths); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +typedef struct vl_api_ip_mfib_dump_ctc_t_ +{ + fib_node_index_t *entries; +} vl_api_ip_mfib_dump_ctc_t; + +static int +vl_api_ip_mfib_table_dump_walk (fib_node_index_t fei, void *arg) +{ + vl_api_ip_mfib_dump_ctc_t *ctx = arg; + + vec_add1 (ctx->entries, fei); + + return (0); +} + +static void +vl_api_ip_mfib_dump_t_handler (vl_api_ip_mfib_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + ip4_main_t *im = &ip4_main; + mfib_table_t *mfib_table; + fib_node_index_t *mfeip; + vl_api_ip_mfib_dump_ctc_t ctx = { + .entries = NULL, + }; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + + /* *INDENT-OFF* */ + pool_foreach (mfib_table, im->mfibs, + ({ + ip4_mfib_table_walk(&mfib_table->v4, + vl_api_ip_mfib_table_dump_walk, + &ctx); + + vec_sort_with_function (ctx.entries, mfib_entry_cmp_for_sort); + + vec_foreach (mfeip, ctx.entries) + { + send_ip_mfib_details (q, mp->context, + mfib_table->mft_table_id, + *mfeip); + } + vec_reset_length (ctx.entries); + + })); + /* *INDENT-ON* */ + + vec_free (ctx.entries); +} + +static void +send_ip6_mfib_details (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + u32 table_id, + mfib_prefix_t * pfx, + fib_route_path_encode_t * api_rpaths, u32 context) +{ + vl_api_ip6_mfib_details_t *mp; + fib_route_path_encode_t *api_rpath; + vl_api_fib_path_t *fp; + int path_count; + + path_count = vec_len (api_rpaths); + mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp)); + if (!mp) + return; + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IP6_FIB_DETAILS); + mp->context = context; + + mp->table_id = htonl (table_id); + mp->address_length = pfx->fp_len; + memcpy (mp->grp_address, &pfx->fp_grp_addr.ip6, + sizeof (pfx->fp_grp_addr.ip6)); + memcpy (mp->src_address, &pfx->fp_src_addr.ip6, + sizeof (pfx->fp_src_addr.ip6)); 
+ + mp->count = htonl (path_count); + fp = mp->path; + vec_foreach (api_rpath, api_rpaths) + { + memset (fp, 0, sizeof (*fp)); + + fp->weight = 0; + fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index); + copy_fib_next_hop (api_rpath, fp); + fp++; + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +typedef struct vl_api_ip6_mfib_dump_ctc_t_ +{ + fib_node_index_t *entries; +} vl_api_ip6_mfib_dump_ctc_t; + +static int +vl_api_ip6_mfib_table_dump_walk (fib_node_index_t fei, void *arg) +{ + vl_api_ip6_mfib_dump_ctc_t *ctx = arg; + + vec_add1 (ctx->entries, fei); + + return (0); +} + +static void +vl_api_ip6_mfib_dump_t_handler (vl_api_ip6_mfib_dump_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + unix_shared_memory_queue_t *q; + ip6_main_t *im = &ip6_main; + mfib_table_t *mfib_table; + fib_node_index_t *mfeip; + mfib_prefix_t pfx; + fib_route_path_encode_t *api_rpaths = NULL; + vl_api_ip6_mfib_dump_ctc_t ctx = { + .entries = NULL, + }; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + + /* *INDENT-OFF* */ + pool_foreach (mfib_table, im->mfibs, + ({ + ip6_mfib_table_walk(&mfib_table->v6, + vl_api_ip6_mfib_table_dump_walk, + &ctx); + + vec_sort_with_function (ctx.entries, mfib_entry_cmp_for_sort); + + vec_foreach(mfeip, ctx.entries) + { + mfib_entry_get_prefix (*mfeip, &pfx); + mfib_entry_encode (*mfeip, &api_rpaths); + send_ip6_mfib_details (am, q, + mfib_table->mft_table_id, + &pfx, api_rpaths, + mp->context); + } + vec_reset_length (api_rpaths); + vec_reset_length (ctx.entries); + + })); + /* *INDENT-ON* */ + + vec_free (ctx.entries); + vec_free (api_rpaths); +} + +static void +vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp, + vlib_main_t * vm) +{ + vl_api_ip_neighbor_add_del_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + stats_dslock_with_hint (1 /* release hint */ , 7 /* tag */ ); + + /* + * there's no validation here of the ND/ARP entry 
being added. + * The expectation is that the FIB will ensure that nothing bad + * will come of adding bogus entries. + */ + if (mp->is_ipv6) + { + if (mp->is_add) + rv = vnet_set_ip6_ethernet_neighbor + (vm, ntohl (mp->sw_if_index), + (ip6_address_t *) (mp->dst_address), + mp->mac_address, sizeof (mp->mac_address), mp->is_static, + mp->is_no_adj_fib); + else + rv = vnet_unset_ip6_ethernet_neighbor + (vm, ntohl (mp->sw_if_index), + (ip6_address_t *) (mp->dst_address), + mp->mac_address, sizeof (mp->mac_address)); + } + else + { + ethernet_arp_ip4_over_ethernet_address_t a; + + clib_memcpy (&a.ethernet, mp->mac_address, 6); + clib_memcpy (&a.ip4, mp->dst_address, 4); + + if (mp->is_add) + rv = vnet_arp_set_ip4_over_ethernet (vnm, ntohl (mp->sw_if_index), + &a, mp->is_static, + mp->is_no_adj_fib); + else + rv = + vnet_arp_unset_ip4_over_ethernet (vnm, ntohl (mp->sw_if_index), &a); + } + + stats_dsunlock (); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_IP_NEIGHBOR_ADD_DEL_REPLY); +} + +void +ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api) +{ + u32 fib_index, mfib_index; + + /* + * ignore action on the default table - this is always present + * and cannot be added nor deleted from the API + */ + if (0 != table_id) + { + /* + * The API holds only one lock on the table. + * i.e. it can be added many times via the API but needs to be + * deleted only once. + * The FIB index for unicast and multicast is not necessarily the + * same, since internal VPP systesm (like LISP and SR) create + * their own unicast tables. + */ + fib_index = fib_table_find (fproto, table_id); + mfib_index = mfib_table_find (fproto, table_id); + + if (~0 != fib_index) + { + fib_table_unlock (fib_index, fproto, + (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI)); + } + if (~0 != mfib_index) + { + mfib_table_unlock (mfib_index, fproto, + (is_api ? 
MFIB_SOURCE_API : MFIB_SOURCE_CLI)); + } + } +} + +void +vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp) +{ + vl_api_ip_table_add_del_reply_t *rmp; + fib_protocol_t fproto = (mp->is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4); + u32 table_id = ntohl (mp->table_id); + int rv = 0; + + if (mp->is_add) + { + ip_table_create (fproto, table_id, 1, mp->name); + } + else + { + ip_table_delete (fproto, table_id, 1); + } + + REPLY_MACRO (VL_API_IP_TABLE_ADD_DEL_REPLY); +} + +int +add_del_route_t_handler (u8 is_multipath, + u8 is_add, + u8 is_drop, + u8 is_unreach, + u8 is_prohibit, + u8 is_local, + u8 is_multicast, + u8 is_classify, + u32 classify_table_index, + u8 is_resolve_host, + u8 is_resolve_attached, + u8 is_interface_rx, + u8 is_rpf_id, + u32 fib_index, + const fib_prefix_t * prefix, + dpo_proto_t next_hop_proto, + const ip46_address_t * next_hop, + u32 next_hop_sw_if_index, + u8 next_hop_fib_index, + u16 next_hop_weight, + u16 next_hop_preference, + mpls_label_t next_hop_via_label, + mpls_label_t * next_hop_out_label_stack) +{ + vnet_classify_main_t *cm = &vnet_classify_main; + fib_route_path_flags_t path_flags = FIB_ROUTE_PATH_FLAG_NONE; + fib_route_path_t path = { + .frp_proto = next_hop_proto, + .frp_addr = (NULL == next_hop ? zero_addr : *next_hop), + .frp_sw_if_index = next_hop_sw_if_index, + .frp_fib_index = next_hop_fib_index, + .frp_weight = next_hop_weight, + .frp_preference = next_hop_preference, + .frp_label_stack = next_hop_out_label_stack, + }; + fib_route_path_t *paths = NULL; + fib_entry_flag_t entry_flags = FIB_ENTRY_FLAG_NONE; + + /* + * the special INVALID label meams we are not recursing via a + * label. 
Exp-null value is never a valid via-label so that + * also means it's not a via-label and means clients that set + * it to 0 by default get the expected behaviour + */ + if ((MPLS_LABEL_INVALID != next_hop_via_label) && (0 != next_hop_via_label)) + { + path.frp_proto = DPO_PROTO_MPLS; + path.frp_local_label = next_hop_via_label; + path.frp_eos = MPLS_NON_EOS; + } + if (is_resolve_host) + path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_HOST; + if (is_resolve_attached) + path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED; + if (is_interface_rx) + path_flags |= FIB_ROUTE_PATH_INTF_RX; + if (is_rpf_id) + path_flags |= FIB_ROUTE_PATH_RPF_ID; + if (is_multicast) + entry_flags |= FIB_ENTRY_FLAG_MULTICAST; + + path.frp_flags = path_flags; + + if (is_multipath) + { + stats_dslock_with_hint (1 /* release hint */ , 10 /* tag */ ); + + + vec_add1 (paths, path); + + if (is_add) + fib_table_entry_path_add2 (fib_index, + prefix, + FIB_SOURCE_API, entry_flags, paths); + else + fib_table_entry_path_remove2 (fib_index, + prefix, FIB_SOURCE_API, paths); + + vec_free (paths); + stats_dsunlock (); + return 0; + } + + stats_dslock_with_hint (1 /* release hint */ , 2 /* tag */ ); + + if (is_drop || is_local || is_classify || is_unreach || is_prohibit) + { + /* + * special route types that link directly to the adj + */ + if (is_add) + { + dpo_id_t dpo = DPO_INVALID; + dpo_proto_t dproto; + + dproto = fib_proto_to_dpo (prefix->fp_proto); + + if (is_drop) + ip_null_dpo_add_and_lock (dproto, IP_NULL_ACTION_NONE, &dpo); + else if (is_local) + receive_dpo_add_or_lock (dproto, ~0, NULL, &dpo); + else if (is_unreach) + ip_null_dpo_add_and_lock (dproto, + IP_NULL_ACTION_SEND_ICMP_UNREACH, &dpo); + else if (is_prohibit) + ip_null_dpo_add_and_lock (dproto, + IP_NULL_ACTION_SEND_ICMP_PROHIBIT, + &dpo); + else if (is_classify) + { + if (pool_is_free_index (cm->tables, + ntohl (classify_table_index))) + { + stats_dsunlock (); + return VNET_API_ERROR_NO_SUCH_TABLE; + } + + dpo_set (&dpo, DPO_CLASSIFY, dproto, 
+ classify_dpo_create (dproto, + ntohl (classify_table_index))); + } + else + { + stats_dsunlock (); + return VNET_API_ERROR_NO_SUCH_TABLE; + } + + fib_table_entry_special_dpo_update (fib_index, + prefix, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_EXCLUSIVE, &dpo); + dpo_reset (&dpo); + } + else + { + fib_table_entry_special_remove (fib_index, prefix, FIB_SOURCE_API); + } + } + else + { + if (is_add) + { + vec_add1 (paths, path); + fib_table_entry_update (fib_index, + prefix, FIB_SOURCE_API, entry_flags, paths); + vec_free (paths); + } + else + { + fib_table_entry_delete (fib_index, prefix, FIB_SOURCE_API); + } + } + + stats_dsunlock (); + return (0); +} + +int +add_del_route_check (fib_protocol_t table_proto, + u32 table_id, + u32 next_hop_sw_if_index, + dpo_proto_t next_hop_table_proto, + u32 next_hop_table_id, + u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index) +{ + vnet_main_t *vnm = vnet_get_main (); + + /* Temporaray whilst I do the CSIT dance */ + u8 create_missing_tables = 1; + + *fib_index = fib_table_find (table_proto, ntohl (table_id)); + if (~0 == *fib_index) + { + if (create_missing_tables) + { + *fib_index = fib_table_find_or_create_and_lock (table_proto, + ntohl (table_id), + FIB_SOURCE_API); + } + else + { + /* No such VRF, and we weren't asked to create one */ + return VNET_API_ERROR_NO_SUCH_FIB; + } + } + + if (!is_rpf_id && ~0 != ntohl (next_hop_sw_if_index)) + { + if (pool_is_free_index (vnm->interface_main.sw_interfaces, + ntohl (next_hop_sw_if_index))) + { + return VNET_API_ERROR_NO_MATCHING_INTERFACE; + } + } + else + { + fib_protocol_t fib_nh_proto; + + if (next_hop_table_proto > DPO_PROTO_MPLS) + return (0); + + fib_nh_proto = dpo_proto_to_fib (next_hop_table_proto); + + if (is_rpf_id) + *next_hop_fib_index = mfib_table_find (fib_nh_proto, + ntohl (next_hop_table_id)); + else + *next_hop_fib_index = fib_table_find (fib_nh_proto, + ntohl (next_hop_table_id)); + + if (~0 == *next_hop_fib_index) + { + if (create_missing_tables) + { + if 
(is_rpf_id) + *next_hop_fib_index = + mfib_table_find_or_create_and_lock (fib_nh_proto, + ntohl + (next_hop_table_id), + MFIB_SOURCE_API); + else + *next_hop_fib_index = + fib_table_find_or_create_and_lock (fib_nh_proto, + ntohl + (next_hop_table_id), + FIB_SOURCE_API); + } + else + { + /* No such VRF, and we weren't asked to create one */ + return VNET_API_ERROR_NO_SUCH_FIB; + } + } + } + + return (0); +} + +static int +ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) +{ + u32 fib_index, next_hop_fib_index; + mpls_label_t *label_stack = NULL; + int rv, ii, n_labels;; + + rv = add_del_route_check (FIB_PROTOCOL_IP4, + mp->table_id, + mp->next_hop_sw_if_index, + DPO_PROTO_IP4, + mp->next_hop_table_id, + 0, &fib_index, &next_hop_fib_index); + + if (0 != rv) + return (rv); + + fib_prefix_t pfx = { + .fp_len = mp->dst_address_length, + .fp_proto = FIB_PROTOCOL_IP4, + }; + clib_memcpy (&pfx.fp_addr.ip4, mp->dst_address, sizeof (pfx.fp_addr.ip4)); + + ip46_address_t nh; + memset (&nh, 0, sizeof (nh)); + memcpy (&nh.ip4, mp->next_hop_address, sizeof (nh.ip4)); + + n_labels = mp->next_hop_n_out_labels; + if (n_labels == 0) + ; + else if (1 == n_labels) + vec_add1 (label_stack, ntohl (mp->next_hop_out_label_stack[0])); + else + { + vec_validate (label_stack, n_labels - 1); + for (ii = 0; ii < n_labels; ii++) + label_stack[ii] = ntohl (mp->next_hop_out_label_stack[ii]); + } + + return (add_del_route_t_handler (mp->is_multipath, + mp->is_add, + mp->is_drop, + mp->is_unreach, + mp->is_prohibit, + mp->is_local, 0, + mp->is_classify, + mp->classify_table_index, + mp->is_resolve_host, + mp->is_resolve_attached, 0, 0, + fib_index, &pfx, DPO_PROTO_IP4, + &nh, + ntohl (mp->next_hop_sw_if_index), + next_hop_fib_index, + mp->next_hop_weight, + mp->next_hop_preference, + ntohl (mp->next_hop_via_label), + label_stack)); +} + +static int +ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) +{ + u32 fib_index, next_hop_fib_index; + mpls_label_t *label_stack = NULL; + 
int rv, ii, n_labels;; + + rv = add_del_route_check (FIB_PROTOCOL_IP6, + mp->table_id, + mp->next_hop_sw_if_index, + DPO_PROTO_IP6, + mp->next_hop_table_id, + 0, &fib_index, &next_hop_fib_index); + + if (0 != rv) + return (rv); + + fib_prefix_t pfx = { + .fp_len = mp->dst_address_length, + .fp_proto = FIB_PROTOCOL_IP6, + }; + clib_memcpy (&pfx.fp_addr.ip6, mp->dst_address, sizeof (pfx.fp_addr.ip6)); + + ip46_address_t nh; + memset (&nh, 0, sizeof (nh)); + memcpy (&nh.ip6, mp->next_hop_address, sizeof (nh.ip6)); + + n_labels = mp->next_hop_n_out_labels; + if (n_labels == 0) + ; + else if (1 == n_labels) + vec_add1 (label_stack, ntohl (mp->next_hop_out_label_stack[0])); + else + { + vec_validate (label_stack, n_labels - 1); + for (ii = 0; ii < n_labels; ii++) + label_stack[ii] = ntohl (mp->next_hop_out_label_stack[ii]); + } + + return (add_del_route_t_handler (mp->is_multipath, + mp->is_add, + mp->is_drop, + mp->is_unreach, + mp->is_prohibit, + mp->is_local, 0, + mp->is_classify, + mp->classify_table_index, + mp->is_resolve_host, + mp->is_resolve_attached, 0, 0, + fib_index, &pfx, DPO_PROTO_IP6, + &nh, ntohl (mp->next_hop_sw_if_index), + next_hop_fib_index, + mp->next_hop_weight, + mp->next_hop_preference, + ntohl (mp->next_hop_via_label), + label_stack)); +} + +void +vl_api_ip_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp) +{ + vl_api_ip_add_del_route_reply_t *rmp; + int rv; + vnet_main_t *vnm = vnet_get_main (); + + vnm->api_errno = 0; + + if (mp->is_ipv6) + rv = ip6_add_del_route_t_handler (mp); + else + rv = ip4_add_del_route_t_handler (mp); + + rv = (rv == 0) ? vnm->api_errno : rv; + + REPLY_MACRO (VL_API_IP_ADD_DEL_ROUTE_REPLY); +} + +void +ip_table_create (fib_protocol_t fproto, + u32 table_id, u8 is_api, const u8 * name) +{ + u32 fib_index, mfib_index; + + /* + * ignore action on the default table - this is always present + * and cannot be added nor deleted from the API + */ + if (0 != table_id) + { + /* + * The API holds only one lock on the table. 
+ * i.e. it can be added many times via the API but needs to be + * deleted only once. + * The FIB index for unicast and multicast is not necessarily the + * same, since internal VPP systesm (like LISP and SR) create + * their own unicast tables. + */ + fib_index = fib_table_find (fproto, table_id); + mfib_index = mfib_table_find (fproto, table_id); + + if (~0 == fib_index) + { + fib_table_find_or_create_and_lock_w_name (fproto, table_id, + (is_api ? + FIB_SOURCE_API : + FIB_SOURCE_CLI), name); + } + if (~0 == mfib_index) + { + mfib_table_find_or_create_and_lock_w_name (fproto, table_id, + (is_api ? + MFIB_SOURCE_API : + MFIB_SOURCE_CLI), name); + } + } +} + +static int +add_del_mroute_check (fib_protocol_t table_proto, + u32 table_id, + u32 next_hop_sw_if_index, u8 is_local, u32 * fib_index) +{ + vnet_main_t *vnm = vnet_get_main (); + + *fib_index = mfib_table_find (table_proto, ntohl (table_id)); + if (~0 == *fib_index) + { + /* No such table */ + return VNET_API_ERROR_NO_SUCH_FIB; + } + + if (~0 != ntohl (next_hop_sw_if_index)) + { + if (pool_is_free_index (vnm->interface_main.sw_interfaces, + ntohl (next_hop_sw_if_index))) + { + return VNET_API_ERROR_NO_MATCHING_INTERFACE; + } + } + + return (0); +} + +static int +mroute_add_del_handler (u8 is_add, + u8 is_local, + u32 fib_index, + const mfib_prefix_t * prefix, + u32 entry_flags, + fib_rpf_id_t rpf_id, + u32 next_hop_sw_if_index, u32 itf_flags) +{ + stats_dslock_with_hint (1 /* release hint */ , 2 /* tag */ ); + + fib_route_path_t path = { + .frp_sw_if_index = next_hop_sw_if_index, + .frp_proto = fib_proto_to_dpo (prefix->fp_proto), + }; + + if (is_local) + path.frp_flags |= FIB_ROUTE_PATH_LOCAL; + + + if (!is_local && ~0 == next_hop_sw_if_index) + { + mfib_table_entry_update (fib_index, prefix, + MFIB_SOURCE_API, rpf_id, entry_flags); + } + else + { + if (is_add) + { + mfib_table_entry_path_update (fib_index, prefix, + MFIB_SOURCE_API, &path, itf_flags); + } + else + { + mfib_table_entry_path_remove 
(fib_index, prefix, + MFIB_SOURCE_API, &path); + } + } + + stats_dsunlock (); + return (0); +} + +static int +api_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp) +{ + fib_protocol_t fproto; + u32 fib_index; + int rv; + + fproto = (mp->is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4); + rv = add_del_mroute_check (fproto, + mp->table_id, + mp->next_hop_sw_if_index, + mp->is_local, &fib_index); + + if (0 != rv) + return (rv); + + mfib_prefix_t pfx = { + .fp_len = ntohs (mp->grp_address_length), + .fp_proto = fproto, + }; + + if (FIB_PROTOCOL_IP4 == fproto) + { + clib_memcpy (&pfx.fp_grp_addr.ip4, mp->grp_address, + sizeof (pfx.fp_grp_addr.ip4)); + clib_memcpy (&pfx.fp_src_addr.ip4, mp->src_address, + sizeof (pfx.fp_src_addr.ip4)); + } + else + { + clib_memcpy (&pfx.fp_grp_addr.ip6, mp->grp_address, + sizeof (pfx.fp_grp_addr.ip6)); + clib_memcpy (&pfx.fp_src_addr.ip6, mp->src_address, + sizeof (pfx.fp_src_addr.ip6)); + } + + return (mroute_add_del_handler (mp->is_add, + mp->is_local, + fib_index, &pfx, + ntohl (mp->entry_flags), + ntohl (mp->rpf_id), + ntohl (mp->next_hop_sw_if_index), + ntohl (mp->itf_flags))); +} + +void +vl_api_ip_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp) +{ + vl_api_ip_mroute_add_del_reply_t *rmp; + int rv; + vnet_main_t *vnm = vnet_get_main (); + + vnm->api_errno = 0; + + rv = api_mroute_add_del_t_handler (mp); + + rv = (rv == 0) ? 
vnm->api_errno : rv; + + REPLY_MACRO (VL_API_IP_MROUTE_ADD_DEL_REPLY); +} + +static void +send_ip_details (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, u32 sw_if_index, + u8 is_ipv6, u32 context) +{ + vl_api_ip_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IP_DETAILS); + + mp->sw_if_index = ntohl (sw_if_index); + mp->is_ipv6 = is_ipv6; + mp->context = context; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +send_ip_address_details (vpe_api_main_t * am, + unix_shared_memory_queue_t * q, + u8 * ip, u16 prefix_length, + u32 sw_if_index, u8 is_ipv6, u32 context) +{ + vl_api_ip_address_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IP_ADDRESS_DETAILS); + + if (is_ipv6) + { + clib_memcpy (&mp->ip, ip, sizeof (mp->ip)); + } + else + { + u32 *tp = (u32 *) mp->ip; + *tp = *(u32 *) ip; + } + mp->prefix_length = prefix_length; + mp->context = context; + mp->sw_if_index = htonl (sw_if_index); + mp->is_ipv6 = is_ipv6; + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + unix_shared_memory_queue_t *q; + ip6_address_t *r6; + ip4_address_t *r4; + ip6_main_t *im6 = &ip6_main; + ip4_main_t *im4 = &ip4_main; + ip_lookup_main_t *lm6 = &im6->lookup_main; + ip_lookup_main_t *lm4 = &im4->lookup_main; + ip_interface_address_t *ia = 0; + u32 sw_if_index = ~0; + int rv __attribute__ ((unused)) = 0; + + VALIDATE_SW_IF_INDEX (mp); + + sw_if_index = ntohl (mp->sw_if_index); + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + return; + + if (mp->is_ipv6) + { + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm6, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + r6 = ip_interface_address_get_address (lm6, ia); + u16 prefix_length = ia->address_length; + 
send_ip_address_details(am, q, (u8*)r6, prefix_length, + sw_if_index, 1, mp->context); + })); + /* *INDENT-ON* */ + } + else + { + /* *INDENT-OFF* */ + foreach_ip_interface_address (lm4, ia, sw_if_index, + 1 /* honor unnumbered */, + ({ + r4 = ip_interface_address_get_address (lm4, ia); + u16 prefix_length = ia->address_length; + send_ip_address_details(am, q, (u8*)r4, prefix_length, + sw_if_index, 0, mp->context); + })); + /* *INDENT-ON* */ + } + BAD_SW_IF_INDEX_LABEL; +} + +static void +vl_api_ip_dump_t_handler (vl_api_ip_dump_t * mp) +{ + vpe_api_main_t *am = &vpe_api_main; + vnet_main_t *vnm = vnet_get_main (); + vlib_main_t *vm = vlib_get_main (); + vnet_interface_main_t *im = &vnm->interface_main; + unix_shared_memory_queue_t *q; + vnet_sw_interface_t *si, *sorted_sis; + u32 sw_if_index = ~0; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + /* Gather interfaces. */ + sorted_sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces)); + _vec_len (sorted_sis) = 0; + /* *INDENT-OFF* */ + pool_foreach (si, im->sw_interfaces, + ({ + vec_add1 (sorted_sis, si[0]); + })); + /* *INDENT-ON* */ + + vec_foreach (si, sorted_sis) + { + if (!(si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED)) + { + if (mp->is_ipv6 && !ip6_interface_enabled (vm, si->sw_if_index)) + { + continue; + } + sw_if_index = si->sw_if_index; + send_ip_details (am, q, sw_if_index, mp->is_ipv6, mp->context); + } + } +} + +static void +set_ip6_flow_hash (vl_api_set_ip_flow_hash_t * mp) +{ + vl_api_set_ip_flow_hash_reply_t *rmp; + int rv; + u32 table_id; + flow_hash_config_t flow_hash_config = 0; + + table_id = ntohl (mp->vrf_id); + +#define _(a,b) if (mp->a) flow_hash_config |= b; + foreach_flow_hash_bit; +#undef _ + + rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config); + + REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_REPLY); +} + +static void +set_ip4_flow_hash (vl_api_set_ip_flow_hash_t * mp) +{ + vl_api_set_ip_flow_hash_reply_t *rmp; + int rv; + u32 
table_id; + flow_hash_config_t flow_hash_config = 0; + + table_id = ntohl (mp->vrf_id); + +#define _(a,b) if (mp->a) flow_hash_config |= b; + foreach_flow_hash_bit; +#undef _ + + rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config); + + REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_REPLY); +} + + +static void +vl_api_set_ip_flow_hash_t_handler (vl_api_set_ip_flow_hash_t * mp) +{ + if (mp->is_ipv6 == 0) + set_ip4_flow_hash (mp); + else + set_ip6_flow_hash (mp); +} + +static void + vl_api_sw_interface_ip6nd_ra_config_t_handler + (vl_api_sw_interface_ip6nd_ra_config_t * mp) +{ + vl_api_sw_interface_ip6nd_ra_config_reply_t *rmp; + vlib_main_t *vm = vlib_get_main (); + int rv = 0; + u8 is_no, suppress, managed, other, ll_option, send_unicast, cease, + default_router; + + is_no = mp->is_no == 1; + suppress = mp->suppress == 1; + managed = mp->managed == 1; + other = mp->other == 1; + ll_option = mp->ll_option == 1; + send_unicast = mp->send_unicast == 1; + cease = mp->cease == 1; + default_router = mp->default_router == 1; + + VALIDATE_SW_IF_INDEX (mp); + + rv = ip6_neighbor_ra_config (vm, ntohl (mp->sw_if_index), + suppress, managed, other, + ll_option, send_unicast, cease, + default_router, ntohl (mp->lifetime), + ntohl (mp->initial_count), + ntohl (mp->initial_interval), + ntohl (mp->max_interval), + ntohl (mp->min_interval), is_no); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_IP6ND_RA_CONFIG_REPLY); +} + +static void + vl_api_sw_interface_ip6nd_ra_prefix_t_handler + (vl_api_sw_interface_ip6nd_ra_prefix_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_sw_interface_ip6nd_ra_prefix_reply_t *rmp; + int rv = 0; + u8 is_no, use_default, no_advertise, off_link, no_autoconfig, no_onlink; + + VALIDATE_SW_IF_INDEX (mp); + + is_no = mp->is_no == 1; + use_default = mp->use_default == 1; + no_advertise = mp->no_advertise == 1; + off_link = mp->off_link == 1; + no_autoconfig = mp->no_autoconfig == 1; + no_onlink = mp->no_onlink == 1; + + rv = 
ip6_neighbor_ra_prefix (vm, ntohl (mp->sw_if_index), + (ip6_address_t *) mp->address, + mp->address_length, use_default, + ntohl (mp->val_lifetime), + ntohl (mp->pref_lifetime), no_advertise, + off_link, no_autoconfig, no_onlink, is_no); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_SW_INTERFACE_IP6ND_RA_PREFIX_REPLY); +} + +static void +send_ip6nd_proxy_details (unix_shared_memory_queue_t * q, + u32 context, + const ip46_address_t * addr, u32 sw_if_index) +{ + vl_api_ip6nd_proxy_details_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_IP6ND_PROXY_DETAILS); + mp->context = context; + mp->sw_if_index = htonl (sw_if_index); + memcpy (mp->address, addr, 16); + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +typedef struct api_ip6nd_proxy_fib_table_walk_ctx_t_ +{ + u32 *indices; +} api_ip6nd_proxy_fib_table_walk_ctx_t; + +static int +api_ip6nd_proxy_fib_table_walk (fib_node_index_t fei, void *arg) +{ + api_ip6nd_proxy_fib_table_walk_ctx_t *ctx = arg; + + if (fib_entry_is_sourced (fei, FIB_SOURCE_IP6_ND_PROXY)) + { + vec_add1 (ctx->indices, fei); + } + + return (1); +} + +static void +vl_api_ip6nd_proxy_dump_t_handler (vl_api_ip6nd_proxy_dump_t * mp) +{ + ip6_main_t *im6 = &ip6_main; + fib_table_t *fib_table; + api_ip6nd_proxy_fib_table_walk_ctx_t ctx = { + .indices = NULL, + }; + fib_node_index_t *feip; + fib_prefix_t pfx; + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + /* *INDENT-OFF* */ + pool_foreach (fib_table, im6->fibs, + ({ + fib_table_walk(fib_table->ft_index, + FIB_PROTOCOL_IP6, + api_ip6nd_proxy_fib_table_walk, + &ctx); + })); + /* *INDENT-ON* */ + + vec_sort_with_function (ctx.indices, fib_entry_cmp_for_sort); + + vec_foreach (feip, ctx.indices) + { + fib_entry_get_prefix (*feip, &pfx); + + send_ip6nd_proxy_details (q, + mp->context, + &pfx.fp_addr, + fib_entry_get_resolving_interface (*feip)); + } + + 
vec_free (ctx.indices); +} + +static void +vl_api_ip6nd_proxy_add_del_t_handler (vl_api_ip6nd_proxy_add_del_t * mp) +{ + vl_api_ip6nd_proxy_add_del_reply_t *rmp; + int rv = 0; + + VALIDATE_SW_IF_INDEX (mp); + + rv = ip6_neighbor_proxy_add_del (ntohl (mp->sw_if_index), + (ip6_address_t *) mp->address, mp->is_del); + + BAD_SW_IF_INDEX_LABEL; + REPLY_MACRO (VL_API_IP6ND_PROXY_ADD_DEL_REPLY); +} + +static void + vl_api_sw_interface_ip6_enable_disable_t_handler + (vl_api_sw_interface_ip6_enable_disable_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_sw_interface_ip6_enable_disable_reply_t *rmp; + vnet_main_t *vnm = vnet_get_main (); + int rv = 0; + clib_error_t *error; + + vnm->api_errno = 0; + + VALIDATE_SW_IF_INDEX (mp); + + error = + (mp->enable == 1) ? enable_ip6_interface (vm, + ntohl (mp->sw_if_index)) : + disable_ip6_interface (vm, ntohl (mp->sw_if_index)); + + if (error) + { + clib_error_report (error); + rv = VNET_API_ERROR_UNSPECIFIED; + } + else + { + rv = vnm->api_errno; + } + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_IP6_ENABLE_DISABLE_REPLY); +} + +static void + vl_api_sw_interface_ip6_set_link_local_address_t_handler + (vl_api_sw_interface_ip6_set_link_local_address_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_sw_interface_ip6_set_link_local_address_reply_t *rmp; + int rv = 0; + clib_error_t *error; + vnet_main_t *vnm = vnet_get_main (); + + vnm->api_errno = 0; + + VALIDATE_SW_IF_INDEX (mp); + + error = set_ip6_link_local_address (vm, + ntohl (mp->sw_if_index), + (ip6_address_t *) mp->address); + if (error) + { + clib_error_report (error); + rv = VNET_API_ERROR_UNSPECIFIED; + } + else + { + rv = vnm->api_errno; + } + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS_REPLY); +} + +void +vl_mfib_signal_send_one (unix_shared_memory_queue_t * q, + u32 context, const mfib_signal_t * mfs) +{ + vl_api_mfib_signal_details_t *mp; + mfib_prefix_t prefix; + mfib_table_t *mfib; + 
mfib_itf_t *mfi; + + mp = vl_msg_api_alloc (sizeof (*mp)); + + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = ntohs (VL_API_MFIB_SIGNAL_DETAILS); + mp->context = context; + + mfi = mfib_itf_get (mfs->mfs_itf); + mfib_entry_get_prefix (mfs->mfs_entry, &prefix); + mfib = mfib_table_get (mfib_entry_get_fib_index (mfs->mfs_entry), + prefix.fp_proto); + mp->table_id = ntohl (mfib->mft_table_id); + mp->sw_if_index = ntohl (mfi->mfi_sw_if_index); + + if (FIB_PROTOCOL_IP4 == prefix.fp_proto) + { + mp->grp_address_len = ntohs (prefix.fp_len); + + memcpy (mp->grp_address, &prefix.fp_grp_addr.ip4, 4); + if (prefix.fp_len > 32) + { + memcpy (mp->src_address, &prefix.fp_src_addr.ip4, 4); + } + } + else + { + mp->grp_address_len = ntohs (prefix.fp_len); + + ASSERT (0); + } + + if (0 != mfs->mfs_buffer_len) + { + mp->ip_packet_len = ntohs (mfs->mfs_buffer_len); + + memcpy (mp->ip_packet_data, mfs->mfs_buffer, mfs->mfs_buffer_len); + } + else + { + mp->ip_packet_len = 0; + } + + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + +static void +vl_api_mfib_signal_dump_t_handler (vl_api_mfib_signal_dump_t * mp) +{ + unix_shared_memory_queue_t *q; + + q = vl_api_client_index_to_input_queue (mp->client_index); + if (q == 0) + { + return; + } + + while (q->cursize < q->maxsize && mfib_signal_send_one (q, mp->context)) + ; +} + +#define vl_msg_name_crc_list +#include <vnet/ip/ip.api.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (api_main_t * am) +{ +#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id); + foreach_vl_msg_name_crc_ip; +#undef _ +} + +static clib_error_t * +ip_api_hookup (vlib_main_t * vm) +{ + api_main_t *am = &api_main; + +#define _(N,n) \ + vl_msg_api_set_handlers(VL_API_##N, #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_ip_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (am); + 
+ return 0; +} + +VLIB_API_INIT_FUNCTION (ip_api_hookup); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip_checksum.c b/src/vnet/ip/ip_checksum.c new file mode 100644 index 00000000..6a9cf657 --- /dev/null +++ b/src/vnet/ip/ip_checksum.c @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip4/ip_checksum.c: ip/tcp/udp checksums + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> + +ip_csum_t +ip_incremental_checksum (ip_csum_t sum, void *_data, uword n_bytes) +{ + uword data = pointer_to_uword (_data); + ip_csum_t sum0, sum1; + + sum0 = 0; + sum1 = sum; + + /* Align data pointer to 64 bits. */ +#define _(t) \ +do { \ + if (n_bytes >= sizeof (t) \ + && sizeof (t) < sizeof (ip_csum_t) \ + && (data % (2 * sizeof (t))) != 0) \ + { \ + sum0 += * uword_to_pointer (data, t *); \ + data += sizeof (t); \ + n_bytes -= sizeof (t); \ + } \ +} while (0) + + _(u8); + _(u16); + if (BITS (ip_csum_t) > 32) + _(u32); + +#undef _ + + { + ip_csum_t *d = uword_to_pointer (data, ip_csum_t *); + + while (n_bytes >= 2 * sizeof (d[0])) + { + sum0 = ip_csum_with_carry (sum0, d[0]); + sum1 = ip_csum_with_carry (sum1, d[1]); + d += 2; + n_bytes -= 2 * sizeof (d[0]); + } + + data = pointer_to_uword (d); + } + +#define _(t) \ +do { \ + if (n_bytes >= sizeof (t) && sizeof (t) <= sizeof (ip_csum_t)) \ + { \ + sum0 = ip_csum_with_carry (sum0, * uword_to_pointer (data, t *)); \ + data += sizeof (t); \ + n_bytes -= sizeof (t); \ + } \ +} while (0) + + if (BITS (ip_csum_t) > 32) + _(u64); + _(u32); + _(u16); + _(u8); + +#undef _ + + /* Combine even and odd sums. 
*/ + sum0 = ip_csum_with_carry (sum0, sum1); + + return sum0; +} + +ip_csum_t +ip_csum_and_memcpy (ip_csum_t sum, void *dst, void *src, uword n_bytes) +{ + uword n_left; + ip_csum_t sum0 = sum, sum1; + n_left = n_bytes; + + if (n_left && (pointer_to_uword (dst) & sizeof (u8))) + { + u8 *d8, val; + + d8 = dst; + val = ((u8 *) src)[0]; + d8[0] = val; + dst += 1; + src += 1; + n_left -= 1; + sum0 = + ip_csum_with_carry (sum0, val << (8 * CLIB_ARCH_IS_LITTLE_ENDIAN)); + } + + while ((n_left >= sizeof (u16)) + && (pointer_to_uword (dst) & (sizeof (sum) - sizeof (u16)))) + { + u16 *d16, *s16; + + d16 = dst; + s16 = src; + + d16[0] = clib_mem_unaligned (&s16[0], u16); + + sum0 = ip_csum_with_carry (sum0, d16[0]); + dst += sizeof (u16); + src += sizeof (u16); + n_left -= sizeof (u16); + } + + sum1 = 0; + while (n_left >= 2 * sizeof (sum)) + { + ip_csum_t dst0, dst1; + ip_csum_t *dst_even, *src_even; + + dst_even = dst; + src_even = src; + dst0 = clib_mem_unaligned (&src_even[0], ip_csum_t); + dst1 = clib_mem_unaligned (&src_even[1], ip_csum_t); + + dst_even[0] = dst0; + dst_even[1] = dst1; + + dst += 2 * sizeof (dst_even[0]); + src += 2 * sizeof (dst_even[0]); + n_left -= 2 * sizeof (dst_even[0]); + + sum0 = ip_csum_with_carry (sum0, dst0); + sum1 = ip_csum_with_carry (sum1, dst1); + } + + sum0 = ip_csum_with_carry (sum0, sum1); + while (n_left >= 1 * sizeof (sum)) + { + ip_csum_t dst0, *dst_even, *src_even; + + dst_even = dst; + src_even = src; + + dst0 = clib_mem_unaligned (&src_even[0], ip_csum_t); + + dst_even[0] = dst0; + + dst += 1 * sizeof (sum); + src += 1 * sizeof (sum); + n_left -= 1 * sizeof (sum); + + sum0 = ip_csum_with_carry (sum0, dst0); + } + + while (n_left >= sizeof (u16)) + { + u16 dst0, *dst_short, *src_short; + + dst_short = dst; + src_short = src; + + dst0 = clib_mem_unaligned (&src_short[0], u16); + + dst_short[0] = dst0; + + sum0 = ip_csum_with_carry (sum0, dst_short[0]); + dst += 1 * sizeof (dst0); + src += 1 * sizeof (dst0); + n_left -= 1 * sizeof 
(dst0); + + } + + if (n_left == 1) + { + u8 *d8, *s8, val; + + d8 = dst; + s8 = src; + + d8[0] = val = s8[0]; + d8 += 1; + s8 += 1; + n_left -= 1; + sum0 = ip_csum_with_carry (sum0, val << (8 * CLIB_ARCH_IS_BIG_ENDIAN)); + } + + return sum0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c new file mode 100644 index 00000000..ca062bfd --- /dev/null +++ b/src/vnet/ip/ip_frag.c @@ -0,0 +1,581 @@ +/*--------------------------------------------------------------------------- + * Copyright (c) 2009-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ +/* + * IPv4 Fragmentation Node + * + * + */ + +#include "ip_frag.h" + +#include <vnet/ip/ip.h> + + +typedef struct +{ + u8 ipv6; + u16 header_offset; + u16 mtu; + u8 next; + u16 n_fragments; +} ip_frag_trace_t; + +static u8 * +format_ip_frag_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *); + s = format (s, "IPv%s offset: %u mtu: %u fragments: %u", + t->ipv6 ? 
"6" : "4", t->header_offset, t->mtu, t->n_fragments); + return s; +} + +static u32 running_fragment_id; + +static void +ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer, + ip_frag_error_t * error) +{ + vlib_buffer_t *p; + ip4_header_t *ip4; + u16 mtu, ptr, len, max, rem, offset, ip_frag_id, ip_frag_offset; + u8 *packet, more; + + vec_add1 (*buffer, pi); + p = vlib_get_buffer (vm, pi); + offset = vnet_buffer (p)->ip_frag.header_offset; + mtu = vnet_buffer (p)->ip_frag.mtu; + packet = (u8 *) vlib_buffer_get_current (p); + ip4 = (ip4_header_t *) (packet + offset); + + rem = clib_net_to_host_u16 (ip4->length) - sizeof (*ip4); + ptr = 0; + max = (mtu - sizeof (*ip4) - vnet_buffer (p)->ip_frag.header_offset) & ~0x7; + + if (rem < (p->current_length - offset - sizeof (*ip4))) + { + *error = IP_FRAG_ERROR_MALFORMED; + return; + } + + if (mtu < sizeof (*ip4)) + { + *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER; + return; + } + + if (ip4->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT)) + { + *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET; + return; + } + + if (ip4_is_fragment (ip4)) + { + ip_frag_id = ip4->fragment_id; + ip_frag_offset = ip4_get_fragment_offset (ip4); + more = + ! !(ip4->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)); + } + else + { + ip_frag_id = (++running_fragment_id); + ip_frag_offset = 0; + more = 0; + } + + //Do the actual fragmentation + while (rem) + { + u32 bi; + vlib_buffer_t *b; + ip4_header_t *fip4; + + len = + (rem > + (mtu - sizeof (*ip4) - + vnet_buffer (p)->ip_frag.header_offset)) ? 
max : rem; + + if (ptr == 0) + { + bi = pi; + b = p; + fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset); + } + else + { + if (!vlib_buffer_alloc (vm, &bi, 1)) + { + *error = IP_FRAG_ERROR_MEMORY; + return; + } + vec_add1 (*buffer, bi); + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (p)->sw_if_index[VLIB_RX]; + vnet_buffer (b)->sw_if_index[VLIB_TX] = + vnet_buffer (p)->sw_if_index[VLIB_TX]; + fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset); + + //Copy offset and ip4 header + clib_memcpy (b->data, packet, offset + sizeof (*ip4)); + //Copy data + clib_memcpy (((u8 *) (fip4)) + sizeof (*fip4), + packet + offset + sizeof (*fip4) + ptr, len); + } + b->current_length = offset + len + sizeof (*fip4); + + fip4->fragment_id = ip_frag_id; + fip4->flags_and_fragment_offset = + clib_host_to_net_u16 ((ptr >> 3) + ip_frag_offset); + fip4->flags_and_fragment_offset |= + clib_host_to_net_u16 (((len != rem) || more) << 13); + // ((len0 != rem0) || more0) << 13 is optimization for + // ((len0 != rem0) || more0) ? 
IP4_HEADER_FLAG_MORE_FRAGMENTS : 0 + fip4->length = clib_host_to_net_u16 (len + sizeof (*fip4)); + fip4->checksum = ip4_header_checksum (fip4); + + if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER) + { + //Encapsulating ipv4 header + ip4_header_t *encap_header4 = + (ip4_header_t *) vlib_buffer_get_current (b); + encap_header4->length = clib_host_to_net_u16 (b->current_length); + encap_header4->checksum = ip4_header_checksum (encap_header4); + } + else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER) + { + //Encapsulating ipv6 header + ip6_header_t *encap_header6 = + (ip6_header_t *) vlib_buffer_get_current (b); + encap_header6->payload_length = + clib_host_to_net_u16 (b->current_length - + sizeof (*encap_header6)); + } + + rem -= len; + ptr += len; + } +} + +void +ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu, + u8 next_index, u8 flags) +{ + vnet_buffer (b)->ip_frag.header_offset = offset; + vnet_buffer (b)->ip_frag.mtu = mtu; + vnet_buffer (b)->ip_frag.next_index = next_index; + vnet_buffer (b)->ip_frag.flags = flags; +} + +static uword +ip4_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_frag_node.index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + u32 frag_sent = 0, small_packets = 0; + u32 *buffer = 0; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0, *frag_from, frag_left; + vlib_buffer_t *p0; + ip_frag_error_t error0; + ip4_frag_next_t next0; + + //Note: The packet is not enqueued now. + //It is instead put in a vector where other fragments + //will be put as well. 
+ pi0 = from[0]; + from += 1; + n_left_from -= 1; + error0 = IP_FRAG_ERROR_NONE; + + p0 = vlib_get_buffer (vm, pi0); + ip4_frag_do_fragment (vm, pi0, &buffer, &error0); + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip_frag_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset; + tr->mtu = vnet_buffer (p0)->ip_frag.mtu; + tr->ipv6 = 0; + tr->n_fragments = vec_len (buffer); + tr->next = vnet_buffer (p0)->ip_frag.next_index; + } + + if (error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET) + { + icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, + vnet_buffer (p0)->ip_frag.mtu); + vlib_buffer_advance (p0, + vnet_buffer (p0)->ip_frag.header_offset); + next0 = IP4_FRAG_NEXT_ICMP_ERROR; + } + else + next0 = + (error0 == + IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)-> + ip_frag.next_index : IP4_FRAG_NEXT_DROP; + + if (error0 == IP_FRAG_ERROR_NONE) + { + frag_sent += vec_len (buffer); + small_packets += (vec_len (buffer) == 1); + } + else + vlib_error_count (vm, ip4_frag_node.index, error0, 1); + + //Send fragments that were added in the frame + frag_from = buffer; + frag_left = vec_len (buffer); + + while (frag_left > 0) + { + while (frag_left > 0 && n_left_to_next > 0) + { + u32 i; + i = to_next[0] = frag_from[0]; + frag_from += 1; + frag_left -= 1; + to_next += 1; + n_left_to_next -= 1; + + vlib_get_buffer (vm, i)->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, i, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + vec_reset_length (buffer); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vec_free (buffer); + + vlib_node_increment_counter (vm, ip4_frag_node.index, + IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent); + 
vlib_node_increment_counter (vm, ip4_frag_node.index, + IP_FRAG_ERROR_SMALL_PACKET, small_packets); + + return frame->n_vectors; +} + + +static void +ip6_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer, + ip_frag_error_t * error) +{ + vlib_buffer_t *p; + ip6_header_t *ip6_hdr; + ip6_frag_hdr_t *frag_hdr; + u8 *payload, *next_header; + + p = vlib_get_buffer (vm, pi); + + //Parsing the IPv6 headers + ip6_hdr = + vlib_buffer_get_current (p) + vnet_buffer (p)->ip_frag.header_offset; + payload = (u8 *) (ip6_hdr + 1); + next_header = &ip6_hdr->protocol; + if (*next_header == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) + { + next_header = payload; + payload += payload[1] * 8; + } + + if (*next_header == IP_PROTOCOL_IP6_DESTINATION_OPTIONS) + { + next_header = payload; + payload += payload[1] * 8; + } + + if (*next_header == IP_PROTOCOL_IPV6_ROUTE) + { + next_header = payload; + payload += payload[1] * 8; + } + + if (PREDICT_FALSE + (payload >= (u8 *) vlib_buffer_get_current (p) + p->current_length)) + { + //A malicious packet could set an extension header with a too big size + //and make us modify another vlib_buffer + *error = IP_FRAG_ERROR_MALFORMED; + return; + } + + u8 has_more; + u16 initial_offset; + if (*next_header == IP_PROTOCOL_IPV6_FRAGMENTATION) + { + //The fragmentation header is already there + frag_hdr = (ip6_frag_hdr_t *) payload; + has_more = ip6_frag_hdr_more (frag_hdr); + initial_offset = ip6_frag_hdr_offset (frag_hdr); + } + else + { + //Insert a fragmentation header in the packet + u8 nh = *next_header; + *next_header = IP_PROTOCOL_IPV6_FRAGMENTATION; + vlib_buffer_advance (p, -sizeof (*frag_hdr)); + u8 *start = vlib_buffer_get_current (p); + memmove (start, start + sizeof (*frag_hdr), + payload - (start + sizeof (*frag_hdr))); + frag_hdr = (ip6_frag_hdr_t *) (payload - sizeof (*frag_hdr)); + frag_hdr->identification = ++running_fragment_id; + frag_hdr->next_hdr = nh; + frag_hdr->rsv = 0; + has_more = 0; + initial_offset = 0; + } + payload = (u8 *) 
(frag_hdr + 1); + + u16 headers_len = payload - (u8 *) vlib_buffer_get_current (p); + u16 max_payload = vnet_buffer (p)->ip_frag.mtu - headers_len; + u16 rem = p->current_length - headers_len; + u16 ptr = 0; + + if (max_payload < 8) + { + *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER; + return; + } + + while (rem) + { + u32 bi; + vlib_buffer_t *b; + u16 len = (rem > max_payload) ? (max_payload & ~0x7) : rem; + rem -= len; + + if (ptr != 0) + { + if (!vlib_buffer_alloc (vm, &bi, 1)) + { + *error = IP_FRAG_ERROR_MEMORY; + return; + } + b = vlib_get_buffer (vm, bi); + vnet_buffer (b)->sw_if_index[VLIB_RX] = + vnet_buffer (p)->sw_if_index[VLIB_RX]; + vnet_buffer (b)->sw_if_index[VLIB_TX] = + vnet_buffer (p)->sw_if_index[VLIB_TX]; + clib_memcpy (vlib_buffer_get_current (b), + vlib_buffer_get_current (p), headers_len); + clib_memcpy (vlib_buffer_get_current (b) + headers_len, + payload + ptr, len); + frag_hdr = + vlib_buffer_get_current (b) + headers_len - sizeof (*frag_hdr); + } + else + { + bi = pi; + b = vlib_get_buffer (vm, bi); + //frag_hdr already set here + } + + ip6_hdr = + vlib_buffer_get_current (b) + vnet_buffer (p)->ip_frag.header_offset; + frag_hdr->fragment_offset_and_more = + ip6_frag_hdr_offset_and_more (initial_offset + (ptr >> 3), + (rem || has_more)); + b->current_length = headers_len + len; + ip6_hdr->payload_length = + clib_host_to_net_u16 (b->current_length - + vnet_buffer (p)->ip_frag.header_offset - + sizeof (*ip6_hdr)); + + if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER) + { + //Encapsulating ipv4 header + ip4_header_t *encap_header4 = + (ip4_header_t *) vlib_buffer_get_current (b); + encap_header4->length = clib_host_to_net_u16 (b->current_length); + encap_header4->checksum = ip4_header_checksum (encap_header4); + } + else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER) + { + //Encapsulating ipv6 header + ip6_header_t *encap_header6 = + (ip6_header_t *) vlib_buffer_get_current (b); + encap_header6->payload_length = + 
clib_host_to_net_u16 (b->current_length - + sizeof (*encap_header6)); + } + + vec_add1 (*buffer, bi); + + ptr += len; + } +} + +static uword +ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from, next_index, *to_next, n_left_to_next; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip6_frag_node.index); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + u32 frag_sent = 0, small_packets = 0; + u32 *buffer = 0; + + while (n_left_from > 0) + { + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 pi0, *frag_from, frag_left; + vlib_buffer_t *p0; + ip_frag_error_t error0; + ip6_frag_next_t next0; + + pi0 = from[0]; + from += 1; + n_left_from -= 1; + error0 = IP_FRAG_ERROR_NONE; + + p0 = vlib_get_buffer (vm, pi0); + ip6_frag_do_fragment (vm, pi0, &buffer, &error0); + + if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED)) + { + ip_frag_trace_t *tr = + vlib_add_trace (vm, node, p0, sizeof (*tr)); + tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset; + tr->mtu = vnet_buffer (p0)->ip_frag.mtu; + tr->ipv6 = 1; + tr->n_fragments = vec_len (buffer); + tr->next = vnet_buffer (p0)->ip_frag.next_index; + } + + next0 = + (error0 == + IP_FRAG_ERROR_NONE) ? 
vnet_buffer (p0)-> + ip_frag.next_index : IP6_FRAG_NEXT_DROP; + frag_sent += vec_len (buffer); + small_packets += (vec_len (buffer) == 1); + + //Send fragments that were added in the frame + frag_from = buffer; + frag_left = vec_len (buffer); + while (frag_left > 0) + { + while (frag_left > 0 && n_left_to_next > 0) + { + u32 i; + i = to_next[0] = frag_from[0]; + frag_from += 1; + frag_left -= 1; + to_next += 1; + n_left_to_next -= 1; + + vlib_get_buffer (vm, i)->error = error_node->errors[error0]; + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, i, + next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + vlib_get_next_frame (vm, node, next_index, to_next, + n_left_to_next); + } + vec_reset_length (buffer); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + vec_free (buffer); + vlib_node_increment_counter (vm, ip6_frag_node.index, + IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent); + vlib_node_increment_counter (vm, ip6_frag_node.index, + IP_FRAG_ERROR_SMALL_PACKET, small_packets); + + return frame->n_vectors; +} + +static char *ip4_frag_error_strings[] = { +#define _(sym,string) string, + foreach_ip_frag_error +#undef _ +}; + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_frag_node) = { + .function = ip4_frag, + .name = IP4_FRAG_NODE_NAME, + .vector_size = sizeof (u32), + .format_trace = format_ip_frag_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = IP_FRAG_N_ERROR, + .error_strings = ip4_frag_error_strings, + + .n_next_nodes = IP4_FRAG_N_NEXT, + .next_nodes = { + [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [IP4_FRAG_NEXT_DROP] = "error-drop" + }, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_frag_node) = { + .function = ip6_frag, + .name = IP6_FRAG_NODE_NAME, + .vector_size = sizeof (u32), + .format_trace = format_ip_frag_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + 
.n_errors = IP_FRAG_N_ERROR, + .error_strings = ip4_frag_error_strings, + + .n_next_nodes = IP6_FRAG_N_NEXT, + .next_nodes = { + [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup", + [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup", + [IP6_FRAG_NEXT_DROP] = "error-drop" + }, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h new file mode 100644 index 00000000..348f5a2f --- /dev/null +++ b/src/vnet/ip/ip_frag.h @@ -0,0 +1,96 @@ +/*--------------------------------------------------------------------------- + * Copyright (c) 2009-2014 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *--------------------------------------------------------------------------- + */ +/* + * IPv4 and IPv6 Fragmentation Nodes + * + * A packet sent to those nodes require the following + * buffer attributes to be set: + * ip_frag.header_offset : + * Where to find the IPv4 (or IPv6) header in the packet. Previous + * bytes are left untouched and copied in every fragment. The fragments + * are then appended. This option is used for fragmented packets + * that are encapsulated. + * ip_frag.mtu : + * Maximum size of IP packets, header included, but ignoring + * the 'ip_frag.header_offset' copied bytes. + * ip_frag.next_index : + * One of ip_frag_next_t, indicating to which exit node the fragments + * should be sent to. 
 *
 */

#ifndef IP_FRAG_H
#define IP_FRAG_H

#include <vnet/vnet.h>

/* Bits tested in vnet_buffer(b)->ip_frag.flags by the fragmentation code */
#define IP_FRAG_FLAG_IP4_HEADER 0x01	//Encapsulating IPv4 header
#define IP_FRAG_FLAG_IP6_HEADER 0x02	//Encapsulating IPv6 header

/* Graph node names used by the two node registrations */
#define IP4_FRAG_NODE_NAME "ip4-frag"
#define IP6_FRAG_NODE_NAME "ip6-frag"

extern vlib_node_registration_t ip4_frag_node;
extern vlib_node_registration_t ip6_frag_node;

/* Next-node slots of the ip4-frag node; these index its .next_nodes table */
typedef enum
{
  IP4_FRAG_NEXT_IP4_LOOKUP,
  IP4_FRAG_NEXT_IP6_LOOKUP,
  IP4_FRAG_NEXT_ICMP_ERROR,
  IP4_FRAG_NEXT_DROP,
  IP4_FRAG_N_NEXT
} ip4_frag_next_t;

/* Next-node slots of the ip6-frag node; these index its .next_nodes table */
typedef enum
{
  IP6_FRAG_NEXT_IP4_LOOKUP,
  IP6_FRAG_NEXT_IP6_LOOKUP,
  IP6_FRAG_NEXT_DROP,
  IP6_FRAG_N_NEXT
} ip6_frag_next_t;

/*
 * X-macro listing every fragmentation error/counter as (symbol, string).
 * It is expanded once below to build ip_frag_error_t and once more by the
 * implementation to build the matching counter-string table.
 */
#define foreach_ip_frag_error				\
  /* Must be first. */					\
 _(NONE, "packet fragmented")				\
 _(SMALL_PACKET, "packet smaller than MTU")             \
 _(FRAGMENT_SENT, "number of sent fragments")           \
 _(CANT_FRAGMENT_HEADER, "can't fragment header")	\
 _(DONT_FRAGMENT_SET, "can't fragment this packet")	\
 _(MALFORMED, "malformed packet")                       \
 _(MEMORY, "could not allocate buffer")                 \
 _(UNKNOWN, "unknown error")

/* IP_FRAG_ERROR_<sym> for each entry above, plus the entry count */
typedef enum
{
#define _(sym,str) IP_FRAG_ERROR_##sym,
  foreach_ip_frag_error
#undef _
    IP_FRAG_N_ERROR,
} ip_frag_error_t;

/*
 * Fill the ip_frag.* buffer attributes described at the top of this file.
 * NOTE(review): the definition is not visible here; presumably 'offset'
 * maps to ip_frag.header_offset - confirm against the implementation.
 */
void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu,
			      u8 next_index, u8 flags);

#endif /* ifndef IP_FRAG_H */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c new file mode 100644 index 00000000..f7635b35 --- /dev/null +++ b/src/vnet/ip/ip_init.c @@ -0,0 +1,152 @@
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip_init.c: ip generic initialization + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 */

#include <vnet/ip/ip.h>

/* Global IP state: protocol and TCP/UDP port name tables */
ip_main_t ip_main;

/*
 * Generic IP initialization.
 *
 * Builds the protocol table from protocols.def and the port table from
 * ports.def, indexes both by name and by number, then runs the init
 * functions of the IP-related subsystems listed below, in order.
 *
 * Returns 0 on success, or the first error returned by one of the called
 * init functions.
 */
clib_error_t *
ip_main_init (vlib_main_t * vm)
{
  ip_main_t *im = &ip_main;
  clib_error_t *error = 0;

  memset (im, 0, sizeof (im[0]));

  {
    ip_protocol_info_t *pi;
    u32 i;

    /* Each ip_protocol (n, s) entry of protocols.def appends one element */
#define ip_protocol(n,s)			\
do {						\
  vec_add2 (im->protocol_infos, pi, 1);		\
  pi->protocol = n;				\
  pi->name = (u8 *) #s;				\
} while (0);

#include "protocols.def"

#undef ip_protocol

    /*
     * Both hashes map to the element's index in im->protocol_infos.
     * NOTE(review): protocol_info_by_protocol is not created explicitly
     * here; presumably hash_set allocates it on first use - confirm.
     */
    im->protocol_info_by_name = hash_create_string (0, sizeof (uword));
    for (i = 0; i < vec_len (im->protocol_infos); i++)
      {
	pi = im->protocol_infos + i;

	hash_set_mem (im->protocol_info_by_name, pi->name, i);
	hash_set (im->protocol_info_by_protocol, pi->protocol, i);
      }
  }

  {
    tcp_udp_port_info_t *pi;
    u32 i;
    /* ports.def is expanded twice so that names and numbers line up */
    static char *port_names[] = {
#define ip_port(s,n) #s,
#include "ports.def"
#undef ip_port
    };
    static u16 ports[] = {
#define ip_port(s,n) n,
#include "ports.def"
#undef ip_port
    };

    vec_resize (im->port_infos, ARRAY_LEN (port_names));
    im->port_info_by_name = hash_create_string (0, sizeof (uword));

    for (i = 0; i < vec_len (im->port_infos); i++)
      {
	pi = im->port_infos + i;
	/* The port is stored, and used as hash key, in network byte order */
	pi->port = clib_host_to_net_u16 (ports[i]);
	pi->name = (u8 *) port_names[i];
	hash_set_mem (im->port_info_by_name, pi->name, i);
	hash_set (im->port_info_by_port, pi->port, i);
      }
  }

  /* Run the dependent init functions; stop at the first failure */
  if ((error = vlib_call_init_function (vm, vnet_main_init)))
    return error;

  if ((error = vlib_call_init_function (vm, ip4_init)))
    return error;

  if ((error = vlib_call_init_function (vm, ip6_init)))
    return error;

  if ((error = vlib_call_init_function (vm, icmp4_init)))
    return error;

  if ((error = vlib_call_init_function (vm, icmp6_init)))
    return error;

  if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_init)))
    return error;

  if ((error = vlib_call_init_function (vm, udp_local_init)))
    return error;

  if ((error = vlib_call_init_function (vm, udp_init)))
    return error;

  if ((error = vlib_call_init_function (vm, ip_classify_init)))
    return error;

  if ((error = vlib_call_init_function (vm, input_acl_init)))
    return error;

  if ((error = vlib_call_init_function (vm, policer_classify_init)))
    return error;

  if ((error = vlib_call_init_function (vm, flow_classify_init)))
    return error;

  return error;
}

VLIB_INIT_FUNCTION (ip_main_init);


/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
diff --git a/src/vnet/ip/ip_input_acl.c b/src/vnet/ip/ip_input_acl.c new file mode 100644 index 00000000..b0b52ab1 --- /dev/null +++ b/src/vnet/ip/ip_input_acl.c @@ -0,0 +1,450 @@
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <vnet/ip/ip.h>
#include <vnet/classify/vnet_classify.h>
#include <vnet/classify/input_acl.h>

/* Per-packet trace record written by the input ACL nodes */
typedef struct
{
  u32 sw_if_index;
  u32 next_index;
  u32 table_index;
  u32 offset;
} ip_inacl_trace_t;

/* packet trace format function */
static u8 *
format_ip_inacl_trace (u8 * s, va_list * args)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
  ip_inacl_trace_t *t = va_arg (*args, ip_inacl_trace_t *);

  s = format (s, "INACL: sw_if_index %d, next_index %d, table %d, offset %d",
	      t->sw_if_index, t->next_index, t->table_index, t->offset);
  return s;
}

vlib_node_registration_t ip4_inacl_node;
vlib_node_registration_t ip6_inacl_node;

/* Node counters as (symbol, string); expanded into the enum and table below */
#define foreach_ip_inacl_error                  \
_(MISS, "input ACL misses")                     \
_(HIT, "input ACL hits")                        \
_(CHAIN_HIT, "input ACL hits after chain walk")

typedef enum
{
#define _(sym,str) IP_INACL_ERROR_##sym,
  foreach_ip_inacl_error
#undef _
    IP_INACL_N_ERROR,
} ip_inacl_error_t;

static char *ip_inacl_error_strings[] = {
#define _(sym,string) string,
  foreach_ip_inacl_error
#undef _
};

/*
 * Shared body of the ip4-inacl and ip6-inacl nodes.
 *
 * Pass 1 walks the whole frame, looks up the classify table bound to each
 * packet's RX interface, and stores the table index and the packet hash in
 * the buffer's l2_classify metadata, prefetching the hash bucket.
 *
 * Pass 2 walks the frame again, searches the table (and, on a miss, the
 * chain of next tables) and picks the next node from the matching entry or
 * from the last table's miss_next_index. Indices outside the node's next
 * range leave the value obtained from vnet_get_config_data() in place.
 *
 * is_ip4 selects the table id, the node whose error counters are attached
 * to the buffer, and the IP4_/IP6_ error codes.
 *
 * Returns the number of buffers in the frame.
 */
static inline uword
ip_inacl_inline (vlib_main_t * vm,
		 vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip4)
{
  u32 n_left_from, *from, *to_next;
  acl_next_index_t next_index;
  input_acl_main_t *am = &input_acl_main;
  vnet_classify_main_t *vcm = am->vnet_classify_main;
  f64 now = vlib_time_now (vm);
  u32 hits = 0;
  u32 misses = 0;
  u32 chain_hits = 0;
  input_acl_table_id_t tid;
  vlib_node_runtime_t *error_node;
  u32 n_next_nodes;

  n_next_nodes = node->n_next_nodes;

  /* Drop reasons are accounted against the ip4-input / ip6-input node */
  if (is_ip4)
    {
      tid = INPUT_ACL_TABLE_IP4;
      error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
    }
  else
    {
      tid = INPUT_ACL_TABLE_IP6;
      error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
    }

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  /* First pass: compute hashes */

  /*
   * Two packets per iteration; "> 2" guarantees from[2] is valid for the
   * prefetch below.
   * NOTE(review): unlike the second pass, the table index is not compared
   * with ~0 before pool_elt_at_index() in this pass - confirm callers
   * guarantee a valid table for every interface reaching this node.
   */
  while (n_left_from > 2)
    {
      vlib_buffer_t *b0, *b1;
      u32 bi0, bi1;
      u8 *h0, *h1;
      u32 sw_if_index0, sw_if_index1;
      u32 table_index0, table_index1;
      vnet_classify_table_t *t0, *t1;

      /* prefetch next iteration */
      {
	vlib_buffer_t *p1, *p2;

	p1 = vlib_get_buffer (vm, from[1]);
	p2 = vlib_get_buffer (vm, from[2]);

	vlib_prefetch_buffer_header (p1, STORE);
	CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
	vlib_prefetch_buffer_header (p2, STORE);
	CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
      }

      bi0 = from[0];
      b0 = vlib_get_buffer (vm, bi0);

      bi1 = from[1];
      b1 = vlib_get_buffer (vm, bi1);

      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
      table_index0 =
	am->classify_table_index_by_sw_if_index[tid][sw_if_index0];

      sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
      table_index1 =
	am->classify_table_index_by_sw_if_index[tid][sw_if_index1];

      t0 = pool_elt_at_index (vcm->tables, table_index0);

      t1 = pool_elt_at_index (vcm->tables, table_index1);

      /* Match either from the current data pointer or from buffer start */
      if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
	h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset;
      else
	h0 = b0->data;

      vnet_buffer (b0)->l2_classify.hash =
	vnet_classify_hash_packet (t0, (u8 *) h0);

      vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);

      if (t1->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
	h1 = (void *) vlib_buffer_get_current (b1) + t1->current_data_offset;
      else
	h1 = b1->data;

      vnet_buffer (b1)->l2_classify.hash =
	vnet_classify_hash_packet (t1, (u8 *) h1);

      vnet_classify_prefetch_bucket (t1, vnet_buffer (b1)->l2_classify.hash);

      vnet_buffer (b0)->l2_classify.table_index = table_index0;

      vnet_buffer (b1)->l2_classify.table_index = table_index1;

      from += 2;
      n_left_from -= 2;
    }

  /* Remaining packets of the first pass, one at a time */
  while (n_left_from > 0)
    {
      vlib_buffer_t *b0;
      u32 bi0;
      u8 *h0;
      u32 sw_if_index0;
      u32 table_index0;
      vnet_classify_table_t *t0;

      bi0 = from[0];
      b0 = vlib_get_buffer (vm, bi0);

      sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
      table_index0 =
	am->classify_table_index_by_sw_if_index[tid][sw_if_index0];

      t0 = pool_elt_at_index (vcm->tables, table_index0);

      if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
	h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset;
      else
	h0 = b0->data;

      vnet_buffer (b0)->l2_classify.hash =
	vnet_classify_hash_packet (t0, (u8 *) h0);

      vnet_buffer (b0)->l2_classify.table_index = table_index0;
      vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);

      from++;
      n_left_from--;
    }

  /* Second pass: rewind to the start of the frame and classify */
  next_index = node->cached_next_index;
  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;

  while (n_left_from > 0)
    {
      u32 n_left_to_next;

      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Not enough load/store slots to dual loop... */
      while (n_left_from > 0 && n_left_to_next > 0)
	{
	  u32 bi0;
	  vlib_buffer_t *b0;
	  u32 next0 = ACL_NEXT_INDEX_DENY;
	  u32 table_index0;
	  vnet_classify_table_t *t0;
	  vnet_classify_entry_t *e0;
	  u64 hash0;
	  u8 *h0;
	  u8 error0;

	  /* Stride 3 seems to work best */
	  if (PREDICT_TRUE (n_left_from > 3))
	    {
	      vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
	      vnet_classify_table_t *tp1;
	      u32 table_index1;
	      u64 phash1;

	      table_index1 = vnet_buffer (p1)->l2_classify.table_index;

	      if (PREDICT_TRUE (table_index1 != ~0))
		{
		  tp1 = pool_elt_at_index (vcm->tables, table_index1);
		  phash1 = vnet_buffer (p1)->l2_classify.hash;
		  vnet_classify_prefetch_entry (tp1, phash1);
		}
	    }

	  /* speculatively enqueue b0 to the current next frame */
	  bi0 = from[0];
	  to_next[0] = bi0;
	  from += 1;
	  to_next += 1;
	  n_left_from -= 1;
	  n_left_to_next -= 1;

	  b0 = vlib_get_buffer (vm, bi0);
	  table_index0 = vnet_buffer (b0)->l2_classify.table_index;
	  e0 = 0;
	  t0 = 0;
	  /* Overwrites next0 with the value kept in the config data */
	  vnet_get_config_data (am->vnet_config_main[tid],
				&b0->current_config_index, &next0,
				/* # bytes of config data */ 0);

	  vnet_buffer (b0)->l2_classify.opaque_index = ~0;

	  if (PREDICT_TRUE (table_index0 != ~0))
	    {
	      hash0 = vnet_buffer (b0)->l2_classify.hash;
	      t0 = pool_elt_at_index (vcm->tables, table_index0);

	      if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
		h0 =
		  (void *) vlib_buffer_get_current (b0) +
		  t0->current_data_offset;
	      else
		h0 = b0->data;

	      e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
	      if (e0)
		{
		  /* Hit in the first table */
		  vnet_buffer (b0)->l2_classify.opaque_index
		    = e0->opaque_index;
		  vlib_buffer_advance (b0, e0->advance);

		  next0 = (e0->next_index < n_next_nodes) ?
		    e0->next_index : next0;

		  hits++;

		  if (is_ip4)
		    error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
		      IP4_ERROR_INACL_SESSION_DENY : IP4_ERROR_NONE;
		  else
		    error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
		      IP6_ERROR_INACL_SESSION_DENY : IP6_ERROR_NONE;
		  b0->error = error_node->errors[error0];

		  if (e0->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX ||
		      e0->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
		    vnet_buffer (b0)->sw_if_index[VLIB_TX] = e0->metadata;
		}
	      else
		{
		  /* Miss: walk the chain of next tables until hit or end */
		  while (1)
		    {
		      if (PREDICT_TRUE (t0->next_table_index != ~0))
			t0 = pool_elt_at_index (vcm->tables,
						t0->next_table_index);
		      else
			{
			  /* End of chain: apply the last table's miss next */
			  next0 = (t0->miss_next_index < n_next_nodes) ?
			    t0->miss_next_index : next0;

			  misses++;

			  if (is_ip4)
			    error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
			      IP4_ERROR_INACL_TABLE_MISS : IP4_ERROR_NONE;
			  else
			    error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
			      IP6_ERROR_INACL_TABLE_MISS : IP6_ERROR_NONE;
			  b0->error = error_node->errors[error0];
			  break;
			}

		      if (t0->current_data_flag ==
			  CLASSIFY_FLAG_USE_CURR_DATA)
			h0 =
			  (void *) vlib_buffer_get_current (b0) +
			  t0->current_data_offset;
		      else
			h0 = b0->data;

		      /* The hash has to be recomputed for each table */
		      hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
		      e0 = vnet_classify_find_entry
			(t0, (u8 *) h0, hash0, now);
		      if (e0)
			{
			  vnet_buffer (b0)->l2_classify.opaque_index
			    = e0->opaque_index;
			  vlib_buffer_advance (b0, e0->advance);
			  next0 = (e0->next_index < n_next_nodes) ?
			    e0->next_index : next0;
			  hits++;
			  chain_hits++;

			  if (is_ip4)
			    error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
			      IP4_ERROR_INACL_SESSION_DENY : IP4_ERROR_NONE;
			  else
			    error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
			      IP6_ERROR_INACL_SESSION_DENY : IP6_ERROR_NONE;
			  b0->error = error_node->errors[error0];

			  if (e0->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX
			      || e0->action ==
			      CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
			    vnet_buffer (b0)->sw_if_index[VLIB_TX] =
			      e0->metadata;
			  break;
			}
		    }
		}
	    }

	  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
			     && (b0->flags & VLIB_BUFFER_IS_TRACED)))
	    {
	      ip_inacl_trace_t *t =
		vlib_add_trace (vm, node, b0, sizeof (*t));
	      t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
	      t->next_index = next0;
	      /* t0 is the last table visited, e0 the matching entry if any */
	      t->table_index = t0 ? t0 - vcm->tables : ~0;
	      t->offset = (e0 && t0) ? vnet_classify_get_offset (t0, e0) : ~0;
	    }

	  /* verify speculative enqueue, maybe switch current next frame */
	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
					   to_next, n_left_to_next,
					   bi0, next0);
	}

      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  vlib_node_increment_counter (vm, node->node_index,
			       IP_INACL_ERROR_MISS, misses);
  vlib_node_increment_counter (vm, node->node_index,
			       IP_INACL_ERROR_HIT, hits);
  vlib_node_increment_counter (vm, node->node_index,
			       IP_INACL_ERROR_CHAIN_HIT, chain_hits);
  return frame->n_vectors;
}

/* ip4-inacl node function: the shared body with is_ip4 = 1 */
static uword
ip4_inacl (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  return ip_inacl_inline (vm, node, frame, 1 /* is_ip4 */ );
}


/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_inacl_node) = {
  .function = ip4_inacl,
  .name = "ip4-inacl",
  .vector_size = sizeof (u32),
  .format_trace = format_ip_inacl_trace,
  .n_errors = ARRAY_LEN(ip_inacl_error_strings),
  .error_strings = ip_inacl_error_strings,

  .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
  .next_nodes = {
    [ACL_NEXT_INDEX_DENY] = "error-drop",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (ip4_inacl_node, ip4_inacl);

/* ip6-inacl node function: the shared body with is_ip4 = 0 */
static uword
ip6_inacl (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
  return ip_inacl_inline (vm, node, frame, 0 /* is_ip4 */ );
}


/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_inacl_node) = {
  .function = ip6_inacl,
  .name = "ip6-inacl",
  .vector_size = sizeof (u32),
  .format_trace = format_ip_inacl_trace,
  .n_errors = ARRAY_LEN(ip_inacl_error_strings),
  .error_strings = ip_inacl_error_strings,

  .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
  .next_nodes = {
    [ACL_NEXT_INDEX_DENY] = "error-drop",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (ip6_inacl_node, ip6_inacl);

/* Nothing to set up; only registers this file with the init machinery */
static clib_error_t *
ip_inacl_init (vlib_main_t * vm)
{
  return 0;
}

VLIB_INIT_FUNCTION (ip_inacl_init);


/*
 * fd.io
coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ip_packet.h b/src/vnet/ip/ip_packet.h new file mode 100644 index 00000000..d3f3de77 --- /dev/null +++ b/src/vnet/ip/ip_packet.h @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip_packet.h: packet format common between ip4 & ip6 + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef included_ip_packet_h
#define included_ip_packet_h

#include <vppinfra/byte_order.h>
#include <vppinfra/error.h>

/* IP_PROTOCOL_<name> = <number> for every entry of protocols.def */
typedef enum ip_protocol
{
#define ip_protocol(n,s) IP_PROTOCOL_##s = n,
#include "protocols.def"
#undef ip_protocol
} ip_protocol_t;

/* TCP/UDP ports. */
typedef enum
{
#define ip_port(s,n) IP_PORT_##s = n,
#include "ports.def"
#undef ip_port
} ip_port_t;

/* Classifies protocols into UDP, ICMP or other. */
typedef enum
{
  IP_BUILTIN_PROTOCOL_UDP,
  IP_BUILTIN_PROTOCOL_ICMP,
  IP_BUILTIN_PROTOCOL_UNKNOWN,
} ip_builtin_protocol_t;

/* Well-known multicast groups as (group number, name) */
#define foreach_ip_builtin_multicast_group	\
  _ (1, all_hosts_on_subnet)			\
  _ (2, all_routers_on_subnet)			\
  _ (4, dvmrp)					\
  _ (5, ospf_all_routers)			\
  _ (6, ospf_designated_routers)		\
  _ (13, pim)					\
  _ (18, vrrp)					\
  _ (102, hsrp)					\
  _ (22, igmp_v3)

typedef enum
{
#define _(n,f) IP_MULTICAST_GROUP_##f = n,
  foreach_ip_builtin_multicast_group
#undef _
} ip_multicast_group_t;

/* IP checksum support. */

/* Incremental checksum update. */
typedef uword ip_csum_t;

/* sum + x, with the carry out of the top bit added back in. */
always_inline ip_csum_t
ip_csum_with_carry (ip_csum_t sum, ip_csum_t x)
{
  ip_csum_t t = sum + x;
  return t + (t < x);
}

/*
 * c - x with end-around borrow, i.e. the inverse of ip_csum_with_carry
 * (see the ASSERT). ip_csum_update_inline applies this to the NEW value
 * of the field being changed.
 */
always_inline ip_csum_t
ip_csum_add_even (ip_csum_t c, ip_csum_t x)
{
  ip_csum_t d;

  d = c - x;

  /* Fold in carry from high bit. */
  d -= d > c;

  ASSERT (ip_csum_with_carry (d, x) == c);

  return d;
}

/*
 * c + x with end-around carry. ip_csum_update_inline applies this to the
 * OLD value of the field being changed.
 */
always_inline ip_csum_t
ip_csum_sub_even (ip_csum_t c, ip_csum_t x)
{
  return ip_csum_with_carry (c, x);
}

/*
 * Update checksum 'sum' for a field changing from 'old' to 'new'.
 * field_byte_offset / field_n_bytes describe the field's position and size
 * in its header; they only matter for odd-sized fields, which may need to
 * be moved into the other byte of their 16-bit word first.
 */
always_inline ip_csum_t
ip_csum_update_inline (ip_csum_t sum, ip_csum_t old, ip_csum_t new,
		       u32 field_byte_offset, u32 field_n_bytes)
{
  /* For even 1-byte fields on big-endian and odd 1-byte fields on little endian
     we need to shift byte into place for checksum. */
  if ((field_n_bytes % 2)
      && (field_byte_offset % 2) == CLIB_ARCH_IS_LITTLE_ENDIAN)
    {
      old = old << 8;
      new = new << 8;
    }
  sum = ip_csum_sub_even (sum, old);
  sum = ip_csum_add_even (sum, new);
  return sum;
}

/* Convenience form: offset and size are derived from 'field' of 'type' */
#define ip_csum_update(sum,old,new,type,field)			\
  ip_csum_update_inline ((sum), (old), (new),			\
			 STRUCT_OFFSET_OF (type, field),	\
			 STRUCT_SIZE_OF (type, field))

/* Fold a wide accumulator down to 16 bits, adding the carries back in. */
always_inline u16
ip_csum_fold (ip_csum_t c)
{
  /* Reduce to 16 bits. */
#if uword_bits == 64
  c = (c & (ip_csum_t) 0xffffffff) + (c >> (ip_csum_t) 32);
  c = (c & 0xffff) + (c >> 16);
#endif

  c = (c & 0xffff) + (c >> 16);
  c = (c & 0xffff) + (c >> 16);

  return c;
}

/* Copy data and checksum at the same time. */
ip_csum_t ip_csum_and_memcpy (ip_csum_t sum, void *dst, void *src,
			      uword n_bytes);

/* Same as ip_csum_fold; 'dst' is not used. */
always_inline u16
ip_csum_and_memcpy_fold (ip_csum_t sum, void *dst)
{
  return ip_csum_fold (sum);
}

/* Checksum routine. */
ip_csum_t ip_incremental_checksum (ip_csum_t sum, void *data, uword n_bytes);

#endif /* included_ip_packet_h */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */
diff --git a/src/vnet/ip/ip_source_and_port_range_check.h b/src/vnet/ip/ip_source_and_port_range_check.h new file mode 100644 index 00000000..fefe5ff1 --- /dev/null +++ b/src/vnet/ip/ip_source_and_port_range_check.h @@ -0,0 +1,148 @@
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_ip_ip_source_and_port_range_check_h +#define included_ip_ip_source_and_port_range_check_h + + +typedef struct +{ + /* convenience */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; +} source_range_check_main_t; + +source_range_check_main_t source_range_check_main; + +typedef enum +{ + IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT, + IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT, + IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN, + IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN, + IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS, +} ip_source_and_port_range_check_protocol_t; + +typedef struct +{ + u32 fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS]; +} ip_source_and_port_range_check_config_t; + +#define IP_SOURCE_AND_PORT_RANGE_CHECK_RANGE_LIMIT VLIB_BUFFER_PRE_DATA_SIZE/(2*sizeof(u16x8)); + +typedef struct +{ + union + { + u16x8 as_u16x8; + u16 as_u16[8]; + }; +} u16x8vec_t; + +typedef struct +{ + u16x8vec_t low; + u16x8vec_t hi; +} protocol_port_range_t; + +/** + * @brief The number of supported ranges per-data path object. + * If more ranges are required, bump this number. + */ +#define N_PORT_RANGES_PER_DPO 64 +#define N_RANGES_PER_BLOCK (sizeof(u16x8vec_t)/2) +#define N_BLOCKS_PER_DPO (N_PORT_RANGES_PER_DPO/N_RANGES_PER_BLOCK) + +/** + * @brief + * The object that is in the data-path to perform the check. + * + * Some trade-offs here; memory vs performance. + * + * performance: + * the principle factor is d-cache line misses/hits. + * so we want the data layout to minimise the d-cache misses. 
This + * means not following dependent reads. i.e. not doing + * + * struct B { + * u16 n_ranges; + * range_t *ranges; // vector of ranges. + * } + * + * so to read ranges[0] we would first d-cache miss on the address + * of the object of type B, for which we would need to wait before we + * can get the address of B->ranges. + * So this layout is better: + * + * struct B { + * u16 n_ranges; + * range_t ranges[N]; + * } + * + * memory: + * the latter layout above is more memory hungry. And N needs to be: + * 1 - sized for the maximum required + * 2 - fixed, so that objects of type B can be pool allocated and so + * 'get'-able using an index. + * An option over fixed might be to allocate contiguous chunk from + * the pool (like we used to do for multi-path adjs). + */ +typedef struct protocol_port_range_dpo_t_ +{ + /** + * The number of blocks from the 'blocks' array below + * that have ranges configured. We keep this count so that in the data-path + * we can limit the loop to be only over the blocks we need + */ + u16 n_used_blocks; + + /** + * The total number of free ranges from all blocks. + * Used to prevent overrun of the ranges available. 
+ */ + u16 n_free_ranges; + + /** + * the fixed size array of ranges + */ + protocol_port_range_t blocks[N_BLOCKS_PER_DPO]; +} protocol_port_range_dpo_t; + +int ip4_source_and_port_range_check_add_del (ip4_address_t * address, + u32 length, + u32 vrf_id, + u16 * low_ports, + u16 * hi_ports, int is_add); + +// This will be moved to another file in another patch -- for API freeze +int ip6_source_and_port_range_check_add_del (ip6_address_t * address, + u32 length, + u32 vrf_id, + u16 * low_ports, + u16 * hi_ports, int is_add); + +int set_ip_source_and_port_range_check (vlib_main_t * vm, + u32 * fib_index, + u32 sw_if_index, u32 is_add); + +#endif /* included ip_source_and_port_range_check_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c new file mode 100644 index 00000000..856c4942 --- /dev/null +++ b/src/vnet/ip/lookup.c @@ -0,0 +1,1442 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * ip/ip_lookup.c: ip4/6 adjacency and lookup table managment + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vnet/ip/ip.h> +#include <vnet/adj/adj.h> +#include <vnet/fib/fib_table.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/mpls/mpls.h> +#include <vnet/mfib/mfib_table.h> +#include <vnet/dpo/drop_dpo.h> +#include <vnet/dpo/classify_dpo.h> +#include <vnet/dpo/punt_dpo.h> +#include <vnet/dpo/receive_dpo.h> +#include <vnet/dpo/ip_null_dpo.h> +#include <vnet/ip/ip6_neighbor.h> + +/** + * @file + * @brief IPv4 and IPv6 adjacency and lookup table managment. 
+ * + */ + +clib_error_t * +ip_interface_address_add_del (ip_lookup_main_t * lm, + u32 sw_if_index, + void *addr_fib, + u32 address_length, + u32 is_del, u32 * result_if_address_index) +{ + vnet_main_t *vnm = vnet_get_main (); + ip_interface_address_t *a, *prev, *next; + uword *p = mhash_get (&lm->address_to_if_address_index, addr_fib); + + vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index, + sw_if_index, ~0); + a = p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0; + + /* Verify given length. */ + if ((a && (address_length != a->address_length)) || + (address_length == 0) || + (lm->is_ip6 && address_length > 128) || + (!lm->is_ip6 && address_length > 32)) + { + vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH; + return clib_error_create + ("%U wrong length (expected %d) for interface %U", + lm->format_address_and_length, addr_fib, + address_length, a ? a->address_length : -1, + format_vnet_sw_if_index_name, vnm, sw_if_index); + } + + if (is_del) + { + if (!a) + { + vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index); + vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE; + return clib_error_create ("%U not found for interface %U", + lm->format_address_and_length, + addr_fib, address_length, + format_vnet_sw_interface_name, vnm, si); + } + + if (a->prev_this_sw_interface != ~0) + { + prev = + pool_elt_at_index (lm->if_address_pool, + a->prev_this_sw_interface); + prev->next_this_sw_interface = a->next_this_sw_interface; + } + if (a->next_this_sw_interface != ~0) + { + next = + pool_elt_at_index (lm->if_address_pool, + a->next_this_sw_interface); + next->prev_this_sw_interface = a->prev_this_sw_interface; + + if (a->prev_this_sw_interface == ~0) + lm->if_address_pool_index_by_sw_if_index[sw_if_index] = + a->next_this_sw_interface; + } + + if ((a->next_this_sw_interface == ~0) + && (a->prev_this_sw_interface == ~0)) + lm->if_address_pool_index_by_sw_if_index[sw_if_index] = ~0; + + mhash_unset 
(&lm->address_to_if_address_index, addr_fib, + /* old_value */ 0); + pool_put (lm->if_address_pool, a); + + if (result_if_address_index) + *result_if_address_index = ~0; + } + + else if (!a) + { + u32 pi; /* previous index */ + u32 ai; + u32 hi; /* head index */ + + pool_get (lm->if_address_pool, a); + memset (a, ~0, sizeof (a[0])); + ai = a - lm->if_address_pool; + + hi = pi = lm->if_address_pool_index_by_sw_if_index[sw_if_index]; + prev = 0; + while (pi != (u32) ~ 0) + { + prev = pool_elt_at_index (lm->if_address_pool, pi); + pi = prev->next_this_sw_interface; + } + pi = prev ? prev - lm->if_address_pool : (u32) ~ 0; + + a->address_key = mhash_set (&lm->address_to_if_address_index, + addr_fib, ai, /* old_value */ 0); + a->address_length = address_length; + a->sw_if_index = sw_if_index; + a->flags = 0; + a->prev_this_sw_interface = pi; + a->next_this_sw_interface = ~0; + if (prev) + prev->next_this_sw_interface = ai; + + lm->if_address_pool_index_by_sw_if_index[sw_if_index] = + (hi != ~0) ? hi : ai; + if (result_if_address_index) + *result_if_address_index = ai; + } + else + { + if (sw_if_index != a->sw_if_index) + { + if (result_if_address_index) + *result_if_address_index = ~0; + vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS; + return clib_error_create + ("Prefix %U already found on interface %U", + lm->format_address_and_length, addr_fib, address_length, + format_vnet_sw_if_index_name, vnm, a->sw_if_index); + } + + if (result_if_address_index) + *result_if_address_index = a - lm->if_address_pool; + } + + return /* no error */ 0; +} + +static clib_error_t * +ip_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) +{ + vec_validate_init_empty (ip4_main. + lookup_main.if_address_pool_index_by_sw_if_index, + sw_if_index, ~0); + vec_validate_init_empty (ip6_main. 
+ lookup_main.if_address_pool_index_by_sw_if_index, + sw_if_index, ~0); + + return (NULL); +} + +VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_sw_interface_add_del); + +void +ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6) +{ + if (!lm->fib_result_n_bytes) + lm->fib_result_n_bytes = sizeof (uword); + + lm->is_ip6 = is_ip6; + if (is_ip6) + { + lm->format_address_and_length = format_ip6_address_and_length; + mhash_init (&lm->address_to_if_address_index, sizeof (uword), + sizeof (ip6_address_fib_t)); + } + else + { + lm->format_address_and_length = format_ip4_address_and_length; + mhash_init (&lm->address_to_if_address_index, sizeof (uword), + sizeof (ip4_address_fib_t)); + } + + { + int i; + + /* Setup all IP protocols to be punted and builtin-unknown. */ + for (i = 0; i < 256; i++) + { + lm->local_next_by_ip_protocol[i] = IP_LOCAL_NEXT_PUNT; + lm->builtin_protocol_by_ip_protocol[i] = IP_BUILTIN_PROTOCOL_UNKNOWN; + } + + lm->local_next_by_ip_protocol[IP_PROTOCOL_UDP] = IP_LOCAL_NEXT_UDP_LOOKUP; + lm->local_next_by_ip_protocol[is_ip6 ? IP_PROTOCOL_ICMP6 : + IP_PROTOCOL_ICMP] = IP_LOCAL_NEXT_ICMP; + lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_UDP] = + IP_BUILTIN_PROTOCOL_UDP; + lm->builtin_protocol_by_ip_protocol[is_ip6 ? 
IP_PROTOCOL_ICMP6 : + IP_PROTOCOL_ICMP] = + IP_BUILTIN_PROTOCOL_ICMP; + } +} + +u8 * +format_ip_flow_hash_config (u8 * s, va_list * args) +{ + flow_hash_config_t flow_hash_config = va_arg (*args, u32); + +#define _(n,v) if (flow_hash_config & v) s = format (s, "%s ", #n); + foreach_flow_hash_bit; +#undef _ + + return s; +} + +u8 * +format_ip_lookup_next (u8 * s, va_list * args) +{ + /* int promotion of ip_lookup_next_t */ + ip_lookup_next_t n = va_arg (*args, int); + char *t = 0; + + switch (n) + { + default: + s = format (s, "unknown %d", n); + return s; + + case IP_LOOKUP_NEXT_DROP: + t = "drop"; + break; + case IP_LOOKUP_NEXT_PUNT: + t = "punt"; + break; + case IP_LOOKUP_NEXT_ARP: + t = "arp"; + break; + case IP_LOOKUP_NEXT_MIDCHAIN: + t = "midchain"; + break; + case IP_LOOKUP_NEXT_GLEAN: + t = "glean"; + break; + case IP_LOOKUP_NEXT_MCAST: + t = "mcast"; + break; + case IP_LOOKUP_NEXT_REWRITE: + break; + } + + if (t) + vec_add (s, t, strlen (t)); + + return s; +} + +u8 * +format_ip_adjacency_packet_data (u8 * s, va_list * args) +{ + u32 adj_index = va_arg (*args, u32); + u8 *packet_data = va_arg (*args, u8 *); + u32 n_packet_data_bytes = va_arg (*args, u32); + ip_adjacency_t *adj = adj_get (adj_index); + + switch (adj->lookup_next_index) + { + case IP_LOOKUP_NEXT_REWRITE: + case IP_LOOKUP_NEXT_MCAST: + s = + format (s, "%U", format_hex_bytes, packet_data, n_packet_data_bytes); + break; + + default: + break; + } + + return s; +} + +static uword +unformat_dpo (unformat_input_t * input, va_list * args) +{ + dpo_id_t *dpo = va_arg (*args, dpo_id_t *); + fib_protocol_t fp = va_arg (*args, int); + dpo_proto_t proto; + + proto = fib_proto_to_dpo (fp); + + if (unformat (input, "drop")) + dpo_copy (dpo, drop_dpo_get (proto)); + else if (unformat (input, "punt")) + dpo_copy (dpo, punt_dpo_get (proto)); + else if (unformat (input, "local")) + receive_dpo_add_or_lock (proto, ~0, NULL, dpo); + else if (unformat (input, "null-send-unreach")) + ip_null_dpo_add_and_lock 
(proto, IP_NULL_ACTION_SEND_ICMP_UNREACH, dpo); + else if (unformat (input, "null-send-prohibit")) + ip_null_dpo_add_and_lock (proto, IP_NULL_ACTION_SEND_ICMP_PROHIBIT, dpo); + else if (unformat (input, "null")) + ip_null_dpo_add_and_lock (proto, IP_NULL_ACTION_NONE, dpo); + else if (unformat (input, "classify")) + { + u32 classify_table_index; + + if (!unformat (input, "%d", &classify_table_index)) + { + clib_warning ("classify adj must specify table index"); + return 0; + } + + dpo_set (dpo, DPO_CLASSIFY, proto, + classify_dpo_create (proto, classify_table_index)); + } + else + return 0; + + return 1; +} + +const ip46_address_t zero_addr = { + .as_u64 = { + 0, 0}, +}; + +clib_error_t * +vnet_ip_route_cmd (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + fib_route_path_t *rpaths = NULL, rpath; + dpo_id_t dpo = DPO_INVALID, *dpos = NULL; + fib_prefix_t *prefixs = NULL, pfx; + mpls_label_t out_label, via_label; + clib_error_t *error = NULL; + u32 weight, preference; + u32 table_id, is_del; + vnet_main_t *vnm; + u32 fib_index; + f64 count; + int i; + + vnm = vnet_get_main (); + is_del = 0; + table_id = 0; + count = 1; + memset (&pfx, 0, sizeof (pfx)); + out_label = via_label = MPLS_LABEL_INVALID; + + /* Get a line of input. 
*/ + if (!unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + memset (&rpath, 0, sizeof (rpath)); + + if (unformat (line_input, "table %d", &table_id)) + ; + else if (unformat (line_input, "resolve-via-host")) + { + if (vec_len (rpaths) == 0) + { + error = clib_error_return (0, "Paths then flags"); + goto done; + } + rpaths[vec_len (rpaths) - 1].frp_flags |= + FIB_ROUTE_PATH_RESOLVE_VIA_HOST; + } + else if (unformat (line_input, "resolve-via-attached")) + { + if (vec_len (rpaths) == 0) + { + error = clib_error_return (0, "Paths then flags"); + goto done; + } + rpaths[vec_len (rpaths) - 1].frp_flags |= + FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED; + } + else if (unformat (line_input, "out-labels")) + { + if (vec_len (rpaths) == 0) + { + error = clib_error_return (0, "Paths then labels"); + goto done; + } + else + { + while (unformat (line_input, "%U", + unformat_mpls_unicast_label, &out_label)) + { + vec_add1 (rpaths[vec_len (rpaths) - 1].frp_label_stack, + out_label); + } + } + } + else if (unformat (line_input, "via-label %U", + unformat_mpls_unicast_label, &rpath.frp_local_label)) + { + rpath.frp_weight = 1; + rpath.frp_eos = MPLS_NON_EOS; + rpath.frp_proto = DPO_PROTO_MPLS; + rpath.frp_sw_if_index = ~0; + vec_add1 (rpaths, rpath); + } + else if (unformat (line_input, "count %f", &count)) + ; + + else if (unformat (line_input, "%U/%d", + unformat_ip4_address, &pfx.fp_addr.ip4, &pfx.fp_len)) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + vec_add1 (prefixs, pfx); + } + else if (unformat (line_input, "%U/%d", + unformat_ip6_address, &pfx.fp_addr.ip6, &pfx.fp_len)) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + vec_add1 (prefixs, pfx); + } + else if (unformat (line_input, "via %U %U", + unformat_ip4_address, + &rpath.frp_addr.ip4, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = DPO_PROTO_IP4; + vec_add1 (rpaths, rpath); + } + + 
else if (unformat (line_input, "via %U %U", + unformat_ip6_address, + &rpath.frp_addr.ip6, + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + rpath.frp_proto = DPO_PROTO_IP6; + vec_add1 (rpaths, rpath); + } + else if (unformat (line_input, "weight %u", &weight)) + { + ASSERT (vec_len (rpaths)); + rpaths[vec_len (rpaths) - 1].frp_weight = weight; + } + else if (unformat (line_input, "preference %u", &preference)) + { + ASSERT (vec_len (rpaths)); + rpaths[vec_len (rpaths) - 1].frp_preference = preference; + } + else if (unformat (line_input, "via %U next-hop-table %d", + unformat_ip4_address, + &rpath.frp_addr.ip4, &rpath.frp_fib_index)) + { + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = DPO_PROTO_IP4; + vec_add1 (rpaths, rpath); + } + else if (unformat (line_input, "via %U next-hop-table %d", + unformat_ip6_address, + &rpath.frp_addr.ip6, &rpath.frp_fib_index)) + { + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = DPO_PROTO_IP6; + vec_add1 (rpaths, rpath); + } + else if (unformat (line_input, "via %U", + unformat_ip4_address, &rpath.frp_addr.ip4)) + { + /* + * the recursive next-hops are by default in the same table + * as the prefix + */ + rpath.frp_fib_index = table_id; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = DPO_PROTO_IP4; + vec_add1 (rpaths, rpath); + } + else if (unformat (line_input, "via %U", + unformat_ip6_address, &rpath.frp_addr.ip6)) + { + rpath.frp_fib_index = table_id; + rpath.frp_weight = 1; + rpath.frp_sw_if_index = ~0; + rpath.frp_proto = DPO_PROTO_IP6; + vec_add1 (rpaths, rpath); + } + else if (unformat (line_input, + "lookup in table %d", &rpath.frp_fib_index)) + { + rpath.frp_proto = fib_proto_to_dpo (pfx.fp_proto); + rpath.frp_sw_if_index = ~0; + vec_add1 (rpaths, rpath); + } + else if (vec_len (prefixs) > 0 && + unformat (line_input, "via %U", + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + 
rpath.frp_weight = 1; + rpath.frp_proto = fib_proto_to_dpo (prefixs[0].fp_proto); + vec_add1 (rpaths, rpath); + } + else if (vec_len (prefixs) > 0 && + unformat (line_input, "via %U", + unformat_dpo, &dpo, prefixs[0].fp_proto)) + { + vec_add1 (dpos, dpo); + } + else if (unformat (line_input, "del")) + is_del = 1; + else if (unformat (line_input, "add")) + is_del = 0; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (vec_len (prefixs) == 0) + { + error = + clib_error_return (0, "expected ip4/ip6 destination address/length."); + goto done; + } + + if (!is_del && vec_len (rpaths) + vec_len (dpos) == 0) + { + error = clib_error_return (0, "expected paths."); + goto done; + } + + if (~0 == table_id) + { + /* + * if no table_id is passed we will manipulate the default + */ + fib_index = 0; + } + else + { + fib_index = fib_table_find (prefixs[0].fp_proto, table_id); + + if (~0 == fib_index) + { + error = clib_error_return (0, "Nonexistent table id %d", table_id); + goto done; + } + } + + for (i = 0; i < vec_len (prefixs); i++) + { + if (is_del && 0 == vec_len (rpaths)) + { + fib_table_entry_delete (fib_index, &prefixs[i], FIB_SOURCE_CLI); + } + else if (!is_del && 1 == vec_len (dpos)) + { + fib_table_entry_special_dpo_add (fib_index, + &prefixs[i], + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_EXCLUSIVE, + &dpos[0]); + dpo_reset (&dpos[0]); + } + else if (vec_len (dpos) > 0) + { + error = + clib_error_return (0, + "Load-balancing over multiple special adjacencies is unsupported"); + goto done; + } + else if (0 < vec_len (rpaths)) + { + u32 k, j, n, incr; + ip46_address_t dst = prefixs[i].fp_addr; + f64 t[2]; + n = count; + t[0] = vlib_time_now (vm); + incr = 1 << ((FIB_PROTOCOL_IP4 == prefixs[0].fp_proto ? 
32 : 128) - + prefixs[i].fp_len); + + for (k = 0; k < n; k++) + { + for (j = 0; j < vec_len (rpaths); j++) + { + u32 fi; + /* + * the CLI parsing stored table Ids, swap to FIB indicies + */ + fi = fib_table_find (prefixs[i].fp_proto, + rpaths[i].frp_fib_index); + + if (~0 == fi) + { + error = + clib_error_return (0, "Via table %d does not exist", + rpaths[i].frp_fib_index); + goto done; + } + rpaths[i].frp_fib_index = fi; + + fib_prefix_t rpfx = { + .fp_len = prefixs[i].fp_len, + .fp_proto = prefixs[i].fp_proto, + .fp_addr = dst, + }; + + if (is_del) + fib_table_entry_path_remove2 (fib_index, + &rpfx, + FIB_SOURCE_CLI, &rpaths[j]); + else + fib_table_entry_path_add2 (fib_index, + &rpfx, + FIB_SOURCE_CLI, + FIB_ENTRY_FLAG_NONE, + &rpaths[j]); + } + + if (FIB_PROTOCOL_IP4 == prefixs[0].fp_proto) + { + dst.ip4.as_u32 = + clib_host_to_net_u32 (incr + + clib_net_to_host_u32 (dst. + ip4.as_u32)); + } + else + { + int bucket = (incr < 64 ? 0 : 1); + dst.ip6.as_u64[bucket] = + clib_host_to_net_u64 (incr + + clib_net_to_host_u64 (dst.ip6.as_u64 + [bucket])); + + } + } + t[1] = vlib_time_now (vm); + if (count > 1) + vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0])); + } + else + { + error = clib_error_return (0, "Don't understand what you want..."); + goto done; + } + } + + +done: + vec_free (dpos); + vec_free (prefixs); + vec_free (rpaths); + unformat_free (line_input); + return error; +} + +clib_error_t * +vnet_ip_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, + vlib_cli_command_t * cmd, fib_protocol_t fproto) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; + u32 table_id, is_add; + u8 *name = NULL; + + is_add = 1; + table_id = ~0; + + /* Get a line of input. 
*/ + if (!unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_add = 0; + else if (unformat (line_input, "add")) + is_add = 1; + else if (unformat (line_input, "name %s", &name)) + ; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (~0 == table_id) + { + error = clib_error_return (0, "No table id"); + goto done; + } + else if (0 == table_id) + { + error = clib_error_return (0, "Can't change the default table"); + goto done; + } + else + { + if (is_add) + { + ip_table_create (fproto, table_id, 0, name); + } + else + { + ip_table_delete (fproto, table_id, 0); + } + } + +done: + unformat_free (line_input); + return error; +} + +clib_error_t * +vnet_ip4_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP4)); +} + +clib_error_t * +vnet_ip6_table_cmd (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6)); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = { + .path = "ip", + .short_help = "Internet protocol (IP) commands", +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (vlib_cli_ip6_command, static) = { + .path = "ip6", + .short_help = "Internet protocol version 6 (IPv6) commands", +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (vlib_cli_show_ip_command, static) = { + .path = "show ip", + .short_help = "Internet protocol (IP) show commands", +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = { + .path = "show ip6", + .short_help = "Internet protocol version 6 (IPv6) show commands", +}; +/* *INDENT-ON* */ + +/*? 
+ * This command is used to add or delete IPv4 or IPv6 routes. All + * IP Addresses ('<em><dst-ip-addr>/<width></em>', + * '<em><next-hop-ip-addr></em>' and '<em><adj-hop-ip-addr></em>') + * can be IPv4 or IPv6, but all must be of the same form in a single + * command. To display the current set of routes, use the commands + * '<em>show ip fib</em>' and '<em>show ip6 fib</em>'. + * + * @cliexpar + * Example of how to add a straight forward static route: + * @cliexcmd{ip route add 6.0.1.2/32 via 6.0.0.1 GigabitEthernet2/0/0} + * Example of how to delete a straight forward static route: + * @cliexcmd{ip route del 6.0.1.2/32 via 6.0.0.1 GigabitEthernet2/0/0} + * Mainly for route add/del performance testing, one can add or delete + * multiple routes by adding 'count N' to the previous item: + * @cliexcmd{ip route add count 10 7.0.0.0/24 via 6.0.0.1 GigabitEthernet2/0/0} + * Add multiple routes for the same destination to create equal-cost multipath: + * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.1 GigabitEthernet2/0/0} + * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.2 GigabitEthernet2/0/0} + * For unequal-cost multipath, specify the desired weights. 
This + * combination of weights results in 3/4 of the traffic following the + * second path, 1/4 following the first path: + * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.1 GigabitEthernet2/0/0 weight 1} + * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.2 GigabitEthernet2/0/0 weight 3} + * To add a route to a particular FIB table (VRF), use: + * @cliexcmd{ip route add 172.16.24.0/24 table 7 via GigabitEthernet2/0/0} + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip_route_command, static) = { + .path = "ip route", + .short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table <table-id>] [via <next-hop-ip-addr> [<interface>] [weight <weight>]] | [via arp <interface> <adj-hop-ip-addr>] | [via drop|punt|local<id>|arp|classify <classify-idx>] [lookup in table <out-table-id>]", + .function = vnet_ip_route_cmd, + .is_mp_safe = 1, +}; + +/* *INDENT-ON* */ +/*? + * This command is used to add or delete IPv4 Tables. All + * Tables must be explicitly added before that can be used. Creating a + * table will add both unicast and multicast FIBs + * + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip4_table_command, static) = { + .path = "ip table", + .short_help = "ip table [add|del] <table-id>", + .function = vnet_ip4_table_cmd, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +/* *INDENT-ON* */ +/*? + * This command is used to add or delete IPv4 Tables. All + * Tables must be explicitly added before that can be used. 
Creating a + * table will add both unicast and multicast FIBs + * + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip6_table_command, static) = { + .path = "ip6 table", + .short_help = "ip6 table [add|del] <table-id>", + .function = vnet_ip6_table_cmd, + .is_mp_safe = 1, +}; + +static clib_error_t * +ip_table_bind_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd, + fib_protocol_t fproto) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *error = 0; + u32 sw_if_index, table_id; + int rv; + + sw_if_index = ~0; + + if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = clib_error_return (0, "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + + if (unformat (input, "%d", &table_id)) + ; + else + { + error = clib_error_return (0, "expected table id `%U'", + format_unformat_error, input); + goto done; + } + + rv = ip_table_bind (fproto, sw_if_index, table_id, 0); + + if (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE == rv) + { + error = clib_error_return (0, "IP addresses are still present on %U", + format_vnet_sw_if_index_name, + vnet_get_main(), + sw_if_index); + } + else if (VNET_API_ERROR_NO_SUCH_FIB == rv) + { + error = clib_error_return (0, "no such table %d", table_id); + } + else if (0 != rv) + { + error = clib_error_return (0, "unknown error"); + } + + done: + return error; +} + +static clib_error_t * +ip4_table_bind_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + return (ip_table_bind_cmd (vm , input, cmd, FIB_PROTOCOL_IP4)); +} + +static clib_error_t * +ip6_table_bind_cmd (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + return (ip_table_bind_cmd (vm , input, cmd, FIB_PROTOCOL_IP6)); +} + +/*? + * Place the indicated interface into the supplied IPv4 FIB table (also known + * as a VRF). If the FIB table does not exist, this command creates it. 
To + * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'. + * The FIB table will only be displayed if a route has been added to the table, or + * an IP Address is assigned to an interface in the table (which adds a route + * automatically). + * + * @note IP addresses added after setting the interface IP table are added to + * the indicated FIB table. If an IP address is added prior to changing the + * table then this is an error. The control plane must remove these addresses + * first and then change the table. VPP will not automatically move the + * addresses from the old to the new table as it does not know the validity + * of such a change. + * + * @cliexpar + * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id): + * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2} + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = +{ + .path = "set interface ip table", + .function = ip4_table_bind_cmd, + .short_help = "set interface ip table <interface> <table-id>", +}; +/* *INDENT-ON* */ + +/*? + * Place the indicated interface into the supplied IPv6 FIB table (also known + * as a VRF). The FIB table must already exist (see '<em>ip6 table add</em>'); + * binding to a nonexistent table fails with 'no such table'. To + * display the current IPv6 FIB table, use the command '<em>show ip6 fib</em>'. + * The FIB table will only be displayed if a route has been added to the table, or + * an IP Address is assigned to an interface in the table (which adds a route + * automatically). + * + * @note IP addresses added after setting the interface IP table are added to + * the indicated FIB table. If an IP address is added prior to changing the + * table then this is an error. The control plane must remove these addresses + * first and then change the table. VPP will not automatically move the + * addresses from the old to the new table as it does not know the validity + * of such a change. 
+ * + * @cliexpar + * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id): + * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2} + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) = +{ + .path = "set interface ip6 table", + .function = ip6_table_bind_cmd, + .short_help = "set interface ip6 table <interface> <table-id>" +}; +/* *INDENT-ON* */ + +clib_error_t * +vnet_ip_mroute_cmd (vlib_main_t * vm, + unformat_input_t * main_input, vlib_cli_command_t * cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error = NULL; + fib_route_path_t rpath; + u32 table_id, is_del; + vnet_main_t *vnm; + mfib_prefix_t pfx; + u32 fib_index; + mfib_itf_flags_t iflags = 0; + mfib_entry_flags_t eflags = 0; + u32 gcount, scount, ss, gg, incr; + f64 timet[2]; + + gcount = scount = 1; + vnm = vnet_get_main (); + is_del = 0; + table_id = 0; + memset (&pfx, 0, sizeof (pfx)); + memset (&rpath, 0, sizeof (rpath)); + rpath.frp_sw_if_index = ~0; + + /* Get a line of input. 
*/ + if (!unformat_user (main_input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "table %d", &table_id)) + ; + else if (unformat (line_input, "del")) + is_del = 1; + else if (unformat (line_input, "add")) + is_del = 0; + else if (unformat (line_input, "scount %d", &scount)) + ; + else if (unformat (line_input, "gcount %d", &gcount)) + ; + else if (unformat (line_input, "%U %U", + unformat_ip4_address, + &pfx.fp_src_addr.ip4, + unformat_ip4_address, &pfx.fp_grp_addr.ip4)) + { + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = 64; + } + else if (unformat (line_input, "%U %U", + unformat_ip6_address, + &pfx.fp_src_addr.ip6, + unformat_ip6_address, &pfx.fp_grp_addr.ip6)) + { + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = 256; + } + else if (unformat (line_input, "%U/%d", + unformat_ip4_address, + &pfx.fp_grp_addr.ip4, &pfx.fp_len)) + { + memset (&pfx.fp_src_addr.ip4, 0, sizeof (pfx.fp_src_addr.ip4)); + pfx.fp_proto = FIB_PROTOCOL_IP4; + } + else if (unformat (line_input, "%U/%d", + unformat_ip6_address, + &pfx.fp_grp_addr.ip6, &pfx.fp_len)) + { + memset (&pfx.fp_src_addr.ip6, 0, sizeof (pfx.fp_src_addr.ip6)); + pfx.fp_proto = FIB_PROTOCOL_IP6; + } + else if (unformat (line_input, "%U", + unformat_ip4_address, &pfx.fp_grp_addr.ip4)) + { + memset (&pfx.fp_src_addr.ip4, 0, sizeof (pfx.fp_src_addr.ip4)); + pfx.fp_proto = FIB_PROTOCOL_IP4; + pfx.fp_len = 32; + } + else if (unformat (line_input, "%U", + unformat_ip6_address, &pfx.fp_grp_addr.ip6)) + { + memset (&pfx.fp_src_addr.ip6, 0, sizeof (pfx.fp_src_addr.ip6)); + pfx.fp_proto = FIB_PROTOCOL_IP6; + pfx.fp_len = 128; + } + else if (unformat (line_input, "via %U", + unformat_vnet_sw_interface, vnm, + &rpath.frp_sw_if_index)) + { + rpath.frp_weight = 1; + } + else if (unformat (line_input, "via local")) + { + rpath.frp_sw_if_index = ~0; + rpath.frp_weight = 1; + rpath.frp_flags |= FIB_ROUTE_PATH_LOCAL; + } + else if 
(unformat (line_input, "%U", unformat_mfib_itf_flags, &iflags)) + ; + else if (unformat (line_input, "%U", + unformat_mfib_entry_flags, &eflags)) + ; + else + { + error = unformat_parse_error (line_input); + goto done; + } + } + + if (~0 == table_id) + { + /* + * if no table_id is passed we will manipulate the default + */ + fib_index = 0; + } + else + { + fib_index = mfib_table_find (pfx.fp_proto, table_id); + + if (~0 == fib_index) + { + error = clib_error_return (0, "Nonexistent table id %d", table_id); + goto done; + } + } + + timet[0] = vlib_time_now (vm); + + if (FIB_PROTOCOL_IP4 == pfx.fp_proto) + { + incr = 1 << (32 - (pfx.fp_len % 32)); + } + else + { + incr = 1 << (128 - (pfx.fp_len % 128)); + } + + for (ss = 0; ss < scount; ss++) + { + for (gg = 0; gg < gcount; gg++) + { + if (is_del && 0 == rpath.frp_weight) + { + /* no path provided => route delete */ + mfib_table_entry_delete (fib_index, &pfx, MFIB_SOURCE_CLI); + } + else if (eflags) + { + mfib_table_entry_update (fib_index, &pfx, MFIB_SOURCE_CLI, + MFIB_RPF_ID_NONE, eflags); + } + else + { + if (is_del) + mfib_table_entry_path_remove (fib_index, + &pfx, MFIB_SOURCE_CLI, &rpath); + else + mfib_table_entry_path_update (fib_index, + &pfx, MFIB_SOURCE_CLI, &rpath, + iflags); + } + + if (FIB_PROTOCOL_IP4 == pfx.fp_proto) + { + pfx.fp_grp_addr.ip4.as_u32 = + clib_host_to_net_u32 (incr + + clib_net_to_host_u32 (pfx. + fp_grp_addr.ip4. + as_u32)); + } + else + { + int bucket = (incr < 64 ? 0 : 1); + pfx.fp_grp_addr.ip6.as_u64[bucket] = + clib_host_to_net_u64 (incr + + clib_net_to_host_u64 (pfx. + fp_grp_addr.ip6.as_u64 + [bucket])); + + } + } + if (FIB_PROTOCOL_IP4 == pfx.fp_proto) + { + pfx.fp_src_addr.ip4.as_u32 = + clib_host_to_net_u32 (1 + + clib_net_to_host_u32 (pfx.fp_src_addr. + ip4.as_u32)); + } + else + { + pfx.fp_src_addr.ip6.as_u64[1] = + clib_host_to_net_u64 (1 + + clib_net_to_host_u64 (pfx.fp_src_addr. 
+ ip6.as_u64[1])); + } + } + + timet[1] = vlib_time_now (vm); + + if (scount > 1 || gcount > 1) + vlib_cli_output (vm, "%.6e routes/sec", + (scount * gcount) / (timet[1] - timet[0])); + +done: + unformat_free (line_input); + + return error; +} + +/*? + * This command is used to add or delete IPv4 or IPv6 multicastroutes. All + * IP Addresses ('<em><dst-ip-addr>/<width></em>', + * '<em><next-hop-ip-addr></em>' and '<em><adj-hop-ip-addr></em>') + * can be IPv4 or IPv6, but all must be of the same form in a single + * command. To display the current set of routes, use the commands + * '<em>show ip mfib</em>' and '<em>show ip6 mfib</em>'. + * The full set of support flags for interfaces and route is shown via; + * '<em>show mfib route flags</em>' and '<em>show mfib itf flags</em>' + * respectively. + * @cliexpar + * Example of how to add a forwarding interface to a route (and create the + * route if it does not exist) + * @cliexcmd{ip mroute add 232.1.1.1 via GigabitEthernet2/0/0 Forward} + * Example of how to add an accepting interface to a route (and create the + * route if it does not exist) + * @cliexcmd{ip mroute add 232.1.1.1 via GigabitEthernet2/0/1 Accept} + * Example of changing the route's flags to send signals via the API + * @cliexcmd{ip mroute add 232.1.1.1 Signal} + + ?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip_mroute_command, static) = +{ + .path = "ip mroute", + .short_help = "ip mroute [add|del] <dst-ip-addr>/<width> [table <table-id>] [via <next-hop-ip-addr> [<interface>],", + .function = vnet_ip_mroute_cmd, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +/* + * The next two routines address a longstanding script hemorrhoid. + * Probing a v4 or v6 neighbor needs to appear to be synchronous, + * or dependent route-adds will simply fail. 
+ */ +static clib_error_t * +ip6_probe_neighbor_wait (vlib_main_t * vm, ip6_address_t * a, u32 sw_if_index, + int retry_count) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *e; + int i; + int resolved = 0; + uword event_type; + uword *event_data = 0; + + ASSERT (vlib_in_process_context (vm)); + + if (retry_count > 0) + vnet_register_ip6_neighbor_resolution_event + (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index, + 1 /* event */ , 0 /* data */ ); + + for (i = 0; i < retry_count; i++) + { + /* The interface may be down, etc. */ + e = ip6_probe_neighbor (vm, a, sw_if_index); + + if (e) + return e; + + vlib_process_wait_for_event_or_clock (vm, 1.0); + event_type = vlib_process_get_events (vm, &event_data); + switch (event_type) + { + case 1: /* resolved... */ + vlib_cli_output (vm, "Resolved %U", format_ip6_address, a); + resolved = 1; + goto done; + + case ~0: /* timeout */ + break; + + default: + clib_warning ("unknown event_type %d", event_type); + } + vec_reset_length (event_data); + } + +done: + + if (!resolved) + return clib_error_return (0, "Resolution failed for %U", + format_ip6_address, a); + return 0; +} + +static clib_error_t * +ip4_probe_neighbor_wait (vlib_main_t * vm, ip4_address_t * a, u32 sw_if_index, + int retry_count) +{ + vnet_main_t *vnm = vnet_get_main (); + clib_error_t *e; + int i; + int resolved = 0; + uword event_type; + uword *event_data = 0; + + ASSERT (vlib_in_process_context (vm)); + + if (retry_count > 0) + vnet_register_ip4_arp_resolution_event + (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index, + 1 /* event */ , 0 /* data */ ); + + for (i = 0; i < retry_count; i++) + { + /* The interface may be down, etc. */ + e = ip4_probe_neighbor (vm, a, sw_if_index); + + if (e) + return e; + + vlib_process_wait_for_event_or_clock (vm, 1.0); + event_type = vlib_process_get_events (vm, &event_data); + switch (event_type) + { + case 1: /* resolved... 
*/ + vlib_cli_output (vm, "Resolved %U", format_ip4_address, a); + resolved = 1; + goto done; + + case ~0: /* timeout */ + break; + + default: + clib_warning ("unknown event_type %d", event_type); + } + vec_reset_length (event_data); + } + +done: + + vec_reset_length (event_data); + + if (!resolved) + return clib_error_return (0, "Resolution failed for %U", + format_ip4_address, a); + return 0; +} + +static clib_error_t * +probe_neighbor_address (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + vnet_main_t *vnm = vnet_get_main (); + unformat_input_t _line_input, *line_input = &_line_input; + ip4_address_t a4; + ip6_address_t a6; + clib_error_t *error = 0; + u32 sw_if_index = ~0; + int retry_count = 3; + int is_ip4 = 1; + int address_set = 0; + + /* Get a line of input. */ + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat_user (line_input, unformat_vnet_sw_interface, vnm, + &sw_if_index)) + ; + else if (unformat (line_input, "retry %d", &retry_count)) + ; + + else if (unformat (line_input, "%U", unformat_ip4_address, &a4)) + address_set++; + else if (unformat (line_input, "%U", unformat_ip6_address, &a6)) + { + address_set++; + is_ip4 = 0; + } + else + { + error = clib_error_return (0, "unknown input '%U'", + format_unformat_error, line_input); + goto done; + } + } + + if (sw_if_index == ~0) + { + error = clib_error_return (0, "Interface required, not set."); + goto done; + } + if (address_set == 0) + { + error = clib_error_return (0, "ip address required, not set."); + goto done; + } + if (address_set > 1) + { + error = clib_error_return (0, "Multiple ip addresses not supported."); + goto done; + } + + if (is_ip4) + error = ip4_probe_neighbor_wait (vm, &a4, sw_if_index, retry_count); + else + error = ip6_probe_neighbor_wait (vm, &a6, sw_if_index, retry_count); + +done: + unformat_free (line_input); + + return error; +} + 
+/*? + * The '<em>ip probe-neighbor</em>' command ARPs for IPv4 addresses or + * attempts IPv6 neighbor discovery depending on the supplied IP address + * format. + * + * @note This command will not immediately affect the indicated FIB; it + * is not suitable for use in establishing a FIB entry prior to adding + * recursive FIB entries. As in: don't use it in a script to probe a + * gateway prior to adding a default route. It won't work. Instead, + * configure a static ARP cache entry [see '<em>set ip arp</em>'], or + * a static IPv6 neighbor [see '<em>set ip6 neighbor</em>']. + * + * @cliexpar + * Example of probe for an IPv4 address: + * @cliexcmd{ip probe-neighbor GigabitEthernet2/0/0 172.16.1.2} +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ip_probe_neighbor_command, static) = { + .path = "ip probe-neighbor", + .function = probe_neighbor_address, + .short_help = "ip probe-neighbor <interface> <ip4-addr> | <ip6-addr> [retry nn]", + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/lookup.h b/src/vnet/ip/lookup.h new file mode 100644 index 00000000..28a4bd8f --- /dev/null +++ b/src/vnet/ip/lookup.h @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * ip/ip_lookup.h: ip (4 or 6) lookup structures, adjacencies, ... 
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @file + * Definitions for all things IP (v4|v6) unicast and multicast lookup related. + * + * - Adjacency definitions and registration. + * - Callbacks on route add. + * - Callbacks on interface address change. 
+ */ +#ifndef included_ip_lookup_h +#define included_ip_lookup_h + +#include <vnet/vnet.h> +#include <vlib/buffer.h> +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> +#include <vnet/fib/fib_node.h> +#include <vnet/adj/adj.h> +#include <vnet/dpo/dpo.h> +#include <vnet/feature/feature.h> + +/** Flow hash configuration */ +#define IP_FLOW_HASH_SRC_ADDR (1<<0) +#define IP_FLOW_HASH_DST_ADDR (1<<1) +#define IP_FLOW_HASH_PROTO (1<<2) +#define IP_FLOW_HASH_SRC_PORT (1<<3) +#define IP_FLOW_HASH_DST_PORT (1<<4) +#define IP_FLOW_HASH_REVERSE_SRC_DST (1<<5) + +/** Default: 5-tuple without the "reverse" bit */ +#define IP_FLOW_HASH_DEFAULT (0x1F) + +#define foreach_flow_hash_bit \ +_(src, IP_FLOW_HASH_SRC_ADDR) \ +_(dst, IP_FLOW_HASH_DST_ADDR) \ +_(sport, IP_FLOW_HASH_SRC_PORT) \ +_(dport, IP_FLOW_HASH_DST_PORT) \ +_(proto, IP_FLOW_HASH_PROTO) \ +_(reverse, IP_FLOW_HASH_REVERSE_SRC_DST) + +/** + * A flow hash configuration is a mask of the flow hash options + */ +typedef u32 flow_hash_config_t; + +/* An all zeros address */ +extern const ip46_address_t zero_addr; + + +typedef struct +{ + /* Key for mhash; in fact, just a byte offset into mhash key vector. */ + u32 address_key; + + /* Interface which has this address. */ + u32 sw_if_index; + + /* Address (prefix) length for this interface. */ + u16 address_length; + + /* Will be used for something eventually. Primary vs. secondary? */ + u16 flags; + + /* Next and previous pointers for doubly linked list of + addresses per software interface. */ + u32 next_this_sw_interface; + u32 prev_this_sw_interface; +} ip_interface_address_t; + +typedef enum +{ + IP_LOCAL_NEXT_DROP, + IP_LOCAL_NEXT_PUNT, + IP_LOCAL_NEXT_UDP_LOOKUP, + IP_LOCAL_NEXT_ICMP, + IP_LOCAL_N_NEXT, +} ip_local_next_t; + +struct ip_lookup_main_t; + +typedef struct ip_lookup_main_t +{ + /** Pool of addresses that are assigned to interfaces. 
*/ + ip_interface_address_t *if_address_pool; + + /** Hash table mapping address to index in interface address pool. */ + mhash_t address_to_if_address_index; + + /** Head of doubly linked list of interface addresses for each software interface. + ~0 means this interface has no address. */ + u32 *if_address_pool_index_by_sw_if_index; + + /** First table index to use for this interface, ~0 => none */ + u32 *classify_table_index_by_sw_if_index; + + /** Feature arc indices */ + u8 mcast_feature_arc_index; + u8 ucast_feature_arc_index; + u8 output_feature_arc_index; + + /** Number of bytes in a fib result. Must be at least + sizeof (uword). First word is always adjacency index. */ + u32 fib_result_n_bytes, fib_result_n_words; + + /** 1 for ip6; 0 for ip4. */ + u32 is_ip6; + + /** Either format_ip4_address_and_length or format_ip6_address_and_length. */ + format_function_t *format_address_and_length; + + /** Table mapping ip protocol to ip[46]-local node next index. */ + u8 local_next_by_ip_protocol[256]; + + /** IP_BUILTIN_PROTOCOL_{TCP,UDP,ICMP,OTHER} by protocol in IP header. */ + u8 builtin_protocol_by_ip_protocol[256]; +} ip_lookup_main_t; + +clib_error_t *ip_interface_address_add_del (ip_lookup_main_t * lm, + u32 sw_if_index, + void *address, + u32 address_length, + u32 is_del, u32 * result_index); + +u8 *format_ip_flow_hash_config (u8 * s, va_list * args); + +always_inline ip_interface_address_t * +ip_get_interface_address (ip_lookup_main_t * lm, void *addr_fib) +{ + uword *p = mhash_get (&lm->address_to_if_address_index, addr_fib); + return p ? 
pool_elt_at_index (lm->if_address_pool, p[0]) : 0; +} + +always_inline void * +ip_interface_address_get_address (ip_lookup_main_t * lm, + ip_interface_address_t * a) +{ + return mhash_key_to_mem (&lm->address_to_if_address_index, a->address_key); +} + +/* *INDENT-OFF* */ +#define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \ +do { \ + vnet_main_t *_vnm = vnet_get_main(); \ + u32 _sw_if_index = sw_if_index; \ + vnet_sw_interface_t *_swif; \ + _swif = vnet_get_sw_interface (_vnm, _sw_if_index); \ + \ + /* \ + * Loop => honor unnumbered interface addressing. \ + */ \ + if (_swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) \ + { \ + if (loop) \ + _sw_if_index = _swif->unnumbered_sw_if_index; \ + else \ + /* the interface is unnumbered, by the caller does not want \ + * unnumbered interfaces considered/honoured */ \ + break; \ + } \ + u32 _ia = ((vec_len((lm)->if_address_pool_index_by_sw_if_index) \ + > (_sw_if_index)) ? \ + vec_elt ((lm)->if_address_pool_index_by_sw_if_index, \ + (_sw_if_index)) : \ + (u32)~0); \ + ip_interface_address_t * _a; \ + while (_ia != ~0) \ + { \ + _a = pool_elt_at_index ((lm)->if_address_pool, _ia); \ + _ia = _a->next_this_sw_interface; \ + (a) = _a; \ + body; \ + } \ +} while (0) +/* *INDENT-ON* */ + +void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index); + +#endif /* included_ip_lookup_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ping.c b/src/vnet/ip/ping.c new file mode 100755 index 00000000..0fa537f6 --- /dev/null +++ b/src/vnet/ip/ping.c @@ -0,0 +1,928 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stddef.h> +#include <vnet/ip/ping.h> +#include <vnet/fib/ip6_fib.h> +#include <vnet/fib/ip4_fib.h> +#include <vnet/fib/fib_entry.h> +#include <vlib/vlib.h> + +/** + * @file + * @brief IPv4 and IPv6 ICMP Ping. + * + * This file contains code to suppport IPv4 or IPv6 ICMP ECHO_REQUEST to + * network hosts. + * + */ + + +u8 * +format_icmp_echo_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + icmp_echo_trace_t *t = va_arg (*va, icmp_echo_trace_t *); + + s = format (s, "ICMP echo id %d seq %d%s", + clib_net_to_host_u16 (t->id), + clib_net_to_host_u16 (t->seq), t->bound ? "" : " (unknown)"); + + return s; +} + +/* + * If we can find the ping run by an ICMP ID, then we send the signal + * to the CLI process referenced by that ping run, alongside with + * a freshly made copy of the packet. + * I opted for a packet copy to keep the main packet processing path + * the same as for all the other nodes. 
+ * + */ + +static int +signal_ip46_icmp_reply_event (u8 event_type, vlib_buffer_t * b0) +{ + ping_main_t *pm = &ping_main; + u16 net_icmp_id = 0; + u32 bi0_copy = 0; + + switch (event_type) + { + case PING_RESPONSE_IP4: + { + icmp4_echo_request_header_t *h0 = vlib_buffer_get_current (b0); + net_icmp_id = h0->icmp_echo.id; + } + break; + case PING_RESPONSE_IP6: + { + icmp6_echo_request_header_t *h0 = vlib_buffer_get_current (b0); + net_icmp_id = h0->icmp_echo.id; + } + break; + default: + return 0; + } + + uword *p = hash_get (pm->ping_run_by_icmp_id, + clib_net_to_host_u16 (net_icmp_id)); + if (!p) + return 0; + + ping_run_t *pr = vec_elt_at_index (pm->ping_runs, p[0]); + vlib_main_t *vm = vlib_mains[pr->cli_thread_index]; + if (vlib_buffer_alloc (vm, &bi0_copy, 1) == 1) + { + void *dst = vlib_buffer_get_current (vlib_get_buffer (vm, + bi0_copy)); + clib_memcpy (dst, vlib_buffer_get_current (b0), b0->current_length); + } + /* If buffer_alloc failed, bi0_copy == 0 - just signaling an event. */ + f64 nowts = vlib_time_now (vm); + /* Pass the timestamp to the cli_process thanks to the vnet_buffer unused metadata field */ + clib_memcpy (vnet_buffer + (vlib_get_buffer + (vm, bi0_copy))->unused, &nowts, sizeof (nowts)); + vlib_process_signal_event_mt (vm, pr->cli_process_id, event_type, bi0_copy); + return 1; +} + +/* + * Process ICMPv6 echo replies + */ +static uword +ip6_icmp_echo_reply_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from; + + from = vlib_frame_vector_args (frame); /* array of buffer indices */ + n_left_from = frame->n_vectors; /* number of buffer indices */ + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + + next0 = signal_ip46_icmp_reply_event (PING_RESPONSE_IP6, b0) ? 
+ ICMP6_ECHO_REPLY_NEXT_DROP : ICMP6_ECHO_REPLY_NEXT_PUNT; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + icmp6_echo_request_header_t *h0 = vlib_buffer_get_current (b0); + icmp_echo_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->id = h0->icmp_echo.id; + tr->seq = h0->icmp_echo.seq; + tr->bound = (next0 == ICMP6_ECHO_REPLY_NEXT_DROP); + } + + /* push this pkt to the next graph node */ + vlib_set_next_frame_buffer (vm, node, next0, bi0); + + from += 1; + n_left_from -= 1; + } + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip6_icmp_echo_reply_node, static) = +{ + .function = ip6_icmp_echo_reply_node_fn, + .name = "ip6-icmp-echo-reply", + .vector_size = sizeof (u32), + .format_trace = format_icmp_echo_trace, + .n_next_nodes = ICMP6_ECHO_REPLY_N_NEXT, + .next_nodes = { + [ICMP6_ECHO_REPLY_NEXT_DROP] = "error-drop", + [ICMP6_ECHO_REPLY_NEXT_PUNT] = "error-punt", + }, +}; +/* *INDENT-ON* */ + +/* + * Process ICMPv4 echo replies + */ +static uword +ip4_icmp_echo_reply_node_fn (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 n_left_from, *from; + + from = vlib_frame_vector_args (frame); /* array of buffer indices */ + n_left_from = frame->n_vectors; /* number of buffer indices */ + + while (n_left_from > 0) + { + u32 bi0; + vlib_buffer_t *b0; + u32 next0; + + bi0 = from[0]; + b0 = vlib_get_buffer (vm, bi0); + + next0 = signal_ip46_icmp_reply_event (PING_RESPONSE_IP4, b0) ? 
+ ICMP4_ECHO_REPLY_NEXT_DROP : ICMP4_ECHO_REPLY_NEXT_PUNT; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + icmp4_echo_request_header_t *h0 = vlib_buffer_get_current (b0); + icmp_echo_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr)); + tr->id = h0->icmp_echo.id; + tr->seq = h0->icmp_echo.seq; + tr->bound = (next0 == ICMP4_ECHO_REPLY_NEXT_DROP); + } + + /* push this pkt to the next graph node */ + vlib_set_next_frame_buffer (vm, node, next0, bi0); + + from += 1; + n_left_from -= 1; + } + + return frame->n_vectors; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (ip4_icmp_echo_reply_node, static) = +{ + .function = ip4_icmp_echo_reply_node_fn, + .name = "ip4-icmp-echo-reply", + .vector_size = sizeof (u32), + .format_trace = format_icmp_echo_trace, + .n_next_nodes = ICMP4_ECHO_REPLY_N_NEXT, + .next_nodes = { + [ICMP4_ECHO_REPLY_NEXT_DROP] = "error-drop", + [ICMP4_ECHO_REPLY_NEXT_PUNT] = "error-punt", + }, +}; +/* *INDENT-ON* */ + +char *ip6_lookup_next_nodes[] = IP6_LOOKUP_NEXT_NODES; +char *ip4_lookup_next_nodes[] = IP4_LOOKUP_NEXT_NODES; + +/* Fill in the ICMP ECHO structure, return the safety-checked and possibly shrunk data_len */ +static u16 +init_icmp46_echo_request (icmp46_echo_request_t * icmp46_echo, + u16 seq_host, u16 id_host, u16 data_len) +{ + int i; + icmp46_echo->seq = clib_host_to_net_u16 (seq_host); + icmp46_echo->id = clib_host_to_net_u16 (id_host); + + if (data_len > PING_MAXIMUM_DATA_SIZE) + data_len = PING_MAXIMUM_DATA_SIZE; + for (i = 0; i < data_len; i++) + icmp46_echo->data[i] = i % 256; + return data_len; +} + +static send_ip46_ping_result_t +send_ip6_ping (vlib_main_t * vm, ip6_main_t * im, + u32 table_id, ip6_address_t * pa6, + u32 sw_if_index, u16 seq_host, u16 id_host, u16 data_len, + u32 burst, u8 verbose) +{ + icmp6_echo_request_header_t *h0; + u32 bi0 = 0; + int bogus_length = 0; + vlib_buffer_t *p0; + vlib_frame_t *f; + u32 *to_next; + vlib_buffer_free_list_t *fl; + + if (vlib_buffer_alloc (vm, &bi0, 1) != 1) + 
return SEND_PING_ALLOC_FAIL; + + p0 = vlib_get_buffer (vm, bi0); + fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + vlib_buffer_init_for_free_list (p0, fl); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (p0); + + /* + * if the user did not provide a source interface, use the any interface + * that the destination resolves via. + */ + if (~0 == sw_if_index) + { + fib_node_index_t fib_entry_index; + u32 fib_index; + + fib_index = ip6_fib_index_from_table_id (table_id); + + if (~0 == fib_index) + { + vlib_buffer_free (vm, &bi0, 1); + return SEND_PING_NO_TABLE; + } + + fib_entry_index = ip6_fib_table_lookup (fib_index, pa6, 128); + sw_if_index = fib_entry_get_resolving_interface (fib_entry_index); + /* + * Set the TX interface to force ip-lookup to use its table ID + */ + vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index; + } + else + { + /* + * force an IP lookup in the table bound to the user's chosen + * source interface. + */ + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + ip6_fib_table_get_index_for_sw_if_index (sw_if_index); + } + + if (~0 == sw_if_index) + { + vlib_buffer_free (vm, &bi0, 1); + return SEND_PING_NO_INTERFACE; + } + + vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index; + + h0 = vlib_buffer_get_current (p0); + + /* Fill in ip6 header fields */ + h0->ip6.ip_version_traffic_class_and_flow_label = + clib_host_to_net_u32 (0x6 << 28); + h0->ip6.payload_length = 0; /* Set below */ + h0->ip6.protocol = IP_PROTOCOL_ICMP6; + h0->ip6.hop_limit = 255; + h0->ip6.dst_address = *pa6; + h0->ip6.src_address = *pa6; + + /* Fill in the correct source now */ + ip6_address_t *a = ip6_interface_first_address (im, sw_if_index); + if (!a) + { + vlib_buffer_free (vm, &bi0, 1); + return SEND_PING_NO_SRC_ADDRESS; + } + h0->ip6.src_address = a[0]; + + /* Fill in icmp fields */ + h0->icmp.type = ICMP6_echo_request; + h0->icmp.code = 0; + h0->icmp.checksum = 0; + + data_len = + init_icmp46_echo_request (&h0->icmp_echo, seq_host, id_host, data_len); + 
h0->icmp_echo.time_sent = vlib_time_now (vm); + + /* Fix up the lengths */ + h0->ip6.payload_length = + clib_host_to_net_u16 (data_len + sizeof (icmp46_header_t)); + + p0->current_length = clib_net_to_host_u16 (h0->ip6.payload_length) + + STRUCT_OFFSET_OF (icmp6_echo_request_header_t, icmp); + + /* Calculate the ICMP checksum */ + h0->icmp.checksum = 0; + h0->icmp.checksum = + ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip6, &bogus_length); + + /* Enqueue the packet right now */ + f = vlib_get_frame_to_node (vm, ip6_lookup_node.index); + to_next = vlib_frame_vector_args (f); + to_next[0] = bi0; + + ASSERT (burst <= VLIB_FRAME_SIZE); + f->n_vectors = burst; + while (--burst) + { + vlib_buffer_t *c0 = vlib_buffer_copy (vm, p0); + to_next++; + to_next[0] = vlib_get_buffer_index (vm, c0); + } + vlib_put_frame_to_node (vm, ip6_lookup_node.index, f); + + return SEND_PING_OK; +} + +static send_ip46_ping_result_t +send_ip4_ping (vlib_main_t * vm, + ip4_main_t * im, + u32 table_id, + ip4_address_t * pa4, + u32 sw_if_index, + u16 seq_host, u16 id_host, u16 data_len, u32 burst, u8 verbose) +{ + icmp4_echo_request_header_t *h0; + u32 bi0 = 0; + ip_lookup_main_t *lm = &im->lookup_main; + vlib_buffer_t *p0; + vlib_frame_t *f; + u32 *to_next; + u32 if_add_index0; + vlib_buffer_free_list_t *fl; + + if (vlib_buffer_alloc (vm, &bi0, 1) != 1) + return SEND_PING_ALLOC_FAIL; + + p0 = vlib_get_buffer (vm, bi0); + fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + vlib_buffer_init_for_free_list (p0, fl); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (p0); + + /* + * if the user did not provide a source interface, use the any interface + * that the destination resolves via. 
+ */ + if (~0 == sw_if_index) + { + fib_node_index_t fib_entry_index; + u32 fib_index; + + fib_index = ip4_fib_index_from_table_id (table_id); + + if (~0 == fib_index) + { + vlib_buffer_free (vm, &bi0, 1); + return SEND_PING_NO_TABLE; + } + + fib_entry_index = + ip4_fib_table_lookup (ip4_fib_get (fib_index), pa4, 32); + sw_if_index = fib_entry_get_resolving_interface (fib_entry_index); + /* + * Set the TX interface to force ip-lookup to use the user's table ID + */ + vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index; + } + else + { + /* + * force an IP lookup in the table bound to the user's chosen + * source interface. + */ + vnet_buffer (p0)->sw_if_index[VLIB_TX] = + ip4_fib_table_get_index_for_sw_if_index (sw_if_index); + } + + if (~0 == sw_if_index) + { + vlib_buffer_free (vm, &bi0, 1); + return SEND_PING_NO_INTERFACE; + } + + vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index; + + h0 = vlib_buffer_get_current (p0); + + /* Fill in ip4 header fields */ + h0->ip4.checksum = 0; + h0->ip4.ip_version_and_header_length = 0x45; + h0->ip4.tos = 0; + h0->ip4.length = 0; /* Set below */ + h0->ip4.fragment_id = 0; + h0->ip4.flags_and_fragment_offset = 0; + h0->ip4.ttl = 0xff; + h0->ip4.protocol = IP_PROTOCOL_ICMP; + h0->ip4.dst_address = *pa4; + h0->ip4.src_address = *pa4; + + /* Fill in the correct source now */ + if_add_index0 = lm->if_address_pool_index_by_sw_if_index[sw_if_index]; + if (PREDICT_TRUE (if_add_index0 != ~0)) + { + ip_interface_address_t *if_add = + pool_elt_at_index (lm->if_address_pool, if_add_index0); + ip4_address_t *if_ip = ip_interface_address_get_address (lm, if_add); + h0->ip4.src_address = *if_ip; + if (verbose) + { + vlib_cli_output (vm, "Source address: %U", + format_ip4_address, &h0->ip4.src_address); + } + } + + /* Fill in icmp fields */ + h0->icmp.type = ICMP4_echo_request; + h0->icmp.code = 0; + h0->icmp.checksum = 0; + + data_len = + init_icmp46_echo_request (&h0->icmp_echo, seq_host, id_host, data_len); + h0->icmp_echo.time_sent = 
vlib_time_now (vm); + + /* Fix up the lengths */ + h0->ip4.length = + clib_host_to_net_u16 (data_len + sizeof (icmp46_header_t) + + sizeof (ip4_header_t)); + + p0->current_length = clib_net_to_host_u16 (h0->ip4.length); + + /* Calculate the IP and ICMP checksums */ + h0->ip4.checksum = ip4_header_checksum (&(h0->ip4)); + h0->icmp.checksum = + ~ip_csum_fold (ip_incremental_checksum (0, &(h0->icmp), + p0->current_length - + sizeof (ip4_header_t))); + + /* Enqueue the packet right now */ + f = vlib_get_frame_to_node (vm, ip4_lookup_node.index); + to_next = vlib_frame_vector_args (f); + to_next[0] = bi0; + + ASSERT (burst <= VLIB_FRAME_SIZE); + f->n_vectors = burst; + while (--burst) + { + vlib_buffer_t *c0 = vlib_buffer_copy (vm, p0); + to_next++; + to_next[0] = vlib_get_buffer_index (vm, c0); + } + vlib_put_frame_to_node (vm, ip4_lookup_node.index, f); + + return SEND_PING_OK; +} + + +static void +print_ip6_icmp_reply (vlib_main_t * vm, u32 bi0) +{ + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + icmp6_echo_request_header_t *h0 = vlib_buffer_get_current (b0); + f64 rtt = 0; + clib_memcpy (&rtt, vnet_buffer (b0)->unused, sizeof (rtt)); + rtt -= h0->icmp_echo.time_sent; + vlib_cli_output (vm, + "%d bytes from %U: icmp_seq=%d ttl=%d time=%.4f ms", + clib_host_to_net_u16 (h0->ip6.payload_length), + format_ip6_address, + &h0->ip6.src_address, + clib_host_to_net_u16 (h0->icmp_echo.seq), + h0->ip6.hop_limit, rtt * 1000.0); +} + +static void +print_ip4_icmp_reply (vlib_main_t * vm, u32 bi0) +{ + vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0); + icmp4_echo_request_header_t *h0 = vlib_buffer_get_current (b0); + f64 rtt = 0; + clib_memcpy (&rtt, vnet_buffer (b0)->unused, sizeof (rtt)); + rtt -= h0->icmp_echo.time_sent; + u32 rcvd_icmp_len = + clib_host_to_net_u16 (h0->ip4.length) - + (4 * (0xF & h0->ip4.ip_version_and_header_length)); + + vlib_cli_output (vm, + "%d bytes from %U: icmp_seq=%d ttl=%d time=%.4f ms", + rcvd_icmp_len, + format_ip4_address, + &h0->ip4.src_address, + 
clib_host_to_net_u16 (h0->icmp_echo.seq), + h0->ip4.ttl, rtt * 1000.0); +} + + +/* + * Perform the ping run with the given parameters in the current CLI process. + * Depending on whether pa4 or pa6 is set, runs IPv4 or IPv6 ping. + * The amusing side effect is of course if both are set, then both pings are sent. + * This behavior can be used to ping a dualstack host over IPv4 and IPv6 at once. + */ + +static void +run_ping_ip46_address (vlib_main_t * vm, u32 table_id, ip4_address_t * pa4, + ip6_address_t * pa6, u32 sw_if_index, + f64 ping_interval, u32 ping_repeat, u32 data_len, + u32 ping_burst, u32 verbose) +{ + int i; + ping_main_t *pm = &ping_main; + uword curr_proc = vlib_current_process (vm); + u32 n_replies = 0; + u32 n_requests = 0; + ping_run_t *pr = 0; + u32 ping_run_index = 0; + u16 icmp_id; + + static u32 rand_seed = 0; + + if (PREDICT_FALSE (!rand_seed)) + rand_seed = random_default_seed (); + + icmp_id = random_u32 (&rand_seed) & 0xffff; + + while (hash_get (pm->ping_run_by_icmp_id, icmp_id)) + { + vlib_cli_output (vm, "ICMP ID collision at %d, incrementing", icmp_id); + icmp_id++; + } + pool_get (pm->ping_runs, pr); + ping_run_index = pr - pm->ping_runs; + pr->cli_process_id = curr_proc; + pr->cli_thread_index = vlib_get_thread_index (); + pr->icmp_id = icmp_id; + hash_set (pm->ping_run_by_icmp_id, icmp_id, ping_run_index); + for (i = 1; i <= ping_repeat; i++) + { + f64 sleep_interval; + f64 time_ping_sent = vlib_time_now (vm); + /* Reset pr: running ping in other process could have changed pm->ping_runs */ + pr = vec_elt_at_index (pm->ping_runs, ping_run_index); + pr->curr_seq = i; + if (pa6 && + (SEND_PING_OK == + send_ip6_ping (vm, ping_main.ip6_main, table_id, pa6, sw_if_index, + i, icmp_id, data_len, ping_burst, verbose))) + { + n_requests += ping_burst; + } + if (pa4 && + (SEND_PING_OK == + send_ip4_ping (vm, ping_main.ip4_main, table_id, pa4, sw_if_index, + i, icmp_id, data_len, ping_burst, verbose))) + { + n_requests += ping_burst; + } + 
while ((i <= ping_repeat) + && + ((sleep_interval = + time_ping_sent + ping_interval - vlib_time_now (vm)) > 0.0)) + { + uword event_type, *event_data = 0; + vlib_process_wait_for_event_or_clock (vm, sleep_interval); + event_type = vlib_process_get_events (vm, &event_data); + switch (event_type) + { + case ~0: /* no events => timeout */ + break; + case PING_RESPONSE_IP6: + { + int i; + for (i = 0; i < vec_len (event_data); i++) + { + u32 bi0 = event_data[i]; + print_ip6_icmp_reply (vm, bi0); + n_replies++; + if (0 != bi0) + { + vlib_buffer_free (vm, &bi0, 1); + } + } + } + break; + case PING_RESPONSE_IP4: + { + int i; + for (i = 0; i < vec_len (event_data); i++) + { + u32 bi0 = event_data[i]; + print_ip4_icmp_reply (vm, bi0); + n_replies++; + if (0 != bi0) + { + vlib_buffer_free (vm, &bi0, 1); + } + } + } + break; + default: + /* someone pressed a key, abort */ + vlib_cli_output (vm, "Aborted due to a keypress."); + i = 1 + ping_repeat; + break; + } + vec_free (event_data); + } + } + vlib_cli_output (vm, "\n"); + { + float loss = + (0 == + n_requests) ? 
0 : 100.0 * ((float) n_requests - + (float) n_replies) / (float) n_requests; + vlib_cli_output (vm, + "Statistics: %u sent, %u received, %f%% packet loss\n", + n_requests, n_replies, loss); + /* Reset pr: running ping in other process could have changed pm->ping_runs */ + pr = vec_elt_at_index (pm->ping_runs, ping_run_index); + hash_unset (pm->ping_run_by_icmp_id, icmp_id); + pool_put (pm->ping_runs, pr); + } +} + + + + + +static clib_error_t * +ping_ip_address (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + ip4_address_t a4; + ip6_address_t a6; + clib_error_t *error = 0; + u32 ping_repeat = 5; + u32 ping_burst = 1; + u8 ping_ip4, ping_ip6; + vnet_main_t *vnm = vnet_get_main (); + u32 data_len = PING_DEFAULT_DATA_LEN; + u32 verbose = 0; + f64 ping_interval = PING_DEFAULT_INTERVAL; + u32 sw_if_index, table_id; + + table_id = 0; + ping_ip4 = ping_ip6 = 0; + sw_if_index = ~0; + + if (unformat (input, "%U", unformat_ip4_address, &a4)) + { + ping_ip4 = 1; + } + else if (unformat (input, "%U", unformat_ip6_address, &a6)) + { + ping_ip6 = 1; + } + else if (unformat (input, "ipv4")) + { + if (unformat (input, "%U", unformat_ip4_address, &a4)) + { + ping_ip4 = 1; + } + else + { + error = + clib_error_return (0, + "expecting IPv4 address but got `%U'", + format_unformat_error, input); + } + } + else if (unformat (input, "ipv6")) + { + if (unformat (input, "%U", unformat_ip6_address, &a6)) + { + ping_ip6 = 1; + } + else + { + error = + clib_error_return (0, + "expecting IPv6 address but got `%U'", + format_unformat_error, input); + } + } + else + { + error = + clib_error_return (0, + "expecting IP4/IP6 address `%U'. 
Usage: ping <addr> [source <intf>] [size <datasz>] [repeat <count>] [verbose]", + format_unformat_error, input); + goto done; + } + + /* allow for the second AF in the same ping */ + if (!ping_ip4 && (unformat (input, "ipv4"))) + { + if (unformat (input, "%U", unformat_ip4_address, &a4)) + { + ping_ip4 = 1; + } + } + else if (!ping_ip6 && (unformat (input, "ipv6"))) + { + if (unformat (input, "%U", unformat_ip6_address, &a6)) + { + ping_ip6 = 1; + } + } + + /* parse the rest of the parameters in a cycle */ + while (!unformat_eof (input, NULL)) + { + if (unformat (input, "source")) + { + if (!unformat_user + (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) + { + error = + clib_error_return (0, + "unknown interface `%U'", + format_unformat_error, input); + goto done; + } + } + else if (unformat (input, "size")) + { + if (!unformat (input, "%u", &data_len)) + { + error = + clib_error_return (0, + "expecting size but got `%U'", + format_unformat_error, input); + goto done; + } + if (data_len > PING_MAXIMUM_DATA_SIZE) + { + error = + clib_error_return (0, + "%d is bigger than maximum allowed payload size %d", + data_len, PING_MAXIMUM_DATA_SIZE); + goto done; + } + } + else if (unformat (input, "table-id")) + { + if (!unformat (input, "%u", &table_id)) + { + error = + clib_error_return (0, + "expecting table-id but got `%U'", + format_unformat_error, input); + goto done; + } + } + else if (unformat (input, "interval")) + { + if (!unformat (input, "%f", &ping_interval)) + { + error = + clib_error_return (0, + "expecting interval (floating point number) got `%U'", + format_unformat_error, input); + goto done; + } + } + else if (unformat (input, "repeat")) + { + if (!unformat (input, "%u", &ping_repeat)) + { + error = + clib_error_return (0, + "expecting repeat count but got `%U'", + format_unformat_error, input); + goto done; + } + } + else if (unformat (input, "burst")) + { + if (!unformat (input, "%u", &ping_burst)) + { + error = + clib_error_return (0, + 
"expecting burst count but got `%U'", + format_unformat_error, input); + goto done; + } + } + else if (unformat (input, "verbose")) + { + verbose = 1; + } + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + if (ping_burst < 1 || ping_burst > VLIB_FRAME_SIZE) + return clib_error_return (0, "burst size must be between 1 and %u", + VLIB_FRAME_SIZE); + + run_ping_ip46_address (vm, table_id, ping_ip4 ? &a4 : NULL, + ping_ip6 ? &a6 : NULL, sw_if_index, ping_interval, + ping_repeat, data_len, ping_burst, verbose); +done: + return error; +} + +/*? + * This command sends an ICMP ECHO_REQUEST to network hosts. The address + * can be an IPv4 or IPv6 address (or both at the same time). + * + * @cliexpar + * @parblock + * Example of how ping an IPv4 address: + * @cliexstart{ping 172.16.1.2 source GigabitEthernet2/0/0 repeat 2} + * 64 bytes from 172.16.1.2: icmp_seq=1 ttl=64 time=.1090 ms + * 64 bytes from 172.16.1.2: icmp_seq=2 ttl=64 time=.0914 ms + * + * Statistics: 2 sent, 2 received, 0% packet loss + * @cliexend + * + * Example of how ping both an IPv4 address and IPv6 address at the same time: + * @cliexstart{ping 172.16.1.2 ipv6 fe80::24a5:f6ff:fe9c:3a36 source GigabitEthernet2/0/0 repeat 2 verbose} + * Adjacency index: 10, sw_if_index: 1 + * Adj: ip6-discover-neighbor + * Adj Interface: 0 + * Forced set interface: 1 + * Adjacency index: 0, sw_if_index: 4294967295 + * Adj: ip4-miss + * Adj Interface: 0 + * Forced set interface: 1 + * Source address: 172.16.1.1 + * 64 bytes from 172.16.1.2: icmp_seq=1 ttl=64 time=.1899 ms + * Adjacency index: 10, sw_if_index: 1 + * Adj: ip6-discover-neighbor + * Adj Interface: 0 + * Forced set interface: 1 + * Adjacency index: 0, sw_if_index: 4294967295 + * Adj: ip4-miss + * Adj Interface: 0 + * Forced set interface: 1 + * Source address: 172.16.1.1 + * 64 bytes from 172.16.1.2: icmp_seq=2 ttl=64 time=.0910 ms + * + * Statistics: 4 sent, 2 received, 50% packet loss + * 
@cliexend + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (ping_command, static) = +{ + .path = "ping", + .function = ping_ip_address, + .short_help = "ping {<ip-addr> | ipv4 <ip4-addr> | ipv6 <ip6-addr>}" + " [ipv4 <ip4-addr> | ipv6 <ip6-addr>] [source <interface>]" + " [size <pktsize>] [interval <sec>] [repeat <cnt>] [table-id <id>]" + " [verbose]", + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +static clib_error_t * +ping_cli_init (vlib_main_t * vm) +{ + ping_main_t *pm = &ping_main; + pm->ip6_main = &ip6_main; + pm->ip4_main = &ip4_main; + icmp6_register_type (vm, ICMP6_echo_reply, ip6_icmp_echo_reply_node.index); + ip4_icmp_register_type (vm, ICMP4_echo_reply, + ip4_icmp_echo_reply_node.index); + return 0; +} + +VLIB_INIT_FUNCTION (ping_cli_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/ping.h b/src/vnet/ip/ping.h new file mode 100644 index 00000000..b1b71f68 --- /dev/null +++ b/src/vnet/ip/ping.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vnet_ping_h +#define included_vnet_ping_h + + +#include <vnet/ip/ip.h> + +#include <vnet/ip/lookup.h> + +typedef enum +{ + PING_RESPONSE_IP6 = 42, + PING_RESPONSE_IP4, +} ping_response_type_t; + +typedef enum +{ + SEND_PING_OK = 0, + SEND_PING_ALLOC_FAIL, + SEND_PING_NO_INTERFACE, + SEND_PING_NO_TABLE, + SEND_PING_NO_SRC_ADDRESS, +} send_ip46_ping_result_t; + +/* + * Currently running ping command. + */ +typedef struct ping_run_t +{ + u16 icmp_id; + u16 curr_seq; + uword cli_process_id; + uword cli_thread_index; +} ping_run_t; + +typedef struct ping_main_t +{ + ip6_main_t *ip6_main; + ip4_main_t *ip4_main; + ping_run_t *ping_runs; + /* hash table to find back the CLI process for a reply */ + // uword *cli_proc_by_icmp_id; + ping_run_t *ping_run_by_icmp_id; +} ping_main_t; + +ping_main_t ping_main; + +#define PING_DEFAULT_DATA_LEN 60 +#define PING_DEFAULT_INTERVAL 1.0 + +#define PING_MAXIMUM_DATA_SIZE (VLIB_BUFFER_DATA_SIZE - sizeof(ip6_header_t) - sizeof(icmp46_header_t) - offsetof(icmp46_echo_request_t, data)) + +/* *INDENT-OFF* */ + +typedef CLIB_PACKED (struct { + u16 id; + u16 seq; + f64 time_sent; + u8 data[0]; +}) icmp46_echo_request_t; + + +typedef CLIB_PACKED (struct { + ip6_header_t ip6; + icmp46_header_t icmp; + icmp46_echo_request_t icmp_echo; +}) icmp6_echo_request_header_t; + +typedef CLIB_PACKED (struct { + ip4_header_t ip4; + icmp46_header_t icmp; + icmp46_echo_request_t icmp_echo; +}) icmp4_echo_request_header_t; + +/* *INDENT-ON* */ + + +typedef struct +{ + u16 id; + u16 seq; + u8 bound; +} icmp_echo_trace_t; + + + + +typedef enum +{ + ICMP6_ECHO_REPLY_NEXT_DROP, + ICMP6_ECHO_REPLY_NEXT_PUNT, + ICMP6_ECHO_REPLY_N_NEXT, +} icmp6_echo_reply_next_t; + +typedef enum +{ + ICMP4_ECHO_REPLY_NEXT_DROP, + ICMP4_ECHO_REPLY_NEXT_PUNT, + ICMP4_ECHO_REPLY_N_NEXT, +} icmp4_echo_reply_next_t; + +#endif /* included_vnet_ping_h */ diff --git a/src/vnet/ip/ports.def b/src/vnet/ip/ports.def new file mode 100644 index 00000000..cdb754f5 --- 
/dev/null +++ b/src/vnet/ip/ports.def @@ -0,0 +1,757 @@ +/* + * ip/ports.def: tcp/udp port definitions + * + * Eliot Dresselhaus + * August, 2005 + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* +PORT NUMBERS + +(last updated 18 October 2005) + +The port numbers are divided into three ranges: the Well Known Ports, +the Registered Ports, and the Dynamic and/or Private Ports. + +The Well Known Ports are those from 0 through 1023. + +The Registered Ports are those from 1024 through 49151 + +The Dynamic and/or Private Ports are those from 49152 through 65535 + + +************************************************************************ +* PLEASE NOTE THE FOLLOWING: * +* * +* 1. UNASSIGNED PORT NUMBERS SHOULD NOT BE USED. THE IANA WILL ASSIGN * +* THE NUMBER FOR THE PORT AFTER YOUR APPLICATION HAS BEEN APPROVED. * +* * +* 2. 
ASSIGNMENT OF A PORT NUMBER DOES NOT IN ANY WAY IMPLY AN * +* ENDORSEMENT OF AN APPLICATION OR PRODUCT, AND THE FACT THAT NETWORK * +* TRAFFIC IS FLOWING TO OR FROM A REGISTERED PORT DOES NOT MEAN THAT * +* IT IS "GOOD" TRAFFIC. FIREWALL AND SYSTEM ADMINISTRATORS SHOULD * +* CHOOSE HOW TO CONFIGURE THEIR SYSTEMS BASED ON THEIR KNOWLEDGE OF * +* THE TRAFFIC IN QUESTION, NOT WHETHER THERE IS A PORT NUMBER * +* REGISTERED OR NOT. * +************************************************************************ + + +WELL KNOWN PORT NUMBERS + +The Well Known Ports are assigned by the IANA and on most systems can +only be used by system (or root) processes or by programs executed by +privileged users. + +Ports are used in the TCP [RFC793] to name the ends of logical +connections which carry long term conversations. For the purpose of +providing services to unknown callers, a service contact port is +defined. This list specifies the port used by the server process as +its contact port. The contact port is sometimes called the +"well-known port". + +To the extent possible, these same port assignments are used with the +UDP [RFC768]. + +The range for assigned ports managed by the IANA is 0-1023. 
+*/ +ip_port (TCPMUX, 1) +ip_port (COMPRESS_NET_MANAGEMENT, 2) +ip_port (COMPRESS_NET, 3) +ip_port (RJE, 5) +ip_port (ECHO, 7) +ip_port (DISCARD, 9) +ip_port (SYSTAT, 11) +ip_port (DAYTIME, 13) +ip_port (QOTD, 17) +ip_port (MSP, 18) +ip_port (CHARGEN, 19) +ip_port (FTP_DATA, 20) +ip_port (FTP, 21) +ip_port (SSH, 22) +ip_port (TELNET, 23) +ip_port (SMTP, 25) +ip_port (NSW_FE, 27) +ip_port (MSG_ICP, 29) +ip_port (MSG_AUTH, 31) +ip_port (DSP, 33) +ip_port (TIME, 37) +ip_port (RAP, 38) +ip_port (RLP, 39) +ip_port (GRAPHICS, 41) +ip_port (NAME, 42) +ip_port (NAMESERVER, 42) +ip_port (NICNAME, 43) +ip_port (MPM_FLAGS, 44) +ip_port (MPM, 45) +ip_port (MPM_SND, 46) +ip_port (NI_FTP, 47) +ip_port (AUDITD, 48) +ip_port (TACACS, 49) +ip_port (RE_MAIL_CK, 50) +ip_port (LA_MAINT, 51) +ip_port (XNS_TIME, 52) +ip_port (DNS, 53) +ip_port (XNS_CH, 54) +ip_port (ISI_GL, 55) +ip_port (XNS_AUTH, 56) +ip_port (XNS_MAIL, 58) +ip_port (NI_MAIL, 61) +ip_port (ACAS, 62) +ip_port (WHOIS_PLUS_PLUS, 63) +ip_port (COVIA, 64) +ip_port (TACACS_DS, 65) +ip_port (ORACLE_SQL_NET, 66) +ip_port (BOOTPS, 67) +ip_port (BOOTPC, 68) +ip_port (TFTP, 69) +ip_port (GOPHER, 70) +ip_port (NETRJS_1, 71) +ip_port (NETRJS_2, 72) +ip_port (NETRJS_3, 73) +ip_port (NETRJS_4, 74) +ip_port (DEOS, 76) +ip_port (VETTCP, 78) +ip_port (FINGER, 79) +ip_port (WWW, 80) +ip_port (HOSTS2_NS, 81) +ip_port (XFER, 82) +ip_port (MIT_ML_DEV, 83) +ip_port (CTF, 84) +ip_port (MIT_ML_DEV1, 85) +ip_port (MFCOBOL, 86) +ip_port (KERBEROS, 88) +ip_port (SU_MIT_TG, 89) +ip_port (DNSIX, 90) +ip_port (MIT_DOV, 91) +ip_port (NPP, 92) +ip_port (DCP, 93) +ip_port (OBJCALL, 94) +ip_port (SUPDUP, 95) +ip_port (DIXIE, 96) +ip_port (SWIFT_RVF, 97) +ip_port (TACNEWS, 98) +ip_port (METAGRAM, 99) +ip_port (NEWACCT, 100) +ip_port (HOSTNAME, 101) +ip_port (ISO_TSAP, 102) +ip_port (GPPITNP, 103) +ip_port (ACR_NEMA, 104) +ip_port (CSO, 105) +ip_port (CSNET_NS, 105) +ip_port (3COM_TSMUX, 106) +ip_port (RTELNET, 107) +ip_port (SNAGAS, 108) +ip_port (POP2, 
109) +ip_port (POP3, 110) +ip_port (SUNRPC, 111) +ip_port (MCIDAS, 112) +ip_port (IDENT, 113) +ip_port (SFTP, 115) +ip_port (ANSANOTIFY, 116) +ip_port (UUCP_PATH, 117) +ip_port (SQLSERV, 118) +ip_port (NNTP, 119) +ip_port (CFDPTKT, 120) +ip_port (ERPC, 121) +ip_port (SMAKYNET, 122) +ip_port (NTP, 123) +ip_port (ANSATRADER, 124) +ip_port (LOCUS_MAP, 125) +ip_port (NXEDIT, 126) +ip_port (LOCUS_CON, 127) +ip_port (GSS_XLICEN, 128) +ip_port (PWDGEN, 129) +ip_port (CISCO_FNA, 130) +ip_port (CISCO_TNA, 131) +ip_port (CISCO_SYS, 132) +ip_port (STATSRV, 133) +ip_port (INGRES_NET, 134) +ip_port (EPMAP, 135) +ip_port (PROFILE, 136) +ip_port (NETBIOS_NS, 137) +ip_port (NETBIOS_DGM, 138) +ip_port (NETBIOS_SSN, 139) +ip_port (EMFIS_DATA, 140) +ip_port (EMFIS_CNTL, 141) +ip_port (BL_IDM, 142) +ip_port (IMAP, 143) +ip_port (UMA, 144) +ip_port (UAAC, 145) +ip_port (ISO_TP0, 146) +ip_port (ISO_IP, 147) +ip_port (JARGON, 148) +ip_port (AED_512, 149) +ip_port (SQL_NET, 150) +ip_port (HEMS, 151) +ip_port (BFTP, 152) +ip_port (SGMP, 153) +ip_port (NETSC_PROD, 154) +ip_port (NETSC_DEV, 155) +ip_port (SQLSRV, 156) +ip_port (KNET_CMP, 157) +ip_port (PCMAIL_SRV, 158) +ip_port (NSS_ROUTING, 159) +ip_port (SGMP_TRAPS, 160) +ip_port (SNMP, 161) +ip_port (SNMPTRAP, 162) +ip_port (CMIP_MAN, 163) +ip_port (CMIP_AGENT, 164) +ip_port (XNS_COURIER, 165) +ip_port (S_NET, 166) +ip_port (NAMP, 167) +ip_port (RSVD, 168) +ip_port (SEND, 169) +ip_port (PRINT_SRV, 170) +ip_port (MULTIPLEX, 171) +ip_port (CL1, 172) +ip_port (XYPLEX_MUX, 173) +ip_port (MAILQ, 174) +ip_port (VMNET, 175) +ip_port (GENRAD_MUX, 176) +ip_port (XDMCP, 177) +ip_port (NEXTSTEP, 178) +ip_port (BGP, 179) +ip_port (RIS, 180) +ip_port (UNIFY, 181) +ip_port (AUDIT, 182) +ip_port (OCBINDER, 183) +ip_port (OCSERVER, 184) +ip_port (REMOTE_KIS, 185) +ip_port (KIS, 186) +ip_port (ACI, 187) +ip_port (MUMPS, 188) +ip_port (QFT, 189) +ip_port (GACP, 190) +ip_port (PROSPERO, 191) +ip_port (OSU_NMS, 192) +ip_port (SRMP, 193) +ip_port (IRC, 194) 
+ip_port (DN6_NLM_AUD, 195) +ip_port (DN6_SMM_RED, 196) +ip_port (DLS, 197) +ip_port (DLS_MON, 198) +ip_port (SMUX, 199) +ip_port (SRC, 200) +ip_port (AT_RTMP, 201) +ip_port (AT_NBP, 202) +ip_port (AT_3, 203) +ip_port (AT_ECHO, 204) +ip_port (AT_5, 205) +ip_port (AT_ZIS, 206) +ip_port (AT_7, 207) +ip_port (AT_8, 208) +ip_port (QMTP, 209) +ip_port (Z39_50, 210) +ip_port (TI914CG, 211) +ip_port (ANET, 212) +ip_port (IPX, 213) +ip_port (VMPWSCS, 214) +ip_port (SOFTPC, 215) +ip_port (CAILIC, 216) +ip_port (DBASE, 217) +ip_port (MPP, 218) +ip_port (UARPS, 219) +ip_port (IMAP3, 220) +ip_port (FLN_SPX, 221) +ip_port (RSH_SPX, 222) +ip_port (CDC, 223) +ip_port (MASQDIALER, 224) +ip_port (DIRECT, 242) +ip_port (SUR_MEAS, 243) +ip_port (INBUSINESS, 244) +ip_port (LINK, 245) +ip_port (DSP3270, 246) +ip_port (SUBNTBCST_TFTP, 247) +ip_port (BHFHS, 248) +ip_port (RAP1, 256) +ip_port (SET, 257) +ip_port (YAK_CHAT, 258) +ip_port (ESRO_GEN, 259) +ip_port (OPENPORT, 260) +ip_port (NSIIOPS, 261) +ip_port (ARCISDMS, 262) +ip_port (HDAP, 263) +ip_port (BGMP, 264) +ip_port (X_BONE_CTL, 265) +ip_port (SST, 266) +ip_port (TD_SERVICE, 267) +ip_port (TD_REPLICA, 268) +ip_port (HTTP_MGMT, 280) +ip_port (PERSONAL_LINK, 281) +ip_port (CABLEPORT_AX, 282) +ip_port (RESCAP, 283) +ip_port (CORERJD, 284) +ip_port (FXP, 286) +ip_port (K_BLOCK, 287) +ip_port (NOVASTORBAKCUP, 308) +ip_port (ENTRUSTTIME, 309) +ip_port (BHMDS, 310) +ip_port (ASIP_WEBADMIN, 311) +ip_port (VSLMP, 312) +ip_port (MAGENTA_LOGIC, 313) +ip_port (OPALIS_ROBOT, 314) +ip_port (DPSI, 315) +ip_port (DECAUTH, 316) +ip_port (ZANNET, 317) +ip_port (PKIX_TIMESTAMP, 318) +ip_port (PTP_EVENT, 319) +ip_port (PTP_GENERAL, 320) +ip_port (PIP, 321) +ip_port (RTSPS, 322) +ip_port (TEXAR, 333) +ip_port (PDAP, 344) +ip_port (PAWSERV, 345) +ip_port (ZSERV, 346) +ip_port (FATSERV, 347) +ip_port (CSI_SGWP, 348) +ip_port (MFTP, 349) +ip_port (MATIP_TYPE_A, 350) +ip_port (MATIP_TYPE_B, 351) +ip_port (BHOETTY, 351) +ip_port (DTAG_STE_SB, 352) 
+ip_port (BHOEDAP4, 352) +ip_port (NDSAUTH, 353) +ip_port (BH611, 354) +ip_port (DATEX_ASN, 355) +ip_port (CLOANTO_NET_1, 356) +ip_port (BHEVENT, 357) +ip_port (SHRINKWRAP, 358) +ip_port (NSRMP, 359) +ip_port (SCOI2ODIALOG, 360) +ip_port (SEMANTIX, 361) +ip_port (SRSSEND, 362) +ip_port (RSVP_TUNNEL, 363) +ip_port (AURORA_CMGR, 364) +ip_port (DTK, 365) +ip_port (ODMR, 366) +ip_port (MORTGAGEWARE, 367) +ip_port (QBIKGDP, 368) +ip_port (RPC2PORTMAP, 369) +ip_port (CODAAUTH2, 370) +ip_port (CLEARCASE, 371) +ip_port (ULISTPROC, 372) +ip_port (LEGENT_1, 373) +ip_port (LEGENT_2, 374) +ip_port (HASSLE, 375) +ip_port (NIP, 376) +ip_port (TNETOS, 377) +ip_port (DSETOS, 378) +ip_port (IS99C, 379) +ip_port (IS99S, 380) +ip_port (HP_COLLECTOR, 381) +ip_port (HP_MANAGED_NODE, 382) +ip_port (HP_ALARM_MGR, 383) +ip_port (ARNS, 384) +ip_port (IBM_APP, 385) +ip_port (ASA, 386) +ip_port (AURP, 387) +ip_port (UNIDATA_LDM, 388) +ip_port (LDAP, 389) +ip_port (UIS, 390) +ip_port (SYNOTICS_RELAY, 391) +ip_port (SYNOTICS_BROKER, 392) +ip_port (META5, 393) +ip_port (EMBL_NDT, 394) +ip_port (NETCP, 395) +ip_port (NETWARE_IP, 396) +ip_port (MPTN, 397) +ip_port (KRYPTOLAN, 398) +ip_port (ISO_TSAP_C2, 399) +ip_port (WORK_SOL, 400) +ip_port (UPS, 401) +ip_port (GENIE, 402) +ip_port (DECAP, 403) +ip_port (NCED, 404) +ip_port (NCLD, 405) +ip_port (IMSP, 406) +ip_port (TIMBUKTU, 407) +ip_port (PRM_SM, 408) +ip_port (PRM_NM, 409) +ip_port (DECLADEBUG, 410) +ip_port (RMT, 411) +ip_port (SYNOPTICS_TRAP, 412) +ip_port (SMSP, 413) +ip_port (INFOSEEK, 414) +ip_port (BNET, 415) +ip_port (SILVERPLATTER, 416) +ip_port (ONMUX, 417) +ip_port (HYPER_G, 418) +ip_port (ARIEL1, 419) +ip_port (SMPTE, 420) +ip_port (ARIEL2, 421) +ip_port (ARIEL3, 422) +ip_port (OPC_JOB_START, 423) +ip_port (OPC_JOB_TRACK, 424) +ip_port (ICAD_EL, 425) +ip_port (SMARTSDP, 426) +ip_port (SVRLOC, 427) +ip_port (OCS_CMU, 428) +ip_port (OCS_AMU, 429) +ip_port (UTMPSD, 430) +ip_port (UTMPCD, 431) +ip_port (IASD, 432) +ip_port (NNSP, 433) 
+ip_port (MOBILEIP_AGENT, 434) +ip_port (MOBILIP_MN, 435) +ip_port (DNA_CML, 436) +ip_port (COMSCM, 437) +ip_port (DSFGW, 438) +ip_port (DASP, 439) +ip_port (SGCP, 440) +ip_port (DECVMS_SYSMGT, 441) +ip_port (CVC_HOSTD, 442) +ip_port (HTTPS, 443) +ip_port (SNPP, 444) +ip_port (MICROSOFT_DS, 445) +ip_port (DDM_RDB, 446) +ip_port (DDM_DFM, 447) +ip_port (DDM_SSL, 448) +ip_port (AS_SERVERMAP, 449) +ip_port (TSERVER, 450) +ip_port (SFS_SMP_NET, 451) +ip_port (SFS_CONFIG, 452) +ip_port (CREATIVESERVER, 453) +ip_port (CONTENTSERVER, 454) +ip_port (CREATIVEPARTNR, 455) +ip_port (MACON_TCP, 456) +ip_port (SCOHELP, 457) +ip_port (APPLEQTC, 458) +ip_port (AMPR_RCMD, 459) +ip_port (SKRONK, 460) +ip_port (DATASURFSRV, 461) +ip_port (DATASURFSRVSEC, 462) +ip_port (ALPES, 463) +ip_port (KPASSWD, 464) +ip_port (URD, 465) +ip_port (DIGITAL_VRC, 466) +ip_port (MYLEX_MAPD, 467) +ip_port (PHOTURIS, 468) +ip_port (RCP, 469) +ip_port (SCX_PROXY, 470) +ip_port (MONDEX, 471) +ip_port (LJK_LOGIN, 472) +ip_port (HYBRID_POP, 473) +ip_port (TN_TL_W1, 474) +ip_port (TCPNETHASPSRV, 475) +ip_port (TN_TL_FD1, 476) +ip_port (SS7NS, 477) +ip_port (SPSC, 478) +ip_port (IAFSERVER, 479) +ip_port (IAFDBASE, 480) +ip_port (PH, 481) +ip_port (BGS_NSI, 482) +ip_port (ULPNET, 483) +ip_port (INTEGRA_SME, 484) +ip_port (POWERBURST, 485) +ip_port (AVIAN, 486) +ip_port (SAFT, 487) +ip_port (GSS_HTTP, 488) +ip_port (NEST_PROTOCOL, 489) +ip_port (MICOM_PFS, 490) +ip_port (GO_LOGIN, 491) +ip_port (TICF_1, 492) +ip_port (TICF_2, 493) +ip_port (POV_RAY, 494) +ip_port (INTECOURIER, 495) +ip_port (PIM_RP_DISC, 496) +ip_port (DANTZ, 497) +ip_port (SIAM, 498) +ip_port (ISO_ILL, 499) +ip_port (ISAKMP, 500) +ip_port (STMF, 501) +ip_port (ASA_APPL_PROTO, 502) +ip_port (INTRINSA, 503) +ip_port (CITADEL, 504) +ip_port (MAILBOX_LM, 505) +ip_port (OHIMSRV, 506) +ip_port (CRS, 507) +ip_port (XVTTP, 508) +ip_port (SNARE, 509) +ip_port (FCP, 510) +ip_port (PASSGO, 511) +ip_port (EXEC, 512) +ip_port (LOGIN, 513) +ip_port (SHELL, 
514) +ip_port (PRINTER, 515) +ip_port (VIDEOTEX, 516) +ip_port (TALK, 517) +ip_port (NTALK, 518) +ip_port (UTIME, 519) +ip_port (EFS, 520) +ip_port (RIPNG, 521) +ip_port (ULP, 522) +ip_port (IBM_DB2, 523) +ip_port (NCP, 524) +ip_port (TIMED, 525) +ip_port (TEMPO, 526) +ip_port (STX, 527) +ip_port (CUSTIX, 528) +ip_port (IRC_SERV, 529) +ip_port (COURIER, 530) +ip_port (CONFERENCE, 531) +ip_port (NETNEWS, 532) +ip_port (NETWALL, 533) +ip_port (MM_ADMIN, 534) +ip_port (IIOP, 535) +ip_port (OPALIS_RDV, 536) +ip_port (NMSP, 537) +ip_port (GDOMAP, 538) +ip_port (APERTUS_LDP, 539) +ip_port (UUCP, 540) +ip_port (UUCP_RLOGIN, 541) +ip_port (COMMERCE, 542) +ip_port (KLOGIN, 543) +ip_port (KSHELL, 544) +ip_port (APPLEQTCSRVR, 545) +ip_port (DHCPV6_CLIENT, 546) +ip_port (DHCPV6_SERVER, 547) +ip_port (AFPOVERTCP, 548) +ip_port (IDFP, 549) +ip_port (NEW_RWHO, 550) +ip_port (CYBERCASH, 551) +ip_port (DEVSHR_NTS, 552) +ip_port (PIRP, 553) +ip_port (RTSP, 554) +ip_port (DSF, 555) +ip_port (REMOTEFS, 556) +ip_port (OPENVMS_SYSIPC, 557) +ip_port (SDNSKMP, 558) +ip_port (TEEDTAP, 559) +ip_port (RMONITOR, 560) +ip_port (MONITOR, 561) +ip_port (CHSHELL, 562) +ip_port (NNTPS, 563) +ip_port (9PFS, 564) +ip_port (WHOAMI, 565) +ip_port (STREETTALK, 566) +ip_port (BANYAN_RPC, 567) +ip_port (MS_SHUTTLE, 568) +ip_port (MS_ROME, 569) +ip_port (METER, 570) +ip_port (METER1, 571) +ip_port (SONAR, 572) +ip_port (BANYAN_VIP, 573) +ip_port (FTP_AGENT, 574) +ip_port (VEMMI, 575) +ip_port (IPCD, 576) +ip_port (VNAS, 577) +ip_port (IPDD, 578) +ip_port (DECBSRV, 579) +ip_port (SNTP_HEARTBEAT, 580) +ip_port (BDP, 581) +ip_port (SCC_SECURITY, 582) +ip_port (PHILIPS_VC, 583) +ip_port (KEYSERVER, 584) +ip_port (IMAP4_SSL, 585) +ip_port (PASSWORD_CHG, 586) +ip_port (SUBMISSION, 587) +ip_port (CAL, 588) +ip_port (EYELINK, 589) +ip_port (TNS_CML, 590) +ip_port (HTTP_ALT, 591) +ip_port (EUDORA_SET, 592) +ip_port (HTTP_RPC_EPMAP, 593) +ip_port (TPIP, 594) +ip_port (CAB_PROTOCOL, 595) +ip_port (SMSD, 596) 
+ip_port (PTCNAMESERVICE, 597) +ip_port (SCO_WEBSRVRMG3, 598) +ip_port (ACP, 599) +ip_port (IPCSERVER, 600) +ip_port (SYSLOG_CONN, 601) +ip_port (XMLRPC_BEEP, 602) +ip_port (IDXP, 603) +ip_port (TUNNEL, 604) +ip_port (SOAP_BEEP, 605) +ip_port (URM, 606) +ip_port (NQS, 607) +ip_port (SIFT_UFT, 608) +ip_port (NPMP_TRAP, 609) +ip_port (NPMP_LOCAL, 610) +ip_port (NPMP_GUI, 611) +ip_port (HMMP_IND, 612) +ip_port (HMMP_OP, 613) +ip_port (SSHELL, 614) +ip_port (SCO_INETMGR, 615) +ip_port (SCO_SYSMGR, 616) +ip_port (SCO_DTMGR, 617) +ip_port (DEI_ICDA, 618) +ip_port (COMPAQ_EVM, 619) +ip_port (SCO_WEBSRVRMGR, 620) +ip_port (ESCP_IP, 621) +ip_port (COLLABORATOR, 622) +ip_port (ASF_RMCP, 623) +ip_port (CRYPTOADMIN, 624) +ip_port (DEC_DLM, 625) +ip_port (ASIA, 626) +ip_port (PASSGO_TIVOLI, 627) +ip_port (QMQP, 628) +ip_port (3COM_AMP3, 629) +ip_port (RDA, 630) +ip_port (IPP, 631) +ip_port (BMPP, 632) +ip_port (SERVSTAT, 633) +ip_port (GINAD, 634) +ip_port (RLZDBASE, 635) +ip_port (LDAPS, 636) +ip_port (LANSERVER, 637) +ip_port (MCNS_SEC, 638) +ip_port (MSDP, 639) +ip_port (ENTRUST_SPS, 640) +ip_port (REPCMD, 641) +ip_port (ESRO_EMSDP, 642) +ip_port (SANITY, 643) +ip_port (DWR, 644) +ip_port (PSSC, 645) +ip_port (LDP, 646) +ip_port (DHCP_FAILOVER, 647) +ip_port (RRP, 648) +ip_port (CADVIEW_3D, 649) +ip_port (OBEX, 650) +ip_port (IEEE_MMS, 651) +ip_port (HELLO_PORT, 652) +ip_port (REPSCMD, 653) +ip_port (AODV, 654) +ip_port (TINC, 655) +ip_port (SPMP, 656) +ip_port (RMC, 657) +ip_port (TENFOLD, 658) +ip_port (MAC_SRVR_ADMIN, 660) +ip_port (HAP, 661) +ip_port (PFTP, 662) +ip_port (PURENOISE, 663) +ip_port (ASF_SECURE_RMCP, 664) +ip_port (SUN_DR, 665) +ip_port (MDQS, 666) +ip_port (DOOM, 666) +ip_port (DISCLOSE, 667) +ip_port (MECOMM, 668) +ip_port (MEREGISTER, 669) +ip_port (VACDSM_SWS, 670) +ip_port (VACDSM_APP, 671) +ip_port (VPPS_QUA, 672) +ip_port (CIMPLEX, 673) +ip_port (ACAP, 674) +ip_port (DCTP, 675) +ip_port (VPPS_VIA, 676) +ip_port (VPP, 677) +ip_port (GGF_NCP, 678) 
+ip_port (MRM, 679) +ip_port (ENTRUST_AAAS, 680) +ip_port (ENTRUST_AAMS, 681) +ip_port (XFR, 682) +ip_port (CORBA_IIOP, 683) +ip_port (CORBA_IIOP_SSL, 684) +ip_port (MDC_PORTMAPPER, 685) +ip_port (HCP_WISMAR, 686) +ip_port (ASIPREGISTRY, 687) +ip_port (REALM_RUSD, 688) +ip_port (NMAP, 689) +ip_port (VATP, 690) +ip_port (MSEXCH_ROUTING, 691) +ip_port (HYPERWAVE_ISP, 692) +ip_port (CONNENDP, 693) +ip_port (HA_CLUSTER, 694) +ip_port (IEEE_MMS_SSL, 695) +ip_port (RUSHD, 696) +ip_port (UUIDGEN, 697) +ip_port (OLSR, 698) +ip_port (ACCESSNETWORK, 699) +ip_port (EPP, 700) +ip_port (LMP, 701) +ip_port (IRIS_BEEP, 702) +ip_port (ELCSD, 704) +ip_port (AGENTX, 705) +ip_port (SILC, 706) +ip_port (BORLAND_DSJ, 707) +ip_port (ENTRUST_KMSH, 709) +ip_port (ENTRUST_ASH, 710) +ip_port (CISCO_TDP, 711) +ip_port (TBRPF, 712) +ip_port (NETVIEWDM1, 729) +ip_port (NETVIEWDM2, 730) +ip_port (NETVIEWDM3, 731) +ip_port (NETGW, 741) +ip_port (NETRCS, 742) +ip_port (FLEXLM, 744) +ip_port (FUJITSU_DEV, 747) +ip_port (RIS_CM, 748) +ip_port (KERBEROS_ADM, 749) +ip_port (RFILE, 750) +ip_port (PUMP, 751) +ip_port (QRH, 752) +ip_port (RRH, 753) +ip_port (TELL, 754) +ip_port (NLOGIN, 758) +ip_port (CON, 759) +ip_port (NS, 760) +ip_port (RXE, 761) +ip_port (QUOTAD, 762) +ip_port (CYCLESERV, 763) +ip_port (OMSERV, 764) +ip_port (WEBSTER, 765) +ip_port (PHONEBOOK, 767) +ip_port (VID, 769) +ip_port (CADLOCK, 770) +ip_port (RTIP, 771) +ip_port (CYCLESERV2, 772) +ip_port (SUBMIT, 773) +ip_port (RPASSWD, 774) +ip_port (ENTOMB, 775) +ip_port (WPAGES, 776) +ip_port (MULTILING_HTTP, 777) +ip_port (WPGS, 780) +ip_port (MDBS_DAEMON, 800) +ip_port (DEVICE, 801) +ip_port (FCP_UDP, 810) +ip_port (ITM_MCELL_S, 828) +ip_port (PKIX_3_CA_RA, 829) +ip_port (DHCP_FAILOVER2, 847) +ip_port (GDOI, 848) +ip_port (ISCSI, 860) +ip_port (RSYNC, 873) +ip_port (ICLCNET_LOCATE, 886) +ip_port (ICLCNET_SVINFO, 887) +ip_port (ACCESSBUILDER, 888) +ip_port (CDDBP, 888) +ip_port (OMGINITIALREFS, 900) +ip_port (SMPNAMERES, 901) +ip_port 
(IDEAFARM_CHAT, 902) +ip_port (IDEAFARM_CATCH, 903) +ip_port (XACT_BACKUP, 911) +ip_port (APEX_MESH, 912) +ip_port (APEX_EDGE, 913) +ip_port (FTPS_DATA, 989) +ip_port (FTPS, 990) +ip_port (NAS, 991) +ip_port (TELNETS, 992) +ip_port (IMAPS, 993) +ip_port (IRCS, 994) +ip_port (POP3S, 995) +ip_port (VSINET, 996) +ip_port (MAITRD, 997) +ip_port (BUSBOY, 998) +ip_port (GARCON, 999) +ip_port (PUPROUTER, 999) +ip_port (CADLOCK2, 1000) +ip_port (SURF, 1010) + diff --git a/src/vnet/ip/protocols.def b/src/vnet/ip/protocols.def new file mode 100644 index 00000000..77fab31d --- /dev/null +++ b/src/vnet/ip/protocols.def @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Emacs editing mode -*-C-*- + +From http://www.iana.org/assignments/protocol-numbers + +PROTOCOL NUMBERS + +(last updated 18 October 2004) + +In the Internet Protocol version 4 (IPv4) [RFC791] there is a field, +called "Protocol", to identify the next level protocol. This is an 8 +bit field. In Internet Protocol version 6 (IPv6) [RFC1883] this field +is called the "Next Header" field. 
+*/ +ip_protocol (0, IP6_HOP_BY_HOP_OPTIONS) +ip_protocol (1, ICMP) +ip_protocol (2, IGMP) +ip_protocol (3, GGP) +ip_protocol (4, IP_IN_IP) +ip_protocol (5, ST) +ip_protocol (6, TCP) +ip_protocol (7, CBT) +ip_protocol (8, EGP) +ip_protocol (9, IGP) +ip_protocol (10, BBN_RCC_MON) +ip_protocol (11, NVP_II) +ip_protocol (12, PUP) +ip_protocol (13, ARGUS) +ip_protocol (14, EMCON) +ip_protocol (15, XNET) +ip_protocol (16, CHAOS) +ip_protocol (17, UDP) +ip_protocol (18, MUX) +ip_protocol (19, DCN_MEAS) +ip_protocol (20, HMP) +ip_protocol (21, PRM) +ip_protocol (22, XNS_IDP) +ip_protocol (23, TRUNK_1) +ip_protocol (24, TRUNK_2) +ip_protocol (25, LEAF_1) +ip_protocol (26, LEAF_2) +ip_protocol (27, RDP) +ip_protocol (28, IRTP) +ip_protocol (29, ISO_TP4) +ip_protocol (30, NETBLT) +ip_protocol (31, MFE_NSP) +ip_protocol (32, MERIT_INP) +ip_protocol (33, SEP) +ip_protocol (34, 3PC) +ip_protocol (35, IDPR) +ip_protocol (36, XTP) +ip_protocol (37, DDP) +ip_protocol (38, IDPR_CMTP) +ip_protocol (39, TP) +ip_protocol (40, IL) +ip_protocol (41, IPV6) +ip_protocol (42, SDRP) +ip_protocol (43, IPV6_ROUTE) +ip_protocol (44, IPV6_FRAGMENTATION) +ip_protocol (45, IDRP) +ip_protocol (46, RSVP) +ip_protocol (47, GRE) +ip_protocol (48, MHRP) +ip_protocol (49, BNA) +ip_protocol (50, IPSEC_ESP) +ip_protocol (51, IPSEC_AH) +ip_protocol (52, I_NLSP) +ip_protocol (53, SWIPE) +ip_protocol (54, NARP) +ip_protocol (55, MOBILE) +ip_protocol (56, TLSP) +ip_protocol (57, SKIP) +ip_protocol (58, ICMP6) +ip_protocol (59, IP6_NONXT) +ip_protocol (60, IP6_DESTINATION_OPTIONS) +ip_protocol (62, CFTP) +ip_protocol (64, SAT_EXPAK) +ip_protocol (65, KRYPTOLAN) +ip_protocol (66, RVD) +ip_protocol (67, IPPC) +ip_protocol (69, SAT_MON) +ip_protocol (70, VISA) +ip_protocol (71, IPCV) +ip_protocol (72, CPNX) +ip_protocol (73, CPHB) +ip_protocol (74, WSN) +ip_protocol (75, PVP) +ip_protocol (76, BR_SAT_MON) +ip_protocol (77, SUN_ND) +ip_protocol (78, WB_MON) +ip_protocol (79, WB_EXPAK) +ip_protocol (80, ISO_IP) 
+ip_protocol (81, VMTP) +ip_protocol (82, SECURE_VMTP) +ip_protocol (83, VINES) +ip_protocol (84, TTP) +ip_protocol (85, NSFNET_IGP) +ip_protocol (86, DGP) +ip_protocol (87, TCF) +ip_protocol (88, EIGRP) +ip_protocol (89, OSPF) +ip_protocol (90, SPRITE_RPC) +ip_protocol (91, LARP) +ip_protocol (92, MTP) +ip_protocol (93, AX) +ip_protocol (94, IPIP) +ip_protocol (95, MICP) +ip_protocol (96, SCC_SP) +ip_protocol (97, ETHERIP) +ip_protocol (98, ENCAP) +ip_protocol (100, GMTP) +ip_protocol (101, IFMP) +ip_protocol (102, PNNI) +ip_protocol (103, PIM) +ip_protocol (104, ARIS) +ip_protocol (105, SCPS) +ip_protocol (106, QNX) +ip_protocol (107, A) +ip_protocol (108, IPCOMP) +ip_protocol (109, SNP) +ip_protocol (110, COMPAQ_PEER) +ip_protocol (111, IPX_IN_IP) +ip_protocol (112, VRRP) +ip_protocol (113, PGM) +ip_protocol (115, L2TP) +ip_protocol (116, DDX) +ip_protocol (117, IATP) +ip_protocol (118, STP) +ip_protocol (119, SRP) +ip_protocol (120, UTI) +ip_protocol (121, SMP) +ip_protocol (122, SM) +ip_protocol (123, PTP) +ip_protocol (124, ISIS) +ip_protocol (125, FIRE) +ip_protocol (126, CRTP) +ip_protocol (127, CRUDP) +ip_protocol (128, SSCOPMCE) +ip_protocol (129, IPLT) +ip_protocol (130, SPS) +ip_protocol (131, PIPE) +ip_protocol (132, SCTP) +ip_protocol (133, FC) +ip_protocol (134, RSVP_E2E_IGNORE) +ip_protocol (135, MOBILITY) +ip_protocol (136, UDP_LITE) +ip_protocol (137, MPLS_IN_IP) +ip_protocol (255, RESERVED) + diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c new file mode 100644 index 00000000..0869954c --- /dev/null +++ b/src/vnet/ip/punt.c @@ -0,0 +1,830 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Local TCP/IP stack punt infrastructure.
+ *
+ * Provides a set of VPP nodes together with the relevant APIs and CLI
+ * commands in order to adjust and dispatch packets from the VPP data plane
+ * to the local TCP/IP stack.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/udp/udp.h>
+#include <vnet/tcp/tcp.h>
+#include <vnet/ip/punt.h>
+#include <vppinfra/sparse_vec.h>
+#include <vlib/unix/unix.h>
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/uio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+/* X-macro list of next nodes of the UDP punt nodes: _ (enum suffix, node name). */
+#define foreach_punt_next			\
+  _ (PUNT, "error-punt")
+/* Next-node indices of the UDP punt nodes, generated from foreach_punt_next. */
+typedef enum
+{
+#define _(s,n) PUNT_NEXT_##s,
+  foreach_punt_next
+#undef _
+    PUNT_N_NEXT,
+} punt_next_t;
+/* Next-node indices of the "punt-socket-rx" input node. */
+enum punt_socket_rx_next_e
+{
+  PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT,
+  PUNT_SOCKET_RX_NEXT_IP4_LOOKUP,
+  PUNT_SOCKET_RX_NEXT_IP6_LOOKUP,
+  PUNT_SOCKET_RX_N_NEXT
+};
+/* Node registrations; the matching VLIB_REGISTER_NODE bodies are further down. */
+vlib_node_registration_t udp4_punt_node;
+vlib_node_registration_t udp6_punt_node;
+vlib_node_registration_t udp4_punt_socket_node;
+vlib_node_registration_t udp6_punt_socket_node;
+static vlib_node_registration_t punt_socket_rx_node;
+/* Single global instance of the punt state used throughout this file. */
+punt_main_t punt_main;
+/* Return the server socket path stored in punt_main (pm->sun_path). */
+char *
+vnet_punt_get_server_pathname (void)
+{
+  punt_main_t *pm = &punt_main;
+  return pm->sun_path;
+}
+
+/** @brief IPv4/IPv6 UDP punt node main loop.
+
+    This is the main loop inline function for IPv4/IPv6 UDP punt
+    transition node.
+
+    @param vm vlib_main_t corresponding to the current thread
+    @param node vlib_node_runtime_t
+    @param from_frame vlib_frame_t whose contents should be dispatched
+    @param is_ip4 non-zero when called for the IPv4 node, zero for the IPv6 node
+    @returns the number of buffers in from_frame (from_frame->n_vectors) */
+always_inline uword
+udp46_punt_inline (vlib_main_t * vm,
+		   vlib_node_runtime_t * node,
+		   vlib_frame_t * from_frame, int is_ip4)
+{
+  u32 n_left_from, *from, *to_next;
+  word advance;			/* negative byte offset: IP header + UDP header */
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  /* udp[46]_lookup hands us the data payload, not the IP header */
+  if (is_ip4)
+    advance = -(sizeof (ip4_header_t) + sizeof (udp_header_t));
+  else
+    advance = -(sizeof (ip6_header_t) + sizeof (udp_header_t));
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+      /* Every buffer goes to the single PUNT next node. */
+      vlib_get_next_frame (vm, node, PUNT_NEXT_PUNT, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  u32 bi0;
+	  vlib_buffer_t *b0;
+	  /* Copy the buffer index straight into the next frame. */
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  from += 1;
+	  to_next += 1;
+	  n_left_from -= 1;
+	  n_left_to_next -= 1;
+	  /* Rewind by the header sizes and tag the buffer with the punt reason. */
+	  b0 = vlib_get_buffer (vm, bi0);
+	  vlib_buffer_advance (b0, advance);
+	  b0->error = node->errors[PUNT_ERROR_UDP_PORT];
+	}
+
+      vlib_put_next_frame (vm, node, PUNT_NEXT_PUNT, n_left_to_next);
+    }
+
+  return from_frame->n_vectors;
+}
+/* Error/counter strings, generated from punt_error.def in enum order. */
+static char *punt_error_strings[] = {
+#define punt_error(n,s) s,
+#include "punt_error.def"
+#undef punt_error
+};
+
+/** @brief IPv4 UDP punt node.
+    @node ip4-udp-punt
+
+    This is the IPv4 UDP punt transition node. It is registered as a next
+    node for the "ip4-udp-lookup" handling UDP port(s) requested for punt.
+    The buffer's current data pointer is adjusted to the original packet
+    IPv4 header. All buffers are dispatched to "error-punt".
+
+    @param vm vlib_main_t corresponding to the current thread
+    @param node vlib_node_runtime_t
+    @param from_frame vlib_frame_t whose contents should be dispatched
+
+    @par Graph mechanics: next index usage
+
+    @em Sets:
+    - <code>b->current_data</code>
+    - <code>b->current_length</code>
+
+    <em>Next Index:</em>
+    - Dispatches the packet to the "error-punt" node
+*/
+static uword
+udp4_punt (vlib_main_t * vm,
+	   vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+  return udp46_punt_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+/** @brief IPv6 UDP punt node.
+    @node ip6-udp-punt
+
+    This is the IPv6 UDP punt transition node. It is registered as a next
+    node for the "ip6-udp-lookup" handling UDP port(s) requested for punt.
+    The buffer's current data pointer is adjusted to the original packet
+    IPv6 header. All buffers are dispatched to "error-punt".
+
+    @param vm vlib_main_t corresponding to the current thread
+    @param node vlib_node_runtime_t
+    @param from_frame vlib_frame_t whose contents should be dispatched
+
+    @par Graph mechanics: next index usage
+
+    @em Sets:
+    - <code>b->current_data</code>
+    - <code>b->current_length</code>
+
+    <em>Next Index:</em>
+    - Dispatches the packet to the "error-punt" node
+*/
+static uword
+udp6_punt (vlib_main_t * vm,
+	   vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+  return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp4_punt_node) = {
+  .function = udp4_punt,
+  .name = "ip4-udp-punt",
+  /* Takes a vector of packets.
*/ + .vector_size = sizeof (u32), + + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, + + .n_next_nodes = PUNT_N_NEXT, + .next_nodes = { +#define _(s,n) [PUNT_NEXT_##s] = n, + foreach_punt_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (udp4_punt_node, udp4_punt); + +VLIB_REGISTER_NODE (udp6_punt_node) = { + .function = udp6_punt, + .name = "ip6-udp-punt", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, + + .n_next_nodes = PUNT_N_NEXT, + .next_nodes = { +#define _(s,n) [PUNT_NEXT_##s] = n, + foreach_punt_next +#undef _ + }, +}; + +VLIB_NODE_FUNCTION_MULTIARCH (udp6_punt_node, udp6_punt);; + +/* *INDENT-ON* */ + +static struct sockaddr_un * +punt_socket_get (bool is_ip4, u16 port) +{ + punt_main_t *pm = &punt_main; + punt_client_t *v = is_ip4 ? pm->clients_by_dst_port4 : + pm->clients_by_dst_port6; + + u16 i = sparse_vec_index (v, port); + if (i == SPARSE_VEC_INVALID_INDEX) + return 0; + + return &vec_elt (v, i).caddr; +} + +static void +punt_socket_register (bool is_ip4, u8 protocol, u16 port, + char *client_pathname) +{ + punt_main_t *pm = &punt_main; + punt_client_t c, *n; + punt_client_t *v = is_ip4 ? pm->clients_by_dst_port4 : + pm->clients_by_dst_port6; + + memset (&c, 0, sizeof (c)); + memcpy (c.caddr.sun_path, client_pathname, sizeof (c.caddr.sun_path)); + c.caddr.sun_family = AF_UNIX; + c.port = port; + n = sparse_vec_validate (v, port); + n[0] = c; +} + +/* $$$$ Just leaves the mapping in place for now */ +static void +punt_socket_unregister (bool is_ip4, u8 protocol, u16 port) +{ + return; +} + +always_inline uword +udp46_punt_socket_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, bool is_ip4) +{ + u32 *buffers = vlib_frame_args (frame); + uword n_packets = frame->n_vectors; + struct iovec *iovecs = 0; + punt_main_t *pm = &punt_main; + int i; + + u32 node_index = is_ip4 ? 
udp4_punt_socket_node.index : + udp6_punt_socket_node.index; + + for (i = 0; i < n_packets; i++) + { + struct iovec *iov; + vlib_buffer_t *b; + uword l; + punt_packetdesc_t packetdesc; + + b = vlib_get_buffer (vm, buffers[i]); + + /* Reverse UDP Punt advance */ + udp_header_t *udp; + if (is_ip4) + { + vlib_buffer_advance (b, -(sizeof (ip4_header_t) + + sizeof (udp_header_t))); + ip4_header_t *ip = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip + 1); + } + else + { + vlib_buffer_advance (b, -(sizeof (ip6_header_t) + + sizeof (udp_header_t))); + ip6_header_t *ip = vlib_buffer_get_current (b); + udp = (udp_header_t *) (ip + 1); + } + + u16 port = clib_net_to_host_u16 (udp->dst_port); + + /* + * Find registerered client + * If no registered client, drop packet and count + */ + struct sockaddr_un *caddr; + caddr = punt_socket_get (is_ip4, port); + if (!caddr) + { + vlib_node_increment_counter (vm, node_index, + PUNT_ERROR_SOCKET_TX_ERROR, 1); + goto error; + } + + /* Re-set iovecs if present. */ + if (iovecs) + _vec_len (iovecs) = 0; + + /* Add packet descriptor */ + packetdesc.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + packetdesc.action = 0; + vec_add2 (iovecs, iov, 1); + iov->iov_base = &packetdesc; + iov->iov_len = sizeof (packetdesc); + + /** VLIB buffer chain -> Unix iovec(s). 
*/ + vlib_buffer_advance (b, -(sizeof (ethernet_header_t))); + vec_add2 (iovecs, iov, 1); + iov->iov_base = b->data + b->current_data; + iov->iov_len = l = b->current_length; + + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + do + { + b = vlib_get_buffer (vm, b->next_buffer); + + vec_add2 (iovecs, iov, 1); + + iov->iov_base = b->data + b->current_data; + iov->iov_len = b->current_length; + l += b->current_length; + } + while (b->flags & VLIB_BUFFER_NEXT_PRESENT); + } + + struct msghdr msg = { + .msg_name = caddr, + .msg_namelen = sizeof (*caddr), + .msg_iov = iovecs, + .msg_iovlen = vec_len (iovecs), + }; + + if (sendmsg (pm->socket_fd, &msg, 0) < l) + vlib_node_increment_counter (vm, node_index, + PUNT_ERROR_SOCKET_TX_ERROR, 1); + } + +error: + vlib_buffer_free_no_next (vm, buffers, n_packets); + + return n_packets; +} + +static uword +udp4_punt_socket (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return udp46_punt_socket_inline (vm, node, from_frame, true /* is_ip4 */ ); +} + +static uword +udp6_punt_socket (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) +{ + return udp46_punt_socket_inline (vm, node, from_frame, false /* is_ip4 */ ); +} + + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (udp4_punt_socket_node) = { + .function = udp4_punt_socket, + .name = "ip4-udp-punt-socket", + .flags = VLIB_NODE_FLAG_IS_DROP, + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, +}; +VLIB_REGISTER_NODE (udp6_punt_socket_node) = { + .function = udp6_punt_socket, + .name = "ip6-udp-punt-socket", + .flags = VLIB_NODE_FLAG_IS_DROP, + .vector_size = sizeof (u32), + .n_errors = PUNT_N_ERROR, + .error_strings = punt_error_strings, +}; +/* *INDENT-ON* */ + +typedef struct +{ + enum punt_action_e action; + u32 sw_if_index; +} punt_trace_t; + +static u8 * +format_punt_trace (u8 * s, va_list * va) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *); + vnet_main_t *vnm = vnet_get_main (); + punt_trace_t *t = va_arg (*va, punt_trace_t *); + s = format (s, "%U Action: %d", format_vnet_sw_if_index_name, + vnm, t->sw_if_index, t->action); + return s; +} + +static uword +punt_socket_rx_fd (vlib_main_t * vm, vlib_node_runtime_t * node, u32 fd) +{ + const uword buffer_size = VLIB_BUFFER_DATA_SIZE; + u32 n_trace = vlib_get_trace_count (vm, node); + u32 next = node->cached_next_index; + u32 n_left_to_next, next_index; + u32 *to_next; + u32 error = PUNT_ERROR_NONE; + vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); + + /* $$$$ Only dealing with one buffer at the time for now */ + + u32 bi; + vlib_buffer_t *b; + punt_packetdesc_t packetdesc; + ssize_t size; + struct iovec io[2]; + + if (vlib_buffer_alloc (vm, &bi, 1) != 1) + { + error = PUNT_ERROR_NOBUFFER; + goto error; + } + + b = vlib_get_buffer (vm, bi); + io[0].iov_base = &packetdesc; + io[0].iov_len = sizeof (packetdesc); + io[1].iov_base = b->data; + io[1].iov_len = buffer_size; + + size = readv (fd, io, 2); + /* We need at least the packet descriptor plus a header */ + if (size <= (int) (sizeof (packetdesc) + sizeof (ip4_header_t))) + { + vlib_buffer_free (vm, &bi, 1); + error = PUNT_ERROR_READV; + goto error; + } + + b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED; + b->current_length = size - sizeof (packetdesc); + 
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); + + switch (packetdesc.action) + { + case PUNT_L2: + vnet_buffer (b)->sw_if_index[VLIB_TX] = packetdesc.sw_if_index; + next_index = PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT; + break; + + case PUNT_IP4_ROUTED: + vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index; + vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; + next_index = PUNT_SOCKET_RX_NEXT_IP4_LOOKUP; + break; + + case PUNT_IP6_ROUTED: + vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index; + vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0; + next_index = PUNT_SOCKET_RX_NEXT_IP6_LOOKUP; + break; + + default: + error = PUNT_ERROR_ACTION; + vlib_buffer_free (vm, &bi, 1); + goto error; + } + + if (PREDICT_FALSE (n_trace > 0)) + { + punt_trace_t *t; + vlib_trace_buffer (vm, node, next_index, b, 1 /* follow_chain */ ); + vlib_set_trace_count (vm, node, --n_trace); + t = vlib_add_trace (vm, node, b, sizeof (*t)); + t->sw_if_index = packetdesc.sw_if_index; + t->action = packetdesc.action; + } + + to_next[0] = bi; + to_next++; + n_left_to_next--; + + vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next, + bi, next_index); + vlib_put_next_frame (vm, node, next, n_left_to_next); + return 1; + +error: + vlib_node_increment_counter (vm, punt_socket_rx_node.index, error, 1); + return 0; +} + +static uword +punt_socket_rx (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + punt_main_t *pm = &punt_main; + u32 total_count = 0; + int i; + + for (i = 0; i < vec_len (pm->ready_fds); i++) + { + total_count += punt_socket_rx_fd (vm, node, pm->ready_fds[i]); + vec_del1 (pm->ready_fds, i); + } + return total_count; +} + +VLIB_REGISTER_NODE (punt_socket_rx_node, static) = +{ + .function = punt_socket_rx,.name = "punt-socket-rx",.type = + VLIB_NODE_TYPE_INPUT,.state = VLIB_NODE_STATE_INTERRUPT,.vector_size = + 1,.n_errors = PUNT_N_ERROR,.error_strings = + punt_error_strings,.n_next_nodes = PUNT_SOCKET_RX_N_NEXT,.next_nodes = + { 
+[PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT] = "interface-output", + [PUNT_SOCKET_RX_NEXT_IP4_LOOKUP] = "ip4-lookup", + [PUNT_SOCKET_RX_NEXT_IP6_LOOKUP] = "ip6-lookup",},.format_trace = + format_punt_trace,}; + +static clib_error_t * +punt_socket_read_ready (clib_file_t * uf) +{ + vlib_main_t *vm = vlib_get_main (); + punt_main_t *pm = &punt_main; + + /** Schedule the rx node */ + vlib_node_set_interrupt_pending (vm, punt_socket_rx_node.index); + vec_add1 (pm->ready_fds, uf->file_descriptor); + + return 0; +} + +clib_error_t * +vnet_punt_socket_add (vlib_main_t * vm, u32 header_version, + bool is_ip4, u8 protocol, u16 port, + char *client_pathname) +{ + punt_main_t *pm = &punt_main; + + if (!pm->is_configured) + return clib_error_return (0, "socket is not configured"); + + if (header_version != PUNT_PACKETDESC_VERSION) + return clib_error_return (0, "Invalid packet descriptor version"); + + /* For now we only support UDP punt */ + if (protocol != IP_PROTOCOL_UDP) + return clib_error_return (0, + "only UDP protocol (%d) is supported, got %d", + IP_PROTOCOL_UDP, protocol); + + if (port == (u16) ~ 0) + return clib_error_return (0, "UDP port number required"); + + /* Register client */ + punt_socket_register (is_ip4, protocol, port, client_pathname); + + u32 node_index = is_ip4 ? udp4_punt_socket_node.index : + udp6_punt_socket_node.index; + + udp_register_dst_port (vm, port, node_index, is_ip4); + + return 0; +} + +clib_error_t * +vnet_punt_socket_del (vlib_main_t * vm, bool is_ip4, u8 l4_protocol, u16 port) +{ + punt_main_t *pm = &punt_main; + + if (!pm->is_configured) + return clib_error_return (0, "socket is not configured"); + + punt_socket_unregister (is_ip4, l4_protocol, port); + udp_unregister_dst_port (vm, port, is_ip4); + + return 0; +} + +/** + * @brief Request IP traffic punt to the local TCP/IP stack. 
+ * + * @em Note + * - UDP and TCP are the only protocols supported in the current implementation + * + * @param vm vlib_main_t corresponding to the current thread + * @param ipv IP protcol version. + * 4 - IPv4, 6 - IPv6, ~0 for both IPv6 and IPv4 + * @param protocol 8-bits L4 protocol value + * UDP is 17 + * TCP is 1 + * @param port 16-bits L4 (TCP/IP) port number when applicable (UDP only) + * + * @returns 0 on success, non-zero value otherwise + */ +clib_error_t * +vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port, + bool is_add) +{ + + /* For now we only support UDP punt */ + if (protocol != IP_PROTOCOL_UDP && protocol != IP_PROTOCOL_TCP) + return clib_error_return (0, + "only UDP (%d) and TCP (%d) protocols are supported, got %d", + IP_PROTOCOL_UDP, IP_PROTOCOL_TCP, protocol); + + if (ipv != (u8) ~ 0 && ipv != 4 && ipv != 6) + return clib_error_return (0, "IP version must be 4 or 6, got %d", ipv); + + if (port == (u16) ~ 0) + { + if ((ipv == 4) || (ipv == (u8) ~ 0)) + { + if (protocol == IP_PROTOCOL_UDP) + udp_punt_unknown (vm, 1, is_add); + else if (protocol == IP_PROTOCOL_TCP) + tcp_punt_unknown (vm, 1, is_add); + } + + if ((ipv == 6) || (ipv == (u8) ~ 0)) + { + if (protocol == IP_PROTOCOL_UDP) + udp_punt_unknown (vm, 0, is_add); + else if (protocol == IP_PROTOCOL_TCP) + tcp_punt_unknown (vm, 0, is_add); + } + + return 0; + } + + else if (is_add) + { + if (protocol == IP_PROTOCOL_TCP) + return clib_error_return (0, "punt TCP ports is not supported yet"); + + if (ipv == 4 || ipv == (u8) ~ 0) + udp_register_dst_port (vm, port, udp4_punt_node.index, 1); + + if (ipv == 6 || ipv == (u8) ~ 0) + udp_register_dst_port (vm, port, udp6_punt_node.index, 0); + + return 0; + } + else + return clib_error_return (0, "punt delete is not supported yet"); +} + +static clib_error_t * +punt_cli (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + u32 port; + bool is_add = true; + u32 protocol = ~0; + clib_error_t *error; + + while 
(unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "del")) + is_add = false; + else if (unformat (input, "all")) + { + /* punt both IPv6 and IPv4 when used in CLI */ + error = vnet_punt_add_del (vm, ~0, protocol, ~0, is_add); + if (error) + clib_error_report (error); + } + else if (unformat (input, "%d", &port)) + { + /* punt both IPv6 and IPv4 when used in CLI */ + error = vnet_punt_add_del (vm, ~0, protocol, port, is_add); + if (error) + clib_error_report (error); + } + else if (unformat (input, "udp")) + protocol = IP_PROTOCOL_UDP; + else if (unformat (input, "tcp")) + protocol = IP_PROTOCOL_TCP; + } + + return 0; +} + +/*? + * The set of '<em>set punt</em>' commands allows specific IP traffic to + * be punted to the host TCP/IP stack + * + * @em Note + * - UDP is the only protocol supported in the current implementation + * - All TCP traffic is currently punted to the host by default + * + * @cliexpar + * @parblock + * Example of how to request NTP traffic to be punted + * @cliexcmd{set punt udp 125} + * + * Example of how to request all 'unknown' UDP traffic to be punted + * @cliexcmd{set punt udp all} + * + * Example of how to stop all 'unknown' UDP traffic to be punted + * @cliexcmd{set punt udp del all} + * @endparblock +?*/ +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (punt_command, static) = { + .path = "set punt", + .short_help = "set punt [udp|tcp] [del] <all | port-num1 [port-num2 ...]>", + .function = punt_cli, +}; +/* *INDENT-ON* */ + +clib_error_t * +punt_init (vlib_main_t * vm) +{ + punt_main_t *pm = &punt_main; + + pm->clients_by_dst_port6 = sparse_vec_new + (sizeof (pm->clients_by_dst_port6[0]), + BITS (((udp_header_t *) 0)->dst_port)); + pm->clients_by_dst_port4 = sparse_vec_new + (sizeof (pm->clients_by_dst_port4[0]), + BITS (((udp_header_t *) 0)->dst_port)); + + pm->is_configured = false; + pm->interface_output_node = vlib_get_node_by_name (vm, + (u8 *) + "interface-output"); + return 0; +} + +VLIB_INIT_FUNCTION 
(punt_init); + +static clib_error_t * +punt_config (vlib_main_t * vm, unformat_input_t * input) +{ + punt_main_t *pm = &punt_main; + char *socket_path = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "socket %s", &socket_path)) + strncpy (pm->sun_path, socket_path, 108 - 1); + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (socket_path == 0) + return 0; + + /* UNIX domain socket */ + struct sockaddr_un addr; + if ((pm->socket_fd = socket (AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0)) == -1) + { + return clib_error_return (0, "socket error"); + } + + memset (&addr, 0, sizeof (addr)); + addr.sun_family = AF_UNIX; + if (*socket_path == '\0') + { + *addr.sun_path = '\0'; + strncpy (addr.sun_path + 1, socket_path + 1, + sizeof (addr.sun_path) - 2); + } + else + { + strncpy (addr.sun_path, socket_path, sizeof (addr.sun_path) - 1); + unlink (socket_path); + } + + if (bind (pm->socket_fd, (struct sockaddr *) &addr, sizeof (addr)) == -1) + { + return clib_error_return (0, "bind error"); + } + + /* Register socket */ + clib_file_main_t *fm = &file_main; + clib_file_t template = { 0 }; + template.read_function = punt_socket_read_ready; + template.file_descriptor = pm->socket_fd; + pm->clib_file_index = clib_file_add (fm, &template); + + pm->is_configured = true; + + return 0; +} + +VLIB_CONFIG_FUNCTION (punt_config, "punt"); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/ip/punt.h b/src/vnet/ip/punt.h new file mode 100644 index 00000000..9defa881 --- /dev/null +++ b/src/vnet/ip/punt.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Definitions for punt infrastructure.
+ */
+#ifndef included_punt_h
+#define included_punt_h
+
+#include <sys/un.h>
+typedef enum			/* error/counter indices generated from punt_error.def */
+{
+#define punt_error(n,s) PUNT_ERROR_##n,
+#include <vnet/ip/punt_error.def>
+#undef punt_error
+  PUNT_N_ERROR,
+} punt_error_t;
+
+/* Public punt API, implemented in punt.c. */
+clib_error_t *vnet_punt_add_del (vlib_main_t * vm, u8 ipv,
+				 u8 protocol, u16 port, bool is_add);
+clib_error_t *vnet_punt_socket_add (vlib_main_t * vm, u32 header_version,
+				    bool is_ip4, u8 protocol, u16 port,
+				    char *client_pathname);
+clib_error_t *vnet_punt_socket_del (vlib_main_t * vm, bool is_ip4,
+				    u8 l4_protocol, u16 port);
+char *vnet_punt_get_server_pathname (void);
+/* Action carried in the packet descriptor; selects the next node on socket RX. */
+enum punt_action_e
+{
+  PUNT_L2 = 0,			/* to interface-output; sw_if_index is the TX interface */
+  PUNT_IP4_ROUTED,		/* to ip4-lookup; sw_if_index is the RX interface */
+  PUNT_IP6_ROUTED,		/* to ip6-lookup; sw_if_index is the RX interface */
+};
+
+/*
+ * Packet descriptor header. Version 1
+ * If this header changes, the version must also change to notify clients.
+ * NOTE(review): the struct is packed but holds an enum, whose size is
+ * chosen by the compiler -- confirm clients agree on the layout.
+ */
+#define PUNT_PACKETDESC_VERSION 1
+typedef struct __attribute__ ((packed))
+{
+  u32 sw_if_index;		/* RX or TX interface */
+  enum punt_action_e action;
+} punt_packetdesc_t;
+
+/*
+ * Client registration
+ */
+typedef struct
+{
+  u16 port;			/* UDP destination port the client registered for */
+  struct sockaddr_un caddr;	/* address of the client's UNIX domain socket */
+} punt_client_t;
+/* Global punt state; the single instance is punt_main. */
+typedef struct
+{
+  int socket_fd;		/* server datagram socket created in punt_config */
+  char sun_path[sizeof (struct sockaddr_un)];	/* server socket path from the "socket" option */
+  punt_client_t *clients_by_dst_port4;	/* sparse vector of IPv4 clients, indexed by UDP dst port */
+  punt_client_t *clients_by_dst_port6;	/* sparse vector of IPv6 clients, indexed by UDP dst port */
+  u32 clib_file_index;		/* value returned by clib_file_add for socket_fd */
+  bool is_configured;		/* set once the server socket is bound and registered */
+  vlib_node_t *interface_output_node;	/* "interface-output" node, looked up in punt_init */
+  u32 *ready_fds;		/* fds queued by punt_socket_read_ready for punt_socket_rx */
+  u32 *rx_buffers;		/* not referenced by the punt.c code in this change */
+} punt_main_t;
+extern punt_main_t punt_main;
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/punt_error.def b/src/vnet/ip/punt_error.def
new file mode 100644
index 00000000..13afa2c7
--- /dev/null
+++ b/src/vnet/ip/punt_error.def
@@ -0,0 +1,27 @@
+/*
+ * punt_error.def: punt errors
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Each entry: punt_error (enum suffix, counter string). */
+punt_error (NONE, "no error")
+punt_error (UDP_PORT, "udp port punt")
+punt_error (SOCKET_RX, "Socket RX")
+punt_error (SOCKET_TX, "Socket TX")
+punt_error (SOCKET_RX_ERROR, "Socket RX error")
+punt_error (SOCKET_TX_ERROR, "Socket TX error")
+punt_error (NOBUFFER, "buffer allocation failure")
+punt_error (READV, "socket read failure")
+punt_error (ACTION, "invalid packet descriptor")
+ |