aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/ip
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/ip')
-rw-r--r--src/vnet/ip/dir.dox26
-rw-r--r--src/vnet/ip/format.c121
-rw-r--r--src/vnet/ip/format.h114
-rw-r--r--src/vnet/ip/icmp4.c784
-rw-r--r--src/vnet/ip/icmp4.h60
-rw-r--r--src/vnet/ip/icmp46_packet.h398
-rw-r--r--src/vnet/ip/icmp6.c882
-rw-r--r--src/vnet/ip/icmp6.h82
-rw-r--r--src/vnet/ip/igmp_packet.h155
-rw-r--r--src/vnet/ip/ip.api551
-rw-r--r--src/vnet/ip/ip.h203
-rw-r--r--src/vnet/ip/ip4.h387
-rw-r--r--src/vnet/ip/ip46_cli.c236
-rw-r--r--src/vnet/ip/ip4_error.h95
-rw-r--r--src/vnet/ip/ip4_format.c256
-rwxr-xr-xsrc/vnet/ip/ip4_forward.c3197
-rw-r--r--src/vnet/ip/ip4_input.c507
-rw-r--r--src/vnet/ip/ip4_mtrie.c811
-rw-r--r--src/vnet/ip/ip4_mtrie.h237
-rw-r--r--src/vnet/ip/ip4_packet.h385
-rw-r--r--src/vnet/ip/ip4_pg.c387
-rw-r--r--src/vnet/ip/ip4_source_and_port_range_check.c1424
-rw-r--r--src/vnet/ip/ip4_source_check.c562
-rw-r--r--src/vnet/ip/ip4_test.c347
-rw-r--r--src/vnet/ip/ip4_to_ip6.h659
-rw-r--r--src/vnet/ip/ip6.h605
-rw-r--r--src/vnet/ip/ip6_error.h92
-rw-r--r--src/vnet/ip/ip6_format.c383
-rw-r--r--src/vnet/ip/ip6_forward.c3558
-rw-r--r--src/vnet/ip/ip6_hop_by_hop.c1166
-rw-r--r--src/vnet/ip/ip6_hop_by_hop.h277
-rw-r--r--src/vnet/ip/ip6_hop_by_hop_packet.h56
-rw-r--r--src/vnet/ip/ip6_input.c378
-rw-r--r--src/vnet/ip/ip6_neighbor.c4332
-rw-r--r--src/vnet/ip/ip6_neighbor.h109
-rw-r--r--src/vnet/ip/ip6_packet.h536
-rw-r--r--src/vnet/ip/ip6_pg.c231
-rw-r--r--src/vnet/ip/ip6_to_ip4.h634
-rw-r--r--src/vnet/ip/ip_api.c1825
-rw-r--r--src/vnet/ip/ip_checksum.c228
-rw-r--r--src/vnet/ip/ip_frag.c581
-rw-r--r--src/vnet/ip/ip_frag.h96
-rw-r--r--src/vnet/ip/ip_init.c152
-rw-r--r--src/vnet/ip/ip_input_acl.c450
-rw-r--r--src/vnet/ip/ip_packet.h180
-rw-r--r--src/vnet/ip/ip_source_and_port_range_check.h148
-rw-r--r--src/vnet/ip/lookup.c1442
-rw-r--r--src/vnet/ip/lookup.h224
-rwxr-xr-xsrc/vnet/ip/ping.c928
-rw-r--r--src/vnet/ip/ping.h115
-rw-r--r--src/vnet/ip/ports.def757
-rw-r--r--src/vnet/ip/protocols.def162
-rw-r--r--src/vnet/ip/punt.c830
-rw-r--r--src/vnet/ip/punt.h91
-rw-r--r--src/vnet/ip/punt_error.def27
55 files changed, 33459 insertions, 0 deletions
diff --git a/src/vnet/ip/dir.dox b/src/vnet/ip/dir.dox
new file mode 100644
index 00000000..a4eb7337
--- /dev/null
+++ b/src/vnet/ip/dir.dox
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Doxygen directory documentation */
+
+/**
+@dir
+@brief Layer 3 IP Code.
+
+This directory contains the source code for IP routing.
+
+*/
+/*? %%clicmd:group_label Layer 3 IP CLI %% ?*/
diff --git a/src/vnet/ip/format.c b/src/vnet/ip/format.c
new file mode 100644
index 00000000..be1c4fd3
--- /dev/null
+++ b/src/vnet/ip/format.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/format.c: ip generic (4 or 6) formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/* Format an ip_protocol_t: its registered name, else "unknown <number>". */
+u8 *
+format_ip_protocol (u8 * s, va_list * args)
+{
+ ip_protocol_t protocol = va_arg (*args, ip_protocol_t);
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi = ip_get_protocol_info (im, protocol);
+ /* A null pi selects the numeric fallback. */
+ if (pi)
+ return format (s, "%s", pi->name);
+ else
+ return format (s, "unknown %d", protocol);
+}
+
+uword
+unformat_ip_protocol (unformat_input_t * input, va_list * args)
+{
+ u8 *result = va_arg (*args, u8 *);
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi;
+ int i;
+ /* Look the name up; i is then used as an index into im->protocol_infos. */
+ if (!unformat_user (input, unformat_vlib_number_by_name,
+ im->protocol_info_by_name, &i))
+ return 0;
+ /* Store the matched entry's protocol through the u8 * argument. */
+ pi = vec_elt_at_index (im->protocol_infos, i);
+ *result = pi->protocol;
+ return 1;
+}
+
+u8 *
+format_tcp_udp_port (u8 * s, va_list * args)
+{
+ int port = va_arg (*args, int);
+ ip_main_t *im = &ip_main;
+ tcp_udp_port_info_t *pi;
+ /* port is looked up as given; the numeric fallback swaps it net-to-host. */
+ pi = ip_get_tcp_udp_port_info (im, port);
+ if (pi)
+ s = format (s, "%s", pi->name);
+ else
+ s = format (s, "%d", clib_net_to_host_u16 (port));
+
+ return s;
+}
+
+uword
+unformat_tcp_udp_port (unformat_input_t * input, va_list * args)
+{
+ u16 *result = va_arg (*args, u16 *);
+ ip_main_t *im = &ip_main;
+ tcp_udp_port_info_t *pi;
+ u32 i, port;
+ /* Named ports use pi->port as is; numeric ones are swapped host-to-net. */
+
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ im->port_info_by_name, &i))
+ {
+ pi = vec_elt_at_index (im->port_infos, i);
+ port = pi->port;
+ }
+ else if (unformat_user (input, unformat_vlib_number, &port)
+ && port < (1 << 16))
+ port = clib_host_to_net_u16 (port);
+
+ else
+ return 0;
+ /* Returns 1 with *result set; 0 above when neither form matched. */
+ *result = port;
+ return 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/format.h b/src/vnet/ip/format.h
new file mode 100644
index 00000000..c35f0f4b
--- /dev/null
+++ b/src/vnet/ip/format.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/format.h: ip 4 and/or 6 formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_format_h
+#define included_ip_format_h
+
+/* IP4 or IP6. */
+
+format_function_t format_ip_protocol;
+unformat_function_t unformat_ip_protocol;
+
+format_function_t format_tcp_udp_port;
+unformat_function_t unformat_tcp_udp_port;
+
+typedef enum format_ip_adjacency_flags_t_
+{
+ FORMAT_IP_ADJACENCY_NONE,
+ FORMAT_IP_ADJACENCY_BRIEF = FORMAT_IP_ADJACENCY_NONE,
+ FORMAT_IP_ADJACENCY_DETAIL = (1 << 0),
+} format_ip_adjacency_flags_t;
+
+format_function_t format_ip_adjacency;
+format_function_t format_ip_adjacency_packet_data;
+
+format_function_t format_ip46_address;
+
+typedef enum
+{
+ IP46_TYPE_ANY,
+ IP46_TYPE_IP4,
+ IP46_TYPE_IP6
+} ip46_type_t;
+/* unformat_ip46_address expects arguments (ip46_address_t *, ip46_type_t)
+ * The type argument is used to enforce a particular IP version. */
+unformat_function_t unformat_ip46_address;
+
+/* IP4 */
+
+/* Parse an IP4 address %d.%d.%d.%d. */
+unformat_function_t unformat_ip4_address;
+
+/* Format an IP4 address. */
+format_function_t format_ip4_address;
+format_function_t format_ip4_address_and_length;
+
+/* Parse an IP4 header. */
+unformat_function_t unformat_ip4_header;
+
+/* Format an IP4 header. */
+format_function_t format_ip4_header;
+
+/* Parse an IP packet matching pattern. */
+unformat_function_t unformat_ip4_match;
+
+unformat_function_t unformat_pg_ip4_header;
+
+/* IP6 */
+unformat_function_t unformat_ip6_address;
+format_function_t format_ip6_address;
+format_function_t format_ip6_address_and_length;
+unformat_function_t unformat_ip6_header;
+format_function_t format_ip6_header;
+unformat_function_t unformat_pg_ip6_header;
+
+/* Format TCP/UDP headers. */
+format_function_t format_tcp_header, format_udp_header;
+
+unformat_function_t unformat_pg_tcp_header, unformat_pg_udp_header;
+
+#endif /* included_ip_format_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c
new file mode 100644
index 00000000..bbeab32b
--- /dev/null
+++ b/src/vnet/ip/icmp4.c
@@ -0,0 +1,784 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/icmp4.c: ipv4 icmp
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+/* One string per foreach_icmp4_error entry, in list order. */
+static char *icmp_error_strings[] = {
+#define _(f,s) s,
+ foreach_icmp4_error
+#undef _
+};
+
+static u8 *
+format_ip4_icmp_type_and_code (u8 * s, va_list * args)
+{
+ icmp4_type_t type = va_arg (*args, int);
+ u8 code = va_arg (*args, int);
+ char *t = 0;
+ /* First the type name; t stays null when the type is not listed. */
+#define _(n,f) case n: t = #f; break;
+
+ switch (type)
+ {
+ foreach_icmp4_type;
+
+ default:
+ break;
+ }
+
+#undef _
+
+ if (!t)
+ return format (s, "unknown 0x%x", type);
+
+ s = format (s, "%s", t);
+ /* Then the code name, if (type, code) is a listed combination. */
+ t = 0;
+ switch ((type << 8) | code)
+ {
+#define _(a,n,f) case (ICMP4_##a << 8) | (n): t = #f; break;
+
+ foreach_icmp4_code;
+
+#undef _
+ }
+
+ if (t)
+ s = format (s, " %s", t);
+
+ return s;
+}
+
+static u8 *
+format_ip4_icmp_header (u8 * s, va_list * args)
+{
+ icmp46_header_t *icmp = va_arg (*args, icmp46_header_t *);
+ u32 max_header_bytes = va_arg (*args, u32);
+
+ /* Fewer bytes available than one icmp46_header_t: report truncation. */
+ if (max_header_bytes < sizeof (icmp[0]))
+ return format (s, "ICMP header truncated");
+ /* Type and code names, then the checksum converted to host byte order. */
+ s = format (s, "ICMP %U checksum 0x%x",
+ format_ip4_icmp_type_and_code, icmp->type, icmp->code,
+ clib_net_to_host_u16 (icmp->checksum));
+
+ return s;
+}
+
+static u8 *
+format_icmp_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ icmp_input_trace_t *t = va_arg (*va, icmp_input_trace_t *);
+ /* The trace record's packet_data bytes are printed as an IP4 header. */
+ s = format (s, "%U",
+ format_ip4_header, t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+typedef enum
+{
+ ICMP_INPUT_NEXT_ERROR,
+ ICMP_INPUT_N_NEXT,
+} icmp_input_next_t;
+
+typedef struct
+{
+ uword *type_and_code_by_name;
+ /* Both are name hashes read by unformat_icmp_type_and_code. */
+ uword *type_by_name;
+
+ /* Next index used by ip4_icmp_input, indexed by [icmp type]. */
+ u8 ip4_input_next_index_by_type[256];
+} icmp4_main_t;
+
+icmp4_main_t icmp4_main;
+
+static uword
+ip4_icmp_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ icmp4_main_t *im = &icmp4_main;
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next;
+ /* Hand each packet to the next index recorded for its ICMP type. */
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ icmp46_header_t *icmp0;
+ icmp4_type_t type0;
+ u32 bi0, next0;
+ /* Prefetch the buffer two ahead and the packet data one ahead. */
+ if (PREDICT_TRUE (n_left_from > 2))
+ {
+ vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+ p0 = vlib_get_buffer (vm, from[1]);
+ ip0 = vlib_buffer_get_current (p0);
+ CLIB_PREFETCH (ip0, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+ /* Speculatively enqueue to the frame for the current next index. */
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ /* Pick the next index from the per-type table. */
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ icmp0 = ip4_next_header (ip0);
+ type0 = icmp0->type;
+ next0 = im->ip4_input_next_index_by_type[type0];
+ /* The error is set on every packet, whichever next it takes. */
+ p0->error = node->errors[ICMP4_ERROR_UNKNOWN_TYPE];
+ if (PREDICT_FALSE (next0 != next))
+ {
+ vlib_put_next_frame (vm, node, next, n_left_to_next + 1);
+ next = next0;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ to_next[0] = bi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_icmp_input_node,static) = {
+ .function = ip4_icmp_input,
+ .name = "ip4-icmp-input",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp_input_trace,
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+ /* More next nodes are added to this node by ip4_icmp_register_type. */
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ICMP_INPUT_NEXT_ERROR] = "error-punt",
+ },
+};
+/* *INDENT-ON* */
+
+static uword
+ip4_icmp_echo_request (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next;
+ ip4_main_t *i4m = &ip4_main;
+ u16 *fragment_ids, *fid;
+ u8 host_config_ttl = i4m->host_config.ttl;
+ /* Rewrite each echo request in place into an echo reply. */
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp_input_trace_t));
+
+ /* Get random fragment IDs for replies. */
+ fid = fragment_ids = clib_random_buffer_get_data (&vm->random_buffer,
+ n_packets *
+ sizeof (fragment_ids[0]));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ /* Dual loop: the same steps as the single loop below, two at a time. */
+ while (n_left_from > 2 && n_left_to_next > 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ icmp46_header_t *icmp0, *icmp1;
+ u32 bi0, src0, dst0;
+ u32 bi1, src1, dst1;
+ ip_csum_t sum0, sum1;
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ p1 = vlib_get_buffer (vm, bi1);
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ icmp0 = ip4_next_header (ip0);
+ icmp1 = ip4_next_header (ip1);
+ /* The RX interface is overwritten with the local interface index. */
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+ vnet_buffer (p1)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ /* Update ICMP checksum. */
+ sum0 = icmp0->checksum;
+ sum1 = icmp1->checksum;
+
+ ASSERT (icmp0->type == ICMP4_echo_request);
+ ASSERT (icmp1->type == ICMP4_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
+ icmp46_header_t, type);
+ sum1 = ip_csum_update (sum1, ICMP4_echo_request, ICMP4_echo_reply,
+ icmp46_header_t, type);
+ icmp0->type = ICMP4_echo_reply;
+ icmp1->type = ICMP4_echo_reply;
+
+ icmp0->checksum = ip_csum_fold (sum0);
+ icmp1->checksum = ip_csum_fold (sum1);
+
+ src0 = ip0->src_address.data_u32;
+ src1 = ip1->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ dst1 = ip1->dst_address.data_u32;
+
+ /* Swap source and destination address.
+ Does not change checksum. */
+ ip0->src_address.data_u32 = dst0;
+ ip1->src_address.data_u32 = dst1;
+ ip0->dst_address.data_u32 = src0;
+ ip1->dst_address.data_u32 = src1;
+
+ /* Update IP checksum. */
+ sum0 = ip0->checksum;
+ sum1 = ip1->checksum;
+
+ sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
+ ip4_header_t, ttl);
+ sum1 = ip_csum_update (sum1, ip1->ttl, host_config_ttl,
+ ip4_header_t, ttl);
+ ip0->ttl = host_config_ttl;
+ ip1->ttl = host_config_ttl;
+
+ /* New fragment id. */
+ sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
+ ip4_header_t, fragment_id);
+ sum1 = ip_csum_update (sum1, ip1->fragment_id, fid[1],
+ ip4_header_t, fragment_id);
+ ip0->fragment_id = fid[0];
+ ip1->fragment_id = fid[1];
+ fid += 2;
+
+ ip0->checksum = ip_csum_fold (sum0);
+ ip1->checksum = ip_csum_fold (sum1);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ ASSERT (ip1->checksum == ip4_header_checksum (ip1));
+
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ p1->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ }
+ /* Single loop: whatever the dual loop left over. */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ icmp46_header_t *icmp0;
+ u32 bi0, src0, dst0;
+ ip_csum_t sum0;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ icmp0 = ip4_next_header (ip0);
+
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ /* Update ICMP checksum. */
+ sum0 = icmp0->checksum;
+
+ ASSERT (icmp0->type == ICMP4_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP4_echo_request, ICMP4_echo_reply,
+ icmp46_header_t, type);
+ icmp0->type = ICMP4_echo_reply;
+ icmp0->checksum = ip_csum_fold (sum0);
+ /* Swap source and destination address. */
+ src0 = ip0->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ ip0->src_address.data_u32 = dst0;
+ ip0->dst_address.data_u32 = src0;
+
+ /* Update IP checksum. */
+ sum0 = ip0->checksum;
+
+ sum0 = ip_csum_update (sum0, ip0->ttl, host_config_ttl,
+ ip4_header_t, ttl);
+ ip0->ttl = host_config_ttl;
+
+ sum0 = ip_csum_update (sum0, ip0->fragment_id, fid[0],
+ ip4_header_t, fragment_id);
+ ip0->fragment_id = fid[0];
+ fid += 1;
+
+ ip0->checksum = ip_csum_fold (sum0);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+ /* The counter is charged to ip4_icmp_input_node, not to this node. */
+ vlib_error_count (vm, ip4_icmp_input_node.index,
+ ICMP4_ERROR_ECHO_REPLIES_SENT, frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = {
+ .function = ip4_icmp_echo_request,
+ .name = "ip4-icmp-echo-request",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp_input_trace,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "ip4-load-balance",
+ },
+};
+/* *INDENT-ON* */
+
+typedef enum
+{
+ IP4_ICMP_ERROR_NEXT_DROP,
+ IP4_ICMP_ERROR_NEXT_LOOKUP,
+ IP4_ICMP_ERROR_N_NEXT,
+} ip4_icmp_error_next_t;
+/* Record in b the ICMP type, code and data that ip4_icmp_error reads back. */
+void
+icmp4_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code, u32 data)
+{
+ vnet_buffer (b)->ip.icmp.type = type;
+ vnet_buffer (b)->ip.icmp.code = code;
+ vnet_buffer (b)->ip.icmp.data = data;
+}
+/* Map an ICMP type to its "sent" counter; any other type counts as DROP. */
+static u8
+icmp4_icmp_type_to_error (u8 type)
+{
+ switch (type)
+ {
+ case ICMP4_destination_unreachable:
+ return ICMP4_ERROR_DEST_UNREACH_SENT;
+ case ICMP4_time_exceeded:
+ return ICMP4_ERROR_TTL_EXPIRE_SENT;
+ case ICMP4_parameter_problem:
+ return ICMP4_ERROR_PARAM_PROBLEM_SENT;
+ default:
+ return ICMP4_ERROR_DROP;
+ }
+}
+
+static uword
+ip4_icmp_error (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 *from, *to_next;
+ uword n_left_from, n_left_to_next;
+ ip4_icmp_error_next_t next_index;
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ /* Rewrite each packet in place into an ICMP message sent to its source. */
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0 = from[0];
+ u32 next0 = IP4_ICMP_ERROR_NEXT_LOOKUP;
+ u8 error0 = ICMP4_ERROR_NONE;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0, *out_ip0;
+ icmp46_header_t *icmp0;
+ u32 sw_if_index0, if_add_index0;
+ ip_csum_t sum;
+
+ /* Speculatively enqueue p0 to the current next frame */
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /*
+ * RFC1812 says to keep as much of the original packet as
+ * possible within the minimum MTU (576). We cheat "a little"
+ * here by keeping whatever fits in the first buffer, to be more
+ * efficient
+ */
+ if (PREDICT_FALSE (p0->total_length_not_including_first_buffer))
+ {
+ /* clear current_length of all other buffers in chain */
+ vlib_buffer_t *b = p0;
+ p0->total_length_not_including_first_buffer = 0;
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ b->current_length = 0;
+ }
+ }
+ p0->current_length =
+ p0->current_length > 576 ? 576 : p0->current_length;
+ /* NOTE(review): clamp precedes the header prepend, so total may exceed 576. */
+ /* Add IP header and ICMPv4 header including a 4 byte data field */
+ vlib_buffer_advance (p0,
+ -sizeof (ip4_header_t) -
+ sizeof (icmp46_header_t) - 4);
+ out_ip0 = vlib_buffer_get_current (p0);
+ icmp0 = (icmp46_header_t *) & out_ip0[1];
+
+ /* Fill ip header fields */
+ out_ip0->ip_version_and_header_length = 0x45;
+ out_ip0->tos = 0;
+ out_ip0->length = clib_host_to_net_u16 (p0->current_length);
+ out_ip0->fragment_id = 0;
+ out_ip0->flags_and_fragment_offset = 0;
+ out_ip0->ttl = 0xff;
+ out_ip0->protocol = IP_PROTOCOL_ICMP;
+ out_ip0->dst_address = ip0->src_address;
+ if_add_index0 = ~0;
+ if (PREDICT_TRUE (vec_len (lm->if_address_pool_index_by_sw_if_index)
+ > sw_if_index0))
+ if_add_index0 =
+ lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
+ if (PREDICT_TRUE (if_add_index0 != ~0))
+ {
+ ip_interface_address_t *if_add =
+ pool_elt_at_index (lm->if_address_pool, if_add_index0);
+ ip4_address_t *if_ip =
+ ip_interface_address_get_address (lm, if_add);
+ out_ip0->src_address = *if_ip;
+ }
+ else
+ {
+ /* interface has no IP4 address - should not happen */
+ next0 = IP4_ICMP_ERROR_NEXT_DROP;
+ error0 = ICMP4_ERROR_DROP;
+ }
+ out_ip0->checksum = ip4_header_checksum (out_ip0);
+
+ /* Fill icmp header fields */
+ icmp0->type = vnet_buffer (p0)->ip.icmp.type;
+ icmp0->code = vnet_buffer (p0)->ip.icmp.code;
+ *((u32 *) (icmp0 + 1)) =
+ clib_host_to_net_u32 (vnet_buffer (p0)->ip.icmp.data);
+ icmp0->checksum = 0;
+ sum =
+ ip_incremental_checksum (0, icmp0,
+ p0->current_length -
+ sizeof (ip4_header_t));
+ icmp0->checksum = ~ip_csum_fold (sum);
+
+ /* Update error status */
+ if (error0 == ICMP4_ERROR_NONE)
+ error0 = icmp4_icmp_type_to_error (icmp0->type);
+ vlib_error_count (vm, node->node_index, error0, 1);
+
+ /* Verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
+ .function = ip4_icmp_error,
+ .name = "ip4-icmp-error",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = IP4_ICMP_ERROR_N_NEXT,
+ .next_nodes = {
+ [IP4_ICMP_ERROR_NEXT_DROP] = "error-drop",
+ [IP4_ICMP_ERROR_NEXT_LOOKUP] = "ip4-lookup",
+ },
+
+ .format_trace = format_icmp_input_trace,
+};
+/* *INDENT-ON* */
+
+
+static uword
+unformat_icmp_type_and_code (unformat_input_t * input, va_list * args)
+{
+ icmp46_header_t *h = va_arg (*args, icmp46_header_t *);
+ icmp4_main_t *cm = &icmp4_main;
+ u32 i;
+ /* Try the type-and-code table first (type in bits 8-15, code in 0-7). */
+ if (unformat_user (input, unformat_vlib_number_by_name,
+ cm->type_and_code_by_name, &i))
+ {
+ h->type = (i >> 8) & 0xff;
+ h->code = (i >> 0) & 0xff;
+ }
+ else if (unformat_user (input, unformat_vlib_number_by_name,
+ cm->type_by_name, &i))
+ {
+ h->type = i;
+ h->code = 0;
+ }
+ else
+ return 0;
+
+ return 1;
+}
+
+static void
+icmp4_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 ip_offset, icmp_offset;
+ /* The IP header offset is taken from the edit group just before g. */
+ icmp_offset = g->start_byte_offset;
+ ip_offset = (g - 1)->start_byte_offset;
+
+ while (n_packets >= 1)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ icmp46_header_t *icmp0;
+ u32 len0;
+
+ p0 = vlib_get_buffer (vm, packets[0]);
+ n_packets -= 1;
+ packets += 1;
+ /* NOTE(review): assumes the checksum field is still zero here - confirm. */
+ ASSERT (p0->current_data == 0);
+ ip0 = (void *) (p0->data + ip_offset);
+ icmp0 = (void *) (p0->data + icmp_offset);
+ len0 = clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
+ icmp0->checksum =
+ ~ip_csum_fold (ip_incremental_checksum (0, icmp0, len0));
+ }
+}
+
+typedef struct
+{
+ pg_edit_t type, code;
+ pg_edit_t checksum;
+} pg_icmp46_header_t;
+
+always_inline void
+pg_icmp_header_init (pg_icmp46_header_t * p)
+{
+ /* Initialize a pg edit for each of the type, code and checksum fields. */
+#define _(f) pg_edit_init (&p->f, icmp46_header_t, f);
+ _(type);
+ _(code);
+ _(checksum);
+#undef _
+}
+
+static uword
+unformat_pg_icmp_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_icmp46_header_t *p;
+ u32 group_index;
+ /* Parse "ICMP <type/code>", optional "checksum <n>", then the payload. */
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (icmp46_header_t),
+ &group_index);
+ pg_icmp_header_init (p);
+ /* Left unspecified, the checksum is filled by icmp4_pg_edit_function. */
+ p->checksum.type = PG_EDIT_UNSPECIFIED;
+
+ {
+ icmp46_header_t tmp;
+
+ if (!unformat (input, "ICMP %U", unformat_icmp_type_and_code, &tmp))
+ goto error;
+
+ pg_edit_set_fixed (&p->type, tmp.type);
+ pg_edit_set_fixed (&p->code, tmp.code);
+ }
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "checksum %U",
+ unformat_pg_edit, unformat_pg_number, &p->checksum))
+ ;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = icmp4_pg_edit_function;
+ g->edit_function_opaque = 0;
+ }
+
+ return 1;
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+void
+ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type, u32 node_index)
+{
+ icmp4_main_t *im = &icmp4_main;
+ /* Add node_index as a next of ip4_icmp_input_node and route type to it. */
+ ASSERT ((int) type < ARRAY_LEN (im->ip4_input_next_index_by_type));
+ im->ip4_input_next_index_by_type[type]
+ = vlib_node_add_next (vm, ip4_icmp_input_node.index, node_index);
+}
+
+static clib_error_t *
+icmp4_init (vlib_main_t * vm)
+{
+ ip_main_t *im = &ip_main;
+ ip_protocol_info_t *pi;
+ icmp4_main_t *cm = &icmp4_main;
+ clib_error_t *error;
+ /* ICMPv4 init: protocol hooks, name tables and per-type dispatch. */
+ error = vlib_call_init_function (vm, ip_main_init);
+ if (error)
+ return error;
+
+ /* Install the ICMP header formatter and the packet-generator parser. */
+ pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP);
+ pi->format_header = format_ip4_icmp_header;
+ pi->unformat_pg_edit = unformat_pg_icmp_header;
+
+ cm->type_by_name = hash_create_string (0, sizeof (uword));
+#define _(n,t) hash_set_mem (cm->type_by_name, #t, (n));
+ foreach_icmp4_type;
+#undef _
+ /* Code names get their own table; values are (type << 8) | code. */
+ cm->type_and_code_by_name = hash_create_string (0, sizeof (uword));
+#define _(a,n,t) hash_set_mem (cm->type_and_code_by_name, #t, (n) | (ICMP4_##a << 8));
+ foreach_icmp4_code;
+#undef _
+
+ memset (cm->ip4_input_next_index_by_type,
+ ICMP_INPUT_NEXT_ERROR, sizeof (cm->ip4_input_next_index_by_type));
+ /* Every type starts at the error next; only echo request is registered. */
+ ip4_icmp_register_type (vm, ICMP4_echo_request,
+ ip4_icmp_echo_request_node.index);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (icmp4_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/icmp4.h b/src/vnet/ip/icmp4.h
new file mode 100644
index 00000000..ae805148
--- /dev/null
+++ b/src/vnet/ip/icmp4.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_icmp4_h
+#define included_vnet_icmp4_h
+
+/*
+ * ICMPv4 error/counter list as an X-macro.  Each entry is
+ * _ (SYMBOL, "description"); icmp4_error_t expands SYMBOL into
+ * ICMP4_ERROR_<SYMBOL>.
+ */
+#define foreach_icmp4_error \
+ _ (NONE, "valid packets") \
+ _ (UNKNOWN_TYPE, "unknown type") \
+ _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \
+ _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \
+ _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \
+ _ (OPTIONS_WITH_ODD_LENGTH, \
+ "total option length not multiple of 8 bytes") \
+ _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \
+ _ (ECHO_REPLIES_SENT, "echo replies sent") \
+ _ (DST_LOOKUP_MISS, "icmp4 dst address lookup misses") \
+ _ (DEST_UNREACH_SENT, "destination unreachable response sent") \
+ _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \
+ _ (PARAM_PROBLEM_SENT, "parameter problem response sent") \
+ _ (DROP, "error message dropped")
+
+typedef enum
+{
+#define _(f,s) ICMP4_ERROR_##f,
+ foreach_icmp4_error
+#undef _
+} icmp4_error_t;
+
+/* Trace record holding 64 bytes of raw packet data.
+   NOTE(review): presumably consumed by format_icmp4_input_trace -- confirm. */
+typedef struct
+{
+ u8 packet_data[64];
+} icmp_input_trace_t;
+
+/* Trace formatter for ICMPv4 input; defined outside this header. */
+format_function_t format_icmp4_input_trace;
+/* Register node_index as the node that handles ICMPv4 messages of `type'. */
+void ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type,
+ u32 node_index);
+/* NOTE(review): judging by the signature this stores the ICMP type, code
+   and data on buffer b -- confirm against the definition. */
+void icmp4_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code,
+ u32 data);
+
+#endif /* included_vnet_icmp4_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/icmp46_packet.h b/src/vnet/ip/icmp46_packet.h
new file mode 100644
index 00000000..a86cbd57
--- /dev/null
+++ b/src/vnet/ip/icmp46_packet.h
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * icmp46_packet.h: ip4/ip6 icmp packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_icmp46_packet_h
+#define included_vnet_icmp46_packet_h
+
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+/*
+ * ICMPv4 message types as an X-macro list.  Each entry is
+ * _ (type value, symbolic name); icmp4_type_t expands an entry into
+ * the enumerator ICMP4_<name> = <value>.
+ */
+#define foreach_icmp4_type \
+ _ (0, echo_reply) \
+ _ (3, destination_unreachable) \
+ _ (4, source_quench) \
+ _ (5, redirect) \
+ _ (6, alternate_host_address) \
+ _ (8, echo_request) \
+ _ (9, router_advertisement) \
+ _ (10, router_solicitation) \
+ _ (11, time_exceeded) \
+ _ (12, parameter_problem) \
+ _ (13, timestamp_request) \
+ _ (14, timestamp_reply) \
+ _ (15, information_request) \
+ _ (16, information_reply) \
+ _ (17, address_mask_request) \
+ _ (18, address_mask_reply) \
+ _ (30, traceroute) \
+ _ (31, datagram_conversion_error) \
+ _ (32, mobile_host_redirect) \
+ _ (33, ip6_where_are_you) \
+ _ (34, ip6_i_am_here) \
+ _ (35, mobile_registration_request) \
+ _ (36, mobile_registration_reply) \
+ _ (37, domain_name_request) \
+ _ (38, domain_name_reply) \
+ _ (39, skip) \
+ _ (40, photuris)
+
+/* Code value 0.  NOTE(review): presumably used for message types that
+   define no codes -- confirm with callers. */
+#define icmp_no_code 0
+
+/*
+ * ICMPv4 codes as an X-macro list.  Each entry is
+ * _ (type name, code value, code name); icmp4_code_t expands an entry
+ * into the enumerator ICMP4_<type name>_<code name> = <code value>.
+ */
+#define foreach_icmp4_code \
+ _ (destination_unreachable, 0, destination_unreachable_net) \
+ _ (destination_unreachable, 1, destination_unreachable_host) \
+ _ (destination_unreachable, 2, protocol_unreachable) \
+ _ (destination_unreachable, 3, port_unreachable) \
+ _ (destination_unreachable, 4, fragmentation_needed_and_dont_fragment_set) \
+ _ (destination_unreachable, 5, source_route_failed) \
+ _ (destination_unreachable, 6, destination_network_unknown) \
+ _ (destination_unreachable, 7, destination_host_unknown) \
+ _ (destination_unreachable, 8, source_host_isolated) \
+ _ (destination_unreachable, 9, network_administratively_prohibited) \
+ _ (destination_unreachable, 10, host_administratively_prohibited) \
+ _ (destination_unreachable, 11, network_unreachable_for_type_of_service) \
+ _ (destination_unreachable, 12, host_unreachable_for_type_of_service) \
+ _ (destination_unreachable, 13, communication_administratively_prohibited) \
+ _ (destination_unreachable, 14, host_precedence_violation) \
+ _ (destination_unreachable, 15, precedence_cutoff_in_effect) \
+ _ (redirect, 0, network_redirect) \
+ _ (redirect, 1, host_redirect) \
+ _ (redirect, 2, type_of_service_and_network_redirect) \
+ _ (redirect, 3, type_of_service_and_host_redirect) \
+ _ (router_advertisement, 0, normal_router_advertisement) \
+ _ (router_advertisement, 16, does_not_route_common_traffic) \
+ _ (time_exceeded, 0, ttl_exceeded_in_transit) \
+ _ (time_exceeded, 1, fragment_reassembly_time_exceeded) \
+ _ (parameter_problem, 0, pointer_indicates_error) \
+ _ (parameter_problem, 1, missing_required_option) \
+ _ (parameter_problem, 2, bad_length)
+
+/*
+ * ICMPv6 message types as an X-macro list.  Each entry is
+ * _ (type value, symbolic name); icmp6_type_t expands an entry into
+ * the enumerator ICMP6_<name> = <value>.
+ */
+#define foreach_icmp6_type \
+ _ (1, destination_unreachable) \
+ _ (2, packet_too_big) \
+ _ (3, time_exceeded) \
+ _ (4, parameter_problem) \
+ _ (128, echo_request) \
+ _ (129, echo_reply) \
+ _ (130, multicast_listener_request) \
+ _ (131, multicast_listener_report) \
+ _ (132, multicast_listener_done) \
+ _ (133, router_solicitation) \
+ _ (134, router_advertisement) \
+ _ (135, neighbor_solicitation) \
+ _ (136, neighbor_advertisement) \
+ _ (137, redirect) \
+ _ (138, router_renumbering) \
+ _ (139, node_information_request) \
+ _ (140, node_information_response) \
+ _ (141, inverse_neighbor_solicitation) \
+ _ (142, inverse_neighbor_advertisement) \
+ _ (143, multicast_listener_report_v2) \
+ _ (144, home_agent_address_discovery_request) \
+ _ (145, home_agent_address_discovery_reply) \
+ _ (146, mobile_prefix_solicitation) \
+ _ (147, mobile_prefix_advertisement) \
+ _ (148, certification_path_solicitation) \
+ _ (149, certification_path_advertisement) \
+ _ (151, multicast_router_advertisement) \
+ _ (152, multicast_router_solicitation) \
+ _ (153, multicast_router_termination) \
+ _ (154, fmipv6_messages)
+
+/*
+ * ICMPv6 codes as an X-macro list.  Each entry is
+ * _ (type name, code value, code name); icmp6_code_t expands an entry
+ * into the enumerator ICMP6_<type name>_<code name> = <code value>.
+ */
+#define foreach_icmp6_code \
+ _ (destination_unreachable, 0, no_route_to_destination) \
+ _ (destination_unreachable, 1, destination_administratively_prohibited) \
+ _ (destination_unreachable, 2, beyond_scope_of_source_address) \
+ _ (destination_unreachable, 3, address_unreachable) \
+ _ (destination_unreachable, 4, port_unreachable) \
+ _ (destination_unreachable, 5, source_address_failed_policy) \
+ _ (destination_unreachable, 6, reject_route_to_destination) \
+ _ (time_exceeded, 0, ttl_exceeded_in_transit) \
+ _ (time_exceeded, 1, fragment_reassembly_time_exceeded) \
+ _ (parameter_problem, 0, erroneous_header_field) \
+ _ (parameter_problem, 1, unrecognized_next_header) \
+ _ (parameter_problem, 2, unrecognized_option) \
+ _ (router_renumbering, 0, command) \
+ _ (router_renumbering, 1, result) \
+ _ (node_information_request, 0, data_contains_ip6_address) \
+ _ (node_information_request, 1, data_contains_name) \
+ _ (node_information_request, 2, data_contains_ip4_address) \
+ _ (node_information_response, 0, success) \
+ _ (node_information_response, 1, failed) \
+ _ (node_information_response, 2, unknown_request)
+
+/* ICMPv4 message types: ICMP4_<name> = <type value>, one enumerator per
+   foreach_icmp4_type entry. */
+typedef enum
+{
+#define _(value, sym) ICMP4_##sym = value,
+  foreach_icmp4_type
+#undef _
+} icmp4_type_t;
+
+/* ICMPv4 codes: ICMP4_<type name>_<code name> = <code value>, one
+   enumerator per foreach_icmp4_code entry. */
+typedef enum
+{
+#define _(type_sym, value, code_sym) ICMP4_##type_sym##_##code_sym = value,
+  foreach_icmp4_code
+#undef _
+} icmp4_code_t;
+
+/* ICMPv6 message types: ICMP6_<name> = <type value>, one enumerator per
+   foreach_icmp6_type entry. */
+typedef enum
+{
+#define _(value, sym) ICMP6_##sym = value,
+  foreach_icmp6_type
+#undef _
+} icmp6_type_t;
+
+/* ICMPv6 codes: ICMP6_<type name>_<code name> = <code value>, one
+   enumerator per foreach_icmp6_code entry. */
+typedef enum
+{
+#define _(type_sym, value, code_sym) ICMP6_##type_sym##_##code_sym = value,
+  foreach_icmp6_code
+#undef _
+} icmp6_code_t;
+
+/* Packed ICMP header: type, code and checksum.  The name and its use by
+   the icmp6 structures below indicate it is shared by ICMPv4 and ICMPv6. */
+typedef CLIB_PACKED (struct
+ {
+ u8 type;
+ u8 code;
+ /* IP checksum of icmp header plus data which follows. */
+ u16 checksum;
+ }) icmp46_header_t;
+
+/*
+ * ip6 neighbor discovery option types as an X-macro list.  Each entry
+ * is _ (option type value, symbolic name);
+ * icmp6_neighbor_discovery_option_type_t expands an entry into
+ * ICMP6_NEIGHBOR_DISCOVERY_OPTION_<name> = <value>.
+ */
+#define foreach_icmp6_neighbor_discovery_option \
+ _ (1, source_link_layer_address) \
+ _ (2, target_link_layer_address) \
+ _ (3, prefix_information) \
+ _ (4, redirected_header) \
+ _ (5, mtu) \
+ _ (6, nbma_shortcut_limit) \
+ _ (7, advertisement_interval) \
+ _ (8, home_agent_information) \
+ _ (9, source_address_list) \
+ _ (10, target_address_list) \
+ _ (11, cryptographically_generated_address) \
+ _ (12, rsa_signature) \
+ _ (13, timestamp) \
+ _ (14, nonce) \
+ _ (15, trust_anchor) \
+ _ (16, certificate) \
+ _ (17, ip_address_and_prefix) \
+ _ (18, new_router_prefix_information) \
+ _ (19, mobile_link_layer_address) \
+ _ (20, neighbor_advertisement_acknowledgment) \
+ _ (23, map) \
+ _ (24, route_information) \
+ _ (25, recursive_dns_server) \
+ _ (26, ra_flags_extension) \
+ _ (27, handover_key_request) \
+ _ (28, handover_key_reply) \
+ _ (29, handover_assist_information) \
+ _ (30, mobile_node_identifier) \
+ _ (31, dns_search_list) \
+ _ (138, card_request) \
+ _ (139, card_reply)
+
+/* Neighbor discovery option types:
+   ICMP6_NEIGHBOR_DISCOVERY_OPTION_<name> = <option type value>, one
+   enumerator per foreach_icmp6_neighbor_discovery_option entry. */
+typedef enum icmp6_neighbor_discovery_option_type
+{
+#define _(value, sym) ICMP6_NEIGHBOR_DISCOVERY_OPTION_##sym = value,
+  foreach_icmp6_neighbor_discovery_option
+#undef _
+} icmp6_neighbor_discovery_option_type_t;
+
+/* Common two-byte header that starts every neighbor discovery option;
+   the option-specific structures below embed it as their first member. */
+typedef CLIB_PACKED (struct
+ {
+ /* Option type. */
+ u8 type;
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+ /* Option data follows. */
+ u8 data[0];
+ }) icmp6_neighbor_discovery_option_header_t;
+
+/* Prefix information neighbor discovery option (packed wire layout). */
+typedef CLIB_PACKED (struct
+  {
+    icmp6_neighbor_discovery_option_header_t header;
+    /* NOTE(review): presumably the prefix length in bits of
+       dst_address -- confirm. */
+    u8 dst_address_length;
+    /* Bit flags; values are defined just below. */
+    u8 flags;
+#define ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_FLAG_ON_LINK (1 << 7)
+#define ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_AUTO (1 << 6)
+    u32 valid_time;
+    u32 preferred_time;
+    u32 unused;
+    ip6_address_t dst_address;
+  }) icmp6_neighbor_discovery_prefix_information_option_t;
+
+/* One multicast address record (packed wire layout); used as the
+   trailing array element of icmp6_multicast_listener_report_header_t. */
+typedef CLIB_PACKED (struct
+  {
+    u8 type;
+    u8 aux_data_len_u32s;
+    u16 num_sources;
+    ip6_address_t mcast_addr;
+    /* Zero-length trailing array; NOTE(review): presumably num_sources
+       entries follow -- confirm. */
+    ip6_address_t source_addr[0];
+  }) icmp6_multicast_address_record_t;
+
+/* Multicast listener report as laid out after the ip6 header: a
+   hop-by-hop extension header with a router alert option and padding,
+   then the ICMP header and the address records. */
+typedef CLIB_PACKED (struct
+ {
+ ip6_hop_by_hop_ext_t ext_hdr;
+ ip6_router_alert_option_t alert;
+ ip6_padN_option_t pad;
+ icmp46_header_t icmp;
+ u16 rsvd;
+ u16 num_addr_records;
+ icmp6_multicast_address_record_t records[0];
+ }) icmp6_multicast_listener_report_header_t;
+
+/* Redirected header neighbor discovery option: option header, six
+   reserved bytes, then the redirected packet. */
+typedef CLIB_PACKED (struct
+ {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 reserved[6];
+ /* IP6 header plus payload follows. */
+ u8 data[0];
+ }) icmp6_neighbor_discovery_redirected_header_option_t;
+
+/* MTU neighbor discovery option (packed wire layout). */
+typedef CLIB_PACKED (struct
+  {
+    icmp6_neighbor_discovery_option_header_t header;
+    u16 unused;
+    u32 mtu;
+  }) icmp6_neighbor_discovery_mtu_option_t;
+
+/* Link layer address option carrying a 6-byte ethernet address. */
+typedef CLIB_PACKED (struct
+ {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 ethernet_address[6];
+ })
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t;
+
+/* Link layer address option with room for a 14 (6 + 8) byte address.
+   NOTE(review): presumably the largest link layer address supported --
+   confirm with users of this type. */
+typedef CLIB_PACKED (struct
+ {
+ icmp6_neighbor_discovery_option_header_t header;
+ u8 max_l2_address[6 + 8];
+ })
+ icmp6_neighbor_discovery_max_link_layer_address_option_t;
+
+/* Generic neighbor discovery header: the ICMP header followed by a
+   32-bit reserved word.  Used for router solicitations, etc. */
+typedef CLIB_PACKED (struct
+  {
+    icmp46_header_t icmp;
+    u32 reserved_must_be_zero;
+  }) icmp6_neighbor_discovery_header_t;
+
+/* Router advertisement packet formats. */
+typedef CLIB_PACKED (struct
+ {
+ icmp46_header_t icmp;
+ /* Current hop limit to use for outgoing packets. */
+ u8 current_hop_limit;
+ /* Bit flags; values are defined just below. */
+ u8 flags;
+#define ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP (1 << 7)
+#define ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP (1 << 6)
+ /* Zero means unspecified. */
+ u16 router_lifetime_in_sec;
+ /* Zero means unspecified. */
+ u32 neighbor_reachable_time_in_msec;
+ /* Zero means unspecified. */
+ u32
+ time_in_msec_between_retransmitted_neighbor_solicitations;
+ /* Options that may follow: source_link_layer_address, mtu, prefix_information. */
+ }) icmp6_router_advertisement_header_t;
+
+/* Neighbor solicitation/advertisement header. */
+typedef CLIB_PACKED (struct
+  {
+    icmp46_header_t icmp;
+    /* Zero for solicitation; flags for advertisement. */
+    u32 advertisement_flags;
+    /* The flag constants are unsigned: (1 << 31) on a signed int shifts
+       into the sign bit, which is undefined behavior in C. */
+    /* Set when sent by a router. */
+#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_ROUTER (1u << 31)
+    /* Set when response to solicitation. */
+#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED (1u << 30)
+#define ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE (1u << 29)
+    ip6_address_t target_address;
+    /* Options that may follow: source_link_layer_address
+       (for solicitation) target_link_layer_address (for advertisement). */
+  }) icmp6_neighbor_solicitation_or_advertisement_header_t;
+
+/* Redirect message header: ICMP header, a reserved word, then the
+   better next hop and destination addresses. */
+typedef CLIB_PACKED (struct
+ {
+ icmp46_header_t icmp;
+ u32 reserved_must_be_zero;
+ /* Better next hop to use for given destination. */
+ ip6_address_t better_next_hop_address;
+ ip6_address_t dst_address;
+ /* Options that may follow: target_link_layer_address,
+ redirected_header. */
+ }) icmp6_redirect_header_t;
+
+/* Solicitation/advertisement packet format for ethernet: ip6 header,
+   neighbor solicitation/advertisement header, then one ethernet link
+   layer address option. */
+typedef CLIB_PACKED (struct
+ {
+ ip6_header_t ip;
+ icmp6_neighbor_solicitation_or_advertisement_header_t
+ neighbor;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ link_layer_option;
+ }) icmp6_neighbor_solicitation_header_t;
+
+/* Router solicitation packet format for ethernet: ip6 header, generic
+   neighbor discovery header, then one ethernet link layer address
+   option. */
+typedef CLIB_PACKED (struct
+ {
+ ip6_header_t ip;
+ icmp6_neighbor_discovery_header_t neighbor;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ link_layer_option;
+ }) icmp6_router_solicitation_header_t;
+
+/* Router advertisement packet format for ethernet: ip6 header, router
+   advertisement header, link layer address and mtu options, followed by
+   a zero-length trailing array of prefix information options. */
+typedef CLIB_PACKED (struct
+ {
+ ip6_header_t ip;
+ icmp6_router_advertisement_header_t router;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ link_layer_option;
+ icmp6_neighbor_discovery_mtu_option_t mtu_option;
+ icmp6_neighbor_discovery_prefix_information_option_t
+ prefix[0];
+ }) icmp6_router_advertisement_packet_t;
+
+/* Multicast listener report packet format for ethernet: ip6 header
+   followed by the listener report header. */
+typedef CLIB_PACKED (struct
+ {
+ ip6_header_t ip;
+ icmp6_multicast_listener_report_header_t report_hdr;
+ }) icmp6_multicast_listener_report_packet_t;
+
+#endif /* included_vnet_icmp46_packet_h */
diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c
new file mode 100644
index 00000000..70696d0c
--- /dev/null
+++ b/src/vnet/ip/icmp6.c
@@ -0,0 +1,882 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/icmp6.c: ip6 icmp
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+/*
+ * Format an ICMPv6 type and code by name.
+ *
+ * Consumes two varargs: the type (icmp6_type_t, read as int) and the
+ * code (u8, read as int).  Appends "unknown 0x<type>" when the type is
+ * not in foreach_icmp6_type; otherwise appends the type name, followed
+ * by " <code name>" when foreach_icmp6_code lists the (type, code) pair.
+ */
+static u8 *
+format_ip6_icmp_type_and_code (u8 * s, va_list * args)
+{
+  icmp6_type_t type = va_arg (*args, int);
+  u8 code = va_arg (*args, int);
+  char *type_name = 0;
+  char *code_name = 0;
+
+  switch (type)
+    {
+#define _(n,f) case n: type_name = #f; break;
+      foreach_icmp6_type;
+#undef _
+
+    default:
+      break;
+    }
+
+  if (!type_name)
+    return format (s, "unknown 0x%x", type);
+
+  s = format (s, "%s", type_name);
+
+  /* Codes are looked up on the combined key (type << 8) | code. */
+  switch ((type << 8) | code)
+    {
+#define _(a,n,f) case (ICMP6_##a << 8) | (n): code_name = #f; break;
+      foreach_icmp6_code;
+#undef _
+
+    default:
+      break;
+    }
+
+  if (code_name)
+    s = format (s, " %s", code_name);
+
+  return s;
+}
+
+/*
+ * Format an ICMPv6 header.
+ *
+ * Consumes two varargs: a pointer to the icmp46_header_t and the number
+ * of header bytes available (u32).  Prints type, code and checksum; for
+ * neighbor solicitations and advertisements it also prints the target
+ * address when enough bytes are available.
+ */
+static u8 *
+format_icmp6_header (u8 * s, va_list * args)
+{
+  icmp46_header_t *hdr = va_arg (*args, icmp46_header_t *);
+  u32 bytes_available = va_arg (*args, u32);
+  icmp6_neighbor_solicitation_or_advertisement_header_t *nd;
+  int is_neighbor_msg;
+
+  /* Not even the fixed header fits. */
+  if (bytes_available < sizeof (hdr[0]))
+    return format (s, "ICMP header truncated");
+
+  s = format (s, "ICMP %U checksum 0x%x",
+              format_ip6_icmp_type_and_code, hdr->type, hdr->code,
+              clib_net_to_host_u16 (hdr->checksum));
+
+  is_neighbor_msg = (hdr->type == ICMP6_neighbor_solicitation
+                     || hdr->type == ICMP6_neighbor_advertisement);
+
+  if (!is_neighbor_msg || bytes_available < sizeof (nd[0]))
+    return s;
+
+  nd = (icmp6_neighbor_solicitation_or_advertisement_header_t *) hdr;
+  return format (s, "\n target address %U",
+                 format_ip6_address, &nd->target_address);
+}
+
+/*
+ * Trace formatter for the ICMPv6 nodes.  Consumes three varargs (vlib
+ * main, node, trace record) and prints the captured packet data with
+ * format_ip6_header.
+ */
+u8 *
+format_icmp6_input_trace (u8 * s, va_list * va)
+{
+  /* The first two arguments are not needed but must still be consumed. */
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+  icmp6_input_trace_t *trace = va_arg (*va, icmp6_input_trace_t *);
+
+  return format (s, "%U", format_ip6_header,
+                 trace->packet_data, sizeof (trace->packet_data));
+}
+
+/* Description strings for the ICMPv6 errors, one per foreach_icmp6_error
+   entry, in list order. */
+static char *icmp_error_strings[] = {
+#define _(sym, text) text,
+  foreach_icmp6_error
+#undef _
+};
+
+/* Next-node indices of the ICMPv6 input node.  DROP is the only
+   statically registered next; ip6_icmp_input also treats a DROP entry in
+   the per-type dispatch table as "unknown type". */
+typedef enum
+{
+ ICMP_INPUT_NEXT_DROP,
+ ICMP_INPUT_N_NEXT,
+} icmp_input_next_t;
+
+/* ICMPv6 state: name hashes plus the per-type validation and dispatch
+   tables read by ip6_icmp_input. */
+typedef struct
+{
+ /* NOTE(review): presumably a hash from name to (type << 8) | code --
+    not populated in the visible code; confirm. */
+ uword *type_and_code_by_name;
+
+ /* NOTE(review): presumably a hash from type name to type value --
+    confirm. */
+ uword *type_by_name;
+
+ /* Vector dispatch table indexed by [icmp type]. */
+ u8 input_next_index_by_type[256];
+
+ /* Max valid code indexed by icmp type. */
+ u8 max_valid_code_by_type[256];
+
+ /* hop_limit must be >= this value for this icmp type. */
+ u8 min_valid_hop_limit_by_type[256];
+
+ /* ip6 payload_length must be >= this value for this icmp type. */
+ u8 min_valid_length_by_type[256];
+} icmp6_main_t;
+
+/* The single global instance. */
+icmp6_main_t icmp6_main;
+
+/*
+ * ICMPv6 input node function.
+ *
+ * For every buffer in the frame: look up the next node from the ICMP
+ * type, then validate code, hop limit and payload length against the
+ * per-type tables in icmp6_main.  Any failure sends the packet to
+ * ICMP_INPUT_NEXT_DROP with the matching error; when several checks
+ * fail, the last failing check determines the error.  Returns the
+ * number of vectors in the frame.
+ */
+static uword
+ip6_icmp_input (vlib_main_t * vm,
+                vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  icmp6_main_t *icmp = &icmp6_main;
+  u32 *from = vlib_frame_vector_args (frame);
+  u32 n_left_from = frame->n_vectors;
+  u32 next_index = node->cached_next_index;
+  u32 *to_next;
+  u32 n_left_to_next;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+                                   /* stride */ 1,
+                                   sizeof (icmp6_input_trace_t));
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t *buf;
+          ip6_header_t *ip;
+          icmp46_header_t *hdr;
+          icmp6_type_t type;
+          u32 bi, next_node, err, payload_len;
+
+          /* Speculatively enqueue to the current next frame. */
+          bi = to_next[0] = from[0];
+
+          from += 1;
+          n_left_from -= 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+
+          buf = vlib_get_buffer (vm, bi);
+          ip = vlib_buffer_get_current (buf);
+          hdr = ip6_next_header (ip);
+          type = hdr->type;
+
+          err = ICMP6_ERROR_NONE;
+
+          /* A DROP entry in the dispatch table means nobody handles
+             this type. */
+          next_node = icmp->input_next_index_by_type[type];
+          if (next_node == ICMP_INPUT_NEXT_DROP)
+            err = ICMP6_ERROR_UNKNOWN_TYPE;
+
+          /* Check code is valid for type. */
+          if (hdr->code > icmp->max_valid_code_by_type[type])
+            err = ICMP6_ERROR_INVALID_CODE_FOR_TYPE;
+
+          /* Checksum is already validated by ip6_local node so we don't
+             need to check that. */
+
+          /* Check that hop limit == 255 for certain types. */
+          if (ip->hop_limit < icmp->min_valid_hop_limit_by_type[type])
+            err = ICMP6_ERROR_INVALID_HOP_LIMIT_FOR_TYPE;
+
+          payload_len = clib_net_to_host_u16 (ip->payload_length);
+          if (payload_len < icmp->min_valid_length_by_type[type])
+            err = ICMP6_ERROR_LENGTH_TOO_SMALL_FOR_TYPE;
+
+          buf->error = node->errors[err];
+
+          if (err != ICMP6_ERROR_NONE)
+            next_node = ICMP_INPUT_NEXT_DROP;
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi, next_node);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_input_node) = {
+ .function = ip6_icmp_input,
+ .name = "ip6-icmp-input",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [ICMP_INPUT_NEXT_DROP] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* Next-node indices of the echo request node: LOOKUP is the default,
+   OUTPUT is used for replies to link-local destinations. */
+typedef enum
+{
+ ICMP6_ECHO_REQUEST_NEXT_LOOKUP,
+ ICMP6_ECHO_REQUEST_NEXT_OUTPUT,
+ ICMP6_ECHO_REQUEST_N_NEXT,
+} icmp6_echo_request_next_t;
+
+/*
+ * Rewrite one ICMPv6 echo request into an echo reply, in place.
+ *
+ * Changes the ICMP type and incrementally updates the checksum, swaps
+ * the ip6 source and destination addresses and sets the hop limit from
+ * im->host_config.ttl.  Then picks the next node:
+ *  - link-local destination: the buffer is reset, the MAC addresses of
+ *    the ethernet header found there are swapped, TX is set to the RX
+ *    interface and the packet goes to ICMP6_ECHO_REQUEST_NEXT_OUTPUT;
+ *  - otherwise: sw_if_index[VLIB_TX] is set to the fib index of the RX
+ *    interface and the packet goes to ICMP6_ECHO_REQUEST_NEXT_LOOKUP.
+ * In both cases sw_if_index[VLIB_RX] ends up as the local interface.
+ *
+ * Returns the next-node index for the buffer.
+ */
+static inline u32
+icmp6_echo_request_to_reply (ip6_main_t * im, vlib_buffer_t * p0)
+{
+  ip6_header_t *ip0 = vlib_buffer_get_current (p0);
+  icmp46_header_t *icmp0 = ip6_next_header (ip0);
+  ip6_address_t tmp0;
+  ip_csum_t sum0;
+  u32 fib_index0;
+  u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+
+  /* Change icmp type to echo reply and update icmp checksum. */
+  sum0 = icmp0->checksum;
+
+  ASSERT (icmp0->type == ICMP6_echo_request);
+  sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
+                         icmp46_header_t, type);
+
+  icmp0->checksum = ip_csum_fold (sum0);
+
+  icmp0->type = ICMP6_echo_reply;
+
+  /* Swap source and destination address. */
+  tmp0 = ip0->src_address;
+  ip0->src_address = ip0->dst_address;
+  ip0->dst_address = tmp0;
+
+  /* New hop count. */
+  ip0->hop_limit = im->host_config.ttl;
+
+  if (ip6_address_is_link_local_unicast (&ip0->dst_address))
+    {
+      ethernet_header_t *eth0;
+      u8 tmp_mac[6];
+      /* For link local, reuse current MAC header by swapping
+       * SMAC to DMAC instead of IP6 lookup since link local
+       * is not in the IP6 FIB */
+      vlib_buffer_reset (p0);
+      eth0 = vlib_buffer_get_current (p0);
+      clib_memcpy (tmp_mac, eth0->dst_address, 6);
+      clib_memcpy (eth0->dst_address, eth0->src_address, 6);
+      clib_memcpy (eth0->src_address, tmp_mac, 6);
+      vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+        vnet_buffer (p0)->sw_if_index[VLIB_RX];
+      next0 = ICMP6_ECHO_REQUEST_NEXT_OUTPUT;
+    }
+  else
+    {
+      /* Determine the correct lookup fib index. */
+      fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+                            vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+      vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
+    }
+
+  /* Must come after the branches above, which still read the original
+     RX interface. */
+  vnet_buffer (p0)->sw_if_index[VLIB_RX]
+    = vnet_main.local_interface_sw_if_index;
+
+  return next0;
+}
+
+/*
+ * ICMPv6 echo request node function.
+ *
+ * Turns every echo request in the frame into an echo reply (see
+ * icmp6_echo_request_to_reply) and enqueues it to the chosen next
+ * node.  Counts frame->n_vectors ICMP6_ERROR_ECHO_REPLIES_SENT against
+ * ip6_icmp_input_node and returns the number of vectors in the frame.
+ */
+static uword
+ip6_icmp_echo_request (vlib_main_t * vm,
+                       vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 *from, *to_next;
+  u32 n_left_from, n_left_to_next, next_index;
+  ip6_main_t *im = &ip6_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+                                   /* stride */ 1,
+                                   sizeof (icmp6_input_trace_t));
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Dual loop: two packets per iteration. */
+      while (n_left_from > 2 && n_left_to_next > 2)
+        {
+          u32 bi0, bi1;
+          u32 next0, next1;
+
+          bi0 = to_next[0] = from[0];
+          bi1 = to_next[1] = from[1];
+
+          from += 2;
+          n_left_from -= 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+
+          next0 = icmp6_echo_request_to_reply (im, vlib_get_buffer (vm, bi0));
+          next1 = icmp6_echo_request_to_reply (im, vlib_get_buffer (vm, bi1));
+
+          /* verify speculative enqueues, maybe switch current next frame */
+          /* if next0==next1==next_index then nothing special needs to be done */
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, bi1, next0, next1);
+        }
+
+      /* Single loop: remaining packets. */
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 bi0;
+          u32 next0;
+
+          bi0 = to_next[0] = from[0];
+
+          from += 1;
+          n_left_from -= 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+
+          next0 = icmp6_echo_request_to_reply (im, vlib_get_buffer (vm, bi0));
+
+          /* Verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_error_count (vm, ip6_icmp_input_node.index,
+                    ICMP6_ERROR_ECHO_REPLIES_SENT, frame->n_vectors);
+
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+/* Echo request graph node (registered static).  It declares no error
+   strings of its own; ip6_icmp_echo_request counts replies against
+   ip6_icmp_input_node instead. */
+VLIB_REGISTER_NODE (ip6_icmp_echo_request_node,static) = {
+ .function = ip6_icmp_echo_request,
+ .name = "ip6-icmp-echo-request",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = ICMP6_ECHO_REQUEST_N_NEXT,
+ .next_nodes = {
+ [ICMP6_ECHO_REQUEST_NEXT_LOOKUP] = "ip6-lookup",
+ [ICMP6_ECHO_REQUEST_NEXT_OUTPUT] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+/* Next-node indices used by ip6_icmp_error: LOOKUP is its default, DROP
+   is used when the receiving interface has no IP6 address. */
+typedef enum
+{
+ IP6_ICMP_ERROR_NEXT_DROP,
+ IP6_ICMP_ERROR_NEXT_LOOKUP,
+ IP6_ICMP_ERROR_N_NEXT,
+} ip6_icmp_error_next_t;
+
+/*
+ * Store the ICMP type, code and 32-bit data word in buffer B's
+ * ip.icmp metadata.  ip6_icmp_error reads these fields back when it
+ * builds the ICMPv6 error message.
+ */
+void
+icmp6_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code, u32 data)
+{
+  /* Three independent stores into the buffer's opaque area. */
+  vnet_buffer (b)->ip.icmp.data = data;
+  vnet_buffer (b)->ip.icmp.code = code;
+  vnet_buffer (b)->ip.icmp.type = type;
+}
+
+/*
+ * Map the ICMPv6 type of a generated error message to the "... sent"
+ * counter that records it.  Types without a dedicated counter map to
+ * ICMP6_ERROR_DROP.
+ */
+static u8
+icmp6_icmp_type_to_error (u8 type)
+{
+  if (type == ICMP6_destination_unreachable)
+    return ICMP6_ERROR_DEST_UNREACH_SENT;
+
+  if (type == ICMP6_packet_too_big)
+    return ICMP6_ERROR_PACKET_TOO_BIG_SENT;
+
+  if (type == ICMP6_time_exceeded)
+    return ICMP6_ERROR_TTL_EXPIRE_SENT;
+
+  if (type == ICMP6_parameter_problem)
+    return ICMP6_ERROR_PARAM_PROBLEM_SENT;
+
+  return ICMP6_ERROR_DROP;
+}
+
+/*
+ * Node function for "ip6-icmp-error": turn every buffer of the frame into
+ * an ICMPv6 error message addressed to the source of the packet it holds.
+ *
+ * The ICMP type, code and 4 byte data word are read from
+ * vnet_buffer (p0)->ip.icmp.  An IPv6 header, an ICMP header and the data
+ * word are prepended in front of the offending packet.  The source address
+ * of the new packet is the interface address indexed by
+ * if_address_pool_index_by_sw_if_index for the RX interface; when there is
+ * none the buffer is sent to IP6_ICMP_ERROR_NEXT_DROP, otherwise it goes to
+ * IP6_ICMP_ERROR_NEXT_LOOKUP.
+ *
+ * Returns the number of buffers in the frame.
+ */
+static uword
+ip6_icmp_error (vlib_main_t * vm,
+                vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 *from, *to_next;
+  uword n_left_from, n_left_to_next;
+  ip6_icmp_error_next_t next_index;
+  ip6_main_t *im = &ip6_main;
+  ip_lookup_main_t *lm = &im->lookup_main;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+                                   /* stride */ 1,
+                                   sizeof (icmp6_input_trace_t));
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 pi0 = from[0];
+          u32 next0 = IP6_ICMP_ERROR_NEXT_LOOKUP;
+          u8 error0 = ICMP6_ERROR_NONE;
+          vlib_buffer_t *p0;
+          ip6_header_t *ip0, *out_ip0;
+          icmp46_header_t *icmp0;
+          u32 sw_if_index0, if_add_index0;
+          int bogus_length;
+
+          /* Speculatively enqueue p0 to the current next frame */
+          to_next[0] = pi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          ip0 = vlib_buffer_get_current (p0);
+          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+          /* RFC4443 says to keep as much of the original packet as possible
+           * within the minimum MTU. We cheat "a little" here by keeping
+           * whatever fits in the first buffer, to be more efficient */
+          if (PREDICT_FALSE (p0->total_length_not_including_first_buffer))
+            {
+              /* clear current_length of all other buffers in chain */
+              vlib_buffer_t *b = p0;
+              p0->total_length_not_including_first_buffer = 0;
+              while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+                {
+                  b = vlib_get_buffer (vm, b->next_buffer);
+                  b->current_length = 0;
+                }
+            }
+
+          /* Add IP header and ICMPv6 header including a 4 byte data field */
+          vlib_buffer_advance (p0,
+                               -sizeof (ip6_header_t) -
+                               sizeof (icmp46_header_t) - 4);
+
+          /* Clamp only after the headers have been prepended, so that the
+           * complete error message (headers plus quoted packet), and not
+           * just the quoted packet, stays within the 1280 byte minimum
+           * IPv6 MTU. */
+          if (p0->current_length > 1280)
+            p0->current_length = 1280;
+
+          out_ip0 = vlib_buffer_get_current (p0);
+          icmp0 = (icmp46_header_t *) & out_ip0[1];
+
+          /* Fill ip header fields */
+          out_ip0->ip_version_traffic_class_and_flow_label =
+            clib_host_to_net_u32 (0x6 << 28);
+
+          out_ip0->payload_length =
+            clib_host_to_net_u16 (p0->current_length - sizeof (ip6_header_t));
+          out_ip0->protocol = IP_PROTOCOL_ICMP6;
+          out_ip0->hop_limit = 0xff;
+          /* Reply goes back to the sender of the offending packet */
+          out_ip0->dst_address = ip0->src_address;
+          if_add_index0 =
+            lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
+          if (PREDICT_TRUE (if_add_index0 != ~0))
+            {
+              ip_interface_address_t *if_add =
+                pool_elt_at_index (lm->if_address_pool, if_add_index0);
+              ip6_address_t *if_ip =
+                ip_interface_address_get_address (lm, if_add);
+              out_ip0->src_address = *if_ip;
+            }
+          else                  /* interface has no IP6 address - should not happen */
+            {
+              next0 = IP6_ICMP_ERROR_NEXT_DROP;
+              error0 = ICMP6_ERROR_DROP;
+            }
+
+          /* Fill icmp header fields */
+          icmp0->type = vnet_buffer (p0)->ip.icmp.type;
+          icmp0->code = vnet_buffer (p0)->ip.icmp.code;
+          *((u32 *) (icmp0 + 1)) =
+            clib_host_to_net_u32 (vnet_buffer (p0)->ip.icmp.data);
+          /* The checksum field must be zero while the checksum is computed */
+          icmp0->checksum = 0;
+          icmp0->checksum =
+            ip6_tcp_udp_icmp_compute_checksum (vm, p0, out_ip0,
+                                               &bogus_length);
+
+          /* Update error status */
+          if (error0 == ICMP6_ERROR_NONE)
+            error0 = icmp6_icmp_type_to_error (icmp0->type);
+          vlib_error_count (vm, node->node_index, error0, 1);
+
+          /* Verify speculative enqueue, maybe switch current next frame */
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, next0);
+        }
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+/* Registration of the "ip6-icmp-error" graph node: ip6_icmp_error is its
+ * node function, its vector elements are u32 sized, its error counters use
+ * icmp_error_strings, and its next nodes are "error-drop" and
+ * "ip6-lookup". */
+VLIB_REGISTER_NODE (ip6_icmp_error_node) = {
+ .function = ip6_icmp_error,
+ .name = "ip6-icmp-error",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN (icmp_error_strings),
+ .error_strings = icmp_error_strings,
+
+ .n_next_nodes = IP6_ICMP_ERROR_N_NEXT,
+ .next_nodes = {
+ [IP6_ICMP_ERROR_NEXT_DROP] = "error-drop",
+ [IP6_ICMP_ERROR_NEXT_LOOKUP] = "ip6-lookup",
+ },
+
+ .format_trace = format_icmp6_input_trace,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * unformat helper that parses an ICMPv6 message name into the type and
+ * code fields of the icmp46_header_t passed as the only variadic argument.
+ *
+ * The name is looked up in type_and_code_by_name first (value holds the
+ * type in bits 8-15 and the code in bits 0-7), then in type_by_name (value
+ * is the type, code is set to 0).  Returns 1 on a match, 0 otherwise.
+ */
+static uword
+unformat_icmp_type_and_code (unformat_input_t * input, va_list * args)
+{
+  icmp46_header_t *hdr = va_arg (*args, icmp46_header_t *);
+  icmp6_main_t *cm = &icmp6_main;
+  u32 value;
+
+  if (unformat_user (input, unformat_vlib_number_by_name,
+                     cm->type_and_code_by_name, &value))
+    {
+      hdr->type = (value >> 8) & 0xff;
+      hdr->code = (value >> 0) & 0xff;
+      return 1;
+    }
+
+  if (unformat_user (input, unformat_vlib_number_by_name,
+                     cm->type_by_name, &value))
+    {
+      hdr->type = value;
+      hdr->code = 0;
+      return 1;
+    }
+
+  return 0;
+}
+
+/*
+ * Packet generator edit function: store the ICMPv6 checksum in each of the
+ * n_packets buffers listed in packets.
+ *
+ * The ICMP header is located at the start offset of edit group g and the
+ * IPv6 header at the start offset of the group just before it (g - 1);
+ * both offsets are relative to the start of the buffer data.
+ */
+static void
+icmp6_pg_edit_function (pg_main_t * pg,
+                        pg_stream_t * s,
+                        pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  u32 icmp_offset = g->start_byte_offset;
+  u32 ip_offset = (g - 1)->start_byte_offset;
+  u32 k;
+
+  for (k = 0; k < n_packets; k++)
+    {
+      vlib_buffer_t *b = vlib_get_buffer (vm, packets[k]);
+      ip6_header_t *ip;
+      icmp46_header_t *icmp;
+      int bogus_length;
+
+      /* The offsets above are only valid from the start of the data area */
+      ASSERT (b->current_data == 0);
+      ip = (void *) (b->data + ip_offset);
+      icmp = (void *) (b->data + icmp_offset);
+
+      icmp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip,
+                                                          &bogus_length);
+      ASSERT (bogus_length == 0);
+    }
+}
+
+/* Packet generator edits for an ICMP header: one pg_edit_t per header
+ * field (type, code, checksum). */
+typedef struct
+{
+ pg_edit_t type, code;
+ pg_edit_t checksum;
+} pg_icmp46_header_t;
+
+/* Initialize each edit of p against the icmp46_header_t field of the
+ * same name. */
+always_inline void
+pg_icmp_header_init (pg_icmp46_header_t * p)
+{
+  pg_edit_init (&p->type, icmp46_header_t, type);
+  pg_edit_init (&p->code, icmp46_header_t, code);
+  pg_edit_init (&p->checksum, icmp46_header_t, checksum);
+}
+
+/*
+ * Packet generator parser for an ICMPv6 header.
+ *
+ * Expects "ICMP <type-or-code-name>", optionally followed by
+ * "checksum <edit>", and then the payload.  A new edit group is created on
+ * the stream passed as the only variadic argument.  When no checksum was
+ * given, icmp6_pg_edit_function is installed on the group so that the
+ * checksum is computed per packet.
+ *
+ * Returns 1 on success; on a parse failure the edit group is freed and 0
+ * is returned.
+ */
+static uword
+unformat_pg_icmp_header (unformat_input_t * input, va_list * args)
+{
+  pg_stream_t *stream = va_arg (*args, pg_stream_t *);
+  pg_icmp46_header_t *edits;
+  icmp46_header_t parsed;
+  u32 group_index;
+
+  edits = pg_create_edit_group (stream, sizeof (edits[0]),
+                                sizeof (icmp46_header_t), &group_index);
+  pg_icmp_header_init (edits);
+
+  edits->checksum.type = PG_EDIT_UNSPECIFIED;
+
+  if (!unformat (input, "ICMP %U", unformat_icmp_type_and_code, &parsed))
+    goto error;
+
+  pg_edit_set_fixed (&edits->type, parsed.type);
+  pg_edit_set_fixed (&edits->code, parsed.code);
+
+  /* Consume options until something else shows up; whatever is left
+   * belongs to the next protocol level. */
+  while (unformat (input, "checksum %U",
+                   unformat_pg_edit, unformat_pg_number, &edits->checksum))
+    ;
+
+  if (!unformat_user (input, unformat_pg_payload, stream))
+    goto error;
+
+  if (edits->checksum.type == PG_EDIT_UNSPECIFIED)
+    {
+      pg_edit_group_t *group = pg_stream_get_group (stream, group_index);
+      group->edit_function = icmp6_pg_edit_function;
+      group->edit_function_opaque = 0;
+    }
+
+  return 1;
+
+error:
+  /* Free up any edits we may have added. */
+  pg_free_edit_group (stream);
+  return 0;
+}
+
+/*
+ * Make node_index the handler for ICMPv6 messages of the given type: the
+ * node is added as a next node of ip6_icmp_input_node and the resulting
+ * next index is stored in input_next_index_by_type[type].
+ */
+void
+icmp6_register_type (vlib_main_t * vm, icmp6_type_t type, u32 node_index)
+{
+  icmp6_main_t *cm = &icmp6_main;
+  uword next;
+
+  ASSERT ((int) type < ARRAY_LEN (cm->input_next_index_by_type));
+
+  next = vlib_node_add_next (vm, ip6_icmp_input_node.index, node_index);
+  cm->input_next_index_by_type[type] = next;
+}
+
+/*
+ * Init function of the ICMPv6 layer.
+ *
+ * Runs ip_main_init first, hooks the ICMPv6 header formatter and packet
+ * generator parser into the IP_PROTOCOL_ICMP6 protocol info, builds the
+ * name lookup tables (type_by_name, type_and_code_by_name), fills the
+ * per-type validation tables (next index, max code, min hop limit, min
+ * length), registers the echo request handler and finally runs
+ * ip6_neighbor_init.
+ *
+ * Returns the error of ip_main_init if it fails, otherwise the result of
+ * ip6_neighbor_init.
+ */
+static clib_error_t *
+icmp6_init (vlib_main_t * vm)
+{
+  ip_main_t *im = &ip_main;
+  ip_protocol_info_t *pi;
+  icmp6_main_t *cm = &icmp6_main;
+  clib_error_t *error;
+
+  error = vlib_call_init_function (vm, ip_main_init);
+
+  if (error)
+    return error;
+
+  pi = ip_get_protocol_info (im, IP_PROTOCOL_ICMP6);
+  pi->format_header = format_icmp6_header;
+  pi->unformat_pg_edit = unformat_pg_icmp_header;
+
+  /* type name -> type */
+  cm->type_by_name = hash_create_string (0, sizeof (uword));
+#define _(n,t) hash_set_mem (cm->type_by_name, #t, (n));
+  foreach_icmp6_type;
+#undef _
+
+  /* code name -> (type << 8) | code.  These entries must go into
+   * type_and_code_by_name: unformat_icmp_type_and_code decodes values
+   * found in that table as type/code pairs, whereas values found in
+   * type_by_name are taken as a bare type. */
+  cm->type_and_code_by_name = hash_create_string (0, sizeof (uword));
+#define _(a,n,t) hash_set_mem (cm->type_and_code_by_name, #t, (n) | (ICMP6_##a << 8));
+  foreach_icmp6_code;
+#undef _
+
+  /* Unregistered types are dropped.
+   * NOTE(review): memset fills byte-wise; presumably the per-type tables
+   * have one-byte elements - confirm against icmp6_main_t. */
+  memset (cm->input_next_index_by_type,
+          ICMP_INPUT_NEXT_DROP, sizeof (cm->input_next_index_by_type));
+  memset (cm->max_valid_code_by_type, 0, sizeof (cm->max_valid_code_by_type));
+
+  /* Highest code listed in foreach_icmp6_code for each type */
+#define _(a,n,t) cm->max_valid_code_by_type[ICMP6_##a] = clib_max (cm->max_valid_code_by_type[ICMP6_##a], n);
+  foreach_icmp6_code;
+#undef _
+
+  /* Neighbor discovery messages require a hop limit of 255 */
+  memset (cm->min_valid_hop_limit_by_type, 0,
+          sizeof (cm->min_valid_hop_limit_by_type));
+  cm->min_valid_hop_limit_by_type[ICMP6_router_solicitation] = 255;
+  cm->min_valid_hop_limit_by_type[ICMP6_router_advertisement] = 255;
+  cm->min_valid_hop_limit_by_type[ICMP6_neighbor_solicitation] = 255;
+  cm->min_valid_hop_limit_by_type[ICMP6_neighbor_advertisement] = 255;
+  cm->min_valid_hop_limit_by_type[ICMP6_redirect] = 255;
+
+  /* Default minimum length is the bare ICMP header; neighbor discovery
+   * messages need their own fixed header. */
+  memset (cm->min_valid_length_by_type, sizeof (icmp46_header_t),
+          sizeof (cm->min_valid_length_by_type));
+  cm->min_valid_length_by_type[ICMP6_router_solicitation] =
+    sizeof (icmp6_neighbor_discovery_header_t);
+  cm->min_valid_length_by_type[ICMP6_router_advertisement] =
+    sizeof (icmp6_router_advertisement_header_t);
+  cm->min_valid_length_by_type[ICMP6_neighbor_solicitation] =
+    sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t);
+  cm->min_valid_length_by_type[ICMP6_neighbor_advertisement] =
+    sizeof (icmp6_neighbor_solicitation_or_advertisement_header_t);
+  cm->min_valid_length_by_type[ICMP6_redirect] =
+    sizeof (icmp6_redirect_header_t);
+
+  icmp6_register_type (vm, ICMP6_echo_request,
+                       ip6_icmp_echo_request_node.index);
+
+  return vlib_call_init_function (vm, ip6_neighbor_init);
+}
+
+VLIB_INIT_FUNCTION (icmp6_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/icmp6.h b/src/vnet/ip/icmp6.h
new file mode 100644
index 00000000..9a3487b1
--- /dev/null
+++ b/src/vnet/ip/icmp6.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_icmp6_h
+#define included_vnet_icmp6_h
+
+#define foreach_icmp6_error \
+ _ (NONE, "valid packets") \
+ _ (UNKNOWN_TYPE, "unknown type") \
+ _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \
+ _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \
+ _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \
+ _ (OPTIONS_WITH_ODD_LENGTH, \
+ "total option length not multiple of 8 bytes") \
+ _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \
+ _ (ECHO_REPLIES_SENT, "echo replies sent") \
+ _ (NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK, \
+ "neighbor solicitations from source not on link") \
+ _ (NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN, \
+ "neighbor solicitations for unknown targets") \
+ _ (NEIGHBOR_ADVERTISEMENTS_TX, "neighbor advertisements sent") \
+ _ (NEIGHBOR_ADVERTISEMENTS_RX, "neighbor advertisements received") \
+ _ (ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK, \
+ "router solicitations from source not on link") \
+ _ (ROUTER_SOLICITATION_UNSUPPORTED_INTF, \
+ "neighbor discovery unsupported interface") \
+ _ (ROUTER_SOLICITATION_RADV_NOT_CONFIG, \
+ "neighbor discovery not configured") \
+ _ (ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL, \
+ "router advertisement source not link local") \
+ _ (ROUTER_ADVERTISEMENTS_TX, "router advertisements sent") \
+ _ (ROUTER_ADVERTISEMENTS_RX, "router advertisements received") \
+ _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \
+ _ (DEST_UNREACH_SENT, "destination unreachable response sent") \
+ _ (PACKET_TOO_BIG_SENT, "packet too big response sent") \
+ _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \
+ _ (PARAM_PROBLEM_SENT, "parameter Pproblem response sent") \
+ _ (DROP, "error message dropped")
+
+
+/* One ICMP6_ERROR_<symbol> enumerator per entry of foreach_icmp6_error,
+ * numbered from 0 in list order. */
+typedef enum
+{
+#define _(f,s) ICMP6_ERROR_##f,
+ foreach_icmp6_error
+#undef _
+} icmp6_error_t;
+
+/* Per-packet trace record of the ICMPv6 nodes: room for 64 bytes of
+ * packet data. */
+typedef struct
+{
+ u8 packet_data[64];
+} icmp6_input_trace_t;
+
+/* Trace formatter for icmp6_input_trace_t records. */
+format_function_t format_icmp6_input_trace;
+/* Register the node node_index as handler for ICMPv6 messages of the
+ * given type. */
+void icmp6_register_type (vlib_main_t * vm, icmp6_type_t type,
+ u32 node_index);
+/* Record the ICMPv6 error type, code and data word in the metadata of
+ * buffer b (vnet_buffer (b)->ip.icmp), presumably for later use by the
+ * "ip6-icmp-error" node - TODO confirm against the definition. */
+void icmp6_error_set_vnet_buffer (vlib_buffer_t * b, u8 type, u8 code,
+ u32 data);
+
+/* ICMPv6 input node; per-type handlers are attached to it as next nodes. */
+extern vlib_node_registration_t ip6_icmp_input_node;
+
+#endif /* included_vnet_icmp6_h */
+
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/igmp_packet.h b/src/vnet/ip/igmp_packet.h
new file mode 100644
index 00000000..503259ec
--- /dev/null
+++ b/src/vnet/ip/igmp_packet.h
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * igmp_packet.h: igmp packet format
+ *
+ * Copyright (c) 2011 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vnet_igmp_packet_h
+#define included_vnet_igmp_packet_h
+
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+/* IGMP message types as (type value, name) pairs; each entry becomes
+ * IGMP_TYPE_<name> in igmp_type_t. */
+#define foreach_igmp_type \
+ _ (0x11, membership_query) \
+ _ (0x12, membership_report_v1) \
+ _ (0x13, dvmrp) \
+ _ (0x14, pim_v1) \
+ _ (0x15, cisco_trace) \
+ _ (0x16, membership_report_v2) \
+ _ (0x17, leave_group_v2) \
+ _ (0x1e, traceroute_response) \
+ _ (0x1f, traceroute_request) \
+ _ (0x22, membership_report_v3) \
+ _ (0x30, router_advertisement) \
+ _ (0x31, router_solicitation) \
+ _ (0x32, router_termination)
+
+/* IGMP_TYPE_<name> = <type value> for each entry of foreach_igmp_type. */
+typedef enum
+{
+#define _(n,f) IGMP_TYPE_##f = n,
+ foreach_igmp_type
+#undef _
+} igmp_type_t;
+
+/* Header shared by the IGMP messages defined in this file. */
+typedef struct
+{
+ /* Message type (an igmp_type_t value), stored in 8 bits. */
+ igmp_type_t type:8;
+
+ u8 code;
+
+ u16 checksum;
+} igmp_header_t;
+
+/* IGMP message made of the common header followed by one IPv4 address. */
+typedef struct
+{
+ /* membership_query, version <= 2 reports. */
+ igmp_header_t header;
+
+ /* Multicast destination address. */
+ ip4_address_t dst;
+} igmp_message_t;
+
+/* IGMPv3 group record types as (value, name) pairs; each entry becomes
+ * IGMP_MEMBERSHIP_GROUP_<name> in igmp_membership_group_v3_type_t. */
+#define foreach_igmp_membership_group_v3_type \
+ _ (1, mode_is_filter_include) \
+ _ (2, mode_is_filter_exclude) \
+ _ (3, change_to_filter_include) \
+ _ (4, change_to_filter_exclude) \
+ _ (5, allow_new_sources) \
+ _ (6, block_old_sources)
+
+/* IGMP_MEMBERSHIP_GROUP_<name> = <value> for each entry of
+ * foreach_igmp_membership_group_v3_type. */
+typedef enum
+{
+#define _(n,f) IGMP_MEMBERSHIP_GROUP_##f = n,
+ foreach_igmp_membership_group_v3_type
+#undef _
+} igmp_membership_group_v3_type_t;
+
+/* One group record of an IGMPv3 membership report: a fixed header
+ * followed by a variable number of source addresses. */
+typedef struct
+{
+ /* Record type (an igmp_membership_group_v3_type_t value), in 8 bits. */
+ igmp_membership_group_v3_type_t type:8;
+
+ /* Number of 32 bit words of aux data after source addresses. */
+ u8 n_aux_u32s;
+
+ /* Number of source addresses that follow. */
+ u16 n_src_addresses;
+
+ /* Destination multicast address. */
+ ip4_address_t dst_address;
+
+ /* Source addresses; n_src_addresses entries. */
+ ip4_address_t src_addresses[0];
+} igmp_membership_group_v3_t;
+
+/*
+ * Return a pointer to the group record that follows g.
+ *
+ * A record consists of the fixed header (sizeof (g[0])), then
+ * n_src_addresses source addresses, then n_aux_u32s 32 bit words of
+ * auxiliary data; the next record starts right after all three.
+ *
+ * NOTE(review): n_src_addresses is used as stored.  If g points at wire
+ * data the field is presumably in network byte order and would need
+ * converting first - confirm against the callers.
+ */
+always_inline igmp_membership_group_v3_t *
+igmp_membership_group_v3_next (igmp_membership_group_v3_t * g)
+{
+  return ((void *) g
+          + sizeof (g[0])
+          + g->n_src_addresses * sizeof (g->src_addresses[0])
+          + g->n_aux_u32s * sizeof (u32));
+}
+
+/* IGMPv3 membership report: common header, group count and the group
+ * records themselves. */
+typedef struct
+{
+ /* Type 0x22. */
+ igmp_header_t header;
+
+ u16 unused;
+
+ /* Number of groups which follow. */
+ u16 n_groups;
+
+ /* Group records; they are variable sized, see
+ igmp_membership_group_v3_t. */
+ igmp_membership_group_v3_t groups[0];
+} igmp_membership_report_v3_t;
+
+/* IP6 flavor of IGMP is called MLD which is embedded in ICMP6. */
+typedef struct
+{
+ /* Preceded by ICMP v6 header. */
+ u16 max_response_delay_in_milliseconds;
+ u16 reserved;
+ ip6_address_t dst;
+} mld_header_t;
+
+#endif /* included_vnet_igmp_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api
new file mode 100644
index 00000000..f26d7943
--- /dev/null
+++ b/src/vnet/ip/ip.api
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** \file
+
+ This file defines vpp IP control-plane API messages which are generally
+ called through a shared memory interface.
+*/
+
+/** \brief Add / del table request
+ A table can be added multiple times, but need be deleted only once.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ipv6 - V4 or V6 table
+ @param table_id - table ID associated with the route
+ This table ID will apply to both the unicast
+ and multicast FIBs
+ @param is_add - presumably non-zero to add the table, zero to delete
+ it (TODO confirm against the handler)
+ @param name - A client provided name/tag for the table. If this is
+ not set by the client, then VPP will generate something
+ meaningful.
+*/
+autoreply define ip_table_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 table_id;
+ u8 is_ipv6;
+ u8 is_add;
+ u8 name[64];
+};
+
+/** \brief Dump IP fib table
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define ip_fib_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief FIB path
+ @param sw_if_index - index of the interface
+ @param weight - The weight, for UCMP
+ @param preference - The preference of the path. The lowest preference is preferred
+ @param is_local - local if non-zero, else remote
+ @param is_drop - Drop the packet
+ @param is_unreach - Drop the packet and rate limit send ICMP unreachable
+ @param is_prohibit - Drop the packet and rate limit send ICMP prohibited
+ @param afi - the afi of the next hop, IP46_TYPE_IP4=1, IP46_TYPE_IP6=2
+ @param next_hop[16] - the next hop address
+
+ WARNING: this type is replicated, pending cleanup completion
+*/
+typeonly manual_print manual_endian define fib_path
+{
+ u32 sw_if_index;
+ u8 weight;
+ u8 preference;
+ u8 is_local;
+ u8 is_drop;
+ u8 is_unreach;
+ u8 is_prohibit;
+ u8 afi;
+ u8 next_hop[16];
+};
+
+/** \brief IP FIB table response
+ @param context - sender context which was passed in the request
+ @param table_id - IP fib table id
+ @param table_name - presumably the name/tag of the table (TODO confirm)
+ @param address_length - mask length
+ @param address - ip4 prefix
+ @param count - the number of fib_path in path
+ @param path - array of fib_path structures
+*/
+manual_endian manual_print define ip_fib_details
+{
+ u32 context;
+ u32 table_id;
+ u8 table_name[64];
+ u8 address_length;
+ u8 address[4];
+ u32 count;
+ vl_api_fib_path_t path[count];
+};
+
+/** \brief Dump IP6 fib table
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define ip6_fib_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief IP6 FIB table entry response
+ @param context - sender context which was passed in the request
+ @param table_id - IP6 fib table id
+ @param table_name - presumably the name/tag of the table (TODO confirm)
+ @param address_length - mask length
+ @param address - ip6 prefix
+ @param count - the number of fib_path in path
+ @param path - array of fib_path structures
+*/
+manual_endian manual_print define ip6_fib_details
+{
+ u32 context;
+ u32 table_id;
+ u8 table_name[64];
+ u8 address_length;
+ u8 address[16];
+ u32 count;
+ vl_api_fib_path_t path[count];
+};
+
+/** \brief Dump IP neighbors
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface to dump neighbors
+ @param is_ipv6 - [1|0] to indicate if address family is ipv[6|4]
+*/
+define ip_neighbor_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+};
+
+/** \brief IP neighbors dump response
+ @param context - sender context which was passed in the request
+ @param is_static - [1|0] to indicate if neighbor is statically configured
+ @param is_ipv6 - [1|0] to indicate if address family is ipv[6|4]
+ @param mac_address - presumably the l2 address of the neighbor (TODO confirm)
+ @param ip_address - presumably the ip4 or ip6 address of the neighbor,
+ depending on is_ipv6 (TODO confirm)
+*/
+define ip_neighbor_details {
+ u32 context;
+ u8 is_static;
+ u8 is_ipv6;
+ u8 mac_address[6];
+ u8 ip_address[16];
+};
+
+/** \brief IP neighbor add / del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface used to reach neighbor
+ @param is_add - 1 to add neighbor, 0 to delete
+ @param is_ipv6 - 1 for IPv6 neighbor, 0 for IPv4
+ @param is_static - A static neighbor entry - these are not flushed
+ if the interface goes down.
+ @param is_no_adj_fib - Do not create a corresponding entry in the FIB
+ table for the neighbor.
+ @param mac_address - l2 address of the neighbor
+ @param dst_address - ip4 or ip6 address of the neighbor
+*/
+autoreply define ip_neighbor_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ /* 1 = add, 0 = delete */
+ u8 is_add;
+ u8 is_ipv6;
+ u8 is_static;
+ u8 is_no_adj_fib;
+ u8 mac_address[6];
+ u8 dst_address[16];
+};
+
+/** \brief Set the ip flow hash config for a fib request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vrf_id - vrf/fib id
+ @param is_ipv6 - if non-zero the fib is ip6, else ip4
+ @param src - if non-zero include src in flow hash
+ @param dst - if non-zero include dst in flow hash
+ @param sport - if non-zero include sport in flow hash
+ @param dport - if non-zero include dport in flow hash
+ @param proto - if non-zero include proto in flow hash
+ @param reverse - if non-zero include reverse in flow hash
+*/
+autoreply define set_ip_flow_hash
+{
+ u32 client_index;
+ u32 context;
+ u32 vrf_id;
+ u8 is_ipv6;
+ u8 src;
+ u8 dst;
+ u8 sport;
+ u8 dport;
+ u8 proto;
+ u8 reverse;
+};
+
+/** \brief IPv6 router advertisement config request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - presumably the interface the RA configuration
+ applies to (TODO confirm)
+ @param suppress -
+ @param managed -
+ @param other -
+ @param ll_option -
+ @param send_unicast -
+ @param cease -
+ @param is_no -
+ @param default_router -
+ @param max_interval -
+ @param min_interval -
+ @param lifetime -
+ @param initial_count -
+ @param initial_interval -
+
+ NOTE(review): the parameters listed without a description above are
+ undocumented; fill them in from the handler.
+*/
+autoreply define sw_interface_ip6nd_ra_config
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 suppress;
+ u8 managed;
+ u8 other;
+ u8 ll_option;
+ u8 send_unicast;
+ u8 cease;
+ u8 is_no;
+ u8 default_router;
+ u32 max_interval;
+ u32 min_interval;
+ u32 lifetime;
+ u32 initial_count;
+ u32 initial_interval;
+};
+
+/** \brief IPv6 router advertisement prefix config request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - The interface the RA prefix information is for
+ @param address[] - The prefix to advertise
+ @param address_length - the prefix length
+ @param use_default - Revert to default settings
+ @param no_advertise - Do not advertise this prefix
+ @param off_link - The prefix is off link (it is not configured on the interface).
+ Configures the L-flag. When set, the L-flag indicates that this
+ prefix can be used for on-link determination.
+ @param no_autoconfig - Setting for the A-flag. When
+ set, the A-flag indicates that this prefix can be used for
+ stateless address configuration.
+ @param no_onlink - The prefix is not on link. Make sure this is consistent
+ with the off_link parameter else YMMV
+ @param is_no - add/delete
+ @param val_lifetime - The length of time in
+ seconds (relative to the time the packet is sent)
+ that the prefix is valid for the purpose of on-link
+ determination. A value of all one bits
+ (0xffffffff) represents infinity
+ @param pref_lifetime - The length of time in
+ seconds (relative to the time the packet is sent)
+ that addresses generated from the prefix via
+ stateless address autoconfiguration remain
+ preferred [ADDRCONF]. A value of all one bits
+ (0xffffffff) represents infinity.
+*/
+autoreply define sw_interface_ip6nd_ra_prefix
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 address[16];
+ u8 address_length;
+ u8 use_default;
+ u8 no_advertise;
+ u8 off_link;
+ u8 no_autoconfig;
+ u8 no_onlink;
+ u8 is_no;
+ u32 val_lifetime;
+ u32 pref_lifetime;
+};
+
+/** \brief IPv6 ND proxy config
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - The interface the host is on
+ @param address - The address of the host for which to proxy for
+ @param is_del - Adding or deleting; presumably non-zero deletes the
+ proxy entry (TODO confirm against the handler)
+*/
+autoreply define ip6nd_proxy_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_del;
+ u8 address[16];
+};
+
+/** \brief IPv6 ND proxy details returned after request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - The interface the host is on
+ @param address - The address of the host for which to proxy for
+*/
+define ip6nd_proxy_details
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 address[16];
+};
+
+/** \brief IPv6 ND proxy dump request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define ip6nd_proxy_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief IPv6 interface enable / disable request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface on which ip6 is enabled or disabled
+ @param enable - if non-zero enable ip6 on interface, else disable
+*/
+autoreply define sw_interface_ip6_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 enable; /* set to true if enable */
+};
+
+/** \brief IPv6 set link local address on interface request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface to set link local on
+ @param address[] - the new link local address (16 bytes)
+*/
+autoreply define sw_interface_ip6_set_link_local_address
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 address[16];
+};
+
+/** \brief Add / del route request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param next_hop_sw_if_index - presumably the software index of the
+ interface used to reach the next hop (TODO confirm)
+ @param table_id - fib table /vrf associated with the route
+ @param classify_table_index -
+ @param next_hop_table_id - NOTE(review): undocumented; the earlier text
+ listed a 'lookup_in_vrf' parameter that is not a field of this
+ message
+ @param create_vrf_if_needed -
+ @param is_add - 1 if adding the route, 0 if deleting
+ @param is_drop - Drop the packet
+ @param is_unreach - Drop the packet and rate limit send ICMP unreachable
+ @param is_prohibit - Drop the packet and rate limit send ICMP prohibited
+ @param is_ipv6 - 0 if an ip4 route, else ip6
+ @param is_local -
+ @param is_classify -
+ @param is_multipath - Set to 1 if this is a multipath route, else 0
+ @param is_resolve_host -
+ @param is_resolve_attached -
+ @param not_last - Is last or not last msg in group of multiple add/del msgs
+ @param next_hop_weight -
+ @param next_hop_preference -
+ @param dst_address_length -
+ @param dst_address[16] -
+ @param next_hop_address[16] -
+ @param next_hop_n_out_labels - the number of labels in the label stack
+ @param next_hop_via_label - The next-hop is resolved via a local label
+ @param next_hop_out_label_stack - the next-hop output label stack, outer most first
+*/
+autoreply define ip_add_del_route
+{
+ u32 client_index;
+ u32 context;
+ u32 next_hop_sw_if_index;
+ u32 table_id;
+ u32 classify_table_index;
+ u32 next_hop_table_id;
+ u8 create_vrf_if_needed;
+ u8 is_add;
+ u8 is_drop;
+ u8 is_unreach;
+ u8 is_prohibit;
+ u8 is_ipv6;
+ u8 is_local;
+ u8 is_classify;
+ u8 is_multipath;
+ u8 is_resolve_host;
+ u8 is_resolve_attached;
+ /* Is last/not-last message in group of multiple add/del messages. */
+ u8 not_last;
+ u8 next_hop_weight;
+ u8 next_hop_preference;
+ u8 dst_address_length;
+ u8 dst_address[16];
+ u8 next_hop_address[16];
+ u8 next_hop_n_out_labels;
+ u32 next_hop_via_label;
+ u32 next_hop_out_label_stack[next_hop_n_out_labels];
+};
+
+/** \brief Add / del multicast route request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param next_hop_sw_if_index - NOTE(review): previously documented as
+ 'sw_if_index - software index of the new vlan's parent interface',
+ which looks copied from another message - confirm the meaning
+ @param table_id - fib table /vrf associated with the route
+
+ FIXME: the remaining fields (entry_flags, itf_flags, rpf_id,
+ grp_address_length, create_vrf_if_needed, is_add, is_ipv6, is_local,
+ grp_address, src_address) are undocumented
+*/
+autoreply define ip_mroute_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 next_hop_sw_if_index;
+ u32 table_id;
+ u32 entry_flags;
+ u32 itf_flags;
+ u32 rpf_id;
+ u16 grp_address_length;
+ u8 create_vrf_if_needed;
+ u8 is_add;
+ u8 is_ipv6;
+ u8 is_local;
+ u8 grp_address[16];
+ u8 src_address[16];
+};
+
+/** \brief Dump IP multicast fib table
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define ip_mfib_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief IP Multicast FIB table response
+ @param context - sender context which was passed in the request
+ @param table_id - IP fib table id
+ @param address_length - mask length
+ @param grp_address - Group address/prefix
+ @param src_address - Source address
+ @param count - the number of fib_path in path
+ @param path - array of fib_path structures
+
+ NOTE(review): entry_flags and rpf_id are undocumented.
+*/
+manual_endian manual_print define ip_mfib_details
+{
+ u32 context;
+ u32 table_id;
+ u32 entry_flags;
+ u32 rpf_id;
+ u8 address_length;
+ u8 grp_address[4];
+ u8 src_address[4];
+ u32 count;
+ vl_api_fib_path_t path[count];
+};
+
+/** \brief Dump IP6 multicast fib table
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define ip6_mfib_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief IP6 Multicast FIB table response
+ @param context - sender context which was passed in the request
+ @param table_id - IP fib table id
+ @param address_length - mask length
+ @param grp_address - Group address/prefix
+ @param src_address - Source address
+ @param count - the number of fib_path in path
+ @param path - array of fib_path structures
+*/
+manual_endian manual_print define ip6_mfib_details
+{
+ u32 context;
+ u32 table_id;
+ u8 address_length;
+ u8 grp_address[16];
+ u8 src_address[16];
+ u32 count;
+ vl_api_fib_path_t path[count];
+};
+
+/** \brief IP address details
+ NOTE(review): this message was undocumented; the descriptions below are
+ inferred from the field names and need confirming.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ip - presumably the ip4 or ip6 address, depending on is_ipv6
+ @param prefix_length - presumably the prefix length of the address
+ @param sw_if_index - presumably the interface the address is on
+ @param is_ipv6 - presumably non-zero for ip6, zero for ip4
+*/
+define ip_address_details
+{
+ u32 client_index;
+ u32 context;
+ u8 ip[16];
+ u8 prefix_length;
+ u32 sw_if_index;
+ u8 is_ipv6;
+};
+
+/** \brief IP address dump request
+ NOTE(review): this message was undocumented; the descriptions of
+ sw_if_index and is_ipv6 are inferred from the field names.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - presumably the interface whose addresses are dumped
+ @param is_ipv6 - presumably non-zero for ip6, zero for ip4
+*/
+define ip_address_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 is_ipv6;
+};
+
+/** \brief IP details
+ NOTE(review): this message was undocumented; the field meanings below
+ are inferred from the field names. Unlike the other messages in this
+ file it starts with sw_if_index instead of client_index.
+ @param sw_if_index - presumably an interface index
+ @param context - sender context, to match reply w/ request
+ @param is_ipv6 - presumably non-zero for ip6, zero for ip4
+*/
+define ip_details
+{
+ u32 sw_if_index;
+ u32 context;
+ u8 is_ipv6;
+};
+
+/** \brief IP dump request
+ NOTE(review): this message was undocumented; the description of is_ipv6
+ is inferred from the field name.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_ipv6 - presumably non-zero for ip6, zero for ip4
+*/
+define ip_dump
+{
+ u32 client_index;
+ u32 context;
+ u8 is_ipv6;
+};
+
+/** \brief mfib signal dump request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define mfib_signal_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief mfib signal details
+ NOTE(review): this message was undocumented; the descriptions marked
+ 'presumably' are inferred from the field names and need confirming.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - presumably the interface the signal relates to
+ @param table_id - presumably the fib table id
+ @param grp_address_len - presumably the mask length of grp_address
+ @param grp_address - presumably the group address/prefix
+ @param src_address - presumably the source address
+ @param ip_packet_len - presumably the number of valid bytes in
+ ip_packet_data
+ @param ip_packet_data - up to 256 bytes of packet data
+*/
+define mfib_signal_details
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u32 table_id;
+ u16 grp_address_len;
+ u8 grp_address[16];
+ u8 src_address[16];
+ u16 ip_packet_len;
+ u8 ip_packet_data[256];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h
new file mode 100644
index 00000000..7e26bc6c
--- /dev/null
+++ b/src/vnet/ip/ip.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip.h: ip generic (4 or 6) main
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_main_h
+#define included_ip_main_h
+
+#include <vppinfra/hash.h>
+#include <vppinfra/heap.h> /* adjacency heap */
+#include <vppinfra/ptclosure.h>
+
+#include <vnet/vnet.h>
+
+#include <vnet/ip/format.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/lookup.h>
+
+#include <vnet/tcp/tcp_packet.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/ip/icmp46_packet.h>
+
+#include <vnet/ip/ip4.h>
+#include <vnet/ip/ip4_error.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/icmp4.h>
+
+#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_error.h>
+#include <vnet/ip/icmp6.h>
+#include <vnet/classify/vnet_classify.h>
+
+/* Per protocol info.
+ * One record per IP protocol; the records are stored in the
+ * protocol_infos vector of ip_main_t and are looked up by protocol
+ * number with ip_get_protocol_info(). */
+typedef struct
+{
+ /* Protocol name (also used as hash key). */
+ u8 *name;
+
+ /* Protocol number. */
+ ip_protocol_t protocol;
+
+ /* Format function for this IP protocol. */
+ format_function_t *format_header;
+
+ /* Parser for header. */
+ unformat_function_t *unformat_header;
+
+ /* Parser for per-protocol matches. */
+ unformat_function_t *unformat_match;
+
+ /* Parser for packet generator edits for this protocol. */
+ unformat_function_t *unformat_pg_edit;
+} ip_protocol_info_t;
+
+/* Per TCP/UDP port info.
+ * One record per known port; the records are stored in the port_infos
+ * vector of ip_main_t and are looked up by port number with
+ * ip_get_tcp_udp_port_info(). */
+typedef struct
+{
+ /* Port name (used as hash key). */
+ u8 *name;
+
+ /* UDP/TCP port number in network byte order. */
+ u16 port;
+
+ /* Port specific format function. */
+ format_function_t *format_header;
+
+ /* Parser for packet generator edits for this protocol. */
+ unformat_function_t *unformat_pg_edit;
+} tcp_udp_port_info_t;
+
+typedef struct
+{
+ /* Per IP protocol info (vector, indexed by the values stored in
+ protocol_info_by_protocol). */
+ ip_protocol_info_t *protocol_infos;
+
+ /* Protocol info index hashed by 8 bit IP protocol.
+ ip_get_protocol_info() uses the stored value as an index into
+ protocol_infos. */
+ uword *protocol_info_by_protocol;
+
+ /* Hash table mapping IP protocol name (see protocols.def)
+ to protocol number.
+ NOTE(review): the field name suggests the value is a protocol_infos
+ index rather than a protocol number - confirm against the code that
+ fills this table. */
+ uword *protocol_info_by_name;
+
+ /* Per TCP/UDP port info (vector, indexed by the values stored in
+ port_info_by_port). */
+ tcp_udp_port_info_t *port_infos;
+
+ /* Hash table from network-byte-order port to port info index.
+ ip_get_tcp_udp_port_info() uses the stored value as an index into
+ port_infos. */
+ uword *port_info_by_port;
+
+ /* Hash table mapping TCP/UDP name to port info index. */
+ uword *port_info_by_name;
+} ip_main_t;
+
+extern ip_main_t ip_main;
+
+clib_error_t *ip_main_init (vlib_main_t * vm);
+
+/* Look up the info record for an IP protocol number.
+ * Returns a pointer into im->protocol_infos, or 0 when the protocol has
+ * no entry in im->protocol_info_by_protocol. */
+static inline ip_protocol_info_t *
+ip_get_protocol_info (ip_main_t * im, u32 protocol)
+{
+  uword *slot = hash_get (im->protocol_info_by_protocol, protocol);
+
+  if (!slot)
+    return 0;
+
+  return vec_elt_at_index (im->protocol_infos, slot[0]);
+}
+
+/* Look up the info record for a TCP/UDP port number.
+ * Returns a pointer into im->port_infos, or 0 when the port has no entry
+ * in im->port_info_by_port. */
+static inline tcp_udp_port_info_t *
+ip_get_tcp_udp_port_info (ip_main_t * im, u32 port)
+{
+  uword *slot = hash_get (im->port_info_by_port, port);
+
+  if (!slot)
+    return 0;
+
+  return vec_elt_at_index (im->port_infos, slot[0]);
+}
+
+/**
+ * Fold bytes of a (possibly chained) buffer into a running IP checksum.
+ *
+ * Summing starts first_buffer_offset bytes past the current data pointer
+ * of first_buffer and continues through the chained buffers until
+ * n_bytes_to_checksum bytes have been consumed or the chain ends.
+ *
+ * @param vm - vlib main, used to resolve next_buffer indices
+ * @param first_buffer - first buffer of the chain
+ * @param first_buffer_offset - bytes to skip in first_buffer; must not
+ *                              exceed first_buffer->current_length
+ * @param n_bytes_to_checksum - total number of bytes to sum
+ * @param sum - checksum accumulated so far
+ *
+ * @return the updated running checksum
+ */
+always_inline ip_csum_t
+ip_incremental_checksum_buffer (vlib_main_t * vm,
+                                vlib_buffer_t * first_buffer,
+                                u32 first_buffer_offset,
+                                u32 n_bytes_to_checksum, ip_csum_t sum)
+{
+  vlib_buffer_t *b = first_buffer;
+  u32 n_bytes_left = n_bytes_to_checksum;
+  void *h;
+  u32 n;
+
+  ASSERT (b->current_length >= first_buffer_offset);
+
+  /* Only the bytes after the offset are available in the first buffer;
+     using the full current_length here would read past the buffer's
+     valid data by first_buffer_offset bytes. */
+  n = clib_min (n_bytes_left, b->current_length - first_buffer_offset);
+  h = vlib_buffer_get_current (b) + first_buffer_offset;
+  sum = ip_incremental_checksum (sum, h, n);
+  n_bytes_left -= n;
+
+  /* Walk the rest of the chain.  Stop when the chain ends even if bytes
+     are still outstanding, so next_buffer is never followed on a buffer
+     that does not have VLIB_BUFFER_NEXT_PRESENT set. */
+  while (PREDICT_FALSE (n_bytes_left > 0
+                        && (b->flags & VLIB_BUFFER_NEXT_PRESENT)))
+    {
+      b = vlib_get_buffer (vm, b->next_buffer);
+      n = clib_min (n_bytes_left, b->current_length);
+      h = vlib_buffer_get_current (b);
+      sum = ip_incremental_checksum (sum, h, n);
+      n_bytes_left -= n;
+    }
+
+  return sum;
+}
+
+void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index);
+
+extern vlib_node_registration_t ip4_inacl_node;
+extern vlib_node_registration_t ip6_inacl_node;
+
+void ip_table_create (fib_protocol_t fproto, u32 table_id, u8 is_api,
+ const u8 * name);
+
+void ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api);
+
+int ip_table_bind (fib_protocol_t fproto, u32 sw_if_index,
+ u32 table_id, u8 is_api);
+
+#endif /* included_ip_main_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h
new file mode 100644
index 00000000..af0e6b9a
--- /dev/null
+++ b/src/vnet/ip/ip4.h
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4.h: ip4 main include file
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip4_h
+#define included_ip_ip4_h
+
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/buffer.h>
+#include <vnet/feature/feature.h>
+#include <vnet/ip/icmp46_packet.h>
+
+typedef struct ip4_mfib_t
+{
+ /* Hash table for each prefix length mapping.
+ One table per array slot; NOTE(review): the 65 slots presumably cover
+ combined source+group mask lengths 0..64 - confirm. */
+ uword *fib_entry_by_dst_address[65];
+
+ /* Table ID (hash key) for this FIB. */
+ u32 table_id;
+
+ /* Index into FIB vector. */
+ u32 index;
+} ip4_mfib_t;
+
+struct ip4_main_t;
+
+typedef void (ip4_add_del_interface_address_function_t) /* signature of an interface-address add/delete callback */
+ (struct ip4_main_t * im,
+ uword opaque, /* NOTE(review): presumably the function_opaque stored with the callback - confirm at the call site */
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length, u32 if_address_index, u32 is_del);
+
+typedef struct
+{
+ ip4_add_del_interface_address_function_t *function;
+ uword function_opaque;
+} ip4_add_del_interface_address_callback_t; /* element type of ip4_main_t.add_del_interface_address_callbacks */
+
+typedef void (ip4_table_bind_function_t) /* signature of an interface-to-table binding callback */
+ (struct ip4_main_t * im,
+ uword opaque, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index); /* opaque: NOTE(review) presumably the stored function_opaque - confirm */
+
+typedef struct
+{
+ ip4_table_bind_function_t *function;
+ uword function_opaque;
+} ip4_table_bind_callback_t; /* element type of ip4_main_t.table_bind_callbacks */
+
+/**
+ * @brief IPv4 main type.
+ *
+ * State of IPv4 VPP processing including:
+ * - FIBs
+ * - Feature indices used in feature topological sort
+ * - Feature node run time references
+ */
+
+typedef struct ip4_main_t
+{
+ ip_lookup_main_t lookup_main;
+
+ /** Vector of FIBs. */
+ struct fib_table_t_ *fibs;
+
+ /** Vector of MTries. */
+ struct ip4_fib_t_ *v4_fibs;
+
+ /** Vector of MFIBs. */
+ struct mfib_table_t_ *mfibs;
+
+ u32 fib_masks[33]; /* address mask per prefix length; indexed by length in ip4_destination_matches_route() */
+
+ /** Table index indexed by software interface. */
+ u32 *fib_index_by_sw_if_index;
+
+ /** Multicast table index indexed by software interface. */
+ u32 *mfib_index_by_sw_if_index;
+
+ /* IP4 enabled count by software interface */
+ u8 *ip_enabled_by_sw_if_index;
+
+ /** Hash table mapping table id to fib index.
+ ID space is not necessarily dense; index space is dense. */
+ uword *fib_index_by_table_id;
+
+ /** Hash table mapping table id to multicast fib index.
+ ID space is not necessarily dense; index space is dense. */
+ uword *mfib_index_by_table_id;
+
+ /** Functions to call when interface address changes. */
+ ip4_add_del_interface_address_callback_t
+ * add_del_interface_address_callbacks;
+
+ /** Functions to call when interface to table binding changes. */
+ ip4_table_bind_callback_t *table_bind_callbacks;
+
+ /** Template used to generate IP4 ARP packets. */
+ vlib_packet_template_t ip4_arp_request_packet_template;
+
+ /** Seed for Jenkins hash used to compute ip4 flow hash. */
+ u32 flow_hash_seed;
+
+ /** @brief Template information for VPP generated packets */
+ struct
+ {
+ /** TTL to use for host generated packets. */
+ u8 ttl;
+
+ /** TOS byte to use for host generated packets. */
+ u8 tos;
+
+ u8 pad[2];
+ } host_config;
+} ip4_main_t;
+
+/** Global ip4 main structure. */
+extern ip4_main_t ip4_main;
+
+/** Global ip4 input node. Errors get attached to ip4 input node. */
+extern vlib_node_registration_t ip4_input_node;
+extern vlib_node_registration_t ip4_lookup_node;
+extern vlib_node_registration_t ip4_local_node;
+extern vlib_node_registration_t ip4_rewrite_node;
+extern vlib_node_registration_t ip4_rewrite_mcast_node;
+extern vlib_node_registration_t ip4_rewrite_local_node;
+extern vlib_node_registration_t ip4_arp_node;
+extern vlib_node_registration_t ip4_glean_node;
+extern vlib_node_registration_t ip4_midchain_node;
+
+/* Compare key against dest under the mask for dest_length.
+ * The two addresses are XORed and the result is ANDed with
+ * im->fib_masks[dest_length]; returns 1 when no masked bit differs and
+ * 0 otherwise. */
+always_inline uword
+ip4_destination_matches_route (const ip4_main_t * im,
+                               const ip4_address_t * key,
+                               const ip4_address_t * dest, uword dest_length)
+{
+  u32 differing_bits = key->data_u32 ^ dest->data_u32;
+
+  return (differing_bits & im->fib_masks[dest_length]) == 0;
+}
+
+/* Compare key against the address of interface address ia, using ia's
+ * own address_length as the prefix length.  Same result convention as
+ * ip4_destination_matches_route(). */
+always_inline uword
+ip4_destination_matches_interface (ip4_main_t * im,
+                                   ip4_address_t * key,
+                                   ip_interface_address_t * ia)
+{
+  ip4_address_t *if_addr;
+
+  if_addr = ip_interface_address_get_address (&im->lookup_main, ia);
+
+  return ip4_destination_matches_route (im, key, if_addr,
+                                        ia->address_length);
+}
+
+/* Variant of ip4_destination_matches_route() for a key that may be
+ * unaligned (e.g. one that points straight into the IP header of a
+ * packet): key is loaded with clib_mem_unaligned(), dest is read
+ * directly.  Returns 1 when key and dest agree on every bit of
+ * im->fib_masks[dest_length], 0 otherwise. */
+always_inline uword
+ip4_unaligned_destination_matches_route (ip4_main_t * im,
+                                         ip4_address_t * key,
+                                         ip4_address_t * dest,
+                                         uword dest_length)
+{
+  u32 key_bits = clib_mem_unaligned (&key->data_u32, u32);
+  u32 differing_bits = key_bits ^ dest->data_u32;
+
+  return (differing_bits & im->fib_masks[dest_length]) == 0;
+}
+
+/* Fetch the interface address recorded for sw_if_index in
+ * lm->if_address_pool_index_by_sw_if_index and copy it to *src.
+ * Returns 0 on success.  When the recorded pool index is ~0 (no
+ * address), *src is set to 0 and a non-zero value is returned. */
+always_inline int
+ip4_src_address_for_packet (ip_lookup_main_t * lm,
+                            u32 sw_if_index, ip4_address_t * src)
+{
+  u32 pool_index = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+
+  if (PREDICT_TRUE (pool_index != ~0))
+    {
+      ip_interface_address_t *ia;
+      ip4_address_t *addr;
+
+      ia = pool_elt_at_index (lm->if_address_pool, pool_index);
+      addr = ip_interface_address_get_address (lm, ia);
+      *src = *addr;
+      return 0;
+    }
+
+  src->as_u32 = 0;
+  return 1;
+}
+
+/* Find interface address which matches destination.
+ * Walks the addresses of sw_if_index (honoring unnumbered interfaces) and
+ * returns the first one for which ip4_destination_matches_route() accepts
+ * dst under that address's own length, or 0 if none does.  When result_ia
+ * is non-null it receives the matching ip_interface_address_t, or 0 when
+ * there is no match. */
+always_inline ip4_address_t *
+ip4_interface_address_matching_destination (ip4_main_t * im,
+ ip4_address_t * dst,
+ u32 sw_if_index,
+ ip_interface_address_t **
+ result_ia)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_address_t *ia;
+ ip4_address_t *result = 0;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ ip4_address_t * a = ip_interface_address_get_address (lm, ia);
+ if (ip4_destination_matches_route (im, dst, a, ia->address_length))
+ {
+ result = a;
+ break; /* stop at the first match; ia is reused below */
+ }
+ }));
+ /* *INDENT-ON* */
+ if (result_ia)
+ *result_ia = result ? ia : 0; /* ia is only reported when a match was found */
+ return result;
+}
+
+ip4_address_t *ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
+ ip_interface_address_t **
+ result_ia);
+
+clib_error_t *ip4_add_del_interface_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip4_address_t * address,
+ u32 address_length, u32 is_del);
+
+void ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable);
+
+int ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2);
+
+/* Send an ARP request to see if given destination is reachable on given interface. */
+clib_error_t *ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst,
+ u32 sw_if_index);
+
+clib_error_t *ip4_set_arp_limit (u32 arp_limit);
+
+uword
+ip4_udp_register_listener (vlib_main_t * vm,
+ u16 dst_port, u32 next_node_index);
+
+void
+ip4_icmp_register_type (vlib_main_t * vm, icmp4_type_t type, u32 node_index);
+
+u16 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+ ip4_header_t * ip0);
+
+void ip4_register_protocol (u32 protocol, u32 node_index);
+
+serialize_function_t serialize_vnet_ip4_main, unserialize_vnet_ip4_main;
+
+int vnet_set_ip4_flow_hash (u32 table_id,
+ flow_hash_config_t flow_hash_config);
+
+int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 table_index);
+
+/* Compute flow hash. We'll use it to select which adjacency to use for this
+ flow. And other things.
+ Each IP_FLOW_HASH_* bit in flow_hash_config enables one input (source
+ address, destination address, protocol, source port, destination port);
+ a disabled input contributes 0.  IP_FLOW_HASH_REVERSE_SRC_DST swaps the
+ source and destination inputs.  Ports are only used when the protocol
+ is TCP or UDP. */
+always_inline u32
+ip4_compute_flow_hash (const ip4_header_t * ip,
+ flow_hash_config_t flow_hash_config)
+{
+ tcp_header_t *tcp = (void *) (ip + 1); /* L4 header is read right after the fixed ip4_header_t; NOTE(review): IP options are not accounted for - confirm callers */
+ u32 a, b, c, t1, t2;
+ uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP
+ || ip->protocol == IP_PROTOCOL_UDP);
+
+ t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR)
+ ? ip->src_address.data_u32 : 0;
+ t2 = (flow_hash_config & IP_FLOW_HASH_DST_ADDR)
+ ? ip->dst_address.data_u32 : 0;
+
+ a = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t2 : t1;
+ b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2;
+ b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0;
+
+ t1 = is_tcp_udp ? tcp->src : 0; /* UDP ports are read through the tcp_header_t src/dst fields as well */
+ t2 = is_tcp_udp ? tcp->dst : 0;
+
+ t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0;
+ t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0;
+
+ /* Pack both ports into one word; the halves swap with REVERSE_SRC_DST. */
+ c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ?
+ (t1 << 16) | t2 : (t2 << 16) | t1;
+
+ hash_v3_mix32 (a, b, c);
+ hash_v3_finalize32 (a, b, c);
+
+ return c;
+}
+
+void
+ip4_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ vlib_rx_or_tx_t which_adj_index);
+
+u8 *format_ip4_forward_next_trace (u8 * s, va_list * args);
+
+u32 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0);
+
+#define IP_DF 0x4000 /* don't fragment */
+
+/**
+ * Push IPv4 header to buffer
+ *
+ * This does not support fragmentation.
+ *
+ * The header is written with version/length byte 0x45, TOS 0, TTL 255 and
+ * the DF flag set.  The length field is taken from
+ * vlib_buffer_length_in_chain() after the header has been pushed.
+ *
+ * @param vm - vlib_main
+ * @param b - buffer to write the header to
+ * @param src - source IP
+ * @param dst - destination IP
+ * @param proto - payload proto
+ * @param csum_offload - if non-zero, the checksum field is left at 0 and
+ * the buffer is flagged for IP checksum offload, with the L3/L4 header
+ * offsets recorded; otherwise the checksum is computed here with
+ * ip4_header_checksum()
+ *
+ * @return - pointer to start of IP header
+ */
+always_inline void *
+vlib_buffer_push_ip4 (vlib_main_t * vm, vlib_buffer_t * b,
+ ip4_address_t * src, ip4_address_t * dst, int proto,
+ u8 csum_offload)
+{
+ ip4_header_t *ih;
+
+ /* make some room */
+ ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t));
+
+ ih->ip_version_and_header_length = 0x45;
+ ih->tos = 0;
+ ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
+
+ /* No fragments */
+ ih->flags_and_fragment_offset = clib_host_to_net_u16 (IP_DF);
+ ih->ttl = 255;
+ ih->protocol = proto;
+ ih->src_address.as_u32 = src->as_u32;
+ ih->dst_address.as_u32 = dst->as_u32;
+
+ /* Offload ip4 header checksum generation */
+ if (csum_offload)
+ {
+ ih->checksum = 0;
+ b->flags |= VNET_BUFFER_F_OFFLOAD_IP_CKSUM | VNET_BUFFER_F_IS_IP4;
+ vnet_buffer (b)->l3_hdr_offset = (u8 *) ih - b->data;
+ vnet_buffer (b)->l4_hdr_offset = vnet_buffer (b)->l3_hdr_offset +
+ sizeof (*ih);
+ }
+ else
+ ih->checksum = ip4_header_checksum (ih);
+
+ return ih;
+}
+#endif /* included_ip_ip4_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip46_cli.c b/src/vnet/ip/ip46_cli.c
new file mode 100644
index 00000000..668c6506
--- /dev/null
+++ b/src/vnet/ip/ip46_cli.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip46_cli.c: ip4 and ip6 commands
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/**
+ * @file
+ * @brief Set IP Address.
+ *
+ * Configure an IPv4 or IPv6 address on an interface.
+ */
+
+
+/**
+ * Three-way compare of two IPv4 addresses by host-order numeric value.
+ *
+ * The result comes from explicit comparisons.  Subtracting the two
+ * unsigned 32-bit values and converting the difference to int yields the
+ * wrong sign whenever the addresses are more than 2^31 apart (for
+ * example 0.0.0.1 against 255.255.255.255).
+ *
+ * @param a1 - first address, in network byte order
+ * @param a2 - second address, in network byte order
+ *
+ * @return a negative value, 0 or a positive value when a1 is lower than,
+ *         equal to or higher than a2
+ */
+int
+ip4_address_compare (ip4_address_t * a1, ip4_address_t * a2)
+{
+  u32 v1 = clib_net_to_host_u32 (a1->data_u32);
+  u32 v2 = clib_net_to_host_u32 (a2->data_u32);
+
+  return (v1 > v2) - (v1 < v2);
+}
+
+/* Three-way compare of two IPv6 addresses.  The addresses are compared
+ * 16 bits at a time, in host byte order, starting at as_u16[0]; the
+ * difference of the first unequal pair is returned, or 0 when all pairs
+ * are equal. */
+int
+ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2)
+{
+  int idx = 0;
+
+  while (idx < ARRAY_LEN (a1->as_u16))
+    {
+      int delta = clib_net_to_host_u16 (a1->as_u16[idx]) -
+        clib_net_to_host_u16 (a2->as_u16[idx]);
+
+      if (delta != 0)
+        return delta;
+      idx++;
+    }
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_command, static) = {
+ .path = "set interface ip", /* prefix of the "set interface ip address" command path registered in this file */
+ .short_help = "IP4/IP6 commands", /* no .function is set; NOTE(review): presumably only a parent node for sub-commands - confirm */
+};
+/* *INDENT-ON* */
+
+/* Remove every IPv4 and IPv6 address configured on sw_if_index.
+ * The addresses and their prefix lengths are first copied into temporary
+ * vectors and only then deleted one by one; NOTE(review): presumably so
+ * that the address list is not modified while it is being walked -
+ * confirm.  The temporary vectors are freed before returning. */
+void
+ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
+{
+ ip4_main_t *im4 = &ip4_main;
+ ip4_address_t *ip4_addrs = 0;
+ u32 *ip4_masks = 0;
+ ip6_main_t *im6 = &ip6_main;
+ ip6_address_t *ip6_addrs = 0;
+ u32 *ip6_masks = 0;
+ ip_interface_address_t *ia;
+ int i;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&im4->lookup_main, ia, sw_if_index,
+ 0 /* honor unnumbered: off */,
+ ({
+ ip4_address_t * x = (ip4_address_t *)
+ ip_interface_address_get_address (&im4->lookup_main, ia);
+ vec_add1 (ip4_addrs, x[0]);
+ vec_add1 (ip4_masks, ia->address_length);
+ }));
+ /* *INDENT-ON* */
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (&im6->lookup_main, ia, sw_if_index,
+ 0 /* honor unnumbered: off */,
+ ({
+ ip6_address_t * x = (ip6_address_t *)
+ ip_interface_address_get_address (&im6->lookup_main, ia);
+ vec_add1 (ip6_addrs, x[0]);
+ vec_add1 (ip6_masks, ia->address_length);
+ }));
+ /* *INDENT-ON* */
+
+ /* Delete the collected addresses; errors returned by the delete calls
+ are not examined. */
+ for (i = 0; i < vec_len (ip4_addrs); i++)
+ ip4_add_del_interface_address (vm, sw_if_index, &ip4_addrs[i],
+ ip4_masks[i], 1 /* is_del */ );
+ for (i = 0; i < vec_len (ip6_addrs); i++)
+ ip6_add_del_interface_address (vm, sw_if_index, &ip6_addrs[i],
+ ip6_masks[i], 1 /* is_del */ );
+
+ vec_free (ip4_addrs);
+ vec_free (ip4_masks);
+ vec_free (ip6_addrs);
+ vec_free (ip6_masks);
+}
+
+/* Hardware-interface add/del hook.  Does nothing when the interface is
+ * being created; on deletion it removes all IP addresses from the
+ * hardware interface's sw_if_index.  Always returns 0 (no error). */
+static clib_error_t *
+ip_address_delete_cleanup (vnet_main_t * vnm, u32 hw_if_index, u32 is_create)
+{
+  vlib_main_t *vm = vlib_get_main ();
+
+  if (!is_create)
+    {
+      vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+
+      ip_del_all_interface_addresses (vm, hi->sw_if_index);
+    }
+
+  return 0;
+}
+
+VNET_HW_INTERFACE_ADD_DEL_FUNCTION (ip_address_delete_cleanup);
+
+/* CLI handler for "set interface ip address".
+ * Accepted input: an optional leading "del", then an interface, then
+ * either "all" (only together with "del") or an IPv4/IPv6
+ * address/length.  Returns 0 on success, otherwise an error describing
+ * the unparsed input or the error returned by the add/del call. */
+static clib_error_t *
+add_del_ip_address (vlib_main_t * vm,
+                    unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip4_address_t a4;
+  ip6_address_t a6;
+  u32 sw_if_index = ~0;
+  u32 is_del = 0;
+  u32 length;
+
+  /* "del" is only recognised ahead of the interface name. */
+  if (unformat (input, "del"))
+    is_del = 1;
+
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    return clib_error_return (0, "unknown interface `%U'",
+                              format_unformat_error, input);
+
+  if (is_del && unformat (input, "all"))
+    {
+      ip_del_all_interface_addresses (vm, sw_if_index);
+      return 0;
+    }
+
+  if (unformat (input, "%U/%d", unformat_ip4_address, &a4, &length))
+    return ip4_add_del_interface_address (vm, sw_if_index, &a4, length,
+                                          is_del);
+
+  if (unformat (input, "%U/%d", unformat_ip6_address, &a6, &length))
+    return ip6_add_del_interface_address (vm, sw_if_index, &a6, length,
+                                          is_del);
+
+  return clib_error_return (0, "expected IP4/IP6 address/length `%U'",
+                            format_unformat_error, input);
+}
+
+/*?
+ * Add an IP Address to an interface or remove an IP Address from an interface.
+ * The IP Address can be an IPv4 or an IPv6 address. Interfaces may have multiple
+ * IPv4 and IPv6 addresses. There is no concept of primary vs. secondary
+ * interface addresses; they're just addresses.
+ *
+ * To display the addresses associated with a given interface, use the command
+ * '<em>show interface address <interface></em>'.
+ *
+ * Note that the debug CLI does not enforce classful mask-width / addressing
+ * constraints.
+ *
+ * The '<em>del</em>' keyword is only accepted before the interface name.
+ *
+ * @cliexpar
+ * @parblock
+ * An example of how to add an IPv4 address to an interface:
+ * @cliexcmd{set interface ip address GigabitEthernet2/0/0 172.16.2.12/24}
+ *
+ * An example of how to add an IPv6 address to an interface:
+ * @cliexcmd{set interface ip address GigabitEthernet2/0/0 @::a:1:1:0:7/126}
+ *
+ * To delete a specific interface ip address:
+ * @cliexcmd{set interface ip address del GigabitEthernet2/0/0 172.16.2.12/24}
+ *
+ * To delete all of an interface's addresses (IPv4 and IPv6):
+ * @cliexcmd{set interface ip address del GigabitEthernet2/0/0 all}
+ * @endparblock
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_address_command, static) = {
+ .path = "set interface ip address",
+ .function = add_del_ip_address,
+ .short_help = "set interface ip address [del] <interface> <ip-addr>/<mask> | [all]",
+};
+/* *INDENT-ON* */
+
+/* Dummy init function to get us linked in.
+ * It performs no work and always returns 0 (no error); it is registered
+ * through VLIB_INIT_FUNCTION below. */
+static clib_error_t *
+ip4_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_error.h b/src/vnet/ip/ip4_error.h
new file mode 100644
index 00000000..95d12ec2
--- /dev/null
+++ b/src/vnet/ip/ip4_error.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_error.h: ip4 fast path errors
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip4_error_h
+#define included_ip_ip4_error_h
+
+#define foreach_ip4_error /* X-macro: each _ (sym, str) row becomes IP4_ERROR_sym in ip4_error_t */ \
+ /* Must be first. */ \
+ _ (NONE, "valid ip4 packets") \
+ \
+ /* Errors signalled by ip4-input */ \
+ _ (TOO_SHORT, "ip4 length < 20 bytes") \
+ _ (BAD_LENGTH, "ip4 length > l2 length") \
+ _ (BAD_CHECKSUM, "bad ip4 checksum") \
+ _ (VERSION, "ip4 version != 4") \
+ _ (OPTIONS, "ip4 options present") \
+ _ (FRAGMENT_OFFSET_ONE, "ip4 fragment offset == 1") \
+ _ (TIME_EXPIRED, "ip4 ttl <= 1") \
+ \
+ /* Errors signalled by ip4-rewrite. */ \
+ _ (MTU_EXCEEDED, "ip4 MTU exceeded and DF set") \
+ _ (DST_LOOKUP_MISS, "ip4 destination lookup miss") \
+ _ (SRC_LOOKUP_MISS, "ip4 source lookup miss") \
+ _ (ADJACENCY_DROP, "ip4 adjacency drop") \
+ _ (ADJACENCY_PUNT, "ip4 adjacency punt") \
+ \
+ /* Errors signalled by ip4-local. */ \
+ _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
+ _ (TCP_CHECKSUM, "bad tcp checksum") \
+ _ (UDP_CHECKSUM, "bad udp checksum") \
+ _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
+ \
+ /* Errors signalled by ip4-source-check. */ \
+ _ (UNICAST_SOURCE_CHECK_FAILS, "ip4 unicast source check fails") \
+ \
+ /* Spoofed packets in ip4-rewrite-local */ \
+ _(SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \
+ \
+ /* Errors signalled by ip4-inacl */ \
+ _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
+ _ (INACL_SESSION_DENY, "input ACL session deny drops")
+
+typedef enum
+{
+#define _(sym,str) IP4_ERROR_##sym, /* keep only the symbol of each foreach_ip4_error row */
+ foreach_ip4_error
+#undef _
+ IP4_N_ERROR, /* comes after every generated code, so it equals the number of error codes */
+} ip4_error_t;
+
+#endif /* included_ip_ip4_error_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_format.c b/src/vnet/ip/ip4_format.c
new file mode 100644
index 00000000..c803e065
--- /dev/null
+++ b/src/vnet/ip/ip4_format.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_format.c: ip4 formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/*
+ * Format an IP4 address in dotted-decimal notation.
+ * Takes one variadic argument: a u8 pointer to the four address bytes,
+ * which are printed in the order they are stored.
+ */
+u8 *
+format_ip4_address (u8 * s, va_list * args)
+{
+  const u8 *octets = va_arg (*args, u8 *);
+
+  s = format (s, "%d.%d.%d.%d",
+              octets[0], octets[1], octets[2], octets[3]);
+  return s;
+}
+
+/*
+ * Format an IP4 route destination as "<address>/<length>".
+ * Variadic arguments: a u8 pointer to the address bytes, then the prefix
+ * length passed as a u32 (it is narrowed to u8 before printing).
+ */
+u8 *
+format_ip4_address_and_length (u8 * s, va_list * args)
+{
+  u8 *addr_bytes = va_arg (*args, u8 *);
+  u8 prefix_len = va_arg (*args, u32);
+
+  s = format (s, "%U/%d", format_ip4_address, addr_bytes, prefix_len);
+  return s;
+}
+
+/*
+ * Parse an IP4 address written as %d.%d.%d.%d.
+ * Takes one variadic argument: a u8 pointer receiving the four address
+ * bytes.  Returns 1 on success; returns 0 (leaving the output untouched)
+ * when the input does not match or any component is larger than 255.
+ */
+uword
+unformat_ip4_address (unformat_input_t * input, va_list * args)
+{
+  u8 *result = va_arg (*args, u8 *);
+  unsigned octet[4];
+  int i;
+
+  if (!unformat (input, "%d.%d.%d.%d",
+                 &octet[0], &octet[1], &octet[2], &octet[3]))
+    return 0;
+
+  /* Validate every component before writing anything to the caller. */
+  for (i = 0; i < 4; i++)
+    {
+      if (octet[i] > 255)
+        return 0;
+    }
+
+  for (i = 0; i < 4; i++)
+    result[i] = octet[i];
+
+  return 1;
+}
+
+/*
+ * Format an IP4 header.
+ *
+ * Variadic arguments:
+ *   ip4_header_t *ip      - header to format
+ *   u32 max_header_bytes  - number of bytes available starting at ip
+ *
+ * Prints "<protocol>: <src> -> <dst>", then (on indented lines) the
+ * version/header length when they are not the expected values, tos, ttl,
+ * length and checksum (with the expected checksum if it is wrong), and the
+ * fragment id, offset and flags.  If bytes remain after the IP header and a
+ * formatter is registered for the protocol, the next header is formatted
+ * too.  When fewer than sizeof (ip4_header_t) bytes are available only
+ * "IP header truncated" is printed.
+ */
+u8 *
+format_ip4_header (u8 * s, va_list * args)
+{
+  ip4_header_t *ip = va_arg (*args, ip4_header_t *);
+  u32 max_header_bytes = va_arg (*args, u32);
+  u32 ip_version, header_bytes;
+  uword indent;
+
+  /* Nothing to do. */
+  if (max_header_bytes < sizeof (ip[0]))
+    return format (s, "IP header truncated");
+
+  indent = format_get_indent (s);
+  indent += 2;
+
+  ip_version = (ip->ip_version_and_header_length >> 4);
+  header_bytes = (ip->ip_version_and_header_length & 0xf) * sizeof (u32);
+
+  s = format (s, "%U: %U -> %U",
+              format_ip_protocol, ip->protocol,
+              format_ip4_address, ip->src_address.data,
+              format_ip4_address, ip->dst_address.data);
+
+  /* Show IP version and header length only with unexpected values. */
+  if (ip_version != 4 || header_bytes != sizeof (ip4_header_t))
+    s = format (s, "\n%Uversion %d, header length %d",
+                format_white_space, indent, ip_version, header_bytes);
+
+  s = format (s, "\n%Utos 0x%02x, ttl %d, length %d, checksum 0x%04x",
+              format_white_space, indent,
+              ip->tos, ip->ttl,
+              clib_net_to_host_u16 (ip->length),
+              clib_net_to_host_u16 (ip->checksum));
+
+  /* Check and report invalid checksums. */
+  {
+    u16 c = ip4_header_checksum (ip);
+    if (c != ip->checksum)
+      s = format (s, " (should be 0x%04x)", clib_net_to_host_u16 (c));
+  }
+
+  {
+    u32 f = clib_net_to_host_u16 (ip->flags_and_fragment_offset);
+    u32 o;
+
+    s = format (s, "\n%Ufragment id 0x%04x",
+                format_white_space, indent,
+                clib_net_to_host_u16 (ip->fragment_id));
+
+    /*
+     * Fragment offset: the low 13 bits, counted in 8-byte units.
+     * The offset bits are then cleared from f so that only the flag bits
+     * remain.  They must be cleared with the field mask, not with the byte
+     * offset o (which is already multiplied by 8); otherwise offset bits
+     * are left behind or flag bits get flipped.
+     */
+    o = 8 * (f & 0x1fff);
+    f &= ~0x1fff;
+    if (o != 0)
+      s = format (s, " offset %d", o);
+
+    if (f != 0)
+      {
+        s = format (s, ", flags ");
+#define _(l) if (f & IP4_HEADER_FLAG_##l) s = format (s, #l);
+        _(MORE_FRAGMENTS);
+        _(DONT_FRAGMENT);
+        _(CONGESTION);
+#undef _
+      }
+  }
+
+  /* Recurse into next protocol layer. */
+  if (max_header_bytes != 0 && header_bytes < max_header_bytes)
+    {
+      ip_main_t *im = &ip_main;
+      ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol);
+
+      if (pi && pi->format_header)
+        s = format (s, "\n%U%U",
+                    format_white_space, indent - 2, pi->format_header,
+                    /* next protocol header */ (void *) ip + header_bytes,
+                    max_header_bytes - header_bytes);
+    }
+
+  return s;
+}
+
+/*
+ * Parse an IP4 header.
+ *
+ * Takes one variadic argument: u8 **result, a vector to which the header
+ * (and whatever the next-protocol parser produces) is appended.
+ *
+ * Input syntax: "<protocol>: <src> -> <dst>" followed by any number of
+ *   tos <n> | ttl <n> | fragment id <n> offset <n> | mf | df | ce
+ * (flags are also accepted in upper case).  The fragment offset is given
+ * in bytes and stored in 8-byte units.  The header checksum and the total
+ * length are filled in here.
+ *
+ * Returns 1 on success and 0 on failure.  On failure the bytes already
+ * appended to *result are left in place.
+ */
+uword
+unformat_ip4_header (unformat_input_t * input, va_list * args)
+{
+  u8 **result = va_arg (*args, u8 **);
+  ip4_header_t *ip;
+  int old_length;
+
+  /* Allocate space for IP header. */
+  {
+    void *p;
+
+    old_length = vec_len (*result);
+    vec_add2 (*result, p, sizeof (ip4_header_t));
+    ip = p;
+  }
+
+  memset (ip, 0, sizeof (ip[0]));
+  ip->ip_version_and_header_length = IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+
+  if (!unformat (input, "%U: %U -> %U",
+                 unformat_ip_protocol, &ip->protocol,
+                 unformat_ip4_address, &ip->src_address,
+                 unformat_ip4_address, &ip->dst_address))
+    return 0;
+
+  /* Parse options. */
+  while (1)
+    {
+      int i, j;
+
+      if (unformat (input, "tos %U", unformat_vlib_number, &i))
+        ip->tos = i;
+
+      else if (unformat (input, "ttl %U", unformat_vlib_number, &i))
+        ip->ttl = i;
+
+      else if (unformat (input, "fragment id %U offset %U",
+                         unformat_vlib_number, &i, unformat_vlib_number, &j))
+        {
+          /* i is the fragment id, j the fragment offset in bytes. */
+          ip->fragment_id = clib_host_to_net_u16 (i);
+          ip->flags_and_fragment_offset |=
+            clib_host_to_net_u16 ((j / 8) & 0x1fff);
+        }
+
+      /* Flags. */
+      else if (unformat (input, "mf") || unformat (input, "MF"))
+        ip->flags_and_fragment_offset |=
+          clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+      else if (unformat (input, "df") || unformat (input, "DF"))
+        ip->flags_and_fragment_offset |=
+          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT);
+
+      else if (unformat (input, "ce") || unformat (input, "CE"))
+        ip->flags_and_fragment_offset |=
+          clib_host_to_net_u16 (IP4_HEADER_FLAG_CONGESTION);
+
+      /* Can't parse input: try next protocol level. */
+      else
+        break;
+    }
+
+  /* Fill in checksum. */
+  ip->checksum = ip4_header_checksum (ip);
+
+  /* Recurse into next protocol layer. */
+  {
+    ip_main_t *im = &ip_main;
+    ip_protocol_info_t *pi = ip_get_protocol_info (im, ip->protocol);
+
+    if (pi && pi->unformat_header)
+      {
+        if (!unformat_user (input, pi->unformat_header, result))
+          return 0;
+
+        /* Result may have moved. */
+        ip = (void *) *result + old_length;
+      }
+  }
+
+  /* Fill in IP length: everything appended since this header started. */
+  ip->length = clib_host_to_net_u16 (vec_len (*result) - old_length);
+
+  return 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
new file mode 100755
index 00000000..6b3453b5
--- /dev/null
+++ b/src/vnet/ip/ip4_forward.c
@@ -0,0 +1,3197 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_forward.c: IP v4 forwarding
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
+#include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
+#include <vnet/ppp/ppp.h>
+#include <vnet/srp/srp.h> /* for srp_hw_interface_class */
+#include <vnet/api_errno.h> /* for API error numbers */
+#include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
+#include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
+#include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
+
+/**
+ * @file
+ * @brief IPv4 Forwarding.
+ *
+ * This file contains the source code for IPv4 forwarding.
+ */
+
+/*
+ * Forward declaration.  The node functions in this file call it with the
+ * whole frame when VLIB_NODE_FLAG_TRACE is set on the node.
+ */
+void
+ip4_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ vlib_rx_or_tx_t which_adj_index);
+
+/*
+ * Common body of the IPv4 lookup node.
+ *
+ * For every buffer in the frame: obtain a load-balance index (from a
+ * three-step mtrie lookup of the destination address, or, when
+ * lookup_for_responses_to_locally_received_packets is non-zero, from
+ * ip.adj_index[VLIB_RX]), pick a bucket of that load-balance object (by
+ * flow hash when it has more than one bucket), store the bucket's DPO
+ * index in ip.adj_index[VLIB_TX], bump the per-load-balance combined
+ * counter and enqueue the buffer to the DPO's next node.
+ *
+ * Returns the number of vectors in the frame.
+ */
+always_inline uword
+ip4_lookup_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ int lookup_for_responses_to_locally_received_packets)
+{
+ ip4_main_t *im = &ip4_main;
+ vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
+ u32 n_left_from, n_left_to_next, *from, *to_next;
+ ip_lookup_next_t next;
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+ /* Quad loop: four packets per iteration; needs eight left because the
+ following four are prefetched. */
+ while (n_left_from >= 8 && n_left_to_next >= 4)
+ {
+ vlib_buffer_t *p0, *p1, *p2, *p3;
+ ip4_header_t *ip0, *ip1, *ip2, *ip3;
+ ip_lookup_next_t next0, next1, next2, next3;
+ const load_balance_t *lb0, *lb1, *lb2, *lb3;
+ ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
+ ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
+ u32 pi0, fib_index0, lb_index0;
+ u32 pi1, fib_index1, lb_index1;
+ u32 pi2, fib_index2, lb_index2;
+ u32 pi3, fib_index3, lb_index3;
+ flow_hash_config_t flow_hash_config0, flow_hash_config1;
+ flow_hash_config_t flow_hash_config2, flow_hash_config3;
+ u32 hash_c0, hash_c1, hash_c2, hash_c3;
+ const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, LOAD);
+ vlib_prefetch_buffer_header (p5, LOAD);
+ vlib_prefetch_buffer_header (p6, LOAD);
+ vlib_prefetch_buffer_header (p7, LOAD);
+
+ CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
+ }
+
+ /* Speculatively enqueue all four to the current next frame. */
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ pi2 = to_next[2] = from[2];
+ pi3 = to_next[3] = from[3];
+
+ from += 4;
+ to_next += 4;
+ n_left_to_next -= 4;
+ n_left_from -= 4;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ p2 = vlib_get_buffer (vm, pi2);
+ p3 = vlib_get_buffer (vm, pi3);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ ip2 = vlib_buffer_get_current (p2);
+ ip3 = vlib_buffer_get_current (p3);
+
+ dst_addr0 = &ip0->dst_address;
+ dst_addr1 = &ip1->dst_address;
+ dst_addr2 = &ip2->dst_address;
+ dst_addr3 = &ip3->dst_address;
+
+ /* Default FIB: the one bound to the receive interface. */
+ fib_index0 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index1 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ fib_index2 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p2)->sw_if_index[VLIB_RX]);
+ fib_index3 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p3)->sw_if_index[VLIB_RX]);
+ /* A sw_if_index[VLIB_TX] other than ~0 is used as the FIB index
+ instead. */
+ fib_index0 =
+ (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+ fib_index1 =
+ (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
+ fib_index2 =
+ (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
+ fib_index3 =
+ (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
+
+
+ /* Three mtrie steps, each interleaved across the four packets. */
+ if (!lookup_for_responses_to_locally_received_packets)
+ {
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+ mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
+ mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
+ mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
+
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
+ leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
+ leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
+ leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
+ }
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ {
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
+ leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
+ leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
+ }
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ {
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
+ leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
+ leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
+ }
+
+ if (lookup_for_responses_to_locally_received_packets)
+ {
+ lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
+ lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
+ lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
+ lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
+ }
+ else
+ {
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+ lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
+ lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
+ }
+
+ ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
+ lb0 = load_balance_get (lb_index0);
+ lb1 = load_balance_get (lb_index1);
+ lb2 = load_balance_get (lb_index2);
+ lb3 = load_balance_get (lb_index3);
+
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+ ASSERT (lb1->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb1->lb_n_buckets));
+ ASSERT (lb2->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb2->lb_n_buckets));
+ ASSERT (lb3->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb3->lb_n_buckets));
+
+ /* Use flow hash to compute multipath adjacency. */
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
+ hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
+ hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, flow_hash_config0);
+ dpo0 =
+ load_balance_get_fwd_bucket (lb0,
+ (hash_c0 &
+ (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ flow_hash_config1 = lb1->lb_hash_config;
+ hash_c1 = vnet_buffer (p1)->ip.flow_hash =
+ ip4_compute_flow_hash (ip1, flow_hash_config1);
+ dpo1 =
+ load_balance_get_fwd_bucket (lb1,
+ (hash_c1 &
+ (lb1->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+ if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
+ {
+ flow_hash_config2 = lb2->lb_hash_config;
+ hash_c2 = vnet_buffer (p2)->ip.flow_hash =
+ ip4_compute_flow_hash (ip2, flow_hash_config2);
+ dpo2 =
+ load_balance_get_fwd_bucket (lb2,
+ (hash_c2 &
+ (lb2->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo2 = load_balance_get_bucket_i (lb2, 0);
+ }
+ if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
+ {
+ flow_hash_config3 = lb3->lb_hash_config;
+ hash_c3 = vnet_buffer (p3)->ip.flow_hash =
+ ip4_compute_flow_hash (ip3, flow_hash_config3);
+ dpo3 =
+ load_balance_get_fwd_bucket (lb3,
+ (hash_c3 &
+ (lb3->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo3 = load_balance_get_bucket_i (lb3, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ next1 = dpo1->dpoi_next_node;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+ next2 = dpo2->dpoi_next_node;
+ vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
+ next3 = dpo3->dpoi_next_node;
+ vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lb_index0, 1,
+ vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter
+ (cm, thread_index, lb_index1, 1,
+ vlib_buffer_length_in_chain (vm, p1));
+ vlib_increment_combined_counter
+ (cm, thread_index, lb_index2, 1,
+ vlib_buffer_length_in_chain (vm, p2));
+ vlib_increment_combined_counter
+ (cm, thread_index, lb_index3, 1,
+ vlib_buffer_length_in_chain (vm, p3));
+
+ vlib_validate_buffer_enqueue_x4 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, pi1, pi2, pi3,
+ next0, next1, next2, next3);
+ }
+
+ /* Single loop: same processing, one packet at a time. */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ ip_lookup_next_t next0;
+ const load_balance_t *lb0;
+ ip4_fib_mtrie_t *mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ ip4_address_t *dst_addr0;
+ u32 pi0, fib_index0, lbi0;
+ flow_hash_config_t flow_hash_config0;
+ const dpo_id_t *dpo0;
+ u32 hash_c0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+
+ dst_addr0 = &ip0->dst_address;
+
+ fib_index0 =
+ vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ fib_index0 =
+ (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ {
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
+
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
+ }
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
+
+ if (!lookup_for_responses_to_locally_received_packets)
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
+
+ if (lookup_for_responses_to_locally_received_packets)
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
+ else
+ {
+ /* Handle default route. */
+ lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ }
+
+ ASSERT (lbi0);
+ lb0 = load_balance_get (lbi0);
+
+ ASSERT (lb0->lb_n_buckets > 0);
+ ASSERT (is_pow2 (lb0->lb_n_buckets));
+
+ /* Use flow hash to compute multipath adjacency. */
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ flow_hash_config0 = lb0->lb_hash_config;
+
+ hash_c0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, flow_hash_config0);
+ dpo0 =
+ load_balance_get_fwd_bucket (lb0,
+ (hash_c0 &
+ (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
+ vlib_buffer_length_in_chain (vm,
+ p0));
+
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ /* Wrong guess: take the buffer back out of the current frame,
+ switch to the frame for next0 and enqueue it there. */
+ if (PREDICT_FALSE (next0 != next))
+ {
+ n_left_to_next += 1;
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ next = next0;
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+ to_next[0] = pi0;
+ to_next += 1;
+ n_left_to_next -= 1;
+ }
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ return frame->n_vectors;
+}
+
+/** @brief IPv4 lookup node.
+ @node ip4-lookup
+
+ This is the main IPv4 lookup dispatch node.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: buffer metadata, next index usage
+
+ @em Uses:
+ - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
+ - Indicates the @c sw_if_index value of the interface that the
+ packet was received on.
+ - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
+ - When the value is @c ~0 then the node performs a longest prefix
+ match (LPM) for the packet destination address in the FIB attached
+ to the receive interface.
+ - Otherwise perform LPM for the packet destination address in the
+ indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
+ value (0, 1, ...) and not a VRF id.
+
+ @em Sets:
+ - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
+ - The lookup result adjacency index.
+
+ <em>Next Index:</em>
+ - Dispatches the packet to the node index found in
+ ip_adjacency_t @c adj->lookup_next_index
+ (where @c adj is the lookup result adjacency).
+*/
+static uword
+ip4_lookup (vlib_main_t * vm,
+            vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  /* Ordinary forwarding: the result comes from the FIB lookup, not from
+     metadata already carried by the buffer. */
+  const int lookup_for_responses_to_locally_received_packets = 0;
+
+  return ip4_lookup_inline (vm, node, frame,
+                            lookup_for_responses_to_locally_received_packets);
+}
+
+/* Trace formatter shared by the lookup and load-balance nodes. */
+static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
+
+/* Graph node registration for "ip4-lookup". */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_lookup_node) =
+{
+  .function = ip4_lookup,
+  .name = "ip4-lookup",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip4_lookup_trace,
+  .n_next_nodes = IP_LOOKUP_N_NEXT,
+  .next_nodes = IP4_LOOKUP_NEXT_NODES,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
+
+/*
+ * Node function of "ip4-load-balance".
+ *
+ * For every buffer: read the load-balance index from
+ * ip.adj_index[VLIB_TX], pick one of its buckets (by flow hash when there
+ * is more than one), replace ip.adj_index[VLIB_TX] with the bucket's DPO
+ * index, bump the per-load-balance "via" combined counter and enqueue the
+ * buffer to the DPO's next node.
+ *
+ * Returns the number of vectors in the frame.
+ */
+always_inline uword
+ip4_load_balance (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
+ u32 n_left_from, n_left_to_next, *from, *to_next;
+ ip_lookup_next_t next;
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ /* Tracing is done up front, before ip.adj_index[VLIB_TX] is rewritten. */
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+
+ /* Dual loop: two packets per iteration; needs four left because the
+ following two are prefetched. */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ ip_lookup_next_t next0, next1;
+ const load_balance_t *lb0, *lb1;
+ vlib_buffer_t *p0, *p1;
+ u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
+ const ip4_header_t *ip0, *ip1;
+ const dpo_id_t *dpo0, *dpo1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get (lbi0);
+ lb1 = load_balance_get (lbi1);
+
+ /*
+ * this node is for via FIBs we can re-use the hash value from the
+ * to node if present.
+ * We don't want to use the same hash value at each level in the recursion
+ * graph as that would lead to polarisation
+ */
+ hc0 = hc1 = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ /* A non-zero stored hash is reused, shifted right by one bit;
+ a zero one is treated as absent and computed here. */
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
+ }
+ dpo0 = load_balance_get_fwd_bucket
+ (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ vnet_buffer (p1)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
+ }
+ dpo1 = load_balance_get_fwd_bucket
+ (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ /* Single loop: same processing, one packet at a time. */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_lookup_next_t next0;
+ const load_balance_t *lb0;
+ vlib_buffer_t *p0;
+ u32 pi0, lbi0, hc0;
+ const ip4_header_t *ip0;
+ const dpo_id_t *dpo0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get (lbi0);
+
+ hc0 = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
+ }
+ dpo0 = load_balance_get_fwd_bucket
+ (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* Graph node registration for "ip4-load-balance", a sibling of
+   "ip4-lookup". */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_load_balance_node) =
+{
+  .function = ip4_load_balance,
+  .name = "ip4-load-balance",
+  .vector_size = sizeof (u32),
+  .sibling_of = "ip4-lookup",
+  .format_trace = format_ip4_lookup_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
+
+/*
+ * Get the first IPv4 address configured on an interface.
+ *
+ * Iterates the interface's addresses (honoring unnumbered interfaces) and
+ * stops at the first one.  Returns that address, or 0 if there is none.
+ * When result_ia is non-null it receives the matching
+ * ip_interface_address_t, or 0 if no address was found.
+ */
+ip4_address_t *
+ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
+ ip_interface_address_t ** result_ia)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_address_t *ia = 0;
+ ip4_address_t *result = 0;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address
+ (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */ ,
+ ({
+ ip4_address_t * a =
+ ip_interface_address_get_address (lm, ia);
+ result = a;
+ break;
+ }));
+ /* *INDENT-ON* */
+ if (result_ia)
+ *result_ia = result ? ia : 0;
+ return result;
+}
+
+/*
+ * Install the FIB entries that go with an interface address.
+ *
+ *  - prefix length <= 30: a connected/attached entry for the prefix, plus
+ *    /32 drop entries for the subnet's all-zeros and all-ones host
+ *    addresses (each skipped if it equals the interface address);
+ *  - prefix length == 31: an attached /32 entry for the other end;
+ *  - always: a connected/local /32 entry for the address itself, preceded
+ *    by a classify entry when a classify table is bound to the interface.
+ */
+static void
+ip4_add_interface_routes (u32 sw_if_index,
+                          ip4_main_t * im, u32 fib_index,
+                          ip_interface_address_t * a)
+{
+  ip_lookup_main_t *lm = &im->lookup_main;
+  ip4_address_t *if_addr = ip_interface_address_get_address (lm, a);
+  u32 classify_table_index = ~0;
+  fib_prefix_t if_pfx = {
+    .fp_len = a->address_length,
+    .fp_proto = FIB_PROTOCOL_IP4,
+    .fp_addr.ip4 = *if_addr,
+  };
+
+  if (if_pfx.fp_len == 31)
+    {
+      /* a /31 - add the other end as an attached host */
+      fib_prefix_t peer_pfx = if_pfx;
+
+      peer_pfx.fp_len = 32;
+      /* Flip the lowest address bit to get the peer's address. */
+      peer_pfx.fp_addr.ip4.as_u32 ^= clib_host_to_net_u32 (1);
+
+      fib_table_entry_update_one_path (fib_index, &peer_pfx,
+                                       FIB_SOURCE_INTERFACE,
+                                       (FIB_ENTRY_FLAG_ATTACHED),
+                                       DPO_PROTO_IP4,
+                                       &peer_pfx.fp_addr,
+                                       sw_if_index,
+                                       ~0 /* invalid FIB index */ ,
+                                       1,
+                                       NULL,
+                                       FIB_ROUTE_PATH_FLAG_NONE);
+    }
+  else if (if_pfx.fp_len <= 30)
+    {
+      /* Starts out as the subnet's all-zeros host address. */
+      fib_prefix_t edge_pfx = {
+        .fp_len = 32,
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_addr.ip4.as_u32 =
+          if_addr->as_u32 & im->fib_masks[if_pfx.fp_len],
+      };
+
+      /* a /30 or shorter - add a glean for the network address */
+      fib_table_entry_update_one_path (fib_index, &if_pfx,
+                                       FIB_SOURCE_INTERFACE,
+                                       (FIB_ENTRY_FLAG_CONNECTED |
+                                        FIB_ENTRY_FLAG_ATTACHED),
+                                       DPO_PROTO_IP4,
+                                       NULL /* no next-hop address */ ,
+                                       sw_if_index,
+                                       ~0 /* invalid FIB index */ ,
+                                       1,
+                                       NULL /* no out-label stack */ ,
+                                       FIB_ROUTE_PATH_FLAG_NONE);
+
+      /* Add the two broadcast addresses as drop */
+      if (edge_pfx.fp_addr.ip4.as_u32 != if_pfx.fp_addr.ip4.as_u32)
+        {
+          fib_table_entry_special_add (fib_index,
+                                       &edge_pfx,
+                                       FIB_SOURCE_INTERFACE,
+                                       (FIB_ENTRY_FLAG_DROP |
+                                        FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
+        }
+
+      /* Now the all-ones host address. */
+      edge_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[if_pfx.fp_len];
+      if (edge_pfx.fp_addr.ip4.as_u32 != if_pfx.fp_addr.ip4.as_u32)
+        {
+          fib_table_entry_special_add (fib_index,
+                                       &edge_pfx,
+                                       FIB_SOURCE_INTERFACE,
+                                       (FIB_ENTRY_FLAG_DROP |
+                                        FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
+        }
+    }
+
+  /* Everything below concerns the /32 for the address itself. */
+  if_pfx.fp_len = 32;
+
+  if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
+    classify_table_index =
+      lm->classify_table_index_by_sw_if_index[sw_if_index];
+
+  if (classify_table_index != (u32) ~ 0)
+    {
+      dpo_id_t dpo = DPO_INVALID;
+
+      dpo_set (&dpo,
+               DPO_CLASSIFY,
+               DPO_PROTO_IP4,
+               classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
+
+      fib_table_entry_special_dpo_add (fib_index,
+                                       &if_pfx,
+                                       FIB_SOURCE_CLASSIFY,
+                                       FIB_ENTRY_FLAG_NONE, &dpo);
+      dpo_reset (&dpo);
+    }
+
+  fib_table_entry_update_one_path (fib_index, &if_pfx,
+                                   FIB_SOURCE_INTERFACE,
+                                   (FIB_ENTRY_FLAG_CONNECTED |
+                                    FIB_ENTRY_FLAG_LOCAL),
+                                   DPO_PROTO_IP4,
+                                   &if_pfx.fp_addr,
+                                   sw_if_index,
+                                   ~0 /* invalid FIB index */ ,
+                                   1, NULL,
+                                   FIB_ROUTE_PATH_FLAG_NONE);
+}
+
+/*
+ * Remove the FIB_SOURCE_INTERFACE entries associated with an interface
+ * address.
+ *
+ *  - prefix length <= 30: the /32 special entries for the subnet's
+ *    all-zeros and all-ones host addresses (each skipped if it equals the
+ *    interface address), then the entry for the prefix itself;
+ *  - prefix length == 31: the /32 entry for the other end;
+ *  - always: the /32 entry for the address itself.
+ */
+static void
+ip4_del_interface_routes (ip4_main_t * im,
+                          u32 fib_index,
+                          ip4_address_t * address, u32 address_length)
+{
+  fib_prefix_t if_pfx = {
+    .fp_len = address_length,
+    .fp_proto = FIB_PROTOCOL_IP4,
+    .fp_addr.ip4 = *address,
+  };
+
+  if (if_pfx.fp_len == 31)
+    {
+      fib_prefix_t peer_pfx = if_pfx;
+
+      peer_pfx.fp_len = 32;
+      /* Flip the lowest address bit to get the peer's address. */
+      peer_pfx.fp_addr.ip4.as_u32 ^= clib_host_to_net_u32 (1);
+
+      fib_table_entry_delete (fib_index, &peer_pfx, FIB_SOURCE_INTERFACE);
+    }
+  else if (if_pfx.fp_len <= 30)
+    {
+      /* Starts out as the subnet's all-zeros host address. */
+      fib_prefix_t edge_pfx = {
+        .fp_len = 32,
+        .fp_proto = FIB_PROTOCOL_IP4,
+        .fp_addr.ip4.as_u32 =
+          address->as_u32 & im->fib_masks[if_pfx.fp_len],
+      };
+
+      if (edge_pfx.fp_addr.ip4.as_u32 != if_pfx.fp_addr.ip4.as_u32)
+        {
+          fib_table_entry_special_remove (fib_index,
+                                          &edge_pfx, FIB_SOURCE_INTERFACE);
+        }
+
+      /* Now the all-ones host address. */
+      edge_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[if_pfx.fp_len];
+      if (edge_pfx.fp_addr.ip4.as_u32 != if_pfx.fp_addr.ip4.as_u32)
+        {
+          fib_table_entry_special_remove (fib_index,
+                                          &edge_pfx, FIB_SOURCE_INTERFACE);
+        }
+
+      fib_table_entry_delete (fib_index, &if_pfx, FIB_SOURCE_INTERFACE);
+    }
+
+  /* Finally the /32 for the address itself. */
+  if_pfx.fp_len = 32;
+  fib_table_entry_delete (fib_index, &if_pfx, FIB_SOURCE_INTERFACE);
+}
+
+/*
+ * Reference-counted enable/disable of IPv4 on a software interface.
+ *
+ * A per-interface counter is incremented on enable and decremented on
+ * disable.  Only when it moves between 0 and 1 is the "ip4-drop" feature
+ * toggled on the "ip4-unicast" and "ip4-multicast" arcs: switched off when
+ * IPv4 becomes enabled, switched on when it becomes disabled.
+ */
+void
+ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
+{
+  ip4_main_t *im = &ip4_main;
+  int drop_on;
+
+  vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
+
+  if (!is_enable)
+    {
+      ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
+      im->ip_enabled_by_sw_if_index[sw_if_index]--;
+      /* Other users remain: nothing to change. */
+      if (im->ip_enabled_by_sw_if_index[sw_if_index] != 0)
+        return;
+    }
+  else
+    {
+      im->ip_enabled_by_sw_if_index[sw_if_index]++;
+      /* Already enabled by somebody else: nothing to change. */
+      if (im->ip_enabled_by_sw_if_index[sw_if_index] != 1)
+        return;
+    }
+
+  drop_on = !is_enable;
+
+  vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
+                               drop_on, 0, 0);
+  vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
+                               drop_on, 0, 0);
+}
+
+/*
+ * Add (is_del == 0) or delete (is_del != 0) an IPv4 address/prefix-length
+ * on a software interface.
+ *
+ * Steps:
+ *  - reject sw_if_index 0 (local0);
+ *  - on add, reject an address whose prefix covers, or is covered by, an
+ *    address already configured on the interface;
+ *  - update the interface address pool via ip_interface_address_add_del ();
+ *  - bump the IPv4 enable reference count and add/remove the interface
+ *    routes in the interface's FIB;
+ *  - invoke the registered add/del callbacks, but only when the address
+ *    pool actually changed size.
+ *
+ * Returns 0 on success or a clib_error_t describing the failure.  The
+ * temporary addr_fib vector is released on every path that allocated it.
+ */
+static clib_error_t *
+ip4_add_del_interface_address_internal (vlib_main_t * vm,
+                                        u32 sw_if_index,
+                                        ip4_address_t * address,
+                                        u32 address_length, u32 is_del)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip4_main_t *im = &ip4_main;
+  ip_lookup_main_t *lm = &im->lookup_main;
+  clib_error_t *error = 0;
+  u32 if_address_index, elts_before;
+  ip4_address_fib_t ip4_af, *addr_fib = 0;
+
+  /* local0 interface doesn't support IP addressing */
+  if (sw_if_index == 0)
+    {
+      return
+        clib_error_create ("local0 interface doesn't support IP addressing");
+    }
+
+  vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+  ip4_addr_fib_init (&ip4_af, address,
+                     vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
+  vec_add1 (addr_fib, ip4_af);
+
+  /* FIXME-LATER
+   * there is no support for adj-fib handling in the presence of overlapping
+   * subnets on interfaces. Easy fix - disallow overlapping subnets, like
+   * most routers do.
+   */
+  /* *INDENT-OFF* */
+  if (!is_del)
+    {
+      /* When adding an address check that it does not conflict
+         with an existing address. */
+      ip_interface_address_t *ia;
+      foreach_ip_interface_address
+        (&im->lookup_main, ia, sw_if_index,
+         0 /* honor unnumbered */ ,
+         ({
+           ip4_address_t * x =
+             ip_interface_address_get_address
+             (&im->lookup_main, ia);
+           if (ip4_destination_matches_route
+               (im, address, x, ia->address_length) ||
+               ip4_destination_matches_route (im,
+                                              x,
+                                              address,
+                                              address_length))
+             {
+               /* Leave through "done" (not a bare return) so that the
+                  addr_fib vector allocated above is freed. */
+               error =
+                 clib_error_create
+                 ("failed to add %U which conflicts with %U for interface %U",
+                  format_ip4_address_and_length, address,
+                  address_length,
+                  format_ip4_address_and_length, x,
+                  ia->address_length,
+                  format_vnet_sw_if_index_name, vnm,
+                  sw_if_index);
+               goto done;
+             }
+         }));
+    }
+  /* *INDENT-ON* */
+
+  elts_before = pool_elts (lm->if_address_pool);
+
+  error = ip_interface_address_add_del
+    (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
+  if (error)
+    goto done;
+
+  ip4_sw_interface_enable_disable (sw_if_index, !is_del);
+
+  if (is_del)
+    ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
+  else
+    ip4_add_interface_routes (sw_if_index,
+                              im, ip4_af.fib_index,
+                              pool_elt_at_index
+                              (lm->if_address_pool, if_address_index));
+
+  /* Notify listeners only when the pool grew/shrank; an unchanged pool
+     size means the request was for a duplicate address. */
+  if (elts_before != pool_elts (lm->if_address_pool))
+    {
+      ip4_add_del_interface_address_callback_t *cb;
+      vec_foreach (cb, im->add_del_interface_address_callbacks)
+        cb->function (im, cb->function_opaque, sw_if_index,
+                      address, address_length, if_address_index, is_del);
+    }
+
+done:
+  vec_free (addr_fib);
+  return error;
+}
+
+/*
+ * Public entry point for adding or deleting an IPv4 interface address.
+ * All arguments are forwarded unchanged to
+ * ip4_add_del_interface_address_internal (), whose result is returned.
+ */
+clib_error_t *
+ip4_add_del_interface_address (vlib_main_t * vm,
+                               u32 sw_if_index,
+                               ip4_address_t * address,
+                               u32 address_length, u32 is_del)
+{
+  clib_error_t *rv;
+
+  rv = ip4_add_del_interface_address_internal (vm, sw_if_index, address,
+                                               address_length, is_del);
+  return rv;
+}
+
+/* Built-in ip4 unicast rx feature path definition */
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (ip4_unicast, static) = /* unicast rx arc, entered from the start_nodes listed below */
+{
+ .arc_name = "ip4-unicast",
+ .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
+ .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index, /* presumably filled in with the arc's index at feature init - confirm */
+};
+
+VNET_FEATURE_INIT (ip4_flow_classify, static) = /* Each entry below names a node on the arc and the node(s) it must precede. */
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-flow-classify",
+ .runs_before = VNET_FEATURES ("ip4-inacl"),
+};
+
+VNET_FEATURE_INIT (ip4_inacl, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-inacl",
+ .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
+};
+
+VNET_FEATURE_INIT (ip4_source_check_1, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-source-check-via-rx",
+ .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
+};
+
+VNET_FEATURE_INIT (ip4_source_check_2, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-source-check-via-any",
+ .runs_before = VNET_FEATURES ("ip4-policer-classify"),
+};
+
+VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-source-and-port-range-check-rx",
+ .runs_before = VNET_FEATURES ("ip4-policer-classify"), /* same constraint as ip4-source-check-via-any; no order between the two is declared here */
+};
+
+VNET_FEATURE_INIT (ip4_policer_classify, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-policer-classify",
+ .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
+};
+
+VNET_FEATURE_INIT (ip4_ipsec, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ipsec-input-ip4",
+ .runs_before = VNET_FEATURES ("vpath-input-ip4"),
+};
+
+VNET_FEATURE_INIT (ip4_vpath, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "vpath-input-ip4",
+ .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
+};
+
+VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-vxlan-bypass",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+
+VNET_FEATURE_INIT (ip4_drop, static) = /* the feature toggled by ip4_sw_interface_enable_disable () and ip4_sw_interface_add_del () */
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-drop",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+
+VNET_FEATURE_INIT (ip4_lookup, static) =
+{
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-lookup",
+ .runs_before = 0, /* not before any other features */
+};
+
+/* Built-in ip4 multicast rx feature path definition */
+VNET_FEATURE_ARC_INIT (ip4_multicast, static) = /* multicast rx arc; same start nodes as the "ip4-unicast" arc */
+{
+ .arc_name = "ip4-multicast",
+ .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
+ .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index, /* presumably filled in with the arc's index at feature init - confirm */
+};
+
+VNET_FEATURE_INIT (ip4_vpath_mc, static) =
+{
+ .arc_name = "ip4-multicast",
+ .node_name = "vpath-input-ip4",
+ .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
+};
+
+VNET_FEATURE_INIT (ip4_mc_drop, static) = /* the multicast-arc feature toggled by ip4_sw_interface_enable_disable () */
+{
+ .arc_name = "ip4-multicast",
+ .node_name = "ip4-drop",
+ .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
+};
+
+VNET_FEATURE_INIT (ip4_lookup_mc, static) =
+{
+ .arc_name = "ip4-multicast",
+ .node_name = "ip4-mfib-forward-lookup",
+ .runs_before = 0, /* last feature */
+};
+
+/* Source and port-range check ip4 tx feature path definition */
+VNET_FEATURE_ARC_INIT (ip4_output, static) = /* tx arc, entered from the rewrite/midchain nodes listed below */
+{
+ .arc_name = "ip4-output",
+ .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
+ .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index, /* presumably filled in with the arc's index at feature init - confirm */
+};
+
+VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
+{
+ .arc_name = "ip4-output",
+ .node_name = "ip4-source-and-port-range-check-tx",
+ .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
+};
+
+VNET_FEATURE_INIT (ip4_ipsec_output, static) =
+{
+ .arc_name = "ip4-output",
+ .node_name = "ipsec-output-ip4",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+/* Built-in ip4 tx feature path definition */
+VNET_FEATURE_INIT (ip4_interface_output, static) =
+{
+ .arc_name = "ip4-output",
+ .node_name = "interface-output",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
+
+/*
+ * Software-interface add/del hook for IPv4, registered through
+ * VNET_SW_INTERFACE_ADD_DEL_FUNCTION below.
+ *
+ * Makes sure the per-interface FIB and MFIB index vectors cover
+ * sw_if_index.  On removal (is_add == 0) every IPv4 address found on the
+ * interface is deleted.  In both cases the "ip4-drop" feature on the
+ * unicast and multicast arcs is set to is_add.  Always returns 0.
+ */
+static clib_error_t *
+ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
+{
+  ip4_main_t *im = &ip4_main;
+
+  /* Fill in lookup tables with default table (0). */
+  vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+  vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
+
+  if (is_add == 0)
+    {
+      ip_lookup_main_t *lookup = &im->lookup_main;
+      ip_interface_address_t *ifa = 0;
+      vlib_main_t *vm = vlib_get_main ();
+
+      /* *INDENT-OFF* */
+      foreach_ip_interface_address (lookup, ifa, sw_if_index, 1 /* honor unnumbered */,
+      ({
+        ip4_address_t *addr = ip_interface_address_get_address (lookup, ifa);
+        ip4_add_del_interface_address (vm, sw_if_index, addr,
+                                       ifa->address_length, 1 /* is_del */);
+      }));
+      /* *INDENT-ON* */
+    }
+
+  vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
+                               is_add, 0, 0);
+  vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
+                               is_add, 0, 0);
+
+  /* no error */
+  return 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
+
+/* Global IP4 main. */
+ip4_main_t ip4_main;
+
+/*
+ * One-time IPv4 lookup initialisation, run as a VLIB init function.
+ *
+ *  - runs vnet_feature_init first and propagates its error, if any;
+ *  - fills im->fib_masks[] with the network-byte-order mask for every
+ *    prefix length;
+ *  - initialises the lookup main and creates/locks unicast and multicast
+ *    table 0;
+ *  - installs the packet-generator edit function on the ip4-lookup node;
+ *  - builds the ARP request packet template.
+ */
+clib_error_t *
+ip4_lookup_init (vlib_main_t * vm)
+{
+  ip4_main_t *im = &ip4_main;
+  clib_error_t *error;
+  pg_node_t *pn;
+  ethernet_arp_header_t arp;
+  uword len;
+
+  error = vlib_call_init_function (vm, vnet_feature_init);
+  if (error)
+    return error;
+
+  for (len = 0; len < ARRAY_LEN (im->fib_masks); len++)
+    {
+      /* The top "len" bits set; all ones from length 32 upwards. */
+      u32 host_mask =
+        (len < 32) ? (u32) (pow2_mask (len) << (32 - len)) : (u32) ~0;
+
+      im->fib_masks[len] = clib_host_to_net_u32 (host_mask);
+    }
+
+  ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
+
+  /* Create FIB with index 0 and table id of 0. */
+  fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
+                                     FIB_SOURCE_DEFAULT_ROUTE);
+  mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
+                                      MFIB_SOURCE_DEFAULT_ROUTE);
+
+  pn = pg_get_node (ip4_lookup_node.index);
+  pn->unformat_edit = unformat_pg_ip4_header;
+
+  memset (&arp, 0, sizeof (arp));
+
+  /* Set target ethernet address to all zeros. */
+  memset (arp.ip4_over_ethernet[1].ethernet, 0,
+          sizeof (arp.ip4_over_ethernet[1].ethernet));
+
+  arp.l2_type = clib_host_to_net_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet);
+  arp.l3_type = clib_host_to_net_u16 (ETHERNET_TYPE_IP4);
+  arp.n_l2_address_bytes = 6;
+  arp.n_l3_address_bytes = 4;
+  arp.opcode = clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
+
+  vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
+                             /* data */ &arp,
+                             sizeof (arp),
+                             /* alloc chunk size */ 8,
+                             "ip4 arp");
+
+  /* Still 0 here: a non-zero value was returned above. */
+  return error;
+}
+
+VLIB_INIT_FUNCTION (ip4_lookup_init);
+
+typedef struct /* Per-packet trace record filled in by ip4_forward_next_trace (). */
+{
+ /* Value of vnet_buffer ()->ip.adj_index[] (RX or TX slot) when traced. */
+ u32 dpo_index;
+ u32 flow_hash; /* copy of vnet_buffer ()->ip.flow_hash */
+ u32 fib_index; /* sw_if_index[VLIB_TX] when that is not ~0, else the rx interface's FIB index */
+
+ /* Packet data, possibly *after* rewrite: copied from the buffer's current data pointer. */
+ u8 packet_data[64 - 1 * sizeof (u32)];
+}
+ip4_forward_next_trace_t;
+
+/*
+ * Trace formatter: prints the captured packet bytes as an IPv4 header,
+ * indented to the current indent of s.  The vm and node arguments are
+ * consumed from the va_list but not used.
+ */
+u8 *
+format_ip4_forward_next_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip4_forward_next_trace_t *trace =
+    va_arg (*args, ip4_forward_next_trace_t *);
+  uword margin = format_get_indent (s);
+
+  return format (s, "%U%U",
+                 format_white_space, margin,
+                 format_ip4_header, trace->packet_data,
+                 sizeof (trace->packet_data));
+}
+
+/*
+ * Trace formatter for lookup nodes: one line with the FIB index, DPO index
+ * and flow hash, followed by the captured bytes formatted as an IPv4
+ * header on an indented second line.
+ */
+static u8 *
+format_ip4_lookup_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip4_forward_next_trace_t *trace =
+    va_arg (*args, ip4_forward_next_trace_t *);
+  uword margin = format_get_indent (s);
+
+  s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
+              trace->fib_index, trace->dpo_index, trace->flow_hash);
+
+  return format (s, "\n%U%U",
+                 format_white_space, margin,
+                 format_ip4_header, trace->packet_data,
+                 sizeof (trace->packet_data));
+}
+
+/*
+ * Trace formatter for rewrite nodes: one line with the tx interface index
+ * (stored in the trace's fib_index field), the DPO index, the adjacency and
+ * the flow hash, followed by the captured bytes formatted through the
+ * adjacency on an indented second line.
+ */
+static u8 *
+format_ip4_rewrite_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip4_forward_next_trace_t *trace =
+    va_arg (*args, ip4_forward_next_trace_t *);
+  uword margin = format_get_indent (s);
+
+  s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
+              trace->fib_index, trace->dpo_index,
+              format_ip_adjacency, trace->dpo_index,
+              FORMAT_IP_ADJACENCY_NONE, trace->flow_hash);
+
+  return format (s, "\n%U%U",
+                 format_white_space, margin,
+                 format_ip_adjacency_packet_data,
+                 trace->dpo_index, trace->packet_data,
+                 sizeof (trace->packet_data));
+}
+
+/* Common trace function for all ip4-forward next nodes. */
+/*
+ * Record one trace entry for buffer b if it is flagged as traced:
+ * adjacency/DPO index (slot chosen by which_adj_index), flow hash, FIB
+ * index (sw_if_index[VLIB_TX] when set, otherwise the rx interface's FIB)
+ * and the leading bytes of the packet.
+ */
+static inline void
+ip4_forward_next_trace_one (vlib_main_t * vm,
+                            vlib_node_runtime_t * node,
+                            ip4_main_t * im,
+                            vlib_buffer_t * b,
+                            vlib_rx_or_tx_t which_adj_index)
+{
+  ip4_forward_next_trace_t *t;
+
+  if (!(b->flags & VLIB_BUFFER_IS_TRACED))
+    return;
+
+  t = vlib_add_trace (vm, node, b, sizeof (t[0]));
+  t->dpo_index = vnet_buffer (b)->ip.adj_index[which_adj_index];
+  t->flow_hash = vnet_buffer (b)->ip.flow_hash;
+
+  if (vnet_buffer (b)->sw_if_index[VLIB_TX] != (u32) ~ 0)
+    t->fib_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
+  else
+    t->fib_index = vec_elt (im->fib_index_by_sw_if_index,
+                            vnet_buffer (b)->sw_if_index[VLIB_RX]);
+
+  clib_memcpy (t->packet_data, vlib_buffer_get_current (b),
+               sizeof (t->packet_data));
+}
+
+/* Common trace function for all ip4-forward next nodes. */
+void
+ip4_forward_next_trace (vlib_main_t * vm,
+                        vlib_node_runtime_t * node,
+                        vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
+{
+  ip4_main_t *im = &ip4_main;
+  u32 *bi = vlib_frame_vector_args (frame);
+  u32 remaining = frame->n_vectors;
+
+  /* Two buffers per pass while prefetching the following two. */
+  for (; remaining >= 4; bi += 2, remaining -= 2)
+    {
+      vlib_buffer_t *first, *second;
+
+      /* Prefetch next iteration. */
+      vlib_prefetch_buffer_with_index (vm, bi[2], LOAD);
+      vlib_prefetch_buffer_with_index (vm, bi[3], LOAD);
+
+      first = vlib_get_buffer (vm, bi[0]);
+      second = vlib_get_buffer (vm, bi[1]);
+
+      ip4_forward_next_trace_one (vm, node, im, first, which_adj_index);
+      ip4_forward_next_trace_one (vm, node, im, second, which_adj_index);
+    }
+
+  /* Whatever is left, one at a time. */
+  for (; remaining >= 1; bi += 1, remaining -= 1)
+    {
+      vlib_buffer_t *only = vlib_get_buffer (vm, bi[0]);
+
+      ip4_forward_next_trace_one (vm, node, im, only, which_adj_index);
+    }
+}
+
+/*
+ * Shared body of the ip4-drop and ip4-punt nodes: hands every buffer of
+ * the frame to vlib_error_drop_buffers () with the given error code,
+ * attributed to the ip4-input node, then records traces if the node is
+ * being traced.  Returns the number of packets in the frame.
+ */
+static uword
+ip4_drop_or_punt (vlib_main_t * vm,
+                  vlib_node_runtime_t * node,
+                  vlib_frame_t * frame, ip4_error_t error_code)
+{
+  uword count = frame->n_vectors;
+
+  vlib_error_drop_buffers (vm, node,
+                           (u32 *) vlib_frame_vector_args (frame),
+                           /* stride */ 1,
+                           count,
+                           /* next */ 0,
+                           ip4_input_node.index, error_code);
+
+  if ((node->flags & VLIB_NODE_FLAG_TRACE) != 0)
+    ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+  return count;
+}
+
+/* Node function of "ip4-drop": every packet gets IP4_ERROR_ADJACENCY_DROP. */
+static uword
+ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  const ip4_error_t reason = IP4_ERROR_ADJACENCY_DROP;
+
+  return ip4_drop_or_punt (vm, node, frame, reason);
+}
+
+/* Node function of "ip4-punt": every packet gets IP4_ERROR_ADJACENCY_PUNT. */
+static uword
+ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  const ip4_error_t reason = IP4_ERROR_ADJACENCY_PUNT;
+
+  return ip4_drop_or_punt (vm, node, frame, reason);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_drop_node, static) = /* graph node "ip4-drop"; also referenced by name as a feature on the ip4 rx arcs */
+{
+ .function = ip4_drop,
+ .name = "ip4-drop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_forward_next_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop", /* matches the "next 0" passed by ip4_drop_or_punt () */
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
+
+VLIB_REGISTER_NODE (ip4_punt_node, static) = /* graph node "ip4-punt"; same shape as ip4-drop but leading to error-punt */
+{
+ .function = ip4_punt,
+ .name = "ip4-punt",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_forward_next_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-punt",
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
+/* *INDENT-ON* */
+
+/* Compute TCP/UDP/ICMP4 checksum in software. */
+/*
+ * Compute the TCP/UDP/ICMP4 checksum in software.
+ *
+ * The sum is seeded with the pseudo-header (L4 length, protocol, source and
+ * destination address taken from ip0) and then run over the L4 payload,
+ * which may continue in buffers chained behind p0.  ip0 must point into
+ * p0's data.
+ *
+ * Returns the folded, complemented 16-bit sum.  If the buffer chain ends
+ * before the payload length announced in the IP header has been consumed,
+ * 0xfefe is returned instead of following a non-existent next buffer.
+ */
+u16
+ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+                              ip4_header_t * ip0)
+{
+  ip_csum_t sum0;
+  u32 ip_header_length, payload_length_host_byte_order;
+  u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
+  u16 sum16;
+  void *data_this_buffer;
+
+  /* Initialize checksum with ip header. */
+  ip_header_length = ip4_header_bytes (ip0);
+  payload_length_host_byte_order =
+    clib_net_to_host_u16 (ip0->length) - ip_header_length;
+  sum0 =
+    clib_host_to_net_u32 (payload_length_host_byte_order +
+                          (ip0->protocol << 16));
+
+  if (BITS (uword) == 32)
+    {
+      sum0 =
+        ip_csum_with_carry (sum0,
+                            clib_mem_unaligned (&ip0->src_address, u32));
+      sum0 =
+        ip_csum_with_carry (sum0,
+                            clib_mem_unaligned (&ip0->dst_address, u32));
+    }
+  else
+    /* A single 64-bit load covers source and destination address. */
+    sum0 =
+      ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
+
+  n_bytes_left = n_this_buffer = payload_length_host_byte_order;
+  data_this_buffer = (void *) ip0 + ip_header_length;
+  /* Bytes of this buffer that lie at or after the start of the IP header. */
+  n_ip_bytes_this_buffer =
+    p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
+  if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
+    {
+      n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
+        n_ip_bytes_this_buffer - ip_header_length : 0;
+    }
+  while (1)
+    {
+      sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
+      n_bytes_left -= n_this_buffer;
+      if (n_bytes_left == 0)
+        break;
+
+      ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
+      /* With ASSERT compiled out, do not chase a stale next_buffer. */
+      if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
+        return 0xfefe;
+
+      p0 = vlib_get_buffer (vm, p0->next_buffer);
+      data_this_buffer = vlib_buffer_get_current (p0);
+      /* Never sum past the announced payload: a longer buffer would make
+         n_bytes_left wrap around and the loop run off the chain. */
+      n_this_buffer = p0->current_length < n_bytes_left ?
+        p0->current_length : n_bytes_left;
+    }
+
+  sum16 = ~ip_csum_fold (sum0);
+
+  return sum16;
+}
+
+/*
+ * Validate the TCP/UDP checksum of the packet whose IPv4 header starts at
+ * p0's current data, and record the verdict in p0->flags.
+ *
+ * VNET_BUFFER_F_L4_CHECKSUM_COMPUTED is always set;
+ * VNET_BUFFER_F_L4_CHECKSUM_CORRECT is set when the computed sum is zero,
+ * or when the packet is UDP with a zero checksum field (in which case no
+ * sum is computed).  Returns the updated p0->flags.
+ */
+u32
+ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
+{
+  ip4_header_t *ip0 = vlib_buffer_get_current (p0);
+  udp_header_t *udp0;
+  u16 sum16;
+
+  ASSERT (ip0->protocol == IP_PROTOCOL_TCP
+          || ip0->protocol == IP_PROTOCOL_UDP);
+
+  /* Locate the L4 header from the IP header length so that IP options are
+     skipped, as ip4_tcp_udp_compute_checksum () and
+     ip4_local_validate_l4 () already do. */
+  udp0 = ip4_next_header (ip0);
+  if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
+    {
+      p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
+                    | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+      return p0->flags;
+    }
+
+  sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
+
+  p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
+                | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
+
+  return p0->flags;
+}
+
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (ip4_local) = /* not static: ip4_local_inline () reads vnet_feat_arc_ip4_local.feature_arc_index, presumably the object defined here */
+{
+ .arc_name = "ip4-local",
+ .start_nodes = VNET_FEATURES ("ip4-local"),
+};
+/* *INDENT-ON* */
+
+/*
+ * Validate the L4 checksum of a locally-destined TCP/UDP packet and, for
+ * UDP, check the UDP length against the IP total length.
+ *
+ * *good_tcp_udp is set to 1 when the checksum is correct, 0 otherwise.
+ * For UDP, a UDP length larger than the IP length clears *good_tcp_udp and
+ * stores IP4_ERROR_UDP_LENGTH in *error; otherwise *error is untouched.
+ */
+static inline void
+ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip,
+                       u8 is_udp, u8 * error, u8 * good_tcp_udp)
+{
+  u32 buffer_flags = ip4_tcp_udp_validate_checksum (vm, p);
+  udp_header_t *udp_hdr;
+  u32 ip_total_len, udp_total_len;
+  i32 slack;
+
+  *good_tcp_udp = (buffer_flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+  if (!is_udp)
+    return;
+
+  /* Verify UDP length. */
+  udp_hdr = ip4_next_header (ip);
+  ip_total_len = clib_net_to_host_u16 (ip->length);
+  udp_total_len = clib_net_to_host_u16 (udp_hdr->length);
+  slack = ip_total_len - udp_total_len;
+
+  if (slack < 0)
+    {
+      *good_tcp_udp = 0;
+      *error = IP4_ERROR_UDP_LENGTH;
+    }
+}
+
+/*
+ * True when a packet is TCP/UDP and its L4 checksum has not been computed
+ * yet, i.e. when ip4_local_validate_l4 () still has to run.  Both arguments
+ * are parenthesised so that expressions such as "a | b" or "x || y" can be
+ * passed safely.
+ */
+#define ip4_local_do_l4_check(is_tcp_udp, flags) \
+  ((is_tcp_udp) && !((flags) & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))
+
+static inline uword /* Worker shared by ip4_local () and ip4_local_end_of_arc ().
+ * With head_of_feature_arc != 0, TCP/UDP packets get the L4 checksum and UDP
+ * length validation, every packet gets the source-address FIB lookup below,
+ * and packets still carrying IP4_ERROR_UNKNOWN_PROTOCOL are handed to
+ * vnet_feature_arc_start ().  With head_of_feature_arc == 0 those steps are
+ * skipped and the packet is dispatched on lm->local_next_by_ip_protocol[].
+ * Returns frame->n_vectors. */
+ip4_local_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int head_of_feature_arc)
+{
+ ip4_main_t *im = &ip4_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_local_next_t next_index;
+ u32 *from, *to_next, n_left_from, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index); /* errors are attributed to the ip4-input node */
+ u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2) /* dual loop: two packets per iteration */
+ {
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ ip4_fib_mtrie_t *mtrie0, *mtrie1;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ const dpo_id_t *dpo0, *dpo1;
+ const load_balance_t *lb0, *lb1;
+ u32 pi0, next0, fib_index0, lbi0;
+ u32 pi1, next1, fib_index1, lbi1;
+ u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
+ u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
+ u32 sw_if_index0, sw_if_index1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ next0 = next1 = IP_LOCAL_NEXT_DROP;
+ error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL; /* doubles as the "no error found yet" value, see the next0/next1 selection below */
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
+ vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
+
+ /* Treat IP frag packets as "experimental" protocol for now
+ until support of IP frag reassembly is implemented */
+ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
+ proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
+
+ if (head_of_feature_arc == 0)
+ goto skip_checks; /* end-of-arc node: dispatch only, no validation */
+
+ is_udp0 = proto0 == IP_PROTOCOL_UDP;
+ is_udp1 = proto1 == IP_PROTOCOL_UDP;
+ is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
+ is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
+
+ good_tcp_udp0 =
+ (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ good_tcp_udp1 =
+ (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)
+ || ip4_local_do_l4_check (is_tcp_udp1,
+ p1->flags)))
+ { /* at least one of the pair is TCP/UDP without a computed L4 checksum */
+ if (is_tcp_udp0)
+ ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
+ &good_tcp_udp0);
+ if (is_tcp_udp1)
+ ip4_local_validate_l4 (vm, p1, ip1, is_udp1, &error1,
+ &good_tcp_udp1);
+ }
+
+ ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM); /* lets "+ is_udp" below select the UDP error */
+ error0 = (is_tcp_udp0 && !good_tcp_udp0
+ ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
+ error1 = (is_tcp_udp1 && !good_tcp_udp1
+ ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
+
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
+ fib_index0 =
+ (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+ fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
+ fib_index1 =
+ (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
+
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie; /* the lookups below are on the packet's *source* address */
+ mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
+
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+ leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
+ 2);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
+ 2);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
+ 3);
+ leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
+ 3);
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
+ ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
+
+ vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
+ ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
+
+ lb0 = load_balance_get (lbi0);
+ lb1 = load_balance_get (lbi1);
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+
+ /*
+ * Must have a route to source otherwise we drop the packet.
+ * ip4 broadcasts are accepted, e.g. to make dhcp client work
+ *
+ * The checks are:
+ * - the source is a receive => it's from us => bogus, do this
+ * first since it sets a different error code.
+ * - uRPF check for any route to source - accept if passes.
+ * - allow packets destined to the broadcast address from unknown sources
+ */
+ error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ dpo0->dpoi_type == DPO_RECEIVE) ?
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
+ error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ !fib_urpf_check_size (lb0->lb_urpf) &&
+ ip0->dst_address.as_u32 != 0xFFFFFFFF)
+ ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
+ error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ dpo1->dpoi_type == DPO_RECEIVE) ?
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
+ error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ !fib_urpf_check_size (lb1->lb_urpf) &&
+ ip1->dst_address.as_u32 != 0xFFFFFFFF)
+ ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
+
+ skip_checks:
+
+ next0 = lm->local_next_by_ip_protocol[proto0];
+ next1 = lm->local_next_by_ip_protocol[proto1];
+
+ next0 = /* any error recorded above overrides the per-protocol next with drop */
+ error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+ next1 =
+ error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
+
+ p0->error = error0 ? error_node->errors[error0] : 0;
+ p1->error = error1 ? error_node->errors[error1] : 0;
+
+ if (head_of_feature_arc)
+ {
+ if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
+ vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
+ if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
+ vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, pi1,
+ next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0) /* single loop: same steps as above for one packet */
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ ip4_fib_mtrie_t *mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ u32 pi0, next0, fib_index0, lbi0;
+ u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
+ load_balance_t *lb0;
+ const dpo_id_t *dpo0;
+ u32 sw_if_index0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ next0 = IP_LOCAL_NEXT_DROP;
+ error0 = IP4_ERROR_UNKNOWN_PROTOCOL; /* "no error found yet" */
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+ vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /* Treat IP frag packets as "experimental" protocol for now
+ until support of IP frag reassembly is implemented */
+ proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
+
+ if (head_of_feature_arc == 0)
+ goto skip_check; /* end-of-arc node: dispatch only, no validation */
+
+ is_udp0 = proto0 == IP_PROTOCOL_UDP;
+ is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
+ good_tcp_udp0 =
+ (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)))
+ {
+ ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
+ &good_tcp_udp0);
+ }
+
+ ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
+ error0 = (is_tcp_udp0 && !good_tcp_udp0
+ ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
+
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
+ fib_index0 =
+ (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+ (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+ mtrie0 = &ip4_fib_get (fib_index0)->mtrie; /* source-address lookup, as in the dual loop */
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
+ 2);
+ leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
+ 3);
+ lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
+
+ lb0 = load_balance_get (lbi0);
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+
+ error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ dpo0->dpoi_type == DPO_RECEIVE) ?
+ IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
+ error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
+ !fib_urpf_check_size (lb0->lb_urpf) &&
+ ip0->dst_address.as_u32 != 0xFFFFFFFF)
+ ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
+
+ skip_check:
+ next0 = lm->local_next_by_ip_protocol[proto0];
+ next0 = /* any error recorded above overrides the per-protocol next with drop */
+ error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+
+ p0->error = error0 ? error_node->errors[error0] : 0;
+
+ if (head_of_feature_arc)
+ {
+ if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
+ vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, pi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/*
+ * Node function of "ip4-local": runs ip4_local_inline () as the head of the
+ * ip4-local feature arc, i.e. with all validation enabled.
+ */
+static uword
+ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  enum { at_head_of_arc = 1 };
+
+  return ip4_local_inline (vm, node, frame, at_head_of_arc);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_local_node) = /* not static: ip4_local_node.index is used by ip4_register_protocol () and the "show ip local" handler */
+{
+ .function = ip4_local,
+ .name = "ip4-local",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_forward_next_trace,
+ .n_next_nodes = IP_LOCAL_N_NEXT, /* static next nodes only; ip4_register_protocol () appends more with vlib_node_add_next () */
+ .next_nodes =
+ {
+ [IP_LOCAL_NEXT_DROP] = "error-drop",
+ [IP_LOCAL_NEXT_PUNT] = "error-punt",
+ [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
+ [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
+
+/*
+ * Node function of "ip4-local-end-of-arc": runs ip4_local_inline () with
+ * head_of_feature_arc == 0, so packets are only dispatched by protocol.
+ */
+static uword
+ip4_local_end_of_arc (vlib_main_t * vm,
+                      vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  enum { at_head_of_arc = 0 };
+
+  return ip4_local_inline (vm, node, frame, at_head_of_arc);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = { /* last node on the "ip4-local" arc, see the feature registration below */
+ .function = ip4_local_end_of_arc,
+ .name = "ip4-local-end-of-arc",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_forward_next_trace,
+ .sibling_of = "ip4-local", /* no next_nodes of its own; presumably shares ip4-local's next-node table - confirm against vlib sibling semantics */
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
+
+VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
+ .arc_name = "ip4-local",
+ .node_name = "ip4-local-end-of-arc",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
+
+/*
+ * Register node_index as the handler for locally-destined packets of the
+ * given IP protocol: the node is added as a next node of ip4-local and the
+ * resulting next index is stored in the per-protocol dispatch table.
+ */
+void
+ip4_register_protocol (u32 protocol, u32 node_index)
+{
+  ip_lookup_main_t *lm = &ip4_main.lookup_main;
+  vlib_main_t *vm = vlib_get_main ();
+  uword next_slot;
+
+  ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
+
+  next_slot = vlib_node_add_next (vm, ip4_local_node.index, node_index);
+  lm->local_next_by_ip_protocol[protocol] = next_slot;
+}
+
+/*
+ * CLI handler for "show ip local": prints, for every IP protocol whose
+ * dispatch entry is not IP_LOCAL_NEXT_PUNT, the protocol number and the
+ * name of the node it is dispatched to.  Always returns 0.
+ */
+static clib_error_t *
+show_ip_local_command_fn (vlib_main_t * vm,
+                          unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  ip_lookup_main_t *lm = &ip4_main.lookup_main;
+  int proto;
+
+  vlib_cli_output (vm, "Protocols handled by ip4_local");
+
+  for (proto = 0; proto < ARRAY_LEN (lm->local_next_by_ip_protocol); proto++)
+    {
+      vlib_node_t *local_node;
+      u32 handler_index;
+
+      if (lm->local_next_by_ip_protocol[proto] == IP_LOCAL_NEXT_PUNT)
+        continue;
+
+      /* Translate the next slot of ip4-local into a node index. */
+      local_node = vlib_get_node (vm, ip4_local_node.index);
+      handler_index =
+        local_node->next_nodes[lm->local_next_by_ip_protocol[proto]];
+
+      vlib_cli_output (vm, "%d: %U", proto, format_vlib_node_name, vm,
+                       handler_index);
+    }
+
+  return 0;
+}
+
+
+
+/*?
+ * Display the set of protocols handled by the local IPv4 stack.
+ *
+ * @cliexpar
+ * Example of how to display local protocol table:
+ * @cliexstart{show ip local}
+ * Protocols handled by ip4_local
+ * 1
+ * 17
+ * 47
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip_local, static) = /* binds the CLI path below to show_ip_local_command_fn () */
+{
+ .path = "show ip local",
+ .function = show_ip_local_command_fn, /* prints "<protocol>: <node name>" per handled protocol */
+ .short_help = "show ip local",
+};
+/* *INDENT-ON* */
+
+/*
+ * Shared worker for the ip4-arp and ip4-glean graph nodes.
+ *
+ * Every packet of the frame is enqueued to the error-drop next. In
+ * addition, for the first packet seen for a given (address, interface)
+ * hash bucket since the last seed change, an ARP request is built from
+ * the ARP request packet template and sent to the adjacency's rewrite
+ * next node.
+ *
+ * @param vm        vlib main of the calling thread
+ * @param node      runtime of the calling node (error counters, nexts)
+ * @param frame     frame of buffer indices to process
+ * @param is_glean  non-zero: ARP for the packet's destination address
+ *                  (glean adjacency); zero: ARP for the adjacency's
+ *                  next-hop address (incomplete neighbour adjacency)
+ * @return number of vectors in 'frame'
+ *
+ * The throttling state (hash seeds and bitmap) is function-static and is
+ * reset whenever more than 1e-3 (in vlib_time_now() units) has elapsed
+ * since the previous reset.
+ */
+always_inline uword
+ip4_arp_inline (vlib_main_t * vm,
+                vlib_node_runtime_t * node,
+                vlib_frame_t * frame, int is_glean)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip4_main_t *im = &ip4_main;
+  ip_lookup_main_t *lm = &im->lookup_main;
+  u32 *from, *to_next_drop;
+  uword n_left_from, n_left_to_next_drop, next_index;
+  static f64 time_last_seed_change = -1e100;
+  static u32 hash_seeds[3];
+  static uword hash_bitmap[256 / BITS (uword)];
+  f64 time_now;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+  time_now = vlib_time_now (vm);
+  if (time_now - time_last_seed_change > 1e-3)
+    {
+      uword i;
+      u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
+                                            sizeof (hash_seeds));
+      for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
+        hash_seeds[i] = r[i];
+
+      /* Mark all hash keys as not seen before. */
+      for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
+        hash_bitmap[i] = 0;
+
+      time_last_seed_change = time_now;
+    }
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  if (next_index == IP4_ARP_NEXT_DROP)
+    next_index = IP4_ARP_N_NEXT;	/* point to first interface */
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
+                           to_next_drop, n_left_to_next_drop);
+
+      while (n_left_from > 0 && n_left_to_next_drop > 0)
+        {
+          u32 pi0, adj_index0, a0, b0, c0, sw_if_index0, drop0;
+          ip_adjacency_t *adj0;
+          vlib_buffer_t *p0;
+          ip4_header_t *ip0;
+          /*
+           * The bit mask must be as wide as a bitmap word: it is built
+           * with a shift of up to BITS (uword) - 1 and would lose its
+           * upper half if stored in a u32.
+           */
+          uword bm0, m0;
+
+          pi0 = from[0];
+
+          p0 = vlib_get_buffer (vm, pi0);
+
+          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+          adj0 = adj_get (adj_index0);
+          ip0 = vlib_buffer_get_current (p0);
+
+          a0 = hash_seeds[0];
+          b0 = hash_seeds[1];
+          c0 = hash_seeds[2];
+
+          sw_if_index0 = adj0->rewrite_header.sw_if_index;
+          vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+          if (is_glean)
+            {
+              /*
+               * this is the Glean case, so we are ARPing for the
+               * packet's destination
+               */
+              a0 ^= ip0->dst_address.data_u32;
+            }
+          else
+            {
+              a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
+            }
+          b0 ^= sw_if_index0;
+
+          hash_v3_mix32 (a0, b0, c0);
+          hash_v3_finalize32 (a0, b0, c0);
+
+          /* Reduce the hash to a (word, bit) position in hash_bitmap. */
+          c0 &= BITS (hash_bitmap) - 1;
+          m0 = (uword) 1 << (c0 % BITS (uword));
+          c0 = c0 / BITS (uword);
+
+          bm0 = hash_bitmap[c0];
+          drop0 = (bm0 & m0) != 0;
+
+          /* Mark it as seen. */
+          hash_bitmap[c0] = bm0 | m0;
+
+          /* The original packet always goes to the drop next. */
+          from += 1;
+          n_left_from -= 1;
+          to_next_drop[0] = pi0;
+          to_next_drop += 1;
+          n_left_to_next_drop -= 1;
+
+          p0->error =
+            node->errors[drop0 ? IP4_ARP_ERROR_DROP :
+                         IP4_ARP_ERROR_REQUEST_SENT];
+
+          /*
+           * the adj has been updated to a rewrite but the node the DPO that got
+           * us here hasn't - yet. no big deal. we'll drop while we wait.
+           */
+          if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
+            continue;
+
+          /* Bucket already seen since the last seed change: throttle. */
+          if (drop0)
+            continue;
+
+          /*
+           * Can happen if the control-plane is programming tables
+           * with traffic flowing; at least that's today's lame excuse.
+           */
+          if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
+              || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
+            {
+              p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
+            }
+          else
+            /* Send ARP request. */
+            {
+              u32 bi0 = 0;
+              vlib_buffer_t *arp_b0;
+              ethernet_arp_header_t *h0;
+              vnet_hw_interface_t *hw_if0;
+
+              h0 =
+                vlib_packet_template_get_packet (vm,
+                                                 &im->ip4_arp_request_packet_template,
+                                                 &bi0);
+
+              /* Seems we're out of buffers */
+              if (PREDICT_FALSE (!h0))
+                continue;
+
+              /* Add rewrite/encap string for ARP packet. */
+              vnet_rewrite_one_header (adj0[0], h0,
+                                       sizeof (ethernet_header_t));
+
+              hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+
+              /* Src ethernet address in ARP header. */
+              clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
+                           hw_if0->hw_address,
+                           sizeof (h0->ip4_over_ethernet[0].ethernet));
+
+              if (is_glean)
+                {
+                  /* The interface's source address is stashed in the Glean Adj */
+                  h0->ip4_over_ethernet[0].ip4 =
+                    adj0->sub_type.glean.receive_addr.ip4;
+
+                  /* Copy in destination address we are requesting. This is the
+                   * glean case, so it's the packet's destination.*/
+                  h0->ip4_over_ethernet[1].ip4.data_u32 =
+                    ip0->dst_address.data_u32;
+                }
+              else
+                {
+                  /* Src IP address in ARP header. */
+                  if (ip4_src_address_for_packet (lm, sw_if_index0,
+                                                  &h0->
+                                                  ip4_over_ethernet[0].ip4))
+                    {
+                      /* No source address available */
+                      p0->error =
+                        node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
+                      vlib_buffer_free (vm, &bi0, 1);
+                      continue;
+                    }
+
+                  /* Copy in destination address we are requesting from the
+                     incomplete adj */
+                  h0->ip4_over_ethernet[1].ip4.data_u32 =
+                    adj0->sub_type.nbr.next_hop.ip4.as_u32;
+                }
+
+              vlib_buffer_copy_trace_flag (vm, p0, bi0);
+              arp_b0 = vlib_get_buffer (vm, bi0);
+              VLIB_BUFFER_TRACE_TRAJECTORY_INIT (arp_b0);
+              vnet_buffer (arp_b0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+              /* Expose the rewrite bytes written in front of the ARP header. */
+              vlib_buffer_advance (arp_b0, -adj0->rewrite_header.data_bytes);
+
+              vlib_set_next_frame_buffer (vm, node,
+                                          adj0->rewrite_header.next_index,
+                                          bi0);
+            }
+        }
+
+      vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
+    }
+
+  return frame->n_vectors;
+}
+
+/* Node function of ip4-arp: the non-glean flavour of ip4_arp_inline. */
+static uword
+ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  return ip4_arp_inline (vm, node, frame, 0 /* is_glean */ );
+}
+
+/* Node function of ip4-glean: the glean flavour of ip4_arp_inline. */
+static uword
+ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  return ip4_arp_inline (vm, node, frame, 1 /* is_glean */ );
+}
+
+/*
+ * Counter descriptions, indexed by IP4_ARP_ERROR_* code. The table is
+ * shared by the ip4-arp and ip4-glean node registrations.
+ */
+static char *ip4_arp_error_strings[] = {
+ [IP4_ARP_ERROR_DROP] = "address overflow drops",
+ [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
+ [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
+ [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
+ [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
+ [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
+};
+
+/* Graph node "ip4-arp": ARP for an incomplete neighbour adjacency. */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_arp_node) =
+{
+  .function = ip4_arp,
+  .name = "ip4-arp",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip4_forward_next_trace,
+  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
+  .error_strings = ip4_arp_error_strings,
+  .n_next_nodes = IP4_ARP_N_NEXT,
+  .next_nodes = {
+    [IP4_ARP_NEXT_DROP] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+/* Graph node "ip4-glean": ARP for the destination of a glean adjacency. */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_glean_node) =
+{
+  .function = ip4_glean,
+  .name = "ip4-glean",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip4_forward_next_trace,
+  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
+  .error_strings = ip4_arp_error_strings,
+  .n_next_nodes = IP4_ARP_N_NEXT,
+  .next_nodes = {
+    [IP4_ARP_NEXT_DROP] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+/*
+ * ARP error codes that are registered with the pcap drop-trace filter by
+ * arp_notrace_init(). NON_ARP_ADJ and NO_SOURCE_ADDRESS are not in
+ * this list.
+ */
+#define foreach_notrace_ip4_arp_error \
+_(DROP) \
+_(REQUEST_SENT) \
+_(REPLICATE_DROP) \
+_(REPLICATE_FAIL)
+
+/*
+ * Init function: adds one pcap drop-trace filter entry per error listed
+ * in foreach_notrace_ip4_arp_error, using the error codes of the
+ * ip4-arp node's runtime. Always returns 0 (no error).
+ *
+ * NOTE(review): only ip4_arp_node's runtime is consulted here; whether
+ * the ip4-glean node needs the same filters cannot be told from this
+ * function - confirm.
+ */
+clib_error_t *
+arp_notrace_init (vlib_main_t * vm)
+{
+ vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
+
+ /* don't trace ARP request packets */
+#define _(a) \
+ vnet_pcap_drop_trace_filter_add_del \
+ (rt->errors[IP4_ARP_ERROR_##a], \
+ 1 /* is_add */);
+ foreach_notrace_ip4_arp_error;
+#undef _
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (arp_notrace_init);
+
+
+/*
+ * Send an ARP request to see if given destination is reachable on given
+ * interface.
+ *
+ * @param vm           vlib main of the calling thread
+ * @param dst          IPv4 address to probe
+ * @param sw_if_index  software interface to send the request on
+ * @return 0 on success, otherwise a clib error when:
+ *         - the interface is not admin-up,
+ *         - no interface address matches 'dst' (also sets
+ *           vnm->api_errno to VNET_API_ERROR_NO_MATCHING_INTERFACE),
+ *         - the supporting hardware interface has no hardware address,
+ *         - no buffer could be obtained for the ARP request.
+ *
+ * All checks that can fail are done before the request buffer is
+ * allocated, so no error path leaves an allocated buffer behind.
+ */
+clib_error_t *
+ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip4_main_t *im = &ip4_main;
+  ethernet_arp_header_t *h;
+  ip4_address_t *src;
+  ip_interface_address_t *ia;
+  ip_adjacency_t *adj;
+  vnet_hw_interface_t *hi;
+  vnet_sw_interface_t *si;
+  vlib_buffer_t *b;
+  adj_index_t ai;
+  u32 bi = 0;
+
+  si = vnet_get_sw_interface (vnm, sw_if_index);
+
+  if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+    {
+      return clib_error_return (0, "%U: interface %U down",
+                                format_ip4_address, dst,
+                                format_vnet_sw_if_index_name, vnm,
+                                sw_if_index);
+    }
+
+  src =
+    ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
+  if (!src)
+    {
+      vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
+      return clib_error_return
+        (0,
+         "no matching interface address for destination %U (interface %U)",
+         format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
+         sw_if_index);
+    }
+
+  /* Validate the hardware interface before taking a buffer. */
+  hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+  if (PREDICT_FALSE (!hi->hw_address))
+    {
+      return clib_error_return (0, "%U: interface %U do not support ip probe",
+                                format_ip4_address, dst,
+                                format_vnet_sw_if_index_name, vnm,
+                                sw_if_index);
+    }
+
+  h = vlib_packet_template_get_packet (vm,
+                                       &im->ip4_arp_request_packet_template,
+                                       &bi);
+  /* The template returns NULL when no buffer is available. */
+  if (PREDICT_FALSE (!h))
+    return clib_error_return (0, "ARP request packet allocation failed");
+
+  /* Sender hardware/protocol address, then the address being probed. */
+  clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
+               sizeof (h->ip4_over_ethernet[0].ethernet));
+
+  h->ip4_over_ethernet[0].ip4 = src[0];
+  h->ip4_over_ethernet[1].ip4 = dst[0];
+
+  b = vlib_get_buffer (vm, bi);
+  vnet_buffer (b)->sw_if_index[VLIB_RX] =
+    vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+
+  ip46_address_t nh = {
+    .ip4 = *dst,
+  };
+
+  ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
+                            VNET_LINK_IP4, &nh, sw_if_index);
+  adj = adj_get (ai);
+
+  /* Peer has been previously resolved, retrieve glean adj instead */
+  if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
+    {
+      adj_unlock (ai);
+      ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh);
+      adj = adj_get (ai);
+    }
+
+  /* Add encapsulation string for software interface (e.g. ethernet header). */
+  vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+  vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+  /* Hand the single request straight to the interface output node. */
+  {
+    vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+    u32 *to_next = vlib_frame_vector_args (f);
+    to_next[0] = bi;
+    f->n_vectors = 1;
+    vlib_put_frame_to_node (vm, hi->output_node_index, f);
+  }
+
+  /* Drop the lock taken by the add_or_lock call above. */
+  adj_unlock (ai);
+  return /* no error */ 0;
+}
+
+/*
+ * Static next-node slots of the ip4-rewrite node family. The values index
+ * the next_nodes table of the ip4_rewrite_node registration.
+ */
+typedef enum
+{
+ IP4_REWRITE_NEXT_DROP,
+ IP4_REWRITE_NEXT_ICMP_ERROR,
+} ip4_rewrite_next_t;
+
+/*
+ * Common body of the ip4-rewrite node family (ip4-rewrite, ip4-midchain,
+ * ip4-rewrite-mcast, ip4-mcast-midchain).
+ *
+ * For every packet: decrement the TTL and patch the header checksum
+ * (unless the buffer is flagged as locally originated, in which case only
+ * the flag is cleared), check the length against the adjacency's maximum
+ * L3 packet size, prepend the adjacency rewrite string and dispatch to
+ * the adjacency's next node. Packets whose TTL expires go to the ICMP
+ * error next; other errored packets go to the drop next.
+ *
+ * @param vm           vlib main of the calling thread
+ * @param node         runtime of the calling node
+ * @param frame        frame of buffer indices to process
+ * @param do_counters  non-zero: maintain the per-adjacency counters
+ * @param is_midchain  non-zero: invoke the midchain adjacency fixup
+ * @param is_mcast     non-zero: apply the multicast MAC fixup taken from
+ *                     the IP destination address
+ * @return number of vectors in 'frame'
+ *
+ * Error codes are resolved against the ip4-input node's error table.
+ */
+always_inline uword
+ip4_rewrite_inline (vlib_main_t * vm,
+                    vlib_node_runtime_t * node,
+                    vlib_frame_t * frame,
+                    int do_counters, int is_midchain, int is_mcast)
+{
+  ip_lookup_main_t *lm = &ip4_main.lookup_main;
+  u32 *from = vlib_frame_vector_args (frame);
+  u32 n_left_from, n_left_to_next, *to_next, next_index;
+  vlib_node_runtime_t *error_node =
+    vlib_node_get_runtime (vm, ip4_input_node.index);
+
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  u32 thread_index = vlib_get_thread_index ();
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          ip_adjacency_t *adj0, *adj1;
+          vlib_buffer_t *p0, *p1;
+          ip4_header_t *ip0, *ip1;
+          u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
+          u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
+          u32 tx_sw_if_index0, tx_sw_if_index1;
+          uword n_bytes0, n_bytes1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t *p2, *p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, STORE);
+            vlib_prefetch_buffer_header (p3, STORE);
+
+            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+          }
+
+          pi0 = to_next[0] = from[0];
+          pi1 = to_next[1] = from[1];
+
+          from += 2;
+          n_left_from -= 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          p1 = vlib_get_buffer (vm, pi1);
+
+          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+          adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+          /*
+           * pre-fetch the per-adjacency counters
+           */
+          if (do_counters)
+            {
+              vlib_prefetch_combined_counter (&adjacency_counters,
+                                              thread_index, adj_index0);
+              vlib_prefetch_combined_counter (&adjacency_counters,
+                                              thread_index, adj_index1);
+            }
+
+          ip0 = vlib_buffer_get_current (p0);
+          ip1 = vlib_buffer_get_current (p1);
+
+          error0 = error1 = IP4_ERROR_NONE;
+          next0 = next1 = IP4_REWRITE_NEXT_DROP;
+
+          /* Decrement TTL & update checksum.
+             Works either endian, so no need for byte swap. */
+          if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
+            {
+              i32 ttl0 = ip0->ttl;
+
+              /* Input node should have rejected packets with ttl 0. */
+              ASSERT (ip0->ttl > 0);
+
+              checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+              checksum0 += checksum0 >= 0xffff;
+
+              ip0->checksum = checksum0;
+              ttl0 -= 1;
+              ip0->ttl = ttl0;
+
+              /*
+               * If the ttl drops below 1 when forwarding, generate
+               * an ICMP response.
+               */
+              if (PREDICT_FALSE (ttl0 <= 0))
+                {
+                  error0 = IP4_ERROR_TIME_EXPIRED;
+                  vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+                  icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
+                                               ICMP4_time_exceeded_ttl_exceeded_in_transit,
+                                               0);
+                  next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
+                }
+
+              /* Verify checksum. */
+              ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
+                      (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
+            }
+          else
+            {
+              p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+            }
+          if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
+            {
+              i32 ttl1 = ip1->ttl;
+
+              /* Input node should have rejected packets with ttl 0. */
+              ASSERT (ip1->ttl > 0);
+
+              checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
+              checksum1 += checksum1 >= 0xffff;
+
+              ip1->checksum = checksum1;
+              ttl1 -= 1;
+              ip1->ttl = ttl1;
+
+              /*
+               * If the ttl drops below 1 when forwarding, generate
+               * an ICMP response.
+               */
+              if (PREDICT_FALSE (ttl1 <= 0))
+                {
+                  error1 = IP4_ERROR_TIME_EXPIRED;
+                  vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+                  icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
+                                               ICMP4_time_exceeded_ttl_exceeded_in_transit,
+                                               0);
+                  next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
+                }
+
+              /* Verify checksum. */
+              ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
+                      (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
+            }
+          else
+            {
+              p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+            }
+
+          /* Rewrite packet header and updates lengths. */
+          adj0 = adj_get (adj_index0);
+          adj1 = adj_get (adj_index1);
+
+          /* Worth pipelining. No guarantee that adj0,1 are hot... */
+          rw_len0 = adj0[0].rewrite_header.data_bytes;
+          rw_len1 = adj1[0].rewrite_header.data_bytes;
+          vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
+          vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
+
+          /*
+           * Sample the packet lengths once, before the buffers are grown
+           * by the rewrite below; the same value feeds the MTU check and
+           * the byte counters, as in the single-packet loop.
+           */
+          n_bytes0 = vlib_buffer_length_in_chain (vm, p0);
+          n_bytes1 = vlib_buffer_length_in_chain (vm, p1);
+
+          /* Check MTU of outgoing interface. */
+          error0 =
+            (n_bytes0 > adj0[0].rewrite_header.max_l3_packet_bytes
+             ? IP4_ERROR_MTU_EXCEEDED : error0);
+          error1 =
+            (n_bytes1 > adj1[0].rewrite_header.max_l3_packet_bytes
+             ? IP4_ERROR_MTU_EXCEEDED : error1);
+
+          /* Record the verdict on the buffers, as the single loop does. */
+          p0->error = error_node->errors[error0];
+          p1->error = error_node->errors[error1];
+
+          /* Don't adjust the buffer for ttl issue; icmp-error node wants
+           * to see the IP header */
+          if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
+            {
+              next0 = adj0[0].rewrite_header.next_index;
+              p0->current_data -= rw_len0;
+              p0->current_length += rw_len0;
+              tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+              vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+
+              if (PREDICT_FALSE
+                  (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+                vnet_feature_arc_start (lm->output_feature_arc_index,
+                                        tx_sw_if_index0, &next0, p0);
+            }
+          if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
+            {
+              next1 = adj1[0].rewrite_header.next_index;
+              p1->current_data -= rw_len1;
+              p1->current_length += rw_len1;
+
+              tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
+              vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
+
+              if (PREDICT_FALSE
+                  (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+                vnet_feature_arc_start (lm->output_feature_arc_index,
+                                        tx_sw_if_index1, &next1, p1);
+            }
+
+          /* Guess we are only writing on simple Ethernet header. */
+          vnet_rewrite_two_headers (adj0[0], adj1[0],
+                                    ip0, ip1, sizeof (ethernet_header_t));
+
+          /*
+           * Bump the per-adjacency counters
+           */
+          if (do_counters)
+            {
+              vlib_increment_combined_counter
+                (&adjacency_counters,
+                 thread_index, adj_index0, 1, n_bytes0 + rw_len0);
+
+              vlib_increment_combined_counter
+                (&adjacency_counters,
+                 thread_index, adj_index1, 1, n_bytes1 + rw_len1);
+            }
+
+          /*
+           * NOTE(review): unlike the single-packet loop below, the
+           * midchain fixup here also runs for packets that carry an
+           * error - confirm whether that is intended.
+           */
+          if (is_midchain)
+            {
+              adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
+              adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
+            }
+          if (is_mcast)
+            {
+              /*
+               * copy bytes from the IP address into the MAC rewrite
+               */
+              vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+              vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
+            }
+
+          vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, pi1, next0, next1);
+        }
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          ip_adjacency_t *adj0;
+          vlib_buffer_t *p0;
+          ip4_header_t *ip0;
+          u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
+          u32 tx_sw_if_index0;
+
+          pi0 = to_next[0] = from[0];
+
+          p0 = vlib_get_buffer (vm, pi0);
+
+          adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+          adj0 = adj_get (adj_index0);
+
+          ip0 = vlib_buffer_get_current (p0);
+
+          error0 = IP4_ERROR_NONE;
+          next0 = IP4_REWRITE_NEXT_DROP;	/* drop on error */
+
+          /* Decrement TTL & update checksum. */
+          if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
+            {
+              i32 ttl0 = ip0->ttl;
+
+              checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
+
+              checksum0 += checksum0 >= 0xffff;
+
+              ip0->checksum = checksum0;
+
+              ASSERT (ip0->ttl > 0);
+
+              ttl0 -= 1;
+
+              ip0->ttl = ttl0;
+
+              ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
+                      (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
+
+              if (PREDICT_FALSE (ttl0 <= 0))
+                {
+                  /*
+                   * If the ttl drops below 1 when forwarding, generate
+                   * an ICMP response.
+                   */
+                  error0 = IP4_ERROR_TIME_EXPIRED;
+                  next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
+                  vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+                  icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
+                                               ICMP4_time_exceeded_ttl_exceeded_in_transit,
+                                               0);
+                }
+            }
+          else
+            {
+              p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+            }
+
+          if (do_counters)
+            vlib_prefetch_combined_counter (&adjacency_counters,
+                                            thread_index, adj_index0);
+
+          /* Guess we are only writing on simple Ethernet header. */
+          vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+          if (is_mcast)
+            {
+              /*
+               * copy bytes from the IP address into the MAC rewrite
+               */
+              vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+            }
+
+          /* Update packet buffer attributes/set output interface. */
+          rw_len0 = adj0[0].rewrite_header.data_bytes;
+          vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
+
+          if (do_counters)
+            vlib_increment_combined_counter
+              (&adjacency_counters,
+               thread_index, adj_index0, 1,
+               vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+
+          /* Check MTU of outgoing interface. */
+          error0 = (vlib_buffer_length_in_chain (vm, p0)
+                    > adj0[0].rewrite_header.max_l3_packet_bytes
+                    ? IP4_ERROR_MTU_EXCEEDED : error0);
+
+          p0->error = error_node->errors[error0];
+
+          /* Don't adjust the buffer for ttl issue; icmp-error node wants
+           * to see the IP header */
+          if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
+            {
+              p0->current_data -= rw_len0;
+              p0->current_length += rw_len0;
+              tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+
+              vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+              next0 = adj0[0].rewrite_header.next_index;
+
+              if (is_midchain)
+                {
+                  adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
+                }
+
+              if (PREDICT_FALSE
+                  (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+                vnet_feature_arc_start (lm->output_feature_arc_index,
+                                        tx_sw_if_index0, &next0, p0);
+
+            }
+
+          from += 1;
+          n_left_from -= 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           pi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  /* Need to do trace after rewrites to pick up new packet data. */
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    ip4_forward_next_trace (vm, node, frame, VLIB_TX);
+
+  return frame->n_vectors;
+}
+
+
+/** @brief IPv4 rewrite node.
+ @node ip4-rewrite
+
+ This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
+ header checksum, fetch the ip adjacency, check the outbound mtu,
+ apply the adjacency rewrite, and send pkts to the adjacency
+ rewrite header's rewrite_next_index.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: buffer metadata, next index usage
+
+ @em Uses:
+ - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
+ - the rewrite adjacency index
+ - <code>adj->lookup_next_index</code>
+ - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
+ the packet will be dropped.
+ - <code>adj->rewrite_header</code>
+ - Rewrite string length, rewrite string, next_index
+
+ @em Sets:
+ - <code>b->current_data, b->current_length</code>
+ - Updated net of applying the rewrite string
+
+ <em>Next Indices:</em>
+ - <code> adj->rewrite_header.next_index </code>
+ or @c error-drop
+*/
+static uword
+ip4_rewrite (vlib_main_t * vm,
+             vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  /* Arguments: do_counters, is_midchain, is_mcast. Kept as literals so
+     each call site is a distinct specialisation of the inline body. */
+  if (!adj_are_counters_enabled ())
+    return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
+
+  return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
+}
+
+/* Node function of ip4-midchain: rewrite plus the midchain fixup. */
+static uword
+ip4_midchain (vlib_main_t * vm,
+              vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  /* Arguments: do_counters, is_midchain, is_mcast. */
+  if (!adj_are_counters_enabled ())
+    return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
+
+  return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
+}
+
+/* Node function of ip4-rewrite-mcast: rewrite plus the multicast fixup. */
+static uword
+ip4_rewrite_mcast (vlib_main_t * vm,
+                   vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  /* Arguments: do_counters, is_midchain, is_mcast. */
+  if (!adj_are_counters_enabled ())
+    return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
+
+  return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
+}
+
+/* Node function of ip4-mcast-midchain: midchain and multicast fixups. */
+static uword
+ip4_mcast_midchain (vlib_main_t * vm,
+                    vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  /* Arguments: do_counters, is_midchain, is_mcast. */
+  if (!adj_are_counters_enabled ())
+    return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
+
+  return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
+}
+
+/* *INDENT-OFF* */
+/*
+ * ip4-rewrite owns the next-node table; its two slots follow the order
+ * of ip4_rewrite_next_t. The other three nodes below are registered as
+ * siblings of ip4-rewrite.
+ */
+VLIB_REGISTER_NODE (ip4_rewrite_node) = {
+ .function = ip4_rewrite,
+ .name = "ip4-rewrite",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_rewrite_trace,
+
+ .n_next_nodes = 2,
+ .next_nodes = {
+ [IP4_REWRITE_NEXT_DROP] = "error-drop",
+ [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
+
+/* Multicast flavour of ip4-rewrite. */
+VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
+ .function = ip4_rewrite_mcast,
+ .name = "ip4-rewrite-mcast",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_rewrite_trace,
+ .sibling_of = "ip4-rewrite",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
+
+/* Multicast midchain flavour; the only node here registered as static. */
+VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
+ .function = ip4_mcast_midchain,
+ .name = "ip4-mcast-midchain",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip4_rewrite_trace,
+ .sibling_of = "ip4-rewrite",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
+
+/*
+ * Midchain flavour.
+ * NOTE(review): this node uses format_ip4_forward_next_trace while the
+ * other three use format_ip4_rewrite_trace - confirm this is intended.
+ */
+VLIB_REGISTER_NODE (ip4_midchain_node) = {
+ .function = ip4_midchain,
+ .name = "ip4-midchain",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_forward_next_trace,
+ .sibling_of = "ip4-rewrite",
+};
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
+/* *INDENT-ON* */
+
+/*
+ * Cross-check the two IPv4 lookup paths of a FIB for one address.
+ *
+ * Looks 'a' up in the mtrie of FIB 'fib_index0' and compares the result
+ * with the one returned by ip4_fib_table_lookup_lb() for the same FIB.
+ *
+ * @return non-zero when both lookups agree, 0 otherwise
+ */
+int
+ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
+{
+  ip4_fib_t *fib = ip4_fib_get (fib_index0);
+  ip4_fib_mtrie_t *trie = &fib->mtrie;
+  ip4_fib_mtrie_leaf_t leaf;
+  u32 mtrie_lbi;
+
+  /* Walk the mtrie: first step, then the steps for bytes 2 and 3. */
+  leaf = ip4_fib_mtrie_lookup_step_one (trie, a);
+  leaf = ip4_fib_mtrie_lookup_step (trie, leaf, a, 2);
+  leaf = ip4_fib_mtrie_lookup_step (trie, leaf, a, 3);
+
+  mtrie_lbi = ip4_fib_mtrie_leaf_get_adj_index (leaf);
+
+  return mtrie_lbi == ip4_fib_table_lookup_lb (fib, a);
+}
+
+/*
+ * CLI handler for "test lookup".
+ *
+ * Parses an optional "table <n>", an optional "count <f>" and an optional
+ * IPv4 base address, then runs ip4_lookup_validate() on 'count'
+ * consecutive addresses starting at the base address and reports how many
+ * lookups disagreed.
+ *
+ * Defaults: table 0, count 1, base address 0.0.0.0. The base address is
+ * explicitly zero-initialised so that omitting it on the command line no
+ * longer reads an indeterminate value.
+ *
+ * @return 0 on completion, or a clib error for unparsable input or a
+ *         table whose stored index does not match the requested one.
+ */
+static clib_error_t *
+test_lookup_command_fn (vlib_main_t * vm,
+                        unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  ip4_fib_t *fib;
+  u32 table_id = 0;
+  f64 count = 1;
+  u32 n;
+  u32 i;
+  ip4_address_t ip4_base_address = {.as_u32 = 0 };
+  u64 errors = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "table %d", &table_id))
+        {
+          /* Make sure the entry exists. */
+          fib = ip4_fib_get (table_id);
+          if ((fib) && (fib->index != table_id))
+            return clib_error_return (0, "<fib-index> %d does not exist",
+                                      table_id);
+        }
+      else if (unformat (input, "count %f", &count))
+        ;
+
+      else if (unformat (input, "%U",
+                         unformat_ip4_address, &ip4_base_address))
+        ;
+      else
+        return clib_error_return (0, "unknown input `%U'",
+                                  format_unformat_error, input);
+    }
+
+  n = count;
+
+  /* Unsigned index: matches 'n' and cannot overflow for large counts. */
+  for (i = 0; i < n; i++)
+    {
+      if (!ip4_lookup_validate (&ip4_base_address, table_id))
+        errors++;
+
+      /* Advance to the next address, in host order, keeping the value
+         stored in network order. */
+      ip4_base_address.as_u32 =
+        clib_host_to_net_u32 (1 +
+                              clib_net_to_host_u32 (ip4_base_address.as_u32));
+    }
+
+  if (errors)
+    vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
+  else
+    vlib_cli_output (vm, "No errors in %d lookups\n", n);
+
+  return 0;
+}
+
+/*?
+ * Perform a lookup of an IPv4 Address (or range of addresses) in the
+ * given FIB table to determine if there is a conflict with the
+ * adjacency table. The fib-id can be determined by using the
+ * '<em>show ip fib</em>' command. If fib-id is not entered, default value
+ * of 0 is used.
+ *
+ * @todo This command uses fib-id, other commands use table-id (not
+ * just a name, they are different indexes). Would like to change this
+ * to table-id for consistency.
+ *
+ * @cliexpar
+ * Example of how to run the test lookup command:
+ * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
+ * No errors in 2 lookups
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+/* Binds the "test lookup" CLI path to test_lookup_command_fn. */
+VLIB_CLI_COMMAND (lookup_test_command, static) =
+{
+ .path = "test lookup",
+ .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
+ .function = test_lookup_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Set the flow-hash configuration of the IPv4 FIB identified by
+ * 'table_id'.
+ *
+ * @return 0 on success, VNET_API_ERROR_NO_SUCH_FIB when no IPv4 FIB with
+ *         that table id is found.
+ */
+int
+vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
+{
+  u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
+
+  if (fib_index != ~0)
+    {
+      fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
+                                      flow_hash_config);
+      return 0;
+    }
+
+  return VNET_API_ERROR_NO_SUCH_FIB;
+}
+
+/*
+ * CLI handler for "set ip flow-hash".
+ *
+ * Accepts "table <n>" and any of the keywords generated from
+ * foreach_flow_hash_bit, ORs the matching bits into a configuration word
+ * and applies it with vnet_set_ip4_flow_hash(). Table id defaults to 0.
+ *
+ * Returns an error when nothing at all was recognised or when the table
+ * does not exist; any other non-zero result is only logged as a warning.
+ */
+static clib_error_t *
+set_ip_flow_hash_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int matched = 0;
+ u32 table_id = 0;
+ u32 flow_hash_config = 0;
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table %d", &table_id))
+ matched = 1;
+ /* One "else if" per flow-hash keyword, expanded from the X-macro. */
+#define _(a,v) \
+ else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
+ foreach_flow_hash_bit
+#undef _
+ else
+ /* First unrecognised token ends parsing; it is only reported as
+ an error below if nothing had matched before it. */
+ break;
+ }
+
+ if (matched == 0)
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+
+ rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_NO_SUCH_FIB:
+ return clib_error_return (0, "no such FIB table %d", table_id);
+
+ default:
+ clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
+ break;
+ }
+
+ return 0;
+}
+
+/*?
+ * Configure the set of IPv4 fields used by the flow hash.
+ *
+ * @cliexpar
+ * Example of how to set the flow hash on a given table:
+ * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
+ * Example of how to display the configured flow hash:
+ * @cliexstart{show ip fib}
+ * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
+ * 0.0.0.0/0
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 0.0.0.0/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 224.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 6.0.1.2/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
+ * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
+ * 7.0.0.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
+ * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
+ * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
+ * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
+ * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
+ * 240.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 255.255.255.255/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
+ * 0.0.0.0/0
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 0.0.0.0/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 172.16.1.0/24
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
+ * [0] [@4]: ipv4-glean: af_packet0
+ * 172.16.1.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
+ * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
+ * 172.16.1.2/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
+ * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
+ * 172.16.2.0/24
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
+ * [0] [@4]: ipv4-glean: af_packet1
+ * 172.16.2.1/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
+ * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
+ * 224.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 240.0.0.0/8
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * 255.255.255.255/32
+ * unicast-ip4-chain
+ * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+/* Binds the "set ip flow-hash" CLI path to set_ip_flow_hash_command_fn. */
+VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
+{
+ .path = "set ip flow-hash",
+ .short_help =
+ "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
+ .function = set_ip_flow_hash_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Attach a classifier table to (or, with table_index == ~0, detach it
+ * from) an interface for IPv4.
+ *
+ * Records the table index for the interface and, when the interface has
+ * a first IPv4 address, adds or removes a /32 FIB entry for that address
+ * from source FIB_SOURCE_CLASSIFY; on attach the entry carries a
+ * classify DPO.
+ *
+ * @param vm           not used by this function
+ * @param sw_if_index  software interface index
+ * @param table_index  classifier table index, or ~0 to detach
+ * @return 0 on success, VNET_API_ERROR_NO_MATCHING_INTERFACE for an
+ *         unknown interface, VNET_API_ERROR_NO_SUCH_ENTRY for an unknown
+ *         classifier table.
+ */
+int
+vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+                             u32 table_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_interface_main_t *im = &vnm->interface_main;
+  ip4_main_t *ipm = &ip4_main;
+  ip_lookup_main_t *lm = &ipm->lookup_main;
+  vnet_classify_main_t *cm = &vnet_classify_main;
+  ip4_address_t *first_addr;
+
+  if (pool_is_free_index (im->sw_interfaces, sw_if_index))
+    return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+
+  if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
+    return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+  vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
+  lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
+
+  first_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
+
+  /* No address on the interface: nothing to program in the FIB. */
+  if (NULL == first_addr)
+    return 0;
+
+  fib_prefix_t host_pfx = {
+    .fp_len = 32,
+    .fp_proto = FIB_PROTOCOL_IP4,
+    .fp_addr.ip4 = *first_addr,
+  };
+  u32 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+                                                       sw_if_index);
+
+  /* Detach: withdraw the classify-sourced entry. */
+  if (table_index == (u32) ~ 0)
+    {
+      fib_table_entry_special_remove (fib_index,
+                                      &host_pfx, FIB_SOURCE_CLASSIFY);
+      return 0;
+    }
+
+  /* Attach: install an entry pointing at a classify DPO. */
+  dpo_id_t classify_dpo = DPO_INVALID;
+
+  dpo_set (&classify_dpo,
+           DPO_CLASSIFY,
+           DPO_PROTO_IP4, classify_dpo_create (DPO_PROTO_IP4, table_index));
+
+  fib_table_entry_special_dpo_add (fib_index,
+                                   &host_pfx,
+                                   FIB_SOURCE_CLASSIFY,
+                                   FIB_ENTRY_FLAG_NONE, &classify_dpo);
+  dpo_reset (&classify_dpo);
+
+  return 0;
+}
+
+/*
+ * CLI handler for "set ip classify".
+ *
+ * Requires both "table-index <n>" and "intfc <interface>"; parsing stops
+ * at the first token that is neither. The pair is then handed to
+ * vnet_set_ip4_classify_intfc() and its two known error codes are turned
+ * into CLI errors; every other result returns 0.
+ */
+static clib_error_t *
+set_ip_classify_command_fn (vlib_main_t * vm,
+                            unformat_input_t * input,
+                            vlib_cli_command_t * cmd)
+{
+  u32 table_index = ~0;
+  int have_table_index = 0;
+  u32 sw_if_index = ~0;
+  int rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "table-index %d", &table_index))
+        {
+          have_table_index = 1;
+          continue;
+        }
+
+      if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
+                    vnet_get_main (), &sw_if_index))
+        continue;
+
+      break;
+    }
+
+  if (!have_table_index)
+    return clib_error_return (0, "classify table-index must be specified");
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "interface / subif must be specified");
+
+  rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
+
+  if (rv == VNET_API_ERROR_NO_MATCHING_INTERFACE)
+    return clib_error_return (0, "No such interface");
+
+  if (rv == VNET_API_ERROR_NO_SUCH_ENTRY)
+    return clib_error_return (0, "No such classifier table");
+
+  return 0;
+}
+
+/*?
+ * Assign a classification table to an interface. The classification
+ * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
+ * commands. Once the table is created, use this command to filter packets
+ * on an interface.
+ *
+ * @cliexpar
+ * Example of how to assign a classification table to an interface:
+ * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
+?*/
+/* *INDENT-OFF* */
+/* Binds the "set ip classify" CLI path to set_ip_classify_command_fn. */
+VLIB_CLI_COMMAND (set_ip_classify_command, static) =
+{
+ .path = "set ip classify",
+ .short_help =
+ "set ip classify intfc <interface> table-index <classify-idx>",
+ .function = set_ip_classify_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c
new file mode 100644
index 00000000..3b08f4b0
--- /dev/null
+++ b/src/vnet/ip/ip4_input.c
@@ -0,0 +1,507 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_input.c: IP v4 input node
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ppp/ppp.h>
+#include <vnet/hdlc/hdlc.h>
+
+typedef struct
+{
+ u8 packet_data[64];
+} ip4_input_trace_t;
+
+static u8 *
+format_ip4_input_trace (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ ip4_input_trace_t *t = va_arg (*va, ip4_input_trace_t *);
+
+ s = format (s, "%U",
+ format_ip4_header, t->packet_data, sizeof (t->packet_data));
+
+ return s;
+}
+
+typedef enum
+{
+ IP4_INPUT_NEXT_DROP,
+ IP4_INPUT_NEXT_PUNT,
+ IP4_INPUT_NEXT_LOOKUP,
+ IP4_INPUT_NEXT_LOOKUP_MULTICAST,
+ IP4_INPUT_NEXT_ICMP_ERROR,
+ IP4_INPUT_N_NEXT,
+} ip4_input_next_t;
+
+/* Validate IP v4 packets and pass them either to forwarding code
+ or drop/punt exception packets. */
+always_inline uword
+ip4_input_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int verify_checksum)
+{
+ ip4_main_t *im = &ip4_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ ip_lookup_main_t *lm = &im->lookup_main;
+ u32 n_left_from, *from, *to_next;
+ ip4_input_next_t next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
+ vlib_simple_counter_main_t *cm;
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (ip4_input_trace_t));
+
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_IP4);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ u32 sw_if_index0, pi0, ip_len0, cur_len0, next0;
+ u32 sw_if_index1, pi1, ip_len1, cur_len1, next1;
+ i32 len_diff0, len_diff1;
+ u8 error0, error1, arc0, arc1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
+ }
+
+ to_next[0] = pi0 = from[0];
+ to_next[1] = pi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
+
+ error0 = error1 = IP4_ERROR_NONE;
+
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip0->dst_address)))
+ {
+ arc0 = lm->mcast_feature_arc_index;
+ next0 = IP4_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc0 = lm->ucast_feature_arc_index;
+ next0 = IP4_INPUT_NEXT_LOOKUP;
+ if (PREDICT_FALSE (ip0->ttl < 1))
+ error0 = IP4_ERROR_TIME_EXPIRED;
+ }
+
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip1->dst_address)))
+ {
+ arc1 = lm->mcast_feature_arc_index;
+ next1 = IP4_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc1 = lm->ucast_feature_arc_index;
+ next1 = IP4_INPUT_NEXT_LOOKUP;
+ if (PREDICT_FALSE (ip1->ttl < 1))
+ error1 = IP4_ERROR_TIME_EXPIRED;
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0;
+
+ vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+ vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1);
+
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
+
+ /* Punt packets with options; drop packets with the wrong version. */
+ if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45))
+ error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ?
+ IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+
+ if (PREDICT_FALSE (ip1->ip_version_and_header_length != 0x45))
+ error1 = (ip1->ip_version_and_header_length & 0xf) != 5 ?
+ IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+
+ /* Verify header checksum. */
+ if (verify_checksum)
+ {
+ ip_csum_t sum0, sum1;
+
+ ip4_partial_header_checksum_x1 (ip0, sum0);
+ ip4_partial_header_checksum_x1 (ip1, sum1);
+
+ error0 = 0xffff != ip_csum_fold (sum0) ?
+ IP4_ERROR_BAD_CHECKSUM : error0;
+ error1 = 0xffff != ip_csum_fold (sum1) ?
+ IP4_ERROR_BAD_CHECKSUM : error1;
+ }
+
+ /* Drop fragmentation offset 1 packets. */
+ error0 = ip4_get_fragment_offset (ip0) == 1 ?
+ IP4_ERROR_FRAGMENT_OFFSET_ONE : error0;
+ error1 = ip4_get_fragment_offset (ip1) == 1 ?
+ IP4_ERROR_FRAGMENT_OFFSET_ONE : error1;
+
+ /* Verify lengths. */
+ ip_len0 = clib_net_to_host_u16 (ip0->length);
+ ip_len1 = clib_net_to_host_u16 (ip1->length);
+
+ /* IP length must be at least minimal IP header. */
+ error0 = ip_len0 < sizeof (ip0[0]) ? IP4_ERROR_TOO_SHORT : error0;
+ error1 = ip_len1 < sizeof (ip1[0]) ? IP4_ERROR_TOO_SHORT : error1;
+
+ cur_len0 = vlib_buffer_length_in_chain (vm, p0);
+ cur_len1 = vlib_buffer_length_in_chain (vm, p1);
+
+ len_diff0 = cur_len0 - ip_len0;
+ len_diff1 = cur_len1 - ip_len1;
+
+ error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0;
+ error1 = len_diff1 < 0 ? IP4_ERROR_BAD_LENGTH : error1;
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ if (PREDICT_FALSE (error0 != IP4_ERROR_NONE))
+ {
+ if (error0 == IP4_ERROR_TIME_EXPIRED)
+ {
+ icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP4_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ next0 = error0 != IP4_ERROR_OPTIONS ?
+ IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT;
+ }
+ if (PREDICT_FALSE (error1 != IP4_ERROR_NONE))
+ {
+ if (error1 == IP4_ERROR_TIME_EXPIRED)
+ {
+ icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next1 = IP4_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ next1 = error1 != IP4_ERROR_OPTIONS ?
+ IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ u32 sw_if_index0, pi0, ip_len0, cur_len0, next0;
+ i32 len_diff0;
+ u8 error0, arc0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ error0 = IP4_ERROR_NONE;
+
+ if (PREDICT_FALSE (ip4_address_is_multicast (&ip0->dst_address)))
+ {
+ arc0 = lm->mcast_feature_arc_index;
+ next0 = IP4_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc0 = lm->ucast_feature_arc_index;
+ next0 = IP4_INPUT_NEXT_LOOKUP;
+ if (PREDICT_FALSE (ip0->ttl < 1))
+ error0 = IP4_ERROR_TIME_EXPIRED;
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+
+ /* Punt packets with options; drop packets with the wrong version. */
+ if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45))
+ error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ?
+ IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+
+ /* Verify header checksum. */
+ if (verify_checksum)
+ {
+ ip_csum_t sum0;
+
+ ip4_partial_header_checksum_x1 (ip0, sum0);
+ error0 =
+ 0xffff !=
+ ip_csum_fold (sum0) ? IP4_ERROR_BAD_CHECKSUM : error0;
+ }
+
+ /* Drop fragmentation offset 1 packets. */
+ error0 =
+ ip4_get_fragment_offset (ip0) ==
+ 1 ? IP4_ERROR_FRAGMENT_OFFSET_ONE : error0;
+
+ /* Verify lengths. */
+ ip_len0 = clib_net_to_host_u16 (ip0->length);
+
+ /* IP length must be at least minimal IP header. */
+ error0 = ip_len0 < sizeof (ip0[0]) ? IP4_ERROR_TOO_SHORT : error0;
+
+ cur_len0 = vlib_buffer_length_in_chain (vm, p0);
+ len_diff0 = cur_len0 - ip_len0;
+ error0 = len_diff0 < 0 ? IP4_ERROR_BAD_LENGTH : error0;
+
+ p0->error = error_node->errors[error0];
+ if (PREDICT_FALSE (error0 != IP4_ERROR_NONE))
+ {
+ if (error0 == IP4_ERROR_TIME_EXPIRED)
+ {
+ icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
+ ICMP4_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP4_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ next0 = error0 != IP4_ERROR_OPTIONS ?
+ IP4_INPUT_NEXT_DROP : IP4_INPUT_NEXT_PUNT;
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/** \brief IPv4 input node.
+ @node ip4-input
+
+ This is the IPv4 input node: validates ip4 header checksums,
+ verifies ip header lengths, discards pkts with expired TTLs,
+ and sends pkts to the set of ip feature nodes configured on
+ the rx interface.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param frame vlib_frame_t whose contents should be dispatched
+
+ @par Graph mechanics: buffer metadata, next index usage
+
+ @em Uses:
+ - vnet_feature_config_main_t cm corresponding to each pkt's dst address unicast /
+ multicast status.
+ - <code>b->current_config_index</code> corresponding to each pkt's
+ rx sw_if_index.
+ - This sets the per-packet graph trajectory, ensuring that
+ each packet visits the per-interface features in order.
+
+ - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
+ - Indicates the @c sw_if_index value of the interface that the
+ packet was received on.
+
+ @em Sets:
+ - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
+ - Reset to ~0 for every packet.
+
+ <em>Next Indices:</em>
+ - Dispatches pkts to the (first) feature node:
+ <code> vnet_feature_arc_start (... &next0 ...); </code>
+ or @c error-drop, @c error-punt, @c ip4-icmp-error
+*/
+static uword
+ip4_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_input_inline (vm, node, frame, /* verify_checksum */ 1);
+}
+
+static uword
+ip4_input_no_checksum (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip4_input_inline (vm, node, frame, /* verify_checksum */ 0);
+}
+
+static char *ip4_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip4_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_input_node) = {
+ .function = ip4_input,
+ .name = "ip4-input",
+ .vector_size = sizeof (u32),
+
+ .n_errors = IP4_N_ERROR,
+ .error_strings = ip4_error_strings,
+
+ .n_next_nodes = IP4_INPUT_N_NEXT,
+ .next_nodes = {
+ [IP4_INPUT_NEXT_DROP] = "error-drop",
+ [IP4_INPUT_NEXT_PUNT] = "error-punt",
+ [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
+ [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup",
+ [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_input_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_input_node, ip4_input);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_input_no_checksum_node,static) = {
+ .function = ip4_input_no_checksum,
+ .name = "ip4-input-no-checksum",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP4_INPUT_N_NEXT,
+ .next_nodes = {
+ [IP4_INPUT_NEXT_DROP] = "error-drop",
+ [IP4_INPUT_NEXT_PUNT] = "error-punt",
+ [IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
+ [IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup",
+ [IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_input_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_input_no_checksum_node,
+ ip4_input_no_checksum);
+
+static clib_error_t *
+ip4_init (vlib_main_t * vm)
+{
+ clib_error_t *error;
+
+ ethernet_register_input_type (vm, ETHERNET_TYPE_IP4, ip4_input_node.index);
+ ppp_register_input_protocol (vm, PPP_PROTOCOL_ip4, ip4_input_node.index);
+ hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip4, ip4_input_node.index);
+
+ {
+ pg_node_t *pn;
+ pn = pg_get_node (ip4_input_node.index);
+ pn->unformat_edit = unformat_pg_ip4_header;
+ pn = pg_get_node (ip4_input_no_checksum_node.index);
+ pn->unformat_edit = unformat_pg_ip4_header;
+ }
+
+ if ((error = vlib_call_init_function (vm, ip4_cli_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip4_source_check_init)))
+ return error;
+
+ if ((error = vlib_call_init_function
+ (vm, ip4_source_and_port_range_check_init)))
+ return error;
+
+ /* Set flow hash to something non-zero. */
+ ip4_main.flow_hash_seed = 0xdeadbeef;
+
+ /* Default TTL for packets we generate. */
+ ip4_main.host_config.ttl = 64;
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ip4_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
new file mode 100644
index 00000000..cc82384d
--- /dev/null
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -0,0 +1,811 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_mtrie.c: ip4 mtrie fib
+ *
+ * Copyright (c) 2012 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip4_mtrie.h>
+#include <vnet/fib/ip4_fib.h>
+
+
+/**
+ * Global pool of IPv4 8bit PLYs
+ */
+ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
+
+always_inline u32
+ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte)
+{
+ /*
+ * It's 'non-empty' if the length of the leaf stored is greater than the
+ * length of a leaf in the covering ply. i.e. the leaf is more specific
+ * than its would-be cover in the covering ply
+ */
+ if (p->dst_address_bits_of_leaves[dst_byte] > p->dst_address_bits_base)
+ return (1);
+ return (0);
+}
+
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
+{
+ ip4_fib_mtrie_leaf_t l;
+ l = 1 + 2 * adj_index;
+ ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
+ return l;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
+{
+ return (n & 1) == 0;
+}
+
+always_inline u32
+ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
+{
+ ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
+ return n >> 1;
+}
+
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
+{
+ ip4_fib_mtrie_leaf_t l;
+ l = 0 + 2 * i;
+ ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
+ return l;
+}
+
+#ifndef __ALTIVEC__
+#define PLY_X4_SPLAT_INIT(init_x4, init) \
+ init_x4 = u32x4_splat (init);
+#else
+#define PLY_X4_SPLAT_INIT(init_x4, init) \
+{ \
+ u32x4_union_t y; \
+ y.as_u32[0] = init; \
+ y.as_u32[1] = init; \
+ y.as_u32[2] = init; \
+ y.as_u32[3] = init; \
+ init_x4 = y.as_u32x4; \
+}
+#endif
+
+#ifdef CLIB_HAVE_VEC128
+#define PLY_INIT_LEAVES(p) \
+{ \
+ u32x4 *l, init_x4; \
+ \
+ PLY_X4_SPLAT_INIT(init_x4, init); \
+ for (l = p->leaves_as_u32x4; \
+ l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); \
+ l += 4) \
+ { \
+ l[0] = init_x4; \
+ l[1] = init_x4; \
+ l[2] = init_x4; \
+ l[3] = init_x4; \
+ } \
+}
+#else
+#define PLY_INIT_LEAVES(p) \
+{ \
+ u32 *l; \
+ \
+ for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) \
+ { \
+ l[0] = init; \
+ l[1] = init; \
+ l[2] = init; \
+ l[3] = init; \
+ } \
+}
+#endif
+
+#define PLY_INIT(p, init, prefix_len, ply_base_len) \
+{ \
+ /* \
+ * A leaf is 'empty' if it represents a leaf from the covering PLY \
+ * i.e. if the prefix length of the leaf is less than or equal to \
+ * the prefix length of the PLY \
+ */ \
+ p->n_non_empty_leafs = (prefix_len > ply_base_len ? \
+ ARRAY_LEN (p->leaves) : 0); \
+ memset (p->dst_address_bits_of_leaves, prefix_len, \
+ sizeof (p->dst_address_bits_of_leaves)); \
+ p->dst_address_bits_base = ply_base_len; \
+ \
+ /* Initialize leaves. */ \
+ PLY_INIT_LEAVES(p); \
+}
+
+static void
+ply_8_init (ip4_fib_mtrie_8_ply_t * p,
+ ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len)
+{
+ PLY_INIT (p, init, prefix_len, ply_base_len);
+}
+
+static void
+ply_16_init (ip4_fib_mtrie_16_ply_t * p,
+ ip4_fib_mtrie_leaf_t init, uword prefix_len)
+{
+ memset (p->dst_address_bits_of_leaves, prefix_len,
+ sizeof (p->dst_address_bits_of_leaves));
+ PLY_INIT_LEAVES (p);
+}
+
+static ip4_fib_mtrie_leaf_t
+ply_create (ip4_fib_mtrie_t * m,
+ ip4_fib_mtrie_leaf_t init_leaf,
+ u32 leaf_prefix_len, u32 ply_base_len)
+{
+ ip4_fib_mtrie_8_ply_t *p;
+
+ /* Get cache aligned ply. */
+ pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
+
+ ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
+ return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
+}
+
+always_inline ip4_fib_mtrie_8_ply_t *
+get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
+{
+ uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
+
+ return pool_elt_at_index (ip4_ply_pool, n);
+}
+
+void
+ip4_mtrie_free (ip4_fib_mtrie_t * m)
+{
+ /* the root ply is embedded so there is nothing to do,
+ * the assumption being that the IP4 FIB table has emptied the trie
+ * before deletion.
+ */
+#if CLIB_DEBUG > 0
+ int i;
+ for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
+ {
+ ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i]));
+ }
+#endif
+}
+
+void
+ip4_mtrie_init (ip4_fib_mtrie_t * m)
+{
+ ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0);
+}
+
+typedef struct
+{
+ ip4_address_t dst_address;
+ u32 dst_address_length;
+ u32 adj_index;
+ u32 cover_address_length;
+ u32 cover_adj_index;
+} ip4_fib_mtrie_set_unset_leaf_args_t;
+
+static void
+set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
+ ip4_fib_mtrie_8_ply_t * ply,
+ ip4_fib_mtrie_leaf_t new_leaf,
+ uword new_leaf_dst_address_bits)
+{
+ ip4_fib_mtrie_leaf_t old_leaf;
+ uword i;
+
+ ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
+
+ for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
+ {
+ old_leaf = ply->leaves[i];
+
+ /* Recurse into sub plies. */
+ if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+ {
+ ip4_fib_mtrie_8_ply_t *sub_ply =
+ get_next_ply_for_leaf (m, old_leaf);
+ set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
+ new_leaf_dst_address_bits);
+ }
+
+ /* Replace less specific terminal leaves with new leaf. */
+ else if (new_leaf_dst_address_bits >=
+ ply->dst_address_bits_of_leaves[i])
+ {
+ __sync_val_compare_and_swap (&ply->leaves[i], old_leaf, new_leaf);
+ ASSERT (ply->leaves[i] == new_leaf);
+ ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
+ ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i);
+ }
+ }
+}
+
+static void
+set_leaf (ip4_fib_mtrie_t * m,
+ const ip4_fib_mtrie_set_unset_leaf_args_t * a,
+ u32 old_ply_index, u32 dst_address_byte_index)
+{
+ ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
+ i32 n_dst_bits_next_plies;
+ u8 dst_byte;
+ ip4_fib_mtrie_8_ply_t *old_ply;
+
+ old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
+
+ ASSERT (a->dst_address_length <= 32);
+ ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
+
+ /* how many bits of the destination address are in the next PLY */
+ n_dst_bits_next_plies =
+ a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
+
+ dst_byte = a->dst_address.as_u8[dst_address_byte_index];
+
+ /* Number of bits next plies <= 0 => insert leaves this ply. */
+ if (n_dst_bits_next_plies <= 0)
+ {
+ /* The mask length of the address to insert maps to this ply */
+ uword old_leaf_is_terminal;
+ u32 i, n_dst_bits_this_ply;
+
+ /* The number of bits, and hence slots/buckets, we will fill */
+ n_dst_bits_this_ply = clib_min (8, -n_dst_bits_next_plies);
+ ASSERT ((a->dst_address.as_u8[dst_address_byte_index] &
+ pow2_mask (n_dst_bits_this_ply)) == 0);
+
+ /* Starting at the value of the byte at this section of the v4 address
+ * fill the buckets/slots of the ply */
+ for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
+ {
+ ip4_fib_mtrie_8_ply_t *new_ply;
+
+ old_leaf = old_ply->leaves[i];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
+ {
+ /* The new leaf is more or equally specific than the one currently
+ * occupying the slot */
+ new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+ if (old_leaf_is_terminal)
+ {
+ /* The current leaf is terminal, we can replace it with
+ * the new one */
+ old_ply->n_non_empty_leafs -=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+
+ old_ply->dst_address_bits_of_leaves[i] =
+ a->dst_address_length;
+ __sync_val_compare_and_swap (&old_ply->leaves[i], old_leaf,
+ new_leaf);
+ ASSERT (old_ply->leaves[i] == new_leaf);
+
+ old_ply->n_non_empty_leafs +=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+ ASSERT (old_ply->n_non_empty_leafs <=
+ ARRAY_LEN (old_ply->leaves));
+ }
+ else
+ {
+ /* Existing leaf points to another ply. We need to place
+ * new_leaf into all more specific slots. */
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
+ a->dst_address_length);
+ }
+ }
+ else if (!old_leaf_is_terminal)
+ {
+ /* The current leaf is less specific and not terminal (i.e. a ply),
+ * recurse on down the trie */
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_leaf (m, a, new_ply - ip4_ply_pool,
+ dst_address_byte_index + 1);
+ }
+ /*
+ * else
+ * the route we are adding is less specific than the leaf currently
+ * occupying this slot. leave it there
+ */
+ }
+ }
+ else
+ {
+ /* The address to insert requires us to move down to a lower level of
+ * the trie - recurse on down */
+ ip4_fib_mtrie_8_ply_t *new_ply;
+ u8 ply_base_len;
+
+ ply_base_len = 8 * (dst_address_byte_index + 1);
+
+ old_leaf = old_ply->leaves[dst_byte];
+
+ if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+ {
+ /* There is a leaf occupying the slot. Replace it with a new ply */
+ old_ply->n_non_empty_leafs -=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
+
+ new_leaf = ply_create (m, old_leaf,
+ clib_max (old_ply->dst_address_bits_of_leaves
+ [dst_byte], ply_base_len),
+ ply_base_len);
+ new_ply = get_next_ply_for_leaf (m, new_leaf);
+
+ /* Refetch since ply_create may move pool. */
+ old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
+
+ __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
+ new_leaf);
+ ASSERT (old_ply->leaves[dst_byte] == new_leaf);
+ old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
+
+ old_ply->n_non_empty_leafs +=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
+ ASSERT (old_ply->n_non_empty_leafs >= 0);
+ }
+ else
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+
+ set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
+ }
+}
+
+static void
+set_root_leaf (ip4_fib_mtrie_t * m,
+ const ip4_fib_mtrie_set_unset_leaf_args_t * a)
+{
+ ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
+ ip4_fib_mtrie_16_ply_t *old_ply;
+ i32 n_dst_bits_next_plies;
+ u16 dst_byte;
+
+ old_ply = &m->root_ply;
+
+ ASSERT (a->dst_address_length <= 32);
+
+ /* how many bits of the destination address are in the next PLY */
+ n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
+
+ dst_byte = a->dst_address.as_u16[0];
+
+ /* Number of bits next plies <= 0 => insert leaves this ply. */
+ if (n_dst_bits_next_plies <= 0)
+ {
+ /* The mask length of the address to insert maps to this ply */
+ uword old_leaf_is_terminal;
+ u32 i, n_dst_bits_this_ply;
+
+ /* The number of bits, and hence slots/buckets, we will fill */
+ n_dst_bits_this_ply = 16 - a->dst_address_length;
+ ASSERT ((clib_host_to_net_u16 (a->dst_address.as_u16[0]) &
+ pow2_mask (n_dst_bits_this_ply)) == 0);
+
+ /* Starting at the slot given by the first 16 bits of the v4 address
+ * fill the buckets/slots of the ply */
+ for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
+ {
+ ip4_fib_mtrie_8_ply_t *new_ply;
+ u16 slot;
+
+ slot = clib_net_to_host_u16 (dst_byte);
+ slot += i;
+ slot = clib_host_to_net_u16 (slot);
+
+ old_leaf = old_ply->leaves[slot];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ if (a->dst_address_length >=
+ old_ply->dst_address_bits_of_leaves[slot])
+ {
+ /* The new leaf is more or equally specific than the one currently
+ * occupying the slot */
+ new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+ if (old_leaf_is_terminal)
+ {
+ /* The current leaf is terminal, we can replace it with
+ * the new one */
+ old_ply->dst_address_bits_of_leaves[slot] =
+ a->dst_address_length;
+ __sync_val_compare_and_swap (&old_ply->leaves[slot],
+ old_leaf, new_leaf);
+ ASSERT (old_ply->leaves[slot] == new_leaf);
+ }
+ else
+ {
+ /* Existing leaf points to another ply. We need to place
+ * new_leaf into all more specific slots. */
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
+ a->dst_address_length);
+ }
+ }
+ else if (!old_leaf_is_terminal)
+ {
+ /* The current leaf is less specific and not terminal (i.e. a ply),
+ * recurse on down the trie */
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+ set_leaf (m, a, new_ply - ip4_ply_pool, 2);
+ }
+ /*
+ * else
+ * the route we are adding is less specific than the leaf currently
+ * occupying this slot. leave it there
+ */
+ }
+ }
+ else
+ {
+ /* The address to insert requires us to move down to a lower level of
+ * the trie - recurse on down */
+ ip4_fib_mtrie_8_ply_t *new_ply;
+ u8 ply_base_len;
+
+ ply_base_len = 16;
+
+ old_leaf = old_ply->leaves[dst_byte];
+
+ if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+ {
+ /* There is a leaf occupying the slot. Replace it with a new ply */
+ new_leaf = ply_create (m, old_leaf,
+ clib_max (old_ply->dst_address_bits_of_leaves
+ [dst_byte], ply_base_len),
+ ply_base_len);
+ new_ply = get_next_ply_for_leaf (m, new_leaf);
+
+ __sync_val_compare_and_swap (&old_ply->leaves[dst_byte], old_leaf,
+ new_leaf);
+ ASSERT (old_ply->leaves[dst_byte] == new_leaf);
+ old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
+ }
+ else
+ new_ply = get_next_ply_for_leaf (m, old_leaf);
+
+ set_leaf (m, a, new_ply - ip4_ply_pool, 2);
+ }
+}
+
+static uword
+unset_leaf (ip4_fib_mtrie_t * m,
+ const ip4_fib_mtrie_set_unset_leaf_args_t * a,
+ ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index)
+{
+ ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
+ i32 n_dst_bits_next_plies;
+ i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
+ u8 dst_byte;
+
+ ASSERT (a->dst_address_length <= 32);
+ ASSERT (dst_address_byte_index < ARRAY_LEN (a->dst_address.as_u8));
+
+ n_dst_bits_next_plies =
+ a->dst_address_length - BITS (u8) * (dst_address_byte_index + 1);
+
+ dst_byte = a->dst_address.as_u8[dst_address_byte_index];
+ if (n_dst_bits_next_plies < 0)
+ dst_byte &= ~pow2_mask (-n_dst_bits_next_plies);
+
+ n_dst_bits_this_ply =
+ n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
+ n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);
+
+ del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+ for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
+ {
+ old_leaf = old_ply->leaves[i];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ if (old_leaf == del_leaf
+ || (!old_leaf_is_terminal
+ && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
+ dst_address_byte_index + 1)))
+ {
+ old_ply->n_non_empty_leafs -=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+
+ old_ply->leaves[i] =
+ ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
+ old_ply->dst_address_bits_of_leaves[i] =
+ clib_max (old_ply->dst_address_bits_base,
+ a->cover_address_length);
+
+ old_ply->n_non_empty_leafs +=
+ ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+
+ ASSERT (old_ply->n_non_empty_leafs >= 0);
+ if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
+ {
+ pool_put (ip4_ply_pool, old_ply);
+ /* Old ply was deleted. */
+ return 1;
+ }
+#if CLIB_DEBUG > 0
+ else if (dst_address_byte_index)
+ {
+ int ii, count = 0;
+ for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++)
+ {
+ count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii);
+ }
+ ASSERT (count);
+ }
+#endif
+ }
+ }
+
+ /* Old ply was not deleted. */
+ return 0;
+}
+
+/*
+ * Remove the route described by 'a' from the 16 bit root ply of mtrie 'm'.
+ * Every root slot covered by the prefix is visited; a slot is rewritten with
+ * the covering route (a->cover_adj_index / a->cover_address_length) when it
+ * either holds the leaf being deleted or points at a sub-ply for which
+ * unset_leaf() returns non-zero. The root ply itself is never freed here.
+ */
+static void
+unset_root_leaf (ip4_fib_mtrie_t * m,
+ const ip4_fib_mtrie_set_unset_leaf_args_t * a)
+{
+ ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
+ i32 n_dst_bits_next_plies;
+ i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
+ u16 dst_byte;
+ ip4_fib_mtrie_16_ply_t *old_ply;
+
+ ASSERT (a->dst_address_length <= 32);
+
+ old_ply = &m->root_ply;
+ /* Prefix bits left over once the 16 bits of the root ply are consumed;
+ * zero or negative means the prefix ends inside the root ply. */
+ n_dst_bits_next_plies = a->dst_address_length - BITS (u16);
+
+ /* First 16 bits of the address, as stored in the address (not yet
+ * byte-swapped). */
+ dst_byte = a->dst_address.as_u16[0];
+
+ /* log2 of the number of root slots covered by the prefix. */
+ n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ?
+ (16 - a->dst_address_length) : 0);
+
+ /* The terminal leaf value that identifies the route being removed. */
+ del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+
+ /* Starting at the value of the byte at this section of the v4 address
+ * fill the buckets/slots of the ply */
+ for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
+ {
+ u16 slot;
+
+ /* Step through consecutive prefixes in host order, then convert back
+ * because the root ply is indexed by the address' raw as_u16[0]. */
+ slot = clib_net_to_host_u16 (dst_byte);
+ slot += i;
+ slot = clib_host_to_net_u16 (slot);
+
+ old_leaf = old_ply->leaves[slot];
+ old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+
+ /* Sub-plies start at address byte index 2 (bytes 0-1 are the root). */
+ if (old_leaf == del_leaf
+ || (!old_leaf_is_terminal
+ && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2)))
+ {
+ old_ply->leaves[slot] =
+ ip4_fib_mtrie_leaf_set_adj_index (a->cover_adj_index);
+ old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length;
+ }
+ }
+}
+
+/*
+ * Add a route to the mtrie.
+ *
+ * m                  - the mtrie to modify
+ * dst_address        - route prefix; bits beyond dst_address_length are
+ *                      masked off before insertion
+ * dst_address_length - prefix length in bits, at most 32
+ * adj_index          - index stored in the terminal leaves of the route
+ */
+void
+ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
+			 const ip4_address_t * dst_address,
+			 u32 dst_address_length, u32 adj_index)
+{
+  ip4_fib_mtrie_set_unset_leaf_args_t a;
+  ip4_main_t *im = &ip4_main;
+
+  /* The length indexes the mask table below, so check it first. */
+  ASSERT (dst_address_length <= 32);
+
+  /* Honor dst_address_length. Fib masks are in network byte order */
+  a.dst_address.as_u32 = (dst_address->as_u32 &
+			  im->fib_masks[dst_address_length]);
+  a.dst_address_length = dst_address_length;
+  a.adj_index = adj_index;
+
+  set_root_leaf (m, &a);
+}
+
+/*
+ * Remove a route from the mtrie.
+ *
+ * m                    - the mtrie to modify
+ * dst_address          - route prefix; bits beyond dst_address_length are
+ *                        masked off first
+ * dst_address_length   - prefix length in bits, at most 32
+ * adj_index            - index currently stored for the route; only leaves
+ *                        holding this value are replaced
+ * cover_address_length - prefix length of the covering route
+ * cover_adj_index      - index of the covering route, written into the
+ *                        slots the removed route occupied
+ */
+void
+ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
+			 const ip4_address_t * dst_address,
+			 u32 dst_address_length,
+			 u32 adj_index,
+			 u32 cover_address_length, u32 cover_adj_index)
+{
+  ip4_fib_mtrie_set_unset_leaf_args_t a;
+  ip4_main_t *im = &ip4_main;
+
+  /* The length indexes the mask table below, so check it first. */
+  ASSERT (dst_address_length <= 32);
+
+  /* Honor dst_address_length. Fib masks are in network byte order */
+  a.dst_address.as_u32 = (dst_address->as_u32 &
+			  im->fib_masks[dst_address_length]);
+  a.dst_address_length = dst_address_length;
+  a.adj_index = adj_index;
+  a.cover_adj_index = cover_adj_index;
+  a.cover_address_length = cover_address_length;
+
+  /* the top level ply is never removed */
+  unset_root_leaf (m, &a);
+}
+
+/* Returns the number of bytes used by 8 bit ply p plus, recursively,
+   every ply reachable through its next-ply leaves. */
+static uword
+mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p)
+{
+  uword total = sizeof (p[0]);
+  uword slot = 0;
+
+  while (slot < ARRAY_LEN (p->leaves))
+    {
+      ip4_fib_mtrie_leaf_t leaf = p->leaves[slot++];
+
+      /* Terminal leaves own no further memory. */
+      if (!ip4_fib_mtrie_leaf_is_next_ply (leaf))
+	continue;
+
+      total += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, leaf));
+    }
+
+  return total;
+}
+
+/* Returns the number of bytes used by the whole mtrie: the mtrie struct
+   (which embeds the root ply) plus every ply hanging off the root. */
+static uword
+mtrie_memory_usage (ip4_fib_mtrie_t * m)
+{
+  uword total = sizeof (*m);
+  uword slot = 0;
+
+  while (slot < ARRAY_LEN (m->root_ply.leaves))
+    {
+      ip4_fib_mtrie_leaf_t leaf = m->root_ply.leaves[slot++];
+
+      /* Only next-ply leaves contribute additional memory. */
+      if (!ip4_fib_mtrie_leaf_is_next_ply (leaf))
+	continue;
+
+      total += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, leaf));
+    }
+
+  return total;
+}
+
+/* Format a single leaf: "lb-index N" for a terminal leaf, otherwise
+   "next ply N". Takes one va_arg, the ip4_fib_mtrie_leaf_t to print. */
+static u8 *
+format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
+{
+  ip4_fib_mtrie_leaf_t leaf = va_arg (*va, ip4_fib_mtrie_leaf_t);
+
+  if (!ip4_fib_mtrie_leaf_is_terminal (leaf))
+    return format (s, "next ply %d",
+		   ip4_fib_mtrie_leaf_get_next_ply_index (leaf));
+
+  return format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (leaf));
+}
+
+/*
+ * Append one leaf of a ply to the format vector 's'.
+ *
+ *  _p            - the ply (8 or 16 bit) that owns the leaf
+ *  _a            - host byte order value of the address bits selected by
+ *                  this ply; used only to build the printed address
+ *  _i            - index of the leaf in _p->leaves[]
+ *  _base_address - host byte order address bits contributed by the plies
+ *                  above this one
+ *  _ply_max_len  - number of address bits consumed once this ply has been
+ *                  walked (16 for the root ply)
+ *  _indent       - indentation of the enclosing output
+ *
+ * _a and _i are separate because the root ply is indexed in network byte
+ * order while the printed address has to be built from the host order
+ * value; for the 8 bit plies they are the same number.
+ * The caller must have the mtrie pointer 'm' in scope: it is handed on to
+ * format_ip4_fib_mtrie_ply when the leaf points at another ply.
+ */
+#define FORMAT_PLY(s, _p, _a, _i, _base_address, _ply_max_len, _indent) \
+({ \
+  u32 a, ia_length; \
+  ip4_address_t ia; \
+  ip4_fib_mtrie_leaf_t _l = (_p)->leaves[(_i)]; \
+ \
+  /* shift as u32: the top address bit may be set */ \
+  a = (_base_address) + ((u32) (_a) << (32 - (_ply_max_len))); \
+  ia.as_u32 = clib_host_to_net_u32 (a); \
+  ia_length = (_p)->dst_address_bits_of_leaves[(_i)]; \
+  s = format (s, "\n%U%20U %U", \
+	      format_white_space, (_indent) + 2, \
+	      format_ip4_address_and_length, &ia, ia_length, \
+	      format_ip4_fib_mtrie_leaf, _l); \
+ \
+  if (ip4_fib_mtrie_leaf_is_next_ply (_l)) \
+    s = format (s, "\n%U%U", \
+		format_white_space, (_indent) + 2, \
+		format_ip4_fib_mtrie_ply, m, a, \
+		ip4_fib_mtrie_leaf_get_next_ply_index (_l)); \
+  s; \
+})
+
+/*
+ * Format one 8 bit ply and, recursively, the plies below it.
+ * va_args: ip4_fib_mtrie_t *m, u32 base_address (host byte order address
+ * bits of the plies above), u32 ply_index (index into ip4_ply_pool).
+ * Only leaves reported non-empty by ip4_fib_mtrie_leaf_is_non_empty are
+ * printed.
+ */
+static u8 *
+format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
+{
+  ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
+  u32 base_address = va_arg (*va, u32);
+  u32 ply_index = va_arg (*va, u32);
+  ip4_fib_mtrie_8_ply_t *p;
+  uword indent;
+  int i;
+
+  p = pool_elt_at_index (ip4_ply_pool, ply_index);
+  indent = format_get_indent (s);
+  s = format (s, "ply index %d, %d non-empty leaves", ply_index,
+	      p->n_non_empty_leafs);
+
+  for (i = 0; i < ARRAY_LEN (p->leaves); i++)
+    {
+      if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
+	{
+	  /* 8 bit plies are indexed by the address byte itself. */
+	  FORMAT_PLY (s, p, i, i, base_address,
+		      p->dst_address_bits_base + 8, indent);
+	}
+    }
+
+  return s;
+}
+
+/*
+ * Format a whole mtrie: a summary line followed by every root slot whose
+ * stored prefix length is non-zero, descending into sub-plies.
+ * va_args: ip4_fib_mtrie_t *m.
+ */
+u8 *
+format_ip4_fib_mtrie (u8 * s, va_list * va)
+{
+  ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
+  ip4_fib_mtrie_16_ply_t *p;
+  u32 base_address = 0;
+  int i;
+
+  s = format (s, "%d plies, memory usage %U\n",
+	      pool_elts (ip4_ply_pool),
+	      format_memory_size, mtrie_memory_usage (m));
+  s = format (s, "root-ply");
+  p = &m->root_ply;
+
+  for (i = 0; i < ARRAY_LEN (p->leaves); i++)
+    {
+      u16 slot;
+
+      /* Walk prefixes in host order; the root ply is indexed by the
+       * network order 16 bit value. */
+      slot = clib_host_to_net_u16 (i);
+
+      if (p->dst_address_bits_of_leaves[slot] > 0)
+	{
+	  /* Print the address from the host order value i, but read the
+	   * leaf from the network order slot. */
+	  FORMAT_PLY (s, p, i, slot, base_address, 16, 2);
+	}
+    }
+
+  return s;
+}
+
+/* Init-time hook: take one element from the global ply pool and never
+   use it, so that pool index 0 is already occupied. */
+static clib_error_t *
+ip4_mtrie_module_init (vlib_main_t * vm)
+{
+  CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * burnt_ply);
+
+  pool_get (ip4_ply_pool, burnt_ply);
+
+  return NULL;
+}
+
+VLIB_INIT_FUNCTION (ip4_mtrie_module_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h
new file mode 100644
index 00000000..be262c2c
--- /dev/null
+++ b/src/vnet/ip/ip4_mtrie.h
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_fib.h: ip4 mtrie fib
+ *
+ * Copyright (c) 2012 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip4_fib_h
+#define included_ip_ip4_fib_h
+
+#include <vppinfra/cache.h>
+#include <vppinfra/vector.h>
+#include <vnet/ip/lookup.h>
+#include <vnet/ip/ip4_packet.h> /* for ip4_address_t */
+
+/* ip4 fib leaves: 16-8-8 mtrie (a 16 bit root ply followed by up to
+ two levels of 8 bit plies).
+ 1 + 2*adj_index for terminal leaves.
+ 0 + 2*next_ply_index for non-terminals, i.e. PLYs
+ 1 => empty (adjacency index of zero is special miss adjacency). */
+typedef u32 ip4_fib_mtrie_leaf_t;
+
+#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0)
+
+/**
+ * @brief The 16 bit stride ply that is the top PLY of the mtrie.
+ * We do not maintain the count of 'real' leaves in this PLY, since
+ * it is never removed. The FIB will destroy the mtrie and the ply once
+ * the FIB is destroyed.
+ */
+#define PLY_16_SIZE (1<<16)
+typedef struct ip4_fib_mtrie_16_ply_t_
+{
+ /**
+ * The leaves/slots/buckets to be filled with leaves
+ */
+ union
+ {
+ ip4_fib_mtrie_leaf_t leaves[PLY_16_SIZE];
+
+#ifdef CLIB_HAVE_VEC128
+ u32x4 leaves_as_u32x4[PLY_16_SIZE / 4];
+#endif
+ };
+
+ /**
+ * Prefix length for terminal leaves.
+ */
+ u8 dst_address_bits_of_leaves[PLY_16_SIZE];
+} ip4_fib_mtrie_16_ply_t;
+
+/**
+ * @brief One 8 bit stride ply of the mtrie fib (every ply below the root).
+ */
+typedef struct ip4_fib_mtrie_8_ply_t_
+{
+ /**
+ * The leaves/slots/buckets to be filled with leaves
+ */
+ union
+ {
+ ip4_fib_mtrie_leaf_t leaves[256];
+
+#ifdef CLIB_HAVE_VEC128
+ u32x4 leaves_as_u32x4[256 / 4];
+#endif
+ };
+
+ /**
+ * Prefix length for leaves/ply.
+ */
+ u8 dst_address_bits_of_leaves[256];
+
+ /**
+ * Number of non-empty leaves (whether terminal or not).
+ */
+ i32 n_non_empty_leafs;
+
+ /**
+ * The length of the ply's covering prefix. Also a measure of its depth.
+ * If a leaf in a slot has a mask length longer than this then it is
+ * 'non-empty'. Otherwise it is the value of the cover.
+ */
+ i32 dst_address_bits_base;
+
+ /* Pad to cache line boundary. */
+ u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)];
+}
+ip4_fib_mtrie_8_ply_t;
+
+/* The struct size must be a whole number of cache lines. */
+STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES,
+ "IP4 Mtrie ply cache line");
+
+/**
+ * @brief The multiway-TRIE.
+ * There is no data associated with the mtrie apart from the top PLY
+ */
+typedef struct
+{
+ /**
+ * Embed the PLY with the mtrie struct. This means that the Data-plane
+ * 'get me the mtrie' returns the first ply, and not an indirect 'pointer'
+ * to it. Therefore no cache line misses in the data-path.
+ */
+ ip4_fib_mtrie_16_ply_t root_ply;
+} ip4_fib_mtrie_t;
+
+/**
+ * @brief Initialise an mtrie
+ */
+void ip4_mtrie_init (ip4_fib_mtrie_t * m);
+
+/**
+ * @brief Free an mtrie. It must be empty when freed.
+ */
+void ip4_mtrie_free (ip4_fib_mtrie_t * m);
+
+/**
+ * @brief Add a route/entry to the mtrie
+ */
+void ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
+ const ip4_address_t * dst_address,
+ u32 dst_address_length, u32 adj_index);
+/**
+ * @brief Remove a route/entry from the mtrie
+ */
+void ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
+ const ip4_address_t * dst_address,
+ u32 dst_address_length,
+ u32 adj_index,
+ u32 cover_address_length, u32 cover_adj_index);
+
+/**
+ * @brief Format/display the contents of the mtrie
+ */
+format_function_t format_ip4_fib_mtrie;
+
+/**
+ * @brief A global pool of 8bit stride plys
+ */
+extern ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
+
+/**
+ * Is the leaf terminal (i.e. an LB index) or non-terminal (i.e. a PLY index)?
+ * Returns the low bit of the leaf: 1 for terminal, 0 for non-terminal.
+ */
+always_inline u32
+ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
+{
+  u32 terminal_bit = n & 1;
+
+  return terminal_bit;
+}
+
+/**
+ * Extract the LB index from a stored slot value.
+ * The leaf must be terminal; the index is the leaf without its low bit.
+ */
+always_inline u32
+ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
+{
+  u32 lb_index;
+
+  ASSERT (ip4_fib_mtrie_leaf_is_terminal (n));
+  lb_index = n >> 1;
+
+  return lb_index;
+}
+
+/**
+ * @brief Lookup step. Processes 1 byte of 4 byte ip4 address.
+ *
+ * A terminal current_leaf is returned unchanged. Otherwise the leaf names
+ * an 8 bit ply in ip4_ply_pool and the result is that ply's slot for byte
+ * dst_address_byte_index of dst_address.
+ */
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m,
+			   ip4_fib_mtrie_leaf_t current_leaf,
+			   const ip4_address_t * dst_address,
+			   u32 dst_address_byte_index)
+{
+  ip4_fib_mtrie_8_ply_t *next_ply;
+
+  /* The lookup already ended on a result: nothing more to walk. */
+  if (ip4_fib_mtrie_leaf_is_terminal (current_leaf))
+    return current_leaf;
+
+  next_ply = ip4_ply_pool + (current_leaf >> 1);
+
+  return next_ply->leaves[dst_address->as_u8[dst_address_byte_index]];
+}
+
+/**
+ * @brief Lookup step number 1. Processes 2 bytes of 4 byte ip4 address.
+ *
+ * Returns the root ply slot indexed by the first 16 bits of dst_address,
+ * taken as stored in the address (no byte swap).
+ */
+always_inline ip4_fib_mtrie_leaf_t
+ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m,
+			       const ip4_address_t * dst_address)
+{
+  return m->root_ply.leaves[dst_address->as_u16[0]];
+}
+
+#endif /* included_ip_ip4_fib_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h
new file mode 100644
index 00000000..1ff9fbdb
--- /dev/null
+++ b/src/vnet/ip/ip4_packet.h
@@ -0,0 +1,385 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip4/packet.h: ip4 packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip4_packet_h
+#define included_ip4_packet_h
+
+#include <vnet/ip/ip_packet.h> /* for ip_csum_t */
+#include <vnet/tcp/tcp_packet.h> /* for tcp_header_t */
+#include <vppinfra/byte_order.h> /* for clib_net_to_host_u16 */
+
+/* IP4 address which can be accessed either as 4 bytes,
+ as two 16-bit halves or as a 32-bit number. All members overlay the
+ same 4 bytes of storage. */
+typedef union
+{
+ u8 data[4];
+ u32 data_u32;
+ /* Aliases. */
+ u8 as_u8[4];
+ u16 as_u16[2];
+ u32 as_u32;
+} ip4_address_t;
+
+/* An IP4 address paired with a FIB index. */
+typedef struct
+{
+ /* IP address must be first for ip_interface_address_get_address() to work */
+ ip4_address_t ip4_addr;
+ u32 fib_index;
+} ip4_address_fib_t;
+
+/* Fill addr_fib with the given FIB index and a copy of *address. */
+always_inline void
+ip4_addr_fib_init (ip4_address_fib_t * addr_fib, ip4_address_t * address,
+		   u32 fib_index)
+{
+  addr_fib->fib_index = fib_index;
+  clib_memcpy (&addr_fib->ip4_addr, address, sizeof (addr_fib->ip4_addr));
+}
+
+/* (src,dst) pair of addresses as found in packet header:
+ source first, destination immediately after. */
+typedef struct
+{
+ ip4_address_t src, dst;
+} ip4_address_pair_t;
+
+/* If address is a valid netmask, return length of mask, otherwise ~0.
+
+   A valid netmask is a run of 1 bits followed only by 0 bits, read from
+   a->as_u8[0] onwards. Once a byte other than 0xff has been seen, every
+   later byte must be zero; masks such as 255.0.255.0 are rejected. */
+always_inline uword
+ip4_address_netmask_length (ip4_address_t * a)
+{
+  uword result = 0;
+  uword i;
+
+  for (i = 0; i < ARRAY_LEN (a->as_u8); i++)
+    {
+      u8 byte = a->as_u8[i];
+      uword n_ones = 0;
+
+      /* Count the leading 1 bits of this byte. */
+      while (n_ones < 8 && (byte & (0x80 >> n_ones)))
+	n_ones++;
+
+      /* Any 1 bit after the first 0 bit: not a valid netmask. */
+      if (n_ones < 8 && (byte & (0xff >> n_ones)))
+	return ~0;
+
+      result += n_ones;
+
+      if (n_ones < 8)
+	{
+	  /* The mask ends inside this byte: the rest must be all zero. */
+	  for (i++; i < ARRAY_LEN (a->as_u8); i++)
+	    {
+	      if (a->as_u8[i] != 0)
+		return ~0;
+	    }
+	  break;
+	}
+    }
+
+  return result;
+}
+
+/* IPv4 header without options (20 bytes), overlaid with word-sized views
+ of the same bytes that the checksum macros use. */
+typedef union
+{
+ struct
+ {
+ /* 4 bit header length (in 32bit units) and version VVVVLLLL.
+ e.g. for packets w/ no options ip_version_and_header_length == 0x45. */
+ u8 ip_version_and_header_length;
+
+ /* Type of service. */
+ u8 tos;
+
+ /* Total layer 3 packet length including this header. */
+ u16 length;
+
+ /* Fragmentation ID. */
+ u16 fragment_id;
+
+ /* 3 bits of flags and 13 bits of fragment offset (in units
+ of 8 byte quantities). The flag values below apply to the field
+ after conversion to host byte order. */
+ u16 flags_and_fragment_offset;
+#define IP4_HEADER_FLAG_MORE_FRAGMENTS (1 << 13)
+#define IP4_HEADER_FLAG_DONT_FRAGMENT (1 << 14)
+#define IP4_HEADER_FLAG_CONGESTION (1 << 15)
+
+ /* Time to live decremented by router at each hop. */
+ u8 ttl;
+
+ /* Next level protocol packet. */
+ u8 protocol;
+
+ /* Checksum. */
+ u16 checksum;
+
+ /* Source and destination address. */
+ union
+ {
+ struct
+ {
+ ip4_address_t src_address, dst_address;
+ };
+ ip4_address_pair_t address_pair;
+ };
+ };
+
+ /* For checksumming we'll want to access IP header in word sized chunks. */
+ /* For 64 bit machines: 2 x 8 bytes + 1 x 4 bytes = 20 bytes. */
+ /* *INDENT-OFF* */
+ CLIB_PACKED (struct {
+ u64 checksum_data_64[2];
+ u32 checksum_data_64_32[1];
+ });
+ /* *INDENT-ON* */
+
+ /* For 32 bit machines: 5 x 4 bytes = 20 bytes. */
+ /* *INDENT-OFF* */
+ CLIB_PACKED (struct {
+ u32 checksum_data_32[5];
+ });
+ /* *INDENT-ON* */
+} ip4_header_t;
+
+/* Value of ip_version_and_header_length for packets w/o options. */
+#define IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS \
+ ((4 << 4) | (sizeof (ip4_header_t) / sizeof (u32)))
+
+/* Fragment offset of the header, in units of 8 bytes (low 13 bits of the
+   host order flags/offset field). */
+always_inline int
+ip4_get_fragment_offset (ip4_header_t * i)
+{
+  u16 flags_and_offset = clib_net_to_host_u16 (i->flags_and_fragment_offset);
+
+  return flags_and_offset & 0x1fff;
+}
+
+/* Non-zero (the MORE_FRAGMENTS flag value) when the header has the
+   more-fragments flag set, 0 otherwise. */
+always_inline int
+ip4_get_fragment_more (ip4_header_t * i)
+{
+  u16 flags_and_offset = clib_net_to_host_u16 (i->flags_and_fragment_offset);
+
+  return flags_and_offset & IP4_HEADER_FLAG_MORE_FRAGMENTS;
+}
+
+/* Non-zero when the packet is a fragment: it has a non-zero fragment
+   offset or the more-fragments flag set. The mask is byte-swapped instead
+   of the header field. */
+always_inline int
+ip4_is_fragment (ip4_header_t * i)
+{
+  u16 fragment_mask =
+    clib_net_to_host_u16 (0x1fff | IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+  return (i->flags_and_fragment_offset & fragment_mask);
+}
+
+/* Non-zero when the packet is the first fragment: fragment offset zero
+   with the more-fragments flag set. */
+always_inline int
+ip4_is_first_fragment (ip4_header_t * i)
+{
+  u16 fragment_mask =
+    clib_net_to_host_u16 (0x1fff | IP4_HEADER_FLAG_MORE_FRAGMENTS);
+  u16 more_fragments_only =
+    clib_net_to_host_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+  return (i->flags_and_fragment_offset & fragment_mask) ==
+    more_fragments_only;
+}
+
+/* Fragment offset in bytes: the header stores it in units of 8 bytes. */
+always_inline int
+ip4_get_fragment_offset_bytes (ip4_header_t * i)
+{
+  int offset_in_8_byte_units = ip4_get_fragment_offset (i);
+
+  return 8 * offset_in_8_byte_units;
+}
+
+/* Header length in bytes, from the low nibble of the first header byte
+   (which counts 32 bit words). */
+always_inline int
+ip4_header_bytes (ip4_header_t * i)
+{
+  int n_words = i->ip_version_and_header_length & 0xf;
+
+  return sizeof (u32) * n_words;
+}
+
+/* Pointer to the first byte after the IP4 header, i.e. the header address
+   advanced by ip4_header_bytes(). */
+always_inline void *
+ip4_next_header (ip4_header_t * i)
+{
+  u8 *header_start = (u8 *) i;
+
+  return (void *) (header_start + ip4_header_bytes (i));
+}
+
+/* Compute the checksum of header i over ip4_header_bytes(i) bytes.
+   The checksum field is zeroed for the computation and restored before
+   returning, so the header is written to temporarily. */
+always_inline u16
+ip4_header_checksum (ip4_header_t * i)
+{
+  const u16 stored = i->checksum;
+  ip_csum_t acc;
+  u16 computed;
+
+  i->checksum = 0;
+  acc = ip_incremental_checksum (0, i, ip4_header_bytes (i));
+  computed = ~ip_csum_fold (acc);
+  i->checksum = stored;
+
+  /* Make checksum agree for special case where either
+     0 or 0xffff would give same 1s complement sum. */
+  if (computed == 0 && stored == 0xffff)
+    return stored;
+
+  return computed;
+}
+
+/* Non-zero when the checksum stored in the header equals the one
+   computed by ip4_header_checksum(). */
+static inline uword
+ip4_header_checksum_is_valid (ip4_header_t * i)
+{
+  u16 expected = ip4_header_checksum (i);
+
+  return i->checksum == expected;
+}
+
+/* Sum the 20 bytes of an option-less header ip0 into sum0 (an ip_csum_t
+   lvalue) using the widest word views available. The result is not
+   folded; the caller applies ip_csum_fold(). Arguments are parenthesized
+   so that expressions such as (hdr + 1) expand correctly. */
+#define ip4_partial_header_checksum_x1(ip0,sum0) \
+do { \
+  if (BITS (ip_csum_t) > 32) \
+    { \
+      (sum0) = (ip0)->checksum_data_64[0]; \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_64[1]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_64_32[0]); \
+    } \
+  else \
+    { \
+      (sum0) = (ip0)->checksum_data_32[0]; \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[1]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[2]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[3]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[4]); \
+    } \
+} while (0)
+
+/* Two-header variant of the partial header checksum: sums the 20 bytes of
+   ip0 into sum0 and of ip1 into sum1, interleaving the two computations.
+   Results are not folded; the caller applies ip_csum_fold(). Arguments
+   are parenthesized so that arbitrary expressions expand correctly. */
+#define ip4_partial_header_checksum_x2(ip0,ip1,sum0,sum1) \
+do { \
+  if (BITS (ip_csum_t) > 32) \
+    { \
+      (sum0) = (ip0)->checksum_data_64[0]; \
+      (sum1) = (ip1)->checksum_data_64[0]; \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_64[1]); \
+      (sum1) = ip_csum_with_carry ((sum1), (ip1)->checksum_data_64[1]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_64_32[0]); \
+      (sum1) = ip_csum_with_carry ((sum1), (ip1)->checksum_data_64_32[0]); \
+    } \
+  else \
+    { \
+      (sum0) = (ip0)->checksum_data_32[0]; \
+      (sum1) = (ip1)->checksum_data_32[0]; \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[1]); \
+      (sum1) = ip_csum_with_carry ((sum1), (ip1)->checksum_data_32[1]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[2]); \
+      (sum1) = ip_csum_with_carry ((sum1), (ip1)->checksum_data_32[2]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[3]); \
+      (sum1) = ip_csum_with_carry ((sum1), (ip1)->checksum_data_32[3]); \
+      (sum0) = ip_csum_with_carry ((sum0), (ip0)->checksum_data_32[4]); \
+      (sum1) = ip_csum_with_carry ((sum1), (ip1)->checksum_data_32[4]); \
+    } \
+} while (0)
+
+/* Non-zero when the top 4 bits of the first address byte are 1110,
+   i.e. the address lies in 224.0.0.0/4. */
+always_inline uword
+ip4_address_is_multicast (ip4_address_t * a)
+{
+  u8 top_nibble = a->data[0] & 0xf0;
+
+  return top_nibble == 0xe0;
+}
+
+/* Set *a to the multicast address for group g: 1110 in the top 4 bits and
+   g in the low 28 bits, stored in network byte order. g must fit in 28
+   bits. The prefix is built as an unsigned value because 0xe << 28 does
+   not fit in a signed int. */
+always_inline void
+ip4_multicast_address_set_for_group (ip4_address_t * a,
+				     ip_multicast_group_t g)
+{
+  ASSERT ((u32) g < (1 << 28));
+  a->as_u32 = clib_host_to_net_u32 (((u32) 0xe << 28) + (u32) g);
+}
+
+/* Write the 6 byte ethernet multicast address for IP4 address a:
+   01:00:5e followed by the low 23 bits of the IP address. */
+always_inline void
+ip4_multicast_ethernet_address (u8 * ethernet_address, ip4_address_t * a)
+{
+  /* Fixed 01:00:5e prefix. */
+  ethernet_address[0] = 0x01;
+  ethernet_address[1] = 0x00;
+  ethernet_address[2] = 0x5e;
+
+  /* Low 23 bits of the IP address; the top bit of byte 1 is dropped. */
+  ethernet_address[3] = a->as_u8[1] & 0x7f;
+  ethernet_address[4] = a->as_u8[2];
+  ethernet_address[5] = a->as_u8[3];
+}
+
+/* Turn a packet around in place: swap the IP source/destination addresses
+   of ip0 and the src/dst fields of tcp0. */
+always_inline void
+ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0)
+{
+  u32 old_ip_src = ip0->src_address.data_u32;
+  u32 old_ip_dst = ip0->dst_address.data_u32;
+  u32 old_tcp_src, old_tcp_dst;
+
+  ip0->src_address.data_u32 = old_ip_dst;
+  ip0->dst_address.data_u32 = old_ip_src;
+
+  old_tcp_src = tcp0->src;
+  old_tcp_dst = tcp0->dst;
+  tcp0->src = old_tcp_dst;
+  tcp0->dst = old_tcp_src;
+}
+
+/* Two-packet variant of ip4_tcp_reply_x1: swap IP addresses and TCP
+   src/dst fields of both packets. All values are read before any is
+   written, for the IP headers and then for the TCP headers. */
+always_inline void
+ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1,
+		  tcp_header_t * tcp0, tcp_header_t * tcp1)
+{
+  u32 a_src, a_dst, b_src, b_dst;
+
+  /* IP addresses. */
+  a_src = ip0->src_address.data_u32;
+  b_src = ip1->src_address.data_u32;
+  a_dst = ip0->dst_address.data_u32;
+  b_dst = ip1->dst_address.data_u32;
+
+  ip0->src_address.data_u32 = a_dst;
+  ip1->src_address.data_u32 = b_dst;
+  ip0->dst_address.data_u32 = a_src;
+  ip1->dst_address.data_u32 = b_src;
+
+  /* TCP src/dst. */
+  a_src = tcp0->src;
+  b_src = tcp1->src;
+  a_dst = tcp0->dst;
+  b_dst = tcp1->dst;
+
+  tcp0->src = a_dst;
+  tcp1->src = b_dst;
+  tcp0->dst = a_src;
+  tcp1->dst = b_src;
+}
+
+#endif /* included_ip4_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_pg.c b/src/vnet/ip/ip4_pg.c
new file mode 100644
index 00000000..9697a3b9
--- /dev/null
+++ b/src/vnet/ip/ip4_pg.c
@@ -0,0 +1,387 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_pg: IP v4 packet-generator interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+/* Bits selecting which header fields the edit function fills in. */
+#define IP4_PG_EDIT_CHECKSUM (1 << 0)
+#define IP4_PG_EDIT_LENGTH (1 << 1)
+
+/*
+ * For each buffer index in packets[0..n_packets-1], locate the IP4 header
+ * at byte ip_header_offset of the buffer data and, depending on flags:
+ *  - IP4_PG_EDIT_LENGTH: set the length field to the buffer chain length
+ *    minus ip_header_offset;
+ *  - IP4_PG_EDIT_CHECKSUM: recompute the header checksum (the header must
+ *    have no options).
+ * flags must be non-zero. Buffers are handled two at a time, then singly.
+ */
+static_always_inline void
+compute_length_and_or_checksum (vlib_main_t * vm,
+ u32 * packets,
+ u32 n_packets,
+ u32 ip_header_offset, u32 flags)
+{
+ ASSERT (flags != 0);
+
+ /* Dual loop: two buffers per iteration. */
+ while (n_packets >= 2)
+ {
+ u32 pi0, pi1;
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ ip_csum_t sum0, sum1;
+
+ pi0 = packets[0];
+ pi1 = packets[1];
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+ n_packets -= 2;
+ packets += 2;
+
+ ip0 = (void *) (p0->data + ip_header_offset);
+ ip1 = (void *) (p1->data + ip_header_offset);
+
+ /* The length is written first so the checksum below covers it. */
+ if (flags & IP4_PG_EDIT_LENGTH)
+ {
+ ip0->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) -
+ ip_header_offset);
+ ip1->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p1) -
+ ip_header_offset);
+ }
+
+ if (flags & IP4_PG_EDIT_CHECKSUM)
+ {
+ /* The partial checksum macros sum exactly sizeof (ip4_header_t). */
+ ASSERT (ip4_header_bytes (ip0) == sizeof (ip0[0]));
+ ASSERT (ip4_header_bytes (ip1) == sizeof (ip1[0]));
+
+ ip0->checksum = 0;
+ ip1->checksum = 0;
+
+ ip4_partial_header_checksum_x2 (ip0, ip1, sum0, sum1);
+ ip0->checksum = ~ip_csum_fold (sum0);
+ ip1->checksum = ~ip_csum_fold (sum1);
+
+ /* Cross-check against the generic implementation. */
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ ASSERT (ip1->checksum == ip4_header_checksum (ip1));
+ }
+ }
+
+ /* Single loop: whatever is left. */
+ while (n_packets >= 1)
+ {
+ u32 pi0;
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ ip_csum_t sum0;
+
+ pi0 = packets[0];
+ p0 = vlib_get_buffer (vm, pi0);
+ n_packets -= 1;
+ packets += 1;
+
+ ip0 = (void *) (p0->data + ip_header_offset);
+
+ if (flags & IP4_PG_EDIT_LENGTH)
+ ip0->length =
+ clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0) -
+ ip_header_offset);
+
+ if (flags & IP4_PG_EDIT_CHECKSUM)
+ {
+ ASSERT (ip4_header_bytes (ip0) == sizeof (ip0[0]));
+
+ ip0->checksum = 0;
+
+ ip4_partial_header_checksum_x1 (ip0, sum0);
+ ip0->checksum = ~ip_csum_fold (sum0);
+
+ ASSERT (ip0->checksum == ip4_header_checksum (ip0));
+ }
+ }
+}
+
+/*
+ * Edit function installed on an IP4 edit group by unformat_pg_ip4_header.
+ * Fills in length and/or checksum for the given buffers, as selected by
+ * g->edit_function_opaque; the IP header sits at g->start_byte_offset.
+ * Each case passes a literal flags value to the always-inline worker
+ * (NOTE(review): presumably so each call is specialised at compile time;
+ * confirm before collapsing the switch).
+ */
+static void
+ip4_pg_edit_function (pg_main_t * pg,
+ pg_stream_t * s,
+ pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 ip_offset;
+
+ ip_offset = g->start_byte_offset;
+
+ switch (g->edit_function_opaque)
+ {
+ case IP4_PG_EDIT_LENGTH:
+ compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+ IP4_PG_EDIT_LENGTH);
+ break;
+
+ case IP4_PG_EDIT_CHECKSUM:
+ compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+ IP4_PG_EDIT_CHECKSUM);
+ break;
+
+ case IP4_PG_EDIT_LENGTH | IP4_PG_EDIT_CHECKSUM:
+ compute_length_and_or_checksum (vm, packets, n_packets, ip_offset,
+ IP4_PG_EDIT_LENGTH
+ | IP4_PG_EDIT_CHECKSUM);
+ break;
+
+ default:
+ /* Any other opaque value is unexpected. */
+ ASSERT (0);
+ break;
+ }
+}
+
+/* One packet-generator edit per IP4 header field (the bit fields of the
+ header get one edit each). */
+typedef struct
+{
+ pg_edit_t ip_version, header_length;
+ pg_edit_t tos;
+ pg_edit_t length;
+
+ pg_edit_t fragment_id, fragment_offset;
+
+ /* Flags together with fragment offset. */
+ pg_edit_t mf_flag, df_flag, ce_flag;
+
+ pg_edit_t ttl;
+
+ pg_edit_t protocol;
+
+ pg_edit_t checksum;
+
+ pg_edit_t src_address, dst_address;
+} pg_ip4_header_t;
+
+/* Bind every edit in p to the ip4_header_t field (or bit range of a
+ field) it controls. */
+static inline void
+pg_ip4_header_init (pg_ip4_header_t * p)
+{
+ /* Initialize fields that are not bit fields in the IP header. */
+#define _(f) pg_edit_init (&p->f, ip4_header_t, f);
+ _(tos);
+ _(length);
+ _(fragment_id);
+ _(ttl);
+ _(protocol);
+ _(checksum);
+ _(src_address);
+ _(dst_address);
+#undef _
+
+ /* Initialize bit fields. */
+ /* Low nibble: header length; high nibble: version. */
+ pg_edit_init_bitfield (&p->header_length, ip4_header_t,
+ ip_version_and_header_length, 0, 4);
+ pg_edit_init_bitfield (&p->ip_version, ip4_header_t,
+ ip_version_and_header_length, 4, 4);
+
+ /* 13 bits of fragment offset, then one bit per flag (bits 13, 14, 15). */
+ pg_edit_init_bitfield (&p->fragment_offset, ip4_header_t,
+ flags_and_fragment_offset, 0, 13);
+ pg_edit_init_bitfield (&p->mf_flag, ip4_header_t,
+ flags_and_fragment_offset, 13, 1);
+ pg_edit_init_bitfield (&p->df_flag, ip4_header_t,
+ flags_and_fragment_offset, 14, 1);
+ pg_edit_init_bitfield (&p->ce_flag, ip4_header_t,
+ flags_and_fragment_offset, 15, 1);
+}
+
+/*
+ * Parse an IP4 header description for packet-generator stream s.
+ * va_args: pg_stream_t *s.
+ *
+ * Accepts "<protocol>: <src> -> <dst>" or just "<protocol>:", followed by
+ * any number of options (version, header-length, tos, length, checksum,
+ * ttl, "fragment id N offset M", mf/df/ce flags), then hands the rest of
+ * the input to the protocol's own parser or the generic payload parser.
+ * Length and checksum left unspecified are either fixed here when they can
+ * be computed up front, or filled per packet by ip4_pg_edit_function.
+ *
+ * Returns 1 on success; on failure frees the edit group and returns 0.
+ */
+uword
+unformat_pg_ip4_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_ip4_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ip4_header_t),
+ &group_index);
+ pg_ip4_header_init (p);
+
+ /* Defaults. */
+ pg_edit_set_fixed (&p->ip_version, 4);
+ pg_edit_set_fixed (&p->header_length, sizeof (ip4_header_t) / sizeof (u32));
+
+ pg_edit_set_fixed (&p->tos, 0);
+ pg_edit_set_fixed (&p->ttl, 64);
+
+ pg_edit_set_fixed (&p->fragment_id, 0);
+ pg_edit_set_fixed (&p->fragment_offset, 0);
+ pg_edit_set_fixed (&p->mf_flag, 0);
+ pg_edit_set_fixed (&p->df_flag, 0);
+ pg_edit_set_fixed (&p->ce_flag, 0);
+
+ /* Unspecified unless the user gives them; resolved after parsing. */
+ p->length.type = PG_EDIT_UNSPECIFIED;
+ p->checksum.type = PG_EDIT_UNSPECIFIED;
+
+ if (unformat (input, "%U: %U -> %U",
+ unformat_pg_edit,
+ unformat_ip_protocol, &p->protocol,
+ unformat_pg_edit,
+ unformat_ip4_address, &p->src_address,
+ unformat_pg_edit, unformat_ip4_address, &p->dst_address))
+ goto found;
+
+ if (!unformat (input, "%U:",
+ unformat_pg_edit, unformat_ip_protocol, &p->protocol))
+ goto error;
+
+found:
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "version %U",
+ unformat_pg_edit, unformat_pg_number, &p->ip_version))
+ ;
+
+ else if (unformat (input, "header-length %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->header_length))
+ ;
+
+ else if (unformat (input, "tos %U",
+ unformat_pg_edit, unformat_pg_number, &p->tos))
+ ;
+
+ else if (unformat (input, "length %U",
+ unformat_pg_edit, unformat_pg_number, &p->length))
+ ;
+
+ else if (unformat (input, "checksum %U",
+ unformat_pg_edit, unformat_pg_number, &p->checksum))
+ ;
+
+ else if (unformat (input, "ttl %U",
+ unformat_pg_edit, unformat_pg_number, &p->ttl))
+ ;
+
+ else if (unformat (input, "fragment id %U offset %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->fragment_id,
+ unformat_pg_edit,
+ unformat_pg_number, &p->fragment_offset))
+ {
+ /* The parsed offset is divided by 8 because the header field
+ * counts units of 8 bytes. */
+ int i;
+ for (i = 0; i < ARRAY_LEN (p->fragment_offset.values); i++)
+ pg_edit_set_value (&p->fragment_offset, i,
+ pg_edit_get_value (&p->fragment_offset,
+ i) / 8);
+
+ }
+
+ /* Flags. */
+ else if (unformat (input, "mf") || unformat (input, "MF"))
+ pg_edit_set_fixed (&p->mf_flag, 1);
+
+ else if (unformat (input, "df") || unformat (input, "DF"))
+ pg_edit_set_fixed (&p->df_flag, 1);
+
+ else if (unformat (input, "ce") || unformat (input, "CE"))
+ pg_edit_set_fixed (&p->ce_flag, 1);
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_t protocol;
+ ip_protocol_info_t *pi;
+
+ /* A protocol-specific parser can only be looked up for a fixed
+ * protocol value. */
+ pi = 0;
+ if (p->protocol.type == PG_EDIT_FIXED)
+ {
+ protocol = pg_edit_get_value (&p->protocol, PG_EDIT_LO);
+ pi = ip_get_protocol_info (im, protocol);
+ }
+
+ if (pi && pi->unformat_pg_edit
+ && unformat_user (input, pi->unformat_pg_edit, s))
+ ;
+
+ else if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ /* NOTE(review): p is used below after the nested parsers ran, but is
+ * only re-fetched with pg_get_edit_group() further down; confirm that
+ * nested edit groups cannot invalidate p before that point. */
+ if (p->length.type == PG_EDIT_UNSPECIFIED
+ && s->min_packet_bytes == s->max_packet_bytes
+ && group_index + 1 < vec_len (s->edit_groups))
+ {
+ pg_edit_set_fixed (&p->length,
+ pg_edit_group_n_bytes (s, group_index));
+ }
+
+ /* Compute IP header checksum if all edits are fixed. */
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ ip4_header_t fixed_header, fixed_mask, cmp_mask;
+
+ /* See if header is all fixed and specified except for
+ checksum field. */
+ memset (&cmp_mask, ~0, sizeof (cmp_mask));
+ cmp_mask.checksum = 0;
+
+ pg_edit_group_get_fixed_packet_data (s, group_index,
+ &fixed_header, &fixed_mask);
+ if (!memcmp (&fixed_mask, &cmp_mask, sizeof (cmp_mask)))
+ pg_edit_set_fixed (&p->checksum,
+ clib_net_to_host_u16 (ip4_header_checksum
+ (&fixed_header)));
+ }
+
+ /* Whatever is still unspecified is filled in per packet by the edit
+ * function; the opaque value says which fields. */
+ p = pg_get_edit_group (s, group_index);
+ if (p->length.type == PG_EDIT_UNSPECIFIED
+ || p->checksum.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = ip4_pg_edit_function;
+ g->edit_function_opaque = 0;
+ if (p->length.type == PG_EDIT_UNSPECIFIED)
+ g->edit_function_opaque |= IP4_PG_EDIT_LENGTH;
+ if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ g->edit_function_opaque |= IP4_PG_EDIT_CHECKSUM;
+ }
+
+ return 1;
+ }
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c
new file mode 100644
index 00000000..4829079b
--- /dev/null
+++ b/src/vnet/ip/ip4_source_and_port_range_check.c
@@ -0,0 +1,1424 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip_source_and_port_range_check.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+
+/**
+ * @file
+ * @brief IPv4 Source and Port Range Checking.
+ *
+ * This file contains the source code for IPv4 source and port range
+ * checking.
+ */
+
+
+/**
+ * @brief The pool of range check DPOs
+ */
+static protocol_port_range_dpo_t *ppr_dpo_pool;
+
+/**
+ * @brief Dynamically registered DPO type
+ */
+static dpo_type_t ppr_dpo_type;
+
+/* Graph node registrations for the RX and TX range-check nodes; both are
+ * populated by the VLIB_REGISTER_NODE blocks later in this file. */
+vlib_node_registration_t ip4_source_port_and_range_check_rx;
+vlib_node_registration_t ip4_source_port_and_range_check_tx;
+
+/* X-macro listing every error/counter of the range-check nodes as
+ * _(symbol, description).  It is expanded twice below: once to build the
+ * enum and once to build the matching string table, so both stay in the
+ * same order. */
+#define foreach_ip4_source_and_port_range_check_error \
+ _(CHECK_FAIL, "ip4 source and port range check bad packets") \
+ _(CHECK_OK, "ip4 source and port range check good packets")
+
+/* Error/counter indices, one IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_<sym>
+ * per entry above, followed by the entry count. */
+typedef enum
+{
+#define _(sym,str) IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_##sym,
+ foreach_ip4_source_and_port_range_check_error
+#undef _
+ IP4_SOURCE_AND_PORT_RANGE_CHECK_N_ERROR,
+} ip4_source_and_port_range_check_error_t;
+
+/* Human-readable descriptions, indexed by the enum above. */
+static char *ip4_source_and_port_range_check_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip4_source_and_port_range_check_error
+#undef _
+};
+
+/* Per-packet trace record filled in by the range-check nodes and rendered
+ * by format_ip4_source_and_port_range_check_trace(). */
+typedef struct
+{
+ /* non-zero when the packet kept the next node chosen by the feature arc */
+ u32 pass;
+ /* non-zero when the packet was exempt from checking (bypass case) */
+ u32 bypass;
+ /* non-zero when the IP protocol was TCP; the formatter prints UDP otherwise */
+ u32 is_tcp;
+ /* source address copied from the IPv4 header */
+ ip4_address_t src_addr;
+ /* destination port in host byte order, or 0 for bypassed packets */
+ u16 port;
+ /* FIB index selected for the source-address lookup */
+ u32 fib_index;
+} ip4_source_and_port_range_check_trace_t;
+
+/**
+ * @brief Render one range-check trace record.
+ *
+ * Consumes (vlib_main_t *, vlib_node_t *, trace record *) from the va_list.
+ * Bypassed packets get a fixed message; all others print the FIB index,
+ * source address, protocol, destination port and the PASS/FAIL verdict.
+ *
+ * @param s  vector the text is appended to
+ * @param va format arguments as described above
+ * @return the (possibly reallocated) output vector
+ */
+static u8 *
+format_ip4_source_and_port_range_check_trace (u8 * s, va_list * va)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+  ip4_source_and_port_range_check_trace_t *trace =
+    va_arg (*va, ip4_source_and_port_range_check_trace_t *);
+  const char *proto_name;
+  const char *verdict;
+
+  /* Packets that skipped the check carry no meaningful port/verdict. */
+  if (trace->bypass)
+    return format (s, "PASS (bypass case)");
+
+  proto_name = trace->is_tcp ? "TCP" : "UDP";
+  verdict = (1 == trace->pass) ? "PASS" : "FAIL";
+
+  return format (s, "fib %d src ip %U %s dst port %d: %s",
+                 trace->fib_index, format_ip4_address, &trace->src_addr,
+                 proto_name, (u32) trace->port, verdict);
+}
+
+/* Next-node indices owned by the range-check nodes themselves.  Only the
+ * drop disposition is listed; the pass-through next index is obtained at
+ * run time from the feature arc. */
+typedef enum
+{
+ IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP,
+ IP4_SOURCE_AND_PORT_RANGE_CHECK_N_NEXT,
+} ip4_source_and_port_range_check_next_t;
+
+
+/**
+ * @brief Decide whether a destination port is allowed by a range-check DPO.
+ *
+ * Each DPO block holds 8 (low, hi) lanes; hi is stored as an exclusive
+ * upper bound, so a lane matches when low <= dst_port < hi.
+ *
+ * @param ppr_dpo  range-check DPO found for the packet's source address,
+ *                 or NULL when the lookup did not hit one
+ * @param dst_port destination port in host byte order
+ * @param next     next-node index to return when the port is allowed
+ * @return @c next when the port is allowed, otherwise
+ *         IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP
+ */
+static inline u32
+check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo,
+                         u16 dst_port, u32 next)
+{
+  u16x8vec_t key;
+  u16x8vec_t diff1;
+  u16x8vec_t diff2;
+  u16x8vec_t sum, sum_equal_diff2;
+  u16 sum_nonzero, sum_equal, winner_mask;
+  int i;
+
+  /* No DPO for this source, or port 0: never allowed. */
+  if (NULL == ppr_dpo || dst_port == 0)
+    return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
+
+  /* Make the obvious screw-case work. A variant also works w/ no MMX */
+  if (PREDICT_FALSE (dst_port == 65535))
+    {
+      int j;
+
+      /*
+       * Scan every block the DPO owns.  The bound must be the size of
+       * blocks[], not a buffer-layout constant: the latter can both skip
+       * populated blocks and run past the end of the array.
+       */
+      for (i = 0; i < N_BLOCKS_PER_DPO; i++)
+        {
+          for (j = 0; j < 8; j++)
+            if (ppr_dpo->blocks[i].low.as_u16[j] == 65535)
+              return next;
+        }
+      return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
+    }
+
+  key.as_u16x8 = u16x8_splat (dst_port);
+
+  for (i = 0; i < ppr_dpo->n_used_blocks; i++)
+    {
+      /* diff1 is 0 in lanes where low <= port (saturating subtract). */
+      diff1.as_u16x8 =
+        u16x8_sub_saturate (ppr_dpo->blocks[i].low.as_u16x8, key.as_u16x8);
+      /* diff2 is non-zero in lanes where port < hi. */
+      diff2.as_u16x8 =
+        u16x8_sub_saturate (ppr_dpo->blocks[i].hi.as_u16x8, key.as_u16x8);
+      sum.as_u16x8 = u16x8_add (diff1.as_u16x8, diff2.as_u16x8);
+      sum_equal_diff2.as_u16x8 =
+        u16x8_is_equal (sum.as_u16x8, diff2.as_u16x8);
+      /*
+       * Going by the helper names, a lane wins when sum is non-zero and
+       * equals diff2, i.e. diff1 == 0 and diff2 != 0: low <= port < hi.
+       */
+      sum_nonzero = ~u16x8_zero_byte_mask (sum.as_u16x8);
+      sum_equal = ~u16x8_zero_byte_mask (sum_equal_diff2.as_u16x8);
+      winner_mask = sum_nonzero & sum_equal;
+      if (winner_mask)
+        return next;
+    }
+  return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
+}
+
+/**
+ * @brief Look up a range-check DPO in the pool by its index.
+ *
+ * @param index pool index of the DPO
+ * @return pointer to the pool element
+ */
+always_inline protocol_port_range_dpo_t *
+protocol_port_range_dpo_get (index_t index)
+{
+  protocol_port_range_dpo_t *entry =
+    pool_elt_at_index (ppr_dpo_pool, index);
+
+  return entry;
+}
+
+/**
+ * @brief Shared worker for the RX and TX source-and-port-range-check nodes.
+ *
+ * For every buffer in the frame:
+ *  - on TX, temporarily advance past the ethernet header before reading
+ *    the IPv4 header, and rewind again before enqueueing;
+ *  - fetch the per-interface feature config and the feature-arc next node;
+ *  - for UDP/TCP packets pick the FIB index configured for that protocol
+ *    and direction (the *_IN entries on TX, the *_OUT entries on RX);
+ *  - look the source address up in that FIB and, if the first bucket is a
+ *    range-check DPO, test the destination port against it;
+ *  - multicast/broadcast sources and non-UDP/TCP packets bypass the check.
+ *
+ * Packets that fail are sent to the drop next; packets that pass are
+ * counted and reported through the CHECK_OK counter of the calling node.
+ *
+ * @param vm    vlib main
+ * @param node  runtime of the calling node
+ * @param frame frame of buffer indices to process
+ * @param is_tx non-zero when called from the TX node
+ * @return number of buffers in the frame
+ */
+always_inline uword
+ip4_source_and_port_range_check_inline (vlib_main_t * vm,
+                                        vlib_node_runtime_t * node,
+                                        vlib_frame_t * frame, int is_tx)
+{
+  ip4_main_t *im = &ip4_main;
+  u32 n_left_from, *from, *to_next;
+  u32 next_index;
+  vlib_node_runtime_t *error_node = node;
+  u32 good_packets = 0;
+  int i;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Only a single-buffer loop is implemented. */
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t *b0;
+          ip4_header_t *ip0;
+          ip_source_and_port_range_check_config_t *c0;
+          u32 bi0, next0, lb_index0, pass0, save_next0, fib_index0;
+          udp_header_t *udp0;
+          const protocol_port_range_dpo_t *ppr_dpo0 = NULL;
+          const dpo_id_t *dpo;
+          u32 sw_if_index0;
+
+          bi0 = from[0];
+          to_next[0] = bi0;
+          from += 1;
+          to_next += 1;
+          n_left_from -= 1;
+          n_left_to_next -= 1;
+
+          b0 = vlib_get_buffer (vm, bi0);
+          sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+          /* Default; replaced below for UDP and TCP packets. */
+          fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
+
+          /* On TX, step over the ethernet header before reading ip0. */
+          if (is_tx)
+            vlib_buffer_advance (b0, sizeof (ethernet_header_t));
+
+          ip0 = vlib_buffer_get_current (b0);
+
+          c0 = vnet_feature_next_with_data (sw_if_index0, &next0,
+                                            b0, sizeof (c0[0]));
+
+          /* we can't use the default VRF here... */
+          for (i = 0; i < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; i++)
+            {
+              ASSERT (c0->fib_index[i]);
+            }
+
+
+          if (is_tx)
+            {
+              if (ip0->protocol == IP_PROTOCOL_UDP)
+                fib_index0 =
+                  c0->fib_index
+                  [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN];
+              if (ip0->protocol == IP_PROTOCOL_TCP)
+                fib_index0 =
+                  c0->fib_index
+                  [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN];
+            }
+          else
+            {
+              if (ip0->protocol == IP_PROTOCOL_UDP)
+                fib_index0 =
+                  c0->fib_index
+                  [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT];
+              if (ip0->protocol == IP_PROTOCOL_TCP)
+                fib_index0 =
+                  c0->fib_index
+                  [IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT];
+            }
+
+          if (fib_index0 != ~0)
+            {
+              lb_index0 = ip4_fib_forwarding_lookup (fib_index0,
+                                                     &ip0->src_address);
+
+              dpo =
+                load_balance_get_bucket_i (load_balance_get (lb_index0), 0);
+
+              if (ppr_dpo_type == dpo->dpoi_type)
+                {
+                  ppr_dpo0 = protocol_port_range_dpo_get (dpo->dpoi_index);
+                }
+              /*
+               * else the lookup hit an entry that was not inserted
+               * by this range checker, which is the default route
+               */
+            }
+          /*
+           * $$$ which (src,dst) categories should we always pass?
+           */
+          pass0 = 0;
+          pass0 |= ip4_address_is_multicast (&ip0->src_address);
+          pass0 |=
+            ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
+          pass0 |= (ip0->protocol != IP_PROTOCOL_UDP)
+            && (ip0->protocol != IP_PROTOCOL_TCP);
+
+          save_next0 = next0;
+          udp0 = ip4_next_header (ip0);
+
+          if (PREDICT_TRUE (pass0 == 0))
+            {
+              /* Count optimistically, then undo if the check redirected
+                 the packet away from its original next node. */
+              good_packets++;
+              next0 = check_adj_port_range_x1
+                (ppr_dpo0, clib_net_to_host_u16 (udp0->dst_port), next0);
+              good_packets -= (save_next0 != next0);
+              b0->error = error_node->errors
+                [IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_FAIL];
+            }
+
+          if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+                             && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+            {
+              ip4_source_and_port_range_check_trace_t *t =
+                vlib_add_trace (vm, node, b0, sizeof (*t));
+              t->pass = next0 == save_next0;
+              t->bypass = pass0;
+              t->fib_index = fib_index0;
+              t->src_addr.as_u32 = ip0->src_address.as_u32;
+              t->port = (pass0 == 0) ?
+                clib_net_to_host_u16 (udp0->dst_port) : 0;
+              t->is_tcp = ip0->protocol == IP_PROTOCOL_TCP;
+            }
+
+          /* Undo the TX advance so the buffer leaves as it arrived. */
+          if (is_tx)
+            vlib_buffer_advance (b0, -sizeof (ethernet_header_t));
+
+          vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                           to_next, n_left_to_next,
+                                           bi0, next0);
+        }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  if (is_tx)
+    vlib_node_increment_counter (vm, ip4_source_port_and_range_check_tx.index,
+                                 IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_OK,
+                                 good_packets);
+  else
+    vlib_node_increment_counter (vm, ip4_source_port_and_range_check_rx.index,
+                                 IP4_SOURCE_AND_PORT_RANGE_CHECK_ERROR_CHECK_OK,
+                                 good_packets);
+  return frame->n_vectors;
+}
+
+/**
+ * @brief Node function of the RX range-check node.
+ *
+ * Thin wrapper that runs the shared worker with is_tx cleared.
+ */
+static uword
+ip4_source_and_port_range_check_rx (vlib_main_t * vm,
+                                    vlib_node_runtime_t * node,
+                                    vlib_frame_t * frame)
+{
+  const int is_tx = 0;
+
+  return ip4_source_and_port_range_check_inline (vm, node, frame, is_tx);
+}
+
+/**
+ * @brief Node function of the TX range-check node.
+ *
+ * Thin wrapper that runs the shared worker with is_tx set.
+ */
+static uword
+ip4_source_and_port_range_check_tx (vlib_main_t * vm,
+                                    vlib_node_runtime_t * node,
+                                    vlib_frame_t * frame)
+{
+  const int is_tx = 1;
+
+  return ip4_source_and_port_range_check_inline (vm, node, frame, is_tx);
+}
+
+/* Note: Calling same function for both RX and TX nodes
+ as always checking dst_port, although
+ if this changes can easily make new function
+*/
+
+/* *INDENT-OFF* */
+/* Registration of the RX range-check graph node.  It shares the error
+ * strings and trace formatter with the TX node and declares "error-drop"
+ * as its only statically known next node. */
+VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = {
+ .function = ip4_source_and_port_range_check_rx,
+ .name = "ip4-source-and-port-range-check-rx",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN(ip4_source_and_port_range_check_error_strings),
+ .error_strings = ip4_source_and_port_range_check_error_strings,
+
+ .n_next_nodes = IP4_SOURCE_AND_PORT_RANGE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_and_port_range_check_trace,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+/* Registration of the TX range-check graph node.  It shares the error
+ * strings and trace formatter with the RX node and declares "error-drop"
+ * as its only statically known next node. */
+VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = {
+ .function = ip4_source_and_port_range_check_tx,
+ .name = "ip4-source-and-port-range-check-tx",
+ .vector_size = sizeof (u32),
+
+ .n_errors = ARRAY_LEN(ip4_source_and_port_range_check_error_strings),
+ .error_strings = ip4_source_and_port_range_check_error_strings,
+
+ .n_next_nodes = IP4_SOURCE_AND_PORT_RANGE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_and_port_range_check_trace,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Enable or disable source-and-port-range checking on an interface.
+ *
+ * The per-protocol FIB indices are copied into the feature config.  The RX
+ * feature (arc "ip4-unicast") is touched when either *_OUT FIB index is
+ * set; the TX feature (arc "ip4-output") when either *_IN index is set.
+ * An index of ~0 means "not configured".
+ *
+ * @param vm          vlib main (unused here)
+ * @param fib_index   array of IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS
+ *                    FIB indices
+ * @param sw_if_index interface to configure
+ * @param is_add      non-zero to enable, zero to disable
+ * @return always 0
+ */
+int
+set_ip_source_and_port_range_check (vlib_main_t * vm,
+                                    u32 * fib_index,
+                                    u32 sw_if_index, u32 is_add)
+{
+  ip_source_and_port_range_check_config_t config;
+  int out_configured;
+  int in_configured;
+  int proto;
+
+  for (proto = 0; proto < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; proto++)
+    config.fib_index[proto] = fib_index[proto];
+
+  out_configured =
+    (fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT] != ~0) ||
+    (fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT] != ~0);
+  in_configured =
+    (fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN] != ~0) ||
+    (fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN] != ~0);
+
+  /* For OUT we are in the RX path */
+  if (out_configured)
+    vnet_feature_enable_disable ("ip4-unicast",
+                                 "ip4-source-and-port-range-check-rx",
+                                 sw_if_index, is_add, &config,
+                                 sizeof (config));
+
+  /* For IN we are in the TX path */
+  if (in_configured)
+    vnet_feature_enable_disable ("ip4-output",
+                                 "ip4-source-and-port-range-check-tx",
+                                 sw_if_index, is_add, &config,
+                                 sizeof (config));
+
+  return 0;
+}
+
+/**
+ * @brief CLI handler for "set interface ip source-and-port-range-check".
+ *
+ * Parses an interface, up to four per-protocol/direction VRF ids and an
+ * optional "del", maps each given VRF id to its FIB index and applies the
+ * result with set_ip_source_and_port_range_check().
+ *
+ * @return 0 on success, otherwise a clib error describing the problem
+ */
+static clib_error_t *
+set_ip_source_and_port_range_check_fn (vlib_main_t * vm,
+                                       unformat_input_t * input,
+                                       vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip4_main_t *im = &ip4_main;
+  u32 vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS];
+  u32 fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS];
+  u32 sw_if_index = ~0;
+  u8 is_add = 1;
+  int vrf_set = 0;
+  int proto;
+  int rv;
+
+  /* ~0 marks "not given on the command line". */
+  for (proto = 0; proto < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; proto++)
+    {
+      fib_index[proto] = ~0;
+      vrf_id[proto] = ~0;
+    }
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+                    &sw_if_index))
+        continue;
+
+      if (unformat
+          (input, "tcp-out-vrf %d",
+           &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT])
+          || unformat
+          (input, "udp-out-vrf %d",
+           &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT])
+          || unformat
+          (input, "tcp-in-vrf %d",
+           &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN])
+          || unformat
+          (input, "udp-in-vrf %d",
+           &vrf_id[IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN]))
+        {
+          vrf_set = 1;
+          continue;
+        }
+
+      if (unformat (input, "del"))
+        {
+          is_add = 0;
+          continue;
+        }
+
+      break;
+    }
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "Interface required but not specified");
+
+  if (!vrf_set)
+    return clib_error_return (0,
+                              "TCP or UDP VRF ID required but not specified");
+
+  for (proto = 0; proto < IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS; proto++)
+    {
+      uword *entry;
+
+      if (vrf_id[proto] == 0)
+        return clib_error_return (0,
+                                  "TCP, UDP VRF ID should not be 0 (default). Should be distinct VRF for this purpose. ");
+
+      if (vrf_id[proto] == ~0)
+        continue;
+
+      entry = hash_get (im->fib_index_by_table_id, vrf_id[proto]);
+      if (entry == 0)
+        return clib_error_return (0, "Invalid VRF ID %d", vrf_id[proto]);
+
+      fib_index[proto] = entry[0];
+    }
+
+  rv =
+    set_ip_source_and_port_range_check (vm, fib_index, sw_if_index, is_add);
+
+  if (rv != 0)
+    return clib_error_return
+      (0,
+       "set source and port-range on interface returned an unexpected value: %d",
+       rv);
+
+  return 0;
+}
+
+/*?
+ * Add the 'ip4-source-and-port-range-check-rx' or
+ * 'ip4-source-and-port-range-check-tx' graph node for a given
+ * interface. 'tcp-out-vrf' and 'udp-out-vrf' will add to
+ * the RX path. 'tcp-in-vrf' and 'udp-in-vrf' will add to
+ * the TX path. A graph node will be inserted into the chain when
+ * the range check is added to the first interface. It will not
+ * be removed when the range check is removed from the last
+ * interface.
+ *
+ * By adding the range check graph node to the interface, incoming
+ * or outgoing TCP/UDP packets will be validated using the
+ * provided IPv4 FIB table (VRF).
+ *
+ * @note 'ip4-source-and-port-range-check-rx' and
+ * 'ip4-source-and-port-range-check-tx' strings are too long, so
+ * they are truncated on the 'show vlib graph' output.
+ *
+ * @todo This content needs to be validated and potentially more detail added.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before range checking is enabled:
+ * @cliexstart{show vlib graph ip4-source-and-port-range-check-tx}
+ * Name Next Previous
+ * ip4-source-and-port-range- error-drop [0]
+ * @cliexend
+ *
+ * Example of how to enable range checking on TX:
+ * @cliexcmd{set interface ip source-and-port-range-check GigabitEthernet2/0/0 udp-in-vrf 7}
+ *
+ * Example of graph node after range checking is enabled:
+ * @cliexstart{show vlib graph ip4-source-and-port-range-check-tx}
+ * Name Next Previous
+ * ip4-source-and-port-range- error-drop [0] ip4-rewrite
+ * interface-output [1]
+ * @cliexend
+ *
+ * Example of how to display the features enabled on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ *
+ * ipv4 unicast:
+ * ip4-source-and-port-range-check-rx
+ * ip4-lookup
+ *
+ * ipv4 multicast:
+ * ip4-lookup-multicast
+ *
+ * ipv4 multicast:
+ * interface-output
+ *
+ * ipv6 unicast:
+ * ip6-lookup
+ *
+ * ipv6 multicast:
+ * ip6-lookup
+ *
+ * ipv6 multicast:
+ * interface-output
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+/* CLI registration binding the command path below to
+ * set_ip_source_and_port_range_check_fn(). */
+VLIB_CLI_COMMAND (set_interface_ip_source_and_port_range_check_command, static) = {
+ .path = "set interface ip source-and-port-range-check",
+ .function = set_ip_source_and_port_range_check_fn,
+ .short_help = "set interface ip source-and-port-range-check <interface> [tcp-out-vrf <table-id>] [udp-out-vrf <table-id>] [tcp-in-vrf <table-id>] [udp-in-vrf <table-id>] [del]",
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Format a range-check DPO as "allow p1, p2-p3, ...".
+ *
+ * Consumes (index_t dpo index, u32 indent) from the va_list.  Lanes whose
+ * low port is 0 are treated as empty and skipped.  The stored hi value is
+ * printed minus one, and a lane is shown as a single port unless hi is
+ * more than one above low.
+ *
+ * @param s    vector the text is appended to
+ * @param args format arguments as described above
+ * @return the (possibly reallocated) output vector
+ */
+static u8 *
+format_ppr_dpo (u8 * s, va_list * args)
+{
+  index_t index = va_arg (*args, index_t);
+  CLIB_UNUSED (u32 indent) = va_arg (*args, u32);
+  protocol_port_range_dpo_t *ppr_dpo = protocol_port_range_dpo_get (index);
+  int need_separator = 0;
+  int blk, lane;
+
+  s = format (s, "allow ");
+
+  for (blk = 0; blk < ppr_dpo->n_used_blocks; blk++)
+    {
+      for (lane = 0; lane < 8; lane++)
+        {
+          u16 low = ppr_dpo->blocks[blk].low.as_u16[lane];
+          u16 hi = ppr_dpo->blocks[blk].hi.as_u16[lane];
+
+          /* Empty lane. */
+          if (low == 0)
+            continue;
+
+          if (need_separator)
+            s = format (s, ", ");
+
+          if (hi > (low + 1))
+            s = format (s, "%d-%d", (u32) low, (u32) hi - 1);
+          else
+            s = format (s, "%d", low);
+
+          need_separator = 1;
+        }
+    }
+  return s;
+}
+
+/* DPO lock callback installed in ppr_vft; intentionally a no-op, so
+ * range-check DPOs are not reference counted here. */
+static void
+ppr_dpo_lock (dpo_id_t * dpo)
+{
+}
+
+/* DPO unlock callback installed in ppr_vft; intentionally a no-op, so
+ * nothing in this callback returns a range-check DPO to the pool. */
+static void
+ppr_dpo_unlock (dpo_id_t * dpo)
+{
+}
+
+/* Virtual function table handed to dpo_register_new_type() for the
+ * range-check DPO type. */
+const static dpo_vft_t ppr_vft = {
+ .dv_lock = ppr_dpo_lock,
+ .dv_unlock = ppr_dpo_unlock,
+ .dv_format = format_ppr_dpo,
+};
+
+/* NULL-terminated list of graph node names associated with the DPO type
+ * for IPv4. */
+const static char *const ppr_ip4_nodes[] = {
+ "ip4-source-and-port-range-check-rx",
+ NULL,
+};
+
+/* Per-protocol node lists; only the IPv4 slot is populated. */
+const static char *const *const ppr_nodes[DPO_PROTO_NUM] = {
+ [DPO_PROTO_IP4] = ppr_ip4_nodes,
+};
+
+/**
+ * @brief Init function: record the main pointers and register the
+ *        range-check DPO type.
+ *
+ * @param vm vlib main
+ * @return NULL (no error)
+ */
+clib_error_t *
+ip4_source_and_port_range_check_init (vlib_main_t * vm)
+{
+  source_range_check_main_t *check_main = &source_range_check_main;
+
+  check_main->vlib_main = vm;
+  check_main->vnet_main = vnet_get_main ();
+
+  /* Remember the dynamically assigned type for later DPO comparisons. */
+  ppr_dpo_type = dpo_register_new_type (&ppr_vft, ppr_nodes);
+
+  return NULL;
+}
+
+VLIB_INIT_FUNCTION (ip4_source_and_port_range_check_init);
+
+/**
+ * @brief Allocate a zeroed range-check DPO from the pool.
+ *
+ * The element is cache-line aligned, cleared, and marked as having all
+ * N_PORT_RANGES_PER_DPO ranges free.
+ *
+ * @return pointer to the new pool element
+ */
+protocol_port_range_dpo_t *
+protocol_port_range_dpo_alloc (void)
+{
+  protocol_port_range_dpo_t *fresh;
+
+  pool_get_aligned (ppr_dpo_pool, fresh, CLIB_CACHE_LINE_BYTES);
+  memset (fresh, 0, sizeof (fresh[0]));
+  fresh->n_free_ranges = N_PORT_RANGES_PER_DPO;
+
+  return fresh;
+}
+
+
+/**
+ * @brief Add port ranges to the range-check DPO of a prefix in a FIB table.
+ *
+ * If the prefix already carries a FIB_SOURCE_SPECIAL DPO, its range-check
+ * DPO is extended; otherwise a new DPO is allocated.  Each (low, high)
+ * pair is written into the first free lane (a lane whose low port is 0),
+ * and the FIB entry is then added or updated to point at the DPO.
+ *
+ * @param fib_index  FIB table to operate on
+ * @param address    prefix address
+ * @param length     prefix length
+ * @param low_ports  vector of range lower bounds
+ * @param high_ports vector of range upper bounds, parallel to low_ports
+ * @return 0 on success, or
+ *         VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY when the DPO
+ *         has fewer free lanes than ranges requested (nothing is changed)
+ */
+static int
+add_port_range_adjacency (u32 fib_index,
+                          ip4_address_t * address,
+                          u32 length, u16 * low_ports, u16 * high_ports)
+{
+  protocol_port_range_dpo_t *ppr_dpo;
+  dpo_id_t dpop = DPO_INVALID;
+  int is_new_dpo = 0;
+  int highest_block = 0;
+  u32 n_free = 0;
+  int i, j, k;
+
+  fib_node_index_t fei;
+  fib_prefix_t pfx = {
+    .fp_proto = FIB_PROTOCOL_IP4,
+    .fp_len = length,
+    .fp_addr = {
+      .ip4 = *address,
+    },
+  };
+
+  /*
+   * check to see if we have already sourced this prefix
+   */
+  fei = fib_table_lookup_exact_match (fib_index, &pfx);
+
+  if (FIB_NODE_INDEX_INVALID == fei)
+    {
+      /*
+       * this is a first time add for this prefix.
+       */
+      ppr_dpo = protocol_port_range_dpo_alloc ();
+      is_new_dpo = 1;
+    }
+  else
+    {
+      /*
+       * the prefix is already there.
+       * check it was sourced by us, and if so get the range DPO from it.
+       */
+      dpo_id_t dpo = DPO_INVALID;
+      const dpo_id_t *bucket;
+
+      if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo))
+        {
+          /*
+           * there is existing state. we'll want to add the new ranges to it
+           */
+          bucket =
+            load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0);
+          ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index);
+          dpo_reset (&dpo);
+        }
+      else
+        {
+          /*
+           * there is no PPR state associated with this prefix,
+           * so we'll need a new DPO
+           */
+          ppr_dpo = protocol_port_range_dpo_alloc ();
+          is_new_dpo = 1;
+        }
+    }
+
+  /*
+   * Recount the free lanes rather than trusting the cached value, so the
+   * capacity check below reflects what is really stored in the DPO.
+   */
+  for (j = 0; j < N_BLOCKS_PER_DPO; j++)
+    for (k = 0; k < 8; k++)
+      if (ppr_dpo->blocks[j].low.as_u16[k] == 0)
+        n_free++;
+  ppr_dpo->n_free_ranges = n_free;
+
+  if (vec_len (low_ports) > ppr_dpo->n_free_ranges)
+    {
+      /* Don't leak a DPO that was allocated only for this request. */
+      if (is_new_dpo)
+        pool_put (ppr_dpo_pool, ppr_dpo);
+      return VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY;
+    }
+
+  /*
+   * The (j, k) cursor persists across ranges so each search resumes where
+   * the previous one stopped.  k must restart at 0 whenever the search
+   * moves on to the next block, otherwise only block 0 is ever filled.
+   */
+  j = k = 0;
+
+  for (i = 0; i < vec_len (low_ports); i++)
+    {
+      for (; j < N_BLOCKS_PER_DPO; j++, k = 0)
+        {
+          for (; k < 8; k++)
+            {
+              if (ppr_dpo->blocks[j].low.as_u16[k] == 0)
+                {
+                  ppr_dpo->blocks[j].low.as_u16[k] = low_ports[i];
+                  ppr_dpo->blocks[j].hi.as_u16[k] = high_ports[i];
+                  ppr_dpo->n_free_ranges--;
+                  highest_block = j;
+                  goto doublebreak;
+                }
+            }
+        }
+    doublebreak:;
+    }
+
+  /*
+   * Only ever grow the used-block count: filling a hole in an early block
+   * must not hide ranges that already live in later blocks.
+   */
+  if (ppr_dpo->n_used_blocks < highest_block + 1)
+    ppr_dpo->n_used_blocks = highest_block + 1;
+
+  /*
+   * add or update the entry in the FIB
+   */
+  dpo_set (&dpop, ppr_dpo_type, DPO_PROTO_IP4, (ppr_dpo - ppr_dpo_pool));
+
+  if (FIB_NODE_INDEX_INVALID == fei)
+    {
+      fib_table_entry_special_dpo_add (fib_index,
+                                       &pfx,
+                                       FIB_SOURCE_SPECIAL,
+                                       FIB_ENTRY_FLAG_NONE, &dpop);
+    }
+  else
+    {
+      fib_entry_special_update (fei,
+                                FIB_SOURCE_SPECIAL,
+                                FIB_ENTRY_FLAG_NONE, &dpop);
+    }
+
+  return 0;
+}
+
+/**
+ * @brief Remove port ranges from the range-check DPO of a prefix.
+ *
+ * Every (low, high) pair that exactly matches a stored lane is cleared.
+ * The free-lane count is then recomputed; when no range is left the
+ * FIB_SOURCE_SPECIAL entry for the prefix is removed from the table.
+ *
+ * @param fib_index  FIB table to operate on
+ * @param address    prefix address
+ * @param length     prefix length
+ * @param low_ports  vector of range lower bounds to remove
+ * @param high_ports vector of range upper bounds, parallel to low_ports
+ * @return 0 on success, or VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE when
+ *         the prefix is absent or carries no FIB_SOURCE_SPECIAL DPO
+ */
+static int
+remove_port_range_adjacency (u32 fib_index,
+                             ip4_address_t * address,
+                             u32 length, u16 * low_ports, u16 * high_ports)
+{
+  protocol_port_range_dpo_t *ppr_dpo;
+  fib_node_index_t fei;
+  int i, j, k;
+
+  fib_prefix_t pfx = {
+    .fp_proto = FIB_PROTOCOL_IP4,
+    .fp_len = length,
+    .fp_addr = {
+      .ip4 = *address,
+    },
+  };
+
+  /*
+   * check to see if we have sourced this prefix
+   */
+  fei = fib_table_lookup_exact_match (fib_index, &pfx);
+
+  if (FIB_NODE_INDEX_INVALID == fei)
+    {
+      /*
+       * not one of ours
+       */
+      return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE;
+    }
+  else
+    {
+      /*
+       * the prefix is already there.
+       * check it was sourced by us
+       */
+      dpo_id_t dpo = DPO_INVALID;
+      const dpo_id_t *bucket;
+
+      if (fib_entry_get_dpo_for_source (fei, FIB_SOURCE_SPECIAL, &dpo))
+        {
+          /*
+           * there is existing state. we'll want to remove the ranges from it
+           */
+          bucket =
+            load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0);
+          ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index);
+          dpo_reset (&dpo);
+        }
+      else
+        {
+          /*
+           * not one of ours
+           */
+          return VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE;
+        }
+    }
+
+  /* Clear the first lane that matches each requested (low, high) pair. */
+  for (i = 0; i < vec_len (low_ports); i++)
+    {
+      for (j = 0; j < N_BLOCKS_PER_DPO; j++)
+        {
+          for (k = 0; k < 8; k++)
+            {
+              if (low_ports[i] == ppr_dpo->blocks[j].low.as_u16[k] &&
+                  high_ports[i] == ppr_dpo->blocks[j].hi.as_u16[k])
+                {
+                  ppr_dpo->blocks[j].low.as_u16[k] =
+                    ppr_dpo->blocks[j].hi.as_u16[k] = 0;
+                  goto doublebreak;
+                }
+            }
+        }
+    doublebreak:;
+    }
+
+  ppr_dpo->n_free_ranges = 0;
+
+  /*
+   * Have we deleted all ranges yet?
+   * i walks the blocks and j the 8 lanes within a block.
+   */
+  for (i = 0; i < N_BLOCKS_PER_DPO; i++)
+    {
+      for (j = 0; j < 8; j++)
+        {
+          if (ppr_dpo->blocks[i].low.as_u16[j] == 0)
+            ppr_dpo->n_free_ranges++;
+        }
+    }
+
+  if (N_PORT_RANGES_PER_DPO == ppr_dpo->n_free_ranges)
+    {
+      /* Yes, lose the adjacency... */
+      fib_table_entry_special_remove (fib_index, &pfx, FIB_SOURCE_SPECIAL);
+    }
+  else
+    {
+      /*
+       * compact the ranges down to a contiguous block
+       */
+      // FIXME. TODO.
+    }
+
+  return 0;
+}
+
+// This will be moved to another file and implemented post API freeze.
+/**
+ * @brief IPv6 placeholder for the add/del API.
+ *
+ * No ranges are added or removed: the function looks up the table for
+ * @c vrf_id, asserts that it exists, unlocks it and returns 0.  The
+ * address, length, port vectors and is_add arguments are not used.
+ *
+ * NOTE(review): the lookup and unlock use FIB_PROTOCOL_IP4 and no matching
+ * lock is taken in this function; confirm this is intended for the stub.
+ *
+ * @return always 0
+ */
+int
+ip6_source_and_port_range_check_add_del (ip6_address_t * address,
+                                         u32 length,
+                                         u32 vrf_id,
+                                         u16 * low_ports,
+                                         u16 * high_ports, int is_add)
+{
+  u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
+
+  ASSERT (~0 != fib_index);
+
+  fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_CLASSIFY);
+
+  return 0;
+}
+
+/**
+ * @brief Add or remove port ranges for an IPv4 prefix in a VRF.
+ *
+ * The table for @c vrf_id is found (or created) and locked, then the
+ * request is forwarded to the add or remove helper.
+ *
+ * @param address    prefix address
+ * @param length     prefix length
+ * @param vrf_id     table id of the range-check VRF
+ * @param low_ports  vector of range lower bounds
+ * @param high_ports vector of range upper bounds, parallel to low_ports
+ * @param is_add     non-zero to add the ranges, zero to remove them
+ * @return the helper's result: 0 on success, otherwise one of the
+ *         VNET_API_ERROR_* codes the helpers return
+ */
+int
+ip4_source_and_port_range_check_add_del (ip4_address_t * address,
+                                         u32 length,
+                                         u32 vrf_id,
+                                         u16 * low_ports,
+                                         u16 * high_ports, int is_add)
+{
+  u32 fib_index;
+  int rv;
+
+  fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
+                                                 FIB_SOURCE_CLASSIFY);
+
+  /* Propagate the helper's status so callers can report failures. */
+  if (is_add == 0)
+    {
+      rv = remove_port_range_adjacency (fib_index, address, length,
+                                        low_ports, high_ports);
+    }
+  else
+    {
+      rv = add_port_range_adjacency (fib_index, address, length,
+                                     low_ports, high_ports);
+    }
+
+  return rv;
+}
+
+/**
+ * @brief CLI handler for "set ip source-and-port-range-check".
+ *
+ * Parses a prefix, a VRF id, any number of "port N" / "range A - B" items
+ * and an optional "del", then applies them through the IPv4 add/del API.
+ * Ports are collected as (low, high) pairs where high is one past the last
+ * allowed port.
+ *
+ * NOTE(review): high is kept in a u16, so for port/range ending at 65535
+ * the stored upper bound wraps to 0; confirm the data path handles ranges
+ * (not just the single port) that end at 65535.
+ *
+ * The temporary port vectors are released on every exit path.
+ *
+ * @return 0 on success, otherwise a clib error describing the problem
+ */
+static clib_error_t *
+ip_source_and_port_range_check_command_fn (vlib_main_t * vm,
+                                           unformat_input_t * input,
+                                           vlib_cli_command_t * cmd)
+{
+  clib_error_t *error = 0;
+  u16 *low_ports = 0;
+  u16 *high_ports = 0;
+  u16 this_low;
+  u16 this_hi;
+  ip4_address_t ip4_addr;
+  ip6_address_t ip6_addr;	//This function will be moved to generic impl when v6 done.
+  u32 length;
+  u32 tmp, tmp2;
+  u32 vrf_id = ~0;
+  int is_add = 1, ip_ver = ~0;
+  int rv;
+
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "%U/%d", unformat_ip4_address, &ip4_addr, &length))
+        ip_ver = 4;
+      else
+        if (unformat
+            (input, "%U/%d", unformat_ip6_address, &ip6_addr, &length))
+        ip_ver = 6;
+      else if (unformat (input, "vrf %d", &vrf_id))
+        ;
+      else if (unformat (input, "del"))
+        is_add = 0;
+      else if (unformat (input, "port %d", &tmp))
+        {
+          if (tmp == 0 || tmp > 65535)
+            {
+              error = clib_error_return (0, "port %d out of range", tmp);
+              goto done;
+            }
+          this_low = tmp;
+          this_hi = this_low + 1;
+          vec_add1 (low_ports, this_low);
+          vec_add1 (high_ports, this_hi);
+        }
+      else if (unformat (input, "range %d - %d", &tmp, &tmp2))
+        {
+          if (tmp > tmp2)
+            {
+              error = clib_error_return (0, "ports %d and %d out of order",
+                                         tmp, tmp2);
+              goto done;
+            }
+          if (tmp == 0 || tmp > 65535)
+            {
+              error = clib_error_return (0, "low port %d out of range", tmp);
+              goto done;
+            }
+          if (tmp2 == 0 || tmp2 > 65535)
+            {
+              error =
+                clib_error_return (0, "high port %d out of range", tmp2);
+              goto done;
+            }
+          this_low = tmp;
+          this_hi = tmp2 + 1;
+          vec_add1 (low_ports, this_low);
+          vec_add1 (high_ports, this_hi);
+        }
+      else
+        break;
+    }
+
+  if (ip_ver == ~0)
+    {
+      error = clib_error_return (0, " <address>/<mask> not specified");
+      goto done;
+    }
+
+  if (vrf_id == ~0)
+    {
+      error = clib_error_return (0, " VRF ID required, not specified");
+      goto done;
+    }
+
+  if (vec_len (low_ports) == 0)
+    {
+      error = clib_error_return (0,
+                                 " Both VRF ID and range/port must be set for a protocol.");
+      goto done;
+    }
+
+  if (vrf_id == 0)
+    {
+      error = clib_error_return (0, " VRF ID can not be 0 (default).");
+      goto done;
+    }
+
+
+  if (ip_ver == 4)
+    rv = ip4_source_and_port_range_check_add_del
+      (&ip4_addr, length, vrf_id, low_ports, high_ports, is_add);
+  else
+    {
+      error = clib_error_return (0, " IPv6 in subsequent patch");
+      goto done;
+    }
+
+  switch (rv)
+    {
+    case 0:
+      break;
+
+    case VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE:
+      error = clib_error_return
+        (0, " Incorrect adjacency for add/del operation");
+      break;
+
+    case VNET_API_ERROR_EXCEEDED_NUMBER_OF_PORTS_CAPACITY:
+      error = clib_error_return (0, " Too many ports in add/del operation");
+      break;
+
+    case VNET_API_ERROR_EXCEEDED_NUMBER_OF_RANGES_CAPACITY:
+      error = clib_error_return
+        (0, " Too many ranges requested for add operation");
+      break;
+
+    default:
+      error =
+        clib_error_return (0, " returned an unexpected value: %d", rv);
+      break;
+    }
+
+done:
+  /* The port values were copied into the DPO; the vectors are ours. */
+  vec_free (low_ports);
+  vec_free (high_ports);
+  return error;
+}
+
+/*?
+ * This command adds an IP Subnet and range of ports to be validated
+ * by an IP FIB table (VRF).
+ *
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+ *
+ * @cliexpar
+ * Example of how to add an IPv4 subnet and single port to an IPv4 FIB table:
+ * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 port 23}
+ * Example of how to add an IPv4 subnet and range of ports to an IPv4 FIB table:
+ * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100}
+ * Example of how to delete an IPv4 subnet and single port from an IPv4 FIB table:
+ * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 port 23 del}
+ * Example of how to delete an IPv4 subnet and range of ports from an IPv4 FIB table:
+ * @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100 del}
+?*/
+/* *INDENT-OFF* */
+/*
+ * Registers the "set ip source-and-port-range-check" CLI command.
+ * The handler requires a non-zero vrf, an <ip-addr>/<mask> prefix and at
+ * least one "port" or "range" item; "del" switches from add to delete.
+ */
+VLIB_CLI_COMMAND (ip_source_and_port_range_check_command, static) = {
+ .path = "set ip source-and-port-range-check",
+ .function = ip_source_and_port_range_check_command_fn,
+ .short_help =
+ "set ip source-and-port-range-check vrf <table-id> <ip-addr>/<mask> {port nn | range <nn> - <nn>} [del]",
+};
+/* *INDENT-ON* */
+
+
+/**
+ * @brief CLI handler for "show ip source-and-port-range-check".
+ *
+ * Looks up the given IPv4 address (as a /32) in the FIB of the given VRF.
+ * If the matching entry was not installed by FIB_SOURCE_SPECIAL the
+ * address is reported as "src address drop". Otherwise, with "port <n>"
+ * the port is tested against the stored ranges and PASS/FAIL is printed;
+ * without a port every non-empty stored range is listed.
+ *
+ * @returns 0 on success, otherwise a clib error for missing or invalid
+ *          arguments.
+ */
+static clib_error_t *
+show_source_and_port_range_check_fn (vlib_main_t * vm,
+                                     unformat_input_t * input,
+                                     vlib_cli_command_t * cmd)
+{
+  protocol_port_range_dpo_t *ppr_dpo;
+  u32 fib_index;
+  u8 addr_set = 0;
+  u32 vrf_id = ~0;
+  int rv, i, j;
+  u32 port = 0;
+  fib_prefix_t pfx = {
+    .fp_proto = FIB_PROTOCOL_IP4,
+    .fp_len = 32,
+  };
+  dpo_id_t dpo = DPO_INVALID;
+  const dpo_id_t *bucket;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "%U", unformat_ip4_address, &pfx.fp_addr.ip4))
+        addr_set = 1;
+      else if (unformat (input, "vrf %d", &vrf_id))
+        ;
+      else if (unformat (input, "port %d", &port))
+        {
+          /* The port is later narrowed to u16; refuse values that would wrap. */
+          if (port > 65535)
+            return clib_error_return (0, "port %d out of range", port);
+        }
+      else
+        break;
+    }
+
+  if (addr_set == 0)
+    return clib_error_return (0, "<address> not specified");
+
+  if (vrf_id == ~0)
+    return clib_error_return (0, "VRF ID required, not specified");
+
+  fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
+  if (~0 == fib_index)
+    return clib_error_return (0, "VRF %d not found", vrf_id);
+
+  /*
+   * find the longest prefix match on the address requested,
+   * check it was sourced by us
+   */
+  if (!fib_entry_get_dpo_for_source (fib_table_lookup (fib_index, &pfx),
+                                     FIB_SOURCE_SPECIAL, &dpo))
+    {
+      /*
+       * not one of ours
+       */
+      vlib_cli_output (vm, "%U: src address drop", format_ip4_address,
+                       &pfx.fp_addr.ip4);
+      return 0;
+    }
+
+  bucket = load_balance_get_bucket_i (load_balance_get (dpo.dpoi_index), 0);
+  ppr_dpo = protocol_port_range_dpo_get (bucket->dpoi_index);
+  dpo_reset (&dpo);
+
+  if (port)
+    {
+      /* 1234 is an arbitrary sentinel handed back when the port matches. */
+      rv = check_adj_port_range_x1 (ppr_dpo, (u16) port, 1234);
+      if (rv == 1234)
+        vlib_cli_output (vm, "%U port %d PASS", format_ip4_address,
+                         &pfx.fp_addr.ip4, port);
+      else
+        vlib_cli_output (vm, "%U port %d FAIL", format_ip4_address,
+                         &pfx.fp_addr.ip4, port);
+      return 0;
+    }
+  else
+    {
+      u8 *s;
+
+      s = format (0, "%U: ", format_ip4_address, &pfx.fp_addr.ip4);
+
+      for (i = 0; i < N_BLOCKS_PER_DPO; i++)
+        {
+          for (j = 0; j < 8; j++)
+            {
+              if (ppr_dpo->blocks[i].low.as_u16[j])
+                s = format (s, "%d - %d ",
+                            (u32) ppr_dpo->blocks[i].low.as_u16[j],
+                            (u32) ppr_dpo->blocks[i].hi.as_u16[j]);
+            }
+        }
+      /* s is a vector without a NUL terminator, so print it with %v. */
+      vlib_cli_output (vm, "%v", s);
+      vec_free (s);
+    }
+
+  return 0;
+}
+
+/*?
+ * Display the range of ports being validated by an IPv4 FIB for a given
+ * IP or subnet, or test if a given IP and port are being validated.
+ *
+ * @todo This is incomplete. This needs a detailed description and a
+ * practical example.
+ *
+ * @cliexpar
+ * Example of how to display the set of ports being validated for a given
+ * IPv4 subnet:
+ * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.0}
+ * 172.16.2.0: 23 - 101
+ * @cliexend
+ * Example of how to test whether a given IPv4 address and port
+ * are being validated:
+ * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.2 port 23}
+ * 172.16.2.2 port 23 PASS
+ * @cliexend
+ * @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.2 port 250}
+ * 172.16.2.2 port 250 FAIL
+ * @cliexend
+ ?*/
+/* *INDENT-OFF* */
+/*
+ * Registers the "show ip source-and-port-range-check" CLI command.
+ * The handler requires both a vrf and an IPv4 address; "port" is optional.
+ */
+VLIB_CLI_COMMAND (show_source_and_port_range_check, static) = {
+ .path = "show ip source-and-port-range-check",
+ .function = show_source_and_port_range_check_fn,
+ .short_help =
+ "show ip source-and-port-range-check vrf <table-id> <ip-addr> [port <n>]",
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_source_check.c b/src/vnet/ip/ip4_source_check.c
new file mode 100644
index 00000000..17a1cb1b
--- /dev/null
+++ b/src/vnet/ip/ip4_source_check.c
@@ -0,0 +1,562 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip4_source_check.c: IP v4 check source address (unicast RPF check)
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_urpf_list.h>
+#include <vnet/dpo/load_balance.h>
+
+/**
+ * @file
+ * @brief IPv4 Unicast Source Check.
+ *
+ * This file contains the IPv4 interface unicast source check.
+ */
+
+
+/** Per-packet trace record used by the IPv4 source check nodes. */
+typedef struct
+{
+ /* Leading packet bytes; rendered as an IPv4 header by the trace formatter. */
+ u8 packet_data[64];
+ /* NOTE(review): not read by format_ip4_source_check_trace -- confirm it is populated anywhere. */
+ index_t urpf;
+} ip4_source_check_trace_t;
+
+/**
+ * @brief Trace formatter for the IPv4 source check nodes.
+ *
+ * Consumes the vlib main, node and trace-record arguments from @p va and
+ * appends the captured packet bytes, formatted as an IPv4 header, to @p s.
+ *
+ * @returns The (possibly reallocated) output vector.
+ */
+static u8 *
+format_ip4_source_check_trace (u8 * s, va_list * va)
+{
+  ip4_source_check_trace_t *trace;
+
+  /* The first two arguments are unused but must still be consumed. */
+  (void) va_arg (*va, vlib_main_t *);
+  (void) va_arg (*va, vlib_node_t *);
+  trace = va_arg (*va, ip4_source_check_trace_t *);
+
+  return format (s, "%U",
+                 format_ip4_header, trace->packet_data,
+                 sizeof (trace->packet_data));
+}
+
+/** Next-node indices owned by the source check nodes themselves. */
+typedef enum
+{
+ /* Packets that fail the check are sent here ("error-drop" in the node registrations). */
+ IP4_SOURCE_CHECK_NEXT_DROP,
+ /* Number of next nodes; must stay last. */
+ IP4_SOURCE_CHECK_N_NEXT,
+} ip4_source_check_next_t;
+
+/** Flavour of source check performed by ip4_source_check_inline. */
+typedef enum
+{
+ /* Check the uRPF list against the packet's RX interface (the CLI default). */
+ IP4_SOURCE_CHECK_REACHABLE_VIA_RX,
+ /* Check only the uRPF list itself via fib_urpf_check_size (CLI keyword "loose"). */
+ IP4_SOURCE_CHECK_REACHABLE_VIA_ANY,
+} ip4_source_check_type_t;
+
+/** Per-interface feature configuration handed to the source check nodes. */
+typedef union
+{
+ /* FIB in which the packet's source address is looked up. */
+ u32 fib_index;
+} ip4_source_check_config_t;
+
+/**
+ * @brief Shared worker for the two IPv4 unicast source check nodes.
+ *
+ * For every buffer in @p frame the source address is looked up in the
+ * mtrie of the FIB named by the feature configuration. The packet
+ * continues along the feature arc when the source is multicast or
+ * 255.255.255.255, or when the uRPF test selected by
+ * @p source_check_type succeeds; otherwise it goes to
+ * IP4_SOURCE_CHECK_NEXT_DROP.
+ *
+ * @param vm                vlib main.
+ * @param node              Runtime of the calling node.
+ * @param frame             Frame of buffer indices to process.
+ * @param source_check_type Which uRPF test to apply.
+ *
+ * @returns The number of buffers in @p frame.
+ */
+always_inline uword
+ip4_source_check_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ ip4_source_check_type_t source_check_type)
+{
+ u32 n_left_from, *from, *to_next;
+ u32 next_index;
+ /* Error codes are taken from the ip4-input node's error table. */
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_input_node.index);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (ip4_source_check_trace_t));
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Dual loop: handles two packets per pass and prefetches the following two, hence the >= 4. */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip4_header_t *ip0, *ip1;
+ ip4_fib_mtrie_t *mtrie0, *mtrie1;
+ ip4_fib_mtrie_leaf_t leaf0, leaf1;
+ ip4_source_check_config_t *c0, *c1;
+ const load_balance_t *lb0, *lb1;
+ u32 pi0, next0, pass0, lb_index0;
+ u32 pi1, next1, pass1, lb_index1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
+ }
+
+ /* Speculatively enqueue both buffers to the cached next frame. */
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ /* Fetch the per-interface config and the next node on the feature arc. */
+ c0 =
+ vnet_feature_next_with_data (vnet_buffer (p0)->sw_if_index
+ [VLIB_RX], &next0, p0,
+ sizeof (c0[0]));
+ c1 =
+ vnet_feature_next_with_data (vnet_buffer (p1)->sw_if_index
+ [VLIB_RX], &next1, p1,
+ sizeof (c1[0]));
+
+ mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
+ mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie;
+
+ /* Three-step mtrie lookup keyed on the SOURCE address. */
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+ leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
+ leaf1 =
+ ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
+ leaf1 =
+ ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
+
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+
+ lb0 = load_balance_get (lb_index0);
+ lb1 = load_balance_get (lb_index1);
+
+ /* Pass multicast and 255.255.255.255 source addresses. */
+ pass0 = ip4_address_is_multicast (&ip0->src_address)
+ || ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
+ pass1 = ip4_address_is_multicast (&ip1->src_address)
+ || ip1->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
+
+ if (IP4_SOURCE_CHECK_REACHABLE_VIA_RX == source_check_type)
+ {
+ /* Test the load-balance's uRPF list against the RX interface. */
+ pass0 |= fib_urpf_check (lb0->lb_urpf,
+ vnet_buffer (p0)->sw_if_index
+ [VLIB_RX]);
+ pass1 |=
+ fib_urpf_check (lb1->lb_urpf,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ }
+ else
+ {
+ /* Test only the uRPF list itself (presumably that it is non-empty -- confirm in fib_urpf_list.h). */
+ pass0 |= fib_urpf_check_size (lb0->lb_urpf);
+ pass1 |= fib_urpf_check_size (lb1->lb_urpf);
+ }
+ next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP);
+ next1 = (pass1 ? next1 : IP4_SOURCE_CHECK_NEXT_DROP);
+
+ /* The error is set on every packet, including those that pass. */
+ p0->error =
+ error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
+ p1->error =
+ error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ /* Single loop: same processing, one packet at a time, for the remainder. */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip4_header_t *ip0;
+ ip4_fib_mtrie_t *mtrie0;
+ ip4_fib_mtrie_leaf_t leaf0;
+ ip4_source_check_config_t *c0;
+ u32 pi0, next0, pass0, lb_index0;
+ const load_balance_t *lb0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ c0 =
+ vnet_feature_next_with_data (vnet_buffer (p0)->sw_if_index
+ [VLIB_RX], &next0, p0,
+ sizeof (c0[0]));
+
+ mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
+
+ leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
+
+ leaf0 =
+ ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
+
+ lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+
+ lb0 = load_balance_get (lb_index0);
+
+ /* Pass multicast and 255.255.255.255 source addresses. */
+ pass0 = ip4_address_is_multicast (&ip0->src_address)
+ || ip0->src_address.as_u32 == clib_host_to_net_u32 (0xFFFFFFFF);
+
+ if (IP4_SOURCE_CHECK_REACHABLE_VIA_RX == source_check_type)
+ {
+ pass0 |= fib_urpf_check (lb0->lb_urpf,
+ vnet_buffer (p0)->sw_if_index
+ [VLIB_RX]);
+ }
+ else
+ {
+ pass0 |= fib_urpf_check_size (lb0->lb_urpf);
+ }
+
+ next0 = (pass0 ? next0 : IP4_SOURCE_CHECK_NEXT_DROP);
+ p0->error =
+ error_node->errors[IP4_ERROR_UNICAST_SOURCE_CHECK_FAILS];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/**
+ * @brief Node function for "ip4-source-check-via-any".
+ *
+ * Runs the shared worker in IP4_SOURCE_CHECK_REACHABLE_VIA_ANY mode.
+ *
+ * @returns The value returned by ip4_source_check_inline().
+ */
+static uword
+ip4_source_check_reachable_via_any (vlib_main_t * vm,
+                                    vlib_node_runtime_t * node,
+                                    vlib_frame_t * frame)
+{
+  const ip4_source_check_type_t mode = IP4_SOURCE_CHECK_REACHABLE_VIA_ANY;
+
+  return ip4_source_check_inline (vm, node, frame, mode);
+}
+
+/**
+ * @brief Node function for "ip4-source-check-via-rx".
+ *
+ * Runs the shared worker in IP4_SOURCE_CHECK_REACHABLE_VIA_RX mode.
+ *
+ * @returns The value returned by ip4_source_check_inline().
+ */
+static uword
+ip4_source_check_reachable_via_rx (vlib_main_t * vm,
+                                   vlib_node_runtime_t * node,
+                                   vlib_frame_t * frame)
+{
+  const ip4_source_check_type_t mode = IP4_SOURCE_CHECK_REACHABLE_VIA_RX;
+
+  return ip4_source_check_inline (vm, node, frame, mode);
+}
+
+/* *INDENT-OFF* */
+/*
+ * Graph node "ip4-source-check-via-any" (selected by the CLI keyword
+ * "loose"). Its only own next node is error-drop; passing packets follow
+ * the feature arc.
+ */
+VLIB_REGISTER_NODE (ip4_check_source_reachable_via_any) = {
+ .function = ip4_source_check_reachable_via_any,
+ .name = "ip4-source-check-via-any",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP4_SOURCE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_check_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_check_source_reachable_via_any,
+ ip4_source_check_reachable_via_any);
+
+/* *INDENT-OFF* */
+/*
+ * Graph node "ip4-source-check-via-rx" (the CLI default). Its only own
+ * next node is error-drop; passing packets follow the feature arc.
+ */
+VLIB_REGISTER_NODE (ip4_check_source_reachable_via_rx) = {
+ .function = ip4_source_check_reachable_via_rx,
+ .name = "ip4-source-check-via-rx",
+ .vector_size = sizeof (u32),
+
+ .n_next_nodes = IP4_SOURCE_CHECK_N_NEXT,
+ .next_nodes = {
+ [IP4_SOURCE_CHECK_NEXT_DROP] = "error-drop",
+ },
+
+ .format_buffer = format_ip4_header,
+ .format_trace = format_ip4_source_check_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_check_source_reachable_via_rx,
+ ip4_source_check_reachable_via_rx);
+
+/**
+ * @brief CLI handler for "set interface ip source-check".
+ *
+ * Enables (default) or disables ("del") one of the source check features
+ * on the "ip4-unicast" arc of the given interface. "strict" (the default)
+ * selects "ip4-source-check-via-rx" and "loose" selects
+ * "ip4-source-check-via-any". The feature is configured with the FIB
+ * index currently bound to the interface.
+ *
+ * @returns 0 on success, otherwise a clib error for unparsable input or
+ *          a missing interface.
+ */
+static clib_error_t *
+set_ip_source_check (vlib_main_t * vm,
+                     unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  vnet_main_t *vnm = vnet_get_main ();
+  ip4_main_t *im = &ip4_main;
+  clib_error_t *error = 0;
+  u32 sw_if_index, is_del;
+  ip4_source_check_config_t config;
+  char *feature_name = "ip4-source-check-via-rx";
+
+  sw_if_index = ~0;
+  is_del = 0;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat_user
+          (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+        ;
+      else if (unformat (line_input, "del"))
+        is_del = 1;
+      /* "strict" is advertised in the short help, so accept it explicitly. */
+      else if (unformat (line_input, "strict"))
+        feature_name = "ip4-source-check-via-rx";
+      else if (unformat (line_input, "loose"))
+        feature_name = "ip4-source-check-via-any";
+      else
+        {
+          error = unformat_parse_error (line_input);
+          goto done;
+        }
+    }
+
+  if (~0 == sw_if_index)
+    {
+      error = clib_error_return (0, "unknown interface `%U'",
+                                 format_unformat_error, line_input);
+      goto done;
+    }
+
+  config.fib_index = im->fib_index_by_sw_if_index[sw_if_index];
+  vnet_feature_enable_disable ("ip4-unicast", feature_name, sw_if_index,
+                               is_del == 0, &config, sizeof (config));
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*?
+ * This command adds the 'ip4-source-check-via-rx' graph node for
+ * a given interface. By adding the IPv4 source check graph node to
+ * an interface, the code verifies that the source address of incoming
+ * unicast packets are reachable over the incoming interface. Two flavours
+ * are supported (the default is strict):
+ * - loose: accept ingress packet if there is a route to reach the source
+ * - strict: accept ingress packet if it arrived on an interface which
+ * the route to the source uses. i.e. an interface that the source
+ * is reachable via.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of graph node before source checking is enabled:
+ * @cliexstart{show vlib graph ip4-source-check-via-rx}
+ * Name Next Previous
+ * ip4-source-check-via-rx error-drop [0]
+ * @cliexend
+ *
+ * Example of how to enable unicast source checking on an interface:
+ * @cliexcmd{set interface ip source-check GigabitEthernet2/0/0 loose}
+ *
+ * Example of graph node after source checking is enabled:
+ * @cliexstart{show vlib graph ip4-source-check-via-rx}
+ * Name Next Previous
+ * ip4-source-check-via-rx error-drop [0] ip4-input-no-checksum
+ * ip4-source-and-port-range- ip4-input
+ * @cliexend
+ *
+ * Example of how to display the features enabled on an interface:
+ * @cliexstart{show ip interface features GigabitEthernet2/0/0}
+ * IP feature paths configured on GigabitEthernet2/0/0...
+ *
+ * ipv4 unicast:
+ * ip4-source-check-via-rx
+ * ip4-lookup
+ *
+ * ipv4 multicast:
+ * ip4-lookup-multicast
+ *
+ * ipv4 multicast:
+ * interface-output
+ *
+ * ipv6 unicast:
+ * ip6-lookup
+ *
+ * ipv6 multicast:
+ * ip6-lookup
+ *
+ * ipv6 multicast:
+ * interface-output
+ * @cliexend
+ *
+ * Example of how to disable unicast source checking on an interface:
+ * @cliexcmd{set interface ip source-check GigabitEthernet2/0/0 del}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+/* Registers the "set interface ip source-check" CLI command, handled by set_ip_source_check. */
+VLIB_CLI_COMMAND (set_interface_ip_source_check_command, static) = {
+ .path = "set interface ip source-check",
+ .function = set_ip_source_check,
+ .short_help = "set interface ip source-check <interface> [strict|loose] [del]",
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI handler for "ip urpf-accept".
+ *
+ * Adds (default) or removes ("del") a special FIB entry sourced by
+ * FIB_SOURCE_URPF_EXEMPT for the given IPv4 prefix. Without "table" the
+ * entry goes into FIB index 0. Without a prefix the zero-initialised
+ * prefix (0.0.0.0/0) is used.
+ *
+ * The prefix length is parsed into a u32 local and range-checked before
+ * being stored, so "%d" never writes through a pointer to a field of a
+ * different width and lengths above 32 are refused.
+ *
+ * @returns 0 on success, otherwise a clib error for unparsable input, an
+ *          invalid prefix length or an unknown table.
+ */
+static clib_error_t *
+ip_source_check_accept (vlib_main_t * vm,
+                        unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  fib_prefix_t pfx = {
+    .fp_proto = FIB_PROTOCOL_IP4,
+  };
+  clib_error_t *error = NULL;
+  u32 table_id, is_add, fib_index, prefix_len;
+
+  is_add = 1;
+  table_id = ~0;
+
+  /* Get a line of input. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "table %d", &table_id))
+        ;
+      else if (unformat (line_input, "del"))
+        is_add = 0;
+      else if (unformat (line_input, "add"))
+        is_add = 1;
+      else if (unformat (line_input, "%U/%d",
+                         unformat_ip4_address, &pfx.fp_addr.ip4,
+                         &prefix_len))
+        {
+          if (prefix_len > 32)
+            {
+              error = clib_error_return (0, "invalid prefix length %d",
+                                         prefix_len);
+              goto done;
+            }
+          pfx.fp_len = prefix_len;
+          pfx.fp_proto = FIB_PROTOCOL_IP4;
+        }
+      else
+        {
+          error = unformat_parse_error (line_input);
+          goto done;
+        }
+    }
+
+  if (~0 != table_id)
+    {
+      fib_index = fib_table_find (pfx.fp_proto, table_id);
+      if (~0 == fib_index)
+        {
+          error = clib_error_return (0, "Nonexistent table id %d", table_id);
+          goto done;
+        }
+    }
+  else
+    {
+      fib_index = 0;
+    }
+
+  if (is_add)
+    {
+      fib_table_entry_special_add (fib_index,
+                                   &pfx,
+                                   FIB_SOURCE_URPF_EXEMPT,
+                                   FIB_ENTRY_FLAG_DROP);
+    }
+  else
+    {
+      fib_table_entry_special_remove (fib_index,
+                                      &pfx, FIB_SOURCE_URPF_EXEMPT);
+    }
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*?
+ * Add an exemption for a prefix to pass the Unicast Reverse Path
+ * Forwarding (uRPF) loose check. This is for testing purposes only.
+ * If the '<em>table</em>' is not entered it defaults to 0. The default
+ * action is '<em>add</em>'. If no prefix is entered, 0.0.0.0/0 is used.
+ * VPP always performs a loose uRPF check for for-us traffic.
+ *
+ * @cliexpar
+ * Example of how to add a uRPF exception to a FIB table to pass the
+ * loose RPF tests:
+ * @cliexcmd{ip urpf-accept table 7 10.0.0.0/8 add}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_source_check_accept_command, static) = {
+  .path = "ip urpf-accept",
+  .function = ip_source_check_accept,
+  .short_help = "ip urpf-accept [table <table-id>] [add|del] [<ip-addr>/<mask>]",
+};
+/* *INDENT-ON* */
+
+
+/* Dummy init function to get us linked in. */
+/* Does no work and always returns 0 (no error); registered as a vlib init function below. */
+clib_error_t *
+ip4_source_check_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip4_source_check_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_test.c b/src/vnet/ip/ip4_test.c
new file mode 100644
index 00000000..73dabfdc
--- /dev/null
+++ b/src/vnet/ip/ip4_test.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+/**
+ * @file
+ * @brief IPv4 FIB Tester.
+ *
+ * Not compiled in by default. IPv4 FIB tester. Add, probe, delete a bunch of
+ * random routes / masks and make sure that the mtrie agrees with
+ * the hash-table FIB.
+ *
+ * Manipulate the FIB by means of the debug CLI commands, to minimize
+ * the chances of doing something idiotic.
+ */
+
+/*
+ * These routines need to be redeclared non-static elsewhere.
+ *
+ * Also: rename ip_route() -> vnet_ip_route_cmd() and add the usual
+ * test_route_init() call to main.c
+ */
+/* Route CLI handler; thrash() feeds it "add table ..." and "del table ..." command strings. */
+clib_error_t *vnet_ip_route_cmd (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd_arg);
+
+/* Lookup check for address a in FIB fib_index0; thrash() treats a zero return as a failed lookup. */
+int ip4_lookup_validate (ip4_address_t * a, u32 fib_index0);
+
+/* NOTE(review): declared here but not called anywhere in the visible part of this file. */
+ip4_fib_t *find_fib_by_table_index_or_id (ip4_main_t * im,
+ u32 table_index_or_id, u32 flags);
+
+/* Routes to insert/delete/probe in FIB */
+/* One randomly generated route used by the FIB tester. */
+typedef struct
+{
+ /* Prefix address, stored in network byte order and already masked to mask_width bits. */
+ ip4_address_t address;
+ /* Prefix length in bits. */
+ u32 mask_width;
+ /* Instance number N of the "test-ethN" interface the route points at. */
+ u32 interface_id; /* not an xx_if_index */
+} test_route_t;
+
+/* Global state of the FIB tester. */
+typedef struct
+{
+ /* Test routes in use */
+ test_route_t *route_pool;
+
+ /* Number of fake ethernets created */
+ u32 test_interfaces_created;
+} test_main_t;
+
+/* Single instance of the tester state, used by thrash(). */
+test_main_t test_main;
+
+/*
+ * Device-name formatter for the fake ethernet device class (distinct
+ * from "fake-ethX"): appends "test-eth<instance>" to s.
+ */
+static u8 *
+format_test_interface_name (u8 * s, va_list * args)
+{
+  u32 instance;
+
+  instance = va_arg (*args, u32);
+  s = format (s, "test-eth%d", instance);
+  return s;
+}
+
+/*
+ * TX function of the test interface. The test never expects packets to
+ * be transmitted, so it only logs a warning; the buffers are not freed.
+ * Returns the number of buffers in the frame.
+ */
+static uword
+dummy_interface_tx (vlib_main_t * vm,
+                    vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  uword n_buffers;
+
+  clib_warning ("you shouldn't be here, leaking buffers...");
+  n_buffers = frame->n_vectors;
+  return n_buffers;
+}
+
+/* *INDENT-OFF* */
+/* Device class for the fake "test-ethN" interfaces that thrash() registers. */
+VNET_DEVICE_CLASS (test_interface_device_class,static) = {
+ .name = "Test interface",
+ .format_device_name = format_test_interface_name,
+ .tx_function = dummy_interface_tx,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI handler for "test route": randomised IPv4 FIB add/probe/delete.
+ *
+ * Creates any missing fake "test-ethN" interfaces bound to FIB table 11.
+ * Then, for each iteration, it generates nroutes random prefixes, adds
+ * them through the route CLI, probes each one, and deletes them one by
+ * one while checking that the not-yet-deleted routes still resolve.
+ * Lookup failures are printed to stderr.
+ *
+ * Fixes over the previous version: the interface is enabled through
+ * hw->sw_if_index (the bare sw_if_index was undeclared), a failed
+ * interface registration aborts instead of using an unset hw_if_index,
+ * the mask vector is freed, line_input is only freed when it was
+ * initialised, mask bounds are validated, and errors already handed to
+ * clib_error_report() are no longer returned to the caller.
+ *
+ * @returns 0 on success, or a clib error for bad arguments or a failed
+ *          interface registration.
+ */
+static clib_error_t *
+thrash (vlib_main_t * vm,
+        unformat_input_t * main_input, vlib_cli_command_t * cmd_arg)
+{
+  u32 seed = 0xdeaddabe;
+  u32 niter = 10;
+  u32 nroutes = 10;
+  u32 ninterfaces = 4;
+  f64 min_mask_bits = 7.0;
+  f64 max_mask_bits = 32.0;
+  u32 table_id = 11;           /* my amp goes to 11 (use fib 11) */
+  u32 table_index;
+  int iter, i;
+  u8 *cmd;
+  test_route_t *tr;
+  test_main_t *tm = &test_main;
+  ip4_main_t *im = &ip4_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  unformat_input_t cmd_input;
+  f64 rf;
+  u32 *masks = 0;
+  u32 tmp;
+  u32 hw_if_index;
+  clib_error_t *error = 0;
+  clib_error_t *route_error;
+  uword *p;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  int have_line_input = 0;
+  u8 hw_address[6];
+  ip4_fib_t *fib;
+  int verbose = 0;
+
+  /* Precompute mask width -> mask vector */
+  tmp = (u32) ~ 0;
+  vec_validate (masks, 32);
+  for (i = 32; i > 0; i--)
+    {
+      masks[i] = tmp;
+      tmp <<= 1;
+    }
+
+  if (unformat_user (main_input, unformat_line_input, line_input))
+    {
+      have_line_input = 1;
+      while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+        {
+          if (unformat (line_input, "seed %d", &seed))
+            ;
+          else if (unformat (line_input, "niter %d", &niter))
+            ;
+          else if (unformat (line_input, "nroutes %d", &nroutes))
+            ;
+          else if (unformat (line_input, "ninterfaces %d", &ninterfaces))
+            ;
+          else if (unformat (line_input, "min-mask-bits %d", &tmp))
+            min_mask_bits = (f64) tmp;
+          else if (unformat (line_input, "max-mask-bits %d", &tmp))
+            max_mask_bits = (f64) tmp;
+          else if (unformat (line_input, "verbose"))
+            verbose = 1;
+          else
+            {
+              error = clib_error_return (0, "unknown input `%U'",
+                                         format_unformat_error, line_input);
+              goto done;
+            }
+        }
+    }
+
+  /* masks[] has entries 0..32 only; keep every generated width inside it. */
+  if (min_mask_bits > max_mask_bits || max_mask_bits > 32.0)
+    {
+      error = clib_error_return (0,
+                                 "mask bits must satisfy min <= max <= 32");
+      goto done;
+    }
+
+  /* Find or create FIB table 11 */
+  fib = ip4_fib_find_or_create_fib_by_table_id (table_id);
+
+  for (i = tm->test_interfaces_created; i < ninterfaces; i++)
+    {
+      vnet_hw_interface_t *hw;
+      memset (hw_address, 0, sizeof (hw_address));
+      hw_address[0] = 0xd0;
+      hw_address[1] = 0x0f;
+      hw_address[5] = i;
+
+      error = ethernet_register_interface
+        (vnm, test_interface_device_class.index, i /* instance */ ,
+         hw_address, &hw_if_index,
+         /* flag change */ 0);
+      /* hw_if_index is only meaningful when registration succeeded. */
+      if (error)
+        goto done;
+
+      /* Fake interfaces use FIB table 11 */
+      hw = vnet_get_hw_interface (vnm, hw_if_index);
+      vec_validate (im->fib_index_by_sw_if_index, hw->sw_if_index);
+      im->fib_index_by_sw_if_index[hw->sw_if_index] = fib->index;
+      ip4_sw_interface_enable_disable (hw->sw_if_index, 1);
+    }
+
+  tm->test_interfaces_created = ninterfaces;
+
+  /* Find fib index corresponding to FIB id 11 */
+  p = hash_get (im->fib_index_by_table_id, table_id);
+  if (p == 0)
+    {
+      vlib_cli_output (vm, "Couldn't map fib id %d to fib index\n", table_id);
+      goto done;
+    }
+  table_index = p[0];
+
+  for (iter = 0; iter < niter; iter++)
+    {
+      /* Pick random routes to install */
+      for (i = 0; i < nroutes; i++)
+        {
+          int j;
+
+          pool_get (tm->route_pool, tr);
+          memset (tr, 0, sizeof (*tr));
+
+        again:
+          rf = random_f64 (&seed);
+          tr->mask_width = (u32) (min_mask_bits
+                                  + rf * (max_mask_bits - min_mask_bits));
+          tmp = random_u32 (&seed);
+          tmp &= masks[tr->mask_width];
+          tr->address.as_u32 = clib_host_to_net_u32 (tmp);
+
+          /* We can't add the same address/mask twice... */
+          for (j = 0; j < i; j++)
+            {
+              test_route_t *prev;
+              prev = pool_elt_at_index (tm->route_pool, j);
+              if ((prev->address.as_u32 == tr->address.as_u32)
+                  && (prev->mask_width == tr->mask_width))
+                goto again;
+            }
+
+          rf = random_f64 (&seed);
+          tr->interface_id = (u32) (rf * ninterfaces);
+        }
+
+      /* Add them */
+      for (i = 0; i < nroutes; i++)
+        {
+          tr = pool_elt_at_index (tm->route_pool, i);
+          cmd = format (0, "add table %d %U/%d via test-eth%d",
+                        table_id,
+                        format_ip4_address, &tr->address,
+                        tr->mask_width, tr->interface_id);
+          vec_add1 (cmd, 0);
+          if (verbose)
+            fformat (stderr, "ip route %s\n", cmd);
+          unformat_init_string (&cmd_input, (char *) cmd, vec_len (cmd) - 1);
+          /* Reported on the spot; deliberately not returned to the caller. */
+          route_error = vnet_ip_route_cmd (vm, &cmd_input, cmd_arg);
+          if (route_error)
+            clib_error_report (route_error);
+          unformat_free (&cmd_input);
+          vec_free (cmd);
+        }
+      /* Probe them */
+      for (i = 0; i < nroutes; i++)
+        {
+          tr = pool_elt_at_index (tm->route_pool, i);
+          if (!ip4_lookup_validate (&tr->address, table_index))
+            {
+              if (verbose)
+                fformat (stderr, "test lookup table %d %U\n",
+                         table_index, format_ip4_address, &tr->address);
+
+              fformat (stderr, "FAIL-after-insert: %U/%d\n",
+                       format_ip4_address, &tr->address, tr->mask_width);
+            }
+        }
+
+      /* Delete them */
+      for (i = 0; i < nroutes; i++)
+        {
+          int j;
+          tr = pool_elt_at_index (tm->route_pool, i);
+          if (0)
+            cmd = format (0, "del table %d %U/%d via test-eth%d",
+                          table_id,
+                          format_ip4_address, &tr->address,
+                          tr->mask_width, tr->interface_id);
+          else
+            cmd = format (0, "del table %d %U/%d",
+                          table_id,
+                          format_ip4_address, &tr->address, tr->mask_width);
+          vec_add1 (cmd, 0);
+          if (verbose)
+            fformat (stderr, "ip route %s\n", cmd);
+          unformat_init_string (&cmd_input, (char *) cmd, vec_len (cmd) - 1);
+          route_error = vnet_ip_route_cmd (vm, &cmd_input, cmd_arg);
+          if (route_error)
+            clib_error_report (route_error);
+          unformat_free (&cmd_input);
+          vec_free (cmd);
+
+          /* Make sure all undeleted routes still work */
+          for (j = i + 1; j < nroutes; j++)
+            {
+              test_route_t *rr;        /* remaining route */
+              rr = pool_elt_at_index (tm->route_pool, j);
+              if (!ip4_lookup_validate (&rr->address, table_index))
+                {
+                  if (verbose)
+                    fformat (stderr, "test lookup table %d %U\n",
+                             table_index, format_ip4_address, &rr->address);
+
+                  fformat (stderr, "FAIL: %U/%d AWOL\n",
+                           format_ip4_address, &rr->address, rr->mask_width);
+                  fformat (stderr, " iter %d after %d of %d deletes\n",
+                           iter, i, nroutes);
+                  fformat (stderr, " last route deleted %U/%d\n",
+                           format_ip4_address, &tr->address, tr->mask_width);
+                }
+            }
+        }
+
+      pool_free (tm->route_pool);
+    }
+
+done:
+  vec_free (masks);
+  /* line_input is only initialised when unformat_user() succeeded. */
+  if (have_line_input)
+    unformat_free (line_input);
+
+  return error;
+}
+
+/*?
+ * This command is not in the build by default. It is an internal
+ * command used to test the route functionality.
+ *
+ * Create test routes on IPv4 FIB table 11. Table will be created if it
+ * does not exist.
+ *
+ * There are several optional attributes:
+ * - If not provided, <seed> defaults to 0xdeaddabe.
+ * - If not provided, <num-iter> defaults to 10.
+ * - If not provided, <num-routes> defaults to 10.
+ * - If not provided, <num-iface> defaults to 4.
+ * - If not provided, <min-mask> defaults to 7.0.
+ * - If not provided, <max-mask> defaults to 32.0.
+ *
+ * @cliexpar
+ * Example of how to run:
+ * @cliexcmd{test route}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_route_command, static) = {
+  .path = "test route",
+  .short_help = "test route [seed <seed-num>] [niter <num-iter>] [nroutes <num-routes>] [ninterfaces <num-iface>] [min-mask-bits <min-mask>] [max-mask-bits <max-mask>] [verbose]",
+  .function = thrash,
+};
+/* *INDENT-ON* */
+
+/* Does no work and always returns 0 (no error); registered as a vlib init function below. */
+clib_error_t *
+test_route_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (test_route_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h
new file mode 100644
index 00000000..6ffc562c
--- /dev/null
+++ b/src/vnet/ip/ip4_to_ip6.h
@@ -0,0 +1,659 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief IPv4 to IPv6 translation
+ */
+#ifndef __included_ip4_to_ip6_h__
+#define __included_ip4_to_ip6_h__
+
+#include <vnet/ip/ip.h>
+
+
+/**
+ * IPv4 to IPv6 set callback function type.
+ *
+ * The translation helpers in this file call it with the IPv4 header being
+ * translated, the IPv6 header being built and the caller-supplied context.
+ * The helpers read ip6->src_address and ip6->dst_address after the call
+ * (for checksum updates), so the callback is expected to have filled
+ * them in.
+ *
+ * A return value of 0 means success; any non-zero value makes the calling
+ * helper stop and return that value.
+ */
+typedef int (*ip4_to_ip6_set_fn_t) (ip4_header_t * ip4, ip6_header_t * ip6,
+ void *ctx);
+
+/* *INDENT-OFF* */
+static u8 icmp_to_icmp6_updater_pointer_table[] = /* indexed by the pointer byte of an ICMPv4 Parameter Problem message */
+ { 0, 1, 4, 4, ~0, /* index 0-4 */
+ ~0, ~0, ~0, 7, 6, /* index 5-9 */
+ ~0, ~0, 8, 8, 8, /* index 10-14 */
+ 8, 24, 24, 24, 24 /* index 15-19 */
+ }; /* the value is stored as the ICMPv6 pointer; ~0 (0xff as u8) makes icmp_to_icmp6_header() return -1 */
+/* *INDENT-ON* */
+
+#define frag_id_4to6(id) (id) /* IPv4 fragment id -> IPv6 fragment identification: passed through unchanged */
+
+/**
+ * @brief Get TCP/UDP port number or ICMP echo identifier from IPv4 packet.
+ *
+ * Only packets whose first byte is 0x45 (IPv4, no options) and whose
+ * fragment offset is zero are inspected. For ICMP messages other than echo
+ * request/reply with a total length of at least 64 bytes, the IPv4 packet
+ * embedded 8 bytes after the start of the ICMP header is inspected instead;
+ * in that case the TCP/UDP ports are swapped (sender yields the embedded
+ * destination port).
+ *
+ * The value is returned exactly as stored in the packet (no byte swap).
+ *
+ * @param ip IPv4 header.
+ * @param sender 1 get sender port, 0 get receiver port.
+ *
+ * @returns Port number or ICMP identifier on success, 0 otherwise.
+ */
+always_inline u16
+ip4_get_port (ip4_header_t * ip, u8 sender)
+{
+  icmp46_header_t *outer_icmp;
+  ip4_header_t *embedded;
+
+  if (ip->ip_version_and_header_length != 0x45)
+    return 0;
+  if (ip4_get_fragment_offset (ip))
+    return 0;
+
+  if (PREDICT_TRUE ((ip->protocol == IP_PROTOCOL_TCP) ||
+                    (ip->protocol == IP_PROTOCOL_UDP)))
+    {
+      /* TCP and UDP keep their ports at the same offsets */
+      udp_header_t *l4 = (void *) (ip + 1);
+      if (sender)
+        return l4->src_port;
+      return l4->dst_port;
+    }
+
+  if (ip->protocol != IP_PROTOCOL_ICMP)
+    return 0;
+
+  outer_icmp = (void *) (ip + 1);
+  if (outer_icmp->type == ICMP4_echo_request
+      || outer_icmp->type == ICMP4_echo_reply)
+    return *((u16 *) (outer_icmp + 1));
+
+  if (clib_net_to_host_u16 (ip->length) < 64)
+    return 0;
+
+  /* Look at the packet quoted inside the ICMP message */
+  embedded = (ip4_header_t *) (outer_icmp + 2);
+  if (PREDICT_TRUE ((embedded->protocol == IP_PROTOCOL_TCP) ||
+                    (embedded->protocol == IP_PROTOCOL_UDP)))
+    {
+      udp_header_t *l4 = (void *) (embedded + 1);
+      if (sender)
+        return l4->dst_port;
+      return l4->src_port;
+    }
+
+  if (embedded->protocol == IP_PROTOCOL_ICMP)
+    {
+      icmp46_header_t *inner_icmp = (void *) (embedded + 1);
+      if (inner_icmp->type == ICMP4_echo_request ||
+          inner_icmp->type == ICMP4_echo_reply)
+        return *((u16 *) (inner_icmp + 1));
+    }
+
+  return 0;
+}
+
+/**
+ * @brief Convert type and code value from ICMP4 to ICMP6.
+ *
+ * The header is rewritten in place. For "fragmentation needed" and
+ * "parameter problem" messages the 32-bit word that follows the ICMP header
+ * (MTU / pointer) is rewritten as well.
+ *
+ * @param icmp ICMP header, modified in place. Type and code may already have
+ *             been rewritten when a failure is reported.
+ * @param inner_ip4 Output. Set to NULL first, then to the address 8 bytes
+ *                  past the start of the ICMP header for destination
+ *                  unreachable, time exceeded and parameter problem
+ *                  messages.
+ *
+ * @returns 0 on success, -1 if the message cannot be translated.
+ */
+always_inline int
+icmp_to_icmp6_header (icmp46_header_t * icmp, ip4_header_t ** inner_ip4)
+{
+  *inner_ip4 = NULL;
+  switch (icmp->type)
+    {
+    case ICMP4_echo_reply:
+      icmp->type = ICMP6_echo_reply;
+      break;
+    case ICMP4_echo_request:
+      icmp->type = ICMP6_echo_request;
+      break;
+    case ICMP4_destination_unreachable:
+      *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8);
+
+      switch (icmp->code)
+        {
+        case ICMP4_destination_unreachable_destination_unreachable_net:        //0
+        case ICMP4_destination_unreachable_destination_unreachable_host:       //1
+          icmp->type = ICMP6_destination_unreachable;
+          icmp->code = ICMP6_destination_unreachable_no_route_to_destination;
+          break;
+        case ICMP4_destination_unreachable_protocol_unreachable:       //2
+          icmp->type = ICMP6_parameter_problem;
+          icmp->code = ICMP6_parameter_problem_unrecognized_next_header;
+          break;
+        case ICMP4_destination_unreachable_port_unreachable:   //3
+          icmp->type = ICMP6_destination_unreachable;
+          icmp->code = ICMP6_destination_unreachable_port_unreachable;
+          break;
+        case ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set: //4
+          icmp->type = ICMP6_packet_too_big;
+          icmp->code = 0;
+          {
+            u32 advertised_mtu = clib_net_to_host_u32 (*((u32 *) (icmp + 1)));
+            if (advertised_mtu)
+              advertised_mtu += 20;
+            else
+              advertised_mtu = 1000;   //FIXME ! (RFC 1191 - plateau value)
+
+            //FIXME: = minimum(advertised MTU+20, MTU_of_IPv6_nexthop, (MTU_of_IPv4_nexthop)+20)
+            *((u32 *) (icmp + 1)) = clib_host_to_net_u32 (advertised_mtu);
+          }
+          break;
+
+        case ICMP4_destination_unreachable_source_route_failed:        //5
+        case ICMP4_destination_unreachable_destination_network_unknown:        //6
+        case ICMP4_destination_unreachable_destination_host_unknown:   //7
+        case ICMP4_destination_unreachable_source_host_isolated:       //8
+        case ICMP4_destination_unreachable_network_unreachable_for_type_of_service:    //11
+        case ICMP4_destination_unreachable_host_unreachable_for_type_of_service:       //12
+          icmp->type = ICMP6_destination_unreachable;
+          icmp->code = ICMP6_destination_unreachable_no_route_to_destination;
+          break;
+        case ICMP4_destination_unreachable_network_administratively_prohibited:        //9
+        case ICMP4_destination_unreachable_host_administratively_prohibited:   //10
+        case ICMP4_destination_unreachable_communication_administratively_prohibited:  //13
+        case ICMP4_destination_unreachable_precedence_cutoff_in_effect:        //15
+          icmp->type = ICMP6_destination_unreachable;
+          icmp->code =
+            ICMP6_destination_unreachable_destination_administratively_prohibited;
+          break;
+        case ICMP4_destination_unreachable_host_precedence_violation:  //14
+        default:
+          return -1;
+        }
+      break;
+
+    case ICMP4_time_exceeded:  //11
+      *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8);
+      icmp->type = ICMP6_time_exceeded;
+      break;
+
+    case ICMP4_parameter_problem:
+      *inner_ip4 = (ip4_header_t *) (((u8 *) icmp) + 8);
+
+      switch (icmp->code)
+        {
+        case ICMP4_parameter_problem_pointer_indicates_error:
+        case ICMP4_parameter_problem_bad_length:
+          icmp->type = ICMP6_parameter_problem;
+          icmp->code = ICMP6_parameter_problem_erroneous_header_field;
+          {
+            u8 ip4_pointer = *((u8 *) (icmp + 1));
+            u8 ptr;
+
+            /* The pointer byte comes straight from the packet and can be
+             * anything in 0..255; a value past the end of the table has no
+             * translation and must not be used as an index. */
+            if (ip4_pointer >=
+                ARRAY_LEN (icmp_to_icmp6_updater_pointer_table))
+              return -1;
+
+            ptr = icmp_to_icmp6_updater_pointer_table[ip4_pointer];
+            if (ptr == 0xff)
+              return -1;
+
+            *((u32 *) (icmp + 1)) = clib_host_to_net_u32 (ptr);
+          }
+          break;
+        default:
+          //All other codes cause error
+          return -1;
+        }
+      break;
+
+    default:
+      //All other types cause error
+      return -1;
+    }
+  return 0;
+}
+
+/**
+ * @brief Translate ICMP4 packet to ICMP6.
+ *
+ * The translation is done in place: the buffer's current data pointer is
+ * moved with vlib_buffer_advance() to make room for the larger IPv6
+ * header(s). When icmp_to_icmp6_header() reports an embedded IPv4 header,
+ * that inner header is translated as well and the inner TCP/UDP/ICMP
+ * checksum is adjusted. An inner packet that is a fragment but not the
+ * first one is rejected; an inner first fragment gets an IPv6 fragment
+ * header. Finally the packet is cut down to 1280 bytes if
+ * p->current_length exceeds that, and the outer ICMPv6 checksum is
+ * recomputed.
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate outer header.
+ * @param ctx A context passed in the outer header translate function.
+ * @param inner_fn The function to translate inner header.
+ * @param inner_ctx A context passed in the inner header translate function.
+ *
+ * @returns 0 on success, non-zero value otherwise: -1 when
+ * icmp_to_icmp6_header() fails or the inner packet is a non-first fragment,
+ * else the non-zero value returned by inner_fn or fn.
+ */
+always_inline int
+icmp_to_icmp6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx,
+ ip4_to_ip6_set_fn_t inner_fn, void *inner_ctx)
+{
+ ip4_header_t *ip4, *inner_ip4;
+ ip6_header_t *ip6, *inner_ip6;
+ u32 ip_len;
+ icmp46_header_t *icmp;
+ ip_csum_t csum;
+ ip6_frag_hdr_t *inner_frag;
+ u32 inner_frag_id;
+ u32 inner_frag_offset;
+ u8 inner_frag_more;
+ u16 *inner_L4_checksum = 0;
+ int rv;
+
+ ip4 = vlib_buffer_get_current (p);
+ ip_len = clib_net_to_host_u16 (ip4->length);
+ ASSERT (ip_len <= p->current_length);
+
+ icmp = (icmp46_header_t *) (ip4 + 1);
+ if (icmp_to_icmp6_header (icmp, &inner_ip4))
+ return -1;
+
+ if (inner_ip4)
+ {
+ //We have 2 headers to translate.
+ //We need to make some room in the middle of the packet
+ if (PREDICT_FALSE (ip4_is_fragment (inner_ip4)))
+ {
+ //Here it starts getting really tricky
+ //We will add a fragmentation header in the inner packet
+
+ if (!ip4_is_first_fragment (inner_ip4))
+ {
+ //For now we do not handle unless it is the first fragment
+ //Ideally we should handle the case as we are in slow path already
+ return -1;
+ }
+
+ vlib_buffer_advance (p,
+ -2 * (sizeof (*ip6) - sizeof (*ip4)) -
+ sizeof (*inner_frag));
+ ip6 = vlib_buffer_get_current (p);
+ clib_memcpy (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4,
+ 20 + 8); /* moves the outer IPv4 header (presumably 20 bytes) plus the 8 bytes that follow it */
+ ip4 =
+ (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4));
+ icmp = (icmp46_header_t *) (ip4 + 1);
+
+ inner_ip6 =
+ (ip6_header_t *) u8_ptr_add (inner_ip4,
+ sizeof (*ip4) - sizeof (*ip6) -
+ sizeof (*inner_frag));
+ inner_frag =
+ (ip6_frag_hdr_t *) u8_ptr_add (inner_ip6, sizeof (*inner_ip6));
+ ip6->payload_length =
+ u16_net_add (ip4->length,
+ sizeof (*ip6) - 2 * sizeof (*ip4) +
+ sizeof (*inner_frag));
+ inner_frag_id = frag_id_4to6 (inner_ip4->fragment_id);
+ inner_frag_offset = ip4_get_fragment_offset (inner_ip4);
+ inner_frag_more =
+ ! !(inner_ip4->flags_and_fragment_offset &
+ clib_net_to_host_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS));
+ }
+ else
+ {
+ vlib_buffer_advance (p, -2 * (sizeof (*ip6) - sizeof (*ip4)));
+ ip6 = vlib_buffer_get_current (p);
+ clib_memcpy (u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4)), ip4,
+ 20 + 8); /* moves the outer IPv4 header (presumably 20 bytes) plus the 8 bytes that follow it */
+ ip4 =
+ (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4));
+ icmp = (icmp46_header_t *) u8_ptr_add (ip4, sizeof (*ip4));
+ inner_ip6 =
+ (ip6_header_t *) u8_ptr_add (inner_ip4,
+ sizeof (*ip4) - sizeof (*ip6));
+ ip6->payload_length =
+ u16_net_add (ip4->length, sizeof (*ip6) - 2 * sizeof (*ip4));
+ inner_frag = NULL;
+ }
+
+ if (PREDICT_TRUE (inner_ip4->protocol == IP_PROTOCOL_TCP))
+ {
+ inner_L4_checksum = &((tcp_header_t *) (inner_ip4 + 1))->checksum;
+ *inner_L4_checksum =
+ ip_csum_fold (ip_csum_sub_even
+ (*inner_L4_checksum,
+ *((u64 *) (&inner_ip4->src_address))));
+ }
+ else if (PREDICT_TRUE (inner_ip4->protocol == IP_PROTOCOL_UDP))
+ {
+ inner_L4_checksum = &((udp_header_t *) (inner_ip4 + 1))->checksum;
+ if (*inner_L4_checksum)
+ *inner_L4_checksum =
+ ip_csum_fold (ip_csum_sub_even
+ (*inner_L4_checksum,
+ *((u64 *) (&inner_ip4->src_address))));
+ }
+ else if (inner_ip4->protocol == IP_PROTOCOL_ICMP)
+ {
+ //We have an ICMP inside an ICMP
+ //It needs to be translated, but not for error ICMP messages
+ icmp46_header_t *inner_icmp = (icmp46_header_t *) (inner_ip4 + 1);
+ //Only types ICMP4_echo_request and ICMP4_echo_reply are handled by icmp_to_icmp6_header
+ inner_icmp->type = (inner_icmp->type == ICMP4_echo_request) ?
+ ICMP6_echo_request : ICMP6_echo_reply; /* NOTE(review): every type other than echo request becomes echo reply here */
+ inner_L4_checksum = &inner_icmp->checksum;
+ inner_ip4->protocol = IP_PROTOCOL_ICMP6;
+ }
+ else
+ {
+ /* To shut up Coverity.
+ * NOTE(review): an inner protocol other than TCP/UDP/ICMP ends up
+ * here and calls os_panic(); presumably callers filter such packets
+ * out before calling this function - confirm. */
+ os_panic ();
+ }
+
+ inner_ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (inner_ip4->tos << 20));
+ inner_ip6->payload_length =
+ u16_net_add (inner_ip4->length, -sizeof (*inner_ip4));
+ inner_ip6->hop_limit = inner_ip4->ttl;
+ inner_ip6->protocol = inner_ip4->protocol;
+
+ if ((rv = inner_fn (inner_ip4, inner_ip6, inner_ctx)) != 0)
+ return rv;
+
+ if (PREDICT_FALSE (inner_frag != NULL))
+ {
+ inner_frag->next_hdr = inner_ip6->protocol;
+ inner_frag->identification = inner_frag_id;
+ inner_frag->rsv = 0;
+ inner_frag->fragment_offset_and_more =
+ ip6_frag_hdr_offset_and_more (inner_frag_offset, inner_frag_more);
+ inner_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+ inner_ip6->payload_length =
+ clib_host_to_net_u16 (clib_net_to_host_u16
+ (inner_ip6->payload_length) +
+ sizeof (*inner_frag));
+ }
+
+ csum = *inner_L4_checksum;
+ if (inner_ip6->protocol == IP_PROTOCOL_ICMP6)
+ {
+ //Recompute ICMP checksum
+ icmp46_header_t *inner_icmp = (icmp46_header_t *) (inner_ip4 + 1);
+
+ inner_icmp->checksum = 0;
+ csum = ip_csum_with_carry (0, inner_ip6->payload_length);
+ csum =
+ ip_csum_with_carry (csum,
+ clib_host_to_net_u16 (inner_ip6->protocol));
+ csum = ip_csum_with_carry (csum, inner_ip6->src_address.as_u64[0]);
+ csum = ip_csum_with_carry (csum, inner_ip6->src_address.as_u64[1]);
+ csum = ip_csum_with_carry (csum, inner_ip6->dst_address.as_u64[0]);
+ csum = ip_csum_with_carry (csum, inner_ip6->dst_address.as_u64[1]);
+ csum =
+ ip_incremental_checksum (csum, inner_icmp,
+ clib_net_to_host_u16
+ (inner_ip6->payload_length));
+ inner_icmp->checksum = ~ip_csum_fold (csum);
+ }
+ else
+ {
+ /* UDP checksum is optional */
+ if (csum)
+ {
+ csum =
+ ip_csum_add_even (csum, inner_ip6->src_address.as_u64[0]);
+ csum =
+ ip_csum_add_even (csum, inner_ip6->src_address.as_u64[1]);
+ csum =
+ ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[0]);
+ csum =
+ ip_csum_add_even (csum, inner_ip6->dst_address.as_u64[1]);
+ *inner_L4_checksum = ip_csum_fold (csum);
+ }
+ }
+ }
+ else
+ {
+ vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6));
+ ip6 = vlib_buffer_get_current (p);
+ ip6->payload_length =
+ clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
+ sizeof (*ip4));
+ }
+
+ //Translate outer IPv6
+ ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
+
+ ip6->hop_limit = ip4->ttl;
+ ip6->protocol = IP_PROTOCOL_ICMP6;
+
+ if ((rv = fn (ip4, ip6, ctx)) != 0)
+ return rv;
+
+ //Truncate when the packet exceeds the minimal IPv6 MTU
+ if (p->current_length > 1280)
+ {
+ ip6->payload_length = clib_host_to_net_u16 (1280 - sizeof (*ip6));
+ p->current_length = 1280; //Looks too simple to be correct...
+ }
+
+ //Recompute ICMP checksum
+ icmp->checksum = 0;
+ csum = ip_csum_with_carry (0, ip6->payload_length);
+ csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (ip6->protocol));
+ csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[0]);
+ csum = ip_csum_with_carry (csum, ip6->src_address.as_u64[1]);
+ csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[0]);
+ csum = ip_csum_with_carry (csum, ip6->dst_address.as_u64[1]);
+ csum =
+ ip_incremental_checksum (csum, icmp,
+ clib_net_to_host_u16 (ip6->payload_length));
+ icmp->checksum = ~ip_csum_fold (csum);
+
+ return 0;
+}
+
+/**
+ * @brief Translate IPv4 fragmented packet to IPv6.
+ *
+ * The IPv6 header and an IPv6 fragment header are written so that the
+ * fragment header ends exactly where the IPv4 header ends; the buffer's
+ * current data pointer is moved back by the size difference. The fragment
+ * header therefore overlays the tail of the IPv4 header, and fn is called
+ * after it has been written.
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate header.
+ * @param ctx A context passed in the header translate function.
+ *
+ * @returns 0 on success, otherwise the non-zero value returned by fn.
+ */
+always_inline int
+ip4_to_ip6_fragmented (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx)
+{
+  ip4_header_t *v4 = vlib_buffer_get_current (p);
+  ip6_frag_hdr_t *fh =
+    (ip6_frag_hdr_t *) u8_ptr_add (v4, sizeof (*v4) - sizeof (*fh));
+  ip6_header_t *v6 =
+    (ip6_header_t *) u8_ptr_add (v4,
+                                 sizeof (*v4) - sizeof (*fh) -
+                                 sizeof (*v6));
+
+  vlib_buffer_advance (p, sizeof (*v4) - sizeof (*v6) - sizeof (*fh));
+
+  /* The protocol is one of ICMP, TCP or UDP: the first fragment was
+   * found and cached before this function is reached. */
+  if (v4->protocol == IP_PROTOCOL_ICMP)
+    fh->next_hdr = IP_PROTOCOL_ICMP6;
+  else
+    fh->next_hdr = v4->protocol;
+  fh->identification = frag_id_4to6 (v4->fragment_id);
+  fh->rsv = 0;
+  fh->fragment_offset_and_more =
+    ip6_frag_hdr_offset_and_more (ip4_get_fragment_offset (v4),
+                                  clib_net_to_host_u16
+                                  (v4->flags_and_fragment_offset) &
+                                  IP4_HEADER_FLAG_MORE_FRAGMENTS);
+
+  v6->ip_version_traffic_class_and_flow_label =
+    clib_host_to_net_u32 ((6 << 28) + (v4->tos << 20));
+  v6->payload_length =
+    clib_host_to_net_u16 (clib_net_to_host_u16 (v4->length) -
+                          sizeof (*v4) + sizeof (*fh));
+  v6->hop_limit = v4->ttl;
+  v6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+
+  /* fn's status (0 or an error) is the result of the translation */
+  return fn (v4, v6, ctx);
+}
+
+/**
+ * @brief Translate IPv4 UDP/TCP packet to IPv6.
+ *
+ * The IPv6 header (plus an IPv6 fragment header when the More Fragments
+ * flag is set) is written so that it ends where the IPv4 header ends, and
+ * the buffer's current data pointer is moved back accordingly. The L4
+ * checksum is adjusted incrementally: ip_csum_sub_even() is applied with the
+ * IPv4 addresses before the header is overwritten, and ip_csum_add_even()
+ * with the IPv6 addresses once fn has set them.
+ *
+ * A UDP packet whose checksum field is zero first gets a checksum computed
+ * over the IPv4 pseudo header and the UDP data, because the checksum is
+ * optional over IPv4 but mandatory over IPv6.
+ *
+ * @param p Buffer to translate. Any protocol other than UDP is handled
+ *          as TCP.
+ * @param fn The function to translate header.
+ * @param ctx A context passed in the header translate function.
+ *
+ * @returns 0 on success, otherwise the non-zero value returned by fn.
+ */
+always_inline int
+ip4_to_ip6_tcp_udp (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx)
+{
+  ip4_header_t *ip4;
+  ip6_header_t *ip6;
+  ip_csum_t csum;
+  u16 *checksum;
+  ip6_frag_hdr_t *frag;
+  u32 frag_id = 0;
+  int rv;
+
+  ip4 = vlib_buffer_get_current (p);
+
+  if (ip4->protocol == IP_PROTOCOL_UDP)
+    {
+      udp_header_t *udp = ip4_next_header (ip4);
+      checksum = &udp->checksum;
+
+      //UDP checksum is optional over IPv4 but mandatory for IPv6
+      //We do not check udp->length sanity but use our safe computed value instead
+      //The test is on the checksum value; the pointer itself is never NULL
+      if (PREDICT_FALSE (!*checksum))
+        {
+          u16 udp_len = clib_net_to_host_u16 (ip4->length) - sizeof (*ip4);
+          csum = ip_incremental_checksum (0, udp, udp_len);
+          csum = ip_csum_with_carry (csum, clib_host_to_net_u16 (udp_len));
+          csum =
+            ip_csum_with_carry (csum, clib_host_to_net_u16 (IP_PROTOCOL_UDP));
+          csum = ip_csum_with_carry (csum, *((u64 *) (&ip4->src_address)));
+          *checksum = ~ip_csum_fold (csum);
+        }
+    }
+  else
+    {
+      tcp_header_t *tcp = ip4_next_header (ip4);
+      checksum = &tcp->checksum;
+    }
+
+  csum = ip_csum_sub_even (*checksum, ip4->src_address.as_u32);
+  csum = ip_csum_sub_even (csum, ip4->dst_address.as_u32);
+  *checksum = ip_csum_fold (csum);
+
+  // Deal with fragmented packets
+  if (PREDICT_FALSE (ip4->flags_and_fragment_offset &
+                     clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)))
+    {
+      ip6 =
+        (ip6_header_t *) u8_ptr_add (ip4,
+                                     sizeof (*ip4) - sizeof (*ip6) -
+                                     sizeof (*frag));
+      frag =
+        (ip6_frag_hdr_t *) u8_ptr_add (ip4, sizeof (*ip4) - sizeof (*frag));
+      frag_id = frag_id_4to6 (ip4->fragment_id);
+      vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6) - sizeof (*frag));
+    }
+  else
+    {
+      ip6 = (ip6_header_t *) (((u8 *) ip4) + sizeof (*ip4) - sizeof (*ip6));
+      vlib_buffer_advance (p, sizeof (*ip4) - sizeof (*ip6));
+      frag = NULL;
+    }
+
+  ip6->ip_version_traffic_class_and_flow_label =
+    clib_host_to_net_u32 ((6 << 28) + (ip4->tos << 20));
+  ip6->payload_length = u16_net_add (ip4->length, -sizeof (*ip4));
+  ip6->hop_limit = ip4->ttl;
+  ip6->protocol = ip4->protocol;
+
+  if (PREDICT_FALSE (frag != NULL))
+    {
+      frag->next_hdr = ip6->protocol;
+      frag->identification = frag_id;
+      frag->rsv = 0;
+      frag->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1);
+      ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+      ip6->payload_length = u16_net_add (ip6->payload_length, sizeof (*frag));
+    }
+
+  if ((rv = fn (ip4, ip6, ctx)) != 0)
+    return rv;
+
+  csum = ip_csum_add_even (*checksum, ip6->src_address.as_u64[0]);
+  csum = ip_csum_add_even (csum, ip6->src_address.as_u64[1]);
+  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[0]);
+  csum = ip_csum_add_even (csum, ip6->dst_address.as_u64[1]);
+  *checksum = ip_csum_fold (csum);
+
+  return 0;
+}
+
+/**
+ * @brief Translate IPv4 packet to IPv6 (IP header only).
+ *
+ * The IPv6 header is written so that it ends where the IPv4 header ends and
+ * the buffer's current data pointer is moved back by the size difference.
+ * When the More Fragments flag is set, an IPv6 fragment header (offset 0,
+ * more = 1) is placed between the IPv6 header and the payload. No L4
+ * checksum is touched.
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate header.
+ * @param ctx A context passed in the header translate function.
+ *
+ * @returns 0 on success, otherwise the non-zero value returned by fn.
+ */
+always_inline int
+ip4_to_ip6 (vlib_buffer_t * p, ip4_to_ip6_set_fn_t fn, void *ctx)
+{
+  ip4_header_t *v4 = vlib_buffer_get_current (p);
+  ip6_header_t *v6;
+  ip6_frag_hdr_t *fh = NULL;
+  u32 ident = 0;
+
+  if (PREDICT_FALSE (v4->flags_and_fragment_offset &
+                     clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS)))
+    {
+      /* Fragmented: leave room for the fragment header as well */
+      fh = (ip6_frag_hdr_t *) u8_ptr_add (v4, sizeof (*v4) - sizeof (*fh));
+      v6 =
+        (ip6_header_t *) u8_ptr_add (v4,
+                                     sizeof (*v4) - sizeof (*v6) -
+                                     sizeof (*fh));
+      ident = frag_id_4to6 (v4->fragment_id);
+      vlib_buffer_advance (p, sizeof (*v4) - sizeof (*v6) - sizeof (*fh));
+    }
+  else
+    {
+      v6 = (ip6_header_t *) (((u8 *) v4) + sizeof (*v4) - sizeof (*v6));
+      vlib_buffer_advance (p, sizeof (*v4) - sizeof (*v6));
+    }
+
+  v6->ip_version_traffic_class_and_flow_label =
+    clib_host_to_net_u32 ((6 << 28) + (v4->tos << 20));
+  v6->payload_length = u16_net_add (v4->length, -sizeof (*v4));
+  v6->hop_limit = v4->ttl;
+  v6->protocol = v4->protocol;
+
+  if (PREDICT_FALSE (fh != NULL))
+    {
+      fh->next_hdr = v6->protocol;
+      fh->identification = ident;
+      fh->rsv = 0;
+      fh->fragment_offset_and_more = ip6_frag_hdr_offset_and_more (0, 1);
+      v6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
+      v6->payload_length = u16_net_add (v6->payload_length, sizeof (*fh));
+    }
+
+  /* fn's status (0 or an error) is the result of the translation */
+  return fn (v4, v6, ctx);
+}
+
+#endif /* __included_ip4_to_ip6_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
new file mode 100644
index 00000000..8aef53a9
--- /dev/null
+++ b/src/vnet/ip/ip6.h
@@ -0,0 +1,605 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6.h: ip6 main include file
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip6_h
+#define included_ip_ip6_h
+
+#include <vlib/mc.h>
+#include <vlib/buffer.h>
+#include <vnet/ethernet/packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+#include <vnet/ip/lookup.h>
+#include <stdbool.h>
+#include <vppinfra/bihash_24_8.h>
+#include <vppinfra/bihash_template.h>
+#include <vnet/util/radix.h>
+
+/*
+ * Default size of the ip6 fib hash table:
+ * 64K buckets (64 * 1024) and 32<<20 (presumably bytes, i.e. 32 MiB)
+ * of memory.
+ */
+#define IP6_FIB_DEFAULT_HASH_NUM_BUCKETS (64 * 1024)
+#define IP6_FIB_DEFAULT_HASH_MEMORY_SIZE (32<<20)
+
+typedef struct
+{
+ ip6_address_t addr;
+ u32 dst_address_length;
+ u32 vrf_index;
+} ip6_fib_key_t; /* NOTE(review): presumably a route key (address, prefix length, VRF index); no user is visible here - confirm */
+
+typedef struct /* element type of ip6_main_t.v6_fibs */
+{
+ /* Table ID (hash key) for this FIB. */
+ u32 table_id;
+
+ /* Index into FIB vector. */
+ u32 index;
+} ip6_fib_t;
+
+typedef struct ip6_mfib_t
+{
+ /* Table ID (hash key) for this multicast FIB. */
+ u32 table_id;
+
+ /* Index into the multicast FIB vector. */
+ u32 index;
+
+ /*
+ * Pointer to the top of a radix tree.
+ * This cannot be realloc'd, hence it cannot be inlined with this table,
+ * which is why it is held by pointer rather than by value.
+ */
+ struct radix_node_head *rhead;
+} ip6_mfib_t;
+
+struct ip6_main_t;
+
+typedef void (ip6_add_del_interface_address_function_t) /* signature of a callback run when an interface address changes */
+ (struct ip6_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length, u32 if_address_index, u32 is_del);
+
+typedef struct
+{
+ ip6_add_del_interface_address_function_t *function;
+ uword function_opaque; /* presumably handed back as the 'opaque' argument - TODO confirm at the call site */
+} ip6_add_del_interface_address_callback_t; /* element of ip6_main_t.add_del_interface_address_callbacks */
+
+typedef void (ip6_table_bind_function_t) /* signature of a callback run when an interface's table binding changes */
+ (struct ip6_main_t * im,
+ uword opaque, u32 sw_if_index, u32 new_fib_index, u32 old_fib_index);
+
+typedef struct
+{
+ ip6_table_bind_function_t *function;
+ uword function_opaque; /* presumably handed back as the 'opaque' argument - TODO confirm at the call site */
+} ip6_table_bind_callback_t; /* element of ip6_main_t.table_bind_callbacks */
+
+/**
+ * Enumeration of the FIB table instance types
+ */
+typedef enum ip6_fib_table_instance_type_t_
+{
+ /**
+ * This table stores the routes that are used to forward traffic.
+ * The key is the prefix, the result the adjacency to forward on.
+ */
+ IP6_FIB_TABLE_FWDING,
+ /**
+ * The table that stores ALL routes learned by the DP.
+ * Some of these routes may not be ready to install in forwarding
+ * at a given time.
+ * The key in this table is the prefix, the result is the fib_entry_t
+ */
+ IP6_FIB_TABLE_NON_FWDING,
+} ip6_fib_table_instance_type_t;
+
+#define IP6_FIB_NUM_TABLES (IP6_FIB_TABLE_NON_FWDING+1) /* number of enumerators above; sizes ip6_main_t.ip6_table[] */
+
+/**
+ * A representation of a single IP6 table
+ */
+typedef struct ip6_fib_table_instance_t_
+{
+ /* The hash table */
+ BVT (clib_bihash) ip6_hash;
+
+ /* bitmap / refcounts / vector of mask widths to search */
+ uword *non_empty_dst_address_length_bitmap;
+ u8 *prefix_lengths_in_search_order;
+ i32 dst_address_length_refcounts[129]; /* 129 entries: presumably one per prefix length 0..128 */
+} ip6_fib_table_instance_t;
+
+typedef struct ip6_main_t
+{
+ /**
+ * The two FIB tables; fwding and non-fwding
+ */
+ ip6_fib_table_instance_t ip6_table[IP6_FIB_NUM_TABLES];
+
+ ip_lookup_main_t lookup_main;
+
+ /* Pool of FIBs. */
+ struct fib_table_t_ *fibs;
+
+ /* Pool of V6 FIBs. */
+ ip6_fib_t *v6_fibs;
+
+ /** Vector of MFIBs. */
+ struct mfib_table_t_ *mfibs;
+
+ /* Network byte order subnet mask for each prefix length */
+ ip6_address_t fib_masks[129];
+
+ /* Table index indexed by software interface. */
+ u32 *fib_index_by_sw_if_index;
+
+ /** Multicast table index indexed by software interface. */
+ u32 *mfib_index_by_sw_if_index;
+
+ /* IP6 enabled count by software interface */
+ u8 *ip_enabled_by_sw_if_index;
+
+ /* Hash table mapping table id to fib index.
+ ID space is not necessarily dense; index space is dense. */
+ uword *fib_index_by_table_id;
+
+ /** Hash table mapping table id to multicast fib index.
+ ID space is not necessarily dense; index space is dense. */
+ uword *mfib_index_by_table_id;
+
+ /* Hash table mapping interface rewrite adjacency index by sw if index. */
+ uword *interface_route_adj_index_by_sw_if_index;
+
+ /* Functions to call when interface address changes. */
+ ip6_add_del_interface_address_callback_t
+ * add_del_interface_address_callbacks;
+
+ /** Functions to call when interface to table binding changes. */
+ ip6_table_bind_callback_t *table_bind_callbacks;
+
+ /* Template used to generate IP6 neighbor solicitation packets. */
+ vlib_packet_template_t discover_neighbor_packet_template;
+
+ /* ip6 lookup table config parameters */
+ u32 lookup_table_nbuckets;
+ uword lookup_table_size;
+
+ /* Seed for Jenkins hash used to compute ip6 flow hash. */
+ u32 flow_hash_seed;
+
+ struct
+ {
+ /* TTL to use for host generated packets. */
+ u8 ttl;
+
+ u8 pad[3];
+ } host_config;
+
+ /* HBH processing enabled? */
+ u8 hbh_enabled;
+} ip6_main_t;
+
+/* Global ip6 main structure. */
+extern ip6_main_t ip6_main;
+
+/* Global ip6 input node. Errors get attached to ip6 input node. */
+extern vlib_node_registration_t ip6_input_node;
+extern vlib_node_registration_t ip6_rewrite_node;
+extern vlib_node_registration_t ip6_rewrite_mcast_node;
+extern vlib_node_registration_t ip6_rewrite_local_node;
+extern vlib_node_registration_t ip6_discover_neighbor_node;
+extern vlib_node_registration_t ip6_glean_node;
+extern vlib_node_registration_t ip6_midchain_node;
+
+/*
+ * Return 1 when key and dest agree on every bit selected by the mask
+ * im->fib_masks[dest_length], 0 otherwise. The addresses are compared one
+ * uword at a time.
+ */
+always_inline uword
+ip6_destination_matches_route (const ip6_main_t * im,
+                               const ip6_address_t * key,
+                               const ip6_address_t * dest, uword dest_length)
+{
+  const ip6_address_t *mask = &im->fib_masks[dest_length];
+  int n;
+
+  for (n = 0; n < ARRAY_LEN (key->as_uword); n++)
+    {
+      uword differing_bits = key->as_uword[n] ^ dest->as_uword[n];
+
+      if (differing_bits & mask->as_uword[n])
+        return 0;
+    }
+  return 1;
+}
+
+/*
+ * Return 1 when key falls inside the prefix configured on interface
+ * address ia (its address masked to ia->address_length bits), 0 otherwise.
+ */
+always_inline uword
+ip6_destination_matches_interface (ip6_main_t * im,
+                                   ip6_address_t * key,
+                                   ip_interface_address_t * ia)
+{
+  ip6_address_t *if_addr;
+
+  if_addr = ip_interface_address_get_address (&im->lookup_main, ia);
+
+  return ip6_destination_matches_route (im, key, if_addr,
+                                        ia->address_length);
+}
+
+/*
+ * Return 1 when key and dest agree on every bit selected by the mask
+ * im->fib_masks[dest_length], 0 otherwise. key is read through
+ * clib_mem_unaligned(), so it may point straight into a packet's IP header;
+ * dest is read directly.
+ */
+always_inline uword
+ip6_unaligned_destination_matches_route (ip6_main_t * im,
+                                         ip6_address_t * key,
+                                         ip6_address_t * dest,
+                                         uword dest_length)
+{
+  ip6_address_t *mask = &im->fib_masks[dest_length];
+  int n;
+
+  for (n = 0; n < ARRAY_LEN (key->as_uword); n++)
+    {
+      uword key_word = clib_mem_unaligned (&key->as_uword[n], uword);
+
+      if ((key_word ^ dest->as_uword[n]) & mask->as_uword[n])
+        return 0;
+    }
+  return 1;
+}
+
+/*
+ * Copy the interface address referenced by
+ * lm->if_address_pool_index_by_sw_if_index[sw_if_index] into *src.
+ *
+ * Returns 0 when such an address exists. When the index is ~0, *src is
+ * zeroed and a non-zero value is returned.
+ */
+always_inline int
+ip6_src_address_for_packet (ip_lookup_main_t * lm,
+                            u32 sw_if_index, ip6_address_t * src)
+{
+  ip_interface_address_t *ia;
+  ip6_address_t *addr;
+  u32 ia_index = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+
+  if (PREDICT_FALSE (ia_index == ~0))
+    {
+      /* No address on this interface */
+      src->as_u64[0] = 0;
+      src->as_u64[1] = 0;
+      return (!0);
+    }
+
+  ia = pool_elt_at_index (lm->if_address_pool, ia_index);
+  addr = ip_interface_address_get_address (lm, ia);
+  *src = *addr;
+  return (0);
+}
+
+/* Find interface address which matches destination.
+ *
+ * Walks the addresses of sw_if_index (honoring unnumbered interfaces) and
+ * stops at the first address a for which
+ * ip6_destination_matches_route (im, dst, a, ia->address_length) is true.
+ *
+ * Returns that address, or 0 when none matches. When result_ia is non-null
+ * it receives the matching ip_interface_address_t, or 0 when none matches.
+ */
+always_inline ip6_address_t *
+ip6_interface_address_matching_destination (ip6_main_t * im,
+ ip6_address_t * dst,
+ u32 sw_if_index,
+ ip_interface_address_t **
+ result_ia)
+{
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_interface_address_t *ia;
+ ip6_address_t *result = 0;
+
+ /* *INDENT-OFF* */
+ foreach_ip_interface_address (lm, ia, sw_if_index,
+ 1 /* honor unnumbered */,
+ ({
+ ip6_address_t * a = ip_interface_address_get_address (lm, ia);
+ if (ip6_destination_matches_route (im, dst, a, ia->address_length))
+ {
+ result = a;
+ break;
+ }
+ }));
+ /* *INDENT-ON* */
+ if (result_ia)
+ *result_ia = result ? ia : 0;
+ return result;
+}
+
+clib_error_t *ip6_add_del_interface_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * address,
+ u32 address_length, u32 is_del);
+void ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable);
+
+/**
+ * @brief get first IPv6 interface address
+ */
+ip6_address_t *ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index);
+
+int ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2);
+
+clib_error_t *ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst,
+ u32 sw_if_index);
+
+uword
+ip6_udp_register_listener (vlib_main_t * vm,
+ u16 dst_port, u32 next_node_index);
+
+u16 ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+ ip6_header_t * ip0,
+ int *bogus_lengthp);
+
+void ip6_register_protocol (u32 protocol, u32 node_index);
+
+serialize_function_t serialize_vnet_ip6_main, unserialize_vnet_ip6_main;
+
+void ip6_ethernet_update_adjacency (vnet_main_t * vnm,
+ u32 sw_if_index, u32 ai);
+
+
+void
+ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip,
+ u8 * mac);
+
+void
+ip6_ethernet_mac_address_from_link_local_address (u8 * mac,
+ ip6_address_t * ip);
+
+int vnet_set_ip6_flow_hash (u32 table_id,
+ flow_hash_config_t flow_hash_config);
+
+clib_error_t *enable_ip6_interface (vlib_main_t * vm, u32 sw_if_index);
+
+clib_error_t *disable_ip6_interface (vlib_main_t * vm, u32 sw_if_index);
+
+int ip6_interface_enabled (vlib_main_t * vm, u32 sw_if_index);
+
+clib_error_t *set_ip6_link_local_address (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * address);
+
+int vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm,
+ void *data_callback,
+ u32 pid,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque,
+ uword data, int is_add);
+
+int vnet_ip6_nd_term (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_buffer_t * p0,
+ ethernet_header_t * eth,
+ ip6_header_t * ip, u32 sw_if_index, u16 bd_index);
+
+void send_ip6_na (vlib_main_t * vm, vnet_hw_interface_t * hi);
+
+u8 *format_ip6_forward_next_trace (u8 * s, va_list * args);
+
+u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0);
+
+int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+ u32 table_index);
+extern vlib_node_registration_t ip6_lookup_node;
+
+/* Compute the flow hash of an IPv6 packet.
+ *
+ * flow_hash_config selects which of source address, destination address,
+ * protocol, source port and destination port take part, and whether source
+ * and destination are swapped. Ports are used for TCP and UDP, either
+ * directly after the IPv6 header or after a hop-by-hop options header; in
+ * the hop-by-hop case the protocol mixed in is the one named by that header.
+ * The selected values are mixed with hash_mix64() and the low 32 bits are
+ * returned. */
+always_inline u32
+ip6_compute_flow_hash (const ip6_header_t * ip,
+                       flow_hash_config_t flow_hash_config)
+{
+  tcp_header_t *l4 = 0;        /* stays null unless ports are available */
+  u8 l4_protocol = ip->protocol;
+  u64 src_mix, dst_mix;
+  u64 src_port = 0, dst_port = 0;
+  u64 a, b, c;
+
+  if (PREDICT_TRUE
+      ((ip->protocol == IP_PROTOCOL_TCP)
+       || (ip->protocol == IP_PROTOCOL_UDP)))
+    {
+      l4 = (void *) (ip + 1);
+    }
+  else if (ip->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+    {
+      ip6_hop_by_hop_header_t *hbh = (ip6_hop_by_hop_header_t *) (ip + 1);
+
+      if ((hbh->protocol == IP_PROTOCOL_TCP) ||
+          (hbh->protocol == IP_PROTOCOL_UDP))
+        l4 = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
+      l4_protocol = hbh->protocol;
+    }
+
+  src_mix = 0;
+  if (flow_hash_config & IP_FLOW_HASH_SRC_ADDR)
+    src_mix = ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1];
+
+  dst_mix = 0;
+  if (flow_hash_config & IP_FLOW_HASH_DST_ADDR)
+    dst_mix = ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1];
+
+  if (l4)
+    {
+      if (flow_hash_config & IP_FLOW_HASH_SRC_PORT)
+        src_port = l4->src;
+      if (flow_hash_config & IP_FLOW_HASH_DST_PORT)
+        dst_port = l4->dst;
+    }
+
+  if (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST)
+    {
+      a = dst_mix;
+      b = src_mix;
+      c = (src_port << 16) | dst_port;
+    }
+  else
+    {
+      a = src_mix;
+      b = dst_mix;
+      c = (dst_port << 16) | src_port;
+    }
+
+  if (flow_hash_config & IP_FLOW_HASH_PROTO)
+    b ^= l4_protocol;
+
+  hash_mix64 (a, b, c);
+  return (u32) c;
+}
+
+/* ip6_locate_header
+ *
+ * Walk the IPv6 extension header chain of a packet looking for the header
+ * whose protocol number is find_hdr_type.
+ *
+ * 1. If find_hdr_type < 0, the walk stops at the first header that is not
+ *    an extension header (or is "no next header"); that header's protocol
+ *    number is returned and its offset is stored in *offset.
+ * 2. If a header with protocol number find_hdr_type is found, its offset
+ *    is stored in *offset and find_hdr_type is returned.
+ * 3. -1 is returned (and *offset is left untouched) when the header is not
+ *    found, when the chain runs past the data of buffer p0, or when the
+ *    packet is a non-first fragment.
+ *
+ * Offsets are measured from the start of the IPv6 header ip0.
+ */
+always_inline int
+ip6_locate_header (vlib_buffer_t * p0,
+                   ip6_header_t * ip0, int find_hdr_type, u32 * offset)
+{
+  u8 *buffer_end = (u8 *) vlib_buffer_get_current (p0) + p0->current_length;
+  u8 *hdr = ip6_next_header (ip0);
+  u32 hdr_offset = sizeof (ip6_header_t);
+  u8 proto = ip0->protocol;
+  u32 hdr_len;
+
+  for (;;)
+    {
+      /* A malicious packet could carry an extension header whose length
+         points beyond the buffer: never look past the end. */
+      if (PREDICT_FALSE (hdr >= buffer_end))
+        return (-1);
+
+      if (proto == find_hdr_type)
+        break;
+
+      if ((!ip6_ext_hdr (proto)) || proto == IP_PROTOCOL_IP6_NONXT)
+        {
+          /* End of the extension chain. */
+          if (find_hdr_type < 0)
+            break;
+          return -1;
+        }
+
+      if (proto == IP_PROTOCOL_IPV6_FRAGMENTATION)
+        {
+          /* Non first fragment return -1 */
+          if (ip6_frag_hdr_offset ((ip6_frag_hdr_t *) hdr))
+            return (-1);
+          hdr_len = sizeof (ip6_frag_hdr_t);
+        }
+      else if (proto == IP_PROTOCOL_IPSEC_AH)
+        hdr_len = ip6_ext_authhdr_len (((ip6_ext_header_t *) hdr));
+      else
+        hdr_len = ip6_ext_header_len (((ip6_ext_header_t *) hdr));
+
+      /* Step to the next header in the chain. */
+      proto = ((ip6_ext_header_t *) hdr)->next_hdr;
+      hdr += hdr_len;
+      hdr_offset += hdr_len;
+    }
+
+  *offset = hdr_offset;
+  return (proto);
+}
+
+/* format() callback for a hop-by-hop extension header (defined elsewhere). */
+u8 *format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args);
+/*
+ * Hop-by-Hop handling
+ */
+typedef struct
+{
+ /* Array of function pointers to HBH option handling routines */
+ /* 256 slots, i.e. one per possible u8 value; presumably indexed by the
+  * option type passed to ip6_hbh_register_option () -- TODO confirm. */
+ int (*options[256]) (vlib_buffer_t * b, ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt);
+ /* Matching per-option trace formatters: append to s and return it. */
+ u8 *(*trace[256]) (u8 * s, ip6_hop_by_hop_option_t * opt);
+ /* Value installed through ip6_hbh_set_next_override (); its exact use is
+  * defined by the hop-by-hop node implementation. */
+ uword next_override;
+} ip6_hop_by_hop_main_t;
+
+/* Single global instance of the hop-by-hop state. */
+extern ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
+
+/* Register an option handler together with its trace formatter. */
+int ip6_hbh_register_option (u8 option,
+ int options (vlib_buffer_t * b,
+ ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt),
+ u8 * trace (u8 * s,
+ ip6_hop_by_hop_option_t * opt));
+/* Remove the registration for an option. */
+int ip6_hbh_unregister_option (u8 option);
+/* Set the next_override value. */
+void ip6_hbh_set_next_override (uword next);
+
+/**
+ * Push IPv6 header to buffer
+ *
+ * Room for the header is made in front of the buffer's current data.  The
+ * header is written with version 6, zero traffic class and flow label, a
+ * hop limit of 255 and a payload length equal to the length of the buffer
+ * chain minus the IPv6 header itself.  The buffer is also flagged with
+ * VNET_BUFFER_F_IS_IP6.
+ *
+ * @param vm - vlib_main
+ * @param b - buffer to write the header to
+ * @param src - source IP
+ * @param dst - destination IP
+ * @param proto - payload proto
+ *
+ * @return - pointer to start of IP header
+ */
+always_inline void *
+vlib_buffer_push_ip6 (vlib_main_t * vm, vlib_buffer_t * b,
+                      ip6_address_t * src, ip6_address_t * dst, int proto)
+{
+  ip6_header_t *ip6h;
+  u16 payload_length;
+
+  /* make some room */
+  ip6h = vlib_buffer_push_uninit (b, sizeof (ip6_header_t));
+
+  /* version nibble = 6, traffic class and flow label = 0 */
+  ip6h->ip_version_traffic_class_and_flow_label =
+    clib_host_to_net_u32 (0x6 << 28);
+
+  /* calculate ip6 payload length: the chain length now includes the
+     header that was just pushed, so take it off again */
+  payload_length = vlib_buffer_length_in_chain (vm, b);
+  payload_length -= sizeof (*ip6h);
+
+  ip6h->payload_length = clib_host_to_net_u16 (payload_length);
+
+  ip6h->hop_limit = 0xff;
+  ip6h->protocol = proto;
+  clib_memcpy (ip6h->src_address.as_u8, src->as_u8,
+               sizeof (ip6h->src_address));
+  /* size the copy by the field actually being written */
+  clib_memcpy (ip6h->dst_address.as_u8, dst->as_u8,
+               sizeof (ip6h->dst_address));
+  b->flags |= VNET_BUFFER_F_IS_IP6;
+
+  return ip6h;
+}
+
+#endif /* included_ip_ip6_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_error.h b/src/vnet/ip/ip6_error.h
new file mode 100644
index 00000000..a2807169
--- /dev/null
+++ b/src/vnet/ip/ip6_error.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_error.h: ip6 fast path errors
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_ip6_error_h
+#define included_ip_ip6_error_h
+
+/*
+ * X-macro listing every ip6 error as _ (SYMBOL, "description").
+ * Expand it with a local definition of _ () to generate parallel tables;
+ * the order of the entries fixes the numeric values of ip6_error_t.
+ */
+#define foreach_ip6_error \
+ /* Must be first. */ \
+ _ (NONE, "valid ip6 packets") \
+ \
+ /* Errors signalled by ip6-input */ \
+ _ (TOO_SHORT, "ip6 length < 40 bytes") \
+ _ (BAD_LENGTH, "ip6 length > l2 length") \
+ _ (VERSION, "ip6 version != 6") \
+ _ (TIME_EXPIRED, "ip6 ttl <= 1") \
+ \
+ /* Errors signalled by ip6-rewrite. */ \
+ _ (MTU_EXCEEDED, "ip6 MTU exceeded") \
+ _ (DST_LOOKUP_MISS, "ip6 destination lookup miss") \
+ _ (SRC_LOOKUP_MISS, "ip6 source lookup miss") \
+ _ (ADJACENCY_DROP, "ip6 adjacency drop") \
+ _ (ADJACENCY_PUNT, "ip6 adjacency punt") \
+ \
+ /* Errors signalled by ip6-local. */ \
+ _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
+ _ (UDP_CHECKSUM, "bad udp checksum") \
+ _ (ICMP_CHECKSUM, "bad icmp checksum") \
+ _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
+ \
+ /* Errors signalled by udp6-lookup. */ \
+ _ (UNKNOWN_UDP_PORT, "no listener for udp port") \
+ \
+ /* Spoofed packets in ip6-rewrite-local */ \
+ _(SPOOFED_LOCAL_PACKETS, "ip6 spoofed local-address packet drops") \
+ \
+ /* Errors signalled by ip6-inacl */ \
+ _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
+ _ (INACL_SESSION_DENY, "input ACL session deny drops")
+
+/*
+ * ip6 error codes: one IP6_ERROR_<sym> enumerator per foreach_ip6_error
+ * entry, in list order (so IP6_ERROR_NONE is 0).  IP6_N_ERROR is the
+ * number of codes.
+ */
+typedef enum
+{
+#define _(sym,str) IP6_ERROR_##sym,
+ foreach_ip6_error
+#undef _
+ IP6_N_ERROR,
+} ip6_error_t;
+
+#endif /* included_ip_ip6_error_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_format.c b/src/vnet/ip/ip6_format.c
new file mode 100644
index 00000000..56899b73
--- /dev/null
+++ b/src/vnet/ip/ip6_format.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_format.c: ip6 formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+/*
+ * Format an IP6 address.
+ *
+ * va_args: ip6_address_t *.  The eight 16-bit groups are printed in hex
+ * separated by ':'.  The longest run of two or more all-zero groups (the
+ * first such run when several tie) is compressed to "::".
+ */
+u8 *
+format_ip6_address (u8 * s, va_list * args)
+{
+  ip6_address_t *a = va_arg (*args, ip6_address_t *);
+  u32 best_len = 0, run_len = 0;
+  int best_start = -1, run_start = 0;
+  int need_colon = 0;
+  int i;
+
+  /* Pass 1: locate the zero run to compress. */
+  for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+    {
+      if (a->as_u16[i] != 0)
+        {
+          run_len = 0;
+          continue;
+        }
+
+      if (run_len == 0)
+        run_start = i;
+      run_len++;
+
+      /* Only runs of more than one group qualify; a strict comparison
+         keeps the earliest run among equally long ones. */
+      if (run_len > 1 && run_len > best_len)
+        {
+          best_start = run_start;
+          best_len = run_len;
+        }
+    }
+
+  /* Pass 2: emit the groups, skipping over the compressed run. */
+  i = 0;
+  while (i < ARRAY_LEN (a->as_u16))
+    {
+      if (i == best_start)
+        {
+          s = format (s, "::");
+          i += best_len;
+          /* "::" already provides the separator for the next group. */
+          need_colon = 0;
+          continue;
+        }
+
+      s = format (s, "%s%x",
+                  need_colon ? ":" : "",
+                  clib_net_to_host_u16 (a->as_u16[i]));
+      need_colon = 1;
+      i++;
+    }
+
+  return s;
+}
+
+/*
+ * Format an IP6 route destination and length as "<address>/<length>".
+ * va_args: ip6_address_t *, then the prefix length passed as a u32.
+ */
+u8 *
+format_ip6_address_and_length (u8 * s, va_list * args)
+{
+  ip6_address_t *addr = va_arg (*args, ip6_address_t *);
+  u8 prefix_len = va_arg (*args, u32);
+
+  s = format (s, "%U/%d", format_ip6_address, addr, prefix_len);
+  return s;
+}
+
+/*
+ * Parse an IP6 address.
+ *
+ * va_args: ip6_address_t * receiving the result (written only on success).
+ * Accepts up to eight groups of at most four hex digits separated by ':',
+ * with at most one "::" standing for a run of zero groups.  Parsing stops
+ * at the first character that cannot belong to the address; that character
+ * is pushed back onto the input.
+ *
+ * Returns 1 on success, 0 on a malformed address.
+ */
+uword
+unformat_ip6_address (unformat_input_t * input, va_list * args)
+{
+ ip6_address_t *result = va_arg (*args, ip6_address_t *);
+ /* Groups parsed so far, in host byte order. */
+ u16 hex_quads[8];
+ uword hex_quad, n_hex_quads, hex_digit, n_hex_digits;
+ uword c, n_colon, double_colon_index;
+
+ n_hex_quads = hex_quad = n_hex_digits = n_colon = 0;
+ /* ARRAY_LEN (hex_quads) is the "no :: seen yet" marker. */
+ double_colon_index = ARRAY_LEN (hex_quads);
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ /* 16 means "c is not a hex digit". */
+ hex_digit = 16;
+ if (c >= '0' && c <= '9')
+ hex_digit = c - '0';
+ else if (c >= 'a' && c <= 'f')
+ hex_digit = c + 10 - 'a';
+ else if (c >= 'A' && c <= 'F')
+ hex_digit = c + 10 - 'A';
+ /* n_colon counts consecutive colons; a third one ends the address. */
+ else if (c == ':' && n_colon < 2)
+ n_colon++;
+ else
+ {
+ unformat_put_input (input);
+ break;
+ }
+
+ /* Too many hex quads. */
+ if (n_hex_quads >= ARRAY_LEN (hex_quads))
+ return 0;
+
+ if (hex_digit < 16)
+ {
+ hex_quad = (hex_quad << 4) | hex_digit;
+
+ /* Hex quad must fit in 16 bits. */
+ if (n_hex_digits >= 4)
+ return 0;
+
+ n_colon = 0;
+ n_hex_digits++;
+ }
+
+ /* Save position of :: */
+ if (n_colon == 2)
+ {
+ /* More than one :: ? */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ return 0;
+ double_colon_index = n_hex_quads;
+ }
+
+ /* A colon after at least one digit closes the current group. */
+ if (n_colon > 0 && n_hex_digits > 0)
+ {
+ hex_quads[n_hex_quads++] = hex_quad;
+ hex_quad = 0;
+ n_hex_digits = 0;
+ }
+ }
+
+ /* Store the trailing group that was not followed by a colon. */
+ if (n_hex_digits > 0)
+ hex_quads[n_hex_quads++] = hex_quad;
+
+ {
+ word i;
+
+ /* Expand :: to appropriate number of zero hex quads. */
+ if (double_colon_index < ARRAY_LEN (hex_quads))
+ {
+ word n_zero = ARRAY_LEN (hex_quads) - n_hex_quads;
+
+ /* Move the groups that followed "::" to the end of the array,
+    working backwards so that no group is overwritten before it
+    has been moved. */
+ for (i = n_hex_quads - 1; i >= (signed) double_colon_index; i--)
+ hex_quads[n_zero + i] = hex_quads[i];
+
+ /* Zero-fill the gap this opened at the position of "::". */
+ for (i = 0; i < n_zero; i++)
+ {
+ ASSERT ((double_colon_index + i) < ARRAY_LEN (hex_quads));
+ hex_quads[double_colon_index + i] = 0;
+ }
+
+ n_hex_quads = ARRAY_LEN (hex_quads);
+ }
+
+ /* Too few hex quads given. */
+ if (n_hex_quads < ARRAY_LEN (hex_quads))
+ return 0;
+
+ /* Commit the result in network byte order. */
+ for (i = 0; i < ARRAY_LEN (hex_quads); i++)
+ result->as_u16[i] = clib_host_to_net_u16 (hex_quads[i]);
+
+ return 1;
+ }
+}
+
+/*
+ * Format an IP6 header.
+ *
+ * va_args: ip6_header_t *, then the number of header bytes available as a
+ * u32.  Prints protocol, addresses, traffic class, flow label, hop limit
+ * and payload length (plus the version when it is not 6).  When bytes
+ * remain after the IPv6 header and the protocol has a registered header
+ * formatter, that formatter is invoked on what follows.
+ */
+u8 *
+format_ip6_header (u8 * s, va_list * args)
+{
+  ip6_header_t *ip = va_arg (*args, ip6_header_t *);
+  u32 max_header_bytes = va_arg (*args, u32);
+  u32 first_word, ip_version, traffic_class, flow_label;
+  ip_protocol_info_t *pi;
+  uword indent;
+
+  /* Nothing to do. */
+  if (max_header_bytes < sizeof (ip[0]))
+    return format (s, "IP header truncated");
+
+  indent = format_get_indent (s) + 2;
+
+  s = format (s, "%U: %U -> %U",
+              format_ip_protocol, ip->protocol,
+              format_ip6_address, &ip->src_address,
+              format_ip6_address, &ip->dst_address);
+
+  /* Split the first 32-bit word: 4 bits version, 8 bits traffic class,
+     20 bits flow label. */
+  first_word =
+    clib_net_to_host_u32 (ip->ip_version_traffic_class_and_flow_label);
+  ip_version = (first_word >> 28);
+  traffic_class = (first_word >> 20) & 0xff;
+  flow_label = first_word & pow2_mask (20);
+
+  if (ip_version != 6)
+    s = format (s, "\n%Uversion %d", format_white_space, indent, ip_version);
+
+  s =
+    format (s,
+            "\n%Utos 0x%02x, flow label 0x%x, hop limit %d, payload length %d",
+            format_white_space, indent, traffic_class, flow_label,
+            ip->hop_limit, clib_net_to_host_u16 (ip->payload_length));
+
+  /* No bytes beyond the IPv6 header: nothing to recurse into. */
+  if (max_header_bytes <= sizeof (ip[0]))
+    return s;
+
+  /* Recurse into next protocol layer. */
+  pi = ip_get_protocol_info (&ip_main, ip->protocol);
+  if (pi && pi->format_header)
+    s = format (s, "\n%U%U",
+                format_white_space, indent - 2, pi->format_header,
+                /* next protocol header */ (void *) (ip + 1),
+                max_header_bytes - sizeof (ip[0]));
+
+  return s;
+}
+
+/*
+ * Parse an IP6 header.
+ *
+ * va_args: u8 ** pointing at the vector the packet is being built in.
+ * An IPv6 header is appended to the vector and filled in from input of
+ * the form "<protocol>: <src> -> <dst>", optionally followed by
+ * "tos <n>" and/or "hop-limit <n>".  If the protocol has a registered
+ * header parser, it is then run to append the next layer.  Finally the
+ * payload length is set to the number of bytes appended after the header.
+ *
+ * Returns 1 on success, 0 if the input does not parse.
+ */
+uword
+unformat_ip6_header (unformat_input_t * input, va_list * args)
+{
+  u8 **result = va_arg (*args, u8 **);
+  ip_protocol_info_t *pi;
+  ip6_header_t *ip;
+  void *hdr;
+  int old_length;
+  int value;
+
+  /* Allocate space for IP header. */
+  old_length = vec_len (*result);
+  vec_add2 (*result, hdr, sizeof (ip[0]));
+  ip = hdr;
+
+  memset (ip, 0, sizeof (ip[0]));
+  ip->ip_version_traffic_class_and_flow_label =
+    clib_host_to_net_u32 (6 << 28);
+
+  if (!unformat (input, "%U: %U -> %U",
+                 unformat_ip_protocol, &ip->protocol,
+                 unformat_ip6_address, &ip->src_address,
+                 unformat_ip6_address, &ip->dst_address))
+    return 0;
+
+  /* Parse options until something else shows up. */
+  for (;;)
+    {
+      if (unformat (input, "tos %U", unformat_vlib_number, &value))
+        {
+          ip->ip_version_traffic_class_and_flow_label |=
+            clib_host_to_net_u32 ((value & 0xff) << 20);
+          continue;
+        }
+
+      if (unformat (input, "hop-limit %U", unformat_vlib_number, &value))
+        {
+          ip->hop_limit = value;
+          continue;
+        }
+
+      /* Can't parse input: try next protocol level. */
+      break;
+    }
+
+  /* Recurse into next protocol layer. */
+  pi = ip_get_protocol_info (&ip_main, ip->protocol);
+  if (pi && pi->unformat_header)
+    {
+      if (!unformat_user (input, pi->unformat_header, result))
+        return 0;
+
+      /* Result may have moved: re-derive the header pointer. */
+      ip = (ip6_header_t *) ((u8 *) * result + old_length);
+    }
+
+  ip->payload_length =
+    clib_host_to_net_u16 (vec_len (*result) - (old_length + sizeof (ip[0])));
+
+  return 1;
+}
+
+/*
+ * Parse an IP46 address.
+ *
+ * va_args: ip46_address_t * for the result, then an ip46_type_t limiting
+ * the accepted families.  IPv4 syntax is tried first (unless the type is
+ * IP46_TYPE_IP6), then IPv6 syntax (unless the type is IP46_TYPE_IP4).
+ * Returns 1 when an address was parsed, 0 otherwise.
+ */
+uword
+unformat_ip46_address (unformat_input_t * input, va_list * args)
+{
+  ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+  ip46_type_t type = va_arg (*args, ip46_type_t);
+  int ip4_allowed = (type != IP46_TYPE_IP6);
+  int ip6_allowed = (type != IP46_TYPE_IP4);
+
+  if (ip4_allowed && unformat (input, "%U", unformat_ip4_address, &ip46->ip4))
+    {
+      ip46_address_mask_ip4 (ip46);
+      return 1;
+    }
+
+  if (ip6_allowed && unformat (input, "%U", unformat_ip6_address, &ip46->ip6))
+    return 1;
+
+  return 0;
+}
+
+/*
+ * Format an IP46 address.
+ *
+ * va_args: ip46_address_t *, then an ip46_type_t.  IP46_TYPE_IP6 prints
+ * the IPv6 member, IP46_TYPE_ANY lets ip46_address_is_ip4 () decide, and
+ * any other type prints the IPv4 member.
+ */
+u8 *
+format_ip46_address (u8 * s, va_list * args)
+{
+  ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+  ip46_type_t type = va_arg (*args, ip46_type_t);
+  int is_ip4 = 1;
+
+  if (type == IP46_TYPE_ANY)
+    is_ip4 = ip46_address_is_ip4 (ip46);
+  else if (type == IP46_TYPE_IP6)
+    is_ip4 = 0;
+
+  if (is_ip4)
+    return format (s, "%U", format_ip4_address, &ip46->ip4);
+
+  return format (s, "%U", format_ip6_address, &ip46->ip6);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
new file mode 100644
index 00000000..54582d38
--- /dev/null
+++ b/src/vnet/ip/ip6_forward.c
@@ -0,0 +1,3558 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_forward.c: IP v6 forwarding
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6_neighbor.h>
+#include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
+#include <vnet/srp/srp.h> /* for srp_hw_interface_class */
+#include <vppinfra/cache.h>
+#include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/mfib/ip6_mfib.h>
+#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/classify_dpo.h>
+
+#include <vppinfra/bihash_template.c>
+
+/* Flag used by IOAM code: the classifier sets it, pop-hop-by-hop checks it. */
+#define OI_DECAP 0x80000000
+
+/**
+ * @file
+ * @brief IPv6 Forwarding.
+ *
+ * This file contains the source code for IPv6 forwarding.
+ */
+
+/*
+ * Trace helper for the ip6 forwarding nodes (defined elsewhere in this
+ * file).  ip6_lookup_inline () calls it with VLIB_TX when the node has
+ * VLIB_NODE_FLAG_TRACE set.
+ */
+void
+ip6_forward_next_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ vlib_rx_or_tx_t which_adj_index);
+
+/*
+ * IPv6 FIB lookup for every buffer of a frame.
+ *
+ * For each packet:
+ *  - the FIB index is the one bound to the RX interface, unless
+ *    sw_if_index[VLIB_TX] has been set (!= ~0), in which case that value
+ *    is used as the FIB index;
+ *  - the destination address is looked up, yielding a load-balance index;
+ *  - when the load-balance has more than one bucket the flow hash is
+ *    computed and selects the forwarding bucket, otherwise bucket 0 is
+ *    used;
+ *  - packets starting with a hop-by-hop options header are diverted to
+ *    IP6_LOOKUP_NEXT_HOP_BY_HOP when the chosen DPO is an adjacency and
+ *    HBH processing is enabled;
+ *  - the DPO index is stored in ip.adj_index[VLIB_TX], the per
+ *    load-balance counter is incremented and the buffer is enqueued to
+ *    the DPO's next node.
+ *
+ * Returns the number of buffers in the frame.
+ */
+always_inline uword
+ip6_lookup_inline (vlib_main_t * vm,
+                   vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  ip6_main_t *im = &ip6_main;
+  vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
+  u32 n_left_from, n_left_to_next, *from, *to_next;
+  ip_lookup_next_t next;
+  u32 thread_index = vlib_get_thread_index ();
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+      /* Dual loop: two packets per iteration while prefetching the
+         following two. */
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+        {
+          vlib_buffer_t *p0, *p1;
+          u32 pi0, pi1, lbi0, lbi1, wrong_next;
+          ip_lookup_next_t next0, next1;
+          ip6_header_t *ip0, *ip1;
+          ip6_address_t *dst_addr0, *dst_addr1;
+          u32 fib_index0, fib_index1;
+          u32 flow_hash_config0, flow_hash_config1;
+          const dpo_id_t *dpo0, *dpo1;
+          const load_balance_t *lb0, *lb1;
+
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t *p2, *p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, LOAD);
+            vlib_prefetch_buffer_header (p3, LOAD);
+            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
+          }
+
+          /* Speculatively enqueue both packets to the current next. */
+          pi0 = to_next[0] = from[0];
+          pi1 = to_next[1] = from[1];
+
+          p0 = vlib_get_buffer (vm, pi0);
+          p1 = vlib_get_buffer (vm, pi1);
+
+          ip0 = vlib_buffer_get_current (p0);
+          ip1 = vlib_buffer_get_current (p1);
+
+          dst_addr0 = &ip0->dst_address;
+          dst_addr1 = &ip1->dst_address;
+
+          fib_index0 =
+            vec_elt (im->fib_index_by_sw_if_index,
+                     vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+          fib_index1 =
+            vec_elt (im->fib_index_by_sw_if_index,
+                     vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+
+          /* A TX sw_if_index other than ~0 overrides the FIB index. */
+          fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+            fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+          fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
+            fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
+
+          lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
+          lbi1 = ip6_fib_table_fwding_lookup (im, fib_index1, dst_addr1);
+
+          lb0 = load_balance_get (lbi0);
+          lb1 = load_balance_get (lbi1);
+          ASSERT (lb0->lb_n_buckets > 0);
+          ASSERT (lb1->lb_n_buckets > 0);
+          ASSERT (is_pow2 (lb0->lb_n_buckets));
+          ASSERT (is_pow2 (lb1->lb_n_buckets));
+
+          vnet_buffer (p0)->ip.flow_hash = vnet_buffer (p1)->ip.flow_hash = 0;
+
+          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+            {
+              flow_hash_config0 = lb0->lb_hash_config;
+              vnet_buffer (p0)->ip.flow_hash =
+                ip6_compute_flow_hash (ip0, flow_hash_config0);
+              dpo0 =
+                load_balance_get_fwd_bucket (lb0,
+                                             (vnet_buffer (p0)->ip.flow_hash &
+                                              (lb0->lb_n_buckets_minus_1)));
+            }
+          else
+            {
+              dpo0 = load_balance_get_bucket_i (lb0, 0);
+            }
+          if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+            {
+              flow_hash_config1 = lb1->lb_hash_config;
+              vnet_buffer (p1)->ip.flow_hash =
+                ip6_compute_flow_hash (ip1, flow_hash_config1);
+              dpo1 =
+                load_balance_get_fwd_bucket (lb1,
+                                             (vnet_buffer (p1)->ip.flow_hash &
+                                              (lb1->lb_n_buckets_minus_1)));
+            }
+          else
+            {
+              dpo1 = load_balance_get_bucket_i (lb1, 0);
+            }
+          next0 = dpo0->dpoi_next_node;
+          next1 = dpo1->dpoi_next_node;
+
+          /* Only process the HBH Option Header if explicitly configured to do so */
+          if (PREDICT_FALSE
+              (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+            {
+              next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
+                (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+            }
+          if (PREDICT_FALSE
+              (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+            {
+              next1 = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
+                (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next1;
+            }
+          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+          vlib_increment_combined_counter
+            (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+          vlib_increment_combined_counter
+            (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+
+          from += 2;
+          to_next += 2;
+          n_left_to_next -= 2;
+          n_left_from -= 2;
+
+          /* Bit 0: packet 0 mis-speculated, bit 1: packet 1 did. */
+          wrong_next = (next0 != next) + 2 * (next1 != next);
+          if (PREDICT_FALSE (wrong_next != 0))
+            {
+              switch (wrong_next)
+                {
+                case 1:
+                  /* A B A */
+                  to_next[-2] = pi1;
+                  to_next -= 1;
+                  n_left_to_next += 1;
+                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
+                  break;
+
+                case 2:
+                  /* A A B */
+                  to_next -= 1;
+                  n_left_to_next += 1;
+                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
+                  break;
+
+                case 3:
+                  /* A B C */
+                  to_next -= 2;
+                  n_left_to_next += 2;
+                  vlib_set_next_frame_buffer (vm, node, next0, pi0);
+                  vlib_set_next_frame_buffer (vm, node, next1, pi1);
+                  if (next0 == next1)
+                    {
+                      /* A B B */
+                      vlib_put_next_frame (vm, node, next, n_left_to_next);
+                      next = next1;
+                      vlib_get_next_frame (vm, node, next, to_next,
+                                           n_left_to_next);
+                    }
+                }
+            }
+        }
+
+      /* Single loop: remaining packets, one at a time. */
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          vlib_buffer_t *p0;
+          ip6_header_t *ip0;
+          u32 pi0, lbi0;
+          ip_lookup_next_t next0;
+          const load_balance_t *lb0;
+          ip6_address_t *dst_addr0;
+          u32 fib_index0, flow_hash_config0;
+          const dpo_id_t *dpo0;
+
+          pi0 = from[0];
+          to_next[0] = pi0;
+
+          p0 = vlib_get_buffer (vm, pi0);
+
+          ip0 = vlib_buffer_get_current (p0);
+
+          dst_addr0 = &ip0->dst_address;
+
+          fib_index0 =
+            vec_elt (im->fib_index_by_sw_if_index,
+                     vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+          fib_index0 =
+            (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
+             (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+          lbi0 = ip6_fib_table_fwding_lookup (im, fib_index0, dst_addr0);
+
+          lb0 = load_balance_get (lbi0);
+
+          vnet_buffer (p0)->ip.flow_hash = 0;
+          ASSERT (lb0->lb_n_buckets > 0);
+          ASSERT (is_pow2 (lb0->lb_n_buckets));
+
+          /* Bucket selection must match the dual loop above: the result
+             of load_balance_get_fwd_bucket () is what gets forwarded on
+             and must not be replaced by a raw bucket lookup afterwards. */
+          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+            {
+              flow_hash_config0 = lb0->lb_hash_config;
+              vnet_buffer (p0)->ip.flow_hash =
+                ip6_compute_flow_hash (ip0, flow_hash_config0);
+              dpo0 =
+                load_balance_get_fwd_bucket (lb0,
+                                             (vnet_buffer (p0)->ip.flow_hash &
+                                              (lb0->lb_n_buckets_minus_1)));
+            }
+          else
+            {
+              dpo0 = load_balance_get_bucket_i (lb0, 0);
+            }
+
+          next0 = dpo0->dpoi_next_node;
+
+          /* Only process the HBH Option Header if explicitly configured to do so */
+          if (PREDICT_FALSE
+              (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+            {
+              next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
+                (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+            }
+          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+          vlib_increment_combined_counter
+            (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+          from += 1;
+          to_next += 1;
+          n_left_to_next -= 1;
+          n_left_from -= 1;
+
+          if (PREDICT_FALSE (next0 != next))
+            {
+              /* Mis-speculated: take the packet back, close the current
+                 frame and re-enqueue it on a frame for next0. */
+              n_left_to_next += 1;
+              vlib_put_next_frame (vm, node, next, n_left_to_next);
+              next = next0;
+              vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+              to_next[0] = pi0;
+              to_next += 1;
+              n_left_to_next -= 1;
+            }
+        }
+
+      vlib_put_next_frame (vm, node, next, n_left_to_next);
+    }
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+  return frame->n_vectors;
+}
+
+/*
+ * Install the FIB entries that go with an interface address:
+ *  - for prefixes shorter than /128, a connected + attached entry for the
+ *    prefix via the interface;
+ *  - if a classify table is bound to the interface, a /128 classify DPO
+ *    entry for the address;
+ *  - a connected + local /128 entry for the address itself.
+ */
+static void
+ip6_add_interface_routes (vnet_main_t * vnm, u32 sw_if_index,
+                          ip6_main_t * im, u32 fib_index,
+                          ip_interface_address_t * a)
+{
+  ip_lookup_main_t *lm = &im->lookup_main;
+  ip6_address_t *address = ip_interface_address_get_address (lm, a);
+  fib_prefix_t pfx = {
+    .fp_len = a->address_length,
+    .fp_proto = FIB_PROTOCOL_IP6,
+    .fp_addr.ip6 = *address,
+  };
+
+  /* Connected route for the prefix. */
+  if (a->address_length < 128)
+    fib_table_entry_update_one_path (fib_index,
+                                     &pfx,
+                                     FIB_SOURCE_INTERFACE,
+                                     (FIB_ENTRY_FLAG_CONNECTED |
+                                      FIB_ENTRY_FLAG_ATTACHED),
+                                     DPO_PROTO_IP6,
+                                     /* No next-hop address */
+                                     NULL, sw_if_index,
+                                     /* invalid FIB index */
+                                     ~0, 1,
+                                     /* no label stack */
+                                     NULL, FIB_ROUTE_PATH_FLAG_NONE);
+
+  /* Everything below is about the host address itself. */
+  pfx.fp_len = 128;
+
+  if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index) &&
+      lm->classify_table_index_by_sw_if_index[sw_if_index] != (u32) ~ 0)
+    {
+      u32 table_index = lm->classify_table_index_by_sw_if_index[sw_if_index];
+      dpo_id_t classify_dpo = DPO_INVALID;
+
+      dpo_set (&classify_dpo,
+               DPO_CLASSIFY,
+               DPO_PROTO_IP6,
+               classify_dpo_create (DPO_PROTO_IP6, table_index));
+
+      fib_table_entry_special_dpo_add (fib_index,
+                                       &pfx,
+                                       FIB_SOURCE_CLASSIFY,
+                                       FIB_ENTRY_FLAG_NONE, &classify_dpo);
+      dpo_reset (&classify_dpo);
+    }
+
+  /* Local (receive) route for the address. */
+  fib_table_entry_update_one_path (fib_index, &pfx,
+                                   FIB_SOURCE_INTERFACE,
+                                   (FIB_ENTRY_FLAG_CONNECTED |
+                                    FIB_ENTRY_FLAG_LOCAL),
+                                   DPO_PROTO_IP6,
+                                   &pfx.fp_addr,
+                                   sw_if_index, ~0,
+                                   1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
+}
+
+/*
+ * Remove the FIB_SOURCE_INTERFACE entries of an interface address: the
+ * prefix entry (only when the prefix is shorter than /128) and then the
+ * /128 entry for the address itself.
+ */
+static void
+ip6_del_interface_routes (ip6_main_t * im,
+                          u32 fib_index,
+                          ip6_address_t * address, u32 address_length)
+{
+  fib_prefix_t pfx = {
+    .fp_len = address_length,
+    .fp_proto = FIB_PROTOCOL_IP6,
+    .fp_addr.ip6 = *address,
+  };
+
+  if (pfx.fp_len < 128)
+    fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
+
+  pfx.fp_len = 128;
+  fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
+}
+
+/*
+ * Reference-counted enable / disable of IPv6 on an interface.
+ *
+ * A per-interface counter is incremented on enable and decremented on
+ * disable.  The "ip6-drop" feature on the ip6-unicast and ip6-multicast
+ * arcs is only toggled when the counter moves between 0 and 1: it is
+ * switched off on the first enable and back on after the last disable.
+ */
+void
+ip6_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
+{
+  ip6_main_t *im = &ip6_main;
+
+  vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
+
+  if (is_enable)
+    {
+      /* Only the 0 -> 1 transition changes anything. */
+      im->ip_enabled_by_sw_if_index[sw_if_index]++;
+      if (im->ip_enabled_by_sw_if_index[sw_if_index] != 1)
+        return;
+    }
+  else
+    {
+      /* The ref count is already 0 when an address is removed from an
+       * interface that has no address - this is not a critical error. */
+      if (im->ip_enabled_by_sw_if_index[sw_if_index] == 0)
+        return;
+
+      /* Only the 1 -> 0 transition changes anything. */
+      im->ip_enabled_by_sw_if_index[sw_if_index]--;
+      if (im->ip_enabled_by_sw_if_index[sw_if_index] != 0)
+        return;
+    }
+
+  vnet_feature_enable_disable ("ip6-unicast", "ip6-drop", sw_if_index,
+                               !is_enable, 0, 0);
+
+  vnet_feature_enable_disable ("ip6-multicast", "ip6-drop", sw_if_index,
+                               !is_enable, 0, 0);
+}
+
+/*
+ * Return the first address that the interface address walk yields for
+ * sw_if_index (unnumbered interfaces are honored), or 0 when the walk
+ * yields nothing.
+ */
+ip6_address_t *
+ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
+{
+  ip_lookup_main_t *lm = &im->lookup_main;
+  ip_interface_address_t *ia = 0;
+  ip6_address_t *first = 0;
+
+  /* *INDENT-OFF* */
+  foreach_ip_interface_address (lm, ia, sw_if_index,
+                                1 /* honor unnumbered */,
+  ({
+    /* Stop at the very first address. */
+    first = ip_interface_address_get_address (lm, ia);
+    break;
+  }));
+  /* *INDENT-ON* */
+  return first;
+}
+
+/*
+ * Add (is_del == 0) or delete (is_del != 0) an IPv6 address on an
+ * interface.
+ *
+ * The address is first added to / removed from the interface address
+ * pool.  If that fails, or the pool size did not change (e.g. the address
+ * was a duplicate), nothing else happens.  Otherwise the interface's IPv6
+ * enable count is adjusted, the interface routes are installed or removed
+ * and the registered add/del address callbacks are invoked.
+ *
+ * Returns 0 on success or an error; sw_if_index 0 (local0) is rejected.
+ */
+clib_error_t *
+ip6_add_del_interface_address (vlib_main_t * vm,
+                               u32 sw_if_index,
+                               ip6_address_t * address,
+                               u32 address_length, u32 is_del)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip6_main_t *im = &ip6_main;
+  ip_lookup_main_t *lm = &im->lookup_main;
+  ip6_add_del_interface_address_callback_t *cb;
+  ip6_address_fib_t ip6_af, *addr_fib = 0;
+  clib_error_t *error;
+  u32 if_address_index;
+  uword n_addrs_before;
+
+  /* local0 interface doesn't support IP addressing */
+  if (sw_if_index == 0)
+    return
+      clib_error_create ("local0 interface doesn't support IP addressing");
+
+  vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+  vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
+
+  ip6_addr_fib_init (&ip6_af, address,
+                     vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
+  vec_add1 (addr_fib, ip6_af);
+
+  n_addrs_before = pool_elts (lm->if_address_pool);
+
+  error = ip_interface_address_add_del
+    (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
+
+  /* Stop on failure, or when the pool size is unchanged (adding a
+     duplicate address). */
+  if (error || n_addrs_before == pool_elts (lm->if_address_pool))
+    goto done;
+
+  ip6_sw_interface_enable_disable (sw_if_index, !is_del);
+
+  if (is_del)
+    ip6_del_interface_routes (im, ip6_af.fib_index, address, address_length);
+  else
+    ip6_add_interface_routes (vnm, sw_if_index,
+                              im, ip6_af.fib_index,
+                              pool_elt_at_index (lm->if_address_pool,
+                                                 if_address_index));
+
+  /* Tell everybody who asked to be told. */
+  vec_foreach (cb, im->add_del_interface_address_callbacks)
+    cb->function (im, cb->function_opaque, sw_if_index,
+                  address, address_length, if_address_index, is_del);
+
+done:
+  vec_free (addr_fib);
+  return error;
+}
+
+/*
+ * Admin up/down handler: for every address configured directly on the
+ * interface (unnumbered is not followed), install its interface routes
+ * when the ADMIN_UP flag is set and remove them otherwise.
+ * Always returns 0.
+ */
+clib_error_t *
+ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+{
+  ip6_main_t *im = &ip6_main;
+  ip_lookup_main_t *lm = &im->lookup_main;
+  ip_interface_address_t *ia;
+  ip6_address_t *addr;
+  u32 going_up, fib_index;
+
+  /* Fill in lookup tables with default table (0). */
+  vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+
+  vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index,
+                           sw_if_index, ~0);
+
+  going_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+  fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
+
+  /* *INDENT-OFF* */
+  foreach_ip_interface_address (lm, ia, sw_if_index,
+                                0 /* honor unnumbered */,
+  ({
+    addr = ip_interface_address_get_address (lm, ia);
+    if (!going_up)
+      ip6_del_interface_routes (im, fib_index,
+                                addr, ia->address_length);
+    else
+      ip6_add_interface_routes (vnm, sw_if_index,
+                                im, fib_index,
+                                ia);
+  }));
+  /* *INDENT-ON* */
+
+  return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
+
+/*
+ * Built-in ip6 unicast rx feature path definition.
+ *
+ * The arc starts at "ip6-input".  The .runs_before constraints below chain
+ * the features in this order:
+ *   ip6-flow-classify, ip6-inacl, ip6-policer-classify, ipsec-input-ip6,
+ *   l2tp-decap, vpath-input-ip6, ip6-vxlan-bypass, ip6-lookup.
+ * "ip6-drop" is only constrained to run before "ip6-lookup".
+ */
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
+{
+ .arc_name = "ip6-unicast",
+ .start_nodes = VNET_FEATURES ("ip6-input"),
+ .arc_index_ptr = &ip6_main.lookup_main.ucast_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip6_flow_classify, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-flow-classify",
+ .runs_before = VNET_FEATURES ("ip6-inacl"),
+};
+
+VNET_FEATURE_INIT (ip6_inacl, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-inacl",
+ .runs_before = VNET_FEATURES ("ip6-policer-classify"),
+};
+
+VNET_FEATURE_INIT (ip6_policer_classify, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-policer-classify",
+ .runs_before = VNET_FEATURES ("ipsec-input-ip6"),
+};
+
+VNET_FEATURE_INIT (ip6_ipsec, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ipsec-input-ip6",
+ .runs_before = VNET_FEATURES ("l2tp-decap"),
+};
+
+VNET_FEATURE_INIT (ip6_l2tp, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "l2tp-decap",
+ .runs_before = VNET_FEATURES ("vpath-input-ip6"),
+};
+
+VNET_FEATURE_INIT (ip6_vpath, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "vpath-input-ip6",
+ .runs_before = VNET_FEATURES ("ip6-vxlan-bypass"),
+};
+
+VNET_FEATURE_INIT (ip6_vxlan_bypass, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-vxlan-bypass",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+
+/* This is the feature toggled by ip6_sw_interface_enable_disable() and
+ * ip6_sw_interface_add_del() in this file. */
+VNET_FEATURE_INIT (ip6_drop, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-drop",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+
+VNET_FEATURE_INIT (ip6_lookup, static) =
+{
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-lookup",
+ .runs_before = 0, /*last feature*/
+};
+
+/*
+ * Built-in ip6 multicast rx feature path definition.
+ *
+ * The arc starts at "ip6-input".  "vpath-input-ip6" and "ip6-drop" are
+ * both constrained to run before "ip6-mfib-forward-lookup", which is the
+ * last feature on the arc.
+ */
+VNET_FEATURE_ARC_INIT (ip6_multicast, static) =
+{
+ .arc_name = "ip6-multicast",
+ .start_nodes = VNET_FEATURES ("ip6-input"),
+ .arc_index_ptr = &ip6_main.lookup_main.mcast_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip6_vpath_mc, static) = {
+ .arc_name = "ip6-multicast",
+ .node_name = "vpath-input-ip6",
+ .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
+};
+
+/* This is the feature toggled by ip6_sw_interface_enable_disable() and
+ * ip6_sw_interface_add_del() in this file. */
+VNET_FEATURE_INIT (ip6_drop_mc, static) = {
+ .arc_name = "ip6-multicast",
+ .node_name = "ip6-drop",
+ .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
+};
+
+VNET_FEATURE_INIT (ip6_mc_lookup, static) = {
+ .arc_name = "ip6-multicast",
+ .node_name = "ip6-mfib-forward-lookup",
+ .runs_before = 0, /* last feature */
+};
+
+/*
+ * Built-in ip6 tx feature path definition.
+ *
+ * The arc starts at "ip6-rewrite" and "ip6-midchain".  "ipsec-output-ip6"
+ * is constrained to run before "interface-output", which has no
+ * runs_before constraint of its own.
+ */
+VNET_FEATURE_ARC_INIT (ip6_output, static) =
+{
+ .arc_name = "ip6-output",
+ .start_nodes = VNET_FEATURES ("ip6-rewrite", "ip6-midchain"),
+ .arc_index_ptr = &ip6_main.lookup_main.output_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (ip6_ipsec_output, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "ipsec-output-ip6",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+VNET_FEATURE_INIT (ip6_interface_output, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "interface-output",
+ .runs_before = 0, /* not before any other features */
+};
+/* *INDENT-ON* */
+
+/*
+ * Interface create/delete handler.
+ *
+ * On delete, IPv6 state is torn down first: the neighbor code is told the
+ * interface is going away, every address found by the address walk
+ * (honoring unnumbered) is deleted, and the mfib binding is disabled.
+ * In both directions the "ip6-drop" feature on the unicast and multicast
+ * arcs is then set to is_add.  Always returns 0.
+ */
+clib_error_t *
+ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
+{
+  ip6_main_t *im = &ip6_main;
+
+  vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
+  vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
+
+  if (!is_add)
+    {
+      /* Ensure that IPv6 is disabled */
+      ip_lookup_main_t *lm = &im->lookup_main;
+      ip_interface_address_t *ia = 0;
+      ip6_address_t *addr;
+      vlib_main_t *vm = vlib_get_main ();
+
+      ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0 /* is_add */ );
+      /* *INDENT-OFF* */
+      foreach_ip_interface_address (lm, ia, sw_if_index,
+                                    1 /* honor unnumbered */,
+      ({
+        addr = ip_interface_address_get_address (lm, ia);
+        ip6_add_del_interface_address (vm, sw_if_index, addr,
+                                       ia->address_length, 1 /* is_del */);
+      }));
+      /* *INDENT-ON* */
+      ip6_mfib_interface_enable_disable (sw_if_index, 0);
+    }
+
+  vnet_feature_enable_disable ("ip6-unicast", "ip6-drop", sw_if_index,
+                               is_add, 0, 0);
+
+  vnet_feature_enable_disable ("ip6-multicast", "ip6-drop", sw_if_index,
+                               is_add, 0, 0);
+
+  return /* no error */ 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_sw_interface_add_del);
+
+/* Node function for "ip6-lookup": forwards the frame to
+ * ip6_lookup_inline() and returns its result. */
+static uword
+ip6_lookup (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return ip6_lookup_inline (vm, node, frame);
+}
+
+/* Defined further down; declared here so the node registration can
+ * reference it. */
+static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
+
+/* Registration of the "ip6-lookup" graph node; the next-node table comes
+ * from IP6_LOOKUP_NEXT_NODES. */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_lookup_node) =
+{
+ .function = ip6_lookup,
+ .name = "ip6-lookup",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_lookup_trace,
+ .n_next_nodes = IP6_LOOKUP_N_NEXT,
+ .next_nodes = IP6_LOOKUP_NEXT_NODES,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_lookup_node, ip6_lookup);
+
+/*
+ * Node function for "ip6-load-balance".
+ *
+ * For each buffer, the load-balance object index is read from
+ * ip.adj_index[VLIB_TX], one bucket (DPO) of that load-balance is chosen,
+ * and the buffer is sent to the bucket's next node with
+ * ip.adj_index[VLIB_TX] rewritten to the bucket's DPO index.  The
+ * per-load-balance "via" combined counter is incremented for each packet.
+ *
+ * Packets are handled two at a time while at least four remain, then one
+ * at a time.  Returns the number of vectors in the input frame.
+ */
+always_inline uword
+ip6_load_balance (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
+ u32 n_left_from, n_left_to_next, *from, *to_next;
+ ip_lookup_next_t next;
+ u32 thread_index = vlib_get_thread_index ();
+ ip6_main_t *im = &ip6_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+
+ /* Dual loop: needs 4 buffers left because from[2] and from[3] are
+ * prefetched while from[0] and from[1] are processed. */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ ip_lookup_next_t next0, next1;
+ const load_balance_t *lb0, *lb1;
+ vlib_buffer_t *p0, *p1;
+ u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
+ const ip6_header_t *ip0, *ip1;
+ const dpo_id_t *dpo0, *dpo1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, STORE);
+ vlib_prefetch_buffer_header (p3, STORE);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+ CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+ }
+
+ /* Speculatively enqueue both buffers to the cached next frame. */
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get (lbi0);
+ lb1 = load_balance_get (lbi1);
+
+ /*
+ * This node is for via FIBs, so a flow hash stored in the buffer by an
+ * earlier node can be re-used if present.
+ * We don't want to use the same hash value at each level in the recursion
+ * graph as that would lead to polarisation, so a stored hash is shifted
+ * right by one bit before use; if none is stored, one is computed.
+ */
+ hc0 = hc1 = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
+ }
+ /* Bucket index is the hash masked with (n_buckets - 1). */
+ dpo0 =
+ load_balance_get_fwd_bucket (lb0,
+ (hc0 &
+ lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ /* Single bucket: no hashing needed. */
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ vnet_buffer (p1)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ ip6_compute_flow_hash (ip1, lb1->lb_hash_config);
+ }
+ dpo1 =
+ load_balance_get_fwd_bucket (lb1,
+ (hc1 &
+ lb1->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+ }
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ if (PREDICT_FALSE
+ (ip1->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next1 = (dpo_is_adj (dpo1) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next1;
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ /* Counters are indexed by the load-balance index read on entry. */
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ /* Single loop: same processing, one buffer at a time. */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip_lookup_next_t next0;
+ const load_balance_t *lb0;
+ vlib_buffer_t *p0;
+ u32 pi0, lbi0, hc0;
+ const ip6_header_t *ip0;
+ const dpo_id_t *dpo0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+ n_left_from -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ ip0 = vlib_buffer_get_current (p0);
+ lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+ lb0 = load_balance_get (lbi0);
+
+ hc0 = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ ip6_compute_flow_hash (ip0, lb0->lb_hash_config);
+ }
+ dpo0 =
+ load_balance_get_fwd_bucket (lb0,
+ (hc0 &
+ lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ /* Only process the HBH Option Header if explicitly configured to do so */
+ if (PREDICT_FALSE
+ (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+ {
+ next0 = (dpo_is_adj (dpo0) && im->hbh_enabled) ?
+ (ip_lookup_next_t) IP6_LOOKUP_NEXT_HOP_BY_HOP : next0;
+ }
+
+ vlib_increment_combined_counter
+ (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* Registration of the "ip6-load-balance" graph node.  It is declared a
+ * sibling of "ip6-lookup" and uses the lookup trace formatter. */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_load_balance_node) =
+{
+ .function = ip6_load_balance,
+ .name = "ip6-load-balance",
+ .vector_size = sizeof (u32),
+ .sibling_of = "ip6-lookup",
+ .format_trace = format_ip6_lookup_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_load_balance_node, ip6_load_balance);
+
+/* Per-packet trace record written by ip6_forward_next_trace() and read by
+ * the format_ip6_*_trace() functions in this file. */
+typedef struct
+{
+ /* Adjacency taken. */
+ u32 adj_index;
+ /* Flow hash copied from the buffer's ip.flow_hash. */
+ u32 flow_hash;
+ /* The buffer's TX sw_if_index when that is not ~0, otherwise the FIB
+ * index bound to the RX interface (see ip6_forward_next_trace). */
+ u32 fib_index;
+
+ /* Packet data, possibly *after* rewrite. */
+ u8 packet_data[128 - 1 * sizeof (u32)];
+}
+ip6_forward_next_trace_t;
+
+/*
+ * Trace formatter: prints the captured packet bytes as an IPv6 header,
+ * preceded by white space matching the current indent of s.
+ * Variadic arguments: vlib_main_t *, vlib_node_t * (both unused) and the
+ * ip6_forward_next_trace_t * to print.
+ */
+u8 *
+format_ip6_forward_next_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip6_forward_next_trace_t *trace =
+    va_arg (*args, ip6_forward_next_trace_t *);
+  uword indent = format_get_indent (s);
+
+  return format (s, "%U%U",
+                 format_white_space, indent,
+                 format_ip6_header, trace->packet_data,
+                 sizeof (trace->packet_data));
+}
+
+/*
+ * Trace formatter for lookup-style nodes: one line with the fib index,
+ * DPO index and flow hash, then the captured packet bytes formatted as an
+ * IPv6 header on the following line.
+ * Variadic arguments: vlib_main_t *, vlib_node_t * (both unused) and the
+ * ip6_forward_next_trace_t * to print.
+ */
+static u8 *
+format_ip6_lookup_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip6_forward_next_trace_t *trace =
+    va_arg (*args, ip6_forward_next_trace_t *);
+  /* The indent is sampled before anything is appended to s. */
+  uword indent = format_get_indent (s);
+
+  s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
+              trace->fib_index, trace->adj_index, trace->flow_hash);
+
+  return format (s, "\n%U%U",
+                 format_white_space, indent,
+                 format_ip6_header, trace->packet_data,
+                 sizeof (trace->packet_data));
+}
+
+
+/*
+ * Trace formatter for rewrite-style nodes: one line with the value stored
+ * in fib_index (labelled tx_sw_if_index), the adjacency index, the
+ * formatted adjacency and the flow hash, then the captured packet bytes
+ * formatted through the adjacency's packet-data formatter.
+ * Variadic arguments: vlib_main_t *, vlib_node_t * (both unused) and the
+ * ip6_forward_next_trace_t * to print.
+ */
+static u8 *
+format_ip6_rewrite_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip6_forward_next_trace_t *trace =
+    va_arg (*args, ip6_forward_next_trace_t *);
+  /* The indent is sampled before anything is appended to s. */
+  uword indent = format_get_indent (s);
+
+  s = format (s, "tx_sw_if_index %d adj-idx %d : %U flow hash: 0x%08x",
+              trace->fib_index, trace->adj_index, format_ip_adjacency,
+              trace->adj_index, FORMAT_IP_ADJACENCY_NONE, trace->flow_hash);
+
+  return format (s, "\n%U%U",
+                 format_white_space, indent,
+                 format_ip_adjacency_packet_data,
+                 trace->adj_index, trace->packet_data,
+                 sizeof (trace->packet_data));
+}
+
+/*
+ * Add one trace record for buffer b if it is flagged as traced.
+ * The record holds the adjacency index selected by which_adj_index, the
+ * buffer's flow hash, the TX sw_if_index (or, when that is ~0, the FIB
+ * index bound to the RX interface) and a copy of the bytes at the
+ * buffer's current data pointer.
+ */
+always_inline void
+ip6_forward_next_trace_one (vlib_main_t * vm,
+                            vlib_node_runtime_t * node,
+                            ip6_main_t * im,
+                            vlib_buffer_t * b,
+                            vlib_rx_or_tx_t which_adj_index)
+{
+  ip6_forward_next_trace_t *t;
+
+  if (!(b->flags & VLIB_BUFFER_IS_TRACED))
+    return;
+
+  t = vlib_add_trace (vm, node, b, sizeof (t[0]));
+  t->adj_index = vnet_buffer (b)->ip.adj_index[which_adj_index];
+  t->flow_hash = vnet_buffer (b)->ip.flow_hash;
+
+  if (vnet_buffer (b)->sw_if_index[VLIB_TX] != (u32) ~ 0)
+    t->fib_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
+  else
+    t->fib_index = vec_elt (im->fib_index_by_sw_if_index,
+                            vnet_buffer (b)->sw_if_index[VLIB_RX]);
+
+  clib_memcpy (t->packet_data,
+               vlib_buffer_get_current (b), sizeof (t->packet_data));
+}
+
+/* Common trace function for all ip6-forward next nodes: walks every
+ * buffer in the frame and records a trace entry for the traced ones. */
+void
+ip6_forward_next_trace (vlib_main_t * vm,
+                        vlib_node_runtime_t * node,
+                        vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
+{
+  ip6_main_t *im = &ip6_main;
+  u32 *bi = vlib_frame_vector_args (frame);
+  u32 remaining = frame->n_vectors;
+
+  /* Two buffers per pass while four remain, so the following pair can be
+   * prefetched. */
+  for (; remaining >= 4; bi += 2, remaining -= 2)
+    {
+      vlib_buffer_t *b0, *b1;
+
+      /* Prefetch next iteration. */
+      vlib_prefetch_buffer_with_index (vm, bi[2], LOAD);
+      vlib_prefetch_buffer_with_index (vm, bi[3], LOAD);
+
+      b0 = vlib_get_buffer (vm, bi[0]);
+      b1 = vlib_get_buffer (vm, bi[1]);
+
+      ip6_forward_next_trace_one (vm, node, im, b0, which_adj_index);
+      ip6_forward_next_trace_one (vm, node, im, b1, which_adj_index);
+    }
+
+  /* Leftovers, one buffer per pass. */
+  for (; remaining >= 1; bi += 1, remaining -= 1)
+    {
+      vlib_buffer_t *b0 = vlib_get_buffer (vm, bi[0]);
+
+      ip6_forward_next_trace_one (vm, node, im, b0, which_adj_index);
+    }
+}
+
+/*
+ * Shared body of the "ip6-drop" and "ip6-punt" nodes: hands every buffer
+ * in the frame to vlib_error_drop_buffers() with next index 0 and the
+ * given error code (attributed to ip6_input_node), then records traces if
+ * tracing is enabled on the node.  Returns the number of buffers handled.
+ */
+static uword
+ip6_drop_or_punt (vlib_main_t * vm,
+                  vlib_node_runtime_t * node,
+                  vlib_frame_t * frame, ip6_error_t error_code)
+{
+  uword count = frame->n_vectors;
+  u32 *bi = vlib_frame_vector_args (frame);
+
+  vlib_error_drop_buffers (vm, node, bi, 1 /* stride */ , count,
+                           0 /* next */ ,
+                           ip6_input_node.index, error_code);
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+  return count;
+}
+
+/* Node function for "ip6-drop": every buffer gets the
+ * IP6_ERROR_ADJACENCY_DROP error code. */
+static uword
+ip6_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  const ip6_error_t reason = IP6_ERROR_ADJACENCY_DROP;
+
+  return ip6_drop_or_punt (vm, node, frame, reason);
+}
+
+/* Node function for "ip6-punt": every buffer gets the
+ * IP6_ERROR_ADJACENCY_PUNT error code. */
+static uword
+ip6_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  const ip6_error_t reason = IP6_ERROR_ADJACENCY_PUNT;
+
+  return ip6_drop_or_punt (vm, node, frame, reason);
+}
+
+/* Registration of the "ip6-drop" graph node; its single next node
+ * (index 0) is "error-drop". */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_drop_node, static) =
+{
+ .function = ip6_drop,
+ .name = "ip6-drop",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_next_nodes = 1,
+ .next_nodes =
+ {
+ [0] = "error-drop",},
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_drop_node, ip6_drop);
+
+/* Registration of the "ip6-punt" graph node; its single next node
+ * (index 0) is "error-punt". */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_punt_node, static) =
+{
+ .function = ip6_punt,
+ .name = "ip6-punt",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_next_nodes = 1,
+ .next_nodes =
+ {
+ [0] = "error-punt",},
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_punt_node, ip6_punt);
+
+/*
+ * Compute TCP/UDP/ICMP6 checksum in software.
+ *
+ * vm            - vlib main, used to follow the buffer chain
+ * p0            - buffer holding the packet; may be NULL, in which case
+ *                 the whole payload is read contiguously after ip0
+ * ip0           - the IPv6 header of the packet
+ * bogus_lengthp - must be non-NULL; set to 1 when the payload length
+ *                 cannot be satisfied (see below), otherwise 0
+ *
+ * The sum covers the payload length and protocol fields as stored in ip0,
+ * the source and destination addresses, and the upper-layer payload.  A
+ * leading hop-by-hop options header is skipped and its size is removed
+ * from the payload length.
+ *
+ * Returns the complemented, folded 16-bit sum.  Returns 0xfefe with
+ * *bogus_lengthp set to 1 when the buffer chain ends before the payload
+ * length is consumed, or when the hop-by-hop header claims to be longer
+ * than the payload.
+ */
+u16
+ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+                                   ip6_header_t * ip0, int *bogus_lengthp)
+{
+  ip_csum_t sum0;
+  u16 sum16, payload_length_host_byte_order;
+  u32 i, n_this_buffer, n_bytes_left;
+  u32 headers_size = sizeof (ip0[0]);
+  void *data_this_buffer;
+
+  ASSERT (bogus_lengthp);
+  *bogus_lengthp = 0;
+
+  /* Initialize checksum with ip header. */
+  sum0 = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
+  payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
+  data_this_buffer = (void *) (ip0 + 1);
+
+  for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
+    {
+      sum0 = ip_csum_with_carry (sum0,
+                                 clib_mem_unaligned (&ip0->
+                                                     src_address.as_uword[i],
+                                                     uword));
+      sum0 =
+        ip_csum_with_carry (sum0,
+                            clib_mem_unaligned (&ip0->dst_address.as_uword[i],
+                                                uword));
+    }
+
+  /* some icmp packets may come with a "router alert" hop-by-hop extension
+   * header (e.g., mldv2 packets) or UDP-Ping packets */
+  if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+    {
+      u32 skip_bytes;
+      ip6_hop_by_hop_ext_t *ext_hdr =
+        (ip6_hop_by_hop_ext_t *) data_this_buffer;
+
+      /* validate really icmp6 next */
+      ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_ICMP6)
+              || (ext_hdr->next_hdr == IP_PROTOCOL_UDP));
+
+      skip_bytes = 8 * (1 + ext_hdr->n_data_u64s);
+
+      /* A header that claims more bytes than the payload holds would make
+       * the u16 subtraction below wrap to a huge length; report it as a
+       * bogus length instead of checksumming out-of-range memory. */
+      if (PREDICT_FALSE (skip_bytes > payload_length_host_byte_order))
+        {
+          *bogus_lengthp = 1;
+          return 0xfefe;
+        }
+
+      data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes);
+
+      payload_length_host_byte_order -= skip_bytes;
+      headers_size += skip_bytes;
+    }
+
+  /* Clip the first chunk to what the first buffer actually holds. */
+  n_bytes_left = n_this_buffer = payload_length_host_byte_order;
+  if (p0 && n_this_buffer + headers_size > p0->current_length)
+    n_this_buffer =
+      p0->current_length >
+      headers_size ? p0->current_length - headers_size : 0;
+  while (1)
+    {
+      sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
+      n_bytes_left -= n_this_buffer;
+      if (n_bytes_left == 0)
+        break;
+
+      /* Payload continues but the chain does not: length is bogus. */
+      if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
+        {
+          *bogus_lengthp = 1;
+          return 0xfefe;
+        }
+      p0 = vlib_get_buffer (vm, p0->next_buffer);
+      data_this_buffer = vlib_buffer_get_current (p0);
+      n_this_buffer = p0->current_length;
+    }
+
+  sum16 = ~ip_csum_fold (sum0);
+
+  return sum16;
+}
+
+/*
+ * Verify the L4 checksum of the packet in p0 and record the verdict in
+ * the buffer flags.
+ *
+ * L4_CHECKSUM_COMPUTED is always set.  L4_CHECKSUM_CORRECT is set when the
+ * packet is UDP with a zero checksum field (no computation is done in
+ * that case) or when the computed checksum is zero.
+ * Returns the updated buffer flags.
+ */
+u32
+ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
+{
+  ip6_header_t *ip0 = vlib_buffer_get_current (p0);
+  udp_header_t *udp0 = (void *) (ip0 + 1);
+  int bogus_length;
+  int csum_is_zero;
+  u16 sum16;
+
+  /* some icmp packets may come with a "router alert" hop-by-hop extension
+   * header (e.g., mldv2 packets) */
+  ASSERT (ip0->protocol == IP_PROTOCOL_TCP
+          || ip0->protocol == IP_PROTOCOL_ICMP6
+          || ip0->protocol == IP_PROTOCOL_UDP
+          || ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS);
+
+  /* UDP with an explicit zero checksum is accepted without computing. */
+  if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
+    {
+      p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
+                    | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+      return p0->flags;
+    }
+
+  sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0, &bogus_length);
+  csum_is_zero = (sum16 == 0);
+
+  p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
+                | (csum_is_zero << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
+
+  return p0->flags;
+}
+
+/**
+ * @brief returns number of links on which src is reachable.
+ *
+ * Looks the packet's source address up in the forwarding table selected
+ * by the buffer's RX interface and returns the size of the uRPF list
+ * attached to the resulting load-balance.
+ */
+always_inline int
+ip6_urpf_loose_check (ip6_main_t * im, vlib_buffer_t * b, ip6_header_t * i)
+{
+  u32 rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+  index_t src_lbi;
+  const load_balance_t *src_lb;
+
+  src_lbi = ip6_fib_table_fwding_lookup_with_if_index (im, rx_sw_if_index,
+                                                       &i->src_address);
+  src_lb = load_balance_get (src_lbi);
+
+  return (fib_urpf_check_size (src_lb->lb_urpf));
+}
+
+/*
+ * Locate a UDP header in the packet, or failing that a TCP header.
+ * Returns IP_PROTOCOL_UDP or IP_PROTOCOL_TCP for whichever was found
+ * (UDP is tried first), or 0 when neither is found.  *udp_offset0 is
+ * handed to ip6_locate_header() as its output argument on each attempt.
+ */
+always_inline u8
+ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
+                           u32 * udp_offset0)
+{
+  u32 found;
+
+  found = ip6_locate_header (p0, ip0, IP_PROTOCOL_UDP, udp_offset0);
+  if (found == IP_PROTOCOL_UDP)
+    return found;
+
+  found = ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, udp_offset0);
+  if (found == IP_PROTOCOL_TCP)
+    return found;
+
+  return 0;
+}
+
+/* The "ip6-local" feature arc, started from the "ip6-local" node.  The
+ * generated vnet_feat_arc_ip6_local object is read by ip6_local_inline()
+ * to obtain the arc index. */
+/* *INDENT-OFF* */
+VNET_FEATURE_ARC_INIT (ip6_local) =
+{
+ .arc_name = "ip6-local",
+ .start_nodes = VNET_FEATURES ("ip6-local"),
+};
+/* *INDENT-ON* */
+
+/*
+ * Shared body of the "ip6-local" and "ip6-local-end-of-arc" nodes.
+ *
+ * head_of_feature_arc != 0: each packet is validated (UDP length, L4
+ *   checksum, loose source RPF), l3_hdr_offset is recorded, and packets
+ *   that pass are started on the "ip6-local" feature arc.
+ * head_of_feature_arc == 0: all checks are skipped and the packet goes
+ *   straight to the next node registered for its IP protocol.
+ *
+ * Throughout, IP6_ERROR_UNKNOWN_PROTOCOL in error0/error1 doubles as the
+ * "no error so far" value: any other value sends the packet to
+ * IP_LOCAL_NEXT_DROP.  p->error is always set from the ip6-input node's
+ * error table.
+ *
+ * Returns the number of vectors in the input frame.
+ */
+static uword
+ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int head_of_feature_arc)
+{
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ ip_local_next_t next_index;
+ u32 *from, *to_next, n_left_from, n_left_to_next;
+ /* Errors are attributed to the ip6-input node. */
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_input_node.index);
+ u8 arc_index = vnet_feat_arc_ip6_local.feature_arc_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Dual loop: two packets per iteration. */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+ udp_header_t *udp0, *udp1;
+ u32 pi0, ip_len0, udp_len0, flags0, next0;
+ u32 pi1, ip_len1, udp_len1, flags1, next1;
+ i32 len_diff0, len_diff1;
+ u8 error0, type0, good_l4_csum0, is_tcp_udp0;
+ u8 error1, type1, good_l4_csum1, is_tcp_udp1;
+ u32 udp_offset0, udp_offset1;
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ /* Must be set before the goto below: the skip path reads it. */
+ error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ if (head_of_feature_arc == 0)
+ goto skip_checks;
+
+ vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
+ vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
+
+ /* Built-in protocol class, looked up by the IPv6 header's protocol
+ * field. */
+ type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
+ type1 = lm->builtin_protocol_by_ip_protocol[ip1->protocol];
+
+ flags0 = p0->flags;
+ flags1 = p1->flags;
+
+ /* Non-zero (IP_PROTOCOL_UDP or IP_PROTOCOL_TCP) when such a header was
+ * located; udp_offsetN is the output argument of the search. */
+ is_tcp_udp0 = ip6_next_proto_is_tcp_udp (p0, ip0, &udp_offset0);
+ is_tcp_udp1 = ip6_next_proto_is_tcp_udp (p1, ip1, &udp_offset1);
+
+ good_l4_csum0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ good_l4_csum1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ len_diff0 = 0;
+ len_diff1 = 0;
+
+ if (PREDICT_TRUE (is_tcp_udp0))
+ {
+ udp0 = (udp_header_t *) ((u8 *) ip0 + udp_offset0);
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP
+ && udp0->checksum == 0;
+ /* Verify UDP length. */
+ if (is_tcp_udp0 == IP_PROTOCOL_UDP)
+ {
+ ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+ }
+ }
+ if (PREDICT_TRUE (is_tcp_udp1))
+ {
+ udp1 = (udp_header_t *) ((u8 *) ip1 + udp_offset1);
+ /* Don't verify UDP checksum for packets with explicit zero checksum. */
+ good_l4_csum1 |= type1 == IP_BUILTIN_PROTOCOL_UDP
+ && udp1->checksum == 0;
+ /* Verify UDP length. */
+ if (is_tcp_udp1 == IP_PROTOCOL_UDP)
+ {
+ ip_len1 = clib_net_to_host_u16 (ip1->payload_length);
+ udp_len1 = clib_net_to_host_u16 (udp1->length);
+ len_diff1 = ip_len1 - udp_len1;
+ }
+ }
+
+ /* Protocols with no built-in class are never checksum-verified. */
+ good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
+ good_l4_csum1 |= type1 == IP_BUILTIN_PROTOCOL_UNKNOWN;
+
+ /* The length check only applies when the built-in class is UDP. */
+ len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
+ len_diff1 = type1 == IP_BUILTIN_PROTOCOL_UDP ? len_diff1 : 0;
+
+ /* Compute the checksum in software only when it is not already known
+ * good and has not already been computed. */
+ if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
+ && !good_l4_csum0
+ && !(flags0 &
+ VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)))
+ {
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
+ good_l4_csum0 =
+ (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+ if (PREDICT_FALSE (type1 != IP_BUILTIN_PROTOCOL_UNKNOWN
+ && !good_l4_csum1
+ && !(flags1 &
+ VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)))
+ {
+ flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, p1);
+ good_l4_csum1 =
+ (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+
+ /* Repeats the initialisation done above, then a UDP length longer than
+ * the IP payload becomes a length error. */
+ error0 = error1 = IP6_ERROR_UNKNOWN_PROTOCOL;
+ error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
+ error1 = len_diff1 < 0 ? IP6_ERROR_UDP_LENGTH : error1;
+
+ /* The checksum error code is formed by adding the built-in protocol
+ * class to IP6_ERROR_UDP_CHECKSUM; these asserts pin the enum layout
+ * that makes that addition valid. */
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
+ IP6_ERROR_UDP_CHECKSUM);
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
+ IP6_ERROR_ICMP_CHECKSUM);
+ error0 = (!good_l4_csum0 ? IP6_ERROR_UDP_CHECKSUM + type0 : error0);
+ error1 = (!good_l4_csum1 ? IP6_ERROR_UDP_CHECKSUM + type1 : error1);
+
+ /* Drop packets from unroutable hosts. */
+ /* If this is a neighbor solicitation (ICMP), skip source RPF check */
+ /* Link-local unicast sources are also exempt. */
+ if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
+ type0 != IP_BUILTIN_PROTOCOL_ICMP &&
+ !ip6_address_is_link_local_unicast (&ip0->src_address))
+ {
+ error0 = (!ip6_urpf_loose_check (im, p0, ip0)
+ ? IP6_ERROR_SRC_LOOKUP_MISS : error0);
+ }
+ if (error1 == IP6_ERROR_UNKNOWN_PROTOCOL &&
+ type1 != IP_BUILTIN_PROTOCOL_ICMP &&
+ !ip6_address_is_link_local_unicast (&ip1->src_address))
+ {
+ error1 = (!ip6_urpf_loose_check (im, p1, ip1)
+ ? IP6_ERROR_SRC_LOOKUP_MISS : error1);
+ }
+
+ skip_checks:
+
+ /* Default next node is the one registered for the IP protocol; any
+ * recorded error overrides it with drop. */
+ next0 = lm->local_next_by_ip_protocol[ip0->protocol];
+ next1 = lm->local_next_by_ip_protocol[ip1->protocol];
+
+ next0 =
+ error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+ next1 =
+ error1 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
+
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ /* Only error-free packets are started on the feature arc, which may
+ * replace next0/next1. */
+ if (head_of_feature_arc)
+ {
+ if (PREDICT_TRUE (error0 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
+ vnet_feature_arc_start (arc_index,
+ vnet_buffer (p0)->sw_if_index
+ [VLIB_RX], &next0, p0);
+ if (PREDICT_TRUE (error1 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
+ vnet_feature_arc_start (arc_index,
+ vnet_buffer (p1)->sw_if_index
+ [VLIB_RX], &next1, p1);
+ }
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ /* Single loop: same processing, one packet per iteration. */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ udp_header_t *udp0;
+ u32 pi0, ip_len0, udp_len0, flags0, next0;
+ i32 len_diff0;
+ u8 error0, type0, good_l4_csum0;
+ u32 udp_offset0;
+ u8 is_tcp_udp0;
+
+ pi0 = to_next[0] = from[0];
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ /* Must be set before the goto below: the skip path reads it. */
+ error0 = IP6_ERROR_UNKNOWN_PROTOCOL;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ if (head_of_feature_arc == 0)
+ goto skip_check;
+
+ vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
+
+ type0 = lm->builtin_protocol_by_ip_protocol[ip0->protocol];
+ flags0 = p0->flags;
+ is_tcp_udp0 = ip6_next_proto_is_tcp_udp (p0, ip0, &udp_offset0);
+ good_l4_csum0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+
+ len_diff0 = 0;
+ if (PREDICT_TRUE (is_tcp_udp0))
+ {
+ udp0 = (udp_header_t *) ((u8 *) ip0 + udp_offset0);
+ /* Don't verify UDP checksum for packets with explicit zero
+ * checksum. */
+ good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UDP
+ && udp0->checksum == 0;
+ /* Verify UDP length. */
+ if (is_tcp_udp0 == IP_PROTOCOL_UDP)
+ {
+ ip_len0 = clib_net_to_host_u16 (ip0->payload_length);
+ udp_len0 = clib_net_to_host_u16 (udp0->length);
+ len_diff0 = ip_len0 - udp_len0;
+ }
+ }
+
+ good_l4_csum0 |= type0 == IP_BUILTIN_PROTOCOL_UNKNOWN;
+ len_diff0 = type0 == IP_BUILTIN_PROTOCOL_UDP ? len_diff0 : 0;
+
+ if (PREDICT_FALSE (type0 != IP_BUILTIN_PROTOCOL_UNKNOWN
+ && !good_l4_csum0
+ && !(flags0 &
+ VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)))
+ {
+ flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, p0);
+ good_l4_csum0 =
+ (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
+ }
+
+ error0 = IP6_ERROR_UNKNOWN_PROTOCOL;
+ error0 = len_diff0 < 0 ? IP6_ERROR_UDP_LENGTH : error0;
+
+ /* See the dual loop: these asserts pin the enum layout that the
+ * "IP6_ERROR_UDP_CHECKSUM + type0" addition relies on. */
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_UDP ==
+ IP6_ERROR_UDP_CHECKSUM);
+ ASSERT (IP6_ERROR_UDP_CHECKSUM + IP_BUILTIN_PROTOCOL_ICMP ==
+ IP6_ERROR_ICMP_CHECKSUM);
+ error0 = (!good_l4_csum0 ? IP6_ERROR_UDP_CHECKSUM + type0 : error0);
+
+ /* If this is a neighbor solicitation (ICMP), skip src RPF check */
+ if (error0 == IP6_ERROR_UNKNOWN_PROTOCOL &&
+ type0 != IP_BUILTIN_PROTOCOL_ICMP &&
+ !ip6_address_is_link_local_unicast (&ip0->src_address))
+ {
+ error0 = (!ip6_urpf_loose_check (im, p0, ip0)
+ ? IP6_ERROR_SRC_LOOKUP_MISS : error0);
+ }
+
+ skip_check:
+
+ next0 = lm->local_next_by_ip_protocol[ip0->protocol];
+ next0 =
+ error0 != IP6_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
+ p0->error = error_node->errors[error0];
+
+ if (head_of_feature_arc)
+ {
+ if (PREDICT_TRUE (error0 == (u8) IP6_ERROR_UNKNOWN_PROTOCOL))
+ vnet_feature_arc_start (arc_index,
+ vnet_buffer (p0)->sw_if_index
+ [VLIB_RX], &next0, p0);
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/* Node function for "ip6-local": runs the shared body as the head of the
+ * ip6-local feature arc, i.e. with all validation checks enabled. */
+static uword
+ip6_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  const int head_of_feature_arc = 1;
+
+  return ip6_local_inline (vm, node, frame, head_of_feature_arc);
+}
+
+ /*
+  * Registration of the "ip6-local" graph node.  ip6_local is its dispatch
+  * function, and the next-node table maps each IP_LOCAL_NEXT_* disposition
+  * to the named node that receives the packet.
+  */
+ /* *INDENT-OFF* */
+ VLIB_REGISTER_NODE (ip6_local_node, static) =
+ {
+ .function = ip6_local,
+ .name = "ip6-local",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_next_nodes = IP_LOCAL_N_NEXT,
+ .next_nodes =
+ {
+ [IP_LOCAL_NEXT_DROP] = "error-drop",
+ [IP_LOCAL_NEXT_PUNT] = "error-punt",
+ [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
+ [IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
+ },
+ };
+ /* *INDENT-ON* */
+
+ /* NOTE(review): presumably emits CPU-specific variants of ip6_local; confirm against the macro definition. */
+ VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_node, ip6_local);
+
+
+ /*
+  * Dispatch function of the "ip6-local-end-of-arc" node: runs the shared
+  * local-delivery worker with the head-of-feature-arc flag cleared, so the
+  * worker does not start the feature arc again.
+  */
+ static uword
+ ip6_local_end_of_arc (vlib_main_t * vm,
+                       vlib_node_runtime_t * node, vlib_frame_t * frame)
+ {
+   const int head_of_feature_arc = 0;
+
+   return ip6_local_inline (vm, node, frame, head_of_feature_arc);
+ }
+
+ /*
+  * Registration of the "ip6-local-end-of-arc" graph node, declared as a
+  * sibling of "ip6-local", followed by its registration as a feature on the
+  * "ip6-local" arc.
+  */
+ /* *INDENT-OFF* */
+ VLIB_REGISTER_NODE (ip6_local_end_of_arc_node,static) = {
+ .function = ip6_local_end_of_arc,
+ .name = "ip6-local-end-of-arc",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_ip6_forward_next_trace,
+ .sibling_of = "ip6-local",
+ };
+
+ /* NOTE(review): presumably emits CPU-specific variants of the node function; confirm against the macro definition. */
+ VLIB_NODE_FUNCTION_MULTIARCH (ip6_local_end_of_arc_node, ip6_local_end_of_arc)
+
+ /* Feature-arc entry: runs_before is 0, i.e. no ordering constraint against other features is declared. */
+ VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = {
+ .arc_name = "ip6-local",
+ .node_name = "ip6-local-end-of-arc",
+ .runs_before = 0, /* not before any other features */
+ };
+ /* *INDENT-ON* */
+
+ /*
+  * Route locally-delivered packets of one IP protocol to a given node.
+  *
+  * Adds node_index as a next node of "ip6-local" and stores the resulting
+  * next index in the per-protocol dispatch table.
+  *
+  * @param protocol    IP protocol number; must index within
+  *                    local_next_by_ip_protocol (checked by ASSERT only).
+  * @param node_index  graph node that should receive the protocol.
+  */
+ void
+ ip6_register_protocol (u32 protocol, u32 node_index)
+ {
+   vlib_main_t *vlib = vlib_get_main ();
+   ip_lookup_main_t *lookup_main = &ip6_main.lookup_main;
+
+   ASSERT (protocol < ARRAY_LEN (lookup_main->local_next_by_ip_protocol));
+
+   lookup_main->local_next_by_ip_protocol[protocol] =
+     vlib_node_add_next (vlib, ip6_local_node.index, node_index);
+ }
+
+ /* Next-node slots shared by the "ip6-discover-neighbor" and "ip6-glean" nodes. */
+ typedef enum
+ {
+ IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
+ IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX,
+ IP6_DISCOVER_NEIGHBOR_N_NEXT,
+ } ip6_discover_neighbor_next_t;
+
+ /*
+  * Error/counter indices for the same two nodes; each value indexes
+  * ip6_discover_neighbor_error_strings.
+  */
+ typedef enum
+ {
+ IP6_DISCOVER_NEIGHBOR_ERROR_DROP,
+ IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT,
+ IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS,
+ } ip6_discover_neighbor_error_t;
+
+ /*
+  * Shared worker for the "ip6-discover-neighbor" and "ip6-glean" nodes.
+  *
+  * Every input buffer is enqueued to the DROP next.  For each buffer that
+  * passes the throttle and link-state checks, a neighbor solicitation is
+  * built from im->discover_neighbor_packet_template and handed to the
+  * REPLY_TX next.
+  *
+  * Throttle: (sw_if_index, destination address) is hashed with seeds that
+  * are refreshed, together with a cleared bitmap, whenever more than 1e-3
+  * has elapsed on vlib_time_now() since the last refresh.  A buffer whose
+  * hash bit is already set is dropped without sending a solicitation.
+  * The seeds and bitmap are function-static.
+  *
+  * @param vm        vlib main.
+  * @param node      runtime of the calling node (error counters, nexts).
+  * @param frame     input frame of buffer indices.
+  * @param is_glean  when zero, the packet's destination address is first
+  *                  overwritten with the adjacency's next-hop address.
+  * @return          frame->n_vectors.
+  */
+ static uword
+ ip6_discover_neighbor_inline (vlib_main_t * vm,
+                               vlib_node_runtime_t * node,
+                               vlib_frame_t * frame, int is_glean)
+ {
+   vnet_main_t *vnm = vnet_get_main ();
+   ip6_main_t *im = &ip6_main;
+   ip_lookup_main_t *lm = &im->lookup_main;
+   u32 *from, *to_next_drop;
+   uword n_left_from, n_left_to_next_drop;
+   static f64 time_last_seed_change = -1e100;
+   static u32 hash_seeds[3];
+   static uword hash_bitmap[256 / BITS (uword)];
+   f64 time_now;
+   int bogus_length;
+
+   if (node->flags & VLIB_NODE_FLAG_TRACE)
+     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+   time_now = vlib_time_now (vm);
+   if (time_now - time_last_seed_change > 1e-3)
+     {
+       uword i;
+       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
+                                             sizeof (hash_seeds));
+       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
+         hash_seeds[i] = r[i];
+
+       /* Mark all hash keys as not seen before. */
+       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
+         hash_bitmap[i] = 0;
+
+       time_last_seed_change = time_now;
+     }
+
+   from = vlib_frame_vector_args (frame);
+   n_left_from = frame->n_vectors;
+
+   while (n_left_from > 0)
+     {
+       vlib_get_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
+                            to_next_drop, n_left_to_next_drop);
+
+       while (n_left_from > 0 && n_left_to_next_drop > 0)
+         {
+           vlib_buffer_t *p0;
+           ip6_header_t *ip0;
+           u32 pi0, adj_index0, a0, b0, c0, sw_if_index0, drop0;
+           /* The mask must be as wide as a bitmap word, not u32. */
+           uword bm0, m0;
+           ip_adjacency_t *adj0;
+           vnet_hw_interface_t *hw_if0;
+           u32 next0;
+
+           pi0 = from[0];
+
+           p0 = vlib_get_buffer (vm, pi0);
+
+           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+           ip0 = vlib_buffer_get_current (p0);
+
+           adj0 = adj_get (adj_index0);
+
+           if (!is_glean)
+             {
+               ip0->dst_address.as_u64[0] =
+                 adj0->sub_type.nbr.next_hop.ip6.as_u64[0];
+               ip0->dst_address.as_u64[1] =
+                 adj0->sub_type.nbr.next_hop.ip6.as_u64[1];
+             }
+
+           a0 = hash_seeds[0];
+           b0 = hash_seeds[1];
+           c0 = hash_seeds[2];
+
+           sw_if_index0 = adj0->rewrite_header.sw_if_index;
+           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+
+           a0 ^= sw_if_index0;
+           b0 ^= ip0->dst_address.as_u32[0];
+           c0 ^= ip0->dst_address.as_u32[1];
+
+           hash_v3_mix32 (a0, b0, c0);
+
+           b0 ^= ip0->dst_address.as_u32[2];
+           c0 ^= ip0->dst_address.as_u32[3];
+
+           hash_v3_finalize32 (a0, b0, c0);
+
+           /*
+            * Reduce the hash to a bit index into the bitmap.  The in-word
+            * mask has to be derived from the bit index BEFORE c0 is turned
+            * into a word index; doing it afterwards would only ever select
+            * the lowest few bits of each word.
+            */
+           c0 &= BITS (hash_bitmap) - 1;
+           m0 = (uword) 1 << (c0 % BITS (uword));
+           c0 = c0 / BITS (uword);
+
+           bm0 = hash_bitmap[c0];
+           drop0 = (bm0 & m0) != 0;
+
+           /* Mark it as seen. */
+           hash_bitmap[c0] = bm0 | m0;
+
+           /* The original buffer always goes to the drop next. */
+           from += 1;
+           n_left_from -= 1;
+           to_next_drop[0] = pi0;
+           to_next_drop += 1;
+           n_left_to_next_drop -= 1;
+
+           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
+
+           /* If the interface is link-down, drop the pkt */
+           if (!(hw_if0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))
+             drop0 = 1;
+
+           p0->error =
+             node->errors[drop0 ? IP6_DISCOVER_NEIGHBOR_ERROR_DROP
+                          : IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT];
+           if (drop0)
+             continue;
+
+           /*
+            * The adjacency has been updated to a rewrite but the DPO that
+            * got us here hasn't been - yet.  No big deal, we'll drop while
+            * we wait.
+            */
+           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
+             continue;
+
+           {
+             u32 bi0 = 0;
+             icmp6_neighbor_solicitation_header_t *h0;
+             vlib_buffer_t *ns_b0;
+
+             h0 = vlib_packet_template_get_packet
+               (vm, &im->discover_neighbor_packet_template, &bi0);
+
+             /* No solicitation can be built without a template packet. */
+             if (PREDICT_FALSE (!h0))
+               {
+                 p0->error = node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_DROP];
+                 continue;
+               }
+
+             /*
+              * Choose the solicitation's source address from the outgoing
+              * interface; a non-zero return means none is available.
+              */
+             if (ip6_src_address_for_packet (lm,
+                                             sw_if_index0,
+                                             &h0->ip.src_address))
+               {
+                 /* There is no address on the interface */
+                 p0->error =
+                   node->errors[IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS];
+                 vlib_buffer_free (vm, &bi0, 1);
+                 continue;
+               }
+
+             /*
+              * Destination address is a solicited node multicast address.
+              * We need to fill in the low 24 bits with the low 24 bits of
+              * the target's address.
+              */
+             h0->ip.dst_address.as_u8[13] = ip0->dst_address.as_u8[13];
+             h0->ip.dst_address.as_u8[14] = ip0->dst_address.as_u8[14];
+             h0->ip.dst_address.as_u8[15] = ip0->dst_address.as_u8[15];
+
+             h0->neighbor.target_address = ip0->dst_address;
+
+             clib_memcpy (h0->link_layer_option.ethernet_address,
+                          hw_if0->hw_address, vec_len (hw_if0->hw_address));
+
+             /* $$$$ appears we need this; why is the checksum non-zero? */
+             h0->neighbor.icmp.checksum = 0;
+             h0->neighbor.icmp.checksum =
+               ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip,
+                                                  &bogus_length);
+
+             ASSERT (bogus_length == 0);
+
+             vlib_buffer_copy_trace_flag (vm, p0, bi0);
+             ns_b0 = vlib_get_buffer (vm, bi0);
+             vnet_buffer (ns_b0)->sw_if_index[VLIB_TX]
+               = vnet_buffer (p0)->sw_if_index[VLIB_TX];
+
+             /* Add rewrite/encap string. */
+             vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
+             vlib_buffer_advance (ns_b0, -adj0->rewrite_header.data_bytes);
+
+             next0 = IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX;
+
+             vlib_set_next_frame_buffer (vm, node, next0, bi0);
+           }
+         }
+
+       vlib_put_next_frame (vm, node, IP6_DISCOVER_NEIGHBOR_NEXT_DROP,
+                            n_left_to_next_drop);
+     }
+
+   return frame->n_vectors;
+ }
+
+ /*
+  * Dispatch function of the "ip6-discover-neighbor" node: the shared worker
+  * with is_glean cleared, so the adjacency's next-hop becomes the target.
+  */
+ static uword
+ ip6_discover_neighbor (vlib_main_t * vm,
+                        vlib_node_runtime_t * node, vlib_frame_t * frame)
+ {
+   const int is_glean = 0;
+
+   return ip6_discover_neighbor_inline (vm, node, frame, is_glean);
+ }
+
+ /*
+  * Dispatch function of the "ip6-glean" node: the shared worker with
+  * is_glean set, so the packet's own destination address is the target.
+  */
+ static uword
+ ip6_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+ {
+   const int is_glean = 1;
+
+   return ip6_discover_neighbor_inline (vm, node, frame, is_glean);
+ }
+
+ /*
+  * Counter descriptions indexed by ip6_discover_neighbor_error_t; used as
+  * the error_strings table of both the "ip6-discover-neighbor" and
+  * "ip6-glean" nodes.
+  */
+ static char *ip6_discover_neighbor_error_strings[] = {
+ [IP6_DISCOVER_NEIGHBOR_ERROR_DROP] = "address overflow drops",
+ [IP6_DISCOVER_NEIGHBOR_ERROR_REQUEST_SENT] = "neighbor solicitations sent",
+ [IP6_DISCOVER_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS]
+ = "no source address for ND solicitation",
+ };
+
+ /*
+  * Registration of the "ip6-discover-neighbor" graph node: original packets
+  * go to "error-drop", generated solicitations to "interface-output".
+  */
+ /* *INDENT-OFF* */
+ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) =
+ {
+ .function = ip6_discover_neighbor,
+ .name = "ip6-discover-neighbor",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
+ .error_strings = ip6_discover_neighbor_error_strings,
+ .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
+ [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
+ },
+ };
+ /* *INDENT-ON* */
+
+ /*
+  * Registration of the "ip6-glean" graph node.  It shares the error strings
+  * and next-node layout of "ip6-discover-neighbor" and differs only in its
+  * dispatch function.
+  */
+ /* *INDENT-OFF* */
+ VLIB_REGISTER_NODE (ip6_glean_node) =
+ {
+ .function = ip6_glean,
+ .name = "ip6-glean",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
+ .error_strings = ip6_discover_neighbor_error_strings,
+ .n_next_nodes = IP6_DISCOVER_NEIGHBOR_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_DISCOVER_NEIGHBOR_NEXT_DROP] = "error-drop",
+ [IP6_DISCOVER_NEIGHBOR_NEXT_REPLY_TX] = "interface-output",
+ },
+ };
+ /* *INDENT-ON* */
+
+ /*
+  * Send one neighbor solicitation for dst out of sw_if_index.
+  *
+  * All checks that can fail are made before the packet buffer is taken
+  * from the template, so no error return leaves a buffer allocated.
+  *
+  * @param vm           vlib main.
+  * @param dst          address to solicit.
+  * @param sw_if_index  interface to send the solicitation on.
+  * @return             0 on success, otherwise a clib error when the
+  *                     interface is admin-down, has no address matching
+  *                     dst, has no hardware address, or no packet could be
+  *                     obtained from the template.
+  */
+ clib_error_t *
+ ip6_probe_neighbor (vlib_main_t * vm, ip6_address_t * dst, u32 sw_if_index)
+ {
+   vnet_main_t *vnm = vnet_get_main ();
+   ip6_main_t *im = &ip6_main;
+   icmp6_neighbor_solicitation_header_t *h;
+   ip6_address_t *src;
+   ip_interface_address_t *ia;
+   ip_adjacency_t *adj;
+   vnet_hw_interface_t *hi;
+   vnet_sw_interface_t *si;
+   vlib_buffer_t *b;
+   adj_index_t ai;
+   u32 bi = 0;
+   int bogus_length;
+
+   si = vnet_get_sw_interface (vnm, sw_if_index);
+
+   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+     {
+       return clib_error_return (0, "%U: interface %U down",
+                                 format_ip6_address, dst,
+                                 format_vnet_sw_if_index_name, vnm,
+                                 sw_if_index);
+     }
+
+   src =
+     ip6_interface_address_matching_destination (im, dst, sw_if_index, &ia);
+   if (!src)
+     {
+       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
+       return clib_error_return
+         (0, "no matching interface address for destination %U (interface %U)",
+          format_ip6_address, dst,
+          format_vnet_sw_if_index_name, vnm, sw_if_index);
+     }
+
+   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+   /* Checked before allocating so this return cannot leak a buffer. */
+   if (PREDICT_FALSE (!hi->hw_address))
+     {
+       return clib_error_return (0, "%U: interface %U do not support ip probe",
+                                 format_ip6_address, dst,
+                                 format_vnet_sw_if_index_name, vnm,
+                                 sw_if_index);
+     }
+
+   h =
+     vlib_packet_template_get_packet (vm,
+                                      &im->discover_neighbor_packet_template,
+                                      &bi);
+   if (PREDICT_FALSE (!h))
+     return clib_error_return (0, "ICMP6 NS packet allocation failed");
+
+   /*
+    * Destination address is a solicited node multicast address.  We need
+    * to fill in the low 24 bits with the low 24 bits of the target's
+    * address.
+    */
+   h->ip.dst_address.as_u8[13] = dst->as_u8[13];
+   h->ip.dst_address.as_u8[14] = dst->as_u8[14];
+   h->ip.dst_address.as_u8[15] = dst->as_u8[15];
+
+   h->ip.src_address = src[0];
+   h->neighbor.target_address = dst[0];
+
+   clib_memcpy (h->link_layer_option.ethernet_address, hi->hw_address,
+                vec_len (hi->hw_address));
+
+   /*
+    * Clear the field before summing, as ip6_discover_neighbor_inline does,
+    * so a non-zero value in the template cannot corrupt the checksum.
+    */
+   h->neighbor.icmp.checksum = 0;
+   h->neighbor.icmp.checksum =
+     ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
+   ASSERT (bogus_length == 0);
+
+   b = vlib_get_buffer (vm, bi);
+   vnet_buffer (b)->sw_if_index[VLIB_RX] =
+     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+
+   /* Add encapsulation string for software interface (e.g. ethernet header). */
+   ip46_address_t nh = {
+     .ip6 = *dst,
+   };
+
+   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6,
+                             VNET_LINK_IP6, &nh, sw_if_index);
+   adj = adj_get (ai);
+
+   /* Peer has been previously resolved, retrieve glean adj instead */
+   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
+     {
+       adj_unlock (ai);
+       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP6, sw_if_index, &nh);
+       adj = adj_get (ai);
+     }
+
+   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+   /* Hand the single buffer to the interface's output node. */
+   {
+     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+     u32 *to_next = vlib_frame_vector_args (f);
+     to_next[0] = bi;
+     f->n_vectors = 1;
+     vlib_put_frame_to_node (vm, hi->output_node_index, f);
+   }
+
+   adj_unlock (ai);
+   return /* no error */ 0;
+ }
+
+ /*
+  * Fixed next-node slots of "ip6-rewrite".  ip6_rewrite_inline starts every
+  * packet at DROP and otherwise takes the next index from the adjacency.
+  */
+ typedef enum
+ {
+ IP6_REWRITE_NEXT_DROP,
+ IP6_REWRITE_NEXT_ICMP_ERROR,
+ } ip6_rewrite_next_t;
+
+ /*
+  * Shared worker for the ip6 rewrite-family nodes.
+  *
+  * Per packet:
+  *  - unless the buffer is flagged locally originated (in which case the
+  *    flag is cleared), decrement the hop limit; if it reaches zero, mark
+  *    the packet for an ICMP6 time-exceeded reply;
+  *  - apply the adjacency's rewrite and remember its length in the buffer;
+  *  - optionally bump the adjacency counters;
+  *  - compare the packet length against the adjacency's
+  *    max_l3_packet_bytes;
+  *  - if no error was found, expose the rewrite bytes, set the TX
+  *    interface, take the next node from the adjacency and, when the
+  *    adjacency has features, start the output feature arc;
+  *  - run the midchain and/or multicast fixups when requested;
+  *  - record the resulting error on the buffer.
+  *
+  * @param do_counters  non-zero to update adjacency_counters.
+  * @param is_midchain  non-zero to call the adjacency's midchain fixup.
+  * @param is_mcast     non-zero to apply the multicast header fixup.
+  * @return             frame->n_vectors.
+  */
+ always_inline uword
+ ip6_rewrite_inline (vlib_main_t * vm,
+                     vlib_node_runtime_t * node,
+                     vlib_frame_t * frame,
+                     int do_counters, int is_midchain, int is_mcast)
+ {
+   ip_lookup_main_t *lm = &ip6_main.lookup_main;
+   u32 *from = vlib_frame_vector_args (frame);
+   u32 n_left_from, n_left_to_next, *to_next, next_index;
+   vlib_node_runtime_t *error_node =
+     vlib_node_get_runtime (vm, ip6_input_node.index);
+
+   n_left_from = frame->n_vectors;
+   next_index = node->cached_next_index;
+   u32 thread_index = vlib_get_thread_index ();
+
+   while (n_left_from > 0)
+     {
+       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+       /* Dual loop: needs 4 left so from[2]/from[3] can be prefetched. */
+       while (n_left_from >= 4 && n_left_to_next >= 2)
+         {
+           ip_adjacency_t *adj0, *adj1;
+           vlib_buffer_t *p0, *p1;
+           ip6_header_t *ip0, *ip1;
+           u32 pi0, rw_len0, next0, error0, adj_index0;
+           u32 pi1, rw_len1, next1, error1, adj_index1;
+           u32 tx_sw_if_index0, tx_sw_if_index1;
+
+           /* Prefetch next iteration. */
+           {
+             vlib_buffer_t *p2, *p3;
+
+             p2 = vlib_get_buffer (vm, from[2]);
+             p3 = vlib_get_buffer (vm, from[3]);
+
+             vlib_prefetch_buffer_header (p2, LOAD);
+             vlib_prefetch_buffer_header (p3, LOAD);
+
+             CLIB_PREFETCH (p2->pre_data, 32, STORE);
+             CLIB_PREFETCH (p3->pre_data, 32, STORE);
+
+             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
+             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
+           }
+
+           pi0 = to_next[0] = from[0];
+           pi1 = to_next[1] = from[1];
+
+           from += 2;
+           n_left_from -= 2;
+           to_next += 2;
+           n_left_to_next -= 2;
+
+           p0 = vlib_get_buffer (vm, pi0);
+           p1 = vlib_get_buffer (vm, pi1);
+
+           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
+
+           ip0 = vlib_buffer_get_current (p0);
+           ip1 = vlib_buffer_get_current (p1);
+
+           error0 = error1 = IP6_ERROR_NONE;
+           next0 = next1 = IP6_REWRITE_NEXT_DROP;
+
+           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
+             {
+               i32 hop_limit0 = ip0->hop_limit;
+
+               /* Input node should have rejected packets with hop limit 0. */
+               ASSERT (ip0->hop_limit > 0);
+
+               hop_limit0 -= 1;
+
+               ip0->hop_limit = hop_limit0;
+
+               /*
+                * If the hop count drops below 1 when forwarding, generate
+                * an ICMP response.
+                */
+               if (PREDICT_FALSE (hop_limit0 <= 0))
+                 {
+                   error0 = IP6_ERROR_TIME_EXPIRED;
+                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
+                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
+                                                0);
+                 }
+             }
+           else
+             {
+               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+             }
+           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
+             {
+               i32 hop_limit1 = ip1->hop_limit;
+
+               /* Input node should have rejected packets with hop limit 0. */
+               ASSERT (ip1->hop_limit > 0);
+
+               hop_limit1 -= 1;
+
+               ip1->hop_limit = hop_limit1;
+
+               /*
+                * If the hop count drops below 1 when forwarding, generate
+                * an ICMP response.
+                */
+               if (PREDICT_FALSE (hop_limit1 <= 0))
+                 {
+                   error1 = IP6_ERROR_TIME_EXPIRED;
+                   next1 = IP6_REWRITE_NEXT_ICMP_ERROR;
+                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+                   icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
+                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
+                                                0);
+                 }
+             }
+           else
+             {
+               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+             }
+           adj0 = adj_get (adj_index0);
+           adj1 = adj_get (adj_index1);
+
+           rw_len0 = adj0[0].rewrite_header.data_bytes;
+           rw_len1 = adj1[0].rewrite_header.data_bytes;
+           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
+           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
+
+           if (do_counters)
+             {
+               vlib_increment_combined_counter
+                 (&adjacency_counters,
+                  thread_index, adj_index0, 1,
+                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+               vlib_increment_combined_counter
+                 (&adjacency_counters,
+                  thread_index, adj_index1, 1,
+                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
+             }
+
+           /* Check MTU of outgoing interface. */
+           error0 =
+             (vlib_buffer_length_in_chain (vm, p0) >
+              adj0[0].
+              rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
+              error0);
+           error1 =
+             (vlib_buffer_length_in_chain (vm, p1) >
+              adj1[0].
+              rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
+              error1);
+
+           /* Don't adjust the buffer for hop count issue; icmp-error node
+            * wants to see the IP header */
+           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
+             {
+               p0->current_data -= rw_len0;
+               p0->current_length += rw_len0;
+
+               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+               next0 = adj0[0].rewrite_header.next_index;
+
+               if (PREDICT_FALSE
+                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+                 vnet_feature_arc_start (lm->output_feature_arc_index,
+                                         tx_sw_if_index0, &next0, p0);
+             }
+           if (PREDICT_TRUE (error1 == IP6_ERROR_NONE))
+             {
+               p1->current_data -= rw_len1;
+               p1->current_length += rw_len1;
+
+               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
+               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
+               next1 = adj1[0].rewrite_header.next_index;
+
+               if (PREDICT_FALSE
+                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+                 vnet_feature_arc_start (lm->output_feature_arc_index,
+                                         tx_sw_if_index1, &next1, p1);
+             }
+
+           /* Guess we are only writing on simple Ethernet header. */
+           vnet_rewrite_two_headers (adj0[0], adj1[0],
+                                     ip0, ip1, sizeof (ethernet_header_t));
+
+           if (is_midchain)
+             {
+               adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
+               adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
+             }
+           if (is_mcast)
+             {
+               /*
+                * copy bytes from the IP address into the MAC rewrite
+                */
+               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+               vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
+             }
+
+           /*
+            * Record the outcome on each buffer, exactly as the
+            * single-packet loop does; otherwise a packet sent to drop or
+            * icmp-error from here would carry a stale error value.
+            */
+           p0->error = error_node->errors[error0];
+           p1->error = error_node->errors[error1];
+
+           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                            to_next, n_left_to_next,
+                                            pi0, pi1, next0, next1);
+         }
+
+       while (n_left_from > 0 && n_left_to_next > 0)
+         {
+           ip_adjacency_t *adj0;
+           vlib_buffer_t *p0;
+           ip6_header_t *ip0;
+           u32 pi0, rw_len0;
+           u32 adj_index0, next0, error0;
+           u32 tx_sw_if_index0;
+
+           pi0 = to_next[0] = from[0];
+
+           p0 = vlib_get_buffer (vm, pi0);
+
+           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+
+           adj0 = adj_get (adj_index0);
+
+           ip0 = vlib_buffer_get_current (p0);
+
+           error0 = IP6_ERROR_NONE;
+           next0 = IP6_REWRITE_NEXT_DROP;
+
+           /* Check hop limit */
+           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
+             {
+               i32 hop_limit0 = ip0->hop_limit;
+
+               ASSERT (ip0->hop_limit > 0);
+
+               hop_limit0 -= 1;
+
+               ip0->hop_limit = hop_limit0;
+
+               if (PREDICT_FALSE (hop_limit0 <= 0))
+                 {
+                   /*
+                    * If the hop count drops below 1 when forwarding, generate
+                    * an ICMP response.
+                    */
+                   error0 = IP6_ERROR_TIME_EXPIRED;
+                   next0 = IP6_REWRITE_NEXT_ICMP_ERROR;
+                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
+                   icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+                                                ICMP6_time_exceeded_ttl_exceeded_in_transit,
+                                                0);
+                 }
+             }
+           else
+             {
+               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
+             }
+
+           /* Guess we are only writing on simple Ethernet header. */
+           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
+
+           /* Update packet buffer attributes/set output interface. */
+           rw_len0 = adj0[0].rewrite_header.data_bytes;
+           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
+
+           if (do_counters)
+             {
+               vlib_increment_combined_counter
+                 (&adjacency_counters,
+                  thread_index, adj_index0, 1,
+                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
+             }
+
+           /* Check MTU of outgoing interface. */
+           error0 =
+             (vlib_buffer_length_in_chain (vm, p0) >
+              adj0[0].
+              rewrite_header.max_l3_packet_bytes ? IP6_ERROR_MTU_EXCEEDED :
+              error0);
+
+           /* Don't adjust the buffer for hop count issue; icmp-error node
+            * wants to see the IP header */
+           if (PREDICT_TRUE (error0 == IP6_ERROR_NONE))
+             {
+               p0->current_data -= rw_len0;
+               p0->current_length += rw_len0;
+
+               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
+
+               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
+               next0 = adj0[0].rewrite_header.next_index;
+
+               if (PREDICT_FALSE
+                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
+                 vnet_feature_arc_start (lm->output_feature_arc_index,
+                                         tx_sw_if_index0, &next0, p0);
+             }
+
+           if (is_midchain)
+             {
+               adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
+             }
+           if (is_mcast)
+             {
+               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
+             }
+
+           p0->error = error_node->errors[error0];
+
+           from += 1;
+           n_left_from -= 1;
+           to_next += 1;
+           n_left_to_next -= 1;
+
+           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                            to_next, n_left_to_next,
+                                            pi0, next0);
+         }
+
+       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+     }
+
+   /* Need to do trace after rewrites to pick up new packet data. */
+   if (node->flags & VLIB_NODE_FLAG_TRACE)
+     ip6_forward_next_trace (vm, node, frame, VLIB_TX);
+
+   return frame->n_vectors;
+ }
+
+ /*
+  * Dispatch function of "ip6-rewrite": plain rewrite, no midchain or
+  * multicast fixup.  Each arm passes literal flags, so every call site of
+  * the always_inline worker sees compile-time constants.
+  */
+ static uword
+ ip6_rewrite (vlib_main_t * vm,
+              vlib_node_runtime_t * node, vlib_frame_t * frame)
+ {
+   return adj_are_counters_enabled ()
+     ? ip6_rewrite_inline (vm, node, frame, 1, 0, 0)
+     : ip6_rewrite_inline (vm, node, frame, 0, 0, 0);
+ }
+
+ /*
+  * Dispatch function of "ip6-rewrite-mcast": rewrite plus the multicast
+  * header fixup.  Each arm passes literal flags to the always_inline worker.
+  */
+ static uword
+ ip6_rewrite_mcast (vlib_main_t * vm,
+                    vlib_node_runtime_t * node, vlib_frame_t * frame)
+ {
+   return adj_are_counters_enabled ()
+     ? ip6_rewrite_inline (vm, node, frame, 1, 0, 1)
+     : ip6_rewrite_inline (vm, node, frame, 0, 0, 1);
+ }
+
+ /*
+  * Dispatch function of "ip6-midchain": rewrite plus the adjacency's
+  * midchain fixup.  Each arm passes literal flags to the always_inline
+  * worker.
+  */
+ static uword
+ ip6_midchain (vlib_main_t * vm,
+               vlib_node_runtime_t * node, vlib_frame_t * frame)
+ {
+   return adj_are_counters_enabled ()
+     ? ip6_rewrite_inline (vm, node, frame, 1, 1, 0)
+     : ip6_rewrite_inline (vm, node, frame, 0, 1, 0);
+ }
+
+ /*
+  * Dispatch function of "ip6-mcast-midchain": rewrite plus both the
+  * midchain and the multicast fixups.  Each arm passes literal flags to the
+  * always_inline worker.
+  */
+ static uword
+ ip6_mcast_midchain (vlib_main_t * vm,
+                     vlib_node_runtime_t * node, vlib_frame_t * frame)
+ {
+   return adj_are_counters_enabled ()
+     ? ip6_rewrite_inline (vm, node, frame, 1, 1, 1)
+     : ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
+ }
+
+ /*
+  * Registration of the "ip6-midchain" graph node, declared as a sibling of
+  * "ip6-rewrite".
+  */
+ /* *INDENT-OFF* */
+ VLIB_REGISTER_NODE (ip6_midchain_node) =
+ {
+ .function = ip6_midchain,
+ .name = "ip6-midchain",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .sibling_of = "ip6-rewrite",
+ };
+ /* *INDENT-ON* */
+
+ VLIB_NODE_FUNCTION_MULTIARCH (ip6_midchain_node, ip6_midchain);
+
+ /*
+  * Registration of the "ip6-rewrite" graph node.  Its two fixed next nodes
+  * correspond to ip6_rewrite_next_t; the other rewrite-family nodes are
+  * registered as its siblings.
+  */
+ /* *INDENT-OFF* */
+ VLIB_REGISTER_NODE (ip6_rewrite_node) =
+ {
+ .function = ip6_rewrite,
+ .name = "ip6-rewrite",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_rewrite_trace,
+ .n_next_nodes = 2,
+ .next_nodes =
+ {
+ [IP6_REWRITE_NEXT_DROP] = "error-drop",
+ [IP6_REWRITE_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ },
+ };
+ /* *INDENT-ON* */
+
+ VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_node, ip6_rewrite);
+
+ /*
+  * Registration of the "ip6-rewrite-mcast" graph node, declared as a
+  * sibling of "ip6-rewrite".
+  */
+ /* *INDENT-OFF* */
+ VLIB_REGISTER_NODE (ip6_rewrite_mcast_node) =
+ {
+ .function = ip6_rewrite_mcast,
+ .name = "ip6-rewrite-mcast",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_rewrite_trace,
+ .sibling_of = "ip6-rewrite",
+ };
+ /* *INDENT-ON* */
+
+ VLIB_NODE_FUNCTION_MULTIARCH (ip6_rewrite_mcast_node, ip6_rewrite_mcast);
+
+ /*
+  * Registration of the "ip6-mcast-midchain" graph node, declared as a
+  * sibling of "ip6-rewrite".
+  */
+ /* *INDENT-OFF* */
+ VLIB_REGISTER_NODE (ip6_mcast_midchain_node, static) =
+ {
+ .function = ip6_mcast_midchain,
+ .name = "ip6-mcast-midchain",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_rewrite_trace,
+ .sibling_of = "ip6-rewrite",
+ };
+ /* *INDENT-ON* */
+
+ VLIB_NODE_FUNCTION_MULTIARCH (ip6_mcast_midchain_node, ip6_mcast_midchain);
+
+/*
+ * Hop-by-Hop handling
+ */
+ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
+
+#define foreach_ip6_hop_by_hop_error \
+_(PROCESSED, "pkts with ip6 hop-by-hop options") \
+_(FORMAT, "incorrectly formatted hop-by-hop options") \
+_(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
+
+ /*
+  * Error indices generated from foreach_ip6_hop_by_hop_error, one
+  * IP6_HOP_BY_HOP_ERROR_<sym> per entry, terminated by the entry count.
+  */
+ /* *INDENT-OFF* */
+ typedef enum
+ {
+ #define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
+ foreach_ip6_hop_by_hop_error
+ #undef _
+ IP6_HOP_BY_HOP_N_ERROR,
+ } ip6_hop_by_hop_error_t;
+ /* *INDENT-ON* */
+
+/*
+ * Primary h-b-h handler trace support
+ * We work pretty hard on the problem for obvious reasons
+ */
+typedef struct
+{
+ u32 next_index;
+ u32 trace_len;
+ u8 option_data[256];
+} ip6_hop_by_hop_trace_t;
+
+ /* Declared here so code above the node's registration can refer to it. */
+ vlib_node_registration_t ip6_hop_by_hop_node;
+
+ /*
+  * Counter descriptions generated from foreach_ip6_hop_by_hop_error, in the
+  * same order as ip6_hop_by_hop_error_t.
+  */
+ static char *ip6_hop_by_hop_error_strings[] = {
+ #define _(sym,string) string,
+ foreach_ip6_hop_by_hop_error
+ #undef _
+ };
+
+ /*
+  * format() callback printing a hop-by-hop extension header.
+  *
+  * va_args: ip6_hop_by_hop_header_t *header, int total_len.
+  *
+  * Prints a summary line, then walks the options between the end of the
+  * fixed header and header + total_len.  A type-0 option advances one
+  * byte; any other option is printed by its registered trace handler, or
+  * reported as unrecognized, and then skipped using its length field.
+  */
+ u8 *
+ format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args)
+ {
+   ip6_hop_by_hop_header_t *hdr = va_arg (*args, ip6_hop_by_hop_header_t *);
+   int total_len = va_arg (*args, int);
+   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+   u8 *cursor = (u8 *) (hdr + 1);
+   u8 *end = (u8 *) hdr + total_len;
+
+   s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d",
+               hdr->protocol, (hdr->length + 1) << 3, total_len);
+
+   while (cursor < end)
+     {
+       ip6_hop_by_hop_option_t *opt = (ip6_hop_by_hop_option_t *) cursor;
+       u8 type = opt->type;
+
+       if (type == 0)
+         {
+           /* Single-byte pad: step over it. */
+           cursor += 1;
+           continue;
+         }
+
+       if (hm->trace[type])
+         s = (*hm->trace[type]) (s, opt);
+       else
+         s = format (s, "\n unrecognized option %d length %d", type,
+                     opt->length);
+
+       cursor += opt->length + sizeof (ip6_hop_by_hop_option_t);
+     }
+
+   return s;
+ }
+
+ /*
+  * Trace formatter of the hop-by-hop node.
+  *
+  * va_args: vlib_main_t *, vlib_node_t *, ip6_hop_by_hop_trace_t *.
+  *
+  * Prints a summary line, then walks the options held in the captured
+  * t->option_data, bounded by t->trace_len bytes.
+  */
+ static u8 *
+ format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
+ {
+   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+   ip6_hop_by_hop_trace_t *t = va_arg (*args, ip6_hop_by_hop_trace_t *);
+   ip6_hop_by_hop_header_t *hbh0;
+   ip6_hop_by_hop_option_t *opt0, *limit0;
+   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+
+   u8 type0;
+
+   hbh0 = (ip6_hop_by_hop_header_t *) t->option_data;
+
+   s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d",
+               t->next_index, (hbh0->length + 1) << 3, t->trace_len);
+
+   opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+   /*
+    * trace_len is a byte count, so the offset has to be added while the
+    * pointer is still a u8 *; adding it after the cast would scale it by
+    * sizeof (ip6_hop_by_hop_option_t) and walk past the captured data.
+    */
+   limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + t->trace_len);
+
+   while (opt0 < limit0)
+     {
+       type0 = opt0->type;
+       switch (type0)
+         {
+         case 0:             /* Pad1: exactly one byte, no length field */
+           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
+           break;
+
+         default:
+           if (hm->trace[type0])
+             {
+               s = (*hm->trace[type0]) (s, opt0);
+             }
+           else
+             {
+               s =
+                 format (s, "\n unrecognized option %d length %d", type0,
+                         opt0->length);
+             }
+           opt0 =
+             (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+                                          sizeof (ip6_hop_by_hop_option_t));
+           break;
+         }
+     }
+   return s;
+ }
+
+ /*
+  * Walk the options of one hop-by-hop header and run the registered
+  * handlers.
+  *
+  * @param b0      buffer holding the packet.
+  * @param ip0     the packet's IPv6 header.
+  * @param hbh0    the hop-by-hop header (not used by the scan itself).
+  * @param opt0    first option to examine.
+  * @param limit0  one past the last option byte.
+  * @param next0   in/out next index; rewritten to a drop or icmp-error next
+  *                when an unrecognized option demands it.
+  * @return        0 when every option was accepted or skipped,
+  *                IP6_HOP_BY_HOP_ERROR_FORMAT when a handler returns < 0,
+  *                IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION for an unrecognized
+  *                option whose type bits request a discard.  The scan stops
+  *                at the first unrecognized option or handler failure.
+  */
+ always_inline u8
+ ip6_scan_hbh_options (vlib_buffer_t * b0,
+                       ip6_header_t * ip0,
+                       ip6_hop_by_hop_header_t * hbh0,
+                       ip6_hop_by_hop_option_t * opt0,
+                       ip6_hop_by_hop_option_t * limit0, u32 * next0)
+ {
+   ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+   u8 type0;
+   u8 error0 = 0;
+
+   while (opt0 < limit0)
+     {
+       type0 = opt0->type;
+       switch (type0)
+         {
+         case 0:             /* Pad1 */
+           /*
+            * Pad1 is a single byte with no length field.  The +1 must be
+            * applied to the u8 pointer; applied after the cast it would
+            * skip sizeof (ip6_hop_by_hop_option_t) bytes and misalign the
+            * rest of the scan.
+            */
+           opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
+           continue;
+         case 1:             /* PadN */
+           break;
+         default:
+           if (hm->options[type0])
+             {
+               if ((*hm->options[type0]) (b0, ip0, opt0) < 0)
+                 {
+                   error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+                   return (error0);
+                 }
+             }
+           else
+             {
+               /* Unrecognized mandatory option, check the two high order bits */
+               switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
+                 {
+                 case HBH_OPTION_TYPE_SKIP_UNKNOWN:
+                   break;
+                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
+                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
+                   *next0 = IP_LOOKUP_NEXT_DROP;
+                   break;
+                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
+                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
+                   *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
+                   icmp6_error_set_vnet_buffer (b0, ICMP6_parameter_problem,
+                                                ICMP6_parameter_problem_unrecognized_option,
+                                                (u8 *) opt0 - (u8 *) ip0);
+                   break;
+                 case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
+                   error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
+                   if (!ip6_address_is_multicast (&ip0->dst_address))
+                     {
+                       *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
+                       icmp6_error_set_vnet_buffer (b0,
+                                                    ICMP6_parameter_problem,
+                                                    ICMP6_parameter_problem_unrecognized_option,
+                                                    (u8 *) opt0 - (u8 *) ip0);
+                     }
+                   else
+                     {
+                       *next0 = IP_LOOKUP_NEXT_DROP;
+                     }
+                   break;
+                 }
+               return (error0);
+             }
+         }
+       /* Step over a TLV option: its header plus its data bytes. */
+       opt0 =
+         (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+                                      sizeof (ip6_hop_by_hop_option_t));
+     }
+   return (error0);
+ }
+
+/*
+ * Process the Hop-by-Hop Options header
+ */
+static uword
+ip6_hop_by_hop (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_hop_by_hop_node.index);
+ ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+ u32 n_left_from, *from, *to_next;
+ ip_lookup_next_t next_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ ip6_header_t *ip0, *ip1;
+ ip6_hop_by_hop_header_t *hbh0, *hbh1;
+ ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
+ u8 error0 = 0, error1 = 0;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ /* Speculatively enqueue b0, b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
+ u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ ip_adjacency_t *adj0 = adj_get (adj_index0);
+ u32 adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
+ ip_adjacency_t *adj1 = adj_get (adj_index1);
+
+ /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
+ next0 = adj0->lookup_next_index;
+ next1 = adj1->lookup_next_index;
+
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ opt1 = (ip6_hop_by_hop_option_t *) (hbh1 + 1);
+ limit0 =
+ (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
+ ((hbh0->length + 1) << 3));
+ limit1 =
+ (ip6_hop_by_hop_option_t *) ((u8 *) hbh1 +
+ ((hbh1->length + 1) << 3));
+
+ /*
+ * Basic validity checks
+ */
+ if ((hbh0->length + 1) << 3 >
+ clib_net_to_host_u16 (ip0->payload_length))
+ {
+ error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+ next0 = IP_LOOKUP_NEXT_DROP;
+ goto outdual;
+ }
+ /* Scan the set of h-b-h options, process ones that we understand */
+ error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
+
+ if ((hbh1->length + 1) << 3 >
+ clib_net_to_host_u16 (ip1->payload_length))
+ {
+ error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+ next1 = IP_LOOKUP_NEXT_DROP;
+ goto outdual;
+ }
+ /* Scan the set of h-b-h options, process ones that we understand */
+ error1 = ip6_scan_hbh_options (b1, ip1, hbh1, opt1, limit1, &next1);
+
+ outdual:
+ /* Has the classifier flagged this buffer for special treatment? */
+ if (PREDICT_FALSE
+ ((error0 == 0)
+ && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
+ next0 = hm->next_override;
+
+ /* Has the classifier flagged this buffer for special treatment? */
+ if (PREDICT_FALSE
+ ((error1 == 0)
+ && (vnet_buffer (b1)->l2_classify.opaque_index & OI_DECAP)))
+ next1 = hm->next_override;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ u32 trace_len = (hbh0->length + 1) << 3;
+ t->next_index = next0;
+ /* Capture the h-b-h option verbatim */
+ trace_len =
+ trace_len <
+ ARRAY_LEN (t->option_data) ? trace_len :
+ ARRAY_LEN (t->option_data);
+ t->trace_len = trace_len;
+ clib_memcpy (t->option_data, hbh0, trace_len);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ u32 trace_len = (hbh1->length + 1) << 3;
+ t->next_index = next1;
+ /* Capture the h-b-h option verbatim */
+ trace_len =
+ trace_len <
+ ARRAY_LEN (t->option_data) ? trace_len :
+ ARRAY_LEN (t->option_data);
+ t->trace_len = trace_len;
+ clib_memcpy (t->option_data, hbh1, trace_len);
+ }
+
+ }
+
+ b0->error = error_node->errors[error0];
+ b1->error = error_node->errors[error1];
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ ip6_header_t *ip0;
+ ip6_hop_by_hop_header_t *hbh0;
+ ip6_hop_by_hop_option_t *opt0, *limit0;
+ u8 error0 = 0;
+
+ /* Speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ /*
+ * Default use the next_index from the adjacency.
+ * A HBH option rarely redirects to a different node
+ */
+ u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ ip_adjacency_t *adj0 = adj_get (adj_index0);
+ next0 = adj0->lookup_next_index;
+
+ ip0 = vlib_buffer_get_current (b0);
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+ limit0 =
+ (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 +
+ ((hbh0->length + 1) << 3));
+
+ /*
+ * Basic validity checks
+ */
+ if ((hbh0->length + 1) << 3 >
+ clib_net_to_host_u16 (ip0->payload_length))
+ {
+ error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+ next0 = IP_LOOKUP_NEXT_DROP;
+ goto out0;
+ }
+
+ /* Scan the set of h-b-h options, process ones that we understand */
+ error0 = ip6_scan_hbh_options (b0, ip0, hbh0, opt0, limit0, &next0);
+
+ out0:
+ /* Has the classifier flagged this buffer for special treatment? */
+ if (PREDICT_FALSE
+ ((error0 == 0)
+ && (vnet_buffer (b0)->l2_classify.opaque_index & OI_DECAP)))
+ next0 = hm->next_override;
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip6_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ u32 trace_len = (hbh0->length + 1) << 3;
+ t->next_index = next0;
+ /* Capture the h-b-h option verbatim */
+ trace_len =
+ trace_len <
+ ARRAY_LEN (t->option_data) ? trace_len :
+ ARRAY_LEN (t->option_data);
+ t->trace_len = trace_len;
+ clib_memcpy (t->option_data, hbh0, trace_len);
+ }
+
+ b0->error = error_node->errors[error0];
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
+{
+  /* Identity and dispatch */
+  .name = "ip6-hop-by-hop",
+  .function = ip6_hop_by_hop,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .vector_size = sizeof (u32),
+
+  /* Packet trace formatter */
+  .format_trace = format_ip6_hop_by_hop_trace,
+
+  /* Error counters */
+  .error_strings = ip6_hop_by_hop_error_strings,
+  .n_errors = ARRAY_LEN (ip6_hop_by_hop_error_strings),
+
+  /* Registered as a sibling of ip6-lookup, with no next nodes of its own */
+  .sibling_of = "ip6-lookup",
+  .n_next_nodes = 0,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_hop_by_hop_node, ip6_hop_by_hop);
+
+/**
+ * Init function for the hop-by-hop node: clears the per-option handler
+ * and trace-formatter tables and installs the default next override.
+ */
+static clib_error_t *
+ip6_hop_by_hop_init (vlib_main_t * vm)
+{
+  ip6_hop_by_hop_main_t *hbh_main = &ip6_hop_by_hop_main;
+
+  memset (hbh_main->options, 0, sizeof (hbh_main->options));
+  memset (hbh_main->trace, 0, sizeof (hbh_main->trace));
+
+  /* Default target used when the classifier flags a buffer with OI_DECAP */
+  hbh_main->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
+
+/**
+ * Replace the next index that the hop-by-hop node substitutes for
+ * buffers flagged with OI_DECAP.
+ */
+void
+ip6_hbh_set_next_override (uword next)
+{
+  ip6_hop_by_hop_main.next_override = next;
+}
+
+/**
+ * Register a handler and a trace formatter for a hop-by-hop option type.
+ *
+ * A successful registration also sets ip6_main.hbh_enabled.
+ *
+ * @return 0 on success, -1 if a handler is already registered.
+ */
+int
+ip6_hbh_register_option (u8 option,
+                         int options (vlib_buffer_t * b, ip6_header_t * ip,
+                                      ip6_hop_by_hop_option_t * opt),
+                         u8 * trace (u8 * s, ip6_hop_by_hop_option_t * opt))
+{
+  ip6_hop_by_hop_main_t *hbh_main = &ip6_hop_by_hop_main;
+
+  ASSERT (option < ARRAY_LEN (hbh_main->options));
+
+  /* Refuse to overwrite an existing registration */
+  if (hbh_main->options[option] != NULL)
+    return -1;
+
+  hbh_main->options[option] = options;
+  hbh_main->trace[option] = trace;
+
+  /* At least one option is now handled: turn the global knob on */
+  ip6_main.hbh_enabled = 1;
+
+  return 0;
+}
+
+/**
+ * Unregister the handler for a hop-by-hop option type.
+ *
+ * Clears both the option handler and its trace formatter. When no
+ * option handler remains registered, ip6_main.hbh_enabled is cleared.
+ *
+ * @param option hop-by-hop option type to unregister.
+ * @return 0 on success, -1 if no handler was registered for @c option.
+ */
+int
+ip6_hbh_unregister_option (u8 option)
+{
+  ip6_main_t *im = &ip6_main;
+  ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+  bool found = false;
+  int i;
+
+  ASSERT (option < ARRAY_LEN (hm->options));
+
+  /* Not registered */
+  if (!hm->options[option])
+    return (-1);
+
+  hm->options[option] = NULL;
+  hm->trace[option] = NULL;
+
+  /*
+   * Disable global knob if this was the last option configured.
+   * Every slot has to be inspected: the slot cleared just above is
+   * always empty, so testing it would disable processing even while
+   * other options are still registered.
+   */
+  for (i = 0; i < ARRAY_LEN (hm->options); i++)
+    {
+      if (hm->options[i])
+        {
+          found = true;
+          break;
+        }
+    }
+  if (!found)
+    im->hbh_enabled = 0;
+
+  return (0);
+}
+
+/* Global IP6 main. */
+ip6_main_t ip6_main;
+
+static clib_error_t *		/* Init function for the IPv6 lookup path:
+				 * fills the per-prefix-length FIB masks,
+				 * creates the two bihash tables and the
+				 * table-0 unicast/multicast FIBs, hooks the
+				 * packet generator, and builds the neighbor
+				 * solicitation packet template. */
+ip6_lookup_init (vlib_main_t * vm)
+{
+ ip6_main_t *im = &ip6_main;
+ clib_error_t *error;
+ uword i;
+
+ if ((error = vlib_call_init_function (vm, vnet_feature_init)))
+ return error;
+
+ for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)	/* i is the prefix length in bits */
+ {
+ u32 j, i0, i1;
+
+ i0 = i / 32;			/* number of fully-set 32-bit words */
+ i1 = i % 32;			/* leftover bits in the next word */
+
+ for (j = 0; j < i0; j++)
+ im->fib_masks[i].as_u32[j] = ~0;
+
+ if (i1)			/* partial word: top i1 bits set, stored in network order */
+ im->fib_masks[i].as_u32[i0] =
+ clib_host_to_net_u32 (pow2_mask (i1) << (32 - i1));
+ }
+
+ ip_lookup_init (&im->lookup_main, /* is_ip6 */ 1);
+
+ if (im->lookup_table_nbuckets == 0)	/* not configured: use the default */
+ im->lookup_table_nbuckets = IP6_FIB_DEFAULT_HASH_NUM_BUCKETS;
+
+ im->lookup_table_nbuckets = 1 << max_log2 (im->lookup_table_nbuckets);
+
+ if (im->lookup_table_size == 0)	/* not configured: use the default */
+ im->lookup_table_size = IP6_FIB_DEFAULT_HASH_MEMORY_SIZE;
+
+ BV (clib_bihash_init) (&(im->ip6_table[IP6_FIB_TABLE_FWDING].ip6_hash),
+ "ip6 FIB fwding table",
+ im->lookup_table_nbuckets, im->lookup_table_size);
+ BV (clib_bihash_init) (&im->ip6_table[IP6_FIB_TABLE_NON_FWDING].ip6_hash,
+ "ip6 FIB non-fwding table",
+ im->lookup_table_nbuckets, im->lookup_table_size);
+
+ /* Create the unicast and multicast FIBs for table id 0. */
+ fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
+ FIB_SOURCE_DEFAULT_ROUTE);
+ mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, 0,
+ MFIB_SOURCE_DEFAULT_ROUTE);
+
+ {
+ pg_node_t *pn;
+ pn = pg_get_node (ip6_lookup_node.index);
+ pn->unformat_edit = unformat_pg_ip6_header;	/* packet-generator edit hook for ip6-lookup */
+ }
+
+ /* Unless explicitly configured, don't process HBH options */
+ im->hbh_enabled = 0;
+
+ {
+ icmp6_neighbor_solicitation_header_t p;	/* template packet, built on the stack */
+
+ memset (&p, 0, sizeof (p));
+
+ p.ip.ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);	/* version 6 in the top nibble */
+ p.ip.payload_length =
+ clib_host_to_net_u16 (sizeof (p) -
+ STRUCT_OFFSET_OF
+ (icmp6_neighbor_solicitation_header_t, neighbor));	/* bytes from the 'neighbor' member to the end */
+ p.ip.protocol = IP_PROTOCOL_ICMP6;
+ p.ip.hop_limit = 255;
+ ip6_set_solicited_node_multicast_address (&p.ip.dst_address, 0);
+
+ p.neighbor.icmp.type = ICMP6_neighbor_solicitation;
+
+ p.link_layer_option.header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
+ p.link_layer_option.header.n_data_u64s =
+ sizeof (p.link_layer_option) / sizeof (u64);	/* option length in 8-byte units */
+
+ vlib_packet_template_init (vm,
+ &im->discover_neighbor_packet_template,
+ &p, sizeof (p),
+ /* alloc chunk size */ 8,
+ "ip6 neighbor discovery");
+ }
+
+ return error;			/* always 0 here: a non-zero error returned early above */
+}
+
+VLIB_INIT_FUNCTION (ip6_lookup_init);
+
+/**
+ * Build an IPv6 link-local address from a 6-byte Ethernet MAC address.
+ *
+ * The upper 64 bits are set to fe80::. The lower 64 bits are the MAC
+ * with 0xFF 0xFE inserted in the middle and the "u" bit of the first
+ * byte inverted.
+ */
+void
+ip6_link_local_address_from_ethernet_mac_address (ip6_address_t * ip,
+                                                  u8 * mac)
+{
+  /* Upper half: the link-local prefix */
+  ip->as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
+
+  /* First three MAC bytes, with the "u" bit of byte 0 inverted */
+  ip->as_u8[8] = mac[0] ^ 0x02;
+  ip->as_u8[9] = mac[1];
+  ip->as_u8[10] = mac[2];
+
+  /* Filler inserted between the two MAC halves */
+  ip->as_u8[11] = 0xFF;
+  ip->as_u8[12] = 0xFE;
+
+  /* Last three MAC bytes */
+  ip->as_u8[13] = mac[3];
+  ip->as_u8[14] = mac[4];
+  ip->as_u8[15] = mac[5];
+}
+
+/**
+ * Recover the 6-byte Ethernet MAC address embedded in the lower 64 bits
+ * of a link-local address; the inverse of
+ * ip6_link_local_address_from_ethernet_mac_address().
+ */
+void
+ip6_ethernet_mac_address_from_link_local_address (u8 * mac,
+                                                  ip6_address_t * ip)
+{
+  const u8 *iid = &ip->as_u8[8];	/* lower 64 bits of the address */
+
+  /* Undo the "u" bit inversion applied when the address was built */
+  mac[0] = iid[0] ^ 0x02;
+  mac[1] = iid[1];
+  mac[2] = iid[2];
+  /* iid[3] and iid[4] hold the 0xFF 0xFE filler and are skipped */
+  mac[3] = iid[5];
+  mac[4] = iid[6];
+  mac[5] = iid[7];
+}
+
+/**
+ * CLI handler for "test ip6 link": converts a MAC address to a
+ * link-local address and back, printing both. Input that does not
+ * parse as a MAC address produces no output.
+ */
+static clib_error_t *
+test_ip6_link_command_fn (vlib_main_t * vm,
+                          unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  ip6_address_t link_local;
+  u8 mac[6];
+
+  if (!unformat (input, "%U", unformat_ethernet_address, mac))
+    return 0;
+
+  ip6_link_local_address_from_ethernet_mac_address (&link_local, mac);
+  vlib_cli_output (vm, "Link local address: %U", format_ip6_address,
+                   &link_local);
+
+  ip6_ethernet_mac_address_from_link_local_address (mac, &link_local);
+  vlib_cli_output (vm, "Original MAC address: %U",
+                   format_ethernet_address, mac);
+
+  return 0;
+}
+
+/*?
+ * This command converts the given MAC Address into an IPv6 link-local
+ * address.
+ *
+ * @cliexpar
+ * Example of how to create an IPv6 link-local address:
+ * @cliexstart{test ip6 link 16:d9:e0:91:79:86}
+ * Link local address: fe80::14d9:e0ff:fe91:7986
+ * Original MAC address: 16:d9:e0:91:79:86
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_link_command, static) =
+{
+  /* "test ip6 link <mac>" is handled by test_ip6_link_command_fn */
+  .path = "test ip6 link",
+  .short_help = "test ip6 link <mac-address>",
+  .function = test_ip6_link_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * Set the flow-hash configuration of the IPv6 FIB with the given
+ * table id.
+ *
+ * @return 0 on success, VNET_API_ERROR_NO_SUCH_FIB if the table id is
+ *         unknown.
+ */
+int
+vnet_set_ip6_flow_hash (u32 table_id, u32 flow_hash_config)
+{
+  u32 fib_index = fib_table_find (FIB_PROTOCOL_IP6, table_id);
+
+  if (fib_index == ~0)
+    return VNET_API_ERROR_NO_SUCH_FIB;
+
+  fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP6,
+                                  flow_hash_config);
+  return 0;
+}
+
+/**
+ * CLI handler for "set ip6 flow-hash".
+ *
+ * Parses an optional "table <id>" plus any of the keywords generated
+ * from foreach_flow_hash_bit, then applies the resulting configuration
+ * through vnet_set_ip6_flow_hash().
+ *
+ * @return 0 on success; an error if nothing was parsed or the FIB table
+ *         does not exist.
+ */
+static clib_error_t *
+set_ip6_flow_hash_command_fn (vlib_main_t * vm,
+                              unformat_input_t * input,
+                              vlib_cli_command_t * cmd)
+{
+  int matched = 0;
+  u32 table_id = 0;
+  u32 flow_hash_config = 0;
+  int rv;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "table %d", &table_id))
+        matched = 1;
+      /* One "else if" per flow-hash keyword, OR-ing in its bit */
+#define _(a,v) \
+      else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
+      foreach_flow_hash_bit
+#undef _
+      else
+        break;
+    }
+
+  if (matched == 0)
+    return clib_error_return (0, "unknown input `%U'",
+                              format_unformat_error, input);
+
+  rv = vnet_set_ip6_flow_hash (table_id, flow_hash_config);
+  switch (rv)
+    {
+    case 0:
+      break;
+
+      /*
+       * Match the code vnet_set_ip6_flow_hash() actually returns; a
+       * literal -1 would let a missing table fall through to the
+       * "BUG" warning below and report success to the user.
+       */
+    case VNET_API_ERROR_NO_SUCH_FIB:
+      return clib_error_return (0, "no such FIB table %d", table_id);
+
+    default:
+      clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
+      break;
+    }
+
+  return 0;
+}
+
+/*?
+ * Configure the set of IPv6 fields used by the flow hash.
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to set the flow hash on a given table:
+ * @cliexcmd{set ip6 flow-hash table 8 dst sport dport proto}
+ *
+ * Example of how to display the configured flow hash:
+ * @cliexstart{show ip6 fib}
+ * ipv6-VRF:0, fib_index 0, flow hash: src dst sport dport proto
+ * @::/0
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:5 buckets:1 uRPF:5 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * fe80::/10
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:10 buckets:1 uRPF:10 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::1/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:8 buckets:1 uRPF:8 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::2/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:7 buckets:1 uRPF:7 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::16/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:9 buckets:1 uRPF:9 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::1:ff00:0/104
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:6 buckets:1 uRPF:6 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ipv6-VRF:8, fib_index 1, flow hash: dst sport dport proto
+ * @::/0
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
+ * [0] [@0]: dpo-drop ip6
+ * @::a:1:1:0:4/126
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:27 buckets:1 uRPF:26 to:[0:0]]
+ * [0] [@4]: ipv6-glean: af_packet0
+ * @::a:1:1:0:7/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:28 buckets:1 uRPF:27 to:[0:0]]
+ * [0] [@2]: dpo-receive: @::a:1:1:0:7 on af_packet0
+ * fe80::/10
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:26 buckets:1 uRPF:25 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * fe80::fe:3eff:fe3e:9222/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:29 buckets:1 uRPF:28 to:[0:0]]
+ * [0] [@2]: dpo-receive: fe80::fe:3eff:fe3e:9222 on af_packet0
+ * ff02::1/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:24 buckets:1 uRPF:23 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::2/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:23 buckets:1 uRPF:22 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::16/128
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:25 buckets:1 uRPF:24 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * ff02::1:ff00:0/104
+ * unicast-ip6-chain
+ * [@0]: dpo-load-balance: [index:22 buckets:1 uRPF:21 to:[0:0]]
+ * [0] [@2]: dpo-receive
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) =
+{
+  /* "set ip6 flow-hash ..." is handled by set_ip6_flow_hash_command_fn */
+  .path = "set ip6 flow-hash",
+  .function = set_ip6_flow_hash_command_fn,
+  .short_help =
+    "set ip6 flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
+};
+/* *INDENT-ON* */
+
+/**
+ * CLI handler for "show ip6 local": prints every IP protocol number
+ * whose ip6-local next index is not the punt index, together with the
+ * name of the node it is dispatched to.
+ */
+static clib_error_t *
+show_ip6_local_command_fn (vlib_main_t * vm,
+                           unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  ip_lookup_main_t *lm = &ip6_main.lookup_main;
+  int proto;
+
+  vlib_cli_output (vm, "Protocols handled by ip6_local");
+
+  for (proto = 0; proto < ARRAY_LEN (lm->local_next_by_ip_protocol); proto++)
+    {
+      vlib_node_t *local_node;
+      u32 node_index;
+
+      /* Protocols still pointing at punt have no local handler */
+      if (lm->local_next_by_ip_protocol[proto] == IP_LOCAL_NEXT_PUNT)
+        continue;
+
+      /* Translate the next index into the index of the target node */
+      local_node = vlib_get_node (vm, ip6_local_node.index);
+      node_index =
+        local_node->next_nodes[lm->local_next_by_ip_protocol[proto]];
+
+      vlib_cli_output (vm, "%d: %U", proto, format_vlib_node_name, vm,
+                       node_index);
+    }
+
+  return 0;
+}
+
+
+
+/*?
+ * Display the set of protocols handled by the local IPv6 stack.
+ *
+ * @cliexpar
+ * Example of how to display local protocol table:
+ * @cliexstart{show ip6 local}
+ * Protocols handled by ip6_local
+ * 17
+ * 43
+ * 58
+ * 115
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip6_local, static) =
+{
+  /* "show ip6 local" is handled by show_ip6_local_command_fn */
+  .path = "show ip6 local",
+  .short_help = "show ip6 local",
+  .function = show_ip6_local_command_fn,
+};
+/* *INDENT-ON* */
+
+/**
+ * Attach a classify table to, or detach it from, an interface for IPv6.
+ *
+ * Records @c table_index for the interface. If the interface has an
+ * IPv6 address, a /128 classify entry for the first address is added
+ * to the interface's IPv6 FIB, or removed when @c table_index is ~0.
+ *
+ * @param vm          vlib main (not used in the body).
+ * @param sw_if_index software interface index.
+ * @param table_index classify table index, or ~0 to detach.
+ * @return 0 on success, VNET_API_ERROR_NO_MATCHING_INTERFACE if the
+ *         interface does not exist, VNET_API_ERROR_NO_SUCH_ENTRY if the
+ *         classify table does not exist.
+ */
+int
+vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
+                             u32 table_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_interface_main_t *im = &vnm->interface_main;
+  ip6_main_t *ipm = &ip6_main;
+  ip_lookup_main_t *lm = &ipm->lookup_main;
+  vnet_classify_main_t *cm = &vnet_classify_main;
+  ip6_address_t *if_addr;
+
+  if (pool_is_free_index (im->sw_interfaces, sw_if_index))
+    return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+
+  if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
+    return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+  vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
+  lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
+
+  if_addr = ip6_interface_first_address (ipm, sw_if_index);
+
+  if (NULL != if_addr)
+    {
+      fib_prefix_t pfx = {
+        .fp_len = 128,
+        .fp_proto = FIB_PROTOCOL_IP6,
+        .fp_addr.ip6 = *if_addr,
+      };
+      u32 fib_index;
+
+      /*
+       * The prefix is IPv6, so the entry has to go into the
+       * interface's IPv6 FIB; an IPv4 lookup here would return the
+       * index of an unrelated table.
+       */
+      fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
+                                                       sw_if_index);
+
+      if (table_index != (u32) ~ 0)
+        {
+          dpo_id_t dpo = DPO_INVALID;
+
+          dpo_set (&dpo,
+                   DPO_CLASSIFY,
+                   DPO_PROTO_IP6,
+                   classify_dpo_create (DPO_PROTO_IP6, table_index));
+
+          fib_table_entry_special_dpo_add (fib_index,
+                                           &pfx,
+                                           FIB_SOURCE_CLASSIFY,
+                                           FIB_ENTRY_FLAG_NONE, &dpo);
+          dpo_reset (&dpo);
+        }
+      else
+        {
+          fib_table_entry_special_remove (fib_index,
+                                          &pfx, FIB_SOURCE_CLASSIFY);
+        }
+    }
+
+  return 0;
+}
+
+/**
+ * CLI handler for "set ip6 classify": requires both "table-index <n>"
+ * and "intfc <interface>", then calls vnet_set_ip6_classify_intfc().
+ */
+static clib_error_t *
+set_ip6_classify_command_fn (vlib_main_t * vm,
+                             unformat_input_t * input,
+                             vlib_cli_command_t * cmd)
+{
+  u32 sw_if_index = ~0;
+  u32 table_index = ~0;
+  int have_table_index = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "table-index %d", &table_index))
+        {
+          have_table_index = 1;
+          continue;
+        }
+
+      /* Anything that is not an interface clause ends the parse */
+      if (!unformat (input, "intfc %U", unformat_vnet_sw_interface,
+                     vnet_get_main (), &sw_if_index))
+        break;
+    }
+
+  if (!have_table_index)
+    return clib_error_return (0, "classify table-index must be specified");
+
+  if (sw_if_index == ~0)
+    return clib_error_return (0, "interface / subif must be specified");
+
+  switch (vnet_set_ip6_classify_intfc (vm, sw_if_index, table_index))
+    {
+    case VNET_API_ERROR_NO_MATCHING_INTERFACE:
+      return clib_error_return (0, "No such interface");
+
+    case VNET_API_ERROR_NO_SUCH_ENTRY:
+      return clib_error_return (0, "No such classifier table");
+
+    default:
+      /* Success and any other return code both report no CLI error */
+      return 0;
+    }
+}
+
+/*?
+ * Assign a classification table to an interface. The classification
+ * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
+ * commands. Once the table is created, use this command to filter packets
+ * on an interface.
+ *
+ * @cliexpar
+ * Example of how to assign a classification table to an interface:
+ * @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
+{
+  /* "set ip6 classify ..." is handled by set_ip6_classify_command_fn */
+  .path = "set ip6 classify",
+  .function = set_ip6_classify_command_fn,
+  .short_help =
+    "set ip6 classify intfc <interface> table-index <classify-idx>",
+};
+/* *INDENT-ON* */
+
+/**
+ * Early config handler for the "ip6" section.
+ *
+ * Accepts "hash-buckets <n>" and "heap-size <n>{m|M|g|G}" and stores
+ * the results in ip6_main. Any other input is rejected, and nothing is
+ * stored in that case.
+ */
+static clib_error_t *
+ip6_config (vlib_main_t * vm, unformat_input_t * input)
+{
+  u32 n_buckets = 0;
+  uword heap_bytes = 0;
+  u32 value;
+
+  for (;;)
+    {
+      if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
+        break;
+
+      if (unformat (input, "hash-buckets %d", &value))
+        n_buckets = value;
+      else if (unformat (input, "heap-size %dm", &value)
+               || unformat (input, "heap-size %dM", &value))
+        heap_bytes = ((u64) value) << 20;	/* megabytes */
+      else if (unformat (input, "heap-size %dg", &value)
+               || unformat (input, "heap-size %dG", &value))
+        heap_bytes = ((u64) value) << 30;	/* gigabytes */
+      else
+        return clib_error_return (0, "unknown input '%U'",
+                                  format_unformat_error, input);
+    }
+
+  ip6_main.lookup_table_nbuckets = n_buckets;
+  ip6_main.lookup_table_size = heap_bytes;
+
+  return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (ip6_config, "ip6");
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_hop_by_hop.c b/src/vnet/ip/ip6_hop_by_hop.c
new file mode 100644
index 00000000..14fbb392
--- /dev/null
+++ b/src/vnet/ip/ip6_hop_by_hop.c
@@ -0,0 +1,1166 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+
+#include <vnet/ip/ip.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/classify/vnet_classify.h>
+
+/**
+ * @file
+ * @brief In-band OAM (iOAM).
+ *
+ * In-band OAM (iOAM) is an implementation study to record operational
+ * information in the packet while the packet traverses a path between
+ * two points in the network.
+ *
+ * VPP can function as in-band OAM encapsulating, transit and
+ * decapsulating node. In this version of VPP in-band OAM data is
+ * transported as options in an IPv6 hop-by-hop extension header. Hence
+ * in-band OAM can be enabled for IPv6 traffic.
+ */
+
+ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main;
+
+#define foreach_ip6_hbyh_ioam_input_next \
+ _(IP6_REWRITE, "ip6-rewrite") \
+ _(IP6_LOOKUP, "ip6-lookup") \
+ _(DROP, "error-drop")
+
+typedef enum			/* Next-node indices, one per entry of
+				 * foreach_ip6_hbyh_ioam_input_next. */
+{
+#define _(s,n) IP6_HBYH_IOAM_INPUT_NEXT_##s,
+ foreach_ip6_hbyh_ioam_input_next
+#undef _
+ IP6_HBYH_IOAM_INPUT_N_NEXT,	/* number of next nodes listed above */
+} ip6_hbyh_ioam_input_next_t;
+
+/**
+ * Unformat helper: parses "ioam-encap <flow-name>" or
+ * "ioam-decap <flow-name>" and stores the value returned by
+ * ioam_flow_add() in the u64 passed through @c args.
+ *
+ * @return 1 if either form matched, 0 otherwise.
+ */
+static uword
+unformat_opaque_ioam (unformat_input_t * input, va_list * args)
+{
+  u64 *opaquep = va_arg (*args, u64 *);
+  u8 *flow_name = NULL;
+  u8 is_encap;
+
+  if (unformat (input, "ioam-encap %s", &flow_name))
+    is_encap = 1;
+  else if (unformat (input, "ioam-decap %s", &flow_name))
+    is_encap = 0;
+  else
+    {
+      vec_free (flow_name);
+      return 0;
+    }
+
+  *opaquep = ioam_flow_add (is_encap, flow_name);
+
+  /* The parsed name vector is released here on every path */
+  vec_free (flow_name);
+  return 1;
+}
+
+/**
+ * Look up the flow name for a flow context.
+ *
+ * The pool index is obtained by applying IOAM_MASK_DECAP_BIT to
+ * @c flow_ctx.
+ *
+ * @return the flow's name, or NULL if the pool slot is free.
+ */
+u8 *
+get_flow_name_from_flow_ctx (u32 flow_ctx)
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+  u32 flow_index = IOAM_MASK_DECAP_BIT (flow_ctx);
+  flow_data_t *flow;
+
+  if (pool_is_free_index (ioam->flows, flow_index))
+    return NULL;
+
+  flow = pool_elt_at_index (ioam->flows, flow_index);
+  return flow->flow_name;
+}
+
+/* The main h-b-h tracer will be invoked, no need to do much here */
+/**
+ * Register the rewrite callback and size for a hop-by-hop option type
+ * in the "add" tables.
+ *
+ * @return 0 on success, -1 if a callback is already registered.
+ */
+int
+ip6_hbh_add_register_option (u8 option,
+                             u8 size,
+                             int rewrite_options (u8 * rewrite_string,
+                                                  u8 * rewrite_size))
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+
+  ASSERT (option < ARRAY_LEN (ioam->add_options));
+
+  /* Refuse to overwrite an existing registration */
+  if (ioam->add_options[option] != NULL)
+    return -1;
+
+  ioam->options_size[option] = size;
+  ioam->add_options[option] = rewrite_options;
+
+  return 0;
+}
+
+/**
+ * Remove the rewrite callback and size registered for a hop-by-hop
+ * option type.
+ *
+ * @return 0 on success, -1 if nothing was registered.
+ */
+int
+ip6_hbh_add_unregister_option (u8 option)
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+
+  ASSERT (option < ARRAY_LEN (ioam->add_options));
+
+  if (ioam->add_options[option] == NULL)
+    return -1;
+
+  ioam->options_size[option] = 0;
+  ioam->add_options[option] = NULL;
+
+  return 0;
+}
+
+/* Config handler registration */
+/**
+ * Register a config handler for a hop-by-hop option type.
+ *
+ * @return 0 on success, VNET_API_ERROR_INVALID_REGISTRATION if a
+ *         handler is already registered.
+ */
+int
+ip6_hbh_config_handler_register (u8 option,
+                                 int config_handler (void *data, u8 disable))
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+
+  ASSERT (option < ARRAY_LEN (ioam->config_handler));
+
+  if (ioam->config_handler[option] != NULL)
+    return VNET_API_ERROR_INVALID_REGISTRATION;
+
+  ioam->config_handler[option] = config_handler;
+  return 0;
+}
+
+/**
+ * Remove the config handler registered for a hop-by-hop option type.
+ *
+ * @return 0 on success, VNET_API_ERROR_INVALID_REGISTRATION if no
+ *         handler was registered.
+ */
+int
+ip6_hbh_config_handler_unregister (u8 option)
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+
+  ASSERT (option < ARRAY_LEN (ioam->config_handler));
+
+  if (ioam->config_handler[option] == NULL)
+    return VNET_API_ERROR_INVALID_REGISTRATION;
+
+  ioam->config_handler[option] = NULL;
+  return 0;
+}
+
+/* Flow handler registration */
+/**
+ * Register a flow handler for a hop-by-hop option type.
+ *
+ * @return 0 on success, VNET_API_ERROR_INVALID_REGISTRATION if a
+ *         handler is already registered.
+ */
+int
+ip6_hbh_flow_handler_register (u8 option,
+                               u32 ioam_flow_handler (u32 flow_ctx, u8 add))
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+
+  ASSERT (option < ARRAY_LEN (ioam->flow_handler));
+
+  if (ioam->flow_handler[option] != NULL)
+    return VNET_API_ERROR_INVALID_REGISTRATION;
+
+  ioam->flow_handler[option] = ioam_flow_handler;
+  return 0;
+}
+
+/**
+ * Remove the flow handler registered for a hop-by-hop option type.
+ *
+ * @return 0 on success, VNET_API_ERROR_INVALID_REGISTRATION if no
+ *         handler was registered.
+ */
+int
+ip6_hbh_flow_handler_unregister (u8 option)
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+
+  ASSERT (option < ARRAY_LEN (ioam->flow_handler));
+
+  if (ioam->flow_handler[option] == NULL)
+    return VNET_API_ERROR_INVALID_REGISTRATION;
+
+  ioam->flow_handler[option] = NULL;
+  return 0;
+}
+
+typedef struct			/* Trace record written by the
+				 * ip6-add-hop-by-hop node function. */
+{
+ u32 next_index;		/* next index chosen for the traced packet */
+} ip6_add_hop_by_hop_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ip6_add_hop_by_hop_trace (u8 * s, va_list * args)
+{
+  ip6_add_hop_by_hop_trace_t *trace;
+
+  /* The first two arguments are unused but must still be consumed */
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  trace = va_arg (*args, ip6_add_hop_by_hop_trace_t *);
+
+  return format (s, "IP6_ADD_HOP_BY_HOP: next index %d", trace->next_index);
+}
+
+vlib_node_registration_t ip6_add_hop_by_hop_node;
+
+#define foreach_ip6_add_hop_by_hop_error \
+_(PROCESSED, "Pkts w/ added ip6 hop-by-hop options")
+
+typedef enum			/* Counter indices, one per entry of
+				 * foreach_ip6_add_hop_by_hop_error. */
+{
+#define _(sym,str) IP6_ADD_HOP_BY_HOP_ERROR_##sym,
+ foreach_ip6_add_hop_by_hop_error
+#undef _
+ IP6_ADD_HOP_BY_HOP_N_ERROR,	/* number of counters listed above */
+} ip6_add_hop_by_hop_error_t;
+
+static char *ip6_add_hop_by_hop_error_strings[] = {	/* counter descriptions, same order as the enum */
+#define _(sym,string) string,
+ foreach_ip6_add_hop_by_hop_error
+#undef _
+};
+
+static uword			/* Node function for ip6-add-hop-by-hop:
+				 * for every packet in the frame, moves the
+				 * IPv6 header back by the length of
+				 * hm->rewrite, copies the rewrite in as a
+				 * hop-by-hop header, patches the protocol
+				 * chain and payload length, and sends the
+				 * packet to ip6-lookup.
+				 * Returns frame->n_vectors. */
+ip6_add_hop_by_hop_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+ u32 n_left_from, *from, *to_next;
+ ip_lookup_next_t next_index;
+ u32 processed = 0;		/* packets rewritten, reported as a counter at the end */
+ u8 *rewrite = hm->rewrite;	/* pre-built hop-by-hop header bytes */
+ u32 rewrite_length = vec_len (rewrite);
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ while (n_left_from >= 4 && n_left_to_next >= 2)	/* dual loop; needs 4 left because from[2], from[3] are prefetched */
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ ip6_header_t *ip0, *ip1;
+ ip6_hop_by_hop_header_t *hbh0, *hbh1;
+ u64 *copy_src0, *copy_dst0, *copy_src1, *copy_dst1;
+ u16 new_l0, new_l1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data - rewrite_length,
+ 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data - rewrite_length,
+ 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+
+ /* Copy the ip header left by the required amount */
+ copy_dst0 = (u64 *) (((u8 *) ip0) - rewrite_length);
+ copy_dst1 = (u64 *) (((u8 *) ip1) - rewrite_length);
+ copy_src0 = (u64 *) ip0;
+ copy_src1 = (u64 *) ip1;
+
+ copy_dst0[0] = copy_src0[0];	/* five u64 words = 40 bytes copied per packet */
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[4] = copy_src0[4];
+
+ copy_dst1[0] = copy_src1[0];
+ copy_dst1[1] = copy_src1[1];
+ copy_dst1[2] = copy_src1[2];
+ copy_dst1[3] = copy_src1[3];
+ copy_dst1[4] = copy_src1[4];
+
+ vlib_buffer_advance (b0, -(word) rewrite_length);	/* negative advance: current data now starts at the moved header */
+ vlib_buffer_advance (b1, -(word) rewrite_length);
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
+ /* $$$ tune, rewrite_length is a multiple of 8 */
+ clib_memcpy (hbh0, rewrite, rewrite_length);
+ clib_memcpy (hbh1, rewrite, rewrite_length);
+ /* Patch the protocol chain, insert the h-b-h (type 0) header */
+ hbh0->protocol = ip0->protocol;
+ hbh1->protocol = ip1->protocol;
+ ip0->protocol = 0;
+ ip1->protocol = 0;
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;	/* payload grows by the inserted header */
+ new_l1 =
+ clib_net_to_host_u16 (ip1->payload_length) + rewrite_length;
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip1->payload_length = clib_host_to_net_u16 (new_l1);
+
+ /* Both packets continue to ip6-lookup */
+ next0 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;
+ next1 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;
+
+
+ /* $$$$$ End of processing 2 x packets $$$$$ */
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_add_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_add_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->next_index = next1;
+ }
+ }
+ processed += 2;
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+ while (n_left_from > 0 && n_left_to_next > 0)	/* single loop: same steps, one packet at a time */
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ ip6_header_t *ip0;
+ ip6_hop_by_hop_header_t *hbh0;
+ u64 *copy_src0, *copy_dst0;
+ u16 new_l0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ ip0 = vlib_buffer_get_current (b0);
+
+ /* Copy the ip header left by the required amount */
+ copy_dst0 = (u64 *) (((u8 *) ip0) - rewrite_length);
+ copy_src0 = (u64 *) ip0;
+
+ copy_dst0[0] = copy_src0[0];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[4] = copy_src0[4];
+ vlib_buffer_advance (b0, -(word) rewrite_length);
+ ip0 = vlib_buffer_get_current (b0);
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ /* $$$ tune, rewrite_length is a multiple of 8 */
+ clib_memcpy (hbh0, rewrite, rewrite_length);
+ /* Patch the protocol chain, insert the h-b-h (type 0) header */
+ hbh0->protocol = ip0->protocol;
+ ip0->protocol = 0;
+ new_l0 =
+ clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+
+ /* The packet continues to ip6-lookup */
+ next0 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip6_add_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+
+ processed++;
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, ip6_add_hop_by_hop_node.index,
+ IP6_ADD_HOP_BY_HOP_ERROR_PROCESSED, processed);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) =
+{
+  /* Identity and dispatch */
+  .name = "ip6-add-hop-by-hop",
+  .function = ip6_add_hop_by_hop_node_fn,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .vector_size = sizeof (u32),
+
+  /* Packet trace formatter */
+  .format_trace = format_ip6_add_hop_by_hop_trace,
+
+  /* Error counters */
+  .error_strings = ip6_add_hop_by_hop_error_strings,
+  .n_errors = ARRAY_LEN (ip6_add_hop_by_hop_error_strings),
+
+  /* Next nodes are generated from foreach_ip6_hbyh_ioam_input_next */
+  .n_next_nodes = IP6_HBYH_IOAM_INPUT_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [IP6_HBYH_IOAM_INPUT_NEXT_##s] = n,
+    foreach_ip6_hbyh_ioam_input_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_add_hop_by_hop_node,
+                              ip6_add_hop_by_hop_node_fn);
+/* The main h-b-h tracer was already invoked, no need to do much here */
+typedef struct			/* Trace record printed by
+				 * format_ip6_pop_hop_by_hop_trace(). */
+{
+ u32 next_index;		/* printed as "next index" in the trace output */
+} ip6_pop_hop_by_hop_trace_t;
+
+/* packet trace format function */
+static u8 *
+format_ip6_pop_hop_by_hop_trace (u8 * s, va_list * args)
+{
+  ip6_pop_hop_by_hop_trace_t *trace;
+
+  /* The first two arguments are unused but must still be consumed */
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  trace = va_arg (*args, ip6_pop_hop_by_hop_trace_t *);
+
+  return format (s, "IP6_POP_HOP_BY_HOP: next index %d", trace->next_index);
+}
+
+/**
+ * Register the pop-time handler for a hop-by-hop option type.
+ *
+ * @return 0 on success, -1 if a handler is already registered.
+ */
+int
+ip6_hbh_pop_register_option (u8 option,
+                             int options (vlib_buffer_t * b,
+                                          ip6_header_t * ip,
+                                          ip6_hop_by_hop_option_t * opt))
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+
+  ASSERT (option < ARRAY_LEN (ioam->pop_options));
+
+  if (ioam->pop_options[option] != NULL)
+    return -1;
+
+  ioam->pop_options[option] = options;
+  return 0;
+}
+
+/**
+ * Remove the pop-time handler registered for a hop-by-hop option type.
+ *
+ * @return 0 on success, -1 if nothing was registered.
+ */
+int
+ip6_hbh_pop_unregister_option (u8 option)
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+
+  ASSERT (option < ARRAY_LEN (ioam->pop_options));
+
+  if (ioam->pop_options[option] == NULL)
+    return -1;
+
+  ioam->pop_options[option] = NULL;
+  return 0;
+}
+
+/* Tentative declaration so code above the registration can use .index */
+vlib_node_registration_t ip6_pop_hop_by_hop_node;
+
+/*
+ * X-macro listing the pop node's counters as (symbol, description) pairs.
+ * It is expanded twice below: once for the enum, once for the string table,
+ * which keeps the two in the same order.
+ */
+#define foreach_ip6_pop_hop_by_hop_error \
+_(PROCESSED, "Pkts w/ removed ip6 hop-by-hop options") \
+_(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options") \
+_(OPTION_FAILED, "ip6 pop hop-by-hop failed to process")
+
+/* Counter indices; IP6_POP_HOP_BY_HOP_N_ERROR is the number of counters */
+typedef enum
+{
+#define _(sym,str) IP6_POP_HOP_BY_HOP_ERROR_##sym,
+ foreach_ip6_pop_hop_by_hop_error
+#undef _
+ IP6_POP_HOP_BY_HOP_N_ERROR,
+} ip6_pop_hop_by_hop_error_t;
+
+/* Counter descriptions, indexed by ip6_pop_hop_by_hop_error_t */
+static char *ip6_pop_hop_by_hop_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_pop_hop_by_hop_error
+#undef _
+};
+
+/*
+ * Walk the options of a hop-by-hop header and call the registered pop
+ * handler (hm->pop_options[type]) for every option type that has one.
+ *
+ * vm   - vlib main, used for the OPTION_FAILED counter
+ * ip0  - IPv6 header of the packet (passed through to the handler)
+ * hbh0 - hop-by-hop header whose options are scanned
+ * b    - buffer holding the packet (passed through to the handler)
+ *
+ * Does nothing when ip0 or hbh0 is NULL. A handler returning < 0 bumps
+ * IP6_POP_HOP_BY_HOP_ERROR_OPTION_FAILED; scanning continues regardless.
+ */
+static inline void
+ioam_pop_hop_by_hop_processing (vlib_main_t * vm,
+                                ip6_header_t * ip0,
+                                ip6_hop_by_hop_header_t * hbh0,
+                                vlib_buffer_t * b)
+{
+  ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+  ip6_hop_by_hop_option_t *opt0, *limit0;
+  u8 type0;
+
+  if (!hbh0 || !ip0)
+    return;
+
+  /* Options start right after the 2-byte header and end at (length+1)*8 */
+  opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+  limit0 = (ip6_hop_by_hop_option_t *)
+    ((u8 *) hbh0 + ((hbh0->length + 1) << 3));
+
+  /* Scan the set of h-b-h options, process ones that we understand */
+  while (opt0 < limit0)
+    {
+      type0 = opt0->type;
+      switch (type0)
+        {
+        case 0:                /* Pad1 */
+          /*
+           * Pad1 is a single octet with no length field. The addition must
+           * happen on the u8 pointer: casting first and then adding 1 would
+           * step by sizeof (ip6_hop_by_hop_option_t) and skip a byte.
+           */
+          opt0 = (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + 1);
+          continue;
+        case 1:                /* PadN */
+          break;
+        default:
+          if (hm->pop_options[type0])
+            {
+              if ((*hm->pop_options[type0]) (b, ip0, opt0) < 0)
+                {
+                  vlib_node_increment_counter (vm,
+                                               ip6_pop_hop_by_hop_node.index,
+                                               IP6_POP_HOP_BY_HOP_ERROR_OPTION_FAILED,
+                                               1);
+                }
+            }
+          break;
+        }
+      /* Step over this TLV: type + length octets, then the option data */
+      opt0 =
+        (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+                                     sizeof (ip6_hop_by_hop_option_t));
+    }
+}
+
+/*
+ * Node function for ip6-pop-hop-by-hop.
+ *
+ * For every buffer in the frame it:
+ *  - treats the bytes right after the IPv6 header as a hop-by-hop header
+ *    and runs the registered pop handlers over its options,
+ *  - advances the buffer by the header size, (hbh->length + 1) * 8 bytes,
+ *  - subtracts that size from ip->payload_length and copies the h-b-h
+ *    next-protocol into ip->protocol,
+ *  - slides the 40-byte IPv6 header forward so it ends where the h-b-h
+ *    header used to end,
+ *  - enqueues the buffer to the next node taken from the TX adjacency's
+ *    lookup_next_index.
+ *
+ * Returns the number of vectors in the frame.
+ *
+ * NOTE(review): ip0->protocol is never checked before the h-b-h header is
+ * parsed, and no_header is never incremented, so the NO_HOHO counter stays
+ * at zero here. Presumably the caller guarantees a h-b-h header is present;
+ * confirm.
+ */
+static uword
+ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, *to_next;
+ ip_lookup_next_t next_index;
+ u32 processed = 0;
+ u32 no_header = 0;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Dual loop: needs 4 buffers left because from[2]/from[3] are prefetched */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
+ u32 next0, next1;
+ u32 adj_index0, adj_index1;
+ ip6_header_t *ip0, *ip1;
+ ip_adjacency_t *adj0, *adj1;
+ ip6_hop_by_hop_header_t *hbh0, *hbh1;
+ u64 *copy_dst0, *copy_src0, *copy_dst1, *copy_src1;
+ u16 new_l0, new_l1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ /* speculatively enqueue b0 and b1 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
+ ip0 = vlib_buffer_get_current (b0);
+ ip1 = vlib_buffer_get_current (b1);
+ adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get (adj_index0);
+ adj1 = adj_get (adj_index1);
+
+ next0 = adj0->lookup_next_index;
+ next1 = adj1->lookup_next_index;
+
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+ hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
+
+ ioam_pop_hop_by_hop_processing (vm, ip0, hbh0, b0);
+ ioam_pop_hop_by_hop_processing (vm, ip1, hbh1, b1);
+
+ /* Move the buffer start forward by the full h-b-h header size */
+ vlib_buffer_advance (b0, (hbh0->length + 1) << 3);
+ vlib_buffer_advance (b1, (hbh1->length + 1) << 3);
+
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
+ ((hbh0->length + 1) << 3);
+ new_l1 = clib_net_to_host_u16 (ip1->payload_length) -
+ ((hbh1->length + 1) << 3);
+
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip1->payload_length = clib_host_to_net_u16 (new_l1);
+
+ ip0->protocol = hbh0->protocol;
+ ip1->protocol = hbh1->protocol;
+
+ /*
+ * Slide the 40-byte IPv6 header (5 x u64) forward by the h-b-h size.
+ * Source and destination can overlap, so words are copied from the
+ * highest index down to avoid overwriting words not yet copied.
+ */
+ copy_src0 = (u64 *) ip0;
+ copy_src1 = (u64 *) ip1;
+ copy_dst0 = copy_src0 + (hbh0->length + 1);
+ copy_dst0[4] = copy_src0[4];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[0] = copy_src0[0];
+ copy_dst1 = copy_src1 + (hbh1->length + 1);
+ copy_dst1[4] = copy_src1[4];
+ copy_dst1[3] = copy_src1[3];
+ copy_dst1[2] = copy_src1[2];
+ copy_dst1[1] = copy_src1[1];
+ copy_dst1[0] = copy_src1[0];
+ processed += 2;
+ /* $$$$$ End of processing 2 x packets $$$$$ */
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_pop_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ ip6_pop_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b1, sizeof (*t));
+ t->next_index = next1;
+ }
+ }
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, bi1, next0, next1);
+ }
+
+ /* Single loop: same processing as above, one packet at a time */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 adj_index0;
+ ip6_header_t *ip0;
+ ip_adjacency_t *adj0;
+ ip6_hop_by_hop_header_t *hbh0;
+ u64 *copy_dst0, *copy_src0;
+ u16 new_l0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ ip0 = vlib_buffer_get_current (b0);
+ adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get (adj_index0);
+
+ /* Default use the next_index from the adjacency. */
+ next0 = adj0->lookup_next_index;
+
+ /* Perfectly normal to end up here w/ out h-b-h header */
+ hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+
+ /* TODO:Temporarily doing it here.. do this validation in end_of_path_cb */
+ ioam_pop_hop_by_hop_processing (vm, ip0, hbh0, b0);
+ /* Pop the trace data */
+ vlib_buffer_advance (b0, (hbh0->length + 1) << 3);
+ new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
+ ((hbh0->length + 1) << 3);
+ ip0->payload_length = clib_host_to_net_u16 (new_l0);
+ ip0->protocol = hbh0->protocol;
+ /* Overlapping forward move of the IPv6 header: copy high word first */
+ copy_src0 = (u64 *) ip0;
+ copy_dst0 = copy_src0 + (hbh0->length + 1);
+ copy_dst0[4] = copy_src0[4];
+ copy_dst0[3] = copy_src0[3];
+ copy_dst0[2] = copy_src0[2];
+ copy_dst0[1] = copy_src0[1];
+ copy_dst0[0] = copy_src0[0];
+ processed++;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip6_pop_hop_by_hop_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->next_index = next0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
+ IP6_POP_HOP_BY_HOP_ERROR_PROCESSED, processed);
+ /* no_header is never modified above, so this adds zero */
+ vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index,
+ IP6_POP_HOP_BY_HOP_ERROR_NO_HOHO, no_header);
+ return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+/* Graph-node registration for "ip6-pop-hop-by-hop". */
+VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) =
+{
+  .function = ip6_pop_hop_by_hop_node_fn,
+  .name = "ip6-pop-hop-by-hop",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip6_pop_hop_by_hop_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .sibling_of = "ip6-lookup",
+  .n_errors = ARRAY_LEN (ip6_pop_hop_by_hop_error_strings),
+  .error_strings = ip6_pop_hop_by_hop_error_strings,
+
+  /* See ip/lookup.h */
+  .n_next_nodes = 0,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_pop_hop_by_hop_node,
+                              ip6_pop_hop_by_hop_node_fn);
+
+/*
+ * Init function for the hop-by-hop iOAM module.
+ * Runs after ip_main_init and ip6_lookup_init, records the start times,
+ * sets the flow operation to IOAM_HBYH_MOD, clears the add/pop handler
+ * tables and option sizes, and registers the classifier opaque-index
+ * parser. Returns the first init error encountered, or 0.
+ */
+static clib_error_t *
+ip6_hop_by_hop_ioam_init (vlib_main_t * vm)
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+  clib_error_t *error;
+
+  error = vlib_call_init_function (vm, ip_main_init);
+  if (error)
+    return error;
+
+  error = vlib_call_init_function (vm, ip6_lookup_init);
+  if (error)
+    return error;
+
+  ioam->vlib_main = vm;
+  ioam->vnet_main = vnet_get_main ();
+
+  /* Store starting time */
+  ioam->unix_time_0 = (u32) time (0);
+  ioam->vlib_time_0 = vlib_time_now (vm);
+
+  ioam->ioam_flag = IOAM_HBYH_MOD;
+
+  memset (ioam->add_options, 0, sizeof (ioam->add_options));
+  memset (ioam->pop_options, 0, sizeof (ioam->pop_options));
+  memset (ioam->options_size, 0, sizeof (ioam->options_size));
+
+  vnet_classify_register_unformat_opaque_index_fn (unformat_opaque_ioam);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_init);
+
+/*
+ * Build the hop-by-hop rewrite string for the requested iOAM options.
+ *
+ * rwp              - in/out vector; any previous rewrite is freed first
+ * has_trace_option - include the trace data list option
+ * has_pot_option   - include the proof-of-transit option
+ * has_seqno_option - include the edge-to-edge option
+ *
+ * Returns -1 (leaving *rwp freed) when neither trace nor pot is requested,
+ * otherwise 0 with *rwp pointing at a zero-filled vector, padded to a
+ * multiple of 8 octets, into which the registered add_options callbacks
+ * have written their options.
+ */
+int
+ip6_ioam_set_rewrite (u8 ** rwp, int has_trace_option,
+                      int has_pot_option, int has_seqno_option)
+{
+  ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+  /* The callbacks get a pointer to their size slot, so keep the slots */
+  u8 *trace_size = &hm->options_size[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST];
+  u8 *pot_size = &hm->options_size[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT];
+  u8 *e2e_size = &hm->options_size[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE];
+  ip6_hop_by_hop_header_t *hbh;
+  u8 *rewrite = NULL;
+  u8 *cursor;
+  u32 needed, padded;
+
+  vec_free (*rwp);
+
+  if (!has_trace_option && !has_pot_option)
+    return -1;
+
+  /* Work out how much space we need */
+  needed = sizeof (ip6_hop_by_hop_header_t);
+
+  if (has_trace_option && *trace_size != 0)
+    needed += *trace_size;
+
+  if (has_pot_option
+      && hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0)
+    needed += *pot_size;
+
+  if (has_seqno_option)
+    needed += *e2e_size;
+
+  /* Round to a multiple of 8 octets */
+  padded = (needed + 7) & ~7;
+
+  /* allocate it, zero-fill / pad by construction */
+  vec_validate (rewrite, padded - 1);
+
+  hbh = (ip6_hop_by_hop_header_t *) rewrite;
+  /* Length of header in 8 octet units, not incl first 8 octets */
+  hbh->length = (padded >> 3) - 1;
+  cursor = (u8 *) (hbh + 1);
+
+  /* Each callback returns 0 on success; only then is the cursor advanced */
+  if (has_trace_option
+      && hm->add_options[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] != 0
+      && *trace_size != 0)
+    {
+      if (0 ==
+          hm->add_options[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] (cursor,
+                                                                 trace_size))
+        cursor += *trace_size;
+    }
+
+  if (has_pot_option
+      && hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0)
+    {
+      if (0 ==
+          hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] (cursor,
+                                                                  pot_size))
+        cursor += *pot_size;
+    }
+
+  if (has_seqno_option
+      && hm->add_options[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] != 0)
+    {
+      if (0 ==
+          hm->add_options[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] (cursor,
+                                                              e2e_size))
+        cursor += *e2e_size;
+    }
+
+  *rwp = rewrite;
+  return 0;
+}
+
+/*
+ * Drop the current rewrite, clear every has_*_option flag and call the
+ * trace, pot and edge-to-edge config handlers (when registered) with
+ * disable = 1. Always returns 0.
+ */
+clib_error_t *
+clear_ioam_rewrite_fn (void)
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+
+  vec_free (ioam->rewrite);
+  ioam->rewrite = 0;
+
+  ioam->has_trace_option = 0;
+  ioam->has_pot_option = 0;
+  ioam->has_seqno_option = 0;
+  ioam->has_analyse_option = 0;
+
+  if (ioam->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST])
+    {
+      ioam->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] (NULL, 1);
+    }
+
+  if (ioam->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT])
+    {
+      ioam->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] (NULL, 1);
+    }
+
+  /* The edge-to-edge handler is handed the (now cleared) analyse flag */
+  if (ioam->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE])
+    {
+      void *analyse = (void *) &ioam->has_analyse_option;
+
+      ioam->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] (analyse, 1);
+    }
+
+  return 0;
+}
+
+/* CLI entry point for "clear ioam rewrite"; the arguments are not used. */
+clib_error_t *
+clear_ioam_rewrite_command_fn (vlib_main_t * vm,
+                               unformat_input_t * input,
+                               vlib_cli_command_t * cmd)
+{
+  clib_error_t *error = clear_ioam_rewrite_fn ();
+
+  return error;
+}
+
+/*?
+ * This command clears all the In-band OAM (iOAM) features enabled by
+ * the '<em>set ioam rewrite</em>' command. Use '<em>show ioam summary</em>' to
+ * verify the configured settings cleared.
+ *
+ * @cliexpar
+ * Example of how to clear iOAM features:
+ * @cliexcmd{clear ioam rewrite}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_clear_ioam_rewrite_cmd, static) =
+{
+  .path = "clear ioam rewrite",
+  .short_help = "clear ioam rewrite",
+  .function = clear_ioam_rewrite_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Rebuild the rewrite string for the requested options and, on success,
+ * record the option flags and call the matching config handlers with
+ * disable = 0.
+ *
+ * Returns 0 on success, or a clib error carrying the return code of
+ * ip6_ioam_set_rewrite when the rewrite could not be built.
+ */
+clib_error_t *
+ip6_ioam_enable (int has_trace_option, int has_pot_option,
+                 int has_seqno_option, int has_analyse_option)
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+  int rv;
+
+  rv = ip6_ioam_set_rewrite (&ioam->rewrite, has_trace_option,
+                             has_pot_option, has_seqno_option);
+  if (rv != 0)
+    return clib_error_return_code (0, rv, 0,
+                                   "ip6_ioam_set_rewrite returned %d", rv);
+
+  if (has_trace_option)
+    {
+      ioam->has_trace_option = has_trace_option;
+      if (ioam->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST])
+        ioam->config_handler[HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST] (NULL, 0);
+    }
+
+  if (has_pot_option)
+    {
+      ioam->has_pot_option = has_pot_option;
+      if (ioam->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT])
+        ioam->config_handler[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] (NULL, 0);
+    }
+
+  /* Stored unconditionally, before the edge-to-edge handler runs */
+  ioam->has_analyse_option = has_analyse_option;
+
+  if (has_seqno_option)
+    {
+      ioam->has_seqno_option = has_seqno_option;
+      if (ioam->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE])
+        {
+          /* The handler is given the caller's analyse flag */
+          void *analyse = (void *) &has_analyse_option;
+
+          ioam->config_handler[HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE] (analyse,
+                                                                   0);
+        }
+    }
+
+  return 0;
+}
+
+
+/*
+ * CLI handler for "set ioam rewrite".
+ * Collects the optional keywords trace, pot, seqno and analyse (parsing
+ * stops at the first token that is none of these) and hands the resulting
+ * flags to ip6_ioam_enable. Returns whatever ip6_ioam_enable returns.
+ */
+static clib_error_t *
+ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
+                                 unformat_input_t * input,
+                                 vlib_cli_command_t * cmd)
+{
+  int has_trace_option = 0;
+  int has_pot_option = 0;
+  int has_seqno_option = 0;
+  int has_analyse_option = 0;
+  clib_error_t *rv = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "trace"))
+        has_trace_option = 1;
+      else if (unformat (input, "pot"))
+        has_pot_option = 1;
+      else if (unformat (input, "seqno"))
+        has_seqno_option = 1;
+      else if (unformat (input, "analyse"))
+        has_analyse_option = 1;
+      else
+        break;
+    }
+
+  rv = ip6_ioam_enable (has_trace_option, has_pot_option,
+                        has_seqno_option, has_analyse_option);
+
+  return rv;
+}
+
+/*?
+ * This command is used to enable In-band OAM (iOAM) features on IPv6.
+ * '<em>trace</em>' is used to enable iOAM trace feature. '<em>pot</em>' is used to
+ * enable the Proof Of Transit feature. '<em>seqno</em>' is used to enable the
+ * Edge to Edge sequence number option. '<em>analyse</em>' is used to enable
+ * analysis of iOAM data on the decap node. The command fails unless at
+ * least one of '<em>trace</em>' or '<em>pot</em>' is given, so '<em>seqno</em>' can
+ * only be set together with '<em>trace</em>' or '<em>pot</em>'.
+ *
+ * Use '<em>clear ioam rewrite</em>' to disable all features enabled by this
+ * command. Use '<em>show ioam summary</em>' to verify the configured settings.
+ *
+ * @cliexpar
+ * Example of how to enable trace and pot with the sequence number option:
+ * @cliexcmd{set ioam rewrite trace pot seqno}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = {
+  .path = "set ioam rewrite",
+  /* must spell out the full command path, including "rewrite" */
+  .short_help = "set ioam rewrite [trace] [pot] [seqno] [analyse]",
+  .function = ip6_set_ioam_rewrite_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "show ioam summary".
+ * Formats the configured rewrite destination and flow operation (when an
+ * address is configured) followed by the state of the trace, pot,
+ * edge-to-edge seqno and analyse options, then prints the text in a single
+ * vlib_cli_output call. Always returns 0.
+ */
+static clib_error_t *
+ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
+                              unformat_input_t * input,
+                              vlib_cli_command_t * cmd)
+{
+  ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+  u8 *s = 0;
+
+  if (!is_zero_ip6_address (&hm->adj))
+    {
+      char *flow_op;
+
+      if (hm->ioam_flag == IOAM_HBYH_ADD)
+        flow_op = "Add";
+      else if (hm->ioam_flag == IOAM_HBYH_MOD)
+        flow_op = "Mod";
+      else
+        flow_op = "Pop";
+
+      s = format (s, " REWRITE FLOW CONFIGS - \n");
+      /* format_ip6_address consumes only the address pointer */
+      s = format (s, " Destination Address : %U\n",
+                  format_ip6_address, &hm->adj);
+      s = format (s, " Flow operation : %d (%s)\n", hm->ioam_flag, flow_op);
+    }
+  else
+    {
+      s = format (s, " REWRITE FLOW CONFIGS - Not configured\n");
+    }
+
+  s = format (s, " TRACE OPTION - %d (%s)\n",
+              hm->has_trace_option,
+              (hm->has_trace_option ? "Enabled" : "Disabled"));
+  if (hm->has_trace_option)
+    s =
+      format (s,
+              "Try 'show ioam trace and show ioam-trace profile' for more information\n");
+
+  s = format (s, " POT OPTION - %d (%s)\n",
+              hm->has_pot_option,
+              (hm->has_pot_option ? "Enabled" : "Disabled"));
+  if (hm->has_pot_option)
+    s =
+      format (s,
+              "Try 'show ioam pot and show pot profile' for more information\n");
+
+  s = format (s, " EDGE TO EDGE - SeqNo OPTION - %d (%s)\n",
+              hm->has_seqno_option,
+              hm->has_seqno_option ? "Enabled" : "Disabled");
+  if (hm->has_seqno_option)
+    s = format (s, "Try 'show ioam e2e' for more information\n");
+
+  s = format (s, " iOAM Analyse OPTION - %d (%s)\n",
+              hm->has_analyse_option,
+              hm->has_analyse_option ? "Enabled" : "Disabled");
+
+  vlib_cli_output (vm, "%v", s);
+  vec_free (s);
+  return 0;
+}
+
+/*?
+ * This command displays the current configuration data for In-band
+ * OAM (iOAM).
+ *
+ * @cliexpar
+ * Example to show the iOAM configuration:
+ * @cliexstart{show ioam summary}
+ * REWRITE FLOW CONFIGS -
+ * Destination Address : ff02::1
+ * Flow operation : 2 (Pop)
+ * TRACE OPTION - 1 (Enabled)
+ * Try 'show ioam trace and show ioam-trace profile' for more information
+ * POT OPTION - 1 (Enabled)
+ * Try 'show ioam pot and show pot profile' for more information
+ * EDGE TO EDGE - SeqNo OPTION - 1 (Enabled)
+ * Try 'show ioam e2e' for more information
+ * iOAM Analyse OPTION - 1 (Enabled)
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = {
+  .path = "show ioam summary",
+  .short_help = "show ioam summary",
+  .function = ip6_show_ioam_summary_cmd_fn,
+};
+/* *INDENT-ON* */
+
+/* Store cb as the iOAM end-of-path callback; no validation is done. */
+void
+vnet_register_ioam_end_of_path_callback (void *cb)
+{
+  ip6_hop_by_hop_ioam_main.ioam_end_of_path_cb = cb;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_hop_by_hop.h b/src/vnet/ip/ip6_hop_by_hop.h
new file mode 100644
index 00000000..5f12f647
--- /dev/null
+++ b/src/vnet/ip/ip6_hop_by_hop.h
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ip6_hop_by_hop_ioam_h__
+#define __included_ip6_hop_by_hop_ioam_h__
+
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+#include <vnet/ip/ip.h>
+
+
+#define MAX_IP6_HBH_OPTION 256
+
+/*
+ * The most significant bit of a 32-bit flow context marks a decap node;
+ * the remaining bits carry the flow index.
+ */
+#define IOAM_DECAP_BIT 0x80000000
+
+/* Non-zero when the decap bit is set in opaque_data */
+#define IOAM_DECAP_ENABLED(opaque_data) ((opaque_data) & IOAM_DECAP_BIT)
+
+/* Historical misspelling, kept so existing callers keep compiling */
+#define IOAM_DEAP_ENABLED(opaque_data) IOAM_DECAP_ENABLED (opaque_data)
+
+/* Set the decap bit in the lvalue opaque_data */
+#define IOAM_SET_DECAP(opaque_data) \
+ ((opaque_data) |= IOAM_DECAP_BIT)
+
+/* Value of x with the decap bit cleared (x itself is not modified) */
+#define IOAM_MASK_DECAP_BIT(x) ((x) & ~IOAM_DECAP_BIT)
+
+/*
+ * Stores the run time flow data of hbh options
+ */
+typedef struct
+{
+ u32 ctx[MAX_IP6_HBH_OPTION]; /* per-option context, indexed by option type */
+ u8 flow_name[64]; /* flow name as a C string */
+} flow_data_t;
+
+/* Global state of the IPv6 hop-by-hop iOAM feature */
+typedef struct
+{
+ /* The current rewrite we're using */
+ u8 *rewrite;
+
+ /* Trace data processing callback */
+ void *ioam_end_of_path_cb;
+ /* Configuration data */
+ /* Adjacency */
+ ip6_address_t adj;
+ /* Values for ioam_flag (flow operation) */
+#define IOAM_HBYH_ADD 0
+#define IOAM_HBYH_MOD 1
+#define IOAM_HBYH_POP 2
+ u8 ioam_flag;
+ /* time scale transform. Joy. */
+ u32 unix_time_0;
+ f64 vlib_time_0;
+
+
+ /* Trace option */
+ u8 has_trace_option;
+
+ /* Pot option */
+ u8 has_pot_option;
+
+ /* Edge-to-edge sequence number (seqno) option */
+ u8 has_seqno_option;
+
+ /* Enabling analysis of iOAM data on decap node */
+ u8 has_analyse_option;
+
+ /*
+  * Per-option tables, all indexed by hop-by-hop option type:
+  * size of the option in the rewrite, then the ADD and POP HBH option
+  * handling routines, size query and config handlers.
+  */
+ u8 options_size[MAX_IP6_HBH_OPTION];
+ int (*add_options[MAX_IP6_HBH_OPTION]) (u8 * rewrite_string,
+ u8 * rewrite_size);
+ int (*pop_options[MAX_IP6_HBH_OPTION]) (vlib_buffer_t * b,
+ ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt);
+ int (*get_sizeof_options[MAX_IP6_HBH_OPTION]) (u32 * rewrite_size);
+ int (*config_handler[MAX_IP6_HBH_OPTION]) (void *data, u8 disable);
+
+ /* Array of function pointers to handle hbh options being used with classifier */
+ u32 (*flow_handler[MAX_IP6_HBH_OPTION]) (u32 flow_ctx, u8 add);
+ /* Pool of flows; a flow context with the decap bit masked off indexes it */
+ flow_data_t *flows;
+
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} ip6_hop_by_hop_ioam_main_t;
+
+extern ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main;
+
+extern clib_error_t *ip6_ioam_enable (int has_trace_option,
+ int has_pot_option,
+ int has_seqno_option,
+ int has_analyse_option);
+
+extern int ip6_ioam_set_destination (ip6_address_t * addr, u32 mask_width,
+ u32 vrf_id, int is_add, int is_pop,
+ int is_none);
+
+extern clib_error_t *clear_ioam_rewrite_fn (void);
+
+/* Returns 1 when the IPv4 address is all zeroes, 0 otherwise. */
+static inline u8
+is_zero_ip4_address (ip4_address_t * a)
+{
+  return !a->as_u32;
+}
+
+/* Copy the 128-bit address *src into *dst, one 64-bit half at a time. */
+static inline void
+copy_ip6_address (ip6_address_t * dst, ip6_address_t * src)
+{
+  int half;
+
+  for (half = 0; half < 2; half++)
+    dst->as_u64[half] = src->as_u64[half];
+}
+
+/* Set both 64-bit halves of the IPv6 address to zero. */
+static inline void
+set_zero_ip6_address (ip6_address_t * a)
+{
+  a->as_u64[0] = a->as_u64[1] = 0;
+}
+
+/*
+ * Equality test for two IPv6 addresses.
+ * Returns 1 when the addresses are EQUAL and 0 when they differ; note that
+ * this is the opposite of the memcmp/strcmp convention.
+ */
+static inline u8
+cmp_ip6_address (ip6_address_t * a1, ip6_address_t * a2)
+{
+  if (a1->as_u64[0] != a2->as_u64[0])
+    return 0;
+
+  return a1->as_u64[1] == a2->as_u64[1];
+}
+
+/* Returns 1 when the IPv6 address is all zeroes (::), 0 otherwise. */
+static inline u8
+is_zero_ip6_address (ip6_address_t * a)
+{
+  /* OR of both halves is zero only when each half is zero */
+  return (a->as_u64[0] | a->as_u64[1]) == 0;
+}
+
+int ip6_hbh_add_register_option (u8 option,
+ u8 size,
+ int rewrite_options (u8 * rewrite_string,
+ u8 * size));
+int ip6_hbh_add_unregister_option (u8 option);
+
+int ip6_hbh_pop_register_option (u8 option,
+ int options (vlib_buffer_t * b,
+ ip6_header_t * ip,
+ ip6_hop_by_hop_option_t * opt));
+int ip6_hbh_pop_unregister_option (u8 option);
+
+int
+ip6_hbh_get_sizeof_register_option (u8 option,
+ int get_sizeof_hdr_options (u32 *
+ rewrite_size));
+
+int
+ip6_ioam_set_rewrite (u8 ** rwp, int has_trace_option,
+ int has_pot_option, int has_seq_no);
+
+int
+ip6_hbh_config_handler_register (u8 option,
+ int config_handler (void *data, u8 disable));
+
+int ip6_hbh_config_handler_unregister (u8 option);
+
+int ip6_hbh_flow_handler_register (u8 option,
+ u32 ioam_flow_handler (u32 flow_ctx,
+ u8 add));
+
+int ip6_hbh_flow_handler_unregister (u8 option);
+
+u8 *get_flow_name_from_flow_ctx (u32 flow_ctx);
+
+/*
+ * Look up a flow by pool index.
+ * Returns NULL when the index is free in the flow pool, otherwise a
+ * pointer to the pool element.
+ */
+static inline flow_data_t *
+get_flow (u32 index)
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+
+  return pool_is_free_index (ioam->flows, index)
+    ? NULL : pool_elt_at_index (ioam->flows, index);
+}
+
+/*
+ * Return the per-option context stored for 'option' in the flow that
+ * flow_ctx refers to. The flow is indexed directly, without checking that
+ * the index is valid or in use.
+ */
+static inline u32
+get_flow_data_from_flow_ctx (u32 flow_ctx, u8 option)
+{
+  /* Strip the decap marker to recover the index into the flow pool */
+  u32 flow_index = IOAM_MASK_DECAP_BIT (flow_ctx);
+
+  return ip6_hop_by_hop_ioam_main.flows[flow_index].ctx[option];
+}
+
+/* Returns the has_seqno_option flag of the global iOAM state. */
+static inline u8
+is_seqno_enabled (void)
+{
+  ip6_hop_by_hop_ioam_main_t *ioam = &ip6_hop_by_hop_ioam_main;
+
+  return ioam->has_seqno_option;
+}
+
+int ip6_trace_profile_setup ();
+
+/*
+ * Allocate a flow from the pool, name it and let every registered flow
+ * handler create its per-option context.
+ *
+ * encap     - non-zero for an encap flow; when zero the decap bit is set
+ *             in the returned context
+ * flow_name - NUL-terminated name; at most 31 characters are kept
+ *
+ * Returns the flow context: the pool index, with IOAM_DECAP_BIT set for
+ * decap flows. The handlers are called with that same value and add = 1.
+ */
+static inline u32
+ioam_flow_add (u8 encap, u8 * flow_name)
+{
+  ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+  flow_data_t *flow = 0;
+  u32 index = 0;
+  u32 i;
+
+  pool_get (hm->flows, flow);
+  memset (flow, 0, sizeof (flow_data_t));
+
+  index = flow - hm->flows;
+  /* flow was zeroed above, so the copied name is always NUL-terminated */
+  strncpy ((char *) flow->flow_name, (char *) flow_name, 31);
+
+  if (!encap)
+    IOAM_SET_DECAP (index);
+
+  /*
+   * Visit every option slot, including type 255. The counter is wider
+   * than u8 so the loop can terminate at MAX_IP6_HBH_OPTION.
+   */
+  for (i = 0; i < MAX_IP6_HBH_OPTION; i++)
+    {
+      if (hm->flow_handler[i])
+        flow->ctx[i] = hm->flow_handler[i] (index, 1);
+    }
+  return (index);
+}
+
+/*
+ * Find the first option of type option_to_search in a hop-by-hop header.
+ *
+ * Returns a pointer to the option inside the header, or NULL when hbh0 is
+ * NULL or no such option is present. Pad1 (type 0) and PadN (type 1) are
+ * skipped and can never be returned.
+ */
+always_inline ip6_hop_by_hop_option_t *
+ip6_hbh_get_option (ip6_hop_by_hop_header_t * hbh0, u8 option_to_search)
+{
+  ip6_hop_by_hop_option_t *opt0, *limit0;
+  u8 type0;
+
+  if (!hbh0)
+    return NULL;
+
+  /* Options start right after the 2-byte header and end at (length+1)*8 */
+  opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+  limit0 = (ip6_hop_by_hop_option_t *)
+    ((u8 *) hbh0 + ((hbh0->length + 1) << 3));
+
+  /* Scan the set of h-b-h options until the requested type is found */
+  while (opt0 < limit0)
+    {
+      type0 = opt0->type;
+      switch (type0)
+        {
+        case 0:                /* Pad1 */
+          /*
+           * Pad1 is a single octet with no length field. The addition must
+           * happen on the u8 pointer: casting first and then adding 1 would
+           * step by sizeof (ip6_hop_by_hop_option_t) and skip a byte.
+           */
+          opt0 = (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + 1);
+          continue;
+        case 1:                /* PadN */
+          break;
+        default:
+          if (type0 == option_to_search)
+            return opt0;
+          break;
+        }
+      /* Step over this TLV: type + length octets, then the option data */
+      opt0 =
+        (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+                                     sizeof (ip6_hop_by_hop_option_t));
+    }
+  return NULL;
+}
+
+#endif /* __included_ip6_hop_by_hop_ioam_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_hop_by_hop_packet.h b/src/vnet/ip/ip6_hop_by_hop_packet.h
new file mode 100644
index 00000000..dd8c7d5e
--- /dev/null
+++ b/src/vnet/ip/ip6_hop_by_hop_packet.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ip6_hop_by_hop_packet_h__
+#define __included_ip6_hop_by_hop_packet_h__
+
+/* Fixed two-octet start of the hop-by-hop header; the options follow it */
+typedef struct
+{
+ /* Protocol for next header */
+ u8 protocol;
+ /*
+ * Length of hop_by_hop header in 8 octet units,
+ * not including the first 8 octets
+ */
+ u8 length;
+} ip6_hop_by_hop_header_t;
+
+/* Type/length prefix of one hop-by-hop option; the option data follows it */
+typedef struct
+{
+ /*
+  * Option Type
+  * The two high-order bits (HBH_OPTION_TYPE_HIGH_ORDER_BITS) select the
+  * action for an unrecognized option; bit 5 marks option data that may
+  * change en route.
+  */
+#define HBH_OPTION_TYPE_SKIP_UNKNOWN (0x00)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN (0x40)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP (0x80)
+#define HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST (0xc0)
+#define HBH_OPTION_TYPE_HIGH_ORDER_BITS (0xc0)
+#define HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE (1<<5)
+ u8 type;
+ /* Length in octets of the option data field */
+ u8 length;
+} ip6_hop_by_hop_option_t;
+
+/* $$$$ IANA banana constants */
+#define HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST 59 /* Third highest bit set (change en-route) */
+#define HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT 60 /* Third highest bit set (change en-route) */
+#define HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE 29
+
+#endif /* __included_ip6_hop_by_hop_packet_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c
new file mode 100644
index 00000000..ffdc4727
--- /dev/null
+++ b/src/vnet/ip/ip6_input.c
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_input.c: IP v6 input node
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ppp/ppp.h>
+#include <vnet/hdlc/hdlc.h>
+
+/*
+ * Per-packet trace record of the ip6-input node: the leading bytes of the
+ * packet, later rendered by format_ip6_input_trace as an IPv6 header.
+ */
+typedef struct
+{
+ u8 packet_data[64];
+} ip6_input_trace_t;
+
+/*
+ * Trace formatter for the ip6-input node.
+ * va: vlib_main_t *, vlib_node_t * (both unused), ip6_input_trace_t *.
+ * Appends the captured bytes, formatted as an IPv6 header, to s and
+ * returns the resulting vector.
+ */
+static u8 *
+format_ip6_input_trace (u8 * s, va_list * va)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+  ip6_input_trace_t *trace = va_arg (*va, ip6_input_trace_t *);
+
+  return format (s, "%U", format_ip6_header,
+		 trace->packet_data, sizeof (trace->packet_data));
+}
+
+/*
+ * Next-node indices of the ip6-input node. The values index the
+ * next_nodes table of the node registration; IP6_INPUT_N_NEXT is the
+ * count, not a real next node.
+ */
+typedef enum
+{
+ IP6_INPUT_NEXT_DROP,
+ IP6_INPUT_NEXT_LOOKUP,
+ IP6_INPUT_NEXT_LOOKUP_MULTICAST,
+ IP6_INPUT_NEXT_ICMP_ERROR,
+ IP6_INPUT_N_NEXT,
+} ip6_input_next_t;
+
+/*
+ * ip6-input node function: validate IPv6 packets and pass them either to
+ * the forwarding code or to an exception path.
+ *
+ * For every buffer in the frame this
+ *  - picks the unicast or multicast lookup next node and feature arc from
+ *    the destination address,
+ *  - resets the RX adjacency index and starts the feature arc,
+ *  - increments the per-interface IP6 simple counter (before validation,
+ *    so invalid packets are counted too),
+ *  - checks version, hop limit and buffer length; a hop-limit failure
+ *    sends the packet to IP6_INPUT_NEXT_ICMP_ERROR, any other failure to
+ *    IP6_INPUT_NEXT_DROP.
+ *
+ * Returns the number of buffers in the frame.
+ */
+static uword
+ip6_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip_lookup_main_t *lm = &im->lookup_main;
+ u32 n_left_from, *from, *to_next;
+ ip6_input_next_t next_index;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_input_node.index);
+ vlib_simple_counter_main_t *cm;
+ u32 thread_index = vlib_get_thread_index ();
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (ip6_input_trace_t));
+
+ cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
+ VNET_INTERFACE_COUNTER_IP6);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /*
+ * Dual loop: handles two buffers per iteration. It needs four buffers
+ * left because from[2] and from[3] are prefetched for the next round.
+ */
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+ u32 pi0, sw_if_index0, next0 = 0;
+ u32 pi1, sw_if_index1, next1 = 0;
+ u8 error0, error1, arc0, arc1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
+ CLIB_PREFETCH (p3->data, sizeof (ip1[0]), LOAD);
+ }
+
+ pi0 = from[0];
+ pi1 = from[1];
+
+ /* Speculatively enqueue both buffers to the cached next frame. */
+ to_next[0] = pi0;
+ to_next[1] = pi1;
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
+
+ /* Multicast and unicast use different feature arcs and lookup nodes. */
+ if (PREDICT_FALSE (ip6_address_is_multicast (&ip0->dst_address)))
+ {
+ arc0 = lm->mcast_feature_arc_index;
+ next0 = IP6_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc0 = lm->ucast_feature_arc_index;
+ next0 = IP6_INPUT_NEXT_LOOKUP;
+ }
+
+ if (PREDICT_FALSE (ip6_address_is_multicast (&ip1->dst_address)))
+ {
+ arc1 = lm->mcast_feature_arc_index;
+ next1 = IP6_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc1 = lm->ucast_feature_arc_index;
+ next1 = IP6_INPUT_NEXT_LOOKUP;
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_buffer (p1)->ip.adj_index[VLIB_RX] = ~0;
+
+ /*
+ * The next index is passed by address, so presumably an enabled
+ * feature can replace the lookup node chosen above. The error
+ * handling below overrides it again for invalid packets.
+ */
+ vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+ vnet_feature_arc_start (arc1, sw_if_index1, &next1, p1);
+
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index1, 1);
+
+ /*
+ * The checks below run in sequence and each one overwrites the
+ * error, so when several fail the last failing check wins:
+ * TOO_SHORT over TIME_EXPIRED over VERSION.
+ */
+ error0 = error1 = IP6_ERROR_NONE;
+
+ /* Version != 6? Drop it. */
+ error0 =
+ (clib_net_to_host_u32
+ (ip0->ip_version_traffic_class_and_flow_label) >> 28) !=
+ 6 ? IP6_ERROR_VERSION : error0;
+ error1 =
+ (clib_net_to_host_u32
+ (ip1->ip_version_traffic_class_and_flow_label) >> 28) !=
+ 6 ? IP6_ERROR_VERSION : error1;
+
+ /*
+ * Hop limit < 1? Drop it. A hop limit of exactly 1 is accepted
+ * here: link-local packets such as DHCPv6 packets from a client
+ * arrive with hop limit 1 and must not be dropped.
+ */
+ error0 = ip0->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error0;
+ error1 = ip1->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error1;
+
+ /* Buffer length must cover at least the fixed IPv6 header. */
+ error0 =
+ p0->current_length <
+ sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
+ error1 =
+ p1->current_length <
+ sizeof (ip1[0]) ? IP6_ERROR_TOO_SHORT : error1;
+
+ if (PREDICT_FALSE (error0 != IP6_ERROR_NONE))
+ {
+ if (error0 == IP6_ERROR_TIME_EXPIRED)
+ {
+ /* Hop limit exhausted: hand over to the ICMP error node. */
+ icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP6_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ {
+ next0 = IP6_INPUT_NEXT_DROP;
+ }
+ }
+ if (PREDICT_FALSE (error1 != IP6_ERROR_NONE))
+ {
+ if (error1 == IP6_ERROR_TIME_EXPIRED)
+ {
+ /* Hop limit exhausted: hand over to the ICMP error node. */
+ icmp6_error_set_vnet_buffer (p1, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next1 = IP6_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ {
+ next1 = IP6_INPUT_NEXT_DROP;
+ }
+ }
+
+ /* Recorded for every packet, including IP6_ERROR_NONE. */
+ p0->error = error_node->errors[error0];
+ p1->error = error_node->errors[error1];
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ /* Single loop: same processing, one buffer at a time. */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ u32 pi0, sw_if_index0, next0 = 0;
+ u8 error0, arc0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip0 = vlib_buffer_get_current (p0);
+
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ if (PREDICT_FALSE (ip6_address_is_multicast (&ip0->dst_address)))
+ {
+ arc0 = lm->mcast_feature_arc_index;
+ next0 = IP6_INPUT_NEXT_LOOKUP_MULTICAST;
+ }
+ else
+ {
+ arc0 = lm->ucast_feature_arc_index;
+ next0 = IP6_INPUT_NEXT_LOOKUP;
+ }
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_RX] = ~0;
+ vnet_feature_arc_start (arc0, sw_if_index0, &next0, p0);
+
+ vlib_increment_simple_counter (cm, thread_index, sw_if_index0, 1);
+ /* As in the dual loop: the last failing check determines error0. */
+ error0 = IP6_ERROR_NONE;
+
+ /* Version != 6? Drop it. */
+ error0 =
+ (clib_net_to_host_u32
+ (ip0->ip_version_traffic_class_and_flow_label) >> 28) !=
+ 6 ? IP6_ERROR_VERSION : error0;
+
+ /*
+ * Hop limit < 1? Drop it. A hop limit of exactly 1 is accepted
+ * here: link-local packets such as DHCPv6 packets from a client
+ * arrive with hop limit 1 and must not be dropped.
+ */
+ error0 = ip0->hop_limit < 1 ? IP6_ERROR_TIME_EXPIRED : error0;
+
+ /* Buffer length must cover at least the fixed IPv6 header. */
+ error0 =
+ p0->current_length <
+ sizeof (ip0[0]) ? IP6_ERROR_TOO_SHORT : error0;
+
+ if (PREDICT_FALSE (error0 != IP6_ERROR_NONE))
+ {
+ if (error0 == IP6_ERROR_TIME_EXPIRED)
+ {
+ /* Hop limit exhausted: hand over to the ICMP error node. */
+ icmp6_error_set_vnet_buffer (p0, ICMP6_time_exceeded,
+ ICMP6_time_exceeded_ttl_exceeded_in_transit,
+ 0);
+ next0 = IP6_INPUT_NEXT_ICMP_ERROR;
+ }
+ else
+ {
+ next0 = IP6_INPUT_NEXT_DROP;
+ }
+ }
+ p0->error = error_node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+/*
+ * Error strings of the ip6-input node, one per entry of the
+ * foreach_ip6_error list (the X-macro keeps only the string argument).
+ */
+static char *ip6_error_strings[] = {
+#define _(sym,string) string,
+ foreach_ip6_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+/*
+ * Graph node registration for "ip6-input". next_nodes is indexed by
+ * ip6_input_next_t, so every enum value below IP6_INPUT_N_NEXT needs an
+ * entry here.
+ */
+VLIB_REGISTER_NODE (ip6_input_node) = {
+ .function = ip6_input,
+ .name = "ip6-input",
+ .vector_size = sizeof (u32),
+
+ .n_errors = IP6_N_ERROR,
+ .error_strings = ip6_error_strings,
+
+ .n_next_nodes = IP6_INPUT_N_NEXT,
+ .next_nodes = {
+ [IP6_INPUT_NEXT_DROP] = "error-drop",
+ [IP6_INPUT_NEXT_LOOKUP] = "ip6-lookup",
+ [IP6_INPUT_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ [IP6_INPUT_NEXT_LOOKUP_MULTICAST] = "ip6-mfib-forward-lookup",
+ },
+
+ .format_buffer = format_ip6_header,
+ .format_trace = format_ip6_input_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_input_node, ip6_input)
+/*
+ * Init function: registers ip6-input as the IPv6 handler of the ethernet,
+ * PPP and HDLC input paths, hooks the IPv6 header parser into the packet
+ * generator node and sets the ip6_main defaults.
+ * Always returns 0 (no error).
+ */
+static clib_error_t *
+ip6_init (vlib_main_t * vm)
+{
+  u32 input_node_index = ip6_input_node.index;
+  pg_node_t *pg_node;
+
+  ethernet_register_input_type (vm, ETHERNET_TYPE_IP6, input_node_index);
+  ppp_register_input_protocol (vm, PPP_PROTOCOL_ip6, input_node_index);
+  hdlc_register_input_protocol (vm, HDLC_PROTOCOL_ip6, input_node_index);
+
+  /* Let the packet generator parse "ip6" header edits for this node. */
+  pg_node = pg_get_node (input_node_index);
+  pg_node->unformat_edit = unformat_pg_ip6_header;
+
+  /* The flow hash seed must be non-zero. */
+  ip6_main.flow_hash_seed = 0xdeadbeef;
+
+  /* Hop limit used for locally generated packets. */
+  ip6_main.host_config.ttl = 64;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_neighbor.c b/src/vnet/ip/ip6_neighbor.c
new file mode 100644
index 00000000..d549ac37
--- /dev/null
+++ b/src/vnet/ip/ip6_neighbor.c
@@ -0,0 +1,4332 @@
+/*
+ * ip/ip6_neighbor.c: IP6 neighbor handling
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6_neighbor.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/mhash.h>
+#include <vppinfra/md5.h>
+#include <vnet/adj/adj.h>
+#include <vnet/adj/adj_mcast.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/mfib/ip6_mfib.h>
+
+/**
+ * @file
+ * @brief IPv6 Neighbor Adjacency and Neighbor Discovery.
+ *
+ * This file contains the API and CLI code for managing IPv6 neighbor
+ * adjacency tables, and the neighbor discovery logic.
+ */
+
+/* can't use sizeof link_layer_address, that's 8 */
+#define ETHER_MAC_ADDR_LEN 6
+
+/*
+ * advertised prefix option: one prefix carried in router advertisements,
+ * stored in the per-interface adv_prefixes_pool.
+ */
+typedef struct
+{
+ /* basic advertised information */
+ ip6_address_t prefix;
+ u8 prefix_len;
+ int adv_on_link_flag;
+ int adv_autonomous_flag;
+ u32 adv_valid_lifetime_in_secs;
+ u32 adv_pref_lifetime_in_secs;
+
+ /* advertised values are computed from these times if decrementing */
+ f64 valid_lifetime_expires;
+ f64 pref_lifetime_expires;
+
+ /* local information */
+ int enabled;
+ int deprecated_prefix_flag;
+ int decrement_lifetime_flag;
+
+ /* lifetime limits and defaults, all in seconds */
+#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */
+#define DEF_ADV_VALID_LIFETIME 2592000
+#define DEF_ADV_PREF_LIFETIME 604800
+
+ /* extensions are added here, mobile, DNS etc.. */
+} ip6_radv_prefix_t;
+
+
+/* One multicast group entry of the per-interface mldp_group_pool. */
+typedef struct
+{
+ /* group information */
+ u8 type;
+ ip6_address_t mcast_address;
+ u16 num_sources;
+ ip6_address_t *mcast_source_address_pool;
+} ip6_mldp_group_t;
+
+/* configured router advertisement information per ipv6 interface */
+typedef struct
+{
+
+  /* advertised config information, zero means unspecified */
+  u8 curr_hop_limit;
+  int adv_managed_flag;
+  int adv_other_flag;
+  u16 adv_router_lifetime_in_sec;
+  u32 adv_neighbor_reachable_time_in_msec;
+  u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations;
+
+  /* mtu option */
+  u32 adv_link_mtu;
+
+  /* source link layer option */
+  u8 link_layer_address[8];
+  u8 link_layer_addr_len;
+
+  /* prefix option */
+  ip6_radv_prefix_t *adv_prefixes_pool;
+
+  /* Hash table mapping address to index in interface advertised prefix pool. */
+  mhash_t address_to_prefix_index;
+
+  /* MLDP group information */
+  ip6_mldp_group_t *mldp_group_pool;
+
+  /* Hash table mapping address to index in mldp address pool. */
+  mhash_t address_to_mldp_index;
+
+  /* local information */
+  u32 sw_if_index;
+  int send_radv;		/* radv on/off on this interface - set by config */
+  int cease_radv;		/* we are ceasing to send - set by config */
+  int send_unicast;
+  int adv_link_layer_address;
+  int prefix_option;
+  int failed_device_check;
+  int all_routers_mcast;
+  u32 seed;
+  u64 randomizer;
+  int ref_count;
+  adj_index_t mcast_adj_index;
+
+  /*
+   * timing information
+   *
+   * The derived defaults are parenthesized so that they expand as a single
+   * value inside larger expressions (e.g. as a divisor or after a unary
+   * operator).
+   */
+#define DEF_MAX_RADV_INTERVAL 200
+#define DEF_MIN_RADV_INTERVAL (.75 * DEF_MAX_RADV_INTERVAL)
+#define DEF_CURR_HOP_LIMIT 64
+#define DEF_DEF_RTR_LIFETIME (3 * DEF_MAX_RADV_INTERVAL)
+#define MAX_DEF_RTR_LIFETIME 9000
+
+#define MAX_INITIAL_RTR_ADVERT_INTERVAL 16	/* seconds */
+#define MAX_INITIAL_RTR_ADVERTISEMENTS 3	/* transmissions */
+#define MIN_DELAY_BETWEEN_RAS 3	/* seconds */
+#define MAX_DELAY_BETWEEN_RAS 1800	/* seconds */
+#define MAX_RA_DELAY_TIME .5	/* seconds */
+
+  f64 max_radv_interval;
+  f64 min_radv_interval;
+  f64 min_delay_between_radv;
+  f64 max_delay_between_radv;
+  f64 max_rtr_default_lifetime;
+
+  f64 last_radv_time;
+  f64 last_multicast_time;
+  f64 next_multicast_time;
+
+
+  u32 initial_adverts_count;
+  f64 initial_adverts_interval;
+  u32 initial_adverts_sent;
+
+  /* stats */
+  u32 n_advertisements_sent;
+  u32 n_solicitations_rcvd;
+  u32 n_solicitations_dropped;
+
+  /* Link local address to use (defaults to underlying physical for logical interfaces) */
+  ip6_address_t link_local_address;
+} ip6_radv_t;
+
+/*
+ * One registered waiter for a neighbor address. Used both for the
+ * pending_resolutions pool and for the mac_changes pool; entries for the
+ * same address are chained through next_index.
+ */
+typedef struct
+{
+ /* pool index of the next waiter for the same address, ~0 ends the chain */
+ u32 next_index;
+ /* process node and event (type_opaque, data) to signal */
+ uword node_index;
+ uword type_opaque;
+ uword data;
+ /* Used for nd event notification only */
+ void *data_callback;
+ u32 pid;
+} pending_resolution_t;
+
+
+/* Global state of the IPv6 neighbor code (single instance: ip6_neighbor_main). */
+typedef struct
+{
+ /* Hash tables mapping name to opcode. */
+ uword *opcode_by_name;
+
+ /* lite beer "glean" adjacency handling */
+ mhash_t pending_resolutions_by_address;
+ pending_resolution_t *pending_resolutions;
+
+ /* Mac address change notification */
+ mhash_t mac_changes_by_address;
+ pending_resolution_t *mac_changes;
+
+ u32 *neighbor_input_next_index_by_hw_if_index;
+
+ /* Neighbor cache entries and the (sw_if_index, address) -> pool index map */
+ ip6_neighbor_t *neighbor_pool;
+
+ mhash_t neighbor_index_by_key;
+
+ /* Per-interface router advertisement state, indexed via sw_if_index */
+ u32 *if_radv_pool_index_by_sw_if_index;
+
+ ip6_radv_t *if_radv_pool;
+
+ /* Neighbor attack mitigation */
+ u32 limit_neighbor_cache_size;
+ /* pool index last visited by unset_random_neighbor_entry */
+ u32 neighbor_delete_rotor;
+
+ /* Wildcard nd report publisher; node is (uword) ~0 when none is set */
+ uword wc_ip6_nd_publisher_node;
+ uword wc_ip6_nd_publisher_et;
+} ip6_neighbor_main_t;
+
+/* ipv6 neighbor discovery - timer/event types */
+typedef enum
+{
+ /* the only event type defined so far */
+ ICMP6_ND_EVENT_INIT,
+} ip6_icmp_neighbor_discovery_event_type_t;
+
+/*
+ * Payload of a neighbor discovery event: either the interface index of an
+ * add/delete event, or the interface and fib index of an up/down event.
+ */
+typedef union
+{
+ u32 add_del_swindex;
+ struct
+ {
+ u32 up_down_swindex;
+ u32 fib_index;
+ } up_down_event;
+} ip6_icmp_neighbor_discovery_event_data_t;
+
+static ip6_neighbor_main_t ip6_neighbor_main;
+static ip6_address_t ip6a_zero; /* ip6 address 0 */
+
+static void wc_nd_signal_report (wc_nd_report_t * r);
+
+/**
+ * @brief Publish a wildcard neighbor discovery (ND) report.
+ *
+ * Builds a wc_nd_report_t from the arguments and hands a copy of it to
+ * wc_nd_signal_report() via vl_api_rpc_call_main_thread().
+ *
+ * @param sw_if_index Interface index stored in the report
+ * @param mac         Link layer address; sizeof (report.mac) bytes are copied
+ * @param ip6         IPv6 address stored in the report
+ * @return always 0
+ */
+static int
+vnet_nd_wc_publish (u32 sw_if_index, u8 * mac, ip6_address_t * ip6)
+{
+  /* Local prototype; no header declaring this function is used here. */
+  void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
+  wc_nd_report_t report = {
+    .sw_if_index = sw_if_index,
+    .ip6 = *ip6,
+  };
+
+  memcpy (report.mac, mac, sizeof report.mac);
+
+  vl_api_rpc_call_main_thread (wc_nd_signal_report, (u8 *) & report,
+			       sizeof report);
+  return 0;
+}
+
+/*
+ * RPC target of vnet_nd_wc_publish(): copies report r into event data
+ * signalled to the registered wildcard ND publisher process.
+ * Does nothing when no publisher node is registered (node == ~0).
+ */
+static void
+wc_nd_signal_report (wc_nd_report_t * r)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  uword publisher_node = nm->wc_ip6_nd_publisher_node;
+  uword publisher_event = nm->wc_ip6_nd_publisher_et;
+  wc_nd_report_t *event_data;
+
+  if (publisher_node == (uword) ~ 0)
+    return;
+
+  event_data = vlib_process_signal_event_data (vm, publisher_node,
+					       publisher_event, 1,
+					       sizeof *event_data);
+  *event_data = *r;
+}
+
+/*
+ * Register the process node and event type that receive wildcard ND
+ * reports. Passing (uword) ~0 as node_index disables reporting, see
+ * wc_nd_signal_report().
+ */
+void
+wc_nd_set_publisher_node (uword node_index, uword event_type)
+{
+  ip6_neighbor_main.wc_ip6_nd_publisher_node = node_index;
+  ip6_neighbor_main.wc_ip6_nd_publisher_et = event_type;
+}
+
+/*
+ * Format one neighbor cache entry as a table row.
+ * va: vlib_main_t *, ip6_neighbor_t *. A NULL entry produces the column
+ * header line instead.
+ * Flag letters: D dynamic, S static, N no FIB entry.
+ */
+static u8 *
+format_ip6_neighbor_ip6_entry (u8 * s, va_list * va)
+{
+  vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+  ip6_neighbor_t *n = va_arg (*va, ip6_neighbor_t *);
+  vnet_main_t *vnm = vnet_get_main ();
+  vnet_sw_interface_t *si;
+  u8 *flags = 0;
+
+  if (!n)
+    return format (s, "%=12s%=20s%=6s%=20s%=40s", "Time", "Address", "Flags",
+		   "Link layer", "Interface");
+
+  if (n->flags & IP6_NEIGHBOR_FLAG_DYNAMIC)
+    flags = format (flags, "D");
+
+  if (n->flags & IP6_NEIGHBOR_FLAG_STATIC)
+    flags = format (flags, "S");
+
+  if (n->flags & IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY)
+    flags = format (flags, "N");
+
+  /*
+   * format() builds a vector, not a C string. It is printed with "%s"
+   * below, so terminate it explicitly instead of relying on whatever
+   * follows the vector in memory. This also covers the no-flags case.
+   */
+  vec_add1 (flags, 0);
+
+  si = vnet_get_sw_interface (vnm, n->key.sw_if_index);
+  s = format (s, "%=12U%=20U%=6s%=20U%=40U",
+	      format_vlib_cpu_time, vm, n->cpu_time_last_updated,
+	      format_ip6_address, &n->key.ip6_address,
+	      (char *) flags,
+	      format_ethernet_address, n->link_layer_address,
+	      format_vnet_sw_interface_name, vnm, si);
+
+  vec_free (flags);
+  return s;
+}
+
+/*
+ * Remove the /128 adj-fib path of neighbor n from FIB table fib_index.
+ * No-op when the neighbor has no FIB entry.
+ */
+static void
+ip6_neighbor_adj_fib_remove (ip6_neighbor_t * n, u32 fib_index)
+{
+  fib_prefix_t host_pfx = {
+    .fp_len = 128,
+    .fp_proto = FIB_PROTOCOL_IP6,
+    .fp_addr.ip6 = n->key.ip6_address,
+  };
+
+  if (FIB_NODE_INDEX_INVALID == n->fib_entry_index)
+    return;
+
+  fib_table_entry_path_remove (fib_index, &host_pfx, FIB_SOURCE_ADJ,
+			       DPO_PROTO_IP6, &host_pfx.fp_addr,
+			       n->key.sw_if_index, ~0, 1,
+			       FIB_ROUTE_PATH_FLAG_NONE);
+}
+
+/*
+ * Admin up/down callback: when the interface goes admin down, delete all
+ * neighbor cache entries learned on it, including their adj-fib paths.
+ * Nothing is done on admin up. Always returns 0.
+ */
+static clib_error_t *
+ip6_neighbor_sw_interface_up_down (vnet_main_t * vnm,
+				   u32 sw_if_index, u32 flags)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  u32 *doomed = 0, *idx;
+  ip6_neighbor_t *n;
+
+  if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
+    return 0;
+
+  /* Collect first, the pool must not be modified while walking it. */
+  /* *INDENT-OFF* */
+  pool_foreach (n, nm->neighbor_pool,
+  ({
+    if (n->key.sw_if_index == sw_if_index)
+      vec_add1 (doomed, n - nm->neighbor_pool);
+  }));
+  /* *INDENT-ON* */
+
+  vec_foreach (idx, doomed)
+  {
+    n = pool_elt_at_index (nm->neighbor_pool, idx[0]);
+    mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
+    ip6_neighbor_adj_fib_remove (n,
+				 ip6_fib_table_get_index_for_sw_if_index
+				 (n->key.sw_if_index));
+    pool_put (nm->neighbor_pool, n);
+  }
+  vec_free (doomed);
+
+  return 0;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_neighbor_sw_interface_up_down);
+
+/*
+ * Delete one neighbor cache entry, chosen by advancing the
+ * neighbor_delete_rotor through the pool (wrapping around once).
+ * Does nothing when the pool is empty.
+ */
+static void
+unset_random_neighbor_entry (void)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  vlib_main_t *vm = vnm->vlib_main;
+  ip6_neighbor_t *victim;
+
+  nm->neighbor_delete_rotor =
+    pool_next_index (nm->neighbor_pool, nm->neighbor_delete_rotor);
+
+  /* Ran off the end: restart from elt 0, could happen if an intfc goes down */
+  if (nm->neighbor_delete_rotor == ~0)
+    nm->neighbor_delete_rotor =
+      pool_next_index (nm->neighbor_pool, nm->neighbor_delete_rotor);
+
+  /* Still nothing: the pool is empty */
+  if (nm->neighbor_delete_rotor == ~0)
+    return;
+
+  victim = pool_elt_at_index (nm->neighbor_pool, nm->neighbor_delete_rotor);
+
+  vnet_unset_ip6_ethernet_neighbor (vm, victim->key.sw_if_index,
+				    &victim->key.ip6_address,
+				    victim->link_layer_address,
+				    ETHER_MAC_ADDR_LEN);
+}
+
+/*
+ * Arguments of a neighbor set/unset request forwarded to the main thread,
+ * filled by set_unset_ip6_neighbor_rpc() and consumed by
+ * ip6_neighbor_set_unset_rpc_callback().
+ */
+typedef struct
+{
+ /* non-zero: set the neighbor, zero: unset it */
+ u8 is_add;
+ u8 is_static;
+ u8 is_no_fib_entry;
+ u8 link_layer_address[6];
+ u32 sw_if_index;
+ ip6_address_t addr;
+} ip6_neighbor_set_unset_rpc_args_t;
+
+static void ip6_neighbor_set_unset_rpc_callback
+ (ip6_neighbor_set_unset_rpc_args_t * a);
+
+/*
+ * Forward a neighbor set/unset request to the main thread.
+ *
+ * The arguments are packed into an ip6_neighbor_set_unset_rpc_args_t and
+ * passed to ip6_neighbor_set_unset_rpc_callback() through
+ * vl_api_rpc_call_main_thread(). link_layer_address may be NULL, in which
+ * case the forwarded address is all zero.
+ */
+static void set_unset_ip6_neighbor_rpc
+  (vlib_main_t * vm,
+   u32 sw_if_index,
+   ip6_address_t * a, u8 * link_layer_address, int is_add, int is_static,
+   int is_no_fib_entry)
+{
+  ip6_neighbor_set_unset_rpc_args_t args;
+  void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
+
+  /*
+   * The whole struct is copied byte-wise into the RPC. Zero it first so
+   * that neither padding nor an unset link layer address carries
+   * uninitialized stack contents.
+   */
+  memset (&args, 0, sizeof (args));
+
+  args.sw_if_index = sw_if_index;
+  args.is_add = is_add;
+  args.is_static = is_static;
+  args.is_no_fib_entry = is_no_fib_entry;
+  clib_memcpy (&args.addr, a, sizeof (*a));
+  if (NULL != link_layer_address)
+    clib_memcpy (args.link_layer_address, link_layer_address,
+		 sizeof (args.link_layer_address));
+
+  vl_api_rpc_call_main_thread (ip6_neighbor_set_unset_rpc_callback,
+			       (u8 *) & args, sizeof (args));
+}
+
+/*
+ * Send a single neighbor solicitation for the next hop of adjacency adj
+ * out of the adjacency's interface.
+ *
+ * Silently does nothing when the interface is admin down, when no local
+ * address matching the destination exists on the interface, or when no
+ * packet could be obtained from the solicitation template.
+ */
+static void
+ip6_nbr_probe (ip_adjacency_t * adj)
+{
+  icmp6_neighbor_solicitation_header_t *h;
+  vnet_main_t *vnm = vnet_get_main ();
+  ip6_main_t *im = &ip6_main;
+  ip_interface_address_t *ia;
+  ip6_address_t *dst, *src;
+  vnet_hw_interface_t *hi;
+  vnet_sw_interface_t *si;
+  vlib_buffer_t *b;
+  int bogus_length;
+  vlib_main_t *vm;
+  u32 bi = 0;
+
+  vm = vlib_get_main ();
+
+  si = vnet_get_sw_interface (vnm, adj->rewrite_header.sw_if_index);
+  dst = &adj->sub_type.nbr.next_hop.ip6;
+
+  if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+    {
+      return;
+    }
+  src = ip6_interface_address_matching_destination (im, dst,
+						    adj->rewrite_header.
+						    sw_if_index, &ia);
+  if (!src)
+    {
+      return;
+    }
+
+  h = vlib_packet_template_get_packet (vm,
+				       &im->discover_neighbor_packet_template,
+				       &bi);
+  /* No buffer available: give up, the probe is only speculative. */
+  if (!h)
+    {
+      return;
+    }
+
+  hi = vnet_get_sup_hw_interface (vnm, adj->rewrite_header.sw_if_index);
+
+  /*
+   * Only the low 24 bits of the destination are patched; presumably the
+   * template already carries the solicited-node multicast prefix.
+   */
+  h->ip.dst_address.as_u8[13] = dst->as_u8[13];
+  h->ip.dst_address.as_u8[14] = dst->as_u8[14];
+  h->ip.dst_address.as_u8[15] = dst->as_u8[15];
+  h->ip.src_address = src[0];
+  h->neighbor.target_address = dst[0];
+
+  /* NOTE(review): assumes hw_address fits the option's address field. */
+  clib_memcpy (h->link_layer_option.ethernet_address,
+	       hi->hw_address, vec_len (hi->hw_address));
+
+  h->neighbor.icmp.checksum =
+    ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
+  ASSERT (bogus_length == 0);
+
+  b = vlib_get_buffer (vm, bi);
+  vnet_buffer (b)->sw_if_index[VLIB_RX] =
+    vnet_buffer (b)->sw_if_index[VLIB_TX] = adj->rewrite_header.sw_if_index;
+
+  /* Add encapsulation string for software interface (e.g. ethernet header). */
+  vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
+  vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
+
+  /* Hand the packet straight to the interface output node. */
+  {
+    vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+    u32 *to_next = vlib_frame_vector_args (f);
+    to_next[0] = bi;
+    f->n_vectors = 1;
+    vlib_put_frame_to_node (vm, hi->output_node_index, f);
+  }
+}
+
+/*
+ * Mark adjacency ai complete: install an ethernet rewrite towards the
+ * link layer address of neighbor nbr.
+ */
+static void
+ip6_nd_mk_complete (adj_index_t ai, ip6_neighbor_t * nbr)
+{
+  u8 *rewrite = ethernet_build_rewrite (vnet_get_main (),
+					nbr->key.sw_if_index,
+					adj_get_link_type (ai),
+					nbr->link_layer_address);
+
+  adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE, rewrite);
+}
+
+/*
+ * Mark adjacency ai incomplete again: replace its rewrite with the
+ * interface's broadcast rewrite.
+ */
+static void
+ip6_nd_mk_incomplete (adj_index_t ai)
+{
+  ip_adjacency_t *adj = adj_get (ai);
+  u8 *rewrite = ethernet_build_rewrite (vnet_get_main (),
+					adj->rewrite_header.sw_if_index,
+					adj_get_link_type (ai),
+					VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST);
+
+  adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_INCOMPLETE, rewrite);
+}
+
+#define IP6_NBR_MK_KEY(k, sw_if_index, addr) \
+{ \
+ k.sw_if_index = sw_if_index; \
+ k.ip6_address = *addr; \
+ k.pad = 0; \
+}
+
+/*
+ * Look up the neighbor cache entry for addr on interface sw_if_index.
+ * Returns the entry, or NULL when there is none.
+ */
+static ip6_neighbor_t *
+ip6_nd_find (u32 sw_if_index, const ip6_address_t * addr)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  ip6_neighbor_key_t key;
+  uword *pool_index;
+
+  IP6_NBR_MK_KEY (key, sw_if_index, addr);
+
+  pool_index = mhash_get (&nm->neighbor_index_by_key, &key);
+  if (!pool_index)
+    return (NULL);
+
+  return (pool_elt_at_index (nm->neighbor_pool, pool_index[0]));
+}
+
+/*
+ * Adjacency walk callback: complete adjacency ai using the neighbor
+ * passed as ctx (an ip6_neighbor_t *). Always continues the walk.
+ */
+static adj_walk_rc_t
+ip6_nd_mk_complete_walk (adj_index_t ai, void *ctx)
+{
+  ip6_nd_mk_complete (ai, (ip6_neighbor_t *) ctx);
+
+  return ADJ_WALK_RC_CONTINUE;
+}
+
+/*
+ * Adjacency walk callback: make adjacency ai incomplete; ctx is not used.
+ * Always continues the walk.
+ */
+static adj_walk_rc_t
+ip6_nd_mk_incomplete_walk (adj_index_t ai, void *ctx)
+{
+  ip6_nd_mk_incomplete (ai);
+
+  return ADJ_WALK_RC_CONTINUE;
+}
+
+/*
+ * Set up the rewrite of IPv6 adjacency ai on ethernet interface
+ * sw_if_index.
+ *
+ * ARP/GLEAN adjacencies are completed from the neighbor cache when the
+ * next hop is known; otherwise they get a broadcast rewrite and one
+ * speculative neighbor solicitation is sent. MCAST adjacencies get a
+ * partial rewrite built from the ethernet IPv6 multicast MAC. Every other
+ * lookup-next type is unexpected here and asserts.
+ */
+void
+ip6_ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
+{
+  ip_adjacency_t *adj = adj_get (ai);
+  ip6_neighbor_t *nbr =
+    ip6_nd_find (sw_if_index, &adj->sub_type.nbr.next_hop.ip6);
+
+  switch (adj->lookup_next_index)
+    {
+    case IP_LOOKUP_NEXT_ARP:
+    case IP_LOOKUP_NEXT_GLEAN:
+      if (NULL == nbr)
+	{
+	  /*
+	   * No matching ND entry. Install the rewrite needed to send an
+	   * ND packet, so the adjacency has something usable meanwhile.
+	   */
+	  adj_nbr_update_rewrite (ai,
+				  ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
+				  ethernet_build_rewrite (vnm,
+							  sw_if_index,
+							  VNET_LINK_IP6,
+							  VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
+
+	  /*
+	   * The FIB added this adj for a route, so traffic is likely to
+	   * follow soon. Send one speculative ND; probing periodically
+	   * would also be fine but is not implemented.
+	   */
+	  ip6_nbr_probe (adj);
+	}
+      else
+	{
+	  adj_nbr_walk_nh6 (sw_if_index, &nbr->key.ip6_address,
+			    ip6_nd_mk_complete_walk, nbr);
+	}
+      break;
+    case IP_LOOKUP_NEXT_MCAST:
+      {
+	/*
+	 * Construct a partial rewrite from the known ethernet mcast dest MAC
+	 */
+	u8 *mcast_rewrite = ethernet_build_rewrite (vnm,
+						    sw_if_index,
+						    adj->ia_link,
+						    ethernet_ip6_mcast_dst_addr
+						    ());
+
+	/*
+	 * The rest of the rewrite is completed at switch time by copying
+	 * in bytes of the IP destination address.
+	 * Offset is 2 bytes into the destination address, and 4 bytes
+	 * are written.
+	 */
+	u8 dst_offset = vec_len (mcast_rewrite) - 2;
+
+	adj_mcast_update_rewrite (ai, mcast_rewrite, dst_offset, 0xffffffff);
+
+	break;
+      }
+    case IP_LOOKUP_NEXT_DROP:
+    case IP_LOOKUP_NEXT_PUNT:
+    case IP_LOOKUP_NEXT_LOCAL:
+    case IP_LOOKUP_NEXT_REWRITE:
+    case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
+    case IP_LOOKUP_NEXT_MIDCHAIN:
+    case IP_LOOKUP_NEXT_ICMP_ERROR:
+    case IP_LOOKUP_N_NEXT:
+      ASSERT (0);
+      break;
+    }
+}
+
+
+/*
+ * Add the /128 adj-fib path for neighbor n to FIB table fib_index and
+ * remember the resulting FIB entry index in the neighbor.
+ */
+static void
+ip6_neighbor_adj_fib_add (ip6_neighbor_t * n, u32 fib_index)
+{
+  fib_prefix_t host_pfx = {
+    .fp_len = 128,
+    .fp_proto = FIB_PROTOCOL_IP6,
+    .fp_addr.ip6 = n->key.ip6_address,
+  };
+
+  n->fib_entry_index = fib_table_entry_path_add (fib_index, &host_pfx,
+						 FIB_SOURCE_ADJ,
+						 FIB_ENTRY_FLAG_ATTACHED,
+						 DPO_PROTO_IP6,
+						 &host_pfx.fp_addr,
+						 n->key.sw_if_index, ~0, 1,
+						 NULL,
+						 FIB_ROUTE_PATH_FLAG_NONE);
+}
+
+/*
+ * Add or update the neighbor cache entry for address a on interface
+ * sw_if_index.
+ *
+ * When called from a thread other than the main thread (thread index 0),
+ * the request is forwarded to the main thread by RPC and 0 is returned
+ * immediately.
+ *
+ * On the main thread: creates the entry (and, unless is_no_fib_entry, its
+ * adj-fib) or updates the link layer address of an existing one,
+ * completes the adjacencies using this next hop, then notifies waiters
+ * registered for the address.
+ *
+ * Returns 0 on success (or when queued), -2 when a dynamic update would
+ * overwrite a static entry.
+ *
+ * NOTE(review): n_bytes_link_layer_address is used unchecked as the copy
+ * and compare length; callers in this file pass 6.
+ */
+int
+vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
+				u32 sw_if_index,
+				ip6_address_t * a,
+				u8 * link_layer_address,
+				uword n_bytes_link_layer_address,
+				int is_static, int is_no_fib_entry)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  ip6_neighbor_key_t k;
+  ip6_neighbor_t *n = 0;
+  int make_new_nd_cache_entry = 1;
+  uword *p;
+  u32 next_index;
+  pending_resolution_t *pr, *mc;
+
+  if (vlib_get_thread_index ())
+    {
+      set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address,
+				  1 /* set new neighbor */ , is_static,
+				  is_no_fib_entry);
+      return 0;
+    }
+
+  k.sw_if_index = sw_if_index;
+  k.ip6_address = a[0];
+  k.pad = 0;
+
+  p = mhash_get (&nm->neighbor_index_by_key, &k);
+  if (p)
+    {
+      n = pool_elt_at_index (nm->neighbor_pool, p[0]);
+      /* Refuse to over-write static neighbor entry. */
+      if (!is_static && (n->flags & IP6_NEIGHBOR_FLAG_STATIC))
+	return -2;
+      make_new_nd_cache_entry = 0;
+    }
+
+  if (make_new_nd_cache_entry)
+    {
+      pool_get (nm->neighbor_pool, n);
+      mhash_set (&nm->neighbor_index_by_key, &k, n - nm->neighbor_pool,
+		 /* old value */ 0);
+      n->key = k;
+      n->fib_entry_index = FIB_NODE_INDEX_INVALID;
+      /*
+       * The flags are only ever OR-ed into below. Start from a clean
+       * slate so a recycled pool element cannot leak the flags of the
+       * entry that previously lived in it.
+       */
+      n->flags = 0;
+
+      clib_memcpy (n->link_layer_address,
+		   link_layer_address, n_bytes_link_layer_address);
+
+      /*
+       * create the adj-fib. the entry in the FIB table for and to the peer.
+       */
+      if (!is_no_fib_entry)
+	{
+	  ip6_neighbor_adj_fib_add (n,
+				    ip6_fib_table_get_index_for_sw_if_index
+				    (n->key.sw_if_index));
+	}
+      else
+	{
+	  n->flags |= IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY;
+	}
+    }
+  else
+    {
+      /*
+       * prevent a DoS attack from the data-plane that
+       * spams us with no-op updates to the MAC address
+       */
+      if (0 == memcmp (n->link_layer_address,
+		       link_layer_address, n_bytes_link_layer_address))
+	goto check_customers;
+
+      clib_memcpy (n->link_layer_address,
+		   link_layer_address, n_bytes_link_layer_address);
+    }
+
+  /*
+   * Update time stamp and flags. Static and dynamic are mutually
+   * exclusive: a dynamic entry that is re-added as static must not keep
+   * its dynamic flag.
+   */
+  n->cpu_time_last_updated = clib_cpu_time_now ();
+  if (is_static)
+    {
+      n->flags |= IP6_NEIGHBOR_FLAG_STATIC;
+      n->flags &= ~IP6_NEIGHBOR_FLAG_DYNAMIC;
+    }
+  else
+    {
+      n->flags |= IP6_NEIGHBOR_FLAG_DYNAMIC;
+      n->flags &= ~IP6_NEIGHBOR_FLAG_STATIC;
+    }
+
+  adj_nbr_walk_nh6 (sw_if_index,
+		    &n->key.ip6_address, ip6_nd_mk_complete_walk, n);
+
+check_customers:
+  /* Customer(s) waiting for this address to be resolved? */
+  p = mhash_get (&nm->pending_resolutions_by_address, a);
+  if (p)
+    {
+      next_index = p[0];
+
+      /* One-shot waiters: signal each and release it. */
+      while (next_index != (u32) ~ 0)
+	{
+	  pr = pool_elt_at_index (nm->pending_resolutions, next_index);
+	  vlib_process_signal_event (vm, pr->node_index,
+				     pr->type_opaque, pr->data);
+	  next_index = pr->next_index;
+	  pool_put (nm->pending_resolutions, pr);
+	}
+
+      mhash_unset (&nm->pending_resolutions_by_address, a, 0);
+    }
+
+  /* Customer(s) requesting ND event for this address? */
+  p = mhash_get (&nm->mac_changes_by_address, a);
+  if (p)
+    {
+      next_index = p[0];
+
+      /* Persistent subscriptions: the chain is walked but kept. */
+      while (next_index != (u32) ~ 0)
+	{
+	  int (*fp) (u32, u8 *, u32, ip6_address_t *);
+	  int rv = 1;
+	  mc = pool_elt_at_index (nm->mac_changes, next_index);
+	  fp = mc->data_callback;
+
+	  /* Call the user's data callback, return 1 to suppress dup events */
+	  if (fp)
+	    rv =
+	      (*fp) (mc->data, link_layer_address, sw_if_index, &ip6a_zero);
+	  /*
+	   * Signal the resolver process, as long as the user
+	   * says they want to be notified
+	   */
+	  if (rv == 0)
+	    vlib_process_signal_event (vm, mc->node_index,
+				       mc->type_opaque, mc->data);
+	  next_index = mc->next_index;
+	}
+    }
+
+  return 0;
+}
+
+/*
+ * Delete the neighbor cache entry for address a on interface sw_if_index.
+ *
+ * When called from a thread other than the main thread (thread index 0),
+ * the request is forwarded to the main thread by RPC and 0 is returned
+ * immediately.
+ *
+ * On the main thread: removes the entry from the hash, makes the
+ * adjacencies using this next hop incomplete again, removes the adj-fib
+ * path (if any) and frees the pool element. link_layer_address and
+ * n_bytes_link_layer_address are not used for the lookup.
+ *
+ * Returns 0 on success (or when queued), -1 when no such entry exists.
+ */
+int
+vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
+				  u32 sw_if_index,
+				  ip6_address_t * a,
+				  u8 * link_layer_address,
+				  uword n_bytes_link_layer_address)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  ip6_neighbor_key_t k;
+  ip6_neighbor_t *n;
+  uword *p;
+
+  if (vlib_get_thread_index ())
+    {
+      set_unset_ip6_neighbor_rpc (vm, sw_if_index, a, link_layer_address,
+				  0 /* unset */ , 0, 0);
+      return 0;
+    }
+
+  k.sw_if_index = sw_if_index;
+  k.ip6_address = a[0];
+  k.pad = 0;
+
+  p = mhash_get (&nm->neighbor_index_by_key, &k);
+  if (p == 0)
+    return -1;
+
+  n = pool_elt_at_index (nm->neighbor_pool, p[0]);
+  mhash_unset (&nm->neighbor_index_by_key, &n->key, 0);
+
+  adj_nbr_walk_nh6 (sw_if_index,
+		    &n->key.ip6_address, ip6_nd_mk_incomplete_walk, NULL);
+
+  /* Same adj-fib teardown as the interface-down path. */
+  ip6_neighbor_adj_fib_remove (n,
+			       ip6_fib_table_get_index_for_sw_if_index
+			       (n->key.sw_if_index));
+  pool_put (nm->neighbor_pool, n);
+
+  return 0;
+}
+
+/*
+ * Main-thread end of set_unset_ip6_neighbor_rpc(): replays the forwarded
+ * request as a set or unset call with a 6 byte link layer address.
+ * The result of the call is discarded.
+ */
+static void ip6_neighbor_set_unset_rpc_callback
+  (ip6_neighbor_set_unset_rpc_args_t * a)
+{
+  vlib_main_t *vm = vlib_get_main ();
+
+  if (!a->is_add)
+    {
+      vnet_unset_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr,
+					a->link_layer_address, 6);
+      return;
+    }
+
+  vnet_set_ip6_ethernet_neighbor (vm, a->sw_if_index, &a->addr,
+				  a->link_layer_address, 6, a->is_static,
+				  a->is_no_fib_entry);
+}
+
+/**
+ * Ordering callback for vectors of ip6_neighbor_t: order by interface
+ * (vnet_sw_interface_compare) and, for equal interfaces, by IPv6 address
+ * (ip6_address_compare).
+ */
+static int
+ip6_neighbor_sort (void *a1, void *a2)
+{
+  ip6_neighbor_t *lhs = a1;
+  ip6_neighbor_t *rhs = a2;
+  vnet_main_t *vnm = vnet_get_main ();
+  int by_interface;
+
+  by_interface = vnet_sw_interface_compare (vnm, lhs->key.sw_if_index,
+                                            rhs->key.sw_if_index);
+  if (by_interface != 0)
+    return by_interface;
+
+  return ip6_address_compare (&lhs->key.ip6_address,
+                              &rhs->key.ip6_address);
+}
+
+/**
+ * Build a sorted vector holding copies of the neighbor pool entries.
+ *
+ * @param sw_if_index  interface to filter on, or ~0 to take every entry.
+ * @return vector of copies ordered by ip6_neighbor_sort(), or 0 when nothing
+ *         matched.  The CLI caller in this file releases it with vec_free().
+ */
+ip6_neighbor_t *
+ip6_neighbors_entries (u32 sw_if_index)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  ip6_neighbor_t *entry;
+  ip6_neighbor_t *matches = 0;
+
+  /* *INDENT-OFF* */
+  pool_foreach (entry, nm->neighbor_pool,
+  ({
+    if (sw_if_index == ~0 || entry->key.sw_if_index == sw_if_index)
+      vec_add1 (matches, entry[0]);
+  }));
+  /* *INDENT-ON* */
+
+  if (matches == 0)
+    return 0;
+
+  vec_sort_with_function (matches, ip6_neighbor_sort);
+  return matches;
+}
+
+/**
+ * CLI handler for "show ip6 neighbors [<interface>]".
+ *
+ * Parses an optional interface, fetches the matching entries and prints one
+ * line per entry after a first line produced by formatting a null entry.
+ * Always returns 0 (no error).
+ */
+static clib_error_t *
+show_ip6_neighbors (vlib_main_t * vm,
+                    unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip6_neighbor_t *entries, *cur;
+  u32 sw_if_index = ~0;
+
+  /* Filter entries by interface if given; ~0 means "all interfaces". */
+  (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
+
+  entries = ip6_neighbors_entries (sw_if_index);
+  if (entries == 0)
+    return 0;
+
+  /* A null entry presumably makes the formatter emit the column header. */
+  vlib_cli_output (vm, "%U", format_ip6_neighbor_ip6_entry, vm, 0);
+  vec_foreach (cur, entries)
+    vlib_cli_output (vm, "%U", format_ip6_neighbor_ip6_entry, vm, cur);
+  vec_free (entries);
+
+  return 0;
+}
+
+/*?
+ * This command is used to display the adjacent IPv6 hosts found via
+ * neighbor discovery. Optionally, limit the output to the specified
+ * interface.
+ *
+ * @cliexpar
+ * Example of how to display the IPv6 neighbor adjacency table:
+ * @cliexstart{show ip6 neighbors}
+ * Time Address Flags Link layer Interface
+ * 34.0910 ::a:1:1:0:7 02:fe:6a:07:39:6f GigabitEthernet2/0/0
+ * 173.2916 ::b:5:1:c:2 02:fe:50:62:3a:94 GigabitEthernet2/0/0
+ * 886.6654 ::1:1:c:0:9 S 02:fe:e4:45:27:5b GigabitEthernet3/0/0
+ * @cliexend
+ * Example of how to display the IPv6 neighbor adjacency table for given interface:
+ * @cliexstart{show ip6 neighbors GigabitEthernet2/0/0}
+ * Time Address Flags Link layer Interface
+ * 34.0910 ::a:1:1:0:7 02:fe:6a:07:39:6f GigabitEthernet2/0/0
+ * 173.2916 ::b:5:1:c:2 02:fe:50:62:3a:94 GigabitEthernet2/0/0
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_ip6_neighbors_command, static) = { /* registers the "show ip6 neighbors" CLI command */
+ .path = "show ip6 neighbors",
+ .function = show_ip6_neighbors, /* handler defined earlier in this file */
+ .short_help = "show ip6 neighbors [<interface>]", /* the interface is optional; without it the handler lists every entry */
+};
+/* *INDENT-ON* */
+
+/**
+ * CLI handler for "set ip6 neighbor".
+ *
+ * Accepts, in any order, an "<interface> <ip6-address> <mac-address>" triple
+ * and the keywords "delete"/"del", "static" and "no-fib-entry".  Parsing
+ * stops at the first token that matches none of these.  The triple is
+ * mandatory; without it an error is returned.  The result of the add or
+ * delete call itself is not reported.
+ */
+static clib_error_t *
+set_ip6_neighbor (vlib_main_t * vm,
+                  unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip6_address_t nbr_addr;
+  u8 mac[6];
+  u32 sw_if_index;
+  int have_triple = 0;
+  int deleting = 0;
+  int is_static = 0;
+  int is_no_fib_entry = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      /* intfc, ip6-address, mac-address */
+      if (unformat (input, "%U %U %U",
+                    unformat_vnet_sw_interface, vnm, &sw_if_index,
+                    unformat_ip6_address, &nbr_addr,
+                    unformat_ethernet_address, mac))
+        {
+          have_triple = 1;
+          continue;
+        }
+      if (unformat (input, "delete") || unformat (input, "del"))
+        {
+          deleting = 1;
+          continue;
+        }
+      if (unformat (input, "static"))
+        {
+          is_static = 1;
+          continue;
+        }
+      if (unformat (input, "no-fib-entry"))
+        {
+          is_no_fib_entry = 1;
+          continue;
+        }
+      break;
+    }
+
+  if (!have_triple)
+    return clib_error_return (0, "Missing interface, ip6 or hw address");
+
+  if (deleting)
+    vnet_unset_ip6_ethernet_neighbor (vm, sw_if_index, &nbr_addr,
+                                      mac, sizeof (mac));
+  else
+    vnet_set_ip6_ethernet_neighbor (vm, sw_if_index, &nbr_addr,
+                                    mac, sizeof (mac),
+                                    is_static, is_no_fib_entry);
+  return 0;
+}
+
+/*?
+ * This command is used to manually add an entry to the IPv6 neighbor
+ * adjacency table. Optionally, the entry can be added as static. It is
+ * also used to remove an entry from the table. Use the '<em>show ip6
+ * neighbors</em>' command to display all learned and manually entered entries.
+ *
+ * @cliexpar
+ * Example of how to add a static entry to the IPv6 neighbor adjacency table:
+ * @cliexcmd{set ip6 neighbor GigabitEthernet2/0/0 ::1:1:c:0:9 02:fe:e4:45:27:5b static}
+ * Example of how to delete an entry from the IPv6 neighbor adjacency table:
+ * @cliexcmd{set ip6 neighbor del GigabitEthernet2/0/0 ::1:1:c:0:9 02:fe:e4:45:27:5b}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_neighbor_command, static) =
+{
+  .path = "set ip6 neighbor",
+  .function = set_ip6_neighbor,
+  /* Lists every keyword set_ip6_neighbor() parses, including no-fib-entry. */
+  .short_help = "set ip6 neighbor [del] <interface> <ip6-address> <mac-address> [static] [no-fib-entry]",
+};
+/* *INDENT-ON* */
+
+typedef enum /* next indices used by icmp6_neighbor_solicitation_or_advertisement() */
+{
+ ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP, /* value 0: rejected solicitations, and every advertisement (which uses next index 0) */
+ ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY, /* solicitations that passed validation and were rewritten into an advertisement */
+ ICMP6_NEIGHBOR_SOLICITATION_N_NEXT, /* last enumerator, so its value is the number of next indices above */
+} icmp6_neighbor_solicitation_or_advertisement_next_t;
+/**
+ * Shared worker for received ICMPv6 neighbor solicitations
+ * (is_solicitation != 0) and neighbor advertisements (is_solicitation == 0).
+ *
+ * For every buffer in the frame it:
+ *  - checks that the IPv6 source is unspecified, link-local, or resolves to
+ *    an adjacency on the receive interface;
+ *  - learns the sender's link-layer address when the packet carries exactly
+ *    one 8-byte link-layer-address option and the source is not unspecified;
+ *  - for a solicitation, checks that the target address is an exact /128
+ *    match in the interface's FIB that is either local or sourced by ND
+ *    proxy, then rewrites the packet in place into a solicited neighbor
+ *    advertisement sent back out the receive interface.
+ *
+ * Advertisements and rejected solicitations are enqueued to next index 0.
+ *
+ * @return frame->n_vectors.
+ */
+static_always_inline uword
+icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ uword is_solicitation)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index, n_advertisements_sent;
+ icmp6_neighbor_discovery_option_type_t option_type;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_icmp_input_node.index); /* errors are accounted against the ip6 icmp input node */
+ int bogus_length;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ option_type = /* solicitations carry the source LL option, advertisements the target LL option */
+ (is_solicitation
+ ? ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address
+ : ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address);
+ n_advertisements_sent = 0;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ icmp6_neighbor_solicitation_or_advertisement_header_t *h0;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *o0;
+ u32 bi0, options_len0, sw_if_index0, next0, error0;
+ u32 ip6_sadd_link_local, ip6_sadd_unspecified;
+ int is_rewrite0;
+ u32 ni0;
+
+ bi0 = to_next[0] = from[0]; /* speculatively enqueue to the cached next */
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ h0 = ip6_next_header (ip0);
+ options_len0 =
+ clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]); /* NOTE(review): u32 arithmetic, wraps if payload_length is smaller than the header; in this function it is only compared against 8 below */
+
+ error0 = ICMP6_ERROR_NONE;
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ ip6_sadd_link_local =
+ ip6_address_is_link_local_unicast (&ip0->src_address);
+ ip6_sadd_unspecified =
+ ip6_address_is_unspecified (&ip0->src_address);
+
+ /* Check that source address is unspecified, link-local or else on-link. */
+ if (!ip6_sadd_unspecified && !ip6_sadd_link_local)
+ {
+ u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
+
+ if (ADJ_INDEX_INVALID != src_adj_index0)
+ {
+ ip_adjacency_t *adj0 = adj_get (src_adj_index0);
+
+ /* Allow all realistic-looking rewrite adjacencies to pass */
+ ni0 = adj0->lookup_next_index;
+ is_rewrite0 = (ni0 >= IP_LOOKUP_NEXT_ARP) &&
+ (ni0 < IP6_LOOKUP_N_NEXT);
+
+ error0 = ((adj0->rewrite_header.sw_if_index != sw_if_index0 /* source must resolve via the receive interface */
+ || !is_rewrite0)
+ ?
+ ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK
+ : error0);
+ }
+ else
+ {
+ error0 =
+ ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK;
+ }
+ }
+
+ o0 = (void *) (h0 + 1);
+ o0 = ((options_len0 == 8 && o0->header.type == option_type /* accept only a single 8-byte option of the expected type; otherwise o0 becomes 0 */
+ && o0->header.n_data_u64s == 1) ? o0 : 0);
+
+ /* If the src address is unspecified, do not learn the neighbor MAC.
+ * Link-local sources are still learned, but the link-local flag is
+ * passed as the final (no-fib-entry) argument below. */
+ if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
+ !ip6_sadd_unspecified))
+ {
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ if (nm->limit_neighbor_cache_size && /* cache is size-limited and full: evict an entry first */
+ pool_elts (nm->neighbor_pool) >=
+ nm->limit_neighbor_cache_size)
+ unset_random_neighbor_entry ();
+ vnet_set_ip6_ethernet_neighbor (vm, sw_if_index0,
+ is_solicitation ?
+ &ip0->src_address :
+ &h0->target_address,
+ o0->ethernet_address,
+ sizeof (o0->ethernet_address),
+ 0, ip6_sadd_link_local); /* is_static = 0; is_no_fib_entry = source is link-local */
+ }
+
+ if (is_solicitation && error0 == ICMP6_ERROR_NONE)
+ {
+ /* Check that target address is local to this router. */
+ fib_node_index_t fei;
+ u32 fib_index;
+
+ fib_index =
+ ip6_fib_table_get_index_for_sw_if_index (sw_if_index0);
+
+ if (~0 == fib_index)
+ {
+ error0 = ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN;
+ }
+ else
+ {
+ fei = ip6_fib_table_lookup_exact_match (fib_index,
+ &h0->target_address,
+ 128);
+
+ if (FIB_NODE_INDEX_INVALID == fei)
+ {
+ /* The target address is not in the FIB */
+ error0 =
+ ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN;
+ }
+ else
+ {
+ if (FIB_ENTRY_FLAG_LOCAL &
+ fib_entry_get_flags_for_source (fei,
+ FIB_SOURCE_INTERFACE))
+ {
+ /* It's an address that belongs to one of our interfaces
+ * that's good. */
+ }
+ else
+ if (fib_entry_is_sourced
+ (fei, FIB_SOURCE_IP6_ND_PROXY))
+ {
+ /* The address was added by IPv6 Proxy ND config.
+ * We should only respond to these if the NS arrived on
+ * the link that has a matching covering prefix */
+ }
+ else
+ {
+ error0 =
+ ICMP6_ERROR_NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN;
+ }
+ }
+ }
+ }
+
+ if (is_solicitation)
+ next0 = (error0 != ICMP6_ERROR_NONE
+ ? ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP
+ : ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY);
+ else
+ {
+ next0 = 0; /* 0 is ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP: advertisements are never forwarded on */
+ error0 = error0 == ICMP6_ERROR_NONE ? /* a valid advertisement is tagged with the RX counter rather than an error */
+ ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_RX : error0;
+ }
+
+ if (is_solicitation && error0 == ICMP6_ERROR_NONE)
+ {
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0;
+ ethernet_header_t *eth0;
+
+ /* dst address is either source address or the all-nodes mcast addr */
+ if (!ip6_sadd_unspecified)
+ ip0->dst_address = ip0->src_address;
+ else
+ ip6_set_reserved_multicast_address (&ip0->dst_address,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+
+ ip0->src_address = h0->target_address;
+ ip0->hop_limit = 255;
+ h0->icmp.type = ICMP6_neighbor_advertisement;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 =
+ ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+ if (eth_if0 && o0) /* reuse the received option slot for our own address, when both exist */
+ {
+ clib_memcpy (o0->ethernet_address, eth_if0->address, 6);
+ o0->header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
+ }
+
+ h0->advertisement_flags = clib_host_to_net_u32
+ (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED
+ | ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+
+ h0->icmp.checksum = 0; /* zero the field before recomputing the checksum over the rewritten packet */
+ h0->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0,
+ &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ /* Reuse current MAC header, copy SMAC to DMAC and
+ * interface MAC to SMAC */
+ vlib_buffer_advance (p0, -ethernet_buffer_header_size (p0));
+ eth0 = vlib_buffer_get_current (p0);
+ clib_memcpy (eth0->dst_address, eth0->src_address, 6);
+ if (eth_if0)
+ clib_memcpy (eth0->src_address, eth_if0->address, 6);
+
+ /* Setup input and output sw_if_index for packet */
+ ASSERT (vnet_buffer (p0)->sw_if_index[VLIB_RX] == sw_if_index0);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ n_advertisements_sent++;
+ }
+
+ p0->error = error_node->errors[error0];
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Account for advertisements sent. */
+ vlib_error_count (vm, error_node->node_index,
+ ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX,
+ n_advertisements_sent);
+
+ return frame->n_vectors;
+}
+
+/* for "syslogging" - use elog for now
+ *
+ * foreach_log_level is an X-macro list of (name, display string) pairs in
+ * the order DEBUG .. EMERG.  It is expanded twice below: once to generate
+ * the LOG_<name> enumerators and once to generate the parallel string
+ * table, so log_level_strings[LOG_<name>] is the display string for that
+ * level.
+ */
+#define foreach_log_level \
+ _ (DEBUG, "DEBUG") \
+ _ (INFO, "INFORMATION") \
+ _ (NOTICE, "NOTICE") \
+ _ (WARNING, "WARNING") \
+ _ (ERR, "ERROR") \
+ _ (CRIT, "CRITICAL") \
+ _ (ALERT, "ALERT") \
+ _ (EMERG, "EMERGENCY")
+
+typedef enum
+{
+#define _(f,s) LOG_##f,
+ foreach_log_level
+#undef _
+} log_level_t; /* LOG_DEBUG == 0 ... LOG_EMERG == 7, following the list order */
+
+static char *log_level_strings[] = { /* indexed by log_level_t */
+#define _(f,s) s,
+ foreach_log_level
+#undef _
+};
+
+static int logmask = 1 << LOG_DEBUG; /* bit N enables level N in ip6_neighbor_syslog(); only LOG_DEBUG is enabled here, so LOG_WARNING and LOG_ERR messages are filtered out unless this mask is changed */
+
+/**
+ * Record a neighbor-discovery log message in the vlib event log.
+ *
+ * Nothing is recorded unless @a priority is a valid log_level_t value whose
+ * bit is set in logmask and @a fmt is non-NULL.
+ *
+ * @param vm        vlib main whose elog_main receives the event.
+ * @param priority  one of the LOG_* levels.
+ * @param fmt       vppinfra format string, followed by its arguments.
+ */
+static void
+ip6_neighbor_syslog (vlib_main_t * vm, int priority, char *fmt, ...)
+{
+  /* just use elog for now */
+  u8 *what;
+  va_list va;
+
+  /* Reject out-of-range levels before shifting by or indexing with them. */
+  if ((priority < 0) || (priority > LOG_EMERG)
+      || !(logmask & (1 << priority)))
+    return;
+
+  if (!fmt)
+    return;
+
+  va_start (va, fmt);
+  what = va_format (0, fmt, &va);
+  va_end (va);
+
+  ELOG_TYPE_DECLARE (e) =
+  {
+  .format = "ip6 nd: (%s): %s",.format_args = "T4T4",};
+  struct
+  {
+    u32 s[2];
+  } *ed;
+  ed = ELOG_DATA (&vm->elog_main, e);
+  ed->s[0] = elog_string (&vm->elog_main, log_level_strings[priority]);
+  /*
+   * "what" is a vppinfra vector with no terminating NUL and may contain
+   * '%', so it must not be handed over as the format string itself; have
+   * elog_string() print it through the %v vector conversion instead.
+   */
+  ed->s[1] = elog_string (&vm->elog_main, "%v", what);
+
+  /* elog_string() formats into the elog string table, so the temporary
+   * vector is no longer needed and would otherwise leak on every call. */
+  vec_free (what);
+}
+
+/* ipv6 neighbor discovery - router advertisements */
+typedef enum /* next indices used by the router solicitation / advertisement handlers */
+{
+ ICMP6_ROUTER_SOLICITATION_NEXT_DROP, /* default for every packet; also the only next used for received router advertisements */
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW, /* advertisement for a packet with an unspecified destination, sent via radv_info->mcast_adj_index */
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX, /* reply to a received solicitation, Ethernet header already rewritten in place */
+ ICMP6_ROUTER_SOLICITATION_N_NEXT, /* last enumerator, so its value is the number of next indices above */
+} icmp6_router_solicitation_or_advertisement_next_t;
+/**
+ * Process router solicitations and build router advertisements in place.
+ *
+ * Each buffer is either a received router solicitation or, when its IPv6
+ * destination is unspecified, a trigger that is not a solicitation (the
+ * existing comment below attributes these to the nd_timer node).  After
+ * source validation and optional learning of the sender's link-layer
+ * address, the buffer is shortened to the IPv6 header and a router
+ * advertisement is appended from the interface's ip6_radv_t configuration:
+ * the RA header, an optional source link-layer option, an optional MTU
+ * option, and one prefix-information option per eligible prefix.
+ *
+ * Replies to solicitations are rate limited with MIN_DELAY_BETWEEN_RAS and
+ * leave via NEXT_REPLY_TX; the non-solicited case uses the interface's
+ * multicast adjacency and leaves via NEXT_REPLY_RW.  Everything else keeps
+ * the default NEXT_DROP.
+ *
+ * @return frame->n_vectors.
+ */
+static_always_inline uword
+icmp6_router_solicitation (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_main_t *im = &ip6_main;
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+ u32 n_advertisements_sent = 0;
+ int bogus_length;
+
+ icmp6_neighbor_discovery_option_type_t option_type;
+
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_icmp_input_node.index); /* errors are accounted against the ip6 icmp input node */
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ /* source may append his LL address */
+ option_type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ ip6_radv_t *radv_info = 0;
+
+ icmp6_neighbor_discovery_header_t *h0;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *o0;
+
+ u32 bi0, options_len0, sw_if_index0, next0, error0;
+ u32 is_solicitation = 1, is_dropped = 0;
+ u32 is_unspecified, is_link_local;
+
+ bi0 = to_next[0] = from[0]; /* speculatively enqueue to the cached next */
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ h0 = ip6_next_header (ip0);
+ options_len0 =
+ clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]); /* NOTE(review): u32 arithmetic, wraps if payload_length is smaller than the header; the value is later subtracted from current_length without a check against the buffer */
+ is_unspecified = ip6_address_is_unspecified (&ip0->src_address);
+ is_link_local =
+ ip6_address_is_link_local_unicast (&ip0->src_address);
+
+ error0 = ICMP6_ERROR_NONE;
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /* check if solicitation (not from nd_timer node) */
+ if (ip6_address_is_unspecified (&ip0->dst_address))
+ is_solicitation = 0;
+
+ /* Check that source address is unspecified, link-local or else on-link. */
+ if (!is_unspecified && !is_link_local)
+ {
+ u32 src_adj_index0 = ip6_src_lookup_for_packet (im, p0, ip0);
+
+ if (ADJ_INDEX_INVALID != src_adj_index0)
+ {
+ ip_adjacency_t *adj0 = adj_get (src_adj_index0);
+
+ error0 = (adj0->rewrite_header.sw_if_index != sw_if_index0 /* source must resolve via the receive interface */
+ ?
+ ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK
+ : error0);
+ }
+ else
+ {
+ error0 = ICMP6_ERROR_ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK;
+ }
+ }
+
+ /* check for source LL option and process */
+ o0 = (void *) (h0 + 1);
+ o0 = ((options_len0 == 8 /* accept only a single 8-byte option of the expected type; otherwise o0 becomes 0 */
+ && o0->header.type == option_type
+ && o0->header.n_data_u64s == 1) ? o0 : 0);
+
+ /* if src address unspecified IGNORE any options
+ * (the condition below also skips learning for link-local sources) */
+ if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
+ !is_unspecified && !is_link_local))
+ {
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main; /* NOTE(review): shadows the function-scope nm, which points at the same object */
+ if (nm->limit_neighbor_cache_size && /* cache is size-limited and full: evict an entry first */
+ pool_elts (nm->neighbor_pool) >=
+ nm->limit_neighbor_cache_size)
+ unset_random_neighbor_entry ();
+
+ vnet_set_ip6_ethernet_neighbor (vm, sw_if_index0,
+ &ip0->src_address,
+ o0->ethernet_address,
+ sizeof (o0->ethernet_address),
+ 0, 0); /* is_static = 0, is_no_fib_entry = 0 */
+ }
+
+ /* default is to drop */
+ next0 = ICMP6_ROUTER_SOLICITATION_NEXT_DROP;
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0;
+ u32 adj_index0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 =
+ ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ /* only support ethernet interface type for now */
+ error0 =
+ (!eth_if0) ? ICMP6_ERROR_ROUTER_SOLICITATION_UNSUPPORTED_INTF
+ : error0;
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ u32 ri;
+
+ /* adjust the sizeof the buffer to just include the ipv6 header */
+ p0->current_length -=
+ (options_len0 +
+ sizeof (icmp6_neighbor_discovery_header_t));
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty
+ (nm->if_radv_pool_index_by_sw_if_index, sw_if_index0, ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index0];
+
+ if (ri != ~0)
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ error0 =
+ ((!radv_info) ?
+ ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG :
+ error0);
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ f64 now = vlib_time_now (vm);
+
+ /* for solicited adverts - need to rate limit */
+ if (is_solicitation)
+ {
+ if (0 != radv_info->last_radv_time && /* too soon after the previous RA: the RA is still built below, but next0 stays at DROP */
+ (now - radv_info->last_radv_time) <
+ MIN_DELAY_BETWEEN_RAS)
+ is_dropped = 1;
+ else
+ radv_info->last_radv_time = now;
+ }
+
+ /* send now */
+ icmp6_router_advertisement_header_t rh;
+
+ rh.icmp.type = ICMP6_router_advertisement;
+ rh.icmp.code = 0;
+ rh.icmp.checksum = 0; /* filled in after the whole RA has been appended */
+
+ rh.current_hop_limit = radv_info->curr_hop_limit;
+ rh.router_lifetime_in_sec =
+ clib_host_to_net_u16
+ (radv_info->adv_router_lifetime_in_sec);
+ rh.
+ time_in_msec_between_retransmitted_neighbor_solicitations
+ =
+ clib_host_to_net_u32 (radv_info->
+ adv_time_in_msec_between_retransmitted_neighbor_solicitations);
+ rh.neighbor_reachable_time_in_msec =
+ clib_host_to_net_u32 (radv_info->
+ adv_neighbor_reachable_time_in_msec);
+
+ rh.flags =
+ (radv_info->adv_managed_flag) ?
+ ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP :
+ 0;
+ rh.flags |=
+ ((radv_info->adv_other_flag) ?
+ ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP :
+ 0);
+
+
+ u16 payload_length = /* running total of everything appended after the IPv6 header */
+ sizeof (icmp6_router_advertisement_header_t);
+
+ vlib_buffer_add_data (vm,
+ vlib_buffer_get_free_list_index
+ (p0), bi0, (void *) &rh,
+ sizeof
+ (icmp6_router_advertisement_header_t));
+
+ if (radv_info->adv_link_layer_address)
+ {
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ h;
+
+ h.header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address;
+ h.header.n_data_u64s = 1;
+
+ /* copy ll address */
+ clib_memcpy (&h.ethernet_address[0],
+ eth_if0->address, 6);
+
+ vlib_buffer_add_data (vm,
+ vlib_buffer_get_free_list_index
+ (p0), bi0, (void *) &h,
+ sizeof
+ (icmp6_neighbor_discovery_ethernet_link_layer_address_option_t));
+
+ payload_length +=
+ sizeof
+ (icmp6_neighbor_discovery_ethernet_link_layer_address_option_t);
+ }
+
+ /* add MTU option */
+ if (radv_info->adv_link_mtu)
+ {
+ icmp6_neighbor_discovery_mtu_option_t h;
+
+ h.unused = 0;
+ h.mtu =
+ clib_host_to_net_u32 (radv_info->adv_link_mtu);
+ h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_mtu;
+ h.header.n_data_u64s = 1;
+
+ payload_length +=
+ sizeof (icmp6_neighbor_discovery_mtu_option_t);
+
+ vlib_buffer_add_data (vm,
+ vlib_buffer_get_free_list_index
+ (p0), bi0, (void *) &h,
+ sizeof
+ (icmp6_neighbor_discovery_mtu_option_t));
+ }
+
+ /* add advertised prefix options */
+ ip6_radv_prefix_t *pr_info;
+
+ /* *INDENT-OFF* */
+ pool_foreach (pr_info, radv_info->adv_prefixes_pool,
+ ({
+ if(pr_info->enabled &&
+ (!pr_info->decrement_lifetime_flag
+ || (pr_info->pref_lifetime_expires >0)))
+ {
+ /* advertise this prefix */
+ icmp6_neighbor_discovery_prefix_information_option_t h;
+
+ h.header.type = ICMP6_NEIGHBOR_DISCOVERY_OPTION_prefix_information;
+ h.header.n_data_u64s = (sizeof(icmp6_neighbor_discovery_prefix_information_option_t) >> 3);
+
+ h.dst_address_length = pr_info->prefix_len;
+
+ h.flags = (pr_info->adv_on_link_flag) ? ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_FLAG_ON_LINK : 0;
+ h.flags |= (pr_info->adv_autonomous_flag) ? ICMP6_NEIGHBOR_DISCOVERY_PREFIX_INFORMATION_AUTO : 0;
+
+ if(radv_info->cease_radv && pr_info->deprecated_prefix_flag)
+ {
+ h.valid_time = clib_host_to_net_u32(MIN_ADV_VALID_LIFETIME);
+ h.preferred_time = 0;
+ }
+ else
+ {
+ if(pr_info->decrement_lifetime_flag)
+ {
+ /* recompute the remaining lifetimes from the stored expiry times, clamped at 0 */
+ pr_info->adv_valid_lifetime_in_secs = ((pr_info->valid_lifetime_expires > now)) ?
+ (pr_info->valid_lifetime_expires - now) : 0;
+
+ pr_info->adv_pref_lifetime_in_secs = ((pr_info->pref_lifetime_expires > now)) ?
+ (pr_info->pref_lifetime_expires - now) : 0;
+ }
+
+ h.valid_time = clib_host_to_net_u32(pr_info->adv_valid_lifetime_in_secs);
+ h.preferred_time = clib_host_to_net_u32(pr_info->adv_pref_lifetime_in_secs) ;
+ }
+ h.unused = 0;
+
+ clib_memcpy(&h.dst_address, &pr_info->prefix, sizeof(ip6_address_t));
+
+ payload_length += sizeof( icmp6_neighbor_discovery_prefix_information_option_t);
+
+ vlib_buffer_add_data (vm,
+ vlib_buffer_get_free_list_index (p0),
+ bi0,
+ (void *)&h, sizeof(icmp6_neighbor_discovery_prefix_information_option_t));
+
+ }
+ }));
+ /* *INDENT-ON* */
+
+ /* add additional options before here */
+
+ /* finish building the router advertisement... */
+ if (!is_unspecified && radv_info->send_unicast)
+ {
+ ip0->dst_address = ip0->src_address;
+ }
+ else
+ {
+ /* target address is all-nodes mcast addr */
+ ip6_set_reserved_multicast_address
+ (&ip0->dst_address,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+ }
+
+ /* source address MUST be the link-local address */
+ ip0->src_address = radv_info->link_local_address;
+
+ ip0->hop_limit = 255;
+ ip0->payload_length =
+ clib_host_to_net_u16 (payload_length);
+
+ icmp6_router_advertisement_header_t *rh0 =
+ (icmp6_router_advertisement_header_t *) (ip0 + 1);
+ rh0->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip0,
+ &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ /* setup output if and adjacency */
+ vnet_buffer (p0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ if (is_solicitation)
+ {
+ ethernet_header_t *eth0;
+ /* Reuse current MAC header, copy SMAC to DMAC and
+ * interface MAC to SMAC */
+ vlib_buffer_reset (p0); /* presumably rewinds the buffer to where the received Ethernet header sits - TODO confirm */
+ eth0 = vlib_buffer_get_current (p0);
+ clib_memcpy (eth0->dst_address, eth0->src_address,
+ 6);
+ clib_memcpy (eth0->src_address, eth_if0->address,
+ 6);
+ next0 =
+ is_dropped ? next0 :
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX;
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+ sw_if_index0;
+ }
+ else
+ {
+ adj_index0 = radv_info->mcast_adj_index;
+ if (adj_index0 == 0) /* index 0 is treated here as "no multicast adjacency" */
+ error0 = ICMP6_ERROR_DST_LOOKUP_MISS;
+ else
+ {
+ next0 =
+ is_dropped ? next0 :
+ ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
+ adj_index0;
+ }
+ }
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
+ radv_info->n_solicitations_dropped += is_dropped;
+ radv_info->n_solicitations_rcvd += is_solicitation;
+
+ if ((error0 == ICMP6_ERROR_NONE) && !is_dropped)
+ {
+ radv_info->n_advertisements_sent++;
+ n_advertisements_sent++;
+ }
+ }
+ }
+ }
+
+ p0->error = error_node->errors[error0];
+
+ if (error0 != ICMP6_ERROR_NONE)
+ vlib_error_count (vm, error_node->node_index, error0, 1);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Account for router advertisements sent. */
+ vlib_error_count (vm, error_node->node_index,
+ ICMP6_ERROR_ROUTER_ADVERTISEMENTS_TX,
+ n_advertisements_sent);
+
+ return frame->n_vectors;
+}
+
+ /*
+ * Validate a received router advertisement against our own advertised
+ * configuration for consistency (see RFC-4861 section 6.2.7).
+ *
+ * The RA must come from a link-local source, arrive on an Ethernet
+ * interface, and that interface must have router-advertisement state
+ * configured.  The header fields and the MTU / prefix-information options
+ * are then compared with the local ip6_radv_t settings; any inconsistencies
+ * are logged through ip6_neighbor_syslog().  The packet is always dropped:
+ * next0 is never changed from the drop next index.
+ *
+ * Returns frame->n_vectors.
+ */
+static_always_inline uword
+icmp6_router_advertisement (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ uword n_packets = frame->n_vectors;
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+ u32 n_advertisements_rcvd = 0;
+
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_icmp_input_node.index); /* errors are accounted against the ip6 icmp input node */
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = n_packets;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ ip6_radv_t *radv_info = 0;
+ icmp6_router_advertisement_header_t *h0;
+ u32 bi0, options_len0, sw_if_index0, next0, error0;
+
+ bi0 = to_next[0] = from[0]; /* speculatively enqueue to the cached next */
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ h0 = ip6_next_header (ip0);
+ options_len0 =
+ clib_net_to_host_u16 (ip0->payload_length) - sizeof (h0[0]); /* NOTE(review): u32 arithmetic, wraps if payload_length is smaller than the RA header; this value bounds the option walk below */
+
+ error0 = ICMP6_ERROR_NONE;
+ sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+
+ /* Check that source address is link-local */
+ error0 = (!ip6_address_is_link_local_unicast (&ip0->src_address)) ?
+ ICMP6_ERROR_ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL : error0;
+
+ /* default is to drop */
+ next0 = ICMP6_ROUTER_SOLICITATION_NEXT_DROP;
+
+ n_advertisements_rcvd++; /* counted before validation, so rejected RAs are included */
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index0);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 =
+ ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ /* only support ethernet interface type for now */
+ error0 =
+ (!eth_if0) ? ICMP6_ERROR_ROUTER_SOLICITATION_UNSUPPORTED_INTF
+ : error0;
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty
+ (nm->if_radv_pool_index_by_sw_if_index, sw_if_index0, ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index0];
+
+ if (ri != ~0)
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ error0 =
+ ((!radv_info) ?
+ ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG :
+ error0);
+
+ if (error0 == ICMP6_ERROR_NONE)
+ {
+ /* validate advertised information */
+ if ((h0->current_hop_limit && radv_info->curr_hop_limit) /* a zero on either side skips the comparison */
+ && (h0->current_hop_limit !=
+ radv_info->curr_hop_limit))
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvCurHopLimit on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ if ((h0->flags & /* NOTE(review): compares the masked flag bit with adv_managed_flag directly; if that field holds 0/1 and the flag constant is not 1 this reports a mismatch whenever the bit is set - confirm */
+ ICMP6_ROUTER_DISCOVERY_FLAG_ADDRESS_CONFIG_VIA_DHCP)
+ != radv_info->adv_managed_flag)
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvManagedFlag on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ if ((h0->flags & /* NOTE(review): same masked-bit versus stored-flag comparison as above - confirm */
+ ICMP6_ROUTER_DISCOVERY_FLAG_OTHER_CONFIG_VIA_DHCP)
+ != radv_info->adv_other_flag)
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvOtherConfigFlag on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ if ((h0->
+ time_in_msec_between_retransmitted_neighbor_solicitations
+ && radv_info->
+ adv_time_in_msec_between_retransmitted_neighbor_solicitations)
+ && (h0->
+ time_in_msec_between_retransmitted_neighbor_solicitations
+ !=
+ clib_host_to_net_u32 (radv_info->
+ adv_time_in_msec_between_retransmitted_neighbor_solicitations)))
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvRetransTimer on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ if ((h0->neighbor_reachable_time_in_msec &&
+ radv_info->adv_neighbor_reachable_time_in_msec) &&
+ (h0->neighbor_reachable_time_in_msec !=
+ clib_host_to_net_u32
+ (radv_info->adv_neighbor_reachable_time_in_msec)))
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvReachableTime on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+
+ /* check for MTU or prefix options or .. */
+ u8 *opt_hdr = (u8 *) (h0 + 1);
+ while (options_len0 > 0)
+ {
+ icmp6_neighbor_discovery_option_header_t *o0 =
+ (icmp6_neighbor_discovery_option_header_t *)
+ opt_hdr;
+ int opt_len = o0->n_data_u64s << 3; /* option length is carried in units of 8 bytes; NOTE(review): type and length are read before the options_len0 < 2 check below */
+ icmp6_neighbor_discovery_option_type_t option_type =
+ o0->type;
+
+ if (options_len0 < 2)
+ {
+ ip6_neighbor_syslog (vm, LOG_ERR,
+ "malformed RA packet on %U from %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ break;
+ }
+
+ if (opt_len == 0)
+ {
+ ip6_neighbor_syslog (vm, LOG_ERR,
+ " zero length option in RA on %U from %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ break;
+ }
+ else if (opt_len > options_len0)
+ {
+ ip6_neighbor_syslog (vm, LOG_ERR,
+ "option length in RA packet greater than total length on %U from %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ break;
+ }
+
+ options_len0 -= opt_len; /* advance past this option before inspecting it; o0 still points at it */
+ opt_hdr += opt_len;
+
+ switch (option_type)
+ {
+ case ICMP6_NEIGHBOR_DISCOVERY_OPTION_mtu:
+ {
+ icmp6_neighbor_discovery_mtu_option_t *h =
+ (icmp6_neighbor_discovery_mtu_option_t
+ *) (o0);
+
+ if (opt_len < sizeof (*h))
+ break;
+
+ if ((h->mtu && radv_info->adv_link_mtu) &&
+ (h->mtu !=
+ clib_host_to_net_u32
+ (radv_info->adv_link_mtu)))
+ {
+ ip6_neighbor_syslog (vm, LOG_WARNING,
+ "our AdvLinkMTU on %U doesn't agree with %U",
+ format_vnet_sw_if_index_name,
+ vnm, sw_if_index0,
+ format_ip6_address,
+ &ip0->src_address);
+ }
+ }
+ break;
+
+ case ICMP6_NEIGHBOR_DISCOVERY_OPTION_prefix_information:
+ {
+ icmp6_neighbor_discovery_prefix_information_option_t
+ * h =
+ (icmp6_neighbor_discovery_prefix_information_option_t
+ *) (o0);
+
+ /* validate advertised prefix options */
+ ip6_radv_prefix_t *pr_info;
+ u32 preferred, valid;
+
+ if (opt_len < sizeof (*h))
+ break;
+
+ preferred =
+ clib_net_to_host_u32 (h->preferred_time);
+ valid = clib_net_to_host_u32 (h->valid_time);
+
+ /* look for matching prefix - if we are advertising it, it had better be consistent */
+ /* *INDENT-OFF* */
+ pool_foreach (pr_info, radv_info->adv_prefixes_pool,
+ ({
+
+ ip6_address_t mask;
+ ip6_address_mask_from_width(&mask, pr_info->prefix_len);
+
+ if(pr_info->enabled &&
+ (pr_info->prefix_len == h->dst_address_length) &&
+ ip6_address_is_equal_masked (&pr_info->prefix, &h->dst_address, &mask))
+ {
+ /* found it */
+ if(!pr_info->decrement_lifetime_flag &&
+ valid != pr_info->adv_valid_lifetime_in_secs)
+ {
+ ip6_neighbor_syslog(vm, LOG_WARNING,
+ "our ADV validlifetime on %U for %U does not agree with %U",
+ format_vnet_sw_if_index_name, vnm, sw_if_index0,format_ip6_address, &pr_info->prefix,
+ format_ip6_address, &h->dst_address);
+ }
+ if(!pr_info->decrement_lifetime_flag &&
+ preferred != pr_info->adv_pref_lifetime_in_secs)
+ {
+ ip6_neighbor_syslog(vm, LOG_WARNING,
+ "our ADV preferredlifetime on %U for %U does not agree with %U",
+ format_vnet_sw_if_index_name, vnm, sw_if_index0,format_ip6_address, &pr_info->prefix,
+ format_ip6_address, &h->dst_address);
+ }
+ }
+ break; /* NOTE(review): unconditional; whether this ends the whole pool walk or only the current element depends on how pool_foreach wraps its body - confirm intent */
+ }));
+ /* *INDENT-ON* */
+ break;
+ }
+ default:
+ /* skip this one */
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ p0->error = error_node->errors[error0];
+
+ if (error0 != ICMP6_ERROR_NONE)
+ vlib_error_count (vm, error_node->node_index, error0, 1);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ /* Account for router advertisements received. */
+ vlib_error_count (vm, error_node->node_index,
+ ICMP6_ERROR_ROUTER_ADVERTISEMENTS_RX,
+ n_advertisements_rcvd);
+
+ return frame->n_vectors;
+}
+
+/**
+ * @brief Add a multicast address to the advertised MLD set
+ *
+ * Looks the address up in radv_info->address_to_mldp_index. If it is not
+ * there yet, a group record is taken from radv_info->mldp_group_pool,
+ * entered in the hash under its pool index and initialised with no sources.
+ * An address that is already present is left untouched.
+ *
+ * @param radv_info per-interface state owning the MLD group pool and hash
+ * @param addr      multicast address to add; its bytes are the hash key
+ */
+static void
+ip6_neighbor_add_mld_prefix (ip6_radv_t * radv_info, ip6_address_t * addr)
+{
+  ip6_mldp_group_t *mcast_group_info;
+  uword *p;
+
+  /*
+   * lookup mldp info for this interface. The hash is keyed on the address
+   * bytes, so the key is addr itself, not the address of the local pointer.
+   */
+  p = mhash_get (&radv_info->address_to_mldp_index, addr);
+  mcast_group_info =
+    p ? pool_elt_at_index (radv_info->mldp_group_pool, p[0]) : 0;
+
+  /* add address */
+  if (!mcast_group_info)
+    {
+      /* add */
+      u32 mi;
+      pool_get (radv_info->mldp_group_pool, mcast_group_info);
+
+      mi = mcast_group_info - radv_info->mldp_group_pool;
+      mhash_set (&radv_info->address_to_mldp_index, addr, mi,
+		 /* old_value */ 0);
+
+      /*
+       * NOTE(review): record type 4 is presumably MLDv2
+       * CHANGE_TO_EXCLUDE_MODE (RFC 3810) - confirm.
+       */
+      mcast_group_info->type = 4;
+      mcast_group_info->mcast_source_address_pool = 0;
+      mcast_group_info->num_sources = 0;
+      clib_memcpy (&mcast_group_info->mcast_address, addr,
+		   sizeof (ip6_address_t));
+    }
+}
+
+/**
+ * @brief Delete a multicast address from the advertised MLD set
+ *
+ * Looks the address up in radv_info->address_to_mldp_index. If a group
+ * record exists, its hash entry is removed and the record is returned to
+ * radv_info->mldp_group_pool. Unknown addresses are ignored.
+ *
+ * @param radv_info per-interface state owning the MLD group pool and hash
+ * @param addr      multicast address to remove; its bytes are the hash key
+ */
+static void
+ip6_neighbor_del_mld_prefix (ip6_radv_t * radv_info, ip6_address_t * addr)
+{
+  ip6_mldp_group_t *mcast_group_info;
+  uword *p;
+
+  /* the key is the address itself, not the address of the local pointer */
+  p = mhash_get (&radv_info->address_to_mldp_index, addr);
+  mcast_group_info =
+    p ? pool_elt_at_index (radv_info->mldp_group_pool, p[0]) : 0;
+
+  if (mcast_group_info)
+    {
+      mhash_unset (&radv_info->address_to_mldp_index, addr,
+		   /* old_value */ 0);
+      pool_put (radv_info->mldp_group_pool, mcast_group_info);
+    }
+}
+
+/**
+ * @brief Add a reserved multicast group to the advertised MLD set
+ *
+ * Builds the reserved multicast address for the given scope and group id
+ * and passes it to ip6_neighbor_add_mld_prefix() for the state in a.
+ *
+ * @param a     per-interface state the group is added to
+ * @param scope multicast scope used to build the address
+ * @param group reserved group id used to build the address
+ */
+static void
+ip6_neighbor_add_mld_grp (ip6_radv_t * a,
+			  ip6_multicast_address_scope_t scope,
+			  ip6_multicast_link_local_group_id_t group)
+{
+  ip6_address_t group_addr;
+
+  ip6_set_reserved_multicast_address (&group_addr, scope, group);
+  ip6_neighbor_add_mld_prefix (a, &group_addr);
+}
+
+/**
+ * @brief create and initialize router advertisement parameters with default
+ * values for this interface, or tear them down again
+ *
+ * Only acts when the super interface of sw_if_index is a hardware interface
+ * with an ethernet interface behind it; otherwise ~0 is returned at once.
+ *
+ * - is_add set and no state yet: take an ip6_radv_t from nm->if_radv_pool,
+ *   fill in the defaults, lock the interface's IP6 multicast adjacency and
+ *   add the all-hosts, all-routers and mldv2-routers link-local groups.
+ * - is_add clear and state exists: unlock the multicast adjacency, flush and
+ *   free the prefix and MLD group pools and their hashes, and return the
+ *   ip6_radv_t to the pool.
+ * - any other combination leaves the state as it is.
+ *
+ * @param vnm         vnet main
+ * @param sw_if_index software interface index
+ * @param is_add      non-zero to create the state, zero to delete it
+ * @return index of the interface's entry in nm->if_radv_pool, or ~0 when
+ *         there is none (not ethernet, just deleted, or never created)
+ */
+u32
+ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm,
+ u32 sw_if_index, u32 is_add)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_radv_t *a = 0;
+ u32 ri = ~0;
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0 = 0;
+
+ /* lookup radv container - ethernet interfaces only */
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index);
+ if (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+ eth_if0 = ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ if (!eth_if0)
+ return ri;
+
+ /* grow the per-interface index vector on demand; ~0 marks "no state" */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if (ri != ~0)
+ {
+ a = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ if (!is_add)
+ {
+ ip6_radv_prefix_t *p;
+ ip6_mldp_group_t *m;
+
+ /* release the lock on the interface's mcast adj */
+ adj_unlock (a->mcast_adj_index);
+
+ /* clean up prefix and MLD group pools, dropping each hash entry */
+ /* *INDENT-OFF* */
+ pool_flush(p, a->adv_prefixes_pool,
+ ({
+ mhash_unset (&a->address_to_prefix_index, &p->prefix, 0);
+ }));
+ pool_flush (m, a->mldp_group_pool,
+ ({
+ mhash_unset (&a->address_to_mldp_index, &m->mcast_address, 0);
+ }));
+ /* *INDENT-ON* */
+
+ pool_free (a->mldp_group_pool);
+ pool_free (a->adv_prefixes_pool);
+
+ mhash_free (&a->address_to_prefix_index);
+ mhash_free (&a->address_to_mldp_index);
+
+ /* give the container back and mark the interface as having none */
+ pool_put (nm->if_radv_pool, a);
+ nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ~0;
+ ri = ~0;
+ }
+ }
+ else
+ {
+ if (is_add)
+ {
+ vnet_hw_interface_t *hw_if0;
+
+ hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+ pool_get (nm->if_radv_pool, a);
+
+ ri = a - nm->if_radv_pool;
+ nm->if_radv_pool_index_by_sw_if_index[sw_if_index] = ri;
+
+ /* initialize default values (most of which are zero) */
+ memset (a, 0, sizeof (a[0]));
+
+ a->sw_if_index = sw_if_index;
+ a->max_radv_interval = DEF_MAX_RADV_INTERVAL;
+ a->min_radv_interval = DEF_MIN_RADV_INTERVAL;
+ a->curr_hop_limit = DEF_CURR_HOP_LIMIT;
+ a->adv_router_lifetime_in_sec = DEF_DEF_RTR_LIFETIME;
+
+ /* send ll address source address option */
+ a->adv_link_layer_address = 1;
+
+ a->min_delay_between_radv = MIN_DELAY_BETWEEN_RAS;
+ a->max_delay_between_radv = MAX_DELAY_BETWEEN_RAS;
+ a->max_rtr_default_lifetime = MAX_DEF_RTR_LIFETIME;
+ /* seed both random number states from the cpu clock and step them once */
+ a->seed = (u32) clib_cpu_time_now ();
+ (void) random_u32 (&a->seed);
+ a->randomizer = clib_cpu_time_now ();
+ (void) random_u64 (&a->randomizer);
+
+ a->initial_adverts_count = MAX_INITIAL_RTR_ADVERTISEMENTS;
+ a->initial_adverts_sent = a->initial_adverts_count - 1;
+ a->initial_adverts_interval = MAX_INITIAL_RTR_ADVERT_INTERVAL;
+
+ /* default is to send */
+ a->send_radv = 1;
+
+ /* fill in radv_info for this interface that will be needed later */
+ a->adv_link_mtu = hw_if0->max_l3_packet_bytes[VLIB_RX];
+
+ clib_memcpy (a->link_layer_address, eth_if0->address, 6);
+
+ /* fill in default link-local address (this may be overridden) */
+ ip6_link_local_address_from_ethernet_address
+ (&a->link_local_address, eth_if0->address);
+
+ /* both hashes map an ip6_address_t key to a uword pool index */
+ mhash_init (&a->address_to_prefix_index, sizeof (uword),
+ sizeof (ip6_address_t));
+ mhash_init (&a->address_to_mldp_index, sizeof (uword),
+ sizeof (ip6_address_t));
+
+ /* locked here, released by adj_unlock() in the delete path above */
+ a->mcast_adj_index = adj_mcast_add_or_lock (FIB_PROTOCOL_IP6,
+ VNET_LINK_IP6,
+ sw_if_index);
+
+ /* add multicast groups we will always be reporting */
+ ip6_neighbor_add_mld_grp (a,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_hosts);
+ ip6_neighbor_add_mld_grp (a,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_all_routers);
+ ip6_neighbor_add_mld_grp (a,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_mldv2_routers);
+ }
+ }
+ return ri;
+}
+
+/* send an mldpv2 report
+ *
+ * Builds one multicast listener report (v2) for sw_if_index with one address
+ * record per entry of the interface's mldp_group_pool, sourced from the
+ * interface's link-local address and addressed to the link-local
+ * mldv2-routers group, then hands it to the "ip6-rewrite-mcast" node using
+ * the interface's multicast adjacency.
+ *
+ * Returns without sending when the interface has no ethernet interface, is
+ * not admin up, has no radv state, or when no buffer can be allocated.
+ */
+static void
+ip6_neighbor_send_mldpv2_report (u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vnm->vlib_main;
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ vnet_sw_interface_t *sw_if0;
+ ethernet_interface_t *eth_if0;
+ u32 ri;
+ int bogus_length;
+
+ ip6_radv_t *radv_info;
+ u16 payload_length;
+ vlib_buffer_t *b0;
+ ip6_header_t *ip0;
+ u32 *to_next;
+ vlib_frame_t *f;
+ u32 bo0;
+ u32 n_to_alloc = 1;
+ u32 n_allocated;
+
+ icmp6_multicast_listener_report_header_t *rh0;
+ icmp6_multicast_listener_report_packet_t *rp0;
+
+ sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index);
+ ASSERT (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ eth_if0 = ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+ if (!eth_if0 || !vnet_sw_interface_is_admin_up (vnm, sw_if_index))
+ return;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ if (ri == ~0)
+ return;
+
+ /* send report now - build a mldpv2 report packet */
+ n_allocated = vlib_buffer_alloc_from_free_list (vm,
+ &bo0,
+ n_to_alloc,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+ if (PREDICT_FALSE (n_allocated == 0))
+ {
+ clib_warning ("buffer allocation failure");
+ return;
+ }
+
+ b0 = vlib_get_buffer (vm, bo0);
+
+ /* size the buffer to the fixed report packet; address records are appended below */
+ b0->current_length = sizeof (icmp6_multicast_listener_report_packet_t);
+
+ /* payload starts as the report header and grows with every record added */
+ payload_length = sizeof (icmp6_multicast_listener_report_header_t);
+
+ b0->error = ICMP6_ERROR_NONE;
+
+ rp0 = vlib_buffer_get_current (b0);
+ ip0 = (ip6_header_t *) & rp0->ip;
+ rh0 = (icmp6_multicast_listener_report_header_t *) & rp0->report_hdr;
+
+ memset (rp0, 0x0, sizeof (icmp6_multicast_listener_report_packet_t));
+
+ ip0->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (0x6 << 28);
+
+ ip0->protocol = IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS;
+ /* for DEBUG - vnet driver won't seem to emit router alerts */
+ /* ip0->protocol = IP_PROTOCOL_ICMP6; */
+ ip0->hop_limit = 1;
+
+ rh0->icmp.type = ICMP6_multicast_listener_report_v2;
+
+ /* source address MUST be the link-local address */
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+ ip0->src_address = radv_info->link_local_address;
+
+ /* destination is all mldpv2 routers */
+ ip6_set_reserved_multicast_address (&ip0->dst_address,
+ IP6_MULTICAST_SCOPE_link_local,
+ IP6_MULTICAST_GROUP_ID_mldv2_routers);
+
+ /* add reports here */
+ ip6_mldp_group_t *m;
+ int num_addr_records = 0;
+ icmp6_multicast_address_record_t rr;
+
+ /* fill in the hop-by-hop extension header (router alert) info */
+ rh0->ext_hdr.next_hdr = IP_PROTOCOL_ICMP6;
+ rh0->ext_hdr.n_data_u64s = 0;
+
+ rh0->alert.type = IP6_MLDP_ALERT_TYPE;
+ rh0->alert.len = 2;
+ rh0->alert.value = 0;
+
+ rh0->pad.type = 1;
+ rh0->pad.len = 0;
+
+ /* checksum field must be zero while the checksum is computed below */
+ rh0->icmp.checksum = 0;
+
+ /* append one address record per joined group; the result of
+ * vlib_buffer_add_data() is not checked here */
+ /* *INDENT-OFF* */
+ pool_foreach (m, radv_info->mldp_group_pool,
+ ({
+ rr.type = m->type;
+ rr.aux_data_len_u32s = 0;
+ rr.num_sources = clib_host_to_net_u16 (m->num_sources);
+ clib_memcpy(&rr.mcast_addr, &m->mcast_address, sizeof(ip6_address_t));
+
+ num_addr_records++;
+
+ vlib_buffer_add_data
+ (vm, vlib_buffer_get_free_list_index (b0), bo0,
+ (void *)&rr, sizeof(icmp6_multicast_address_record_t));
+
+ payload_length += sizeof( icmp6_multicast_address_record_t);
+ }));
+ /* *INDENT-ON* */
+
+ rh0->rsvd = 0;
+ rh0->num_addr_records = clib_host_to_net_u16 (num_addr_records);
+
+ /* update lengths */
+ ip0->payload_length = clib_host_to_net_u16 (payload_length);
+
+ rh0->icmp.checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0,
+ &bogus_length);
+ ASSERT (bogus_length == 0);
+
+ /*
+ * OK to override w/ no regard for actual FIB, because
+ * ip6-rewrite only looks at the adjacency.
+ */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ vnet_main.local_interface_sw_if_index;
+
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = radv_info->mcast_adj_index;
+ b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
+ /* hand the single buffer to ip6-rewrite-mcast in a frame of its own */
+ vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite-mcast");
+
+ f = vlib_get_frame_to_node (vm, node->index);
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bo0;
+ f->n_vectors = 1;
+
+ vlib_put_frame_to_node (vm, node->index, f);
+ return;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) =
+{ /* Registers graph node "icmp6-router-solicitation"; its frames are
+   * processed by icmp6_router_solicitation(). The timer code in
+   * ip6_neighbor_process_timer_event() also enqueues locally built
+   * solicitations to this node through its index.
+   */
+ .function = icmp6_router_solicitation,
+ .name = "icmp6-router-solicitation",
+ /* frame vector elements are u32 values */
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+ /* next nodes: drop, multicast rewrite, or interface output */
+ .n_next_nodes = ICMP6_ROUTER_SOLICITATION_N_NEXT,
+ .next_nodes = {
+ [ICMP6_ROUTER_SOLICITATION_NEXT_DROP] = "error-drop",
+ [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_RW] = "ip6-rewrite-mcast",
+ [ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+/* send a RA or update the timer info etc..
+ *
+ * Walks every entry of nm->if_radv_pool:
+ *  - interfaces that are not admin up only get their timers and counters
+ *    reset;
+ *  - an MLDv2 report is sent once per "up" period (all_routers_mcast flag);
+ *  - when send_radv is set and next_multicast_time has been reached, the next
+ *    send time is drawn at random between min_radv_interval and
+ *    max_radv_interval (capped by initial_adverts_interval during the initial
+ *    burst) and a router solicitation with unspecified source and
+ *    destination is built and queued to the router-solicitation node.
+ *
+ * Buffers are batched into frames of up to VLIB_FRAME_SIZE entries.
+ * node and frame are not used. Always returns 0.
+ */
+static uword
+ip6_neighbor_process_timer_event (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ ip6_radv_t *radv_info;
+ vlib_frame_t *f = 0;
+ u32 n_this_frame = 0;
+ u32 n_left_to_next = 0;
+ u32 *to_next = 0;
+ u32 bo0;
+ icmp6_router_solicitation_header_t *h0;
+ vlib_buffer_t *b0;
+ f64 now = vlib_time_now (vm);
+
+ /* Interface ip6 radv info list */
+ /* *INDENT-OFF* */
+ pool_foreach (radv_info, nm->if_radv_pool,
+ ({
+ if( !vnet_sw_interface_is_admin_up (vnm, radv_info->sw_if_index))
+ {
+ /* interface is down: reset state so everything restarts when it comes up */
+ radv_info->initial_adverts_sent = radv_info->initial_adverts_count-1;
+ radv_info->next_multicast_time = now;
+ radv_info->last_multicast_time = now;
+ radv_info->last_radv_time = 0;
+ radv_info->all_routers_mcast = 0;
+ continue;
+ }
+
+ /* Make sure that we've joined the all-routers multicast group */
+ if(!radv_info->all_routers_mcast)
+ {
+ /* send an MLDv2 report for this interface */
+ ip6_neighbor_send_mldpv2_report(radv_info->sw_if_index);
+ radv_info->all_routers_mcast = 1;
+ }
+
+ /* is it time to send a multicast RA on this interface? */
+ if(radv_info->send_radv && (now >= radv_info->next_multicast_time))
+ {
+ u32 n_to_alloc = 1;
+ u32 n_allocated;
+
+ /* random delay between min_radv_interval and max_radv_interval */
+ f64 rfn = (radv_info->max_radv_interval - radv_info->min_radv_interval) *
+ random_f64 (&radv_info->seed) + radv_info->min_radv_interval;
+
+ /* multicast send - compute next multicast send time */
+ if( radv_info->initial_adverts_sent > 0)
+ {
+ radv_info->initial_adverts_sent--;
+ if(rfn > radv_info-> initial_adverts_interval)
+ rfn = radv_info-> initial_adverts_interval;
+
+ /* check to see if we are ceasing to send */
+ if( radv_info->initial_adverts_sent == 0)
+ if(radv_info->cease_radv)
+ radv_info->send_radv = 0;
+ }
+
+ radv_info->next_multicast_time = rfn + now;
+ radv_info->last_multicast_time = now;
+
+ /* send advert now - build a "solicited" router advert with unspecified source address */
+ n_allocated = vlib_buffer_alloc_from_free_list
+ (vm, &bo0, n_to_alloc, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+ if (PREDICT_FALSE(n_allocated == 0))
+ {
+ clib_warning ("buffer allocation failure");
+ continue;
+ }
+ b0 = vlib_get_buffer (vm, bo0);
+ b0->current_length = sizeof( icmp6_router_solicitation_header_t);
+ b0->error = ICMP6_ERROR_NONE;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = radv_info->sw_if_index;
+
+ h0 = vlib_buffer_get_current (b0);
+
+ memset (h0, 0, sizeof (icmp6_router_solicitation_header_t));
+
+ /* payload length is everything from the "neighbor" member onwards */
+ h0->ip.ip_version_traffic_class_and_flow_label = clib_host_to_net_u32 (0x6 << 28);
+ h0->ip.payload_length = clib_host_to_net_u16 (sizeof (icmp6_router_solicitation_header_t)
+ - STRUCT_OFFSET_OF (icmp6_router_solicitation_header_t, neighbor));
+ h0->ip.protocol = IP_PROTOCOL_ICMP6;
+ h0->ip.hop_limit = 255;
+
+ /* set src/dst address as "unspecified" this marks this packet as internally generated rather than received */
+ h0->ip.src_address.as_u64[0] = 0;
+ h0->ip.src_address.as_u64[1] = 0;
+
+ h0->ip.dst_address.as_u64[0] = 0;
+ h0->ip.dst_address.as_u64[1] = 0;
+
+ h0->neighbor.icmp.type = ICMP6_router_solicitation;
+
+ /* open a new frame to the router-solicitation node when none is pending */
+ if (PREDICT_FALSE(f == 0))
+ {
+ f = vlib_get_frame_to_node (vm, ip6_icmp_router_solicitation_node.index);
+ to_next = vlib_frame_vector_args (f);
+ n_left_to_next = VLIB_FRAME_SIZE;
+ n_this_frame = 0;
+ }
+
+ n_this_frame++;
+ n_left_to_next--;
+ to_next[0] = bo0;
+ to_next += 1;
+
+ /* frame is full: ship it and start a fresh one on the next packet */
+ if (PREDICT_FALSE(n_left_to_next == 0))
+ {
+ f->n_vectors = n_this_frame;
+ vlib_put_frame_to_node (vm, ip6_icmp_router_solicitation_node.index, f);
+ f = 0;
+ }
+ }
+ }));
+ /* *INDENT-ON* */
+
+ /* ship the last, partially filled frame */
+ if (f)
+ {
+ ASSERT (n_this_frame);
+ f->n_vectors = n_this_frame;
+ vlib_put_frame_to_node (vm, ip6_icmp_router_solicitation_node.index, f);
+ }
+ return 0;
+}
+
+/*
+ * Body of the neighbor discovery process node.
+ *
+ * Loops forever: waits until an event is posted or one second has passed.
+ * A wake-up without event data counts as a timer tick and runs
+ * ip6_neighbor_process_timer_event(). ICMP6_ND_EVENT_INIT and ~0 events are
+ * accepted and ignored; any other event type trips an ASSERT.
+ */
+static uword
+ip6_icmp_neighbor_discovery_event_process (vlib_main_t * vm,
+					   vlib_node_runtime_t * node,
+					   vlib_frame_t * frame)
+{
+  ip6_icmp_neighbor_discovery_event_data_t *ev_data;
+  uword ev_type;
+
+  for (;;)
+    {
+      vlib_process_wait_for_event_or_clock (vm, 1. /* seconds */ );
+      ev_data = vlib_process_get_event_data (vm, &ev_type);
+
+      if (ev_data == 0)
+	{
+	  /* nothing queued, so the clock expired: walk the interface list,
+	   * send RAs as appropriate and update the timer info */
+	  ip6_neighbor_process_timer_event (vm, node, frame);
+	  continue;
+	}
+
+      switch (ev_type)
+	{
+	case ICMP6_ND_EVENT_INIT:
+	case ~0:
+	  break;
+
+	default:
+	  ASSERT (0);
+	}
+
+      /* reset the length of the returned event vector */
+      _vec_len (ev_data) = 0;
+    }
+
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node,static) =
+{ /* Registers graph node "icmp6-router-advertisement"; its frames are
+   * processed by icmp6_router_advertisement().
+   */
+ .function = icmp6_router_advertisement,
+ .name = "icmp6-router-advertisement",
+ /* frame vector elements are u32 values */
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+ /* the only next node is the drop node */
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+vlib_node_registration_t ip6_icmp_neighbor_discovery_event_node = {
+ /* Process-type node that runs ip6_icmp_neighbor_discovery_event_process().
+  * NOTE(review): declared as a plain vlib_node_registration_t rather than
+  * through VLIB_REGISTER_NODE - confirm where it gets registered.
+  */
+ .function = ip6_icmp_neighbor_discovery_event_process,
+ .name = "ip6-icmp-neighbor-discovery-event-process",
+ .type = VLIB_NODE_TYPE_PROCESS,
+};
+
+/*
+ * Node function of "icmp6-neighbor-solicitation": runs the shared
+ * solicitation/advertisement handler in solicitation mode and returns
+ * whatever that handler returns.
+ */
+static uword
+icmp6_neighbor_solicitation (vlib_main_t * vm,
+			     vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  int is_solicitation = 1;
+
+  return icmp6_neighbor_solicitation_or_advertisement (vm, node, frame,
+						       is_solicitation);
+}
+
+/*
+ * Node function of "icmp6-neighbor-advertisement": runs the shared
+ * solicitation/advertisement handler in advertisement mode and returns
+ * whatever that handler returns.
+ */
+static uword
+icmp6_neighbor_advertisement (vlib_main_t * vm,
+			      vlib_node_runtime_t * node,
+			      vlib_frame_t * frame)
+{
+  int is_solicitation = 0;
+
+  return icmp6_neighbor_solicitation_or_advertisement (vm, node, frame,
+						       is_solicitation);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_neighbor_solicitation_node,static) =
+{ /* Registers graph node "icmp6-neighbor-solicitation"; its frames are
+   * processed by icmp6_neighbor_solicitation().
+   */
+ .function = icmp6_neighbor_solicitation,
+ .name = "icmp6-neighbor-solicitation",
+ /* frame vector elements are u32 values */
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+ /* next nodes: drop, or interface output for the reply */
+ .n_next_nodes = ICMP6_NEIGHBOR_SOLICITATION_N_NEXT,
+ .next_nodes = {
+ [ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP] = "error-drop",
+ [ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY] = "interface-output",
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_neighbor_advertisement_node,static) =
+{ /* Registers graph node "icmp6-neighbor-advertisement"; its frames are
+   * processed by icmp6_neighbor_advertisement().
+   */
+ .function = icmp6_neighbor_advertisement,
+ .name = "icmp6-neighbor-advertisement",
+ /* frame vector elements are u32 values */
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+ /* the only next node is the drop node */
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+/* API support functions
+ *
+ * ip6_neighbor_ra_config: update the router advertisement settings of the
+ * interface sw_if_index.
+ *
+ * Flag arguments (suppress, managed, other, ll_option, send_unicast, cease)
+ * select which setting to touch: zero leaves the setting alone, non-zero
+ * sets it, or restores its default when is_no is set. Numeric arguments
+ * work the same way with zero (or use_lifetime == 0 for lifetime) meaning
+ * "keep the current value".
+ *
+ * All validation happens before anything is written, so a rejected request
+ * leaves the interface state unchanged. On success the advertisement timers
+ * and the initial-advert counter are restarted.
+ *
+ * Returns 0 on success, VNET_API_ERROR_INVALID_SW_IF_INDEX when the
+ * interface has no radv state, VNET_API_ERROR_INVALID_VALUE when a value
+ * fails validation.
+ */
+int
+ip6_neighbor_ra_config (vlib_main_t * vm, u32 sw_if_index,
+ u8 suppress, u8 managed, u8 other,
+ u8 ll_option, u8 send_unicast, u8 cease,
+ u8 use_lifetime, u32 lifetime,
+ u32 initial_count, u32 initial_interval,
+ u32 max_interval, u32 min_interval, u8 is_no)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ int error;
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+ error = (ri != ~0) ? 0 : VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (!error)
+ {
+
+ ip6_radv_t *radv_info;
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ /* only a max interval given: derive the min interval as 75% of it */
+ if ((max_interval != 0) && (min_interval == 0))
+ min_interval = .75 * max_interval;
+
+ /* resolve each value: given -> use it (or the default with "no"), else keep current */
+ max_interval =
+ (max_interval !=
+ 0) ? ((is_no) ? DEF_MAX_RADV_INTERVAL : max_interval) :
+ radv_info->max_radv_interval;
+ min_interval =
+ (min_interval !=
+ 0) ? ((is_no) ? DEF_MIN_RADV_INTERVAL : min_interval) :
+ radv_info->min_radv_interval;
+ lifetime =
+ (use_lifetime !=
+ 0) ? ((is_no) ? DEF_DEF_RTR_LIFETIME : lifetime) :
+ radv_info->adv_router_lifetime_in_sec;
+
+ /* a non-zero lifetime is capped and must exceed the max interval */
+ if (lifetime)
+ {
+ if (lifetime > MAX_DEF_RTR_LIFETIME)
+ lifetime = MAX_DEF_RTR_LIFETIME;
+
+ if (lifetime <= max_interval)
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ /* a non-zero min interval must be at least 3 and at most 75% of max */
+ if (min_interval != 0)
+ {
+ if ((min_interval > .75 * max_interval) || (min_interval < 3))
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+
+ if ((initial_count > MAX_INITIAL_RTR_ADVERTISEMENTS) ||
+ (initial_interval > MAX_INITIAL_RTR_ADVERT_INTERVAL))
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ /*
+ if "flag" is set and is_no is true then restore default value else set value corresponding to "flag"
+ if "flag" is clear don't change corresponding value
+ */
+ radv_info->send_radv =
+ (suppress != 0) ? ((is_no != 0) ? 1 : 0) : radv_info->send_radv;
+ radv_info->adv_managed_flag =
+ (managed != 0) ? ((is_no) ? 0 : 1) : radv_info->adv_managed_flag;
+ radv_info->adv_other_flag =
+ (other != 0) ? ((is_no) ? 0 : 1) : radv_info->adv_other_flag;
+ radv_info->adv_link_layer_address =
+ (ll_option !=
+ 0) ? ((is_no) ? 1 : 0) : radv_info->adv_link_layer_address;
+ radv_info->send_unicast =
+ (send_unicast != 0) ? ((is_no) ? 0 : 1) : radv_info->send_unicast;
+ radv_info->cease_radv =
+ (cease != 0) ? ((is_no) ? 0 : 1) : radv_info->cease_radv;
+
+ radv_info->min_radv_interval = min_interval;
+ radv_info->max_radv_interval = max_interval;
+ radv_info->adv_router_lifetime_in_sec = lifetime;
+
+ radv_info->initial_adverts_count =
+ (initial_count !=
+ 0) ? ((is_no) ? MAX_INITIAL_RTR_ADVERTISEMENTS : initial_count) :
+ radv_info->initial_adverts_count;
+ radv_info->initial_adverts_interval =
+ (initial_interval !=
+ 0) ? ((is_no) ? MAX_INITIAL_RTR_ADVERT_INTERVAL : initial_interval) :
+ radv_info->initial_adverts_interval;
+
+ /* restart: "no ra-cease" turns sending back on */
+ if ((cease != 0) && (is_no))
+ radv_info->send_radv = 1;
+
+ /* restart the initial advert burst and make the next RA due immediately */
+ radv_info->initial_adverts_sent = radv_info->initial_adverts_count - 1;
+ radv_info->next_multicast_time = vlib_time_now (vm);
+ radv_info->last_multicast_time = vlib_time_now (vm);
+ radv_info->last_radv_time = 0;
+ }
+ return (error);
+}
+
+int
+ip6_neighbor_ra_prefix (vlib_main_t * vm, u32 sw_if_index,
+ ip6_address_t * prefix_addr, u8 prefix_len,
+ u8 use_default, u32 val_lifetime, u32 pref_lifetime,
+ u8 no_advertise, u8 off_link, u8 no_autoconfig,
+ u8 no_onlink, u8 is_no)
+{ /*
+   * Add, update or (with is_no) delete an advertised prefix on the
+   * interface sw_if_index. Prefixes are looked up by prefix_addr in the
+   * interface's address_to_prefix_index hash.
+   *
+   * - is_no: the prefix must exist with the same prefix_len; it is removed.
+   * - new prefix: created with default flags and lifetimes; with
+   *   use_default set the remaining arguments are not applied.
+   * - existing prefix: prefix_len must match; lifetimes and flags are
+   *   taken from the arguments (use_default is not consulted here).
+   *
+   * A lifetime of ~0 in either val_lifetime or pref_lifetime makes both
+   * lifetimes ~0 and turns lifetime decrementing off.
+   * Every successful call restarts the interface's advertisement timers.
+   *
+   * Returns 0 on success, VNET_API_ERROR_INVALID_SW_IF_INDEX when the
+   * interface has no radv state, VNET_API_ERROR_INVALID_VALUE when deleting
+   * an unknown prefix, VNET_API_ERROR_INVALID_VALUE_2 on a prefix length
+   * mismatch.
+   */
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ int error;
+
+ u32 ri;
+
+ /* look up the radv_t information for this interface */
+ vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+ ~0);
+
+ ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+ error = (ri != ~0) ? 0 : VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (!error)
+ {
+ f64 now = vlib_time_now (vm);
+ ip6_radv_t *radv_info;
+ radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+ /* prefix info add, delete or update */
+ ip6_radv_prefix_t *prefix;
+
+ /* lookup prefix info for this address on this interface */
+ uword *p = mhash_get (&radv_info->address_to_prefix_index, prefix_addr);
+
+ prefix = p ? pool_elt_at_index (radv_info->adv_prefixes_pool, p[0]) : 0;
+
+ if (is_no)
+ {
+ /* delete */
+ if (!prefix)
+ return VNET_API_ERROR_INVALID_VALUE; /* invalid prefix */
+
+ if (prefix->prefix_len != prefix_len)
+ return VNET_API_ERROR_INVALID_VALUE_2;
+
+ /* FIXME - Should the DP do this or the CP ? */
+ /* do specific delete processing here before returning */
+ /* try to remove from routing table */
+
+ mhash_unset (&radv_info->address_to_prefix_index, prefix_addr,
+ /* old_value */ 0);
+ pool_put (radv_info->adv_prefixes_pool, prefix);
+
+ /* restart the advertisement timers so the change goes out promptly */
+ radv_info->initial_adverts_sent =
+ radv_info->initial_adverts_count - 1;
+ radv_info->next_multicast_time = vlib_time_now (vm);
+ radv_info->last_multicast_time = vlib_time_now (vm);
+ radv_info->last_radv_time = 0;
+ return (error);
+ }
+
+ /* adding or changing */
+ if (!prefix)
+ {
+ /* add */
+ u32 pi;
+ pool_get (radv_info->adv_prefixes_pool, prefix);
+ pi = prefix - radv_info->adv_prefixes_pool;
+ mhash_set (&radv_info->address_to_prefix_index, prefix_addr, pi,
+ /* old_value */ 0);
+
+ memset (prefix, 0x0, sizeof (ip6_radv_prefix_t));
+
+ prefix->prefix_len = prefix_len;
+ clib_memcpy (&prefix->prefix, prefix_addr, sizeof (ip6_address_t));
+
+ /* initialize default values */
+ prefix->adv_on_link_flag = 1; /* L bit set */
+ prefix->adv_autonomous_flag = 1; /* A bit set */
+ prefix->adv_valid_lifetime_in_secs = DEF_ADV_VALID_LIFETIME;
+ prefix->adv_pref_lifetime_in_secs = DEF_ADV_PREF_LIFETIME;
+ prefix->enabled = 1;
+ prefix->decrement_lifetime_flag = 1;
+ prefix->deprecated_prefix_flag = 1;
+
+ if (off_link == 0)
+ {
+ /* FIXME - Should the DP do this or the CP ? */
+ /* insert prefix into routing table as a connected prefix */
+ }
+
+ /* keep the defaults just set: skip applying the caller's values */
+ if (use_default)
+ goto restart;
+ }
+ else
+ {
+
+ if (prefix->prefix_len != prefix_len)
+ return VNET_API_ERROR_INVALID_VALUE_2;
+
+ if (off_link != 0)
+ {
+ /* FIXME - Should the DP do this or the CP ? */
+ /* remove from routing table if already there */
+ }
+ }
+
+ /* ~0 in either lifetime means "infinite": set both and stop decrementing */
+ if ((val_lifetime == ~0) || (pref_lifetime == ~0))
+ {
+ prefix->adv_valid_lifetime_in_secs = ~0;
+ prefix->adv_pref_lifetime_in_secs = ~0;
+ prefix->decrement_lifetime_flag = 0;
+ }
+ else
+ {
+ prefix->adv_valid_lifetime_in_secs = val_lifetime;;
+ prefix->adv_pref_lifetime_in_secs = pref_lifetime;
+ }
+
+ /* copy remaining */
+ prefix->enabled = !(no_advertise != 0);
+ prefix->adv_on_link_flag = !((off_link != 0) || (no_onlink != 0));
+ prefix->adv_autonomous_flag = !(no_autoconfig != 0);
+
+ restart:
+ /* restart */
+ /* fill in the expiration times */
+ prefix->valid_lifetime_expires =
+ now + prefix->adv_valid_lifetime_in_secs;
+ prefix->pref_lifetime_expires = now + prefix->adv_pref_lifetime_in_secs;
+
+ radv_info->initial_adverts_sent = radv_info->initial_adverts_count - 1;
+ radv_info->next_multicast_time = vlib_time_now (vm);
+ radv_info->last_multicast_time = vlib_time_now (vm);
+ radv_info->last_radv_time = 0;
+ }
+ return (error);
+}
+
+/*
+ * CLI handler for per-interface IPv6 router advertisement configuration.
+ *
+ * Input is "<interface> [no] <option>", where <option> is one of:
+ *   prefix <ip6-address>/<len> [default | infinite | <valid> <preferred>]
+ *          [no-advertise] [off-link] [no-autoconfig] [no-onlink]
+ *   ra-managed-config-flag | ra-other-config-flag | ra-suppress |
+ *   ra-suppress-link-layer | ra-send-unicast | ra-cease |
+ *   ra-lifetime <n> | ra-initial <count> <interval> |
+ *   ra-interval <max> [<min>]
+ * The "ra-surpress" spellings are accepted as well.
+ *
+ * The interface must be an ethernet interface that already has radv state.
+ * A "prefix" option is passed to ip6_neighbor_ra_prefix(), everything else
+ * to ip6_neighbor_ra_config(); the codes those two return are not reported
+ * back to the CLI.
+ *
+ * Returns 0 on success (and when no input line could be read), otherwise a
+ * clib error describing the parse failure. An empty line is rejected as
+ * well, because there would be no interface to configure.
+ */
+clib_error_t *
+ip6_neighbor_cmd (vlib_main_t * vm, unformat_input_t * main_input,
+		  vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  clib_error_t *error = 0;
+  u8 is_no = 0;
+  u8 suppress = 0, managed = 0, other = 0;
+  u8 suppress_ll_option = 0, send_unicast = 0, cease = 0;
+  u8 use_lifetime = 0;
+  /* ~0 until an interface has been parsed successfully */
+  u32 sw_if_index = ~0;
+  u32 ra_lifetime = 0, ra_initial_count = 0, ra_initial_interval = 0;
+  u32 ra_max_interval = 0, ra_min_interval = 0;
+
+  unformat_input_t _line_input, *line_input = &_line_input;
+  vnet_sw_interface_t *sw_if0;
+
+  int add_radv_info = 1;
+  __attribute__ ((unused)) ip6_radv_t *radv_info = 0;
+  ip6_address_t ip6_addr;
+  u32 addr_len;
+
+
+  /* Get a line of input. */
+  if (!unformat_user (main_input, unformat_line_input, line_input))
+    return 0;
+
+  /* get basic radv info for this interface */
+  if (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+
+      if (unformat_user (line_input,
+			 unformat_vnet_sw_interface, vnm, &sw_if_index))
+	{
+	  u32 ri;
+	  ethernet_interface_t *eth_if0 = 0;
+
+	  sw_if0 = vnet_get_sup_sw_interface (vnm, sw_if_index);
+	  if (sw_if0->type == VNET_SW_INTERFACE_TYPE_HARDWARE)
+	    eth_if0 =
+	      ethernet_get_interface (&ethernet_main, sw_if0->hw_if_index);
+
+	  if (!eth_if0)
+	    {
+	      error =
+		clib_error_return (0, "Interface must be of ethernet type");
+	      goto done;
+	    }
+
+	  /* look up the radv_t information for this interface */
+	  vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index,
+				   sw_if_index, ~0);
+
+	  ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+	  if (ri != ~0)
+	    {
+	      radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+	    }
+	  else
+	    {
+	      error = clib_error_return (0, "unknown interface %U'",
+					 format_unformat_error, line_input);
+	      goto done;
+	    }
+	}
+      else
+	{
+	  error = clib_error_return (0, "invalid interface name %U'",
+				     format_unformat_error, line_input);
+	  goto done;
+	}
+    }
+
+  /*
+   * An empty line never reaches the interface parser above; without this
+   * check an unset sw_if_index would be handed to the config functions.
+   */
+  if (sw_if_index == ~0)
+    {
+      error = clib_error_return (0, "interface name required");
+      goto done;
+    }
+
+  /* get the rest of the command */
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "no"))
+	is_no = 1;
+      else if (unformat (line_input, "prefix %U/%d",
+			 unformat_ip6_address, &ip6_addr, &addr_len))
+	{
+	  add_radv_info = 0;
+	  break;
+	}
+      else if (unformat (line_input, "ra-managed-config-flag"))
+	{
+	  managed = 1;
+	  break;
+	}
+      else if (unformat (line_input, "ra-other-config-flag"))
+	{
+	  other = 1;
+	  break;
+	}
+      else if (unformat (line_input, "ra-suppress") ||
+	       unformat (line_input, "ra-surpress"))
+	{
+	  suppress = 1;
+	  break;
+	}
+      else if (unformat (line_input, "ra-suppress-link-layer") ||
+	       unformat (line_input, "ra-surpress-link-layer"))
+	{
+	  suppress_ll_option = 1;
+	  break;
+	}
+      else if (unformat (line_input, "ra-send-unicast"))
+	{
+	  send_unicast = 1;
+	  break;
+	}
+      else if (unformat (line_input, "ra-lifetime"))
+	{
+	  if (!unformat (line_input, "%d", &ra_lifetime))
+	    {
+	      error = unformat_parse_error (line_input);
+	      goto done;
+	    }
+	  use_lifetime = 1;
+	  break;
+	}
+      else if (unformat (line_input, "ra-initial"))
+	{
+	  if (!unformat
+	      (line_input, "%d %d", &ra_initial_count, &ra_initial_interval))
+	    {
+	      error = unformat_parse_error (line_input);
+	      goto done;
+	    }
+	  break;
+	}
+      else if (unformat (line_input, "ra-interval"))
+	{
+	  if (!unformat (line_input, "%d", &ra_max_interval))
+	    {
+	      error = unformat_parse_error (line_input);
+	      goto done;
+	    }
+
+	  /* the minimum interval is optional */
+	  if (!unformat (line_input, "%d", &ra_min_interval))
+	    ra_min_interval = 0;
+	  break;
+	}
+      else if (unformat (line_input, "ra-cease"))
+	{
+	  cease = 1;
+	  break;
+	}
+      else
+	{
+	  error = unformat_parse_error (line_input);
+	  goto done;
+	}
+    }
+
+  if (add_radv_info)
+    {
+      ip6_neighbor_ra_config (vm, sw_if_index,
+			      suppress, managed, other,
+			      suppress_ll_option, send_unicast, cease,
+			      use_lifetime, ra_lifetime,
+			      ra_initial_count, ra_initial_interval,
+			      ra_max_interval, ra_min_interval, is_no);
+    }
+  else
+    {
+      u32 valid_lifetime_in_secs = 0;
+      u32 pref_lifetime_in_secs = 0;
+      u8 use_prefix_default_values = 0;
+      u8 no_advertise = 0;
+      u8 off_link = 0;
+      u8 no_autoconfig = 0;
+      u8 no_onlink = 0;
+
+      /* optional lifetime selection: at most one of these is consumed */
+      while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+	{
+	  if (unformat (line_input, "default"))
+	    {
+	      use_prefix_default_values = 1;
+	      break;
+	    }
+	  else if (unformat (line_input, "infinite"))
+	    {
+	      valid_lifetime_in_secs = ~0;
+	      pref_lifetime_in_secs = ~0;
+	      break;
+	    }
+	  else if (unformat (line_input, "%d %d", &valid_lifetime_in_secs,
+			     &pref_lifetime_in_secs))
+	    break;
+	  else
+	    break;
+	}
+
+
+      /* remaining prefix flags; not parsed when "default" was given */
+      while (!use_prefix_default_values &&
+	     unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+	{
+	  if (unformat (line_input, "no-advertise"))
+	    no_advertise = 1;
+	  else if (unformat (line_input, "off-link"))
+	    off_link = 1;
+	  else if (unformat (line_input, "no-autoconfig"))
+	    no_autoconfig = 1;
+	  else if (unformat (line_input, "no-onlink"))
+	    no_onlink = 1;
+	  else
+	    {
+	      error = unformat_parse_error (line_input);
+	      goto done;
+	    }
+	}
+
+      ip6_neighbor_ra_prefix (vm, sw_if_index,
+			      &ip6_addr, addr_len,
+			      use_prefix_default_values,
+			      valid_lifetime_in_secs,
+			      pref_lifetime_in_secs,
+			      no_advertise,
+			      off_link, no_autoconfig, no_onlink, is_no);
+    }
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*
+ * Print one "address/length" line per entry of addrs to the CLI.
+ * addrs is a vector of indices into the ip6 lookup main's if_address_pool.
+ */
+static void
+ip6_print_addrs (vlib_main_t * vm, u32 * addrs)
+{
+  ip_lookup_main_t *lookup = &ip6_main.lookup_main;
+  u32 n_addrs = vec_len (addrs);
+  u32 idx = 0;
+
+  while (idx < n_addrs)
+    {
+      ip_interface_address_t *ia;
+      ip6_address_t *ip6;
+
+      ia = pool_elt_at_index (lookup->if_address_pool, addrs[idx]);
+      ip6 = ip_interface_address_get_address (lookup, ia);
+
+      vlib_cli_output (vm, "\t\t%U/%d",
+		       format_ip6_address, ip6, ia->address_length);
+      idx++;
+    }
+}
+
+/**
+ * @brief CLI handler for "show ip6 interface <interface>".
+ *
+ * Prints the interface's admin state, its IPv6 addresses grouped by scope,
+ * the joined multicast groups, the advertised prefixes and the router
+ * advertisement settings and counters held in the interface's ip6_radv_t.
+ *
+ * @param vm    vlib main, used for CLI output.
+ * @param input CLI input; must start with an interface name.
+ * @param cmd   the CLI command being executed (unused).
+ *
+ * @return 0 on success; an error when the interface cannot be parsed or
+ *         when the interface has no radv entry (IPv6 not enabled).
+ */
+static clib_error_t *
+show_ip6_interface_cmd (vlib_main_t * vm,
+                        unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  ip_lookup_main_t *lm = &ip6_main.lookup_main;
+  ip6_radv_t *radv_info;
+  ip_interface_address_t *a;
+  ip6_mldp_group_t *m;
+  ip6_radv_prefix_t *p;
+  u32 *link_scope = 0, *global_scope = 0;
+  u32 *local_scope = 0, *unknown_scope = 0;
+  u32 sw_if_index = ~0;
+  u32 ri, ai;
+
+  /* report an unparsable interface instead of silently doing nothing */
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    return clib_error_return (0, "unknown interface `%U'",
+                              format_unformat_error, input);
+
+  /* look up the radv_t information for this interface */
+  vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index,
+                           sw_if_index, ~0);
+  ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+  if (ri == ~0)
+    return clib_error_return (0, "IPv6 not enabled on interface");
+
+  radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+  vlib_cli_output (vm, "%U is admin %s\n",
+                   format_vnet_sw_interface_name, vnm,
+                   vnet_get_sw_interface (vnm, sw_if_index),
+                   (vnet_sw_interface_is_admin_up (vnm, sw_if_index) ?
+                    "up" : "down"));
+
+  /* walk the interface's address list and bucket each address by scope */
+  vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index,
+                           sw_if_index, ~0);
+  ai = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+
+  while (ai != (u32) ~ 0)
+    {
+      a = pool_elt_at_index (lm->if_address_pool, ai);
+      ip6_address_t *address = ip_interface_address_get_address (lm, a);
+
+      if (ip6_address_is_link_local_unicast (address))
+        vec_add1 (link_scope, ai);
+      else if (ip6_address_is_global_unicast (address))
+        vec_add1 (global_scope, ai);
+      else if (ip6_address_is_local_unicast (address))
+        vec_add1 (local_scope, ai);
+      else
+        vec_add1 (unknown_scope, ai);
+
+      ai = a->next_this_sw_interface;
+    }
+
+  /* a section header is only printed when the bucket is non-empty */
+  if (vec_len (link_scope))
+    {
+      vlib_cli_output (vm, "\tLink-local address(es):\n");
+      ip6_print_addrs (vm, link_scope);
+      vec_free (link_scope);
+    }
+
+  if (vec_len (local_scope))
+    {
+      vlib_cli_output (vm, "\tLocal unicast address(es):\n");
+      ip6_print_addrs (vm, local_scope);
+      vec_free (local_scope);
+    }
+
+  if (vec_len (global_scope))
+    {
+      vlib_cli_output (vm, "\tGlobal unicast address(es):\n");
+      ip6_print_addrs (vm, global_scope);
+      vec_free (global_scope);
+    }
+
+  if (vec_len (unknown_scope))
+    {
+      vlib_cli_output (vm, "\tOther-scope address(es):\n");
+      ip6_print_addrs (vm, unknown_scope);
+      vec_free (unknown_scope);
+    }
+
+  vlib_cli_output (vm, "\tJoined group address(es):\n");
+  /* *INDENT-OFF* */
+  pool_foreach (m, radv_info->mldp_group_pool,
+  ({
+    vlib_cli_output (vm, "\t\t%U\n", format_ip6_address,
+                     &m->mcast_address);
+  }));
+  /* *INDENT-ON* */
+
+  vlib_cli_output (vm, "\tAdvertised Prefixes:\n");
+  /* *INDENT-OFF* */
+  pool_foreach (p, radv_info->adv_prefixes_pool,
+  ({
+    vlib_cli_output (vm, "\t\tprefix %U, length %d\n",
+                     format_ip6_address, &p->prefix, p->prefix_len);
+  }));
+  /* *INDENT-ON* */
+
+  vlib_cli_output (vm, "\tMTU is %d\n", radv_info->adv_link_mtu);
+  /* the next four lines are fixed text, not derived from any state */
+  vlib_cli_output (vm, "\tICMP error messages are unlimited\n");
+  vlib_cli_output (vm, "\tICMP redirects are disabled\n");
+  vlib_cli_output (vm, "\tICMP unreachables are not sent\n");
+  vlib_cli_output (vm, "\tND DAD is disabled\n");
+  vlib_cli_output (vm, "\tND advertised reachable time is %d\n",
+                   radv_info->adv_neighbor_reachable_time_in_msec);
+  vlib_cli_output (vm,
+                   "\tND advertised retransmit interval is %d (msec)\n",
+                   radv_info->
+                   adv_time_in_msec_between_retransmitted_neighbor_solicitations);
+
+  vlib_cli_output (vm,
+                   "\tND router advertisements are sent every %d seconds (min interval is %d)\n",
+                   radv_info->max_radv_interval,
+                   radv_info->min_radv_interval);
+  vlib_cli_output (vm,
+                   "\tND router advertisements live for %d seconds\n",
+                   radv_info->adv_router_lifetime_in_sec);
+  vlib_cli_output (vm,
+                   "\tHosts %s stateless autoconfig for addresses\n",
+                   (radv_info->adv_managed_flag) ? "use" : " don't use");
+  vlib_cli_output (vm, "\tND router advertisements sent %d\n",
+                   radv_info->n_advertisements_sent);
+  vlib_cli_output (vm, "\tND router solicitations received %d\n",
+                   radv_info->n_solicitations_rcvd);
+  vlib_cli_output (vm, "\tND router solicitations dropped %d\n",
+                   radv_info->n_solicitations_dropped);
+
+  return 0;
+}
+
+/*?
+ * This command is used to display various IPv6 attributes on a given
+ * interface.
+ *
+ * @cliexpar
+ * Example of how to display IPv6 settings:
+ * @cliexstart{show ip6 interface GigabitEthernet2/0/0}
+ * GigabitEthernet2/0/0 is admin up
+ * Link-local address(es):
+ * fe80::ab8/64
+ * Joined group address(es):
+ * ff02::1
+ * ff02::2
+ * ff02::16
+ * ff02::1:ff00:ab8
+ * Advertised Prefixes:
+ * prefix fe80::fe:28ff:fe9c:75b3, length 64
+ * MTU is 1500
+ * ICMP error messages are unlimited
+ * ICMP redirects are disabled
+ * ICMP unreachables are not sent
+ * ND DAD is disabled
+ * ND advertised reachable time is 0
+ * ND advertised retransmit interval is 0 (msec)
+ * ND router advertisements are sent every 200 seconds (min interval is 150)
+ * ND router advertisements live for 600 seconds
+ * Hosts use stateless autoconfig for addresses
+ * ND router advertisements sent 19336
+ * ND router solicitations received 0
+ * ND router solicitations dropped 0
+ * @cliexend
+ * Example of output if IPv6 is not enabled on the interface:
+ * @cliexstart{show ip6 interface GigabitEthernet2/0/0}
+ * show ip6 interface: IPv6 not enabled on interface
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+/* Binds the "show ip6 interface" CLI path to show_ip6_interface_cmd. */
+VLIB_CLI_COMMAND (show_ip6_interface_command, static) =
+{
+  .path = "show ip6 interface",
+  .short_help = "show ip6 interface <interface>",
+  .function = show_ip6_interface_cmd,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Disable IPv6 on an interface once no non-link-local address uses it.
+ *
+ * Does nothing when the interface has no radv entry or when the entry's
+ * ref_count is non-zero.  Otherwise the stored link-local address is
+ * deleted, the radv entry is removed and the ip6 mfib is disabled for the
+ * interface.
+ *
+ * @param vm          vlib main.
+ * @param sw_if_index software interface index.
+ *
+ * @return the result of deleting the link-local address, or 0 when nothing
+ *         was done.
+ */
+clib_error_t *
+disable_ip6_interface (vlib_main_t * vm, u32 sw_if_index)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  vnet_main_t *vnm;
+  ip6_radv_t *radv;
+  clib_error_t *rv;
+  u32 radv_index;
+
+  /* find the radv entry, growing the index vector if necessary */
+  vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+                           ~0);
+  radv_index = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+  /* never created - nothing to tear down */
+  if (radv_index == ~0)
+    return 0;
+
+  vnm = vnet_get_main ();
+  radv = pool_elt_at_index (nm->if_radv_pool, radv_index);
+
+  /* ref_count tracks the non-link-local addresses still on the interface */
+  if (radv->ref_count != 0)
+    return 0;
+
+  /* removing the link-local address is what "disables" ipv6 here */
+  rv = ip6_add_del_interface_address (vm, sw_if_index,
+                                      &radv->link_local_address, 128,
+                                      1 /* is_del */ );
+
+  ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0 /* is_add */ );
+  ip6_mfib_interface_enable_disable (sw_if_index, 0);
+
+  return rv;
+}
+
+/**
+ * @brief Report whether an interface currently has a radv entry.
+ *
+ * @param vm          vlib main (unused).
+ * @param sw_if_index software interface index.
+ *
+ * @return 1 when a radv pool index is recorded for the interface, else 0.
+ */
+int
+ip6_interface_enabled (vlib_main_t * vm, u32 sw_if_index)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  u32 radv_index;
+
+  /* grow the index vector so the lookup below is always in range */
+  vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+                           ~0);
+  radv_index = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+  if (radv_index == ~0)
+    return 0;
+  return 1;
+}
+
+/**
+ * @brief Enable IPv6 on an interface by creating its radv entry and adding
+ *        a link-local address.
+ *
+ * Nothing happens when the interface already has a radv entry, when its
+ * super interface is not of hardware type, or when no ethernet interface
+ * is found for it.  The link-local address is derived from the ethernet
+ * MAC address; for sub and p2p interfaces the interface id is instead made
+ * up from an MD5 digest seeded with the radv entry's randomizer.
+ *
+ * @param vm          vlib main.
+ * @param sw_if_index software interface index.
+ *
+ * @return the result of adding the link-local address, or 0 when nothing
+ *         was done.
+ */
+clib_error_t *
+enable_ip6_interface (vlib_main_t * vm, u32 sw_if_index)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  vnet_main_t *vnm;
+  vnet_sw_interface_t *swif;
+  ethernet_interface_t *eth;
+  ip6_radv_t *radv;
+  ip6_address_t ll_addr;
+  clib_error_t *rv;
+  u32 radv_index;
+
+  /* look up the radv_t information for this interface */
+  vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index, sw_if_index,
+                           ~0);
+  radv_index = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+  /* already created - nothing to do */
+  if (radv_index != ~0)
+    return 0;
+
+  vnm = vnet_get_main ();
+
+  swif = vnet_get_sup_sw_interface (vnm, sw_if_index);
+  if (swif->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+    return 0;
+
+  eth = ethernet_get_interface (&ethernet_main, swif->hw_if_index);
+  if (!eth)
+    return 0;
+
+  /* the radv entry holds everything needed for router advertisements */
+  radv_index = ip6_neighbor_sw_interface_add_del (vnm, sw_if_index,
+                                                  1 /* is_add */ );
+  if (radv_index == ~0)
+    return 0;
+
+  radv = pool_elt_at_index (nm->if_radv_pool, radv_index);
+
+  ip6_link_local_address_from_ethernet_mac_address (&ll_addr, eth->address);
+
+  swif = vnet_get_sw_interface (vnm, sw_if_index);
+  if (swif->type == VNET_SW_INTERFACE_TYPE_SUB ||
+      swif->type == VNET_SW_INTERFACE_TYPE_P2P)
+    {
+      /* make up an interface id */
+      md5_context_t md5;
+      u8 digest[16];
+
+      ll_addr.as_u64[0] = radv->randomizer;
+
+      md5_init (&md5);
+      md5_add (&md5, &ll_addr, 16);
+      md5_finish (&md5, digest);
+
+      clib_memcpy (&ll_addr, digest, 16);
+
+      /* remember the digest's first half as the next seed */
+      radv->randomizer = ll_addr.as_u64[0];
+
+      ll_addr.as_u64[0] = clib_host_to_net_u64 (0xFE80000000000000ULL);
+      /* clear u bit */
+      ll_addr.as_u8[8] &= 0xfd;
+    }
+
+  ip6_mfib_interface_enable_disable (sw_if_index, 1);
+
+  /* adding the link-local address is what "enables" ipv6 here */
+  rv = ip6_add_del_interface_address (vm, sw_if_index, &ll_addr,
+                                      128 /* address width */ ,
+                                      0 /* is_del */ );
+
+  if (rv)
+    /* undo the radv entry created above */
+    ip6_neighbor_sw_interface_add_del (vnm, sw_if_index, 0 /* is_add */ );
+  else
+    radv->link_local_address = ll_addr;
+
+  return rv;
+}
+
+/**
+ * @brief CLI handler for "enable ip6 interface <interface>".
+ *
+ * @param vm    vlib main.
+ * @param input CLI input; must start with an interface name.
+ * @param cmd   the CLI command being executed (unused).
+ *
+ * @return the result of enable_ip6_interface(), or a parse error when the
+ *         interface cannot be identified.
+ */
+static clib_error_t *
+enable_ip6_interface_cmd (vlib_main_t * vm,
+                          unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 sw_if_index = ~0;
+
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    return clib_error_return (0, "unknown interface `%U'",
+                              format_unformat_error, input);
+
+  /* hand failures back to the CLI instead of dropping them */
+  return enable_ip6_interface (vm, sw_if_index);
+}
+
+/*?
+ * This command is used to enable IPv6 on a given interface.
+ *
+ * @cliexpar
+ * Example of how to enable IPv6 on a given interface:
+ * @cliexcmd{enable ip6 interface GigabitEthernet2/0/0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (enable_ip6_interface_command, static) =
+{
+  .path = "enable ip6 interface",
+  .function = enable_ip6_interface_cmd,
+  .short_help = "enable ip6 interface <interface>",
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief CLI handler for "disable ip6 interface <interface>".
+ *
+ * @param vm    vlib main.
+ * @param input CLI input; must start with an interface name.
+ * @param cmd   the CLI command being executed (unused).
+ *
+ * @return the result of disable_ip6_interface(), or a parse error when the
+ *         interface cannot be identified.
+ */
+static clib_error_t *
+disable_ip6_interface_cmd (vlib_main_t * vm,
+                           unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 sw_if_index = ~0;
+
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    return clib_error_return (0, "unknown interface `%U'",
+                              format_unformat_error, input);
+
+  return disable_ip6_interface (vm, sw_if_index);
+}
+
+/*?
+ * This command is used to disable IPv6 on a given interface.
+ *
+ * @cliexpar
+ * Example of how to disable IPv6 on a given interface:
+ * @cliexcmd{disable ip6 interface GigabitEthernet2/0/0}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (disable_ip6_interface_command, static) =
+{
+  .path = "disable ip6 interface",
+  .function = disable_ip6_interface_cmd,
+  .short_help = "disable ip6 interface <interface>",
+};
+/* *INDENT-ON* */
+
+/*?
+ * This command is used to configure the neighbor discovery
+ * parameters on a given interface. Use the '<em>show ip6 interface</em>'
+ * command to display some of the current neighbor discovery parameters
+ * on a given interface. This command has three formats:
+ *
+ *
+ * <b>Format 1 - Router Advertisement Options:</b> (Only one can be entered in a single command)
+ *
+ * '<em><b>ip6 nd <interface> [no] [ra-managed-config-flag] | [ra-other-config-flag] | [ra-suppress] | [ra-suppress-link-layer] | [ra-send-unicast] | [ra-lifetime <lifetime>] | [ra-initial <cnt> <interval>] | [ra-interval <max-interval> [<min-interval>]] | [ra-cease]</b></em>'
+ *
+ * Where:
+ *
+ * <em>[no] ra-managed-config-flag</em> - Advertises in ICMPv6
+ * router-advertisement messages to use stateful address
+ * auto-configuration to obtain address information (sets the M-bit).
+ * Default is the M-bit is not set and the '<em>no</em>' option
+ * returns it to this default state.
+ *
+ * <em>[no] ra-other-config-flag</em> - Indicates in ICMPv6
+ * router-advertisement messages that hosts use stateful auto
+ * configuration to obtain nonaddress related information (sets
+ * the O-bit). Default is the O-bit is not set and the '<em>no</em>'
+ * option returns it to this default state.
+ *
+ * <em>[no] ra-suppress</em> - Disables sending ICMPv6 router-advertisement
+ * messages. The '<em>no</em>' option implies to enable sending ICMPv6
+ * router-advertisement messages.
+ *
+ * <em>[no] ra-suppress-link-layer</em> - Indicates not to include the
+ * optional source link-layer address in the ICMPv6 router-advertisement
+ * messages. Default is to include the optional source link-layer address
+ * and the '<em>no</em>' option returns it to this default state.
+ *
+ * <em>[no] ra-send-unicast</em> - Use the source address of the
+ * router-solicitation message if available. The default is to use the
+ * all-nodes multicast address, and the '<em>no</em>' option returns
+ * it to this default state.
+ *
+ * <em>[no] ra-lifetime <lifetime></em> - Advertises the lifetime of a
+ * default router in ICMPv6 router-advertisement messages. The range is
+ * from 0 to 9000 seconds. '<em><lifetime></em>' must be greater than
+ * '<em><max-interval></em>'. The default value is 600 seconds and the
+ * '<em>no</em>' option returns it to this default value.
+ *
+ * <em>[no] ra-initial <cnt> <interval></em> - Number of initial ICMPv6
+ * router-advertisement messages sent and the interval between each
+ * message. Range for count is 1 - 3 and default is 3. Range for interval
+ * is 1 to 16 seconds, and default is 16 seconds. The '<em>no</em>' option
+ * returns both to their default value.
+ *
+ * <em>[no] ra-interval <max-interval> [<min-interval>]</em> - Configures the
+ * interval between sending ICMPv6 router-advertisement messages. The
+ * range for max-interval is from 4 to 200 seconds. min-interval can not
+ * be more than 75% of max-interval. If not set, min-interval will be
+ * set to 75% of max-interval. The range for min-interval is from 3 to
+ * 150 seconds. The '<em>no</em>' option returns both to their default
+ * value.
+ *
+ * <em>[no] ra-cease</em> - Cease sending ICMPv6 router-advertisement messages.
+ * The '<em>no</em>' options implies to start (or restart) sending
+ * ICMPv6 router-advertisement messages.
+ *
+ *
+ * <b>Format 2 - Prefix Options:</b>
+ *
+ * '<em><b>ip6 nd <interface> [no] prefix <ip6-address>/<width> [<valid-lifetime> <pref-lifetime> | infinite] [no-advertise] [off-link] [no-autoconfig] [no-onlink]</b></em>'
+ *
+ * Where:
+ *
+ * <em>no</em> - All additional flags are ignored and the prefix is deleted.
+ *
+ * <em><valid-lifetime> <pref-lifetime></em> - '<em><valid-lifetime></em>' is the
+ * length of time in seconds during which the prefix is valid for the purpose of
+ * on-link determination. Range is 7203 to 2592000 seconds and default is 2592000
+ * seconds (30 days). '<em><pref-lifetime></em>' is the preferred lifetime and is the
+ * length of time in seconds during which addresses generated from the prefix remain
+ * preferred. Range is 0 to 604800 seconds and default is 604800 seconds (7 days).
+ *
+ * <em>infinite</em> - Both '<em><valid-lifetime></em>' and '<em><pref-lifetime></em>'
+ * are infinite, no timeout.
+ *
+ * <em>no-advertise</em> - Do not send full router address in prefix
+ * advertisement. Default is to advertise (i.e. - This flag is off by default).
+ *
+ * <em>off-link</em> - Prefix is off-link, clear L-bit in packet. Default is on-link
+ * (i.e. - This flag is off and L-bit in packet is set by default and this prefix can
+ * be used for on-link determination). '<em>no-onlink</em>' also controls the L-bit.
+ *
+ * <em>no-autoconfig</em> - Do not use prefix for autoconfiguration, clear A-bit in packet.
+ * Default is autoconfig (i.e. - This flag is off and A-bit in packet is set by default).
+ *
+ * <em>no-onlink</em> - Do not use prefix for onlink determination, clear L-bit in packet.
+ * Default is on-link (i.e. - This flag is off and L-bit in packet is set by default and
+ * this prefix can be used for on-link determination). '<em>off-link</em>' also controls
+ * the L-bit.
+ *
+ *
+ * <b>Format 3: - Default of Prefix:</b>
+ *
+ * '<em><b>ip6 nd <interface> [no] prefix <ip6-address>/<width> default</b></em>'
+ *
+ * When a new prefix is added (or existing one is being overwritten) <em>default</em>
+ * uses default values for the prefix. If <em>no</em> is used, the <em>default</em>
+ * is ignored and the prefix is deleted.
+ *
+ *
+ * @cliexpar
+ * Example of how to set a router advertisement option:
+ * @cliexcmd{ip6 nd GigabitEthernet2/0/0 ra-interval 100 20}
+ * Example of how to add a prefix:
+ * @cliexcmd{ip6 nd GigabitEthernet2/0/0 prefix fe80::fe:28ff:fe9c:75b3/64 infinite no-advertise}
+ * Example of how to delete a prefix:
+ * @cliexcmd{ip6 nd GigabitEthernet2/0/0 no prefix fe80::fe:28ff:fe9c:75b3/64}
+?*/
+/* *INDENT-OFF* */
+/* Binds the "ip6 nd" CLI path to ip6_neighbor_cmd. */
+VLIB_CLI_COMMAND (ip6_nd_command, static) =
+{
+  .path = "ip6 nd",
+  .function = ip6_neighbor_cmd,
+  .short_help = "ip6 nd <interface> ...",
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Replace an interface's link-local address.
+ *
+ * Enables IPv6 on the interface first (a no-op if already enabled), then
+ * deletes the link-local address stored in the radv entry and adds the new
+ * one.  The stored address is only updated when both steps succeed.
+ *
+ * @param vm          vlib main.
+ * @param sw_if_index software interface index.
+ * @param address     new address; must be link-local unicast.
+ *
+ * @return 0 on success, otherwise an error.  vnm->api_errno is also set
+ *         when the address is not link-local or IPv6 could not be enabled.
+ */
+clib_error_t *
+set_ip6_link_local_address (vlib_main_t * vm,
+                            u32 sw_if_index, ip6_address_t * address)
+{
+  clib_error_t *error = 0;
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  u32 ri;
+  ip6_radv_t *radv_info;
+  vnet_main_t *vnm = vnet_get_main ();
+
+  if (!ip6_address_is_link_local_unicast (address))
+    {
+      vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_LINK_LOCAL;
+      return clib_error_return (0, "address not link-local");
+    }
+
+  /* call enable ipv6; this also sizes the index vector read below */
+  enable_ip6_interface (vm, sw_if_index);
+
+  ri = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+
+  if (ri == ~0)
+    {
+      vnm->api_errno = VNET_API_ERROR_IP6_NOT_ENABLED;
+      return clib_error_return (0, "ip6 not enabled for interface");
+    }
+
+  radv_info = pool_elt_at_index (nm->if_radv_pool, ri);
+
+  /* save if link local address (overwrite default) */
+
+  /*
+   * delete the old one
+   * NOTE(review): if this delete fires the add/del-address callback
+   * registered in ip6_neighbor_init, disable_ip6_interface() may run while
+   * radv_info is still in use here - confirm radv_info stays valid.
+   */
+  error = ip6_add_del_interface_address (vm, sw_if_index,
+                                         &radv_info->link_local_address,
+                                         128, 1 /* is_del */ );
+  if (error)
+    return error;
+
+  /* add the new one */
+  error = ip6_add_del_interface_address (vm, sw_if_index,
+                                         address, 128, 0 /* is_del */ );
+  if (!error)
+    radv_info->link_local_address = *address;
+
+  return error;
+}
+
+/**
+ * @brief CLI handler for
+ *        "set ip6 link-local address <interface> <ip6-address>".
+ *
+ * Both arguments are required; a missing or unparsable argument yields an
+ * error rather than calling set_ip6_link_local_address() with
+ * uninitialized values.
+ *
+ * @param vm    vlib main.
+ * @param input CLI input: interface name followed by an IPv6 address.
+ * @param cmd   the CLI command being executed (unused).
+ *
+ * @return the result of set_ip6_link_local_address(), or a parse error.
+ */
+clib_error_t *
+set_ip6_link_local_address_cmd (vlib_main_t * vm,
+                                unformat_input_t * input,
+                                vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip6_address_t ip6_addr;
+  u32 sw_if_index = ~0;
+
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    return clib_error_return (0, "unknown interface `%U'",
+                              format_unformat_error, input);
+
+  /* get the rest of the command: exactly one address is expected */
+  if (!unformat (input, "%U", unformat_ip6_address, &ip6_addr))
+    return (unformat_parse_error (input));
+
+  return set_ip6_link_local_address (vm, sw_if_index, &ip6_addr);
+}
+
+/*?
+ * This command is used to assign an IPv6 Link-local address to an
+ * interface. This command will enable IPv6 on an interface if it
+ * is not already enabled. Use the '<em>show ip6 interface</em>' command
+ * to display the assigned Link-local address.
+ *
+ * @cliexpar
+ * Example of how to assign an IPv6 Link-local address to an interface:
+ * @cliexcmd{set ip6 link-local address GigabitEthernet2/0/0 FE80::AB8}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_link_local_address_command, static) =
+{
+  .path = "set ip6 link-local address",
+  .short_help = "set ip6 link-local address <interface> <ip6-address>",
+  .function = set_ip6_link_local_address_cmd,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Callback run when an ip6 interface address is added or deleted.
+ *
+ * On add: tries to enable IPv6 on the interface, bumps the radv entry's
+ * ref_count for non-link-local addresses and adds the address's
+ * solicited-node multicast group.  On delete: removes that group, clears
+ * all_routers_mcast, drops the ref_count for non-link-local addresses and
+ * then calls disable_ip6_interface().
+ *
+ * @param im               ip6 main (unused).
+ * @param opaque           callback opaque (unused).
+ * @param sw_if_index      software interface index.
+ * @param address          the address being added or deleted.
+ * @param address_length   prefix length (unused).
+ * @param if_address_index interface address pool index (unused).
+ * @param is_delete        non-zero when the address is being deleted.
+ */
+static void
+ip6_neighbor_add_del_interface_address (ip6_main_t * im,
+                                        uword opaque,
+                                        u32 sw_if_index,
+                                        ip6_address_t * address,
+                                        u32 address_length,
+                                        u32 if_address_index, u32 is_delete)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vlib_main_t *vm = vnm->vlib_main;
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  ip6_radv_t *radv = 0;
+  ip6_address_t snma;
+  u32 radv_index;
+
+  /* solicited-node multicast address: low three bytes come from address */
+  ip6_set_solicited_node_multicast_address (&snma, 0);
+  snma.as_u8[0xd] = address->as_u8[0xd];
+  snma.as_u8[0xe] = address->as_u8[0xe];
+  snma.as_u8[0xf] = address->as_u8[0xf];
+
+  /* on add, try to create radv_info - does nothing if already enabled */
+  if (!is_delete)
+    enable_ip6_interface (vm, sw_if_index);
+
+  /* look up the radv_t information for this interface */
+  vec_validate_init_empty (nm->if_radv_pool_index_by_sw_if_index,
+                           sw_if_index, ~0);
+  radv_index = nm->if_radv_pool_index_by_sw_if_index[sw_if_index];
+  if (radv_index != ~0)
+    radv = pool_elt_at_index (nm->if_radv_pool, radv_index);
+
+  if (!is_delete)
+    {
+      if (radv)
+        {
+          /* link-local addresses are not counted */
+          if (!ip6_address_is_link_local_unicast (address))
+            radv->ref_count++;
+
+          ip6_neighbor_add_mld_prefix (radv, &snma);
+        }
+      return;
+    }
+
+  if (radv)
+    {
+      ip6_neighbor_del_mld_prefix (radv, &snma);
+
+      radv->all_routers_mcast = 0;
+
+      /* link-local addresses are not counted */
+      if (!ip6_address_is_link_local_unicast (address))
+        radv->ref_count--;
+    }
+
+  /* Ensure that IPv6 is disabled, and LL removed after ref_count reaches 0 */
+  disable_ip6_interface (vm, sw_if_index);
+}
+
+/**
+ * @brief Set the configured neighbor cache size limit.
+ *
+ * @param neighbor_limit new value for limit_neighbor_cache_size.
+ *
+ * @return always 0.
+ */
+clib_error_t *
+ip6_set_neighbor_limit (u32 neighbor_limit)
+{
+  ip6_neighbor_main.limit_neighbor_cache_size = neighbor_limit;
+
+  return 0;
+}
+
+/**
+ * @brief Callback run when an interface is bound to a different ip6 table.
+ *
+ * Collects the pool indices of every neighbor on the interface, then for
+ * each one removes its adj-fib from the old table and adds it to the new.
+ *
+ * @param im            ip6 main (unused).
+ * @param opaque        callback opaque (unused).
+ * @param sw_if_index   interface being re-bound.
+ * @param new_fib_index table the interface is now bound to.
+ * @param old_fib_index table the interface was bound to.
+ */
+static void
+ip6_neighbor_table_bind (ip6_main_t * im,
+                         uword opaque,
+                         u32 sw_if_index,
+                         u32 new_fib_index, u32 old_fib_index)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  ip6_neighbor_t *nbr = NULL;
+  u32 *affected = 0;
+  u32 k;
+
+  /* first pass: only record indices, the pool is not touched here */
+  /* *INDENT-OFF* */
+  pool_foreach (nbr, nm->neighbor_pool,
+  ({
+    if (nbr->key.sw_if_index == sw_if_index)
+      vec_add1 (affected, nbr - nm->neighbor_pool);
+  }));
+  /* *INDENT-ON* */
+
+  /* second pass: move each recorded neighbor between the fib tables */
+  for (k = 0; k < vec_len (affected); k++)
+    {
+      nbr = pool_elt_at_index (nm->neighbor_pool, affected[k]);
+      ip6_neighbor_adj_fib_remove (nbr, old_fib_index);
+      ip6_neighbor_adj_fib_add (nbr, new_fib_index);
+    }
+
+  vec_free (affected);
+}
+
+/**
+ * @brief Module init for ip6 neighbor discovery.
+ *
+ * Sets up the neighbor, pending-resolution and mac-change hashes,
+ * registers the ICMPv6 ND/RA message handlers and the ND event node,
+ * hooks the interface-address and table-bind callbacks into ip6_main and
+ * applies the default neighbor cache limit.
+ *
+ * @param vm vlib main.
+ *
+ * @return always 0.
+ */
+static clib_error_t *
+ip6_neighbor_init (vlib_main_t * vm)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  ip6_main_t *im = &ip6_main;
+
+  mhash_init (&nm->neighbor_index_by_key,
+              /* value size */ sizeof (uword),
+              /* key size */ sizeof (ip6_neighbor_key_t));
+
+  icmp6_register_type (vm, ICMP6_neighbor_solicitation,
+                       ip6_icmp_neighbor_solicitation_node.index);
+  icmp6_register_type (vm, ICMP6_neighbor_advertisement,
+                       ip6_icmp_neighbor_advertisement_node.index);
+  icmp6_register_type (vm, ICMP6_router_solicitation,
+                       ip6_icmp_router_solicitation_node.index);
+  icmp6_register_type (vm, ICMP6_router_advertisement,
+                       ip6_icmp_router_advertisement_node.index);
+
+  /* handler node for ip6 neighbor discovery events and timers */
+  vlib_register_node (vm, &ip6_icmp_neighbor_discovery_event_node);
+
+  /* add call backs */
+  ip6_add_del_interface_address_callback_t cb;
+  memset (&cb, 0x0, sizeof (ip6_add_del_interface_address_callback_t));
+
+  /* when an interface address changes... */
+  cb.function = ip6_neighbor_add_del_interface_address;
+  cb.function_opaque = 0;
+  vec_add1 (im->add_del_interface_address_callbacks, cb);
+
+  /* when an interface is bound to another table... */
+  ip6_table_bind_callback_t cbt;
+  /* zeroed like cb above so no field is copied uninitialized */
+  memset (&cbt, 0x0, sizeof (ip6_table_bind_callback_t));
+  cbt.function = ip6_neighbor_table_bind;
+  cbt.function_opaque = 0;
+  vec_add1 (im->table_bind_callbacks, cbt);
+
+  mhash_init (&nm->pending_resolutions_by_address,
+              /* value size */ sizeof (uword),
+              /* key size */ sizeof (ip6_address_t));
+
+  mhash_init (&nm->mac_changes_by_address,
+              /* value size */ sizeof (uword),
+              /* key size */ sizeof (ip6_address_t));
+
+  /* default, configurable */
+  nm->limit_neighbor_cache_size = 50000;
+
+  /* ~0 means no wildcard ND publisher node is set */
+  nm->wc_ip6_nd_publisher_node = (uword) ~ 0;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ip6_neighbor_init);
+
+
+/**
+ * @brief Queue a pending-resolution record for an ip6 address.
+ *
+ * A new pending_resolution_t is taken from the pool and becomes the head
+ * of the per-address list; the previous head (if any) is linked behind it.
+ *
+ * @param vnm         vnet main (unused).
+ * @param address_arg pointer to the ip6_address_t being resolved.
+ * @param node_index  stored in the record's node_index.
+ * @param type_opaque stored in the record's type_opaque.
+ * @param data        stored in the record's data.
+ */
+void
+vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm,
+                                             void *address_arg,
+                                             uword node_index,
+                                             uword type_opaque, uword data)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  ip6_address_t *address = address_arg;
+  pending_resolution_t *rec;
+  uword *head;
+
+  pool_get (nm->pending_resolutions, rec);
+
+  rec->next_index = ~0;
+  rec->node_index = node_index;
+  rec->type_opaque = type_opaque;
+  rec->data = data;
+
+  head = mhash_get (&nm->pending_resolutions_by_address, address);
+  if (head != 0)
+    {
+      /* the new record goes in front of the existing list */
+      rec->next_index = head[0];
+      mhash_unset (&nm->pending_resolutions_by_address, address, 0);
+    }
+
+  /* the hash always points at the list head */
+  mhash_set (&nm->pending_resolutions_by_address, address,
+             rec - nm->pending_resolutions, 0 /* old value */ );
+}
+
+/**
+ * @brief Add or remove a MAC-change registration for an ip6 address.
+ *
+ * Registrations for one address form a singly linked list of
+ * pending_resolution_t pool elements; the hash value is the head index and
+ * ~0 terminates the list.  A registration is identified by
+ * (node_index, type_opaque, pid).
+ *
+ * @param vnm           vnet main (unused).
+ * @param data_callback stored on add; on delete it is invoked, if non-NULL,
+ *                      as void (*)(u32, u8 *) with the entry's data and 0.
+ * @param pid           client pid, part of the identity.
+ * @param address_arg   pointer to the ip6_address_t of interest.
+ * @param node_index    part of the identity.
+ * @param type_opaque   part of the identity.
+ * @param data          stored in the entry.
+ * @param is_add        non-zero to add, zero to delete.
+ *
+ * @return 0 on success, VNET_API_ERROR_ENTRY_ALREADY_EXISTS when adding a
+ *         duplicate, VNET_API_ERROR_NO_SUCH_ENTRY when deleting an unknown
+ *         registration.
+ */
+int
+vnet_add_del_ip6_nd_change_event (vnet_main_t * vnm,
+                                  void *data_callback,
+                                  u32 pid,
+                                  void *address_arg,
+                                  uword node_index,
+                                  uword type_opaque, uword data, int is_add)
+{
+  ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+  ip6_address_t *address = address_arg;
+
+  /* Try to find an existing entry */
+  u32 *first = (u32 *) mhash_get (&nm->mac_changes_by_address, address);
+  u32 *p = first;
+  /* pool index of the element whose next_index p points at; ~0 while p
+   * still points at the hash value */
+  u32 prev_index = ~0;
+  pending_resolution_t *mc;
+  while (p && *p != ~0)
+    {
+      mc = pool_elt_at_index (nm->mac_changes, *p);
+      if (mc->node_index == node_index && mc->type_opaque == type_opaque
+          && mc->pid == pid)
+        break;
+      prev_index = *p;
+      p = &mc->next_index;
+    }
+
+  int found = p && *p != ~0;
+  if (is_add)
+    {
+      if (found)
+        return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
+
+      pool_get (nm->mac_changes, mc);
+      /* *INDENT-OFF* */
+      *mc = (pending_resolution_t)
+      {
+        .next_index = ~0,
+        .node_index = node_index,
+        .type_opaque = type_opaque,
+        .data = data,
+        .data_callback = data_callback,
+        .pid = pid,
+      };
+      /* *INDENT-ON* */
+
+      /* Insert new resolution at the end of the list */
+      u32 new_idx = mc - nm->mac_changes;
+      if (p)
+        {
+          /* pool_get may have moved the pool, so a pointer into the old
+           * tail element must be re-derived from its index */
+          if (prev_index != ~0)
+            {
+              pending_resolution_t *tail =
+                pool_elt_at_index (nm->mac_changes, prev_index);
+              p = &tail->next_index;
+            }
+          p[0] = new_idx;
+        }
+      else
+        mhash_set (&nm->mac_changes_by_address, address, new_idx, 0);
+    }
+  else
+    {
+      if (!found)
+        return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+      /* Clients may need to clean up pool entries, too */
+      void (*fp) (u32, u8 *) = data_callback;
+      if (fp)
+        (*fp) (mc->data, 0 /* no new mac addrs */ );
+
+      /* Remove the entry from the list and delete the entry */
+      *p = mc->next_index;
+      pool_put (nm->mac_changes, mc);
+
+      /* Remove from hash if we deleted the last entry */
+      if (*p == ~0 && p == first)
+        mhash_unset (&nm->mac_changes_by_address, address, 0);
+    }
+  return 0;
+}
+
+/**
+ * @brief Neighbor discovery termination for a packet in an L2 bridge domain.
+ *
+ * Only neighbor solicitations and advertisements are considered.  When a
+ * solicitation carries a source link-layer address option and the bridge
+ * domain's mac_by_ip6 table has an entry for the solicited target, the
+ * packet is rewritten in place into a neighbor advertisement addressed
+ * back to the sender.
+ *
+ * @param vm          vlib main.
+ * @param node        node runtime, used for tracing.
+ * @param p0          buffer holding the packet.
+ * @param eth         ethernet header of the packet.
+ * @param ip          ip6 header of the packet.
+ * @param sw_if_index interface passed to the ND event publisher.
+ * @param bd_index    index into l2input_main.bd_configs.
+ *
+ * @return 1 when the packet was turned into a neighbor advertisement,
+ *         0 otherwise.
+ */
+int
+vnet_ip6_nd_term (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_buffer_t * p0,
+ ethernet_header_t * eth,
+ ip6_header_t * ip, u32 sw_if_index, u16 bd_index)
+{
+ ip6_neighbor_main_t *nm = &ip6_neighbor_main;
+ icmp6_neighbor_solicitation_or_advertisement_header_t *ndh;
+
+ /* anything other than an NS or NA is left untouched */
+ ndh = ip6_next_header (ip);
+ if (ndh->icmp.type != ICMP6_neighbor_solicitation &&
+ ndh->icmp.type != ICMP6_neighbor_advertisement)
+ return 0;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (p0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ u8 *t0 = vlib_add_trace (vm, node, p0,
+ sizeof (icmp6_input_trace_t));
+ clib_memcpy (t0, ip, sizeof (icmp6_input_trace_t));
+ }
+
+ /* Check if anyone wants ND events for L2 BDs */
+ if (PREDICT_FALSE
+ (nm->wc_ip6_nd_publisher_node != (uword) ~ 0
+ && !ip6_address_is_link_local_unicast (&ip->src_address)))
+ {
+ vnet_nd_wc_publish (sw_if_index, eth->src_address, &ip->src_address);
+ }
+
+ /* Check if a MAC entry exists for the solicited target IP */
+ if (ndh->icmp.type == ICMP6_neighbor_solicitation)
+ {
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *opt;
+ l2_bridge_domain_t *bd_config;
+ u8 *macp;
+
+ /* the option is read from directly after the ND header */
+ opt = (void *) (ndh + 1);
+ if ((opt->header.type !=
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address) ||
+ (opt->header.n_data_u64s != 1))
+ return 0; /* source link layer address option not present */
+
+ bd_config = vec_elt_at_index (l2input_main.bd_configs, bd_index);
+ macp =
+ (u8 *) hash_get_mem (bd_config->mac_by_ip6, &ndh->target_address);
+ if (macp)
+ { /* found ip-mac entry, generate neighbor advertisement response */
+ int bogus_length;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip6_icmp_input_node.index);
+ /* swap direction: reply goes to the solicitor, from the target */
+ ip->dst_address = ip->src_address;
+ ip->src_address = ndh->target_address;
+ ip->hop_limit = 255;
+ /* reuse the option slot for the target link-layer address */
+ opt->header.type =
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address;
+ clib_memcpy (opt->ethernet_address, macp, 6);
+ ndh->icmp.type = ICMP6_neighbor_advertisement;
+ ndh->advertisement_flags = clib_host_to_net_u32
+ (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED |
+ ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+ /* checksum field is zeroed before it is recomputed */
+ ndh->icmp.checksum = 0;
+ ndh->icmp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, p0, ip, &bogus_length);
+ /* dst MAC must be taken from the old src before src is overwritten */
+ clib_memcpy (eth->dst_address, eth->src_address, 6);
+ clib_memcpy (eth->src_address, macp, 6);
+ vlib_error_count (vm, error_node->node_index,
+ ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_TX, 1);
+ return 1;
+ }
+ }
+
+ return 0;
+
+}
+
+/**
+ * @brief Add or remove an ND proxy host route for an address.
+ *
+ * Installs (or removes) a /128 path for the address via the address
+ * itself on the interface, using the FIB_SOURCE_IP6_ND_PROXY source.  On
+ * removal the address is also flushed from the ND cache.
+ *
+ * @param sw_if_index interface the path uses; also selects the fib table.
+ * @param addr        address to proxy.
+ * @param is_del      non-zero to remove, zero to add.
+ *
+ * @return 0 on success, VNET_API_ERROR_NO_SUCH_FIB when the interface has
+ *         no ip6 fib table.
+ */
+int
+ip6_neighbor_proxy_add_del (u32 sw_if_index, ip6_address_t * addr, u8 is_del)
+{
+  /* host prefix for the proxied address */
+  fib_prefix_t host_pfx = {
+    .fp_len = 128,
+    .fp_proto = FIB_PROTOCOL_IP6,
+    .fp_addr = {
+                .ip6 = *addr,
+                },
+  };
+  /* the next hop is the address itself */
+  ip46_address_t next_hop = {
+    .ip6 = *addr,
+  };
+  u32 table;
+
+  table = ip6_fib_table_get_index_for_sw_if_index (sw_if_index);
+  if (~0 == table)
+    return VNET_API_ERROR_NO_SUCH_FIB;
+
+  if (!is_del)
+    {
+      fib_table_entry_path_add (table,
+                                &host_pfx,
+                                FIB_SOURCE_IP6_ND_PROXY,
+                                FIB_ENTRY_FLAG_NONE,
+                                DPO_PROTO_IP6,
+                                &next_hop,
+                                sw_if_index,
+                                ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
+      return (0);
+    }
+
+  fib_table_entry_path_remove (table,
+                               &host_pfx,
+                               FIB_SOURCE_IP6_ND_PROXY,
+                               DPO_PROTO_IP6,
+                               &next_hop,
+                               sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
+  /* flush the ND cache of this address if it's there */
+  vnet_unset_ip6_ethernet_neighbor (vlib_get_main (),
+                                    sw_if_index, addr, NULL, 0);
+  return (0);
+}
+
+/**
+ * @brief CLI handler for "set ip6 nd proxy <interface> [del] <host-ip>".
+ *
+ * The interface comes first; "del" or "delete" must precede the host
+ * address because parsing stops at the address.  A missing interface or
+ * address is an error rather than a call with uninitialized values.
+ *
+ * @param vm    vlib main (unused).
+ * @param input CLI input.
+ * @param cmd   the CLI command being executed (unused).
+ *
+ * @return 0 on success, otherwise a parse error or an error carrying the
+ *         return code of ip6_neighbor_proxy_add_del().
+ */
+static clib_error_t *
+set_ip6_nd_proxy_cmd (vlib_main_t * vm,
+                      unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip6_address_t addr;
+  u32 sw_if_index = ~0;
+  u8 is_del = 0;
+  u8 have_addr = 0;
+  int rv;
+
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    return clib_error_return (0, "unknown interface `%U'",
+                              format_unformat_error, input);
+
+  /* get the rest of the command */
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "%U", unformat_ip6_address, &addr))
+        {
+          have_addr = 1;
+          break;
+        }
+      else if (unformat (input, "delete") || unformat (input, "del"))
+        is_del = 1;
+      else
+        return (unformat_parse_error (input));
+    }
+
+  if (!have_addr)
+    return clib_error_return (0, "missing host address");
+
+  rv = ip6_neighbor_proxy_add_del (sw_if_index, &addr, is_del);
+  if (rv)
+    return clib_error_return (0, "ip6 nd proxy %s failed: %d",
+                              is_del ? "delete" : "add", rv);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_ip6_nd_proxy_command, static) =
+{
+  .path = "set ip6 nd proxy",
+  .short_help = "set ip6 nd proxy <interface> [del] <host-ip>",
+  .function = set_ip6_nd_proxy_cmd,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Re-walk the adjacencies of every neighbor on an interface.
+ *
+ * For each neighbor pool entry on the interface, the adjacencies for its
+ * address are walked with ip6_nd_mk_complete_walk, passing the neighbor as
+ * the walk context.
+ *
+ * @param sw_if_index interface whose neighbors are walked.
+ */
+void
+ethernet_ndp_change_mac (u32 sw_if_index)
+{
+  ip6_neighbor_t *nbr;
+
+  /* *INDENT-OFF* */
+  pool_foreach (nbr, ip6_neighbor_main.neighbor_pool,
+  ({
+    /* only neighbors learned on this interface are affected */
+    if (nbr->key.sw_if_index == sw_if_index)
+      adj_nbr_walk_nh6 (sw_if_index, &nbr->key.ip6_address,
+                        ip6_nd_mk_complete_walk, nbr);
+  }));
+  /* *INDENT-ON* */
+}
+
+/**
+ * @brief Send an unsolicited neighbor advertisement for an interface's
+ *        first ip6 address.
+ *
+ * The packet is built from the neighbor-solicitation template, addressed
+ * to the link-local all-hosts multicast group and handed directly to the
+ * hardware interface's output node.  Nothing is sent when the interface
+ * has no ip6 address or when no packet could be obtained from the template.
+ *
+ * @param vm vlib main.
+ * @param hi hardware interface supplying sw_if_index, MAC address and
+ *           output node.
+ */
+void
+send_ip6_na (vlib_main_t * vm, vnet_hw_interface_t * hi)
+{
+  ip6_main_t *i6m = &ip6_main;
+  u32 sw_if_index = hi->sw_if_index;
+  ip6_address_t *ip6_addr = ip6_interface_first_address (i6m, sw_if_index);
+
+  if (!ip6_addr)
+    return;
+
+  clib_warning
+    ("Sending unsolicitated NA IP6 address %U on sw_if_idex %d",
+     format_ip6_address, ip6_addr, sw_if_index);
+
+  /* Form unsolicited neighbor advertisement packet from NS pkt template */
+  int bogus_length;
+  u32 bi = 0;
+  icmp6_neighbor_solicitation_header_t *h =
+    vlib_packet_template_get_packet (vm,
+                                     &i6m->discover_neighbor_packet_template,
+                                     &bi);
+  /* no packet available: nothing to fill in or send */
+  if (!h)
+    return;
+
+  ip6_set_reserved_multicast_address (&h->ip.dst_address,
+                                      IP6_MULTICAST_SCOPE_link_local,
+                                      IP6_MULTICAST_GROUP_ID_all_hosts);
+  h->ip.src_address = ip6_addr[0];
+  h->neighbor.icmp.type = ICMP6_neighbor_advertisement;
+  h->neighbor.target_address = ip6_addr[0];
+  h->neighbor.advertisement_flags = clib_host_to_net_u32
+    (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+
+  /* never copy more than the option field can hold */
+  u32 n_mac = vec_len (hi->hw_address);
+  if (n_mac > sizeof (h->link_layer_option.ethernet_address))
+    n_mac = sizeof (h->link_layer_option.ethernet_address);
+  clib_memcpy (h->link_layer_option.ethernet_address, hi->hw_address, n_mac);
+
+  h->neighbor.icmp.checksum =
+    ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h->ip, &bogus_length);
+  ASSERT (bogus_length == 0);
+
+  /* Setup MAC header with IP6 Etype and mcast DMAC */
+  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+  vlib_buffer_advance (b, -sizeof (ethernet_header_t));
+  ethernet_header_t *e = vlib_buffer_get_current (b);
+  e->type = clib_host_to_net_u16 (ETHERNET_TYPE_IP6);
+  clib_memcpy (e->src_address, hi->hw_address, sizeof (e->src_address));
+  ip6_multicast_ethernet_address (e->dst_address,
+                                  IP6_MULTICAST_GROUP_ID_all_hosts);
+
+  /* Send unsolicited ND advertisement packet out the specified interface */
+  vnet_buffer (b)->sw_if_index[VLIB_RX] =
+    vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
+  vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
+  u32 *to_next = vlib_frame_vector_args (f);
+  to_next[0] = bi;
+  f->n_vectors = 1;
+  vlib_put_frame_to_node (vm, hi->output_node_index, f);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_neighbor.h b/src/vnet/ip/ip6_neighbor.h
new file mode 100644
index 00000000..ed80381b
--- /dev/null
+++ b/src/vnet/ip/ip6_neighbor.h
@@ -0,0 +1,109 @@
+/*
+ *
+ * ip6_neighbor.h: ip6 neighbor structures
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_ip6_neighbor_h
+#define included_ip6_neighbor_h
+
+#include <vnet/fib/fib_types.h>
+
+/* Lookup key for a neighbor entry: an IPv6 address plus an interface index. */
+typedef struct
+{
+ ip6_address_t ip6_address;
+ u32 sw_if_index;
+ u32 pad; /* explicit filler after sw_if_index; NOTE(review): presumably keeps all hashed key bytes initialized -- confirm */
+} ip6_neighbor_key_t;
+
+/* Per-neighbor flag bits.  Each enumerator occupies its own bit, so the
+ * values can be OR-ed together.  The packed attribute asks the compiler
+ * for the smallest integer type able to hold the enumerators. */
+typedef enum ip6_neighbor_flags_t_
+{
+ IP6_NEIGHBOR_FLAG_STATIC = (1 << 0),
+ IP6_NEIGHBOR_FLAG_DYNAMIC = (1 << 1),
+ IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY = (1 << 2),
+} __attribute__ ((packed)) ip6_neighbor_flags_t;
+
+/* One neighbor entry: lookup key, link-layer address, flags and bookkeeping. */
+typedef struct
+{
+ ip6_neighbor_key_t key;
+ u8 link_layer_address[8]; /* 8 bytes of storage; NOTE(review): Ethernet presumably uses only the first 6 -- confirm */
+ ip6_neighbor_flags_t flags; /* OR of IP6_NEIGHBOR_FLAG_* bits */
+ u64 cpu_time_last_updated; /* NOTE(review): presumably a CPU-clock timestamp of the last update -- confirm units */
+ fib_node_index_t fib_entry_index; /* NOTE(review): presumably meaningless when IP6_NEIGHBOR_FLAG_NO_FIB_ENTRY is set -- confirm */
+} ip6_neighbor_t;
+
+extern ip6_neighbor_t *ip6_neighbors_entries (u32 sw_if_index);
+
+extern int ip6_neighbor_ra_config (vlib_main_t * vm, u32 sw_if_index,
+ u8 suppress, u8 managed, u8 other,
+ u8 ll_option, u8 send_unicast, u8 cease,
+ u8 use_lifetime, u32 lifetime,
+ u32 initial_count, u32 initial_interval,
+ u32 max_interval, u32 min_interval,
+ u8 is_no);
+
+extern int ip6_neighbor_ra_prefix (vlib_main_t * vm, u32 sw_if_index,
+ ip6_address_t * prefix_addr, u8 prefix_len,
+ u8 use_default, u32 val_lifetime,
+ u32 pref_lifetime, u8 no_advertise,
+ u8 off_link, u8 no_autoconfig,
+ u8 no_onlink, u8 is_no);
+
+extern clib_error_t *ip6_set_neighbor_limit (u32 neighbor_limit);
+
+extern void vnet_register_ip6_neighbor_resolution_event (vnet_main_t * vnm,
+ void *address_arg,
+ uword node_index,
+ uword type_opaque,
+ uword data);
+
+extern int vnet_set_ip6_ethernet_neighbor (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a,
+ u8 * link_layer_address,
+ uword n_bytes_link_layer_address,
+ int is_static,
+ int is_no_fib_entry);
+
+extern int vnet_unset_ip6_ethernet_neighbor (vlib_main_t * vm,
+ u32 sw_if_index,
+ ip6_address_t * a,
+ u8 * link_layer_address,
+ uword
+ n_bytes_link_layer_address);
+
+extern int ip6_neighbor_proxy_add_del (u32 sw_if_index,
+ ip6_address_t * addr, u8 is_add);
+
+u32 ip6_neighbor_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index,
+ u32 is_add);
+/* Record bundling an interface index, an IPv6 address and a 6-byte MAC.
+ * NOTE(review): presumably delivered to the node registered through
+ * wc_nd_set_publisher_node() -- confirm against the .c file. */
+typedef struct
+{
+ u32 sw_if_index;
+ ip6_address_t ip6;
+ u8 mac[6];
+} wc_nd_report_t;
+
+void wc_nd_set_publisher_node (uword node_index, uword event_type);
+
+#endif /* included_ip6_neighbor_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h
new file mode 100644
index 00000000..c0c745e2
--- /dev/null
+++ b/src/vnet/ip/ip6_packet.h
@@ -0,0 +1,536 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip6/packet.h: ip6 packet format
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip6_packet_h
+#define included_ip6_packet_h
+
+#include <vnet/tcp/tcp_packet.h>
+#include <vnet/ip/ip4_packet.h>
+
+/* A 128-bit IPv6 address.  All members alias the same 16 bytes and only
+ * differ in access width; as_u8[0] is the leading byte of the address
+ * (the multicast test in this file checks as_u8[0] == 0xff). */
+typedef union
+{
+ u8 as_u8[16];
+ u16 as_u16[8];
+ u32 as_u32[4];
+ u64 as_u64[2];
+ uword as_uword[16 / sizeof (uword)]; /* element count depends on the platform's uword size */
+}
+ip6_address_t;
+
+/* Packed so that the mhash key doesn't include uninitialized pad bytes */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ /* IP address must be first for ip_interface_address_get_address() to work */
+ ip6_address_t ip6_addr;
+ u32 fib_index; /* stored next to the address by ip6_addr_fib_init() */
+}) ip6_address_fib_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+/* 16-byte container holding either an IPv6 or an IPv4 address.  The
+ * anonymous struct places three u32 pad words (12 bytes) in front of the
+ * ip4 member, so ip4 overlays the tail of ip6 (assumes ip4_address_t is
+ * 4 bytes -- confirm in ip4_packet.h). */
+typedef CLIB_PACKED (union {
+ struct {
+ u32 pad[3];
+ ip4_address_t ip4;
+ };
+ ip6_address_t ip6;
+ u8 as_u8[16];
+ u64 as_u64[2];
+}) ip46_address_t;
+/* *INDENT-ON* */
+/* Helpers for ip46_address_t; every macro takes pointer arguments.
+ * An address counts as IPv4 when the three pad words in front of the ip4
+ * member are zero; note that the all-zero address therefore also tests
+ * as IPv4. */
+#define ip46_address_is_ip4(ip46) (((ip46)->pad[0] | (ip46)->pad[1] | (ip46)->pad[2]) == 0)
+/* Clear the pad words only; the ip4 member is left untouched. */
+#define ip46_address_mask_ip4(ip46) ((ip46)->pad[0] = (ip46)->pad[1] = (ip46)->pad[2] = 0)
+/* Clear the pad words, then copy the IPv4 address that ip points to. */
+#define ip46_address_set_ip4(ip46, ip) (ip46_address_mask_ip4(ip46), (ip46)->ip4 = (ip)[0])
+#define ip46_address_reset(ip46) ((ip46)->as_u64[0] = (ip46)->as_u64[1] = 0)
+/* memcmp() ordering over all 16 bytes.  Arguments are parenthesized so
+ * that arbitrary pointer expressions expand as intended. */
+#define ip46_address_cmp(ip46_1, ip46_2) (memcmp((ip46_1), (ip46_2), sizeof(*(ip46_1))))
+#define ip46_address_is_zero(ip46) (((ip46)->as_u64[0] == 0) && ((ip46)->as_u64[1] == 0))
+#define ip46_address_is_equal(a1, a2) (((a1)->as_u64[0] == (a2)->as_u64[0]) \
+ && ((a1)->as_u64[1] == (a2)->as_u64[1]))
+
+/* Build an ip46_address_t from raw address bytes at buf: 16 bytes are
+ * copied when is_ipv6 is non-zero, otherwise buf is read as an IPv4
+ * address and the pad words are cleared. */
+always_inline ip46_address_t
+to_ip46 (u32 is_ipv6, u8 * buf)
+{
+  ip46_address_t result;
+
+  if (!is_ipv6)
+    {
+      ip46_address_set_ip4 (&result, (ip4_address_t *) buf);
+      return result;
+    }
+
+  result.ip6 = *((ip6_address_t *) buf);
+  return result;
+}
+
+
+/* Fill addr_fib with a copy of *address and the given fib_index. */
+always_inline void
+ip6_addr_fib_init (ip6_address_fib_t * addr_fib, ip6_address_t * address,
+		   u32 fib_index)
+{
+  addr_fib->fib_index = fib_index;
+  addr_fib->ip6_addr = address[0];
+}
+
+/* Special addresses:
+ unspecified ::/128
+ loopback ::1/128
+ global unicast 2000::/3
+ unique local unicast fc00::/7
+ link local unicast fe80::/10
+ multicast ff00::/8
+ ietf reserved everything else. */
+
+/* X-macro list of (name, value) pairs for multicast scopes.  The user
+ * defines _(f,n) before expanding the list; the enum that follows turns
+ * each pair into IP6_MULTICAST_SCOPE_<name>. */
+#define foreach_ip6_multicast_address_scope \
+ _ (loopback, 0x1) \
+ _ (link_local, 0x2) \
+ _ (admin_local, 0x4) \
+ _ (site_local, 0x5) \
+ _ (organization_local, 0x8) \
+ _ (global, 0xe)
+
+/* X-macro list of (name, value) pairs for well-known multicast group
+ * ids; expanded below into IP6_MULTICAST_GROUP_ID_<name>. */
+#define foreach_ip6_multicast_link_local_group_id \
+ _ (all_hosts, 0x1) \
+ _ (all_routers, 0x2) \
+ _ (rip_routers, 0x9) \
+ _ (eigrp_routers, 0xa) \
+ _ (pim_routers, 0xd) \
+ _ (mldv2_routers, 0x16)
+
+/* Multicast scope values, generated from
+ * foreach_ip6_multicast_address_scope as IP6_MULTICAST_SCOPE_<name>. */
+typedef enum
+{
+#define _(f,n) IP6_MULTICAST_SCOPE_##f = n,
+ foreach_ip6_multicast_address_scope
+#undef _
+} ip6_multicast_address_scope_t;
+
+/* Multicast group ids, generated from
+ * foreach_ip6_multicast_link_local_group_id as
+ * IP6_MULTICAST_GROUP_ID_<name>. */
+typedef enum
+{
+#define _(f,n) IP6_MULTICAST_GROUP_ID_##f = n,
+ foreach_ip6_multicast_link_local_group_id
+#undef _
+} ip6_multicast_link_local_group_id_t;
+
+/* Multicast is ff00::/8, so only the leading byte is examined. */
+always_inline uword
+ip6_address_is_multicast (ip6_address_t * a)
+{
+  const u8 leading_byte = a->as_u8[0];
+
+  return leading_byte == 0xff;
+}
+
+/* Dispatch the multicast test to the IPv4 or IPv6 variant depending on
+ * which kind of address the container holds. */
+always_inline uword
+ip46_address_is_multicast (ip46_address_t * a)
+{
+  if (ip46_address_is_ip4 (a))
+    return ip4_address_is_multicast (&a->ip4);
+
+  return ip6_address_is_multicast (&a->ip6);
+}
+
+/* Write the multicast address ff0<scope>::<id> into *a: everything is
+ * zeroed, the first 16-bit word becomes 0xff00 | scope and the last
+ * 16-bit word becomes id (both stored in network byte order). */
+always_inline void
+ip6_set_reserved_multicast_address (ip6_address_t * a,
+				    ip6_multicast_address_scope_t scope,
+				    u16 id)
+{
+  a->as_u64[1] = 0;
+  a->as_u64[0] = 0;
+  a->as_u16[7] = clib_host_to_net_u16 (id);
+  a->as_u16[0] = clib_host_to_net_u16 (0xff00 | scope);
+}
+
+/* Write the solicited-node multicast address ff02::1:ffXX:XXXX into *a,
+ * where XX:XXXX are the low 24 bits of id.  id must fit in 24 bits
+ * (checked with ASSERT). */
+always_inline void
+ip6_set_solicited_node_multicast_address (ip6_address_t * a, u32 id)
+{
+  /* 0xff02::1:ffXX:XXXX. */
+  a->as_u64[0] = a->as_u64[1] = 0;
+  a->as_u16[0] = clib_host_to_net_u16 (0xff02);
+  a->as_u8[11] = 1;
+  ASSERT ((id >> 24) == 0);
+  /* Unsigned constant: 0xff << 24 does not fit in a signed int, so the
+     signed shift would be undefined behaviour. */
+  id |= 0xffu << 24;
+  a->as_u32[3] = clib_host_to_net_u32 (id);
+}
+
+/* Build a link-local address fe80::/64 in *a whose low 64 bits are
+ * derived from a 6-byte Ethernet address: the three leading MAC bytes,
+ * then 0xff 0xfe, then the three trailing MAC bytes. */
+always_inline void
+ip6_link_local_address_from_ethernet_address (ip6_address_t * a,
+ u8 * ethernet_address)
+{
+ a->as_u64[0] = a->as_u64[1] = 0;
+ a->as_u16[0] = clib_host_to_net_u16 (0xfe80);
+ /* Always set locally administered bit (6). */
+ /* NOTE(review): (1 << 6) ORs in the value 0x40.  RFC 4291 Appendix A
+    forms the interface id by inverting the universal/local bit, which
+    is the value 0x02 of this byte -- confirm which is intended. */
+ a->as_u8[0x8] = ethernet_address[0] | (1 << 6);
+ a->as_u8[0x9] = ethernet_address[1];
+ a->as_u8[0xa] = ethernet_address[2];
+ a->as_u8[0xb] = 0xff;
+ a->as_u8[0xc] = 0xfe;
+ a->as_u8[0xd] = ethernet_address[3];
+ a->as_u8[0xe] = ethernet_address[4];
+ a->as_u8[0xf] = ethernet_address[5];
+}
+
+/* Write the 6-byte multicast MAC 33:33:gg:gg:gg:gg, where the last four
+ * bytes are group_id from most- to least-significant byte. */
+always_inline void
+ip6_multicast_ethernet_address (u8 * ethernet_address, u32 group_id)
+{
+  int shift;
+  int octet = 2;
+
+  ethernet_address[0] = 0x33;
+  ethernet_address[1] = 0x33;
+  for (shift = 24; shift >= 0; shift -= 8)
+    ethernet_address[octet++] = ((group_id >> shift) & 0xff);
+}
+
+/* Return 1 when both addresses hold the same 16 bytes, 0 otherwise.
+ * The comparison proceeds one machine word at a time. */
+always_inline uword
+ip6_address_is_equal (ip6_address_t * a, ip6_address_t * b)
+{
+  int word;
+
+  for (word = 0; word < ARRAY_LEN (a->as_uword); word++)
+    {
+      if (a->as_uword[word] == b->as_uword[word])
+	continue;
+      return 0;
+    }
+  return 1;
+}
+
+/* Return 1 when a and b agree on every bit that is set in mask,
+ * 0 otherwise. */
+always_inline uword
+ip6_address_is_equal_masked (ip6_address_t * a, ip6_address_t * b,
+			     ip6_address_t * mask)
+{
+  int word;
+
+  for (word = 0; word < ARRAY_LEN (a->as_uword); word++)
+    {
+      const uword m = mask->as_uword[word];
+
+      if ((a->as_uword[word] & m) != (b->as_uword[word] & m))
+	return 0;
+    }
+  return 1;
+}
+
+/* AND *a in place with *mask. */
+always_inline void
+ip6_address_mask (ip6_address_t * a, ip6_address_t * mask)
+{
+  int word = 0;
+
+  while (word < ARRAY_LEN (a->as_uword))
+    {
+      a->as_uword[word] = a->as_uword[word] & mask->as_uword[word];
+      word++;
+    }
+}
+
+/* Clear all 16 bytes of *a, one machine word at a time. */
+always_inline void
+ip6_address_set_zero (ip6_address_t * a)
+{
+  int word = 0;
+
+  while (word < ARRAY_LEN (a->as_uword))
+    a->as_uword[word++] = 0;
+}
+
+/* Turn a prefix length into a netmask: *a is cleared and its leading
+ * width bits are set, most-significant bit of as_u8[0] first.  width
+ * must not exceed 128 (checked with ASSERT). */
+always_inline void
+ip6_address_mask_from_width (ip6_address_t * a, u32 width)
+{
+  u32 n;
+
+  ASSERT (width <= 128);
+  memset (a, 0, sizeof (a[0]));
+  for (n = 0; n < width; n++)
+    {
+      /* Bit 7 of a byte is the first prefix bit, bit 0 the eighth. */
+      a->as_u8[n >> 3] |= 0x80 >> (n & 7);
+    }
+}
+
+/* Return 1 when all 16 bytes of *a are zero, 0 otherwise. */
+always_inline uword
+ip6_address_is_zero (ip6_address_t * a)
+{
+  int word;
+
+  for (word = 0; word < ARRAY_LEN (a->as_uword); word++)
+    {
+      if (a->as_uword[word] == 0)
+	continue;
+      return 0;
+    }
+  return 1;
+}
+
+/* The unspecified address (::) is the all-zero address, so this simply
+ * forwards to ip6_address_is_zero(). */
+always_inline uword
+ip6_address_is_unspecified (ip6_address_t * a)
+{
+  const uword all_zero = ip6_address_is_zero (a);
+
+  return all_zero;
+}
+
+/* Check for loopback address ::1, i.e. fifteen zero bytes followed by
+ * a byte equal to 1.  The address is only read: the earlier approach of
+ * flipping the last byte, testing for zero and restoring it wrote to
+ * the caller's data, which is unsafe for shared or read-only memory. */
+always_inline uword
+ip6_address_is_loopback (ip6_address_t * a)
+{
+  return (a->as_u64[0] == 0
+	  && a->as_u32[2] == 0
+	  && a->as_u16[6] == 0 && a->as_u8[14] == 0 && a->as_u8[15] == 1);
+}
+
+/* Link-local unicast is fe80::/10: the first byte must be 0xfe and the
+ * top two bits of the second byte must be binary 10. */
+always_inline uword
+ip6_address_is_link_local_unicast (ip6_address_t * a)
+{
+  const u8 *octet = a->as_u8;
+
+  if (octet[0] != 0xfe)
+    return 0;
+  return (octet[1] >> 6) == 0x2;
+}
+
+/* Unique local unicast is fc00::/7: the top seven bits of the first
+ * byte must equal those of 0xfc. */
+always_inline uword
+ip6_address_is_local_unicast (ip6_address_t * a)
+{
+  const u8 top_seven_bits = a->as_u8[0] >> 1;
+
+  return top_seven_bits == (0xfc >> 1);
+}
+
+/* Global unicast is 2000::/3: the top three bits of the first byte must
+ * be binary 001. */
+always_inline uword
+ip6_address_is_global_unicast (ip6_address_t * a)
+{
+  const u8 top_three_bits = a->as_u8[0] >> 5;
+
+  return top_three_bits == 0x1;
+}
+
+/* Solicited-node multicast is ff02::1:ff00:0/104: the first 13 bytes
+ * must read ff02 0000 0000 0000 0000 0001 ff. */
+always_inline uword
+ip6_is_solicited_node_multicast_address (ip6_address_t * a)
+{
+  if (a->as_u32[0] != clib_host_to_net_u32 (0xff020000))
+    return 0;
+  if (a->as_u32[1] != 0)
+    return 0;
+  if (a->as_u32[2] != clib_host_to_net_u32 (1))
+    return 0;
+  return a->as_u8[12] == 0xff;
+}
+
+/* Fixed IPv6 header: 4 + 2 + 1 + 1 + 16 + 16 = 40 bytes.
+ * NOTE(review): multi-byte fields are presumably kept in network byte
+ * order when the struct overlays packet data (callers in this tree wrap
+ * payload_length in clib_net_to_host_u16) -- confirm per use. */
+typedef struct
+{
+ /* 4 bit version, 8 bit traffic class and 20 bit flow label. */
+ u32 ip_version_traffic_class_and_flow_label;
+
+ /* Total packet length not including this header (but including
+ any extension headers if present). */
+ u16 payload_length;
+
+ /* Protocol for next header. */
+ u8 protocol;
+
+ /* Hop limit decremented by router at each hop. */
+ u8 hop_limit;
+
+ /* Source and destination address. */
+ ip6_address_t src_address, dst_address;
+} ip6_header_t;
+
+/* Extract bits 20..27 of the combined version/class/flow word exactly
+ * as it is stored in *i; no byte swap is applied.
+ * NOTE(review): ip6_translate_tos() in ip6_to_ip4.h converts the same
+ * field with clib_net_to_host_u32() first.  If *i overlays a packet on
+ * a little-endian host this function returns different bits -- confirm
+ * what callers expect. */
+always_inline u8
+ip6_traffic_class (ip6_header_t * i)
+{
+ return (i->ip_version_traffic_class_and_flow_label & 0x0FF00000) >> 20;
+}
+
+/* Return a pointer to the byte immediately following the fixed IPv6
+ * header (the first extension header or the upper-layer header). */
+always_inline void *
+ip6_next_header (ip6_header_t * i)
+{
+  ip6_header_t *past_header = &i[1];
+
+  return (void *) past_header;
+}
+
+/* Copy the 40-byte fixed IPv6 header from *src to *dst field by field.
+ * The addresses are copied through as_u64[] so that all 16 bytes are
+ * transferred regardless of the platform's uword size; as_uword[0..1]
+ * covers only 8 of the 16 bytes when uword is 32 bits wide. */
+always_inline void
+ip6_copy_header (ip6_header_t * dst, const ip6_header_t * src)
+{
+  dst->ip_version_traffic_class_and_flow_label =
+    src->ip_version_traffic_class_and_flow_label;
+  dst->payload_length = src->payload_length;
+  dst->protocol = src->protocol;
+  dst->hop_limit = src->hop_limit;
+
+  dst->src_address.as_u64[0] = src->src_address.as_u64[0];
+  dst->src_address.as_u64[1] = src->src_address.as_u64[1];
+  dst->dst_address.as_u64[0] = src->dst_address.as_u64[0];
+  dst->dst_address.as_u64[1] = src->dst_address.as_u64[1];
+}
+
+/* Turn a packet around in place: exchange the IPv6 source/destination
+ * addresses and the TCP source/destination ports. */
+always_inline void
+ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0)
+{
+  ip6_address_t saved_address;
+  u16 saved_port;
+
+  saved_address = ip0->src_address;
+  ip0->src_address = ip0->dst_address;
+  ip0->dst_address = saved_address;
+
+  saved_port = tcp0->src;
+  tcp0->src = tcp0->dst;
+  tcp0->dst = saved_port;
+}
+
+/* Two-packet variant of ip6_tcp_reply_x1(): for each of the two
+ * packets, exchange the IPv6 addresses and the TCP ports in place. */
+always_inline void
+ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1,
+		  tcp_header_t * tcp0, tcp_header_t * tcp1)
+{
+  ip6_address_t saved_address0, saved_address1;
+  u16 saved_port0, saved_port1;
+
+  saved_address0 = ip0->src_address;
+  saved_address1 = ip1->src_address;
+  ip0->src_address = ip0->dst_address;
+  ip1->src_address = ip1->dst_address;
+  ip0->dst_address = saved_address0;
+  ip1->dst_address = saved_address1;
+
+  saved_port0 = tcp0->src;
+  saved_port1 = tcp1->src;
+  tcp0->src = tcp0->dst;
+  tcp1->src = tcp1->dst;
+  tcp0->dst = saved_port0;
+  tcp1->dst = saved_port1;
+}
+
+
+/* *INDENT-OFF* */
+/* One-byte option.  NOTE(review): presumably the Pad1 option, whose
+ * single byte is the option type -- confirm against users. */
+typedef CLIB_PACKED (struct {
+ u8 data;
+}) ip6_pad1_option_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+/* Type/length option header followed by a variable amount of data;
+ * data[0] is a zero-length array marking where the payload starts. */
+typedef CLIB_PACKED (struct {
+ u8 type;
+ u8 len;
+ u8 data[0];
+}) ip6_padN_option_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+/* Four-byte option: type, length and a 16-bit value.
+ * IP6_MLDP_ALERT_TYPE (0x5) is defined alongside it; NOTE(review):
+ * presumably the value to store in `type` -- confirm against users. */
+typedef CLIB_PACKED (struct {
+#define IP6_MLDP_ALERT_TYPE 0x5
+ u8 type;
+ u8 len;
+ u16 value;
+}) ip6_router_alert_option_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+/* Leading two bytes shared by IPv6 extension headers: the protocol of
+ * the header that follows and a length field (see ip6_ext_header_len
+ * for how the length is converted to bytes). */
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+}) ip6_ext_header_t;
+
+/*
+ * Return 1 when nexthdr names one of the extension headers known here
+ * (hop-by-hop, fragmentation, AH, routing, destination options) and 0
+ * when it is anything else, i.e. an upper-layer protocol.
+ */
+always_inline u8 ip6_ext_hdr(u8 nexthdr)
+{
+  switch (nexthdr)
+    {
+    case IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS:
+    case IP_PROTOCOL_IPV6_FRAGMENTATION:
+    case IP_PROTOCOL_IPSEC_AH:
+    case IP_PROTOCOL_IPV6_ROUTE:
+    case IP_PROTOCOL_IP6_DESTINATION_OPTIONS:
+      return 1;
+    default:
+      return 0;
+    }
+}
+
+/* Header length in bytes from the length field: (n_data_u64s + 1) * 8. */
+#define ip6_ext_header_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+1) << 3)
+/* Authentication-header variant: (n_data_u64s + 2) * 4 bytes. */
+#define ip6_ext_authhdr_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+2) << 2)
+
+/* Return the address just past ext_hdr, using ip6_ext_header_len() to
+ * obtain the size of the current extension header. */
+always_inline void *
+ip6_ext_next_header (ip6_ext_header_t *ext_hdr )
+{
+  u8 *header_start = (u8 *) ext_hdr;
+
+  return (void *) (header_start + ip6_ext_header_len (ext_hdr));
+}
+
+/*
+ * Macro to find the IPv6 ext header of type t
+ * I is the IPv6 header
+ * P is the previous IPv6 ext header (NULL if none)
+ * M is the matched IPv6 ext header of type t
+ *
+ * On return M is NULL when no header of type t was found.  When the
+ * IPv6 header itself announces t, M is the first header and P is NULL;
+ * otherwise P ends on the last extension header visited.
+ *
+ * Caveats visible in the expansion:
+ *  - it expands to a bare if/else statement, not a do { } while (0)
+ *    block, so take care when using it inside an unbraced if/else;
+ *  - i, p, m and t are evaluated several times;
+ *  - t is not parenthesized in the first comparison;
+ *  - the walk is not bounded by any packet length.
+ */
+#define ip6_ext_header_find_t(i, p, m, t) \
+if ((i)->protocol == t) \
+{ \
+ (m) = (void *)((i)+1); \
+ (p) = NULL; \
+} \
+else \
+{ \
+ (m) = NULL; \
+ (p) = (void *)((i)+1); \
+ while (ip6_ext_hdr((p)->next_hdr) && \
+ ((ip6_ext_header_t *)(p))->next_hdr != (t)) \
+ { \
+ (p) = ip6_ext_next_header((p)); \
+ } \
+ if ( ((p)->next_hdr) == (t)) \
+ { \
+ (m) = (void *)(ip6_ext_next_header((p))); \
+ } \
+}
+
+
+/* Same two leading bytes as ip6_ext_header_t, followed by the option
+ * data; data[0] is a zero-length array marking where the options
+ * start. */
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+ u8 data[0];
+}) ip6_hop_by_hop_ext_t;
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+/* Eight-byte fragment extension header.  The ip6_frag_hdr_* macros that
+ * follow read fragment_offset_and_more through clib_net_to_host_u16(),
+ * i.e. the field is kept in network byte order. */
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ u8 rsv;
+ u16 fragment_offset_and_more; /* offset in bits 3..15, more-fragments flag in bit 0 */
+ u32 identification;
+}) ip6_frag_hdr_t;
+/* *INDENT-ON* */
+
+/* Fragment offset: the host-order field shifted right by 3 bits. */
+#define ip6_frag_hdr_offset(hdr) \
+ (clib_net_to_host_u16((hdr)->fragment_offset_and_more) >> 3)
+
+/* More-fragments flag: bit 0 of the host-order field. */
+#define ip6_frag_hdr_more(hdr) \
+ (clib_net_to_host_u16((hdr)->fragment_offset_and_more) & 0x1)
+
+/* Compose the network-order field from an offset and a flag; any
+ * non-zero `more` is normalized to 1 by the double negation. */
+#define ip6_frag_hdr_offset_and_more(offset, more) \
+ clib_host_to_net_u16(((offset) << 3) + !!(more))
+
+#endif /* included_ip6_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_pg.c b/src/vnet/ip/ip6_pg.c
new file mode 100644
index 00000000..ba1e4ad9
--- /dev/null
+++ b/src/vnet/ip/ip6_pg.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip6_pg: IP v6 packet-generator interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+/* Store the IPv6 payload length for buffer bi: the total chain length
+ * minus everything up to and including the IPv6 header that starts at
+ * ip_header_offset in the buffer data. */
+static inline void
+ip6_pg_set_payload_length (vlib_main_t * vm, u32 bi, u32 ip_header_offset)
+{
+  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+  ip6_header_t *ip = (void *) (b->data + ip_header_offset);
+
+  ip->payload_length =
+    clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) -
+			  ip_header_offset - sizeof (ip[0]));
+}
+
+/* Packet-generator edit callback: fix up payload_length in the IPv6
+ * header of each of the n_packets buffers listed in packets.  The
+ * header offset is taken from the edit group g.  Buffers are handled
+ * two at a time, then the odd one out (if any). */
+static void
+ip6_pg_edit_function (pg_main_t * pg,
+		      pg_stream_t * s,
+		      pg_edit_group_t * g, u32 * packets, u32 n_packets)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  u32 ip_header_offset = g->start_byte_offset;
+  u32 next = 0;
+
+  for (; n_packets - next >= 2; next += 2)
+    {
+      ip6_pg_set_payload_length (vm, packets[next], ip_header_offset);
+      ip6_pg_set_payload_length (vm, packets[next + 1], ip_header_offset);
+    }
+
+  if (next < n_packets)
+    ip6_pg_set_payload_length (vm, packets[next], ip_header_offset);
+}
+
+/* One packet-generator edit per IPv6 header field.  The first three
+ * edits describe the bit fields packed into
+ * ip_version_traffic_class_and_flow_label (see pg_ip6_header_init). */
+typedef struct
+{
+ pg_edit_t ip_version;
+ pg_edit_t traffic_class;
+ pg_edit_t flow_label;
+ pg_edit_t payload_length;
+ pg_edit_t protocol;
+ pg_edit_t hop_limit;
+ pg_edit_t src_address, dst_address;
+} pg_ip6_header_t;
+
+/* Bind every edit in *p to its field of ip6_header_t.  Whole fields
+ * are bound by name; version, traffic class and flow label are bound
+ * as bit ranges (bit offset, width) within the combined 32-bit word. */
+static inline void
+pg_ip6_header_init (pg_ip6_header_t * p)
+{
+  /* Fields that occupy whole struct members. */
+  pg_edit_init (&p->payload_length, ip6_header_t, payload_length);
+  pg_edit_init (&p->hop_limit, ip6_header_t, hop_limit);
+  pg_edit_init (&p->protocol, ip6_header_t, protocol);
+  pg_edit_init (&p->src_address, ip6_header_t, src_address);
+  pg_edit_init (&p->dst_address, ip6_header_t, dst_address);
+
+  /* Bit fields: version in bits 28..31, class in 20..27, flow in 0..19. */
+  pg_edit_init_bitfield (&p->ip_version, ip6_header_t,
+			 ip_version_traffic_class_and_flow_label, 28, 4);
+  pg_edit_init_bitfield (&p->traffic_class, ip6_header_t,
+			 ip_version_traffic_class_and_flow_label, 20, 8);
+  pg_edit_init_bitfield (&p->flow_label, ip6_header_t,
+			 ip_version_traffic_class_and_flow_label, 0, 20);
+}
+
+/* unformat handler that parses an IPv6 header description for a
+ * packet-generator stream (the pg_stream_t is taken from args).
+ *
+ * Accepted input: "<protocol>: <src> -> <dst>", followed by any number
+ * of "version N", "traffic-class N", "length N", "hop-limit N" options,
+ * followed by the next protocol layer or a generic payload.
+ *
+ * Returns 1 on success.  On failure the edit group created here is
+ * freed and 0 is returned. */
+uword
+unformat_pg_ip6_header (unformat_input_t * input, va_list * args)
+{
+ pg_stream_t *s = va_arg (*args, pg_stream_t *);
+ pg_ip6_header_t *p;
+ u32 group_index;
+
+ p = pg_create_edit_group (s, sizeof (p[0]), sizeof (ip6_header_t),
+ &group_index);
+ pg_ip6_header_init (p);
+
+ /* Defaults. */
+ pg_edit_set_fixed (&p->ip_version, 6);
+ pg_edit_set_fixed (&p->traffic_class, 0);
+ pg_edit_set_fixed (&p->flow_label, 0);
+ pg_edit_set_fixed (&p->hop_limit, 64);
+
+ /* Left unspecified unless the user gives "length"; resolved below. */
+ p->payload_length.type = PG_EDIT_UNSPECIFIED;
+
+ /* Mandatory part: protocol, source and destination address. */
+ if (!unformat (input, "%U: %U -> %U",
+ unformat_pg_edit,
+ unformat_ip_protocol, &p->protocol,
+ unformat_pg_edit,
+ unformat_ip6_address, &p->src_address,
+ unformat_pg_edit, unformat_ip6_address, &p->dst_address))
+ goto error;
+
+ /* Parse options. */
+ while (1)
+ {
+ if (unformat (input, "version %U",
+ unformat_pg_edit, unformat_pg_number, &p->ip_version))
+ ;
+
+ else if (unformat (input, "traffic-class %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->traffic_class))
+ ;
+
+ else if (unformat (input, "length %U",
+ unformat_pg_edit,
+ unformat_pg_number, &p->payload_length))
+ ;
+
+ else if (unformat (input, "hop-limit %U",
+ unformat_pg_edit, unformat_pg_number, &p->hop_limit))
+ ;
+
+ /* Can't parse input: try next protocol level. */
+ else
+ break;
+ }
+
+ {
+ ip_main_t *im = &ip_main;
+ ip_protocol_t protocol;
+ ip_protocol_info_t *pi;
+
+ /* A protocol-specific parser can only be chosen when the protocol
+    edit holds a fixed value. */
+ pi = 0;
+ if (p->protocol.type == PG_EDIT_FIXED)
+ {
+ protocol = pg_edit_get_value (&p->protocol, PG_EDIT_LO);
+ pi = ip_get_protocol_info (im, protocol);
+ }
+
+ /* Try the protocol's own parser first, then the generic payload. */
+ if (pi && pi->unformat_pg_edit
+ && unformat_user (input, pi->unformat_pg_edit, s))
+ ;
+
+ else if (!unformat_user (input, unformat_pg_payload, s))
+ goto error;
+
+ /* With a fixed packet size and further edit groups after this one,
+    the payload length is a constant: group bytes minus the header.
+    NOTE(review): p is dereferenced here but only re-fetched further
+    down; if the nested parsers above can reallocate the edit groups,
+    this read and write may use a stale pointer -- confirm. */
+ if (p->payload_length.type == PG_EDIT_UNSPECIFIED
+ && s->min_packet_bytes == s->max_packet_bytes
+ && group_index + 1 < vec_len (s->edit_groups))
+ {
+ pg_edit_set_fixed (&p->payload_length,
+ pg_edit_group_n_bytes (s,
+ group_index) -
+ sizeof (ip6_header_t));
+ }
+
+ /* Still unspecified: compute it per packet via the edit function. */
+ p = pg_get_edit_group (s, group_index);
+ if (p->payload_length.type == PG_EDIT_UNSPECIFIED)
+ {
+ pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ g->edit_function = ip6_pg_edit_function;
+ }
+
+ return 1;
+ }
+
+error:
+ /* Free up any edits we may have added. */
+ pg_free_edit_group (s);
+ return 0;
+}
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h
new file mode 100644
index 00000000..c14b46c4
--- /dev/null
+++ b/src/vnet/ip/ip6_to_ip4.h
@@ -0,0 +1,634 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief IPv6 to IPv4 translation
+ */
+#ifndef __included_ip6_to_ip4_h__
+#define __included_ip6_to_ip4_h__
+
+#include <vnet/ip/ip.h>
+
+/**
+ * IPv6 to IPv4 set call back function type
+ *
+ * Receives the IPv6 header being translated, the IPv4 header to fill
+ * in, and an opaque caller context.  icmp6_to_icmp() treats a non-zero
+ * return from the inner callback as failure and passes it on.
+ */
+typedef int (*ip6_to_ip4_set_fn_t) (ip6_header_t * ip6, ip4_header_t * ip4,
+ void *ctx);
+
+/* *INDENT-OFF* */
+/* Maps the pointer of an ICMPv6 Parameter Problem (a byte offset into
+ * the 40-byte IPv6 header, used as index) to the matching byte offset
+ * in the IPv4 header.  ~0 (stored as 0xff) marks the flow-label bytes,
+ * which have no IPv4 counterpart.  Source-address bytes 8..23 map to
+ * IPv4 offset 12 and destination-address bytes 24..39 map to IPv4
+ * offset 16, where the IPv4 destination address starts (RFC 7915,
+ * section 5.2). */
+static u8 icmp6_to_icmp_updater_pointer_table[] =
+  { 0, 1, ~0, ~0,
+    2, 2, 9, 8,
+    12, 12, 12, 12,
+    12, 12, 12, 12,
+    12, 12, 12, 12,
+    12, 12, 12, 12,
+    16, 16, 16, 16,
+    16, 16, 16, 16,
+    16, 16, 16, 16,
+    16, 16, 16, 16
+  };
+/* *INDENT-ON* */
+
+/* Fold a 32-bit fragment id by XOR-ing its upper 16 bits into the lower
+ * ones; the result is narrowed when assigned to a 16-bit destination. */
+#define frag_id_6to4(id) ((id) ^ ((id) >> 16))
+
+/**
+ * @brief Locate the L4 header of an IPv6 packet.
+ *
+ * Only a fragment header placed directly after the IPv6 header is
+ * understood; other extension headers are reported as the L4 protocol.
+ *
+ * @param ip6 IPv6 header.
+ * @param buff_len Number of bytes available starting at ip6.
+ * @param l4_protocol Receives the L4 protocol number.
+ * @param l4_offset Receives the L4 header offset from ip6.
+ * @param frag_hdr_offset Receives the fragment header offset if
+ *                        present, 0 otherwise.
+ *
+ * @returns 0 when at least 4 bytes of L4 header fit both in buff_len
+ *          and in the payload length announced by the header,
+ *          non-zero otherwise.
+ */
+static_always_inline int
+ip6_parse (const ip6_header_t * ip6, u32 buff_len,
+	   u8 * l4_protocol, u16 * l4_offset, u16 * frag_hdr_offset)
+{
+  if (ip6->protocol != IP_PROTOCOL_IPV6_FRAGMENTATION)
+    {
+      *l4_protocol = ip6->protocol;
+      *frag_hdr_offset = 0;
+      *l4_offset = sizeof (*ip6);
+    }
+  else
+    {
+      const ip6_frag_hdr_t *frag = (ip6_frag_hdr_t *) (ip6 + 1);
+
+      *l4_protocol = frag->next_hdr;
+      *frag_hdr_offset = sizeof (*ip6);
+      *l4_offset = sizeof (*ip6) + sizeof (ip6_frag_hdr_t);
+    }
+
+  /* The buffer must hold the first 4 bytes of the L4 header ... */
+  if (buff_len < (*l4_offset + 4))
+    return 1;
+
+  /* ... and so must the payload length announced by the header. */
+  return clib_net_to_host_u16 (ip6->payload_length) <
+    (*l4_offset + 4 - sizeof (*ip6));
+}
+
+/**
+ * @brief Get TCP/UDP port number or ICMP id from IPv6 packet.
+ *
+ * For ICMPv6 echo messages the identifier is treated as the sender
+ * port of an echo request and as the receiver port of an echo reply.
+ *
+ * @param ip6 IPv6 header.
+ * @param sender 1 get sender port, 0 get receiver port.
+ * @param buffer_len Buffer length, counted from ip6.
+ *
+ * @returns The port or identifier exactly as stored in the packet (no
+ *          byte swap) on success.  0 when the packet cannot be parsed,
+ *          is a non-first fragment, is too short to hold the ICMP
+ *          identifier, or carries an unsupported protocol/ICMP type.
+ *          0xffff when the requested direction does not apply to the
+ *          echo message (receiver of a request, sender of a reply).
+ */
+always_inline u16
+ip6_get_port (ip6_header_t * ip6, u8 sender, u16 buffer_len)
+{
+  u8 l4_protocol;
+  u16 l4_offset;
+  u16 frag_offset;
+  u8 *l4;
+
+  if (ip6_parse (ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset))
+    return 0;
+
+  if (frag_offset &&
+      ip6_frag_hdr_offset (((ip6_frag_hdr_t *)
+			    u8_ptr_add (ip6, frag_offset))))
+    return 0;			//Can't deal with non-first fragment for now
+
+  l4 = u8_ptr_add (ip6, l4_offset);
+  if (l4_protocol == IP_PROTOCOL_TCP || l4_protocol == IP_PROTOCOL_UDP)
+    {
+      /* TCP and UDP both start with source port, destination port. */
+      udp_header_t *ports = (udp_header_t *) (l4);
+      return (sender) ? ports->src_port : ports->dst_port;
+    }
+  else if (l4_protocol == IP_PROTOCOL_ICMP6)
+    {
+      icmp46_header_t *icmp = (icmp46_header_t *) (l4);
+
+      if (icmp->type == ICMP6_echo_request)
+	{
+	  if (!sender)
+	    return -1;		/* i.e. 0xffff */
+	}
+      else if (icmp->type == ICMP6_echo_reply)
+	{
+	  if (sender)
+	    return -1;		/* i.e. 0xffff */
+	}
+      else
+	return 0;
+
+      /* ip6_parse() only guarantees 4 bytes of L4 header; the echo
+         identifier occupies bytes 4-5, so check before reading it. */
+      if (buffer_len < l4_offset + 6)
+	return 0;
+      return ((u16 *) (icmp))[2];
+    }
+  return 0;
+}
+
+/**
+ * @brief Convert type and code value from ICMP6 to ICMP4.
+ *
+ * The header is rewritten in place.  For error messages the address 8
+ * bytes past the ICMP header is reported as the embedded IPv6 header.
+ *
+ * @param icmp ICMP header.
+ * @param inner_ip6 Receives the inner IPv6 header if present, NULL
+ *                  otherwise.
+ *
+ * @returns 0 on success, -1 for types/codes that are not translated.
+ *          Note that on some -1 paths the header has already been
+ *          partially rewritten (see the parameter-problem case).
+ */
+static_always_inline int
+icmp6_to_icmp_header (icmp46_header_t * icmp, ip6_header_t ** inner_ip6)
+{
+ *inner_ip6 = NULL;
+ switch (icmp->type)
+ {
+ case ICMP6_echo_request:
+ icmp->type = ICMP4_echo_request;
+ break;
+ case ICMP6_echo_reply:
+ icmp->type = ICMP4_echo_reply;
+ break;
+ case ICMP6_destination_unreachable:
+ *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+
+ switch (icmp->code)
+ {
+ case ICMP6_destination_unreachable_no_route_to_destination: //0
+ case ICMP6_destination_unreachable_beyond_scope_of_source_address: //2
+ case ICMP6_destination_unreachable_address_unreachable: //3
+ icmp->type = ICMP4_destination_unreachable;
+ icmp->code =
+ ICMP4_destination_unreachable_destination_unreachable_host;
+ break;
+ case ICMP6_destination_unreachable_destination_administratively_prohibited: //1
+ icmp->type =
+ ICMP4_destination_unreachable;
+ icmp->code =
+ ICMP4_destination_unreachable_communication_administratively_prohibited;
+ break;
+ case ICMP6_destination_unreachable_port_unreachable:
+ icmp->type = ICMP4_destination_unreachable;
+ icmp->code = ICMP4_destination_unreachable_port_unreachable;
+ break;
+ default:
+ return -1;
+ }
+ break;
+ case ICMP6_packet_too_big:
+ *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+
+ /* Becomes "destination unreachable" with code 4. */
+ icmp->type = ICMP4_destination_unreachable;
+ icmp->code = 4;
+ {
+ /* The 32-bit MTU is read right after the ICMP header, reduced by 20
+    and stored as a 16-bit value in the fourth u16 of the message.
+    NOTE(review): there is no check that the MTU is at least 20
+    before the subtraction, and values above 65535 are truncated by
+    the 16-bit conversion -- confirm this is acceptable. */
+ u32 advertised_mtu = clib_net_to_host_u32 (*((u32 *) (icmp + 1)));
+ advertised_mtu -= 20;
+ //FIXME: = minimum(advertised MTU-20, MTU_of_IPv4_nexthop, (MTU_of_IPv6_nexthop)-20)
+ ((u16 *) (icmp))[3] = clib_host_to_net_u16 (advertised_mtu);
+ }
+ break;
+
+ case ICMP6_time_exceeded:
+ *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+
+ /* Only the type changes; the code is carried over as is. */
+ icmp->type = ICMP4_time_exceeded;
+ break;
+
+ case ICMP6_parameter_problem:
+ *inner_ip6 = (ip6_header_t *) u8_ptr_add (icmp, 8);
+
+ switch (icmp->code)
+ {
+ case ICMP6_parameter_problem_erroneous_header_field:
+ icmp->type = ICMP4_parameter_problem;
+ icmp->code = ICMP4_parameter_problem_pointer_indicates_error;
+ /* The pointer is an offset into the IPv6 header; only offsets
+    0..39 are covered by the lookup table.  Type and code have
+    already been rewritten when -1 is returned here. */
+ u32 pointer = clib_net_to_host_u32 (*((u32 *) (icmp + 1)));
+ if (pointer >= 40)
+ return -1;
+
+ /* The translated pointer is a single byte after the ICMP header. */
+ ((u8 *) (icmp + 1))[0] =
+ icmp6_to_icmp_updater_pointer_table[pointer];
+ break;
+ case ICMP6_parameter_problem_unrecognized_next_header:
+ icmp->type = ICMP4_destination_unreachable;
+ icmp->code = ICMP4_destination_unreachable_port_unreachable;
+ break;
+ case ICMP6_parameter_problem_unrecognized_option:
+ default:
+ return -1;
+ }
+ break;
+ default:
+ return -1;
+ break;
+ }
+ return 0;
+}
+
+/**
+ * @brief Translate TOS value from IPv6 to IPv4.
+ *
+ * The combined version/class/flow word is converted to host byte
+ * order and its traffic-class bits (20..27) are returned.
+ *
+ * @param ip6 IPv6 header.
+ *
+ * @returns IPv4 TOS value.
+ */
+static_always_inline u8
+ip6_translate_tos (const ip6_header_t * ip6)
+{
+  const u32 host_word =
+    clib_net_to_host_u32 (ip6->ip_version_traffic_class_and_flow_label);
+
+  return (host_word >> 20) & 0xff;
+}
+
+/**
+ * @brief Translate ICMP6 packet to ICMP4.
+ *
+ * The translation is done in place: the IPv4 header(s) are written inside the
+ * memory currently occupied by the IPv6 header(s) and the buffer is advanced
+ * so that it starts at the new outer IPv4 header. When the ICMP message
+ * embeds an IPv6 packet (inner_ip6 is set by icmp6_to_icmp_header), that
+ * embedded header is translated too and the 8-byte ICMP header is moved up
+ * against the new inner IPv4 header.
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate outer header.
+ * @param ctx A context passed in the outer header translate function.
+ * @param inner_fn The function to translate inner header.
+ * @param inner_ctx A context passed in the inner header translate function.
+ *
+ * @returns 0 on success, non-zero value otherwise: -1 when the packet is not
+ * plain ICMP6, when header parsing/translation fails or when the
+ * embedded L4 protocol is not TCP, UDP or ICMP6; otherwise the
+ * non-zero value returned by fn or inner_fn.
+ */
+always_inline int
+icmp6_to_icmp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx,
+ ip6_to_ip4_set_fn_t inner_fn, void *inner_ctx)
+{
+ ip6_header_t *ip6, *inner_ip6;
+ ip4_header_t *ip4, *inner_ip4;
+ u32 ip6_pay_len;
+ icmp46_header_t *icmp;
+ ip_csum_t csum;
+ int rv;
+
+ ip6 = vlib_buffer_get_current (p);
+ ip6_pay_len = clib_net_to_host_u16 (ip6->payload_length);
+ //The ICMP header is expected right after the fixed IPv6 header
+ icmp = (icmp46_header_t *) (ip6 + 1);
+ ASSERT (ip6_pay_len + sizeof (*ip6) <= p->current_length);
+
+ //No extensions headers allowed here
+ if (ip6->protocol != IP_PROTOCOL_ICMP6)
+ return -1;
+
+ //There are no fragmented ICMP messages, so no extension header for now
+ if (icmp6_to_icmp_header (icmp, &inner_ip6))
+ return -1;
+
+ if (inner_ip6)
+ {
+ u16 *inner_L4_checksum, inner_l4_offset, inner_frag_offset,
+ inner_frag_id;
+ u8 *inner_l4, inner_protocol;
+
+ //We have two headers to translate
+ // FROM
+ // [ IPv6 ]<- ext ->[IC][ IPv6 ]<- ext ->[L4 header ...
+ // Handled cases:
+ // [ IPv6 ][IC][ IPv6 ][L4 header ...
+ // [ IPv6 ][IC][ IPv6 ][Fr][L4 header ...
+ // TO
+ // [ IPv4][IC][ IPv4][L4 header ...
+
+ //The 8 subtracted here matches the 8-byte ICMP header copied below
+ //NOTE(review): ip6_pay_len - 8 wraps if the payload is shorter than 8
+ //bytes - confirm icmp6_to_icmp_header rejects such packets
+ if (ip6_parse (inner_ip6, ip6_pay_len - 8,
+ &inner_protocol, &inner_l4_offset, &inner_frag_offset))
+ return -1;
+
+ inner_l4 = u8_ptr_add (inner_ip6, inner_l4_offset);
+ //The inner IPv4 header is placed directly in front of the inner L4 header
+ inner_ip4 =
+ (ip4_header_t *) u8_ptr_add (inner_l4, -sizeof (*inner_ip4));
+ if (inner_frag_offset)
+ {
+ ip6_frag_hdr_t *inner_frag =
+ (ip6_frag_hdr_t *) u8_ptr_add (inner_ip6, inner_frag_offset);
+ inner_frag_id = frag_id_6to4 (inner_frag->identification);
+ }
+ else
+ {
+ inner_frag_id = 0;
+ }
+
+ //Do the translation of the inner packet
+ if (inner_protocol == IP_PROTOCOL_TCP)
+ {
+ //TCP checksum field is at byte offset 16 of the TCP header
+ inner_L4_checksum = (u16 *) u8_ptr_add (inner_l4, 16);
+ }
+ else if (inner_protocol == IP_PROTOCOL_UDP)
+ {
+ //UDP checksum field is at byte offset 6 of the UDP header
+ inner_L4_checksum = (u16 *) u8_ptr_add (inner_l4, 6);
+ }
+ else if (inner_protocol == IP_PROTOCOL_ICMP6)
+ {
+ icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4;
+ //Any type other than echo request is rewritten as echo reply here.
+ //NOTE(review): the original comment states no other type can reach
+ //this point because ip6_icmp_to_icmp6_in_place succeeded - confirm
+ inner_icmp->type = (inner_icmp->type == ICMP6_echo_request) ?
+ ICMP4_echo_request : ICMP4_echo_reply;
+ inner_protocol = IP_PROTOCOL_ICMP; //Will be copied to inner_ip4 later
+ inner_L4_checksum = &inner_icmp->checksum;
+ }
+ else
+ {
+ return -1;
+ }
+
+ //Remove the IPv6 addresses from the inner L4 checksum; this must happen
+ //before inner_fn and the header writes below reuse that memory
+ csum = *inner_L4_checksum;
+ csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[0]);
+ csum = ip_csum_sub_even (csum, inner_ip6->src_address.as_u64[1]);
+ csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[0]);
+ csum = ip_csum_sub_even (csum, inner_ip6->dst_address.as_u64[1]);
+ *inner_L4_checksum = ip_csum_fold (csum);
+
+ if ((rv = inner_fn (inner_ip6, inner_ip4, inner_ctx)) != 0)
+ return rv;
+
+ inner_ip4->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ inner_ip4->tos = ip6_translate_tos (inner_ip6);
+ //IPv4 total length = IPv6 payload length + IPv4 header - extension bytes
+ inner_ip4->length =
+ u16_net_add (inner_ip6->payload_length,
+ sizeof (*ip4) + sizeof (*ip6) - inner_l4_offset);
+ inner_ip4->fragment_id = inner_frag_id;
+ //NOTE(review): MORE_FRAGMENTS is set even when the inner packet carries
+ //no fragment header - confirm this is intended
+ inner_ip4->flags_and_fragment_offset =
+ clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+ inner_ip4->ttl = inner_ip6->hop_limit;
+ inner_ip4->protocol = inner_protocol;
+ inner_ip4->checksum = ip4_header_checksum (inner_ip4);
+
+ if (inner_ip4->protocol == IP_PROTOCOL_ICMP)
+ {
+ //Recompute ICMP checksum
+ icmp46_header_t *inner_icmp = (icmp46_header_t *) inner_l4;
+ inner_icmp->checksum = 0;
+ csum =
+ ip_incremental_checksum (0, inner_icmp,
+ clib_net_to_host_u16 (inner_ip4->length)
+ - sizeof (*inner_ip4));
+ inner_icmp->checksum = ~ip_csum_fold (csum);
+ }
+ else
+ {
+ //Update to new pseudo-header
+ csum = *inner_L4_checksum;
+ csum = ip_csum_add_even (csum, inner_ip4->src_address.as_u32);
+ csum = ip_csum_add_even (csum, inner_ip4->dst_address.as_u32);
+ *inner_L4_checksum = ip_csum_fold (csum);
+ }
+
+ //Move up icmp header
+ //Resulting layout: [outer IPv4][8-byte ICMP][inner IPv4][inner L4 ...
+ ip4 = (ip4_header_t *) u8_ptr_add (inner_l4, -2 * sizeof (*ip4) - 8);
+ clib_memcpy (u8_ptr_add (inner_l4, -sizeof (*ip4) - 8), icmp, 8);
+ icmp = (icmp46_header_t *) u8_ptr_add (inner_l4, -sizeof (*ip4) - 8);
+ }
+ else
+ {
+ //Only one header to translate
+ //The IPv4 header occupies the last bytes of the IPv6 header
+ ip4 = (ip4_header_t *) u8_ptr_add (ip6, sizeof (*ip6) - sizeof (*ip4));
+ }
+
+ //Make the buffer start at the new outer IPv4 header
+ vlib_buffer_advance (p, (u32) (((u8 *) ip4) - ((u8 *) ip6)));
+
+ if ((rv = fn (ip6, ip4, ctx)) != 0)
+ return rv;
+
+ ip4->ip_version_and_header_length =
+ IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+ ip4->tos = ip6_translate_tos (ip6);
+ ip4->fragment_id = 0;
+ ip4->flags_and_fragment_offset = 0;
+ ip4->ttl = ip6->hop_limit;
+ ip4->protocol = IP_PROTOCOL_ICMP;
+ //TODO fix the length depending on offset length
+ //With an embedded packet the total shrinks by one IPv6 header and grows
+ //by two IPv4 headers; inner extension headers are not accounted for here
+ ip4->length = u16_net_add (ip6->payload_length,
+ (inner_ip6 ==
+ NULL) ? sizeof (*ip4) : (2 * sizeof (*ip4) -
+ sizeof (*ip6)));
+ ip4->checksum = ip4_header_checksum (ip4);
+
+ //Recompute ICMP checksum
+ icmp->checksum = 0;
+ csum =
+ ip_incremental_checksum (0, icmp,
+ clib_net_to_host_u16 (ip4->length) -
+ sizeof (*ip4));
+ icmp->checksum = ~ip_csum_fold (csum);
+
+ return 0;
+}
+
+/**
+ * @brief Translate IPv6 fragmented packet to IPv4.
+ *
+ * The IPv4 header is built in place, directly in front of the L4 payload,
+ * and the buffer is advanced so that it starts at that header. The fragment
+ * identification, offset and more-fragments flag are taken from the IPv6
+ * fragment header.
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate header.
+ * @param ctx A context passed in the header translate function.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+always_inline int
+ip6_to_ip4_fragmented (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx)
+{
+  ip6_header_t *ip6 = vlib_buffer_get_current (p);
+  ip6_frag_hdr_t *frag_hdr;
+  ip4_header_t *ip4;
+  u16 frag_hdr_offset;
+  u16 l4_offset;
+  u16 ip4_frag_id;
+  u16 ip4_frag_offset;
+  u16 flags_and_offset;
+  u8 more_fragments;
+  u8 l4_protocol;
+  int rv;
+
+  if (ip6_parse (ip6, p->current_length, &l4_protocol, &l4_offset,
+                 &frag_hdr_offset))
+    return -1;
+
+  frag_hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset);
+  ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));
+  vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
+
+  /* Read the fragment header before the IPv4 header is written over it */
+  ip4_frag_id = frag_id_6to4 (frag_hdr->identification);
+  more_fragments = ip6_frag_hdr_more (frag_hdr);
+  ip4_frag_offset = ip6_frag_hdr_offset (frag_hdr);
+
+  rv = fn (ip6, ip4, ctx);
+  if (rv != 0)
+    return rv;
+
+  flags_and_offset = ip4_frag_offset;
+  if (more_fragments)
+    flags_and_offset |= IP4_HEADER_FLAG_MORE_FRAGMENTS;
+
+  ip4->ip_version_and_header_length =
+    IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+  ip4->tos = ip6_translate_tos (ip6);
+  ip4->length = u16_net_add (ip6->payload_length,
+                             sizeof (*ip4) - l4_offset + sizeof (*ip6));
+  ip4->fragment_id = ip4_frag_id;
+  ip4->flags_and_fragment_offset = clib_host_to_net_u16 (flags_and_offset);
+  ip4->ttl = ip6->hop_limit;
+  if (l4_protocol == IP_PROTOCOL_ICMP6)
+    ip4->protocol = IP_PROTOCOL_ICMP;
+  else
+    ip4->protocol = l4_protocol;
+  ip4->checksum = ip4_header_checksum (ip4);
+
+  return 0;
+}
+
+/**
+ * @brief Translate IPv6 UDP/TCP packet to IPv4.
+ *
+ * The IPv4 header is written in place directly in front of the L4 header and
+ * the buffer is advanced to it. The L4 checksum is adjusted incrementally:
+ * the IPv6 addresses are subtracted before the header rewrite and the IPv4
+ * addresses set by @c fn are added afterwards.
+ *
+ * Any L4 protocol other than TCP is handled as UDP.
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate header.
+ * @param ctx A context passed in the header translate function.
+ * @param udp_checksum When zero and the packet is not TCP, the L4 checksum
+ *                     is not updated but written as 0 instead.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+always_inline int
+ip6_to_ip4_tcp_udp (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx,
+                    u8 udp_checksum)
+{
+  ip6_header_t *ip6;
+  u16 *checksum;
+  ip_csum_t csum = 0;
+  ip4_header_t *ip4;
+  u16 fragment_id;
+  u16 flags;
+  u16 frag_offset;
+  u8 l4_protocol;
+  u16 l4_offset;
+  int rv;
+
+  ip6 = vlib_buffer_get_current (p);
+
+  if (ip6_parse
+      (ip6, p->current_length, &l4_protocol, &l4_offset, &frag_offset))
+    return -1;
+
+  /*
+   * Locate the L4 header through the offset reported by ip6_parse, so that
+   * a fragment header sitting between the IPv6 header and the L4 header is
+   * skipped. This is the same offset used below to place the IPv4 header.
+   */
+  if (l4_protocol == IP_PROTOCOL_TCP)
+    {
+      tcp_header_t *tcp = (tcp_header_t *) u8_ptr_add (ip6, l4_offset);
+      checksum = &tcp->checksum;
+    }
+  else
+    {
+      udp_header_t *udp = (udp_header_t *) u8_ptr_add (ip6, l4_offset);
+      checksum = &udp->checksum;
+      //UDP checksum is optional over IPv4
+      if (!udp_checksum)
+        goto no_csum;
+    }
+
+  //Remove the IPv6 pseudo-header addresses before they get overwritten
+  csum = ip_csum_sub_even (*checksum, ip6->src_address.as_u64[0]);
+  csum = ip_csum_sub_even (csum, ip6->src_address.as_u64[1]);
+  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[0]);
+  csum = ip_csum_sub_even (csum, ip6->dst_address.as_u64[1]);
+  *checksum = ip_csum_fold (csum);
+
+no_csum:
+  ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));
+
+  vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
+
+  if (PREDICT_FALSE (frag_offset))
+    {
+      //Only the first fragment
+      ip6_frag_hdr_t *hdr = (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset);
+      fragment_id = frag_id_6to4 (hdr->identification);
+      flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+    }
+  else
+    {
+      fragment_id = 0;
+      flags = 0;
+    }
+
+  if ((rv = fn (ip6, ip4, ctx)) != 0)
+    return rv;
+
+  ip4->ip_version_and_header_length =
+    IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+  ip4->tos = ip6_translate_tos (ip6);
+  ip4->length = u16_net_add (ip6->payload_length,
+                             sizeof (*ip4) + sizeof (*ip6) - l4_offset);
+  ip4->fragment_id = fragment_id;
+  ip4->flags_and_fragment_offset = flags;
+  ip4->ttl = ip6->hop_limit;
+  ip4->protocol = l4_protocol;
+  ip4->checksum = ip4_header_checksum (ip4);
+
+  //UDP checksum is optional over IPv4
+  if (!udp_checksum && l4_protocol == IP_PROTOCOL_UDP)
+    {
+      *checksum = 0;
+    }
+  else
+    {
+      //Add the IPv4 pseudo-header addresses written by fn
+      csum = ip_csum_add_even (*checksum, ip4->dst_address.as_u32);
+      csum = ip_csum_add_even (csum, ip4->src_address.as_u32);
+      *checksum = ip_csum_fold (csum);
+    }
+
+  return 0;
+}
+
+/**
+ * @brief Translate IPv6 packet to IPv4 (IP header only).
+ *
+ * The IPv4 header is written in place directly in front of the L4 header and
+ * the buffer is advanced to it; the L4 payload itself is left untouched.
+ *
+ * @param p Buffer to translate.
+ * @param fn The function to translate header.
+ * @param ctx A context passed in the header translate function.
+ *
+ * @returns 0 on success, non-zero value otherwise.
+ */
+always_inline int
+ip6_to_ip4 (vlib_buffer_t * p, ip6_to_ip4_set_fn_t fn, void *ctx)
+{
+  ip6_header_t *ip6 = vlib_buffer_get_current (p);
+  ip4_header_t *ip4;
+  u16 ip4_frag_id = 0;
+  u16 ip4_flags = 0;
+  u16 frag_hdr_offset;
+  u16 l4_offset;
+  u8 l4_protocol;
+  int rv;
+
+  if (ip6_parse (ip6, p->current_length, &l4_protocol, &l4_offset,
+                 &frag_hdr_offset))
+    return -1;
+
+  ip4 = (ip4_header_t *) u8_ptr_add (ip6, l4_offset - sizeof (*ip4));
+
+  vlib_buffer_advance (p, l4_offset - sizeof (*ip4));
+
+  if (PREDICT_FALSE (frag_hdr_offset))
+    {
+      /* Only the first fragment */
+      ip6_frag_hdr_t *frag_hdr =
+        (ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_hdr_offset);
+
+      ip4_frag_id = frag_id_6to4 (frag_hdr->identification);
+      ip4_flags = clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS);
+    }
+
+  rv = fn (ip6, ip4, ctx);
+  if (rv != 0)
+    return rv;
+
+  ip4->ip_version_and_header_length =
+    IP4_VERSION_AND_HEADER_LENGTH_NO_OPTIONS;
+  ip4->tos = ip6_translate_tos (ip6);
+  ip4->length = u16_net_add (ip6->payload_length,
+                             sizeof (*ip4) + sizeof (*ip6) - l4_offset);
+  ip4->fragment_id = ip4_frag_id;
+  ip4->flags_and_fragment_offset = ip4_flags;
+  ip4->ttl = ip6->hop_limit;
+  ip4->protocol = l4_protocol;
+  ip4->checksum = ip4_header_checksum (ip4);
+
+  return 0;
+}
+
+#endif /* __included_ip6_to_ip4_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
new file mode 100644
index 00000000..e13e6e64
--- /dev/null
+++ b/src/vnet/ip/ip_api.c
@@ -0,0 +1,1825 @@
+/*
+ *------------------------------------------------------------------
+ * ip_api.c - vnet ip api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6_neighbor.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_api.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/lookup_dpo.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/dpo/ip_null_dpo.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <vnet/mfib/ip6_mfib.h>
+#include <vnet/mfib/ip4_mfib.h>
+#include <vnet/mfib/mfib_signal.h>
+#include <vnet/mfib/mfib_entry.h>
+
+#include <vnet/vnet_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_typedefs
+
+#define vl_endianfun /* define message structures */
+#include <vnet/vnet_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <vnet/vnet_all_api_h.h>
+#undef vl_printfun
+
+#include <vlibapi/api_helper_macros.h>
+
+
+/*
+ * X-macro list of the IP API messages: each _(ID, name) entry pairs an
+ * upper-case message identifier with its lower-case message name.
+ * NOTE(review): presumably expanded with a local definition of _() to
+ * register the vl_api_<name>_t_handler functions - the expansion site is
+ * outside this excerpt, confirm.
+ */
+#define foreach_ip_api_msg \
+_(IP_FIB_DUMP, ip_fib_dump) \
+_(IP6_FIB_DUMP, ip6_fib_dump) \
+_(IP_MFIB_DUMP, ip_mfib_dump) \
+_(IP6_MFIB_DUMP, ip6_mfib_dump) \
+_(IP_NEIGHBOR_DUMP, ip_neighbor_dump) \
+_(IP_MROUTE_ADD_DEL, ip_mroute_add_del) \
+_(MFIB_SIGNAL_DUMP, mfib_signal_dump) \
+_(IP_ADDRESS_DUMP, ip_address_dump) \
+_(IP_DUMP, ip_dump) \
+_(IP_NEIGHBOR_ADD_DEL, ip_neighbor_add_del) \
+_(IP_ADD_DEL_ROUTE, ip_add_del_route) \
+_(IP_TABLE_ADD_DEL, ip_table_add_del) \
+_(SET_IP_FLOW_HASH,set_ip_flow_hash) \
+_(SW_INTERFACE_IP6ND_RA_CONFIG, sw_interface_ip6nd_ra_config) \
+_(SW_INTERFACE_IP6ND_RA_PREFIX, sw_interface_ip6nd_ra_prefix) \
+_(IP6ND_PROXY_ADD_DEL, ip6nd_proxy_add_del) \
+_(IP6ND_PROXY_DUMP, ip6nd_proxy_dump) \
+_(SW_INTERFACE_IP6_ENABLE_DISABLE, sw_interface_ip6_enable_disable ) \
+_(SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS, \
+ sw_interface_ip6_set_link_local_address)
+
+extern void stats_dslock_with_hint (int hint, int tag);
+extern void stats_dsunlock (void);
+
+/**
+ * Build one IP_NEIGHBOR_DETAILS message and send it to the client queue.
+ *
+ * @param is_ipv6     Non-zero when ip_address holds a 16-byte IPv6 address,
+ *                    zero for a 4-byte IPv4 address.
+ * @param is_static   Copied into the message as-is.
+ * @param mac_address 6-byte link-layer address.
+ * @param ip_address  Neighbor address, 16 or 4 bytes depending on is_ipv6.
+ * @param q           Client input queue the message is sent to.
+ * @param context     Request context, copied into the message unchanged.
+ */
+static void
+send_ip_neighbor_details (u8 is_ipv6,
+                          u8 is_static,
+                          u8 * mac_address,
+                          u8 * ip_address,
+                          unix_shared_memory_queue_t * q, u32 context)
+{
+  vl_api_ip_neighbor_details_t *mp;
+
+  mp = vl_msg_api_alloc (sizeof (*mp));
+  /* Same allocation guard as the other send_*_details helpers in this file */
+  if (!mp)
+    return;
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = ntohs (VL_API_IP_NEIGHBOR_DETAILS);
+  mp->context = context;
+  mp->is_ipv6 = is_ipv6;
+  mp->is_static = is_static;
+  memcpy (mp->mac_address, mac_address, 6);
+  memcpy (mp->ip_address, ip_address, (is_ipv6) ? 16 : 4);
+
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+/**
+ * Handler for IP_NEIGHBOR_DUMP: send one details message per IPv6 neighbor
+ * or IPv4 ARP entry of the requested interface, depending on mp->is_ipv6.
+ * Nothing is sent when the client queue cannot be found.
+ */
+static void
+vl_api_ip_neighbor_dump_t_handler (vl_api_ip_neighbor_dump_t * mp)
+{
+  unix_shared_memory_queue_t *q;
+  u32 sw_if_index;
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (q == 0)
+    return;
+
+  sw_if_index = ntohl (mp->sw_if_index);
+
+  if (!mp->is_ipv6)
+    {
+      ethernet_arp_ip4_entry_t *arp, *arp_entries;
+
+      arp_entries = ip4_neighbor_entries (sw_if_index);
+      /* *INDENT-OFF* */
+      vec_foreach (arp, arp_entries)
+      {
+        u8 is_static =
+          (arp->flags & ETHERNET_ARP_IP4_ENTRY_FLAG_STATIC) ? 1 : 0;
+
+        send_ip_neighbor_details (mp->is_ipv6, is_static,
+                                  (u8 *) arp->ethernet_address,
+                                  (u8 *) & (arp->ip4_address.as_u8),
+                                  q, mp->context);
+      }
+      /* *INDENT-ON* */
+      vec_free (arp_entries);
+    }
+  else
+    {
+      ip6_neighbor_t *nbr, *nbrs;
+
+      nbrs = ip6_neighbors_entries (sw_if_index);
+      /* *INDENT-OFF* */
+      vec_foreach (nbr, nbrs)
+      {
+        u8 is_static = (nbr->flags & IP6_NEIGHBOR_FLAG_STATIC) ? 1 : 0;
+
+        send_ip_neighbor_details (mp->is_ipv6, is_static,
+                                  (u8 *) nbr->link_layer_address,
+                                  (u8 *) & (nbr->key.ip6_address.as_u8),
+                                  q, mp->context);
+      }
+      /* *INDENT-ON* */
+      vec_free (nbrs);
+    }
+}
+
+
+/**
+ * Fill the address family and next-hop address of an API path from an
+ * encoded FIB path.
+ *
+ * The family follows the path protocol when it is IP4 or IP6; for any other
+ * protocol it is derived from the next-hop address itself.
+ *
+ * @param api_rpath Encoded FIB path to read from.
+ * @param fp_arg    Destination, a vl_api_fib_path_t passed as void *.
+ */
+void
+copy_fib_next_hop (fib_route_path_encode_t * api_rpath, void *fp_arg)
+{
+  vl_api_fib_path_t *fp = (vl_api_fib_path_t *) fp_arg;
+
+  switch (api_rpath->rpath.frp_proto)
+    {
+    case DPO_PROTO_IP4:
+      fp->afi = IP46_TYPE_IP4;
+      break;
+    case DPO_PROTO_IP6:
+      fp->afi = IP46_TYPE_IP6;
+      break;
+    default:
+      fp->afi = ip46_address_is_ip4 (&api_rpath->rpath.frp_addr) ?
+        IP46_TYPE_IP4 : IP46_TYPE_IP6;
+      break;
+    }
+
+  if (fp->afi != IP46_TYPE_IP4)
+    {
+      memcpy (fp->next_hop, &api_rpath->rpath.frp_addr.ip6,
+              sizeof (api_rpath->rpath.frp_addr.ip6));
+      return;
+    }
+
+  memcpy (fp->next_hop, &api_rpath->rpath.frp_addr.ip4,
+          sizeof (api_rpath->rpath.frp_addr.ip4));
+}
+
+/**
+ * Build one IP_FIB_DETAILS message for an IPv4 prefix, with one trailing
+ * vl_api_fib_path_t per encoded path, and send it to the client queue.
+ * Nothing is sent when the message cannot be allocated.
+ *
+ * @param am         Unused here.
+ * @param q          Client input queue.
+ * @param table      FIB table the entry belongs to (id and name are sent).
+ * @param pfx        Prefix of the entry; only the IPv4 address is copied.
+ * @param api_rpaths Vector of encoded paths, may be empty.
+ * @param context    Request context, copied into the message unchanged.
+ */
+static void
+send_ip_fib_details (vpe_api_main_t * am,
+                     unix_shared_memory_queue_t * q,
+                     const fib_table_t * table,
+                     const fib_prefix_t * pfx,
+                     fib_route_path_encode_t * api_rpaths, u32 context)
+{
+  vl_api_ip_fib_details_t *mp;
+  fib_route_path_encode_t *rpath;
+  vl_api_fib_path_t *fp;
+  int path_count = vec_len (api_rpaths);
+  int i;
+
+  mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp));
+  if (!mp)
+    return;
+
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = ntohs (VL_API_IP_FIB_DETAILS);
+  mp->context = context;
+  mp->table_id = htonl (table->ft_table_id);
+  memcpy (mp->table_name, table->ft_desc,
+          clib_min (vec_len (table->ft_desc), sizeof (mp->table_name)));
+  mp->address_length = pfx->fp_len;
+  memcpy (mp->address, &pfx->fp_addr.ip4, sizeof (pfx->fp_addr.ip4));
+  mp->count = htonl (path_count);
+
+  for (i = 0; i < path_count; i++)
+    {
+      rpath = &api_rpaths[i];
+      fp = &mp->path[i];
+
+      memset (fp, 0, sizeof (*fp));
+
+      /* Translate the DPO type (and null action) into the API flags */
+      if (DPO_RECEIVE == rpath->dpo.dpoi_type)
+        fp->is_local = true;
+      else if (DPO_DROP == rpath->dpo.dpoi_type)
+        fp->is_drop = true;
+      else if (DPO_IP_NULL == rpath->dpo.dpoi_type)
+        {
+          if (IP_NULL_ACTION_NONE == rpath->dpo.dpoi_index)
+            fp->is_drop = true;
+          else if (IP_NULL_ACTION_SEND_ICMP_UNREACH == rpath->dpo.dpoi_index)
+            fp->is_unreach = true;
+          else if (IP_NULL_ACTION_SEND_ICMP_PROHIBIT ==
+                   rpath->dpo.dpoi_index)
+            fp->is_prohibit = true;
+        }
+
+      fp->weight = rpath->rpath.frp_weight;
+      fp->preference = rpath->rpath.frp_preference;
+      fp->sw_if_index = htonl (rpath->rpath.frp_sw_if_index);
+      copy_fib_next_hop (rpath, fp);
+    }
+
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+/** Context of the IPv4 FIB dump walk: vector of visited entry indices. */
+typedef struct vl_api_ip_fib_dump_walk_ctx_t_
+{
+  fib_node_index_t *feis;
+} vl_api_ip_fib_dump_walk_ctx_t;
+
+/**
+ * Walk callback: append the visited FIB entry index to the context vector.
+ *
+ * @param fei FIB entry index being visited.
+ * @param arg The vl_api_ip_fib_dump_walk_ctx_t, passed as void *.
+ *
+ * @returns always 1.
+ */
+static int
+vl_api_ip_fib_dump_walk (fib_node_index_t fei, void *arg)
+{
+  vl_api_ip_fib_dump_walk_ctx_t *walk_ctx =
+    (vl_api_ip_fib_dump_walk_ctx_t *) arg;
+
+  vec_add1 (walk_ctx->feis, fei);
+  return 1;
+}
+
+/**
+ * Handler for IP_FIB_DUMP: collect the entry indices of every IPv4 FIB
+ * table, sort them, then send one IP_FIB_DETAILS message per entry.
+ * Nothing is sent when the client queue cannot be found.
+ */
+static void
+vl_api_ip_fib_dump_t_handler (vl_api_ip_fib_dump_t * mp)
+{
+  vl_api_ip_fib_dump_walk_ctx_t walk_ctx = {
+    .feis = NULL,
+  };
+  vpe_api_main_t *am = &vpe_api_main;
+  ip4_main_t *im = &ip4_main;
+  unix_shared_memory_queue_t *q;
+  fib_table_t *fib_table;
+  fib_node_index_t *fei;
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (q == 0)
+    return;
+
+  /* Pass 1: gather the entries of all tables */
+  /* *INDENT-OFF* */
+  pool_foreach (fib_table, im->fibs,
+  ({
+    fib_table_walk (fib_table->ft_index,
+                    FIB_PROTOCOL_IP4,
+                    vl_api_ip_fib_dump_walk,
+                    &walk_ctx);
+  }));
+  /* *INDENT-ON* */
+
+  vec_sort_with_function (walk_ctx.feis, fib_entry_cmp_for_sort);
+
+  /* Pass 2: encode and send each entry */
+  vec_foreach (fei, walk_ctx.feis)
+  {
+    fib_route_path_encode_t *api_rpaths = NULL;
+    fib_prefix_t pfx;
+    u32 fib_index;
+
+    fib_entry_get_prefix (*fei, &pfx);
+    fib_index = fib_entry_get_fib_index (*fei);
+    fib_table = fib_table_get (fib_index, pfx.fp_proto);
+    fib_entry_encode (*fei, &api_rpaths);
+    send_ip_fib_details (am, q, fib_table, &pfx, api_rpaths, mp->context);
+    vec_free (api_rpaths);
+  }
+
+  vec_free (walk_ctx.feis);
+}
+
+/**
+ * Build one IP6_FIB_DETAILS message for an IPv6 prefix, with one trailing
+ * vl_api_fib_path_t per encoded path, and send it to the client queue.
+ * Nothing is sent when the message cannot be allocated.
+ *
+ * @param am         Unused here.
+ * @param q          Client input queue.
+ * @param table_id   Table id reported in the message.
+ * @param pfx        Prefix of the entry; only the IPv6 address is copied.
+ * @param api_rpaths Vector of encoded paths, may be empty.
+ * @param context    Request context, copied into the message unchanged.
+ */
+static void
+send_ip6_fib_details (vpe_api_main_t * am,
+                      unix_shared_memory_queue_t * q,
+                      u32 table_id, fib_prefix_t * pfx,
+                      fib_route_path_encode_t * api_rpaths, u32 context)
+{
+  vl_api_ip6_fib_details_t *mp;
+  fib_route_path_encode_t *rpath;
+  vl_api_fib_path_t *fp;
+  int path_count = vec_len (api_rpaths);
+  int i;
+
+  mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp));
+  if (!mp)
+    return;
+
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = ntohs (VL_API_IP6_FIB_DETAILS);
+  mp->context = context;
+  mp->table_id = htonl (table_id);
+  mp->address_length = pfx->fp_len;
+  memcpy (mp->address, &pfx->fp_addr.ip6, sizeof (pfx->fp_addr.ip6));
+  mp->count = htonl (path_count);
+
+  for (i = 0; i < path_count; i++)
+    {
+      rpath = &api_rpaths[i];
+      fp = &mp->path[i];
+
+      memset (fp, 0, sizeof (*fp));
+
+      /*
+       * Translate the DPO type into the API flags. For the null DPO the
+       * index is compared against the action offset by
+       * IP_NULL_DPO_ACTION_NUM.
+       */
+      if (DPO_RECEIVE == rpath->dpo.dpoi_type)
+        fp->is_local = true;
+      else if (DPO_DROP == rpath->dpo.dpoi_type)
+        fp->is_drop = true;
+      else if (DPO_IP_NULL == rpath->dpo.dpoi_type)
+        {
+          if (IP_NULL_DPO_ACTION_NUM + IP_NULL_ACTION_NONE ==
+              rpath->dpo.dpoi_index)
+            fp->is_drop = true;
+          else if (IP_NULL_DPO_ACTION_NUM +
+                   IP_NULL_ACTION_SEND_ICMP_UNREACH == rpath->dpo.dpoi_index)
+            fp->is_unreach = true;
+          else if (IP_NULL_DPO_ACTION_NUM +
+                   IP_NULL_ACTION_SEND_ICMP_PROHIBIT == rpath->dpo.dpoi_index)
+            fp->is_prohibit = true;
+        }
+
+      fp->weight = rpath->rpath.frp_weight;
+      fp->preference = rpath->rpath.frp_preference;
+      fp->sw_if_index = htonl (rpath->rpath.frp_sw_if_index);
+      copy_fib_next_hop (rpath, fp);
+    }
+
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+/**
+ * Context of the IPv6 FIB hash scan: the table being dumped and the vector
+ * of entry indices collected for it.
+ */
+typedef struct apt_ip6_fib_show_ctx_t_
+{
+  u32 fib_index;
+  fib_node_index_t *entries;
+} api_ip6_fib_show_ctx_t;
+
+/**
+ * Hash scan callback: keep the value of a key/value pair when the upper 32
+ * bits of key[2] equal the FIB index in the context.
+ *
+ * @param kvp Key/value pair being visited.
+ * @param arg The api_ip6_fib_show_ctx_t, passed as void *.
+ */
+static void
+api_ip6_fib_table_put_entries (clib_bihash_kv_24_8_t * kvp, void *arg)
+{
+  api_ip6_fib_show_ctx_t *show_ctx = (api_ip6_fib_show_ctx_t *) arg;
+
+  if ((kvp->key[2] >> 32) != show_ctx->fib_index)
+    return;
+
+  vec_add1 (show_ctx->entries, kvp->value);
+}
+
+/**
+ * Send one IP6_FIB_DETAILS message for every entry of one IPv6 FIB table.
+ *
+ * The entries are gathered by scanning the non-forwarding IPv6 hash and
+ * keeping those that belong to the table, then sorted before being sent.
+ *
+ * @param q         Client input queue.
+ * @param mp        The dump request (its context is echoed in each reply).
+ * @param fib_table Table to dump.
+ */
+static void
+api_ip6_fib_table_get_all (unix_shared_memory_queue_t * q,
+                           vl_api_ip6_fib_dump_t * mp,
+                           fib_table_t * fib_table)
+{
+  api_ip6_fib_show_ctx_t show_ctx = {
+    .fib_index = fib_table->ft_index,
+    .entries = NULL,
+  };
+  vpe_api_main_t *am = &vpe_api_main;
+  ip6_main_t *im6 = &ip6_main;
+  fib_node_index_t *fei;
+
+  BV (clib_bihash_foreach_key_value_pair)
+    ((BVT (clib_bihash) *) & im6->ip6_table[IP6_FIB_TABLE_NON_FWDING].
+     ip6_hash, api_ip6_fib_table_put_entries, &show_ctx);
+
+  vec_sort_with_function (show_ctx.entries, fib_entry_cmp_for_sort);
+
+  vec_foreach (fei, show_ctx.entries)
+  {
+    fib_route_path_encode_t *api_rpaths = NULL;
+    fib_prefix_t pfx;
+
+    fib_entry_get_prefix (*fei, &pfx);
+    fib_entry_encode (*fei, &api_rpaths);
+    send_ip6_fib_details (am, q,
+                          fib_table->ft_table_id,
+                          &pfx, api_rpaths, mp->context);
+    vec_free (api_rpaths);
+  }
+
+  vec_free (show_ctx.entries);
+}
+
+/**
+ * Handler for IP6_FIB_DUMP: dump every IPv6 FIB table in turn.
+ * Nothing is sent when the client queue cannot be found.
+ */
+static void
+vl_api_ip6_fib_dump_t_handler (vl_api_ip6_fib_dump_t * mp)
+{
+  ip6_main_t *im6 = &ip6_main;
+  unix_shared_memory_queue_t *q;
+  fib_table_t *table;
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (q == 0)
+    return;
+
+  /* *INDENT-OFF* */
+  pool_foreach (table, im6->fibs,
+  ({
+    api_ip6_fib_table_get_all (q, mp, table);
+  }));
+  /* *INDENT-ON* */
+}
+
+/**
+ * Build one IP_MFIB_DETAILS message for an IPv4 multicast FIB entry, with
+ * one trailing vl_api_fib_path_t per encoded path, and send it to the
+ * client queue. Nothing is sent when the message cannot be allocated.
+ *
+ * @param q        Client input queue.
+ * @param context  Request context, copied into the message unchanged.
+ * @param table_id Table id reported in the message.
+ * @param mfei     Index of the mfib entry to describe.
+ */
+static void
+send_ip_mfib_details (unix_shared_memory_queue_t * q,
+                      u32 context, u32 table_id, fib_node_index_t mfei)
+{
+  fib_route_path_encode_t *api_rpath, *api_rpaths = NULL;
+  vl_api_ip_mfib_details_t *mp;
+  mfib_entry_t *mfib_entry;
+  vl_api_fib_path_t *fp;
+  mfib_prefix_t pfx;
+  int path_count;
+
+  mfib_entry = mfib_entry_get (mfei);
+  mfib_entry_get_prefix (mfei, &pfx);
+  mfib_entry_encode (mfei, &api_rpaths);
+
+  path_count = vec_len (api_rpaths);
+  mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp));
+  if (!mp)
+    {
+      /* The encoded paths are owned by this function: do not leak them */
+      vec_free (api_rpaths);
+      return;
+    }
+  memset (mp, 0, sizeof (*mp));
+  /* The message built here is an mfib details message, not a fib one */
+  mp->_vl_msg_id = ntohs (VL_API_IP_MFIB_DETAILS);
+  mp->context = context;
+
+  /* NOTE(review): rpf_id and entry_flags are copied without byte-order
+   * conversion, unlike table_id and count - confirm against the message
+   * definition */
+  mp->rpf_id = mfib_entry->mfe_rpf_id;
+  mp->entry_flags = mfib_entry->mfe_flags;
+  mp->table_id = htonl (table_id);
+  mp->address_length = pfx.fp_len;
+  memcpy (mp->grp_address, &pfx.fp_grp_addr.ip4,
+          sizeof (pfx.fp_grp_addr.ip4));
+  memcpy (mp->src_address, &pfx.fp_src_addr.ip4,
+          sizeof (pfx.fp_src_addr.ip4));
+
+  mp->count = htonl (path_count);
+  fp = mp->path;
+  vec_foreach (api_rpath, api_rpaths)
+  {
+    memset (fp, 0, sizeof (*fp));
+
+    fp->weight = 0;
+    fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index);
+    copy_fib_next_hop (api_rpath, fp);
+    fp++;
+  }
+  vec_free (api_rpaths);
+
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+/** Context of the IPv4 mfib dump walk: vector of visited entry indices. */
+typedef struct vl_api_ip_mfib_dump_ctc_t_
+{
+  fib_node_index_t *entries;
+} vl_api_ip_mfib_dump_ctc_t;
+
+/**
+ * Walk callback: append the visited mfib entry index to the context vector.
+ *
+ * @param fei Entry index being visited.
+ * @param arg The vl_api_ip_mfib_dump_ctc_t, passed as void *.
+ *
+ * @returns always 0.
+ */
+static int
+vl_api_ip_mfib_table_dump_walk (fib_node_index_t fei, void *arg)
+{
+  vl_api_ip_mfib_dump_ctc_t *dump_ctx = (vl_api_ip_mfib_dump_ctc_t *) arg;
+
+  vec_add1 (dump_ctx->entries, fei);
+  return 0;
+}
+
+/**
+ * Handler for IP_MFIB_DUMP: for each IPv4 mfib table, collect its entries,
+ * sort them and send one details message per entry. The entry vector is
+ * reused across tables and released at the end.
+ * Nothing is sent when the client queue cannot be found.
+ */
+static void
+vl_api_ip_mfib_dump_t_handler (vl_api_ip_mfib_dump_t * mp)
+{
+  vl_api_ip_mfib_dump_ctc_t dump_ctx = {
+    .entries = NULL,
+  };
+  ip4_main_t *im = &ip4_main;
+  unix_shared_memory_queue_t *q;
+  mfib_table_t *table;
+  fib_node_index_t *mfei;
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (q == 0)
+    return;
+
+  /* *INDENT-OFF* */
+  pool_foreach (table, im->mfibs,
+  ({
+    ip4_mfib_table_walk (&table->v4,
+                         vl_api_ip_mfib_table_dump_walk,
+                         &dump_ctx);
+
+    vec_sort_with_function (dump_ctx.entries, mfib_entry_cmp_for_sort);
+
+    vec_foreach (mfei, dump_ctx.entries)
+    {
+      send_ip_mfib_details (q, mp->context, table->mft_table_id, *mfei);
+    }
+
+    /* Keep the allocation, drop the contents, for the next table */
+    vec_reset_length (dump_ctx.entries);
+  }));
+  /* *INDENT-ON* */
+
+  vec_free (dump_ctx.entries);
+}
+
+/**
+ * Build one IP6_MFIB_DETAILS message for an IPv6 multicast prefix, with one
+ * trailing vl_api_fib_path_t per encoded path, and send it to the client
+ * queue. Nothing is sent when the message cannot be allocated.
+ *
+ * @param am         Unused here.
+ * @param q          Client input queue.
+ * @param table_id   Table id reported in the message.
+ * @param pfx        Multicast prefix (group and source addresses are sent).
+ * @param api_rpaths Vector of encoded paths, may be empty; not freed here.
+ * @param context    Request context, copied into the message unchanged.
+ */
+static void
+send_ip6_mfib_details (vpe_api_main_t * am,
+                       unix_shared_memory_queue_t * q,
+                       u32 table_id,
+                       mfib_prefix_t * pfx,
+                       fib_route_path_encode_t * api_rpaths, u32 context)
+{
+  vl_api_ip6_mfib_details_t *mp;
+  fib_route_path_encode_t *api_rpath;
+  vl_api_fib_path_t *fp;
+  int path_count;
+
+  path_count = vec_len (api_rpaths);
+  mp = vl_msg_api_alloc (sizeof (*mp) + path_count * sizeof (*fp));
+  if (!mp)
+    return;
+  memset (mp, 0, sizeof (*mp));
+  /* The message built here is an mfib details message, not a fib one */
+  mp->_vl_msg_id = ntohs (VL_API_IP6_MFIB_DETAILS);
+  mp->context = context;
+
+  mp->table_id = htonl (table_id);
+  mp->address_length = pfx->fp_len;
+  memcpy (mp->grp_address, &pfx->fp_grp_addr.ip6,
+          sizeof (pfx->fp_grp_addr.ip6));
+  memcpy (mp->src_address, &pfx->fp_src_addr.ip6,
+          sizeof (pfx->fp_src_addr.ip6));
+
+  mp->count = htonl (path_count);
+  fp = mp->path;
+  vec_foreach (api_rpath, api_rpaths)
+  {
+    memset (fp, 0, sizeof (*fp));
+
+    fp->weight = 0;
+    fp->sw_if_index = htonl (api_rpath->rpath.frp_sw_if_index);
+    copy_fib_next_hop (api_rpath, fp);
+    fp++;
+  }
+
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+/** Context of the IPv6 mfib dump walk: vector of visited entry indices. */
+typedef struct vl_api_ip6_mfib_dump_ctc_t_
+{
+  fib_node_index_t *entries;
+} vl_api_ip6_mfib_dump_ctc_t;
+
+/**
+ * Walk callback: append the visited mfib entry index to the context vector.
+ *
+ * @param fei Entry index being visited.
+ * @param arg The vl_api_ip6_mfib_dump_ctc_t, passed as void *.
+ *
+ * @returns always 0.
+ */
+static int
+vl_api_ip6_mfib_table_dump_walk (fib_node_index_t fei, void *arg)
+{
+  vl_api_ip6_mfib_dump_ctc_t *dump_ctx = (vl_api_ip6_mfib_dump_ctc_t *) arg;
+
+  vec_add1 (dump_ctx->entries, fei);
+  return 0;
+}
+
+/**
+ * Handler for IP6_MFIB_DUMP: for each IPv6 mfib table, collect its entries,
+ * sort them and send one details message per entry.
+ * Nothing is sent when the client queue cannot be found.
+ *
+ * The path vector is emptied before each entry is encoded, so every details
+ * message starts from an empty path list rather than also carrying the
+ * paths encoded for the entries sent before it.
+ */
+static void
+vl_api_ip6_mfib_dump_t_handler (vl_api_ip6_mfib_dump_t * mp)
+{
+  vpe_api_main_t *am = &vpe_api_main;
+  unix_shared_memory_queue_t *q;
+  ip6_main_t *im = &ip6_main;
+  mfib_table_t *mfib_table;
+  fib_node_index_t *mfeip;
+  mfib_prefix_t pfx;
+  fib_route_path_encode_t *api_rpaths = NULL;
+  vl_api_ip6_mfib_dump_ctc_t ctx = {
+    .entries = NULL,
+  };
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (q == 0)
+    return;
+
+
+  /* *INDENT-OFF* */
+  pool_foreach (mfib_table, im->mfibs,
+  ({
+    ip6_mfib_table_walk(&mfib_table->v6,
+                        vl_api_ip6_mfib_table_dump_walk,
+                        &ctx);
+
+    vec_sort_with_function (ctx.entries, mfib_entry_cmp_for_sort);
+
+    vec_foreach(mfeip, ctx.entries)
+    {
+      /* Start each entry with an empty path list (allocation is reused) */
+      vec_reset_length (api_rpaths);
+      mfib_entry_get_prefix (*mfeip, &pfx);
+      mfib_entry_encode (*mfeip, &api_rpaths);
+      send_ip6_mfib_details (am, q,
+                             mfib_table->mft_table_id,
+                             &pfx, api_rpaths,
+                             mp->context);
+    }
+    vec_reset_length (ctx.entries);
+
+  }));
+  /* *INDENT-ON* */
+
+  vec_free (ctx.entries);
+  vec_free (api_rpaths);
+}
+
+/**
+ * Handler for IP_NEIGHBOR_ADD_DEL: add or remove an IPv6 neighbor or an
+ * IPv4 ARP entry on the interface named in the request, then reply with the
+ * result code. The update runs between stats_dslock_with_hint and
+ * stats_dsunlock.
+ *
+ * The entry itself is not validated here; the original author's expectation
+ * is that the FIB ensures nothing bad comes of adding bogus entries.
+ */
+static void
+vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp,
+                                      vlib_main_t * vm)
+{
+  vl_api_ip_neighbor_add_del_reply_t *rmp;
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 sw_if_index;
+  int rv = 0;
+
+  VALIDATE_SW_IF_INDEX (mp);
+
+  stats_dslock_with_hint (1 /* release hint */ , 7 /* tag */ );
+
+  sw_if_index = ntohl (mp->sw_if_index);
+
+  if (!mp->is_ipv6)
+    {
+      ethernet_arp_ip4_over_ethernet_address_t arp_addr;
+
+      clib_memcpy (&arp_addr.ethernet, mp->mac_address, 6);
+      clib_memcpy (&arp_addr.ip4, mp->dst_address, 4);
+
+      if (!mp->is_add)
+        rv = vnet_arp_unset_ip4_over_ethernet (vnm, sw_if_index, &arp_addr);
+      else
+        rv = vnet_arp_set_ip4_over_ethernet (vnm, sw_if_index, &arp_addr,
+                                             mp->is_static,
+                                             mp->is_no_adj_fib);
+    }
+  else if (mp->is_add)
+    {
+      rv = vnet_set_ip6_ethernet_neighbor
+        (vm, sw_if_index,
+         (ip6_address_t *) (mp->dst_address),
+         mp->mac_address, sizeof (mp->mac_address), mp->is_static,
+         mp->is_no_adj_fib);
+    }
+  else
+    {
+      rv = vnet_unset_ip6_ethernet_neighbor
+        (vm, sw_if_index,
+         (ip6_address_t *) (mp->dst_address),
+         mp->mac_address, sizeof (mp->mac_address));
+    }
+
+  stats_dsunlock ();
+
+  BAD_SW_IF_INDEX_LABEL;
+  REPLY_MACRO (VL_API_IP_NEIGHBOR_ADD_DEL_REPLY);
+}
+
+/**
+ * Release the API or CLI lock on the unicast and multicast tables with the
+ * given table id.
+ *
+ * Table 0 is ignored: the default table is always present and cannot be
+ * added nor deleted from the API.
+ *
+ * @param fproto   Protocol of the tables.
+ * @param table_id Table id to look up (host byte order).
+ * @param is_api   Non-zero to release the API source lock, zero for the CLI
+ *                 source lock.
+ */
+void
+ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api)
+{
+  u32 fib_index, mfib_index;
+
+  if (0 == table_id)
+    return;
+
+  /*
+   * The API holds only one lock on the table, i.e. it can be added many
+   * times via the API but needs to be deleted only once.
+   * The unicast and multicast FIB indices are looked up separately because
+   * they are not necessarily the same: internal VPP systems (like LISP and
+   * SR) create their own unicast tables.
+   */
+  fib_index = fib_table_find (fproto, table_id);
+  mfib_index = mfib_table_find (fproto, table_id);
+
+  if (~0 != fib_index)
+    fib_table_unlock (fib_index, fproto,
+                      (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI));
+
+  if (~0 != mfib_index)
+    mfib_table_unlock (mfib_index, fproto,
+                       (is_api ? MFIB_SOURCE_API : MFIB_SOURCE_CLI));
+}
+
+/**
+ * Handler for IP_TABLE_ADD_DEL: create or delete the table named in the
+ * request on behalf of the API, then reply. The reply code is always 0.
+ */
+void
+vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp)
+{
+  vl_api_ip_table_add_del_reply_t *rmp;
+  fib_protocol_t fproto;
+  u32 table_id;
+  int rv = 0;
+
+  fproto = mp->is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4;
+  table_id = ntohl (mp->table_id);
+
+  if (!mp->is_add)
+    ip_table_delete (fproto, table_id, 1);
+  else
+    ip_table_create (fproto, table_id, 1, mp->name);
+
+  REPLY_MACRO (VL_API_IP_TABLE_ADD_DEL_REPLY);
+}
+
+/*
+ * Add or delete a route in the FIB table fib_index on behalf of the API.
+ *
+ * A single fib_route_path_t is built from the next-hop arguments, then one
+ * of three strategies is applied:
+ * - is_multipath: the path is added to / removed from the entry;
+ * - is_drop / is_local / is_classify / is_unreach / is_prohibit: the entry
+ * is installed as an exclusive special DPO, or the special is removed;
+ * - otherwise: the entry is replaced by this single path, or deleted.
+ *
+ * The FIB update runs between stats_dslock_with_hint() and stats_dsunlock();
+ * every return path after the lock is taken releases it first.
+ *
+ * classify_table_index is converted with ntohl() here, i.e. it is expected
+ * in network byte order.
+ *
+ * NOTE(review): next_hop_fib_index is declared u8 here while
+ * add_del_route_check hands out a u32 index - larger indices would be
+ * truncated; confirm.
+ *
+ * Returns 0 on success, VNET_API_ERROR_NO_SUCH_TABLE when the classify
+ * table does not exist.
+ */
+int
+add_del_route_t_handler (u8 is_multipath,
+ u8 is_add,
+ u8 is_drop,
+ u8 is_unreach,
+ u8 is_prohibit,
+ u8 is_local,
+ u8 is_multicast,
+ u8 is_classify,
+ u32 classify_table_index,
+ u8 is_resolve_host,
+ u8 is_resolve_attached,
+ u8 is_interface_rx,
+ u8 is_rpf_id,
+ u32 fib_index,
+ const fib_prefix_t * prefix,
+ dpo_proto_t next_hop_proto,
+ const ip46_address_t * next_hop,
+ u32 next_hop_sw_if_index,
+ u8 next_hop_fib_index,
+ u16 next_hop_weight,
+ u16 next_hop_preference,
+ mpls_label_t next_hop_via_label,
+ mpls_label_t * next_hop_out_label_stack)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ fib_route_path_flags_t path_flags = FIB_ROUTE_PATH_FLAG_NONE;
+ /* a NULL next_hop is replaced by the all-zero address */
+ fib_route_path_t path = {
+ .frp_proto = next_hop_proto,
+ .frp_addr = (NULL == next_hop ? zero_addr : *next_hop),
+ .frp_sw_if_index = next_hop_sw_if_index,
+ .frp_fib_index = next_hop_fib_index,
+ .frp_weight = next_hop_weight,
+ .frp_preference = next_hop_preference,
+ .frp_label_stack = next_hop_out_label_stack,
+ };
+ fib_route_path_t *paths = NULL;
+ fib_entry_flag_t entry_flags = FIB_ENTRY_FLAG_NONE;
+
+ /*
+ * the special INVALID label means we are not recursing via a
+ * label. Exp-null value is never a valid via-label so that
+ * also means it's not a via-label and means clients that set
+ * it to 0 by default get the expected behaviour
+ */
+ if ((MPLS_LABEL_INVALID != next_hop_via_label) && (0 != next_hop_via_label))
+ {
+ path.frp_proto = DPO_PROTO_MPLS;
+ path.frp_local_label = next_hop_via_label;
+ path.frp_eos = MPLS_NON_EOS;
+ }
+ /* translate the boolean arguments into path and entry flags */
+ if (is_resolve_host)
+ path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_HOST;
+ if (is_resolve_attached)
+ path_flags |= FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED;
+ if (is_interface_rx)
+ path_flags |= FIB_ROUTE_PATH_INTF_RX;
+ if (is_rpf_id)
+ path_flags |= FIB_ROUTE_PATH_RPF_ID;
+ if (is_multicast)
+ entry_flags |= FIB_ENTRY_FLAG_MULTICAST;
+
+ path.frp_flags = path_flags;
+
+ /*
+ * multipath: add/remove just this path, leaving the entry's other paths.
+ * Note the special-route flags (drop, local, ...) are not looked at on
+ * this branch.
+ */
+ if (is_multipath)
+ {
+ stats_dslock_with_hint (1 /* release hint */ , 10 /* tag */ );
+
+
+ vec_add1 (paths, path);
+
+ if (is_add)
+ fib_table_entry_path_add2 (fib_index,
+ prefix,
+ FIB_SOURCE_API, entry_flags, paths);
+ else
+ fib_table_entry_path_remove2 (fib_index,
+ prefix, FIB_SOURCE_API, paths);
+
+ vec_free (paths);
+ stats_dsunlock ();
+ return 0;
+ }
+
+ stats_dslock_with_hint (1 /* release hint */ , 2 /* tag */ );
+
+ if (is_drop || is_local || is_classify || is_unreach || is_prohibit)
+ {
+ /*
+ * special route types that link directly to the adj
+ */
+ if (is_add)
+ {
+ dpo_id_t dpo = DPO_INVALID;
+ dpo_proto_t dproto;
+
+ dproto = fib_proto_to_dpo (prefix->fp_proto);
+
+ /* when several flags are set the first match in this chain wins */
+ if (is_drop)
+ ip_null_dpo_add_and_lock (dproto, IP_NULL_ACTION_NONE, &dpo);
+ else if (is_local)
+ receive_dpo_add_or_lock (dproto, ~0, NULL, &dpo);
+ else if (is_unreach)
+ ip_null_dpo_add_and_lock (dproto,
+ IP_NULL_ACTION_SEND_ICMP_UNREACH, &dpo);
+ else if (is_prohibit)
+ ip_null_dpo_add_and_lock (dproto,
+ IP_NULL_ACTION_SEND_ICMP_PROHIBIT,
+ &dpo);
+ else if (is_classify)
+ {
+ if (pool_is_free_index (cm->tables,
+ ntohl (classify_table_index)))
+ {
+ stats_dsunlock ();
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+ }
+
+ dpo_set (&dpo, DPO_CLASSIFY, dproto,
+ classify_dpo_create (dproto,
+ ntohl (classify_table_index)));
+ }
+ else
+ {
+ /* not reachable given the enclosing condition; kept as a guard */
+ stats_dsunlock ();
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+ }
+
+ fib_table_entry_special_dpo_update (fib_index,
+ prefix,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_EXCLUSIVE, &dpo);
+ /* drop the local reference taken while building the DPO */
+ dpo_reset (&dpo);
+ }
+ else
+ {
+ fib_table_entry_special_remove (fib_index, prefix, FIB_SOURCE_API);
+ }
+ }
+ else
+ {
+ /* plain route: replace the entry with this single path, or delete it */
+ if (is_add)
+ {
+ vec_add1 (paths, path);
+ fib_table_entry_update (fib_index,
+ prefix, FIB_SOURCE_API, entry_flags, paths);
+ vec_free (paths);
+ }
+ else
+ {
+ fib_table_entry_delete (fib_index, prefix, FIB_SOURCE_API);
+ }
+ }
+
+ stats_dsunlock ();
+ return (0);
+}
+
+/*
+ * Resolve the table IDs carried in a route add/del request to FIB indices.
+ *
+ * table_id, next_hop_sw_if_index and next_hop_table_id are run through
+ * ntohl() here before use.
+ *
+ * On success (return 0) *fib_index holds the index of the route's table,
+ * which is created when it does not exist yet.  *next_hop_fib_index is
+ * written only when is_rpf_id is set or no next-hop interface is given,
+ * and only if next_hop_table_proto <= DPO_PROTO_MPLS; on every other
+ * path it is left untouched.
+ *
+ * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when the next-hop
+ * interface index is free in the sw_interfaces pool.
+ * VNET_API_ERROR_NO_SUCH_FIB can only be returned if
+ * create_missing_tables is switched off.
+ */
+int
+add_del_route_check (fib_protocol_t table_proto,
+                     u32 table_id,
+                     u32 next_hop_sw_if_index,
+                     dpo_proto_t next_hop_table_proto,
+                     u32 next_hop_table_id,
+                     u8 is_rpf_id, u32 * fib_index, u32 * next_hop_fib_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  /* Temporary whilst I do the CSIT dance */
+  u8 create_missing_tables = 1;
+  fib_protocol_t nh_fproto;
+  u32 nh_table_id;
+
+  /* The table the route itself lives in. */
+  *fib_index = fib_table_find (table_proto, ntohl (table_id));
+  if (~0 == *fib_index)
+    {
+      /* No such VRF, and we weren't asked to create one */
+      if (!create_missing_tables)
+        return VNET_API_ERROR_NO_SUCH_FIB;
+
+      *fib_index = fib_table_find_or_create_and_lock (table_proto,
+                                                      ntohl (table_id),
+                                                      FIB_SOURCE_API);
+    }
+
+  /* A next-hop interface was given: it only has to exist. */
+  if (!is_rpf_id && ~0 != ntohl (next_hop_sw_if_index))
+    {
+      if (pool_is_free_index (vnm->interface_main.sw_interfaces,
+                              ntohl (next_hop_sw_if_index)))
+        return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+
+      return (0);
+    }
+
+  /* Otherwise resolve the table the next hop is looked up in. */
+  if (next_hop_table_proto > DPO_PROTO_MPLS)
+    return (0);
+
+  nh_fproto = dpo_proto_to_fib (next_hop_table_proto);
+  nh_table_id = ntohl (next_hop_table_id);
+
+  if (is_rpf_id)
+    *next_hop_fib_index = mfib_table_find (nh_fproto, nh_table_id);
+  else
+    *next_hop_fib_index = fib_table_find (nh_fproto, nh_table_id);
+
+  if (~0 != *next_hop_fib_index)
+    return (0);
+
+  /* No such VRF, and we weren't asked to create one */
+  if (!create_missing_tables)
+    return VNET_API_ERROR_NO_SUCH_FIB;
+
+  if (is_rpf_id)
+    *next_hop_fib_index =
+      mfib_table_find_or_create_and_lock (nh_fproto, nh_table_id,
+                                          MFIB_SOURCE_API);
+  else
+    *next_hop_fib_index =
+      fib_table_find_or_create_and_lock (nh_fproto, nh_table_id,
+                                         FIB_SOURCE_API);
+
+  return (0);
+}
+
+/*
+ * IPv4 flavour of the ip_add_del_route request.
+ *
+ * Validates the tables/interface named in the message with
+ * add_del_route_check(), builds the destination prefix, the next-hop
+ * address and the host-order out-label stack from the message, and hands
+ * everything to add_del_route_t_handler().
+ *
+ * Returns the non-zero result of add_del_route_check() if validation
+ * fails, otherwise whatever add_del_route_t_handler() returns.
+ */
+static int
+ip4_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
+{
+  /*
+   * add_del_route_check() does not write next_hop_fib_index when the
+   * request names a next-hop interface, so start from ~0 rather than
+   * passing an indeterminate value on.
+   */
+  u32 fib_index, next_hop_fib_index = ~0;
+  mpls_label_t *label_stack = NULL;
+  int rv, ii, n_labels;
+
+  rv = add_del_route_check (FIB_PROTOCOL_IP4,
+                            mp->table_id,
+                            mp->next_hop_sw_if_index,
+                            DPO_PROTO_IP4,
+                            mp->next_hop_table_id,
+                            0, &fib_index, &next_hop_fib_index);
+
+  if (0 != rv)
+    return (rv);
+
+  fib_prefix_t pfx = {
+    .fp_len = mp->dst_address_length,
+    .fp_proto = FIB_PROTOCOL_IP4,
+  };
+  clib_memcpy (&pfx.fp_addr.ip4, mp->dst_address, sizeof (pfx.fp_addr.ip4));
+
+  /* Only the ip4 part of the next hop is filled in; the rest stays zero. */
+  ip46_address_t nh;
+  memset (&nh, 0, sizeof (nh));
+  memcpy (&nh.ip4, mp->next_hop_address, sizeof (nh.ip4));
+
+  /* Convert the out-label stack, if there is one, to host byte order. */
+  n_labels = mp->next_hop_n_out_labels;
+  if (n_labels > 0)
+    {
+      vec_validate (label_stack, n_labels - 1);
+      for (ii = 0; ii < n_labels; ii++)
+        label_stack[ii] = ntohl (mp->next_hop_out_label_stack[ii]);
+    }
+
+  return (add_del_route_t_handler (mp->is_multipath,
+                                   mp->is_add,
+                                   mp->is_drop,
+                                   mp->is_unreach,
+                                   mp->is_prohibit,
+                                   mp->is_local, 0,
+                                   mp->is_classify,
+                                   mp->classify_table_index,
+                                   mp->is_resolve_host,
+                                   mp->is_resolve_attached, 0, 0,
+                                   fib_index, &pfx, DPO_PROTO_IP4,
+                                   &nh,
+                                   ntohl (mp->next_hop_sw_if_index),
+                                   next_hop_fib_index,
+                                   mp->next_hop_weight,
+                                   mp->next_hop_preference,
+                                   ntohl (mp->next_hop_via_label),
+                                   label_stack));
+}
+
+/*
+ * IPv6 flavour of the ip_add_del_route request.
+ *
+ * Validates the tables/interface named in the message with
+ * add_del_route_check(), builds the destination prefix, the next-hop
+ * address and the host-order out-label stack from the message, and hands
+ * everything to add_del_route_t_handler().
+ *
+ * Returns the non-zero result of add_del_route_check() if validation
+ * fails, otherwise whatever add_del_route_t_handler() returns.
+ */
+static int
+ip6_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
+{
+  /*
+   * add_del_route_check() does not write next_hop_fib_index when the
+   * request names a next-hop interface, so start from ~0 rather than
+   * passing an indeterminate value on.
+   */
+  u32 fib_index, next_hop_fib_index = ~0;
+  mpls_label_t *label_stack = NULL;
+  int rv, ii, n_labels;
+
+  rv = add_del_route_check (FIB_PROTOCOL_IP6,
+                            mp->table_id,
+                            mp->next_hop_sw_if_index,
+                            DPO_PROTO_IP6,
+                            mp->next_hop_table_id,
+                            0, &fib_index, &next_hop_fib_index);
+
+  if (0 != rv)
+    return (rv);
+
+  fib_prefix_t pfx = {
+    .fp_len = mp->dst_address_length,
+    .fp_proto = FIB_PROTOCOL_IP6,
+  };
+  clib_memcpy (&pfx.fp_addr.ip6, mp->dst_address, sizeof (pfx.fp_addr.ip6));
+
+  ip46_address_t nh;
+  memset (&nh, 0, sizeof (nh));
+  memcpy (&nh.ip6, mp->next_hop_address, sizeof (nh.ip6));
+
+  /* Convert the out-label stack, if there is one, to host byte order. */
+  n_labels = mp->next_hop_n_out_labels;
+  if (n_labels > 0)
+    {
+      vec_validate (label_stack, n_labels - 1);
+      for (ii = 0; ii < n_labels; ii++)
+        label_stack[ii] = ntohl (mp->next_hop_out_label_stack[ii]);
+    }
+
+  return (add_del_route_t_handler (mp->is_multipath,
+                                   mp->is_add,
+                                   mp->is_drop,
+                                   mp->is_unreach,
+                                   mp->is_prohibit,
+                                   mp->is_local, 0,
+                                   mp->is_classify,
+                                   mp->classify_table_index,
+                                   mp->is_resolve_host,
+                                   mp->is_resolve_attached, 0, 0,
+                                   fib_index, &pfx, DPO_PROTO_IP6,
+                                   &nh, ntohl (mp->next_hop_sw_if_index),
+                                   next_hop_fib_index,
+                                   mp->next_hop_weight,
+                                   mp->next_hop_preference,
+                                   ntohl (mp->next_hop_via_label),
+                                   label_stack));
+}
+
+/*
+ * API entry point for ip_add_del_route: dispatch on the address family
+ * and send the reply.  If the family handler returns 0, the reply carries
+ * vnm->api_errno (cleared before the call) instead.
+ */
+void
+vl_api_ip_add_del_route_t_handler (vl_api_ip_add_del_route_t * mp)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vl_api_ip_add_del_route_reply_t *rmp;
+  int rv;
+
+  vnm->api_errno = 0;
+
+  rv = mp->is_ipv6 ?
+    ip6_add_del_route_t_handler (mp) : ip4_add_del_route_t_handler (mp);
+
+  if (0 == rv)
+    rv = vnm->api_errno;
+
+  REPLY_MACRO (VL_API_IP_ADD_DEL_ROUTE_REPLY);
+}
+
+/*
+ * Create the unicast and the multicast table with the given ID for
+ * protocol fproto, unless they already exist.  The lock is taken with the
+ * API source when is_api is set and with the CLI source otherwise.
+ */
+void
+ip_table_create (fib_protocol_t fproto,
+                 u32 table_id, u8 is_api, const u8 * name)
+{
+  u32 fib_index, mfib_index;
+
+  /*
+   * ignore action on the default table - this is always present
+   * and cannot be added nor deleted from the API
+   */
+  if (0 == table_id)
+    return;
+
+  /*
+   * The API holds only one lock on the table.
+   * i.e. it can be added many times via the API but needs to be
+   * deleted only once.
+   * The FIB index for unicast and multicast is not necessarily the
+   * same, since internal VPP systems (like LISP and SR) create
+   * their own unicast tables.
+   */
+  fib_index = fib_table_find (fproto, table_id);
+  mfib_index = mfib_table_find (fproto, table_id);
+
+  if (~0 == fib_index)
+    fib_table_find_or_create_and_lock_w_name (fproto, table_id,
+                                              (is_api ?
+                                               FIB_SOURCE_API :
+                                               FIB_SOURCE_CLI), name);
+
+  if (~0 == mfib_index)
+    mfib_table_find_or_create_and_lock_w_name (fproto, table_id,
+                                               (is_api ?
+                                                MFIB_SOURCE_API :
+                                                MFIB_SOURCE_CLI), name);
+}
+
+/*
+ * Validate the table and interface named in a multicast route request.
+ *
+ * table_id and next_hop_sw_if_index are run through ntohl() here.
+ * On success *fib_index is the index of the multicast table.  Unlike the
+ * unicast check, a missing table is never created.  is_local is not used.
+ *
+ * Returns 0, VNET_API_ERROR_NO_SUCH_FIB or
+ * VNET_API_ERROR_NO_MATCHING_INTERFACE.
+ */
+static int
+add_del_mroute_check (fib_protocol_t table_proto,
+                      u32 table_id,
+                      u32 next_hop_sw_if_index, u8 is_local, u32 * fib_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 nh_sw_if_index = ntohl (next_hop_sw_if_index);
+
+  *fib_index = mfib_table_find (table_proto, ntohl (table_id));
+
+  /* No such table */
+  if (~0 == *fib_index)
+    return VNET_API_ERROR_NO_SUCH_FIB;
+
+  /* An interface index of ~0 means "none given" and is not checked. */
+  if (~0 != nh_sw_if_index
+      && pool_is_free_index (vnm->interface_main.sw_interfaces,
+                             nh_sw_if_index))
+    return VNET_API_ERROR_NO_MATCHING_INTERFACE;
+
+  return (0);
+}
+
+/*
+ * Apply a multicast route request to the mfib, under the stats data
+ * structure lock.
+ *
+ * - not local and no interface (next_hop_sw_if_index == ~0): the entry
+ *   itself is updated with rpf_id and entry_flags; is_add is not
+ *   consulted on this path.
+ * - otherwise a path over next_hop_sw_if_index (flagged local when
+ *   is_local is set) is added/updated with itf_flags when is_add is set,
+ *   and removed when it is not.
+ *
+ * Always returns 0.
+ */
+static int
+mroute_add_del_handler (u8 is_add,
+                        u8 is_local,
+                        u32 fib_index,
+                        const mfib_prefix_t * prefix,
+                        u32 entry_flags,
+                        fib_rpf_id_t rpf_id,
+                        u32 next_hop_sw_if_index, u32 itf_flags)
+{
+  stats_dslock_with_hint (1 /* release hint */ , 2 /* tag */ );
+
+  fib_route_path_t path = {
+    .frp_sw_if_index = next_hop_sw_if_index,
+    .frp_proto = fib_proto_to_dpo (prefix->fp_proto),
+  };
+
+  if (is_local)
+    path.frp_flags |= FIB_ROUTE_PATH_LOCAL;
+
+  if (!is_local && ~0 == next_hop_sw_if_index)
+    mfib_table_entry_update (fib_index, prefix,
+                             MFIB_SOURCE_API, rpf_id, entry_flags);
+  else if (is_add)
+    mfib_table_entry_path_update (fib_index, prefix,
+                                  MFIB_SOURCE_API, &path, itf_flags);
+  else
+    mfib_table_entry_path_remove (fib_index, prefix, MFIB_SOURCE_API, &path);
+
+  stats_dsunlock ();
+  return (0);
+}
+
+/*
+ * Decode an ip_mroute_add_del message and apply it.
+ *
+ * The table and interface are validated with add_del_mroute_check(); its
+ * non-zero result is returned unchanged.  Otherwise the group/source
+ * prefix is built from the message and mroute_add_del_handler() is
+ * called with the remaining fields converted to host byte order.
+ */
+static int
+api_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp)
+{
+  fib_protocol_t fproto = mp->is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4;
+  u32 fib_index;
+  int rv;
+
+  rv = add_del_mroute_check (fproto,
+                             mp->table_id,
+                             mp->next_hop_sw_if_index,
+                             mp->is_local, &fib_index);
+  if (0 != rv)
+    return (rv);
+
+  mfib_prefix_t pfx = {
+    .fp_len = ntohs (mp->grp_address_length),
+    .fp_proto = fproto,
+  };
+
+  /* Group and source addresses are copied as they are in the message. */
+  if (FIB_PROTOCOL_IP4 != fproto)
+    {
+      clib_memcpy (&pfx.fp_grp_addr.ip6, mp->grp_address,
+                   sizeof (pfx.fp_grp_addr.ip6));
+      clib_memcpy (&pfx.fp_src_addr.ip6, mp->src_address,
+                   sizeof (pfx.fp_src_addr.ip6));
+    }
+  else
+    {
+      clib_memcpy (&pfx.fp_grp_addr.ip4, mp->grp_address,
+                   sizeof (pfx.fp_grp_addr.ip4));
+      clib_memcpy (&pfx.fp_src_addr.ip4, mp->src_address,
+                   sizeof (pfx.fp_src_addr.ip4));
+    }
+
+  return (mroute_add_del_handler (mp->is_add,
+                                  mp->is_local,
+                                  fib_index, &pfx,
+                                  ntohl (mp->entry_flags),
+                                  ntohl (mp->rpf_id),
+                                  ntohl (mp->next_hop_sw_if_index),
+                                  ntohl (mp->itf_flags)));
+}
+
+/*
+ * API entry point for ip_mroute_add_del.  If the handler returns 0, the
+ * reply carries vnm->api_errno (cleared before the call) instead.
+ */
+void
+vl_api_ip_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  vl_api_ip_mroute_add_del_reply_t *rmp;
+  int rv;
+
+  vnm->api_errno = 0;
+
+  rv = api_mroute_add_del_t_handler (mp);
+  if (0 == rv)
+    rv = vnm->api_errno;
+
+  REPLY_MACRO (VL_API_IP_MROUTE_ADD_DEL_REPLY);
+}
+
+/*
+ * Allocate an ip_details message for one interface, fill it in and post
+ * it on the client queue q.  context is copied as given; sw_if_index is
+ * byte-swapped.  am is not used.
+ */
+static void
+send_ip_details (vpe_api_main_t * am,
+                 unix_shared_memory_queue_t * q, u32 sw_if_index,
+                 u8 is_ipv6, u32 context)
+{
+  vl_api_ip_details_t *details = vl_msg_api_alloc (sizeof (*details));
+
+  memset (details, 0, sizeof (*details));
+
+  details->_vl_msg_id = ntohs (VL_API_IP_DETAILS);
+  details->context = context;
+  details->sw_if_index = ntohl (sw_if_index);
+  details->is_ipv6 = is_ipv6;
+
+  vl_msg_api_send_shmem (q, (u8 *) & details);
+}
+
+/*
+ * Allocate an ip_address_details message for one interface address, fill
+ * it in and post it on the client queue q.
+ *
+ * ip points at the address bytes: sizeof (mp->ip) bytes are copied when
+ * is_ipv6 is set, four bytes otherwise (the rest of mp->ip stays zero).
+ * context and prefix_length are copied as given; sw_if_index is
+ * converted with htonl().  am is not used.
+ */
+static void
+send_ip_address_details (vpe_api_main_t * am,
+                         unix_shared_memory_queue_t * q,
+                         u8 * ip, u16 prefix_length,
+                         u32 sw_if_index, u8 is_ipv6, u32 context)
+{
+  vl_api_ip_address_details_t *mp;
+
+  mp = vl_msg_api_alloc (sizeof (*mp));
+  memset (mp, 0, sizeof (*mp));
+  mp->_vl_msg_id = ntohs (VL_API_IP_ADDRESS_DETAILS);
+
+  if (is_ipv6)
+    {
+      clib_memcpy (&mp->ip, ip, sizeof (mp->ip));
+    }
+  else
+    {
+      /*
+       * Byte copy instead of a u32 load/store through casted u8
+       * pointers: neither side is known to be u32-aligned here.
+       */
+      clib_memcpy (mp->ip, ip, sizeof (u32));
+    }
+  mp->prefix_length = prefix_length;
+  mp->context = context;
+  mp->sw_if_index = htonl (sw_if_index);
+  mp->is_ipv6 = is_ipv6;
+
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+/*
+ * Handle an ip_address_dump request: send one ip_address_details message
+ * per address found on the requested interface, for the address family
+ * selected by mp->is_ipv6.  Nothing is sent when the interface index is
+ * rejected by VALIDATE_SW_IF_INDEX or the client has no input queue.
+ */
+static void
+vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
+{
+  vpe_api_main_t *am = &vpe_api_main;
+  unix_shared_memory_queue_t *q;
+  ip_lookup_main_t *lm;
+  ip_interface_address_t *ia = 0;
+  u32 sw_if_index = ~0;
+  u8 is_ip6;
+  int rv __attribute__ ((unused)) = 0;
+
+  VALIDATE_SW_IF_INDEX (mp);
+
+  sw_if_index = ntohl (mp->sw_if_index);
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (q == 0)
+    return;
+
+  /* Both families are walked the same way; only the lookup main differs. */
+  is_ip6 = mp->is_ipv6 ? 1 : 0;
+  lm = is_ip6 ? &ip6_main.lookup_main : &ip4_main.lookup_main;
+
+  /* *INDENT-OFF* */
+  foreach_ip_interface_address (lm, ia, sw_if_index,
+                                1 /* honor unnumbered */,
+  ({
+    u8 *addr = (u8 *) ip_interface_address_get_address (lm, ia);
+    u16 prefix_length = ia->address_length;
+    send_ip_address_details (am, q, addr, prefix_length,
+                             sw_if_index, is_ip6, mp->context);
+  }));
+  /* *INDENT-ON* */
+
+  BAD_SW_IF_INDEX_LABEL;
+}
+
+/*
+ * Handle an ip_dump request: send one ip_details message to the
+ * requesting client for every software interface that is not flagged
+ * unnumbered.  For an IPv6 dump, interfaces on which IPv6 is not enabled
+ * are skipped as well.  Nothing is sent when the client has no input
+ * queue.
+ */
+static void
+vl_api_ip_dump_t_handler (vl_api_ip_dump_t * mp)
+{
+  vpe_api_main_t *am = &vpe_api_main;
+  vnet_main_t *vnm = vnet_get_main ();
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_interface_main_t *im = &vnm->interface_main;
+  unix_shared_memory_queue_t *q;
+  vnet_sw_interface_t *si, *sorted_sis;
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (q == 0)
+    return;
+
+  /* Gather interfaces: copy the pool elements into a private vector. */
+  sorted_sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
+  _vec_len (sorted_sis) = 0;
+  /* *INDENT-OFF* */
+  pool_foreach (si, im->sw_interfaces,
+  ({
+    vec_add1 (sorted_sis, si[0]);
+  }));
+  /* *INDENT-ON* */
+
+  vec_foreach (si, sorted_sis)
+  {
+    if (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED)
+      continue;
+
+    if (mp->is_ipv6 && !ip6_interface_enabled (vm, si->sw_if_index))
+      continue;
+
+    send_ip_details (am, q, si->sw_if_index, mp->is_ipv6, mp->context);
+  }
+
+  /* The copy is only used by this call; without this it leaks per dump. */
+  vec_free (sorted_sis);
+}
+
+/*
+ * IPv6 half of set_ip_flow_hash: build the flow hash configuration from
+ * the per-bit flags in the message, apply it to the table named by
+ * mp->vrf_id and reply with the result.
+ */
+static void
+set_ip6_flow_hash (vl_api_set_ip_flow_hash_t * mp)
+{
+  vl_api_set_ip_flow_hash_reply_t *rmp;
+  flow_hash_config_t flow_hash_config = 0;
+  int rv;
+
+  /* One message field per hash bit: OR in the bit when the field is set. */
+#define _(a,b) if (mp->a) { flow_hash_config |= b; }
+  foreach_flow_hash_bit;
+#undef _
+
+  rv = vnet_set_ip6_flow_hash (ntohl (mp->vrf_id), flow_hash_config);
+
+  REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_REPLY);
+}
+
+/*
+ * IPv4 half of set_ip_flow_hash: build the flow hash configuration from
+ * the per-bit flags in the message, apply it to the table named by
+ * mp->vrf_id and reply with the result.
+ */
+static void
+set_ip4_flow_hash (vl_api_set_ip_flow_hash_t * mp)
+{
+  vl_api_set_ip_flow_hash_reply_t *rmp;
+  flow_hash_config_t flow_hash_config = 0;
+  int rv;
+
+  /* One message field per hash bit: OR in the bit when the field is set. */
+#define _(a,b) if (mp->a) { flow_hash_config |= b; }
+  foreach_flow_hash_bit;
+#undef _
+
+  rv = vnet_set_ip4_flow_hash (ntohl (mp->vrf_id), flow_hash_config);
+
+  REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_REPLY);
+}
+
+
+/*
+ * API entry point for set_ip_flow_hash: hand the message to the IPv6 or
+ * IPv4 implementation, which also sends the reply.
+ */
+static void
+vl_api_set_ip_flow_hash_t_handler (vl_api_set_ip_flow_hash_t * mp)
+{
+  if (mp->is_ipv6 != 0)
+    set_ip6_flow_hash (mp);
+  else
+    set_ip4_flow_hash (mp);
+}
+
+/*
+ * API entry point for sw_interface_ip6nd_ra_config.
+ *
+ * Each boolean option counts as set only when its message field is
+ * exactly 1; the timer/count fields are converted with ntohl().  The
+ * reply carries the result of ip6_neighbor_ra_config(), or the error set
+ * by VALIDATE_SW_IF_INDEX when the interface index is rejected.
+ */
+static void
+vl_api_sw_interface_ip6nd_ra_config_t_handler
+  (vl_api_sw_interface_ip6nd_ra_config_t * mp)
+{
+  vl_api_sw_interface_ip6nd_ra_config_reply_t *rmp;
+  vlib_main_t *vm = vlib_get_main ();
+  int rv = 0;
+
+  VALIDATE_SW_IF_INDEX (mp);
+
+  rv = ip6_neighbor_ra_config (vm, ntohl (mp->sw_if_index),
+                               mp->suppress == 1,
+                               mp->managed == 1,
+                               mp->other == 1,
+                               mp->ll_option == 1,
+                               mp->send_unicast == 1,
+                               mp->cease == 1,
+                               mp->default_router == 1,
+                               ntohl (mp->lifetime),
+                               ntohl (mp->initial_count),
+                               ntohl (mp->initial_interval),
+                               ntohl (mp->max_interval),
+                               ntohl (mp->min_interval),
+                               mp->is_no == 1);
+
+  BAD_SW_IF_INDEX_LABEL;
+
+  REPLY_MACRO (VL_API_SW_INTERFACE_IP6ND_RA_CONFIG_REPLY);
+}
+
+/*
+ * API entry point for sw_interface_ip6nd_ra_prefix.
+ *
+ * Each boolean option counts as set only when its message field is
+ * exactly 1; the two lifetimes are converted with ntohl().  The reply
+ * carries the result of ip6_neighbor_ra_prefix(), or the error set by
+ * VALIDATE_SW_IF_INDEX when the interface index is rejected.
+ */
+static void
+vl_api_sw_interface_ip6nd_ra_prefix_t_handler
+  (vl_api_sw_interface_ip6nd_ra_prefix_t * mp)
+{
+  vl_api_sw_interface_ip6nd_ra_prefix_reply_t *rmp;
+  vlib_main_t *vm = vlib_get_main ();
+  int rv = 0;
+
+  VALIDATE_SW_IF_INDEX (mp);
+
+  rv = ip6_neighbor_ra_prefix (vm, ntohl (mp->sw_if_index),
+                               (ip6_address_t *) mp->address,
+                               mp->address_length,
+                               mp->use_default == 1,
+                               ntohl (mp->val_lifetime),
+                               ntohl (mp->pref_lifetime),
+                               mp->no_advertise == 1,
+                               mp->off_link == 1,
+                               mp->no_autoconfig == 1,
+                               mp->no_onlink == 1,
+                               mp->is_no == 1);
+
+  BAD_SW_IF_INDEX_LABEL;
+  REPLY_MACRO (VL_API_SW_INTERFACE_IP6ND_RA_PREFIX_REPLY);
+}
+
+/*
+ * Allocate an ip6nd_proxy_details message, fill it in and post it on the
+ * client queue q.  The first 16 bytes at addr are copied into the
+ * message; context is copied as given and sw_if_index is converted with
+ * htonl().
+ */
+static void
+send_ip6nd_proxy_details (unix_shared_memory_queue_t * q,
+                          u32 context,
+                          const ip46_address_t * addr, u32 sw_if_index)
+{
+  vl_api_ip6nd_proxy_details_t *details;
+
+  details = vl_msg_api_alloc (sizeof (*details));
+  memset (details, 0, sizeof (*details));
+
+  details->_vl_msg_id = ntohs (VL_API_IP6ND_PROXY_DETAILS);
+  details->sw_if_index = htonl (sw_if_index);
+  details->context = context;
+  memcpy (details->address, addr, 16);
+
+  vl_msg_api_send_shmem (q, (u8 *) & details);
+}
+
+typedef struct api_ip6nd_proxy_fib_table_walk_ctx_t_
+{
+ u32 *indices; /* vector the table-walk callback appends FIB entry indices to */
+} api_ip6nd_proxy_fib_table_walk_ctx_t;
+
+/*
+ * FIB table walk callback: append fei to the index vector in the context
+ * passed as arg when the entry is sourced by FIB_SOURCE_IP6_ND_PROXY.
+ * Always returns 1.
+ */
+static int
+api_ip6nd_proxy_fib_table_walk (fib_node_index_t fei, void *arg)
+{
+  api_ip6nd_proxy_fib_table_walk_ctx_t *walk_ctx = arg;
+
+  if (!fib_entry_is_sourced (fei, FIB_SOURCE_IP6_ND_PROXY))
+    return (1);
+
+  vec_add1 (walk_ctx->indices, fei);
+  return (1);
+}
+
+/*
+ * Handle an ip6nd_proxy_dump request.
+ *
+ * Every IPv6 FIB table is walked to collect the entries sourced by the
+ * ND proxy; the collected indices are sorted and one ip6nd_proxy_details
+ * message is sent per entry, carrying the entry's prefix address and its
+ * resolving interface.  Nothing is sent when the client has no input
+ * queue.
+ */
+static void
+vl_api_ip6nd_proxy_dump_t_handler (vl_api_ip6nd_proxy_dump_t * mp)
+{
+  api_ip6nd_proxy_fib_table_walk_ctx_t ctx = {
+    .indices = NULL,
+  };
+  ip6_main_t *im6 = &ip6_main;
+  unix_shared_memory_queue_t *q;
+  fib_node_index_t *fei_p;
+  fib_table_t *fib_table;
+  fib_prefix_t pfx;
+  u32 sw_if_index;
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (q == 0)
+    return;
+
+  /* Pass 1: collect the proxy entries from all IPv6 tables. */
+  /* *INDENT-OFF* */
+  pool_foreach (fib_table, im6->fibs,
+  ({
+    fib_table_walk (fib_table->ft_index,
+                    FIB_PROTOCOL_IP6,
+                    api_ip6nd_proxy_fib_table_walk,
+                    &ctx);
+  }));
+  /* *INDENT-ON* */
+
+  vec_sort_with_function (ctx.indices, fib_entry_cmp_for_sort);
+
+  /* Pass 2: report them in sorted order. */
+  vec_foreach (fei_p, ctx.indices)
+  {
+    fib_entry_get_prefix (*fei_p, &pfx);
+    sw_if_index = fib_entry_get_resolving_interface (*fei_p);
+
+    send_ip6nd_proxy_details (q, mp->context, &pfx.fp_addr, sw_if_index);
+  }
+
+  vec_free (ctx.indices);
+}
+
+/*
+ * API entry point for ip6nd_proxy_add_del.  The reply carries the result
+ * of ip6_neighbor_proxy_add_del(), or the error set by
+ * VALIDATE_SW_IF_INDEX when the interface index is rejected.
+ */
+static void
+vl_api_ip6nd_proxy_add_del_t_handler (vl_api_ip6nd_proxy_add_del_t * mp)
+{
+  vl_api_ip6nd_proxy_add_del_reply_t *rmp;
+  ip6_address_t *proxy_addr = (ip6_address_t *) mp->address;
+  int rv = 0;
+
+  VALIDATE_SW_IF_INDEX (mp);
+
+  rv = ip6_neighbor_proxy_add_del (ntohl (mp->sw_if_index),
+                                   proxy_addr, mp->is_del);
+
+  BAD_SW_IF_INDEX_LABEL;
+  REPLY_MACRO (VL_API_IP6ND_PROXY_ADD_DEL_REPLY);
+}
+
+/*
+ * API entry point for sw_interface_ip6_enable_disable.
+ *
+ * IPv6 is enabled on the interface when mp->enable is exactly 1 and
+ * disabled otherwise.  A returned clib error is reported and mapped to
+ * VNET_API_ERROR_UNSPECIFIED; without one the reply carries
+ * vnm->api_errno (cleared on entry).
+ */
+static void
+vl_api_sw_interface_ip6_enable_disable_t_handler
+  (vl_api_sw_interface_ip6_enable_disable_t * mp)
+{
+  vl_api_sw_interface_ip6_enable_disable_reply_t *rmp;
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error;
+  int rv = 0;
+
+  vnm->api_errno = 0;
+
+  VALIDATE_SW_IF_INDEX (mp);
+
+  if (mp->enable == 1)
+    error = enable_ip6_interface (vm, ntohl (mp->sw_if_index));
+  else
+    error = disable_ip6_interface (vm, ntohl (mp->sw_if_index));
+
+  if (!error)
+    {
+      rv = vnm->api_errno;
+    }
+  else
+    {
+      clib_error_report (error);
+      rv = VNET_API_ERROR_UNSPECIFIED;
+    }
+
+  BAD_SW_IF_INDEX_LABEL;
+
+  REPLY_MACRO (VL_API_SW_INTERFACE_IP6_ENABLE_DISABLE_REPLY);
+}
+
+/*
+ * API entry point for sw_interface_ip6_set_link_local_address.
+ *
+ * A clib error from set_ip6_link_local_address() is reported and mapped
+ * to VNET_API_ERROR_UNSPECIFIED; without one the reply carries
+ * vnm->api_errno (cleared on entry).
+ */
+static void
+vl_api_sw_interface_ip6_set_link_local_address_t_handler
+  (vl_api_sw_interface_ip6_set_link_local_address_t * mp)
+{
+  vl_api_sw_interface_ip6_set_link_local_address_reply_t *rmp;
+  vlib_main_t *vm = vlib_get_main ();
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *error;
+  int rv = 0;
+
+  vnm->api_errno = 0;
+
+  VALIDATE_SW_IF_INDEX (mp);
+
+  error = set_ip6_link_local_address (vm,
+                                      ntohl (mp->sw_if_index),
+                                      (ip6_address_t *) mp->address);
+  if (!error)
+    {
+      rv = vnm->api_errno;
+    }
+  else
+    {
+      clib_error_report (error);
+      rv = VNET_API_ERROR_UNSPECIFIED;
+    }
+
+  BAD_SW_IF_INDEX_LABEL;
+
+  REPLY_MACRO (VL_API_SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS_REPLY);
+}
+
+/*
+ * Build an mfib_signal_details message from the signal mfs and post it
+ * on the client queue q.
+ *
+ * The message carries the table ID and interface of the signal, the
+ * group address (plus the source address when the prefix length exceeds
+ * 32) and a copy of the buffered packet bytes, if any.  Only IPv4
+ * prefixes are handled; any other protocol hits ASSERT (0).
+ *
+ * NOTE(review): mfs_buffer_len bytes are copied into ip_packet_data
+ * without a visible bound check - confirm the two buffer sizes agree.
+ */
+void
+vl_mfib_signal_send_one (unix_shared_memory_queue_t * q,
+                         u32 context, const mfib_signal_t * mfs)
+{
+  vl_api_mfib_signal_details_t *mp;
+  mfib_prefix_t prefix;
+  mfib_table_t *mfib;
+  mfib_itf_t *mfi;
+
+  mp = vl_msg_api_alloc (sizeof (*mp));
+  memset (mp, 0, sizeof (*mp));
+
+  mp->_vl_msg_id = ntohs (VL_API_MFIB_SIGNAL_DETAILS);
+  mp->context = context;
+
+  mfi = mfib_itf_get (mfs->mfs_itf);
+  mfib_entry_get_prefix (mfs->mfs_entry, &prefix);
+  mfib = mfib_table_get (mfib_entry_get_fib_index (mfs->mfs_entry),
+                         prefix.fp_proto);
+
+  mp->table_id = ntohl (mfib->mft_table_id);
+  mp->sw_if_index = ntohl (mfi->mfi_sw_if_index);
+
+  /* The prefix length is reported for every protocol. */
+  mp->grp_address_len = ntohs (prefix.fp_len);
+
+  if (FIB_PROTOCOL_IP4 != prefix.fp_proto)
+    {
+      ASSERT (0);
+    }
+  else
+    {
+      memcpy (mp->grp_address, &prefix.fp_grp_addr.ip4, 4);
+      if (prefix.fp_len > 32)
+        memcpy (mp->src_address, &prefix.fp_src_addr.ip4, 4);
+    }
+
+  if (0 == mfs->mfs_buffer_len)
+    {
+      mp->ip_packet_len = 0;
+    }
+  else
+    {
+      mp->ip_packet_len = ntohs (mfs->mfs_buffer_len);
+      memcpy (mp->ip_packet_data, mfs->mfs_buffer, mfs->mfs_buffer_len);
+    }
+
+  vl_msg_api_send_shmem (q, (u8 *) & mp);
+}
+
+/*
+ * Handle an mfib_signal_dump request: keep calling
+ * mfib_signal_send_one() for the client queue until it returns zero or
+ * the queue's cursize reaches its maxsize.  Nothing is done when the
+ * client has no input queue.
+ */
+static void
+vl_api_mfib_signal_dump_t_handler (vl_api_mfib_signal_dump_t * mp)
+{
+  unix_shared_memory_queue_t *q;
+
+  q = vl_api_client_index_to_input_queue (mp->client_index);
+  if (q == 0)
+    return;
+
+  while (q->cursize < q->maxsize)
+    {
+      if (!mfib_signal_send_one (q, mp->context))
+        break;
+    }
+}
+
+#define vl_msg_name_crc_list
+#include <vnet/ip/ip.api.h>
+#undef vl_msg_name_crc_list
+
+/*
+ * Register every entry of foreach_vl_msg_name_crc_ip with the API main:
+ * each message ID is added under the string "<name>_<crc>".
+ */
+static void
+setup_message_id_table (api_main_t * am)
+{
+#define _(msg_id, msg_name, msg_crc) \
+  vl_msg_api_add_msg_name_crc (am, #msg_name "_" #msg_crc, msg_id);
+  foreach_vl_msg_name_crc_ip;
+#undef _
+}
+
+/*
+ * API init function: install the handler, endian and print functions for
+ * every message listed in foreach_ip_api_msg, then register the message
+ * name/CRC table.  Always returns 0.
+ */
+static clib_error_t *
+ip_api_hookup (vlib_main_t * vm)
+{
+  api_main_t *am = &api_main;
+
+#define _(ID, name)                                             \
+  vl_msg_api_set_handlers(VL_API_##ID, #name,                   \
+                          vl_api_##name##_t_handler,            \
+                          vl_noop_handler,                      \
+                          vl_api_##name##_t_endian,             \
+                          vl_api_##name##_t_print,              \
+                          sizeof(vl_api_##name##_t), 1);
+  foreach_ip_api_msg;
+#undef _
+
+  /*
+   * Set up the (msg_name, crc, message-id) table
+   */
+  setup_message_id_table (am);
+
+  return 0;
+}
+
+VLIB_API_INIT_FUNCTION (ip_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_checksum.c b/src/vnet/ip/ip_checksum.c
new file mode 100644
index 00000000..6a9cf657
--- /dev/null
+++ b/src/vnet/ip/ip_checksum.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip4/ip_checksum.c: ip/tcp/udp checksums
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+/*
+ * Add n_bytes of data starting at _data to the running checksum sum and
+ * return the updated value.
+ *
+ * Two accumulators are used: sum1 starts from the caller's sum, sum0
+ * from zero.  Leading u8/u16 (and u32 when ip_csum_t is wider than 32
+ * bits) items are consumed first to step the data pointer towards
+ * ip_csum_t alignment, the main loop then adds alternating ip_csum_t
+ * words to sum0 and sum1, and the tail is consumed in decreasing item
+ * sizes.  The two accumulators are combined with ip_csum_with_carry()
+ * at the end.
+ *
+ * The value returned is the ip_csum_t accumulator itself; it is not
+ * reduced any further in this function.
+ */
+ip_csum_t
+ip_incremental_checksum (ip_csum_t sum, void *_data, uword n_bytes)
+{
+ uword data = pointer_to_uword (_data);
+ ip_csum_t sum0, sum1;
+
+ sum0 = 0;
+ sum1 = sum; /* the caller's running sum rides in the second accumulator */
+
+ /* Align data pointer to 64 bits. */
+#define _(t) \
+do { \
+ if (n_bytes >= sizeof (t) \
+ && sizeof (t) < sizeof (ip_csum_t) \
+ && (data % (2 * sizeof (t))) != 0) \
+ { \
+ sum0 += * uword_to_pointer (data, t *); \
+ data += sizeof (t); \
+ n_bytes -= sizeof (t); \
+ } \
+} while (0)
+
+ _(u8); /* leading items are added with a plain +=, not ip_csum_with_carry() */
+ _(u16);
+ if (BITS (ip_csum_t) > 32)
+ _(u32);
+
+#undef _
+
+ {
+ ip_csum_t *d = uword_to_pointer (data, ip_csum_t *);
+
+ while (n_bytes >= 2 * sizeof (d[0])) /* two words per iteration, one per accumulator */
+ {
+ sum0 = ip_csum_with_carry (sum0, d[0]);
+ sum1 = ip_csum_with_carry (sum1, d[1]);
+ d += 2;
+ n_bytes -= 2 * sizeof (d[0]);
+ }
+
+ data = pointer_to_uword (d);
+ }
+
+#define _(t) \
+do { \
+ if (n_bytes >= sizeof (t) && sizeof (t) <= sizeof (ip_csum_t)) \
+ { \
+ sum0 = ip_csum_with_carry (sum0, * uword_to_pointer (data, t *)); \
+ data += sizeof (t); \
+ n_bytes -= sizeof (t); \
+ } \
+} while (0)
+
+ if (BITS (ip_csum_t) > 32) /* tail: at most one item of each size, largest first */
+ _(u64);
+ _(u32);
+ _(u16);
+ _(u8);
+
+#undef _
+
+ /* Combine even and odd sums. */
+ sum0 = ip_csum_with_carry (sum0, sum1);
+
+ return sum0;
+}
+/*
+ * Copy n_bytes from src to dst and add the copied data to the running
+ * checksum sum in the same pass; returns the updated sum.
+ *
+ * The copy proceeds in stages: one leading byte when bit 0 of the dst
+ * address is set, u16 items while dst is not yet ip_csum_t aligned,
+ * pairs of ip_csum_t words (with a second accumulator), single
+ * ip_csum_t words, remaining u16 items and finally one trailing byte.
+ * Source reads of more than one byte go through clib_mem_unaligned();
+ * only the dst address is tested for alignment.
+ *
+ * dst and src are advanced with arithmetic on void pointers.
+ */
+ip_csum_t
+ip_csum_and_memcpy (ip_csum_t sum, void *dst, void *src, uword n_bytes)
+{
+ uword n_left;
+ ip_csum_t sum0 = sum, sum1;
+ n_left = n_bytes;
+
+ if (n_left && (pointer_to_uword (dst) & sizeof (u8))) /* dst at an odd address */
+ {
+ u8 *d8, val;
+
+ d8 = dst;
+ val = ((u8 *) src)[0];
+ d8[0] = val;
+ dst += 1;
+ src += 1;
+ n_left -= 1;
+ sum0 =
+ ip_csum_with_carry (sum0, val << (8 * CLIB_ARCH_IS_LITTLE_ENDIAN)); /* leading byte: shifted up by 8 on little endian */
+ }
+
+ while ((n_left >= sizeof (u16))
+ && (pointer_to_uword (dst) & (sizeof (sum) - sizeof (u16)))) /* until dst is ip_csum_t aligned */
+ {
+ u16 *d16, *s16;
+
+ d16 = dst;
+ s16 = src;
+
+ d16[0] = clib_mem_unaligned (&s16[0], u16);
+
+ sum0 = ip_csum_with_carry (sum0, d16[0]);
+ dst += sizeof (u16);
+ src += sizeof (u16);
+ n_left -= sizeof (u16);
+ }
+
+ sum1 = 0;
+ while (n_left >= 2 * sizeof (sum)) /* bulk: two words per iteration, one per accumulator */
+ {
+ ip_csum_t dst0, dst1;
+ ip_csum_t *dst_even, *src_even;
+
+ dst_even = dst;
+ src_even = src;
+ dst0 = clib_mem_unaligned (&src_even[0], ip_csum_t);
+ dst1 = clib_mem_unaligned (&src_even[1], ip_csum_t);
+
+ dst_even[0] = dst0;
+ dst_even[1] = dst1;
+
+ dst += 2 * sizeof (dst_even[0]);
+ src += 2 * sizeof (dst_even[0]);
+ n_left -= 2 * sizeof (dst_even[0]);
+
+ sum0 = ip_csum_with_carry (sum0, dst0);
+ sum1 = ip_csum_with_carry (sum1, dst1);
+ }
+
+ sum0 = ip_csum_with_carry (sum0, sum1); /* fold the second accumulator back in */
+ while (n_left >= 1 * sizeof (sum))
+ {
+ ip_csum_t dst0, *dst_even, *src_even;
+
+ dst_even = dst;
+ src_even = src;
+
+ dst0 = clib_mem_unaligned (&src_even[0], ip_csum_t);
+
+ dst_even[0] = dst0;
+
+ dst += 1 * sizeof (sum);
+ src += 1 * sizeof (sum);
+ n_left -= 1 * sizeof (sum);
+
+ sum0 = ip_csum_with_carry (sum0, dst0);
+ }
+
+ while (n_left >= sizeof (u16))
+ {
+ u16 dst0, *dst_short, *src_short;
+
+ dst_short = dst;
+ src_short = src;
+
+ dst0 = clib_mem_unaligned (&src_short[0], u16);
+
+ dst_short[0] = dst0;
+
+ sum0 = ip_csum_with_carry (sum0, dst_short[0]);
+ dst += 1 * sizeof (dst0);
+ src += 1 * sizeof (dst0);
+ n_left -= 1 * sizeof (dst0);
+
+ }
+
+ if (n_left == 1)
+ {
+ u8 *d8, *s8, val;
+
+ d8 = dst;
+ s8 = src;
+
+ d8[0] = val = s8[0];
+ d8 += 1; /* d8, s8 and n_left are not read again after these updates */
+ s8 += 1;
+ n_left -= 1;
+ sum0 = ip_csum_with_carry (sum0, val << (8 * CLIB_ARCH_IS_BIG_ENDIAN)); /* trailing byte: shifted up by 8 on big endian */
+ }
+
+ return sum0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
new file mode 100644
index 00000000..ca062bfd
--- /dev/null
+++ b/src/vnet/ip/ip_frag.c
@@ -0,0 +1,581 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+/*
+ * IPv4 and IPv6 Fragmentation Nodes
+ *
+ *
+ */
+
+#include "ip_frag.h"
+
+#include <vnet/ip/ip.h>
+
+
+typedef struct
+{
+ u8 ipv6; /* 1 when recorded by ip6_frag, 0 when recorded by ip4_frag */
+ u16 header_offset; /* copy of the buffer's ip_frag.header_offset */
+ u16 mtu; /* copy of the buffer's ip_frag.mtu */
+ u8 next; /* copy of the buffer's ip_frag.next_index (not printed by format_ip_frag_trace) */
+ u16 n_fragments; /* length of the fragment vector after the packet was processed */
+} ip_frag_trace_t;
+
+/*
+ * Trace formatter shared by the IPv4 and IPv6 fragmentation nodes.
+ * Appends a one-line rendering of an ip_frag_trace_t record to 's'
+ * and returns the (possibly reallocated) vector.
+ */
+static u8 *
+format_ip_frag_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip_frag_trace_t *trace = va_arg (*args, ip_frag_trace_t *);
+  char *ip_version = "4";
+
+  if (trace->ipv6)
+    ip_version = "6";
+
+  return format (s, "IPv%s offset: %u mtu: %u fragments: %u",
+                 ip_version, trace->header_offset, trace->mtu,
+                 trace->n_fragments);
+}
+
+static u32 running_fragment_id; /* pre-incremented to produce the identification of each newly fragmented IPv4 or IPv6 packet */
+
+/*
+ * Fragment the IPv4 packet held in buffer 'pi'.
+ *
+ * The IPv4 header is expected ip_frag.header_offset bytes after the
+ * buffer's current data pointer; those leading bytes are copied in front
+ * of every fragment. The first fragment reuses the original buffer, the
+ * following ones are newly allocated. Every buffer index, starting with
+ * 'pi', is appended to '*buffer' - also when an error is reported - so
+ * that the caller can enqueue or drop all of them.
+ *
+ * On failure '*error' receives an IP_FRAG_ERROR_* code; it is left
+ * untouched on success.
+ */
+static void
+ip4_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
+                      ip_frag_error_t * error)
+{
+  vlib_buffer_t *p;
+  ip4_header_t *ip4;
+  u16 mtu, ptr, len, max, rem, offset, ip_frag_id, ip_frag_offset;
+  u8 *packet, more;
+
+  //The original buffer is always handed back, even on the error paths
+  vec_add1 (*buffer, pi);
+  p = vlib_get_buffer (vm, pi);
+  offset = vnet_buffer (p)->ip_frag.header_offset;
+  mtu = vnet_buffer (p)->ip_frag.mtu;
+  packet = (u8 *) vlib_buffer_get_current (p);
+  ip4 = (ip4_header_t *) (packet + offset);
+
+  rem = clib_net_to_host_u16 (ip4->length) - sizeof (*ip4);
+  ptr = 0;
+  //Every fragment payload but the last one is a multiple of 8 bytes
+  max = (mtu - sizeof (*ip4) - offset) & ~0x7;
+
+  //The payload length announced by the header must match what the buffer
+  //holds after the header: a larger value would make the copies below
+  //read past the end of the packet data
+  if (rem != (p->current_length - offset - sizeof (*ip4)))
+    {
+      *error = IP_FRAG_ERROR_MALFORMED;
+      return;
+    }
+
+  if (mtu < sizeof (*ip4))
+    {
+      *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
+      return;
+    }
+
+  if (ip4->flags_and_fragment_offset &
+      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT))
+    {
+      *error = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
+      return;
+    }
+
+  //The packet does not fit and fewer than 8 payload bytes can be carried
+  //per fragment: 'len' would be 0 and the loop below would never end
+  if (max == 0 && rem > (mtu - sizeof (*ip4) - offset))
+    {
+      *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
+      return;
+    }
+
+  if (ip4_is_fragment (ip4))
+    {
+      //Already a fragment: keep its id, offset and more-fragments flag
+      ip_frag_id = ip4->fragment_id;
+      ip_frag_offset = ip4_get_fragment_offset (ip4);
+      more =
+        !!(ip4->flags_and_fragment_offset &
+           clib_host_to_net_u16 (IP4_HEADER_FLAG_MORE_FRAGMENTS));
+    }
+  else
+    {
+      ip_frag_id = (++running_fragment_id);
+      ip_frag_offset = 0;
+      more = 0;
+    }
+
+  //Do the actual fragmentation
+  while (rem)
+    {
+      u32 bi;
+      vlib_buffer_t *b;
+      ip4_header_t *fip4;
+
+      len = (rem > (mtu - sizeof (*ip4) - offset)) ? max : rem;
+
+      if (ptr == 0)
+        {
+          //First fragment: reuse the original buffer in place
+          bi = pi;
+          b = p;
+          fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset);
+        }
+      else
+        {
+          if (!vlib_buffer_alloc (vm, &bi, 1))
+            {
+              *error = IP_FRAG_ERROR_MEMORY;
+              return;
+            }
+          vec_add1 (*buffer, bi);
+          b = vlib_get_buffer (vm, bi);
+          vnet_buffer (b)->sw_if_index[VLIB_RX] =
+            vnet_buffer (p)->sw_if_index[VLIB_RX];
+          vnet_buffer (b)->sw_if_index[VLIB_TX] =
+            vnet_buffer (p)->sw_if_index[VLIB_TX];
+          fip4 = (ip4_header_t *) (vlib_buffer_get_current (b) + offset);
+
+          //Copy offset and ip4 header
+          clib_memcpy (b->data, packet, offset + sizeof (*ip4));
+          //Copy data
+          clib_memcpy (((u8 *) (fip4)) + sizeof (*fip4),
+                       packet + offset + sizeof (*fip4) + ptr, len);
+        }
+      b->current_length = offset + len + sizeof (*fip4);
+
+      fip4->fragment_id = ip_frag_id;
+      fip4->flags_and_fragment_offset =
+        clib_host_to_net_u16 ((ptr >> 3) + ip_frag_offset);
+      fip4->flags_and_fragment_offset |=
+        clib_host_to_net_u16 (((len != rem) || more) << 13);
+      // ((len != rem) || more) << 13 is optimization for
+      // ((len != rem) || more) ? IP4_HEADER_FLAG_MORE_FRAGMENTS : 0
+      fip4->length = clib_host_to_net_u16 (len + sizeof (*fip4));
+      fip4->checksum = ip4_header_checksum (fip4);
+
+      if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
+        {
+          //Encapsulating ipv4 header
+          ip4_header_t *encap_header4 =
+            (ip4_header_t *) vlib_buffer_get_current (b);
+          encap_header4->length = clib_host_to_net_u16 (b->current_length);
+          encap_header4->checksum = ip4_header_checksum (encap_header4);
+        }
+      else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
+        {
+          //Encapsulating ipv6 header
+          ip6_header_t *encap_header6 =
+            (ip6_header_t *) vlib_buffer_get_current (b);
+          encap_header6->payload_length =
+            clib_host_to_net_u16 (b->current_length -
+                                  sizeof (*encap_header6));
+        }
+
+      rem -= len;
+      ptr += len;
+    }
+}
+
+/*
+ * Record the fragmentation parameters read by the ip4-frag / ip6-frag
+ * nodes in the ip_frag opaque fields of buffer 'b'.
+ */
+void
+ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu,
+                         u8 next_index, u8 flags)
+{
+  /* Four independent stores; their relative order does not matter. */
+  vnet_buffer (b)->ip_frag.flags = flags;
+  vnet_buffer (b)->ip_frag.next_index = next_index;
+  vnet_buffer (b)->ip_frag.mtu = mtu;
+  vnet_buffer (b)->ip_frag.header_offset = offset;
+}
+/*
+ * Node function of the ip4-frag node.
+ * Each buffer of the incoming frame is passed to ip4_frag_do_fragment();
+ * the buffers it returns are then enqueued to the next node selected by
+ * ip_frag.next_index, or to the ICMP error / drop next node on failure.
+ * Returns the number of buffers of the incoming frame.
+ */
+static uword
+ip4_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+ vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, ip4_frag_node.index);
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+ u32 frag_sent = 0, small_packets = 0;
+ u32 *buffer = 0; /* vector of buffer indices produced for the current packet; reset after each packet */
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 pi0, *frag_from, frag_left;
+ vlib_buffer_t *p0;
+ ip_frag_error_t error0;
+ ip4_frag_next_t next0;
+
+ //Note: The packet is not enqueued now.
+ //It is instead put in a vector where other fragments
+ //will be put as well.
+ pi0 = from[0];
+ from += 1;
+ n_left_from -= 1;
+ error0 = IP_FRAG_ERROR_NONE;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ ip4_frag_do_fragment (vm, pi0, &buffer, &error0);
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip_frag_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
+ tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
+ tr->ipv6 = 0;
+ tr->n_fragments = vec_len (buffer);
+ tr->next = vnet_buffer (p0)->ip_frag.next_index;
+ }
+
+ if (error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
+ {
+ /* DF set: request an ICMP destination-unreachable / fragmentation-needed error carrying the MTU */
+ icmp4_error_set_vnet_buffer (p0, ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+ vnet_buffer (p0)->ip_frag.mtu);
+ vlib_buffer_advance (p0,
+ vnet_buffer (p0)->ip_frag.header_offset); /* skip the bytes in front of the IPv4 header */
+ next0 = IP4_FRAG_NEXT_ICMP_ERROR;
+ }
+ else
+ next0 =
+ (error0 ==
+ IP_FRAG_ERROR_NONE) ? vnet_buffer (p0)->
+ ip_frag.next_index : IP4_FRAG_NEXT_DROP;
+
+ if (error0 == IP_FRAG_ERROR_NONE)
+ {
+ frag_sent += vec_len (buffer);
+ small_packets += (vec_len (buffer) == 1); /* a single buffer means no fragmentation was needed */
+ }
+ else
+ vlib_error_count (vm, ip4_frag_node.index, error0, 1);
+
+ //Send fragments that were added in the frame
+ frag_from = buffer;
+ frag_left = vec_len (buffer);
+
+ while (frag_left > 0)
+ {
+ while (frag_left > 0 && n_left_to_next > 0)
+ {
+ u32 i;
+ i = to_next[0] = frag_from[0];
+ frag_from += 1;
+ frag_left -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_get_buffer (vm, i)->error = error_node->errors[error0];
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next, i,
+ next0);
+ }
+ /* the next frame is put and fetched again after every pass, whether or not fragments remain */
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ vec_reset_length (buffer);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+ vec_free (buffer);
+
+ vlib_node_increment_counter (vm, ip4_frag_node.index,
+ IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
+ vlib_node_increment_counter (vm, ip4_frag_node.index,
+ IP_FRAG_ERROR_SMALL_PACKET, small_packets);
+
+ return frame->n_vectors;
+}
+
+
+/*
+ * Fragment the IPv6 packet held in buffer 'pi'.
+ *
+ * The IPv6 header is expected ip_frag.header_offset bytes after the
+ * buffer's current data pointer. Hop-by-hop, destination-options and
+ * routing headers (in that order) are skipped; a fragmentation header is
+ * inserted after them unless one is already present. The first fragment
+ * reuses the original buffer, the following ones are newly allocated.
+ * Every buffer index, starting with 'pi', is appended to '*buffer' - also
+ * when an error is reported - so that the caller can enqueue or drop them.
+ *
+ * On failure '*error' receives an IP_FRAG_ERROR_* code; it is left
+ * untouched on success.
+ */
+static void
+ip6_frag_do_fragment (vlib_main_t * vm, u32 pi, u32 ** buffer,
+                      ip_frag_error_t * error)
+{
+  vlib_buffer_t *p;
+  ip6_header_t *ip6_hdr;
+  ip6_frag_hdr_t *frag_hdr;
+  u8 *payload, *next_header;
+
+  //Hand the original buffer back first, as ip4_frag_do_fragment does:
+  //otherwise it would never be enqueued when an error path returns early
+  vec_add1 (*buffer, pi);
+  p = vlib_get_buffer (vm, pi);
+
+  //Parsing the IPv6 headers
+  ip6_hdr =
+    vlib_buffer_get_current (p) + vnet_buffer (p)->ip_frag.header_offset;
+  payload = (u8 *) (ip6_hdr + 1);
+  next_header = &ip6_hdr->protocol;
+
+  //The 'Hdr Ext Len' byte counts 8-byte units NOT including the first
+  //8 bytes of the extension header, hence the '+ 1'
+  if (*next_header == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+    {
+      next_header = payload;
+      payload += (payload[1] + 1) * 8;
+    }
+
+  if (*next_header == IP_PROTOCOL_IP6_DESTINATION_OPTIONS)
+    {
+      next_header = payload;
+      payload += (payload[1] + 1) * 8;
+    }
+
+  if (*next_header == IP_PROTOCOL_IPV6_ROUTE)
+    {
+      next_header = payload;
+      payload += (payload[1] + 1) * 8;
+    }
+
+  if (PREDICT_FALSE
+      (payload >= (u8 *) vlib_buffer_get_current (p) + p->current_length))
+    {
+      //A malicious packet could set an extension header with a too big size
+      //and make us modify another vlib_buffer
+      *error = IP_FRAG_ERROR_MALFORMED;
+      return;
+    }
+
+  u8 has_more;
+  u16 initial_offset;
+  if (*next_header == IP_PROTOCOL_IPV6_FRAGMENTATION)
+    {
+      //The fragmentation header is already there
+      frag_hdr = (ip6_frag_hdr_t *) payload;
+      if (PREDICT_FALSE
+          ((u8 *) (frag_hdr + 1) >
+           (u8 *) vlib_buffer_get_current (p) + p->current_length))
+        {
+          //Truncated fragmentation header
+          *error = IP_FRAG_ERROR_MALFORMED;
+          return;
+        }
+      has_more = ip6_frag_hdr_more (frag_hdr);
+      initial_offset = ip6_frag_hdr_offset (frag_hdr);
+    }
+  else
+    {
+      //Insert a fragmentation header in the packet
+      u8 nh = *next_header;
+      *next_header = IP_PROTOCOL_IPV6_FRAGMENTATION;
+      vlib_buffer_advance (p, -sizeof (*frag_hdr));
+      u8 *start = vlib_buffer_get_current (p);
+      //Shift everything in front of the payload to open an 8-byte gap
+      memmove (start, start + sizeof (*frag_hdr),
+               payload - (start + sizeof (*frag_hdr)));
+      frag_hdr = (ip6_frag_hdr_t *) (payload - sizeof (*frag_hdr));
+      frag_hdr->identification = ++running_fragment_id;
+      frag_hdr->next_hdr = nh;
+      frag_hdr->rsv = 0;
+      has_more = 0;
+      initial_offset = 0;
+    }
+  payload = (u8 *) (frag_hdr + 1);
+
+  u16 headers_len = payload - (u8 *) vlib_buffer_get_current (p);
+  u16 mtu = vnet_buffer (p)->ip_frag.mtu;
+
+  //At least 8 payload bytes must fit behind the headers. Comparing before
+  //subtracting avoids the u16 wrap-around of 'mtu - headers_len'
+  if (mtu < headers_len + 8)
+    {
+      *error = IP_FRAG_ERROR_CANT_FRAGMENT_HEADER;
+      return;
+    }
+
+  u16 max_payload = mtu - headers_len;
+  u16 rem = p->current_length - headers_len;
+  u16 ptr = 0;
+
+  while (rem)
+    {
+      vlib_buffer_t *b;
+      //Every fragment payload but the last one is a multiple of 8 bytes
+      u16 len = (rem > max_payload) ? (max_payload & ~0x7) : rem;
+      rem -= len;
+
+      if (ptr != 0)
+        {
+          u32 bi;
+
+          if (!vlib_buffer_alloc (vm, &bi, 1))
+            {
+              *error = IP_FRAG_ERROR_MEMORY;
+              return;
+            }
+          vec_add1 (*buffer, bi);
+          b = vlib_get_buffer (vm, bi);
+          vnet_buffer (b)->sw_if_index[VLIB_RX] =
+            vnet_buffer (p)->sw_if_index[VLIB_RX];
+          vnet_buffer (b)->sw_if_index[VLIB_TX] =
+            vnet_buffer (p)->sw_if_index[VLIB_TX];
+          clib_memcpy (vlib_buffer_get_current (b),
+                       vlib_buffer_get_current (p), headers_len);
+          clib_memcpy (vlib_buffer_get_current (b) + headers_len,
+                       payload + ptr, len);
+          frag_hdr =
+            vlib_buffer_get_current (b) + headers_len - sizeof (*frag_hdr);
+        }
+      else
+        {
+          //First fragment: reuse the original buffer,
+          //frag_hdr already set here
+          b = p;
+        }
+
+      ip6_hdr =
+        vlib_buffer_get_current (b) + vnet_buffer (p)->ip_frag.header_offset;
+      frag_hdr->fragment_offset_and_more =
+        ip6_frag_hdr_offset_and_more (initial_offset + (ptr >> 3),
+                                      (rem || has_more));
+      b->current_length = headers_len + len;
+      ip6_hdr->payload_length =
+        clib_host_to_net_u16 (b->current_length -
+                              vnet_buffer (p)->ip_frag.header_offset -
+                              sizeof (*ip6_hdr));
+
+      if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP4_HEADER)
+        {
+          //Encapsulating ipv4 header
+          ip4_header_t *encap_header4 =
+            (ip4_header_t *) vlib_buffer_get_current (b);
+          encap_header4->length = clib_host_to_net_u16 (b->current_length);
+          encap_header4->checksum = ip4_header_checksum (encap_header4);
+        }
+      else if (vnet_buffer (p)->ip_frag.flags & IP_FRAG_FLAG_IP6_HEADER)
+        {
+          //Encapsulating ipv6 header
+          ip6_header_t *encap_header6 =
+            (ip6_header_t *) vlib_buffer_get_current (b);
+          encap_header6->payload_length =
+            clib_host_to_net_u16 (b->current_length -
+                                  sizeof (*encap_header6));
+        }
+
+      ptr += len;
+    }
+}
+
+/*
+ * Node function of the ip6-frag node.
+ * Each buffer of the incoming frame is passed to ip6_frag_do_fragment();
+ * the buffers it returns are then enqueued to the next node selected by
+ * ip_frag.next_index, or to the drop next node on failure.
+ * Returns the number of buffers of the incoming frame.
+ */
+static uword
+ip6_frag (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
+  vlib_node_runtime_t *error_node =
+    vlib_node_get_runtime (vm, ip6_frag_node.index);
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+  u32 frag_sent = 0, small_packets = 0;
+  //Buffer indices produced for the current packet; reset after each one
+  u32 *buffer = 0;
+
+  while (n_left_from > 0)
+    {
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+        {
+          u32 pi0, *frag_from, frag_left;
+          vlib_buffer_t *p0;
+          ip_frag_error_t error0;
+          ip6_frag_next_t next0;
+
+          pi0 = from[0];
+          from += 1;
+          n_left_from -= 1;
+          error0 = IP_FRAG_ERROR_NONE;
+
+          p0 = vlib_get_buffer (vm, pi0);
+          ip6_frag_do_fragment (vm, pi0, &buffer, &error0);
+
+          if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+            {
+              ip_frag_trace_t *tr =
+                vlib_add_trace (vm, node, p0, sizeof (*tr));
+              tr->header_offset = vnet_buffer (p0)->ip_frag.header_offset;
+              tr->mtu = vnet_buffer (p0)->ip_frag.mtu;
+              tr->ipv6 = 1;
+              tr->n_fragments = vec_len (buffer);
+              tr->next = vnet_buffer (p0)->ip_frag.next_index;
+            }
+
+          if (error0 == IP_FRAG_ERROR_NONE)
+            {
+              next0 = vnet_buffer (p0)->ip_frag.next_index;
+              //Only successfully fragmented packets count as sent,
+              //as in ip4_frag
+              frag_sent += vec_len (buffer);
+              small_packets += (vec_len (buffer) == 1);
+            }
+          else
+            next0 = IP6_FRAG_NEXT_DROP;
+
+          //Send fragments that were added in the frame
+          frag_from = buffer;
+          frag_left = vec_len (buffer);
+          while (frag_left > 0)
+            {
+              while (frag_left > 0 && n_left_to_next > 0)
+                {
+                  u32 i;
+                  i = to_next[0] = frag_from[0];
+                  frag_from += 1;
+                  frag_left -= 1;
+                  to_next += 1;
+                  n_left_to_next -= 1;
+
+                  vlib_get_buffer (vm, i)->error = error_node->errors[error0];
+                  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                                   to_next, n_left_to_next, i,
+                                                   next0);
+                }
+              vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+              vlib_get_next_frame (vm, node, next_index, to_next,
+                                   n_left_to_next);
+            }
+          vec_reset_length (buffer);
+        }
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  vec_free (buffer);
+  vlib_node_increment_counter (vm, ip6_frag_node.index,
+                               IP_FRAG_ERROR_FRAGMENT_SENT, frag_sent);
+  vlib_node_increment_counter (vm, ip6_frag_node.index,
+                               IP_FRAG_ERROR_SMALL_PACKET, small_packets);
+
+  return frame->n_vectors;
+}
+
+static char *ip4_frag_error_strings[] = { /* one string per foreach_ip_frag_error entry; registered by both ip4_frag_node and ip6_frag_node */
+#define _(sym,string) string,
+ foreach_ip_frag_error
+#undef _
+};
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_frag_node) = {
+ .function = ip4_frag,
+ .name = IP4_FRAG_NODE_NAME,
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_frag_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ /* error counters: one per ip_frag_error_t value */
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_strings = ip4_frag_error_strings,
+ /* next nodes, indexed by ip4_frag_next_t */
+ .n_next_nodes = IP4_FRAG_N_NEXT,
+ .next_nodes = {
+ [IP4_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP4_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP4_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [IP4_FRAG_NEXT_DROP] = "error-drop"
+ },
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_frag_node) = {
+ .function = ip6_frag,
+ .name = IP6_FRAG_NODE_NAME,
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_frag_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ /* error counters: the string table is the one also registered by ip4_frag_node */
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_strings = ip4_frag_error_strings,
+ /* next nodes, indexed by ip6_frag_next_t */
+ .n_next_nodes = IP6_FRAG_N_NEXT,
+ .next_nodes = {
+ [IP6_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP6_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP6_FRAG_NEXT_DROP] = "error-drop"
+ },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h
new file mode 100644
index 00000000..348f5a2f
--- /dev/null
+++ b/src/vnet/ip/ip_frag.h
@@ -0,0 +1,96 @@
+/*---------------------------------------------------------------------------
+ * Copyright (c) 2009-2014 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *---------------------------------------------------------------------------
+ */
+/*
+ * IPv4 and IPv6 Fragmentation Nodes
+ *
+ * A packet sent to these nodes requires the following
+ * buffer attributes to be set:
+ * ip_frag.header_offset :
+ * Where to find the IPv4 (or IPv6) header in the packet. Previous
+ * bytes are left untouched and copied in every fragment. The fragments
+ * are then appended. This option is used for fragmented packets
+ * that are encapsulated.
+ * ip_frag.mtu :
+ * Maximum size of IP packets, header included, but ignoring
+ * the 'ip_frag.header_offset' copied bytes.
+ * ip_frag.next_index :
+ * One of ip4_frag_next_t (ip4-frag) or ip6_frag_next_t (ip6-frag),
+ * indicating the exit node to which the fragments should be sent.
+ *
+ */
+
+#ifndef IP_FRAG_H
+#define IP_FRAG_H
+
+#include <vnet/vnet.h>
+
+#define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header
+#define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header
+
+#define IP4_FRAG_NODE_NAME "ip4-frag"
+#define IP6_FRAG_NODE_NAME "ip6-frag"
+
+extern vlib_node_registration_t ip4_frag_node;
+extern vlib_node_registration_t ip6_frag_node;
+
+typedef enum
+{
+ IP4_FRAG_NEXT_IP4_LOOKUP, /* "ip4-lookup" */
+ IP4_FRAG_NEXT_IP6_LOOKUP, /* "ip6-lookup" */
+ IP4_FRAG_NEXT_ICMP_ERROR, /* "ip4-icmp-error", used when the don't-fragment flag is set */
+ IP4_FRAG_NEXT_DROP, /* "error-drop" */
+ IP4_FRAG_N_NEXT /* number of next nodes of ip4-frag */
+} ip4_frag_next_t;
+
+typedef enum
+{
+ IP6_FRAG_NEXT_IP4_LOOKUP, /* "ip4-lookup" */
+ IP6_FRAG_NEXT_IP6_LOOKUP, /* "ip6-lookup" */
+ IP6_FRAG_NEXT_DROP, /* "error-drop" */
+ IP6_FRAG_N_NEXT /* number of next nodes of ip6-frag */
+} ip6_frag_next_t;
+
+#define foreach_ip_frag_error \
+ /* Must be first: NONE is the value the frag nodes use for "no error". */ \
+ _(NONE, "packet fragmented") \
+ _(SMALL_PACKET, "packet smaller than MTU") /* counter bumped by the frag nodes */ \
+ _(FRAGMENT_SENT, "number of sent fragments") /* counter bumped by the frag nodes */ \
+ _(CANT_FRAGMENT_HEADER, "can't fragment header") \
+ _(DONT_FRAGMENT_SET, "can't fragment this packet") \
+ _(MALFORMED, "malformed packet") \
+ _(MEMORY, "could not allocate buffer") \
+ _(UNKNOWN, "unknown error")
+
+typedef enum
+{
+#define _(sym,str) IP_FRAG_ERROR_##sym,
+ foreach_ip_frag_error
+#undef _
+ IP_FRAG_N_ERROR, /* number of IP_FRAG_ERROR_* codes generated from foreach_ip_frag_error */
+} ip_frag_error_t;
+/* Stores offset, mtu, next_index and flags in the ip_frag opaque fields of buffer b. */
+void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 offset, u16 mtu,
+ u8 next_index, u8 flags);
+
+#endif /* ifndef IP_FRAG_H */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c
new file mode 100644
index 00000000..f7635b35
--- /dev/null
+++ b/src/vnet/ip/ip_init.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_init.c: ip generic initialization
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+
+ip_main_t ip_main; /* zeroed and populated by ip_main_init() */
+/*
+ * Generic IP initialization.
+ * Clears ip_main, fills the protocol and TCP/UDP port tables (and their
+ * lookup hashes) from protocols.def and ports.def, then calls the listed
+ * init functions in order. Returns the first error reported, or 0.
+ */
+clib_error_t *
+ip_main_init (vlib_main_t * vm)
+{
+ ip_main_t *im = &ip_main;
+ clib_error_t *error = 0;
+
+ memset (im, 0, sizeof (im[0]));
+ /* IP protocols: one protocol_infos element per ip_protocol() line of protocols.def */
+ {
+ ip_protocol_info_t *pi;
+ u32 i;
+
+#define ip_protocol(n,s) \
+do { \
+ vec_add2 (im->protocol_infos, pi, 1); \
+ pi->protocol = n; \
+ pi->name = (u8 *) #s; \
+} while (0);
+
+#include "protocols.def"
+
+#undef ip_protocol
+
+ im->protocol_info_by_name = hash_create_string (0, sizeof (uword));
+ for (i = 0; i < vec_len (im->protocol_infos); i++)
+ {
+ pi = im->protocol_infos + i;
+ /* both hashes map to the index of the element in protocol_infos */
+ hash_set_mem (im->protocol_info_by_name, pi->name, i);
+ hash_set (im->protocol_info_by_protocol, pi->protocol, i);
+ }
+ }
+ /* TCP/UDP ports: ports.def is included twice to build parallel name and number arrays */
+ {
+ tcp_udp_port_info_t *pi;
+ u32 i;
+ static char *port_names[] = {
+#define ip_port(s,n) #s,
+#include "ports.def"
+#undef ip_port
+ };
+ static u16 ports[] = {
+#define ip_port(s,n) n,
+#include "ports.def"
+#undef ip_port
+ };
+
+ vec_resize (im->port_infos, ARRAY_LEN (port_names));
+ im->port_info_by_name = hash_create_string (0, sizeof (uword));
+
+ for (i = 0; i < vec_len (im->port_infos); i++)
+ {
+ pi = im->port_infos + i;
+ pi->port = clib_host_to_net_u16 (ports[i]); /* stored, and hashed below, in network byte order */
+ pi->name = (u8 *) port_names[i];
+ hash_set_mem (im->port_info_by_name, pi->name, i);
+ hash_set (im->port_info_by_port, pi->port, i);
+ }
+ }
+ /* Call the other init functions in order, stopping at the first error */
+ if ((error = vlib_call_init_function (vm, vnet_main_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip4_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, icmp4_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, icmp6_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip6_hop_by_hop_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, udp_local_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, udp_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, ip_classify_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, input_acl_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, policer_classify_init)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, flow_classify_init)))
+ return error;
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (ip_main_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_input_acl.c b/src/vnet/ip/ip_input_acl.c
new file mode 100644
index 00000000..b0b52ab1
--- /dev/null
+++ b/src/vnet/ip/ip_input_acl.c
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/input_acl.h>
+
+typedef struct
+{
+ u32 sw_if_index; /* RX sw_if_index of the traced buffer */
+ u32 next_index; /* next node index chosen for the buffer */
+ u32 table_index; /* index in vcm->tables of the last table consulted, or ~0 if none */
+ u32 offset; /* vnet_classify_get_offset() of the matching entry, or ~0 if no entry matched */
+} ip_inacl_trace_t;
+
+/*
+ * Packet trace formatter for the input ACL nodes: appends a one-line
+ * rendering of an ip_inacl_trace_t record to 's' and returns the vector.
+ */
+static u8 *
+format_ip_inacl_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip_inacl_trace_t *trace = va_arg (*args, ip_inacl_trace_t *);
+
+  return format (s,
+                 "INACL: sw_if_index %d, next_index %d, table %d, offset %d",
+                 trace->sw_if_index, trace->next_index,
+                 trace->table_index, trace->offset);
+}
+
+vlib_node_registration_t ip4_inacl_node;
+vlib_node_registration_t ip6_inacl_node;
+
+#define foreach_ip_inacl_error \
+_(MISS, "input ACL misses") \
+_(HIT, "input ACL hits") \
+_(CHAIN_HIT, "input ACL hits after chain walk")
+
+typedef enum
+{
+#define _(sym,str) IP_INACL_ERROR_##sym,
+ foreach_ip_inacl_error
+#undef _
+ IP_INACL_N_ERROR, /* number of IP_INACL_ERROR_* codes generated from foreach_ip_inacl_error */
+} ip_inacl_error_t;
+
+static char *ip_inacl_error_strings[] = { /* one string per foreach_ip_inacl_error entry; registered by ip4_inacl_node and ip6_inacl_node */
+#define _(sym,string) string,
+ foreach_ip_inacl_error
+#undef _
+};
+/*
+ * Common body of the ip4-inacl and ip6-inacl node functions.
+ * Pass 1 computes, for every buffer, the classifier hash against the table
+ * configured for its RX interface and stores hash and table index in the
+ * buffer's l2_classify opaque. Pass 2 looks every buffer up, following
+ * next_table_index on a miss, and chooses the next node and error code.
+ * Returns the number of buffers of the incoming frame.
+ */
+static inline uword
+ip_inacl_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame, int is_ip4)
+{
+ u32 n_left_from, *from, *to_next;
+ acl_next_index_t next_index;
+ input_acl_main_t *am = &input_acl_main;
+ vnet_classify_main_t *vcm = am->vnet_classify_main;
+ f64 now = vlib_time_now (vm);
+ u32 hits = 0;
+ u32 misses = 0;
+ u32 chain_hits = 0;
+ input_acl_table_id_t tid;
+ vlib_node_runtime_t *error_node;
+ u32 n_next_nodes;
+
+ n_next_nodes = node->n_next_nodes; /* bound used below to validate next indices read from tables and entries */
+
+ if (is_ip4)
+ {
+ tid = INPUT_ACL_TABLE_IP4;
+ error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
+ }
+ else
+ {
+ tid = INPUT_ACL_TABLE_IP6;
+ error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
+ }
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ /* First pass: compute hashes */
+ /* NOTE(review): this pass indexes vcm->tables without the table_index != ~0 test done in the second pass - confirm callers guarantee a valid table */
+ while (n_left_from > 2)
+ {
+ vlib_buffer_t *b0, *b1;
+ u32 bi0, bi1;
+ u8 *h0, *h1;
+ u32 sw_if_index0, sw_if_index1;
+ u32 table_index0, table_index1;
+ vnet_classify_table_t *t0, *t1;
+
+ /* prefetch next iteration */
+ {
+ vlib_buffer_t *p1, *p2;
+
+ p1 = vlib_get_buffer (vm, from[1]);
+ p2 = vlib_get_buffer (vm, from[2]);
+
+ vlib_prefetch_buffer_header (p1, STORE);
+ CLIB_PREFETCH (p1->data, CLIB_CACHE_LINE_BYTES, STORE);
+ vlib_prefetch_buffer_header (p2, STORE);
+ CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ table_index1 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index1];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ t1 = pool_elt_at_index (vcm->tables, table_index1);
+ /* the match starts either at the current data pointer plus the table's offset, or at the start of the buffer data */
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ vnet_buffer (b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);
+
+ if (t1->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h1 = (void *) vlib_buffer_get_current (b1) + t1->current_data_offset;
+ else
+ h1 = b1->data;
+
+ vnet_buffer (b1)->l2_classify.hash =
+ vnet_classify_hash_packet (t1, (u8 *) h1);
+
+ vnet_classify_prefetch_bucket (t1, vnet_buffer (b1)->l2_classify.hash);
+
+ vnet_buffer (b0)->l2_classify.table_index = table_index0;
+
+ vnet_buffer (b1)->l2_classify.table_index = table_index1;
+
+ from += 2;
+ n_left_from -= 2;
+ }
+ /* first pass, remaining buffers one at a time */
+ while (n_left_from > 0)
+ {
+ vlib_buffer_t *b0;
+ u32 bi0;
+ u8 *h0;
+ u32 sw_if_index0;
+ u32 table_index0;
+ vnet_classify_table_t *t0;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ table_index0 =
+ am->classify_table_index_by_sw_if_index[tid][sw_if_index0];
+
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 = (void *) vlib_buffer_get_current (b0) + t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ vnet_buffer (b0)->l2_classify.hash =
+ vnet_classify_hash_packet (t0, (u8 *) h0);
+
+ vnet_buffer (b0)->l2_classify.table_index = table_index0;
+ vnet_classify_prefetch_bucket (t0, vnet_buffer (b0)->l2_classify.hash);
+
+ from++;
+ n_left_from--;
+ }
+ /* Second pass: rewind to the start of the frame and classify */
+ next_index = node->cached_next_index;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* Not enough load/store slots to dual loop... */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0 = ACL_NEXT_INDEX_DENY;
+ u32 table_index0;
+ vnet_classify_table_t *t0;
+ vnet_classify_entry_t *e0;
+ u64 hash0;
+ u8 *h0;
+ u8 error0;
+
+ /* Stride 3 seems to work best */
+ if (PREDICT_TRUE (n_left_from > 3))
+ {
+ vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
+ vnet_classify_table_t *tp1;
+ u32 table_index1;
+ u64 phash1;
+
+ table_index1 = vnet_buffer (p1)->l2_classify.table_index;
+
+ if (PREDICT_TRUE (table_index1 != ~0))
+ {
+ tp1 = pool_elt_at_index (vcm->tables, table_index1);
+ phash1 = vnet_buffer (p1)->l2_classify.hash;
+ vnet_classify_prefetch_entry (tp1, phash1);
+ }
+ }
+
+ /* speculatively enqueue b0 to the current next frame */
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ table_index0 = vnet_buffer (b0)->l2_classify.table_index;
+ e0 = 0;
+ t0 = 0;
+ vnet_get_config_data (am->vnet_config_main[tid],
+ &b0->current_config_index, &next0,
+ /* # bytes of config data */ 0); /* overwrites next0; presumably the next feature node when nothing below overrides it - confirm */
+
+ vnet_buffer (b0)->l2_classify.opaque_index = ~0;
+
+ if (PREDICT_TRUE (table_index0 != ~0))
+ {
+ hash0 = vnet_buffer (b0)->l2_classify.hash;
+ t0 = pool_elt_at_index (vcm->tables, table_index0);
+
+ if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 =
+ (void *) vlib_buffer_get_current (b0) +
+ t0->current_data_offset;
+ else
+ h0 = b0->data;
+
+ e0 = vnet_classify_find_entry (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ /* the entry's next index is only honoured when it is a valid next of this node */
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+
+ hits++;
+
+ if (is_ip4)
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP4_ERROR_INACL_SESSION_DENY : IP4_ERROR_NONE;
+ else
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP6_ERROR_INACL_SESSION_DENY : IP6_ERROR_NONE;
+ b0->error = error_node->errors[error0];
+
+ if (e0->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX ||
+ e0->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = e0->metadata;
+ }
+ else
+ {
+ /* miss in the first table: walk the chain of next tables until a hit or the end of the chain */
+ while (1)
+ {
+ if (PREDICT_TRUE (t0->next_table_index != ~0))
+ t0 = pool_elt_at_index (vcm->tables,
+ t0->next_table_index);
+ else
+ {
+ /* end of chain: apply the last table's miss_next_index when it is a valid next */
+ next0 = (t0->miss_next_index < n_next_nodes) ?
+ t0->miss_next_index : next0;
+
+ misses++;
+
+ if (is_ip4)
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP4_ERROR_INACL_TABLE_MISS : IP4_ERROR_NONE;
+ else
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP6_ERROR_INACL_TABLE_MISS : IP6_ERROR_NONE;
+ b0->error = error_node->errors[error0];
+ break;
+ }
+
+ if (t0->current_data_flag ==
+ CLASSIFY_FLAG_USE_CURR_DATA)
+ h0 =
+ (void *) vlib_buffer_get_current (b0) +
+ t0->current_data_offset;
+ else
+ h0 = b0->data;
+ /* the hash stored by the first pass belongs to the first table only, so recompute it */
+ hash0 = vnet_classify_hash_packet (t0, (u8 *) h0);
+ e0 = vnet_classify_find_entry
+ (t0, (u8 *) h0, hash0, now);
+ if (e0)
+ {
+ vnet_buffer (b0)->l2_classify.opaque_index
+ = e0->opaque_index;
+ vlib_buffer_advance (b0, e0->advance);
+ next0 = (e0->next_index < n_next_nodes) ?
+ e0->next_index : next0;
+ hits++;
+ chain_hits++;
+
+ if (is_ip4)
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP4_ERROR_INACL_SESSION_DENY : IP4_ERROR_NONE;
+ else
+ error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
+ IP6_ERROR_INACL_SESSION_DENY : IP6_ERROR_NONE;
+ b0->error = error_node->errors[error0];
+
+ if (e0->action == CLASSIFY_ACTION_SET_IP4_FIB_INDEX
+ || e0->action ==
+ CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ e0->metadata;
+ break;
+ }
+ }
+ }
+ }
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+ && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ ip_inacl_trace_t *t =
+ vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ t->next_index = next0;
+ t->table_index = t0 ? t0 - vcm->tables : ~0;
+ t->offset = (e0 && t0) ? vnet_classify_get_offset (t0, e0) : ~0;
+ }
+
+ /* verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_INACL_ERROR_MISS, misses);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_INACL_ERROR_HIT, hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP_INACL_ERROR_CHAIN_HIT, chain_hits);
+ return frame->n_vectors;
+}
+
+/* Node function of ip4-inacl: the shared input ACL body in IPv4 mode. */
+static uword
+ip4_inacl (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  const int is_ip4 = 1;
+
+  return ip_inacl_inline (vm, node, frame, is_ip4);
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_inacl_node) = {
+ .function = ip4_inacl,
+ .name = "ip4-inacl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_inacl_trace,
+ .n_errors = ARRAY_LEN(ip_inacl_error_strings),
+ .error_strings = ip_inacl_error_strings,
+ /* only the deny next node is named here */
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip4_inacl_node, ip4_inacl);
+
+/* Node function of ip6-inacl: the shared input ACL body in IPv6 mode. */
+static uword
+ip6_inacl (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  const int is_ip4 = 0;
+
+  return ip_inacl_inline (vm, node, frame, is_ip4);
+}
+
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_inacl_node) = {
+ .function = ip6_inacl,
+ .name = "ip6-inacl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_inacl_trace,
+ .n_errors = ARRAY_LEN(ip_inacl_error_strings),
+ .error_strings = ip_inacl_error_strings,
+ /* only the deny next node is named here */
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "error-drop",
+ },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_inacl_node, ip6_inacl);
+
+static clib_error_t *
+ip_inacl_init (vlib_main_t * vm)
+{
+ return 0; /* nothing to set up; 0 means no error */
+}
+
+VLIB_INIT_FUNCTION (ip_inacl_init);
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_packet.h b/src/vnet/ip/ip_packet.h
new file mode 100644
index 00000000..d3f3de77
--- /dev/null
+++ b/src/vnet/ip/ip_packet.h
@@ -0,0 +1,180 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_packet.h: packet format common between ip4 & ip6
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_ip_packet_h
+#define included_ip_packet_h
+
+#include <vppinfra/byte_order.h>
+#include <vppinfra/error.h>
+
+typedef enum ip_protocol
+{
+#define ip_protocol(n,s) IP_PROTOCOL_##s = n,
+#include "protocols.def"
+#undef ip_protocol
+} ip_protocol_t;
+
+/* TCP/UDP ports. */
+typedef enum
+{
+#define ip_port(s,n) IP_PORT_##s = n,
+#include "ports.def"
+#undef ip_port
+} ip_port_t;
+
+/* Classifies protocols into UDP, ICMP or other. */
+typedef enum
+{
+ IP_BUILTIN_PROTOCOL_UDP,
+ IP_BUILTIN_PROTOCOL_ICMP,
+ IP_BUILTIN_PROTOCOL_UNKNOWN,
+} ip_builtin_protocol_t;
+
+#define foreach_ip_builtin_multicast_group \
+ _ (1, all_hosts_on_subnet) \
+ _ (2, all_routers_on_subnet) \
+ _ (4, dvmrp) \
+ _ (5, ospf_all_routers) \
+ _ (6, ospf_designated_routers) \
+ _ (13, pim) \
+ _ (18, vrrp) \
+ _ (102, hsrp) \
+ _ (22, igmp_v3)
+
+typedef enum
+{
+#define _(n,f) IP_MULTICAST_GROUP_##f = n,
+ foreach_ip_builtin_multicast_group
+#undef _
+} ip_multicast_group_t;
+
+/* IP checksum support. */
+
+/* Incremental checksum update. */
+typedef uword ip_csum_t;
+
+/* Add x to sum; if the addition wraps around, the lost carry is added back
+   into the result. */
+always_inline ip_csum_t
+ip_csum_with_carry (ip_csum_t sum, ip_csum_t x)
+{
+  ip_csum_t total = sum + x;
+  /* total < x can only happen when the unsigned addition wrapped */
+  ip_csum_t carry = (total < x) ? 1 : 0;
+
+  return total + carry;
+}
+
+/* Update checksum changing field at even byte offset from x -> 0. */
+/* Note that, despite the name, the arithmetic here is a subtraction: it
+   returns d such that ip_csum_with_carry (d, x) == c, which is exactly what
+   the ASSERT below verifies. */
+always_inline ip_csum_t
+ip_csum_add_even (ip_csum_t c, ip_csum_t x)
+{
+ ip_csum_t d;
+
+ d = c - x;
+
+ /* Fold in carry from high bit. */
+ /* d > c means the unsigned subtraction wrapped, so take the borrow out. */
+ d -= d > c;
+
+ ASSERT (ip_csum_with_carry (d, x) == c);
+
+ return d;
+}
+
+/* Update checksum changing field at even byte offset from 0 -> x. */
+/* Note that, despite the name, this adds x to c (with the carry folded back
+   in) by delegating to ip_csum_with_carry. */
+always_inline ip_csum_t
+ip_csum_sub_even (ip_csum_t c, ip_csum_t x)
+{
+ return ip_csum_with_carry (c, x);
+}
+
+/* Incrementally update checksum `sum` for a field whose value changes from
+   `old` to `new`.  The field's byte offset and size are used to decide
+   whether both values must be shifted left by 8 bits before being applied. */
+always_inline ip_csum_t
+ip_csum_update_inline (ip_csum_t sum, ip_csum_t old, ip_csum_t new,
+                       u32 field_byte_offset, u32 field_n_bytes)
+{
+  u32 odd_sized = field_n_bytes & 1;
+  u32 offset_parity = field_byte_offset & 1;
+
+  /* Odd-sized fields at an even offset on big-endian, or at an odd offset on
+     little-endian, need the byte shifted into place for the checksum. */
+  if (odd_sized && offset_parity == CLIB_ARCH_IS_LITTLE_ENDIAN)
+    {
+      old <<= 8;
+      new <<= 8;
+    }
+
+  return ip_csum_add_even (ip_csum_sub_even (sum, old), new);
+}
+
+#define ip_csum_update(sum,old,new,type,field) \
+ ip_csum_update_inline ((sum), (old), (new), \
+ STRUCT_OFFSET_OF (type, field), \
+ STRUCT_SIZE_OF (type, field))
+
+/* Fold an accumulated checksum down to 16 bits by repeatedly adding the
+   upper part of the value into the lower part.  The result is truncated to
+   u16 by the return type. */
+always_inline u16
+ip_csum_fold (ip_csum_t c)
+{
+ /* Reduce to 16 bits. */
+#if uword_bits == 64
+ /* 64-bit accumulators first add the upper 32 bits into the lower 32, then
+    fold once at 16 bits. */
+ c = (c & (ip_csum_t) 0xffffffff) + (c >> (ip_csum_t) 32);
+ c = (c & 0xffff) + (c >> 16);
+#endif
+
+ /* Two more folds: the second absorbs any carry the first one produced. */
+ c = (c & 0xffff) + (c >> 16);
+ c = (c & 0xffff) + (c >> 16);
+
+ return c;
+}
+
+/* Copy data and checksum at the same time. */
+ip_csum_t ip_csum_and_memcpy (ip_csum_t sum, void *dst, void *src,
+ uword n_bytes);
+
+/* Fold `sum` to 16 bits via ip_csum_fold.  The `dst` argument is accepted
+   but not used by this implementation. */
+always_inline u16
+ip_csum_and_memcpy_fold (ip_csum_t sum, void *dst)
+{
+ return ip_csum_fold (sum);
+}
+
+/* Checksum routine. */
+ip_csum_t ip_incremental_checksum (ip_csum_t sum, void *data, uword n_bytes);
+
+#endif /* included_ip_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_source_and_port_range_check.h b/src/vnet/ip/ip_source_and_port_range_check.h
new file mode 100644
index 00000000..fefe5ff1
--- /dev/null
+++ b/src/vnet/ip/ip_source_and_port_range_check.h
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_ip_ip_source_and_port_range_check_h
+#define included_ip_ip_source_and_port_range_check_h
+
+
+typedef struct
+{
+ /* convenience */
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+} source_range_check_main_t;
+
+/* NOTE(review): this is a (tentative) definition in a header rather than an
+   extern declaration, so every translation unit that includes this file
+   defines the symbol; that presumably relies on common-symbol linking --
+   TODO confirm, and consider 'extern' here plus one definition in a .c file. */
+source_range_check_main_t source_range_check_main;
+
+typedef enum
+{
+ IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_OUT,
+ IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_OUT,
+ IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_TCP_IN,
+ IP_SOURCE_AND_PORT_RANGE_CHECK_PROTOCOL_UDP_IN,
+ IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS,
+} ip_source_and_port_range_check_protocol_t;
+
+typedef struct
+{
+ u32 fib_index[IP_SOURCE_AND_PORT_RANGE_CHECK_N_PROTOCOLS];
+} ip_source_and_port_range_check_config_t;
+
+/* Upper bound on ranges, derived from the buffer pre-data size.  The
+   expansion is fully parenthesized and carries no trailing ';' so that the
+   macro can be used inside larger expressions and conditions. */
+#define IP_SOURCE_AND_PORT_RANGE_CHECK_RANGE_LIMIT (VLIB_BUFFER_PRE_DATA_SIZE/(2*sizeof(u16x8)))
+
+typedef struct
+{
+ union
+ {
+ u16x8 as_u16x8;
+ u16 as_u16[8];
+ };
+} u16x8vec_t;
+
+typedef struct
+{
+ u16x8vec_t low;
+ u16x8vec_t hi;
+} protocol_port_range_t;
+
+/**
+ * @brief The number of supported ranges per-data path object.
+ * If more ranges are required, bump this number.
+ */
+#define N_PORT_RANGES_PER_DPO 64
+#define N_RANGES_PER_BLOCK (sizeof(u16x8vec_t)/2)
+#define N_BLOCKS_PER_DPO (N_PORT_RANGES_PER_DPO/N_RANGES_PER_BLOCK)
+
+/**
+ * @brief
+ * The object that is in the data-path to perform the check.
+ *
+ * Some trade-offs here; memory vs performance.
+ *
+ * performance:
+ * the principal factor is d-cache line misses/hits.
+ * so we want the data layout to minimise the d-cache misses. This
+ * means not following dependent reads. i.e. not doing
+ *
+ * struct B {
+ * u16 n_ranges;
+ * range_t *ranges; // vector of ranges.
+ * }
+ *
+ * so to read ranges[0] we would first d-cache miss on the address
+ * of the object of type B, for which we would need to wait before we
+ * can get the address of B->ranges.
+ * So this layout is better:
+ *
+ * struct B {
+ * u16 n_ranges;
+ * range_t ranges[N];
+ * }
+ *
+ * memory:
+ * the latter layout above is more memory hungry. And N needs to be:
+ * 1 - sized for the maximum required
+ * 2 - fixed, so that objects of type B can be pool allocated and so
+ * 'get'-able using an index.
+ * An option over fixed might be to allocate contiguous chunk from
+ * the pool (like we used to do for multi-path adjs).
+ */
+typedef struct protocol_port_range_dpo_t_
+{
+ /**
+ * The number of blocks from the 'blocks' array below
+ * that have ranges configured. We keep this count so that in the data-path
+ * we can limit the loop to be only over the blocks we need
+ */
+ u16 n_used_blocks;
+
+ /**
+ * The total number of free ranges from all blocks.
+ * Used to prevent overrun of the ranges available.
+ */
+ u16 n_free_ranges;
+
+ /**
+ * the fixed size array of range blocks (N_BLOCKS_PER_DPO entries, each
+ * holding a vector of low ports and a vector of high ports)
+ */
+ protocol_port_range_t blocks[N_BLOCKS_PER_DPO];
+} protocol_port_range_dpo_t;
+
+int ip4_source_and_port_range_check_add_del (ip4_address_t * address,
+ u32 length,
+ u32 vrf_id,
+ u16 * low_ports,
+ u16 * hi_ports, int is_add);
+
+// This will be moved to another file in another patch -- for API freeze
+int ip6_source_and_port_range_check_add_del (ip6_address_t * address,
+ u32 length,
+ u32 vrf_id,
+ u16 * low_ports,
+ u16 * hi_ports, int is_add);
+
+int set_ip_source_and_port_range_check (vlib_main_t * vm,
+ u32 * fib_index,
+ u32 sw_if_index, u32 is_add);
+
+#endif /* included ip_source_and_port_range_check_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
new file mode 100644
index 00000000..856c4942
--- /dev/null
+++ b/src/vnet/ip/lookup.c
@@ -0,0 +1,1442 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_lookup.c: ip4/6 adjacency and lookup table management
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vnet/ip/ip.h>
+#include <vnet/adj/adj.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/mpls/mpls.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/classify_dpo.h>
+#include <vnet/dpo/punt_dpo.h>
+#include <vnet/dpo/receive_dpo.h>
+#include <vnet/dpo/ip_null_dpo.h>
+#include <vnet/ip/ip6_neighbor.h>
+
+/**
+ * @file
+ * @brief IPv4 and IPv6 adjacency and lookup table management.
+ *
+ */
+
+/**
+ * @brief Add or delete an address in the per-interface address list kept by
+ *        an ip_lookup_main_t.
+ *
+ * Addresses live in lm->if_address_pool, are indexed by the
+ * lm->address_to_if_address_index hash and are chained per interface through
+ * prev_this_sw_interface / next_this_sw_interface, with the list head stored
+ * in lm->if_address_pool_index_by_sw_if_index[sw_if_index].
+ *
+ * @param lm              lookup main to operate on.
+ * @param sw_if_index     interface the address is added to / removed from.
+ * @param addr_fib        hash key identifying the address.
+ * @param address_length  prefix length; must be non-zero, at most 32 (ip4)
+ *                        or 128 (ip6), and match any existing entry.
+ * @param is_del          non-zero to delete, zero to add.
+ * @param result_if_address_index  optional output: the pool index of the
+ *                        address on add (new or already present on this
+ *                        interface), ~0 after a delete or when the address
+ *                        is already present on another interface.
+ * @return 0 on success; otherwise an error, with vnm->api_errno set.
+ */
+clib_error_t *
+ip_interface_address_add_del (ip_lookup_main_t * lm,
+                              u32 sw_if_index,
+                              void *addr_fib,
+                              u32 address_length,
+                              u32 is_del, u32 * result_if_address_index)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  ip_interface_address_t *a, *prev, *next;
+  uword *p = mhash_get (&lm->address_to_if_address_index, addr_fib);
+
+  vec_validate_init_empty (lm->if_address_pool_index_by_sw_if_index,
+                           sw_if_index, ~0);
+  a = p ? pool_elt_at_index (lm->if_address_pool, p[0]) : 0;
+
+  /* Verify given length. */
+  if ((a && (address_length != a->address_length)) ||
+      (address_length == 0) ||
+      (lm->is_ip6 && address_length > 128) ||
+      (!lm->is_ip6 && address_length > 32))
+    {
+      vnm->api_errno = VNET_API_ERROR_ADDRESS_LENGTH_MISMATCH;
+      return clib_error_create
+        ("%U wrong length (expected %d) for interface %U",
+         lm->format_address_and_length, addr_fib,
+         address_length, a ? a->address_length : -1,
+         format_vnet_sw_if_index_name, vnm, sw_if_index);
+    }
+
+  if (is_del)
+    {
+      /*
+       * The address must exist AND belong to the interface named by the
+       * caller.  Unlinking an entry owned by another interface would reset
+       * this interface's list head and leave the real owner's head pointing
+       * at a freed pool element.
+       */
+      if (!a || a->sw_if_index != sw_if_index)
+        {
+          vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+          vnm->api_errno = VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
+          return clib_error_create ("%U not found for interface %U",
+                                    lm->format_address_and_length,
+                                    addr_fib, address_length,
+                                    format_vnet_sw_interface_name, vnm, si);
+        }
+
+      /* Unlink from the predecessor, if any. */
+      if (a->prev_this_sw_interface != ~0)
+        {
+          prev =
+            pool_elt_at_index (lm->if_address_pool,
+                               a->prev_this_sw_interface);
+          prev->next_this_sw_interface = a->next_this_sw_interface;
+        }
+      /* Unlink from the successor, if any. */
+      if (a->next_this_sw_interface != ~0)
+        {
+          next =
+            pool_elt_at_index (lm->if_address_pool,
+                               a->next_this_sw_interface);
+          next->prev_this_sw_interface = a->prev_this_sw_interface;
+
+          /* Removing the head: the successor becomes the new head. */
+          if (a->prev_this_sw_interface == ~0)
+            lm->if_address_pool_index_by_sw_if_index[sw_if_index] =
+              a->next_this_sw_interface;
+        }
+
+      /* Removing the only element: the list is now empty. */
+      if ((a->next_this_sw_interface == ~0)
+          && (a->prev_this_sw_interface == ~0))
+        lm->if_address_pool_index_by_sw_if_index[sw_if_index] = ~0;
+
+      mhash_unset (&lm->address_to_if_address_index, addr_fib,
+                   /* old_value */ 0);
+      pool_put (lm->if_address_pool, a);
+
+      if (result_if_address_index)
+        *result_if_address_index = ~0;
+    }
+
+  else if (!a)
+    {
+      u32 pi;                   /* previous index */
+      u32 ai;
+      u32 hi;                   /* head index */
+
+      pool_get (lm->if_address_pool, a);
+      memset (a, ~0, sizeof (a[0]));
+      ai = a - lm->if_address_pool;
+
+      /* Walk to the tail of this interface's list; the new address is
+         appended there. */
+      hi = pi = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+      prev = 0;
+      while (pi != (u32) ~ 0)
+        {
+          prev = pool_elt_at_index (lm->if_address_pool, pi);
+          pi = prev->next_this_sw_interface;
+        }
+      pi = prev ? prev - lm->if_address_pool : (u32) ~ 0;
+
+      a->address_key = mhash_set (&lm->address_to_if_address_index,
+                                  addr_fib, ai, /* old_value */ 0);
+      a->address_length = address_length;
+      a->sw_if_index = sw_if_index;
+      a->flags = 0;
+      a->prev_this_sw_interface = pi;
+      a->next_this_sw_interface = ~0;
+      if (prev)
+        prev->next_this_sw_interface = ai;
+
+      /* Keep the existing head, or make the new entry the head of a
+         previously empty list. */
+      lm->if_address_pool_index_by_sw_if_index[sw_if_index] =
+        (hi != ~0) ? hi : ai;
+      if (result_if_address_index)
+        *result_if_address_index = ai;
+    }
+  else
+    {
+      /* Address already known: fine on the same interface, an error when it
+         is configured on a different one. */
+      if (sw_if_index != a->sw_if_index)
+        {
+          if (result_if_address_index)
+            *result_if_address_index = ~0;
+          vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
+          return clib_error_create
+            ("Prefix %U already found on interface %U",
+             lm->format_address_and_length, addr_fib, address_length,
+             format_vnet_sw_if_index_name, vnm, a->sw_if_index);
+        }
+
+      if (result_if_address_index)
+        *result_if_address_index = a - lm->if_address_pool;
+    }
+
+  return /* no error */ 0;
+}
+
+/* Software-interface add/del callback (registered just below).  It makes
+ * sure the ip4 and ip6 per-interface address-list head vectors are long
+ * enough to be indexed by sw_if_index, initialising new slots to ~0.
+ * Neither vnm nor is_add is consulted; the function always returns NULL. */
+static clib_error_t *
+ip_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
+{
+ vec_validate_init_empty (ip4_main.
+ lookup_main.if_address_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+ vec_validate_init_empty (ip6_main.
+ lookup_main.if_address_pool_index_by_sw_if_index,
+ sw_if_index, ~0);
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_sw_interface_add_del);
+
+/* Initialise an ip_lookup_main_t for IPv4 (is_ip6 == 0) or IPv6: default the
+   FIB result size, pick the address formatter and address hash key size, and
+   fill the per-IP-protocol dispatch tables. */
+void
+ip_lookup_init (ip_lookup_main_t * lm, u32 is_ip6)
+{
+  uword key_n_bytes;
+  int icmp_protocol;
+  int proto;
+
+  if (!lm->fib_result_n_bytes)
+    lm->fib_result_n_bytes = sizeof (uword);
+
+  lm->is_ip6 = is_ip6;
+
+  /* Address family specific pieces. */
+  if (is_ip6)
+    {
+      lm->format_address_and_length = format_ip6_address_and_length;
+      key_n_bytes = sizeof (ip6_address_fib_t);
+      icmp_protocol = IP_PROTOCOL_ICMP6;
+    }
+  else
+    {
+      lm->format_address_and_length = format_ip4_address_and_length;
+      key_n_bytes = sizeof (ip4_address_fib_t);
+      icmp_protocol = IP_PROTOCOL_ICMP;
+    }
+
+  mhash_init (&lm->address_to_if_address_index, sizeof (uword), key_n_bytes);
+
+  /* Setup all IP protocols to be punted and builtin-unknown. */
+  for (proto = 0; proto < 256; proto++)
+    {
+      lm->local_next_by_ip_protocol[proto] = IP_LOCAL_NEXT_PUNT;
+      lm->builtin_protocol_by_ip_protocol[proto] =
+        IP_BUILTIN_PROTOCOL_UNKNOWN;
+    }
+
+  /* ...then override the two protocols that are handled locally. */
+  lm->local_next_by_ip_protocol[IP_PROTOCOL_UDP] = IP_LOCAL_NEXT_UDP_LOOKUP;
+  lm->local_next_by_ip_protocol[icmp_protocol] = IP_LOCAL_NEXT_ICMP;
+
+  lm->builtin_protocol_by_ip_protocol[IP_PROTOCOL_UDP] =
+    IP_BUILTIN_PROTOCOL_UDP;
+  lm->builtin_protocol_by_ip_protocol[icmp_protocol] =
+    IP_BUILTIN_PROTOCOL_ICMP;
+}
+
+/* Format function: takes a flow hash configuration (read from the va_list as
+ * a u32) and appends, for every entry of foreach_flow_hash_bit whose value
+ * is set in it, that entry's name followed by a space. */
+u8 *
+format_ip_flow_hash_config (u8 * s, va_list * args)
+{
+ flow_hash_config_t flow_hash_config = va_arg (*args, u32);
+
+ /* One 'if' per flow-hash bit, expanded from the foreach list. */
+#define _(n,v) if (flow_hash_config & v) s = format (s, "%s ", #n);
+ foreach_flow_hash_bit;
+#undef _
+
+ return s;
+}
+
+/* Format function: appends a short name for an ip_lookup_next_t value.
+   Values without a case below are rendered as "unknown <n>", and
+   IP_LOOKUP_NEXT_REWRITE appends nothing. */
+u8 *
+format_ip_lookup_next (u8 * s, va_list * args)
+{
+  /* the enum arrives promoted to int through the va_list */
+  ip_lookup_next_t n = va_arg (*args, int);
+  char *label;
+
+  switch (n)
+    {
+    case IP_LOOKUP_NEXT_DROP:
+      label = "drop";
+      break;
+    case IP_LOOKUP_NEXT_PUNT:
+      label = "punt";
+      break;
+    case IP_LOOKUP_NEXT_ARP:
+      label = "arp";
+      break;
+    case IP_LOOKUP_NEXT_MIDCHAIN:
+      label = "midchain";
+      break;
+    case IP_LOOKUP_NEXT_GLEAN:
+      label = "glean";
+      break;
+    case IP_LOOKUP_NEXT_MCAST:
+      label = "mcast";
+      break;
+
+    case IP_LOOKUP_NEXT_REWRITE:
+      /* no text is appended for rewrite */
+      return s;
+
+    default:
+      return format (s, "unknown %d", n);
+    }
+
+  vec_add (s, label, strlen (label));
+  return s;
+}
+
+/* Format function taking (u32 adj_index, u8 *packet_data, u32 n_bytes): the
+   packet data is hex-dumped only when the adjacency's lookup-next index is
+   REWRITE or MCAST; for any other adjacency nothing is appended. */
+u8 *
+format_ip_adjacency_packet_data (u8 * s, va_list * args)
+{
+  u32 adj_index = va_arg (*args, u32);
+  u8 *packet_data = va_arg (*args, u8 *);
+  u32 n_packet_data_bytes = va_arg (*args, u32);
+  ip_adjacency_t *adj = adj_get (adj_index);
+  u32 next = adj->lookup_next_index;
+
+  if (next == IP_LOOKUP_NEXT_REWRITE || next == IP_LOOKUP_NEXT_MCAST)
+    s = format (s, "%U", format_hex_bytes, packet_data,
+                n_packet_data_bytes);
+
+  return s;
+}
+
+/* Unformat function taking (dpo_id_t *dpo, fib_protocol_t proto-as-int).
+ * It matches one of the keywords "drop", "punt", "local",
+ * "null-send-unreach", "null-send-prohibit", "null" or
+ * "classify <table-index>" and fills *dpo accordingly.
+ * Returns 1 on a match and 0 otherwise (including "classify" without a
+ * table index). */
+static uword
+unformat_dpo (unformat_input_t * input, va_list * args)
+{
+ dpo_id_t *dpo = va_arg (*args, dpo_id_t *);
+ fib_protocol_t fp = va_arg (*args, int);
+ dpo_proto_t proto;
+
+ proto = fib_proto_to_dpo (fp);
+
+ if (unformat (input, "drop"))
+ dpo_copy (dpo, drop_dpo_get (proto));
+ else if (unformat (input, "punt"))
+ dpo_copy (dpo, punt_dpo_get (proto));
+ else if (unformat (input, "local"))
+ receive_dpo_add_or_lock (proto, ~0, NULL, dpo);
+ /* The longer "null-send-*" keywords are tried before plain "null". */
+ else if (unformat (input, "null-send-unreach"))
+ ip_null_dpo_add_and_lock (proto, IP_NULL_ACTION_SEND_ICMP_UNREACH, dpo);
+ else if (unformat (input, "null-send-prohibit"))
+ ip_null_dpo_add_and_lock (proto, IP_NULL_ACTION_SEND_ICMP_PROHIBIT, dpo);
+ else if (unformat (input, "null"))
+ ip_null_dpo_add_and_lock (proto, IP_NULL_ACTION_NONE, dpo);
+ else if (unformat (input, "classify"))
+ {
+ u32 classify_table_index;
+
+ if (!unformat (input, "%d", &classify_table_index))
+ {
+ clib_warning ("classify adj must specify table index");
+ return 0;
+ }
+
+ dpo_set (dpo, DPO_CLASSIFY, proto,
+ classify_dpo_create (proto, classify_table_index));
+ }
+ else
+ return 0;
+
+ return 1;
+}
+
+const ip46_address_t zero_addr = {
+ .as_u64 = {
+ 0, 0},
+};
+
+/**
+ * @brief CLI handler for 'ip route': parse one line of input into prefixes,
+ *        route paths and/or a special DPO, then add or delete the routes in
+ *        the selected table using FIB_SOURCE_CLI.
+ *
+ * Options that modify a path (resolve-via-*, out-labels, weight, preference)
+ * apply to the most recently parsed path and are rejected with an error when
+ * no path has been given yet.
+ *
+ * @param vm          vlib main; used for timing and CLI output.
+ * @param main_input  CLI input; a single line is consumed from it.
+ * @param cmd         the CLI command (not used).
+ * @return 0 on success (or when no line could be read), otherwise an error.
+ */
+clib_error_t *
+vnet_ip_route_cmd (vlib_main_t * vm,
+                   unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  fib_route_path_t *rpaths = NULL, rpath;
+  dpo_id_t dpo = DPO_INVALID, *dpos = NULL;
+  fib_prefix_t *prefixs = NULL, pfx;
+  mpls_label_t out_label, via_label;
+  clib_error_t *error = NULL;
+  u32 weight, preference;
+  u32 table_id, is_del;
+  vnet_main_t *vnm;
+  u32 fib_index;
+  f64 count;
+  int i;
+
+  vnm = vnet_get_main ();
+  is_del = 0;
+  table_id = 0;
+  count = 1;
+  memset (&pfx, 0, sizeof (pfx));
+  out_label = via_label = MPLS_LABEL_INVALID;
+
+  /* Get a line of input. */
+  if (!unformat_user (main_input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      /* every iteration starts from a clean path template */
+      memset (&rpath, 0, sizeof (rpath));
+
+      if (unformat (line_input, "table %d", &table_id))
+        ;
+      else if (unformat (line_input, "resolve-via-host"))
+        {
+          if (vec_len (rpaths) == 0)
+            {
+              error = clib_error_return (0, "Paths then flags");
+              goto done;
+            }
+          rpaths[vec_len (rpaths) - 1].frp_flags |=
+            FIB_ROUTE_PATH_RESOLVE_VIA_HOST;
+        }
+      else if (unformat (line_input, "resolve-via-attached"))
+        {
+          if (vec_len (rpaths) == 0)
+            {
+              error = clib_error_return (0, "Paths then flags");
+              goto done;
+            }
+          rpaths[vec_len (rpaths) - 1].frp_flags |=
+            FIB_ROUTE_PATH_RESOLVE_VIA_ATTACHED;
+        }
+      else if (unformat (line_input, "out-labels"))
+        {
+          if (vec_len (rpaths) == 0)
+            {
+              error = clib_error_return (0, "Paths then labels");
+              goto done;
+            }
+          else
+            {
+              while (unformat (line_input, "%U",
+                               unformat_mpls_unicast_label, &out_label))
+                {
+                  vec_add1 (rpaths[vec_len (rpaths) - 1].frp_label_stack,
+                            out_label);
+                }
+            }
+        }
+      else if (unformat (line_input, "via-label %U",
+                         unformat_mpls_unicast_label, &rpath.frp_local_label))
+        {
+          rpath.frp_weight = 1;
+          rpath.frp_eos = MPLS_NON_EOS;
+          rpath.frp_proto = DPO_PROTO_MPLS;
+          rpath.frp_sw_if_index = ~0;
+          vec_add1 (rpaths, rpath);
+        }
+      else if (unformat (line_input, "count %f", &count))
+        ;
+
+      else if (unformat (line_input, "%U/%d",
+                         unformat_ip4_address, &pfx.fp_addr.ip4, &pfx.fp_len))
+        {
+          pfx.fp_proto = FIB_PROTOCOL_IP4;
+          vec_add1 (prefixs, pfx);
+        }
+      else if (unformat (line_input, "%U/%d",
+                         unformat_ip6_address, &pfx.fp_addr.ip6, &pfx.fp_len))
+        {
+          pfx.fp_proto = FIB_PROTOCOL_IP6;
+          vec_add1 (prefixs, pfx);
+        }
+      else if (unformat (line_input, "via %U %U",
+                         unformat_ip4_address,
+                         &rpath.frp_addr.ip4,
+                         unformat_vnet_sw_interface, vnm,
+                         &rpath.frp_sw_if_index))
+        {
+          rpath.frp_weight = 1;
+          rpath.frp_proto = DPO_PROTO_IP4;
+          vec_add1 (rpaths, rpath);
+        }
+
+      else if (unformat (line_input, "via %U %U",
+                         unformat_ip6_address,
+                         &rpath.frp_addr.ip6,
+                         unformat_vnet_sw_interface, vnm,
+                         &rpath.frp_sw_if_index))
+        {
+          rpath.frp_weight = 1;
+          rpath.frp_proto = DPO_PROTO_IP6;
+          vec_add1 (rpaths, rpath);
+        }
+      else if (unformat (line_input, "weight %u", &weight))
+        {
+          /*
+           * Operator input must not be able to trip an ASSERT (debug) or
+           * write in front of the vector (release): report it instead.
+           */
+          if (vec_len (rpaths) == 0)
+            {
+              error = clib_error_return (0, "Paths then weight");
+              goto done;
+            }
+          rpaths[vec_len (rpaths) - 1].frp_weight = weight;
+        }
+      else if (unformat (line_input, "preference %u", &preference))
+        {
+          /* same reasoning as for "weight" above */
+          if (vec_len (rpaths) == 0)
+            {
+              error = clib_error_return (0, "Paths then preference");
+              goto done;
+            }
+          rpaths[vec_len (rpaths) - 1].frp_preference = preference;
+        }
+      else if (unformat (line_input, "via %U next-hop-table %d",
+                         unformat_ip4_address,
+                         &rpath.frp_addr.ip4, &rpath.frp_fib_index))
+        {
+          rpath.frp_weight = 1;
+          rpath.frp_sw_if_index = ~0;
+          rpath.frp_proto = DPO_PROTO_IP4;
+          vec_add1 (rpaths, rpath);
+        }
+      else if (unformat (line_input, "via %U next-hop-table %d",
+                         unformat_ip6_address,
+                         &rpath.frp_addr.ip6, &rpath.frp_fib_index))
+        {
+          rpath.frp_weight = 1;
+          rpath.frp_sw_if_index = ~0;
+          rpath.frp_proto = DPO_PROTO_IP6;
+          vec_add1 (rpaths, rpath);
+        }
+      else if (unformat (line_input, "via %U",
+                         unformat_ip4_address, &rpath.frp_addr.ip4))
+        {
+          /*
+           * the recursive next-hops are by default in the same table
+           * as the prefix
+           */
+          rpath.frp_fib_index = table_id;
+          rpath.frp_weight = 1;
+          rpath.frp_sw_if_index = ~0;
+          rpath.frp_proto = DPO_PROTO_IP4;
+          vec_add1 (rpaths, rpath);
+        }
+      else if (unformat (line_input, "via %U",
+                         unformat_ip6_address, &rpath.frp_addr.ip6))
+        {
+          rpath.frp_fib_index = table_id;
+          rpath.frp_weight = 1;
+          rpath.frp_sw_if_index = ~0;
+          rpath.frp_proto = DPO_PROTO_IP6;
+          vec_add1 (rpaths, rpath);
+        }
+      else if (unformat (line_input,
+                         "lookup in table %d", &rpath.frp_fib_index))
+        {
+          rpath.frp_proto = fib_proto_to_dpo (pfx.fp_proto);
+          rpath.frp_sw_if_index = ~0;
+          vec_add1 (rpaths, rpath);
+        }
+      else if (vec_len (prefixs) > 0 &&
+               unformat (line_input, "via %U",
+                         unformat_vnet_sw_interface, vnm,
+                         &rpath.frp_sw_if_index))
+        {
+          rpath.frp_weight = 1;
+          rpath.frp_proto = fib_proto_to_dpo (prefixs[0].fp_proto);
+          vec_add1 (rpaths, rpath);
+        }
+      else if (vec_len (prefixs) > 0 &&
+               unformat (line_input, "via %U",
+                         unformat_dpo, &dpo, prefixs[0].fp_proto))
+        {
+          vec_add1 (dpos, dpo);
+        }
+      else if (unformat (line_input, "del"))
+        is_del = 1;
+      else if (unformat (line_input, "add"))
+        is_del = 0;
+      else
+        {
+          error = unformat_parse_error (line_input);
+          goto done;
+        }
+    }
+
+  if (vec_len (prefixs) == 0)
+    {
+      error =
+        clib_error_return (0, "expected ip4/ip6 destination address/length.");
+      goto done;
+    }
+
+  if (!is_del && vec_len (rpaths) + vec_len (dpos) == 0)
+    {
+      error = clib_error_return (0, "expected paths.");
+      goto done;
+    }
+
+  if (~0 == table_id)
+    {
+      /*
+       * if no table_id is passed we will manipulate the default
+       */
+      fib_index = 0;
+    }
+  else
+    {
+      fib_index = fib_table_find (prefixs[0].fp_proto, table_id);
+
+      if (~0 == fib_index)
+        {
+          error = clib_error_return (0, "Nonexistent table id %d", table_id);
+          goto done;
+        }
+    }
+
+  for (i = 0; i < vec_len (prefixs); i++)
+    {
+      if (is_del && 0 == vec_len (rpaths))
+        {
+          fib_table_entry_delete (fib_index, &prefixs[i], FIB_SOURCE_CLI);
+        }
+      else if (!is_del && 1 == vec_len (dpos))
+        {
+          fib_table_entry_special_dpo_add (fib_index,
+                                           &prefixs[i],
+                                           FIB_SOURCE_CLI,
+                                           FIB_ENTRY_FLAG_EXCLUSIVE,
+                                           &dpos[0]);
+          dpo_reset (&dpos[0]);
+        }
+      else if (vec_len (dpos) > 0)
+        {
+          error =
+            clib_error_return (0,
+                               "Load-balancing over multiple special adjacencies is unsupported");
+          goto done;
+        }
+      else if (0 < vec_len (rpaths))
+        {
+          u32 k, j, n, incr;
+          ip46_address_t dst = prefixs[i].fp_addr;
+          f64 t[2];
+          n = count;
+          t[0] = vlib_time_now (vm);
+          /*
+           * NOTE(review): the shift count reaches or exceeds the width of
+           * int for an IPv4 /0 and for IPv6 prefixes of length <= 96, which
+           * is undefined behaviour in C.  Left unchanged here because the
+           * intended step between generated IPv6 prefixes (see 'bucket'
+           * below) is not evident from this code -- TODO confirm.
+           */
+          incr = 1 << ((FIB_PROTOCOL_IP4 == prefixs[0].fp_proto ? 32 : 128) -
+                       prefixs[i].fp_len);
+
+          for (k = 0; k < n; k++)
+            {
+              for (j = 0; j < vec_len (rpaths); j++)
+                {
+                  u32 via_table_id, fi;
+
+                  /*
+                   * the CLI parsing stored table Ids, swap to FIB indices.
+                   * The path is indexed by j; the prefix index i is
+                   * unrelated to the path vector and may be out of range
+                   * for it.
+                   */
+                  via_table_id = rpaths[j].frp_fib_index;
+                  fi = fib_table_find (prefixs[i].fp_proto, via_table_id);
+
+                  if (~0 == fi)
+                    {
+                      error =
+                        clib_error_return (0, "Via table %d does not exist",
+                                           via_table_id);
+                      goto done;
+                    }
+                  rpaths[j].frp_fib_index = fi;
+
+                  fib_prefix_t rpfx = {
+                    .fp_len = prefixs[i].fp_len,
+                    .fp_proto = prefixs[i].fp_proto,
+                    .fp_addr = dst,
+                  };
+
+                  if (is_del)
+                    fib_table_entry_path_remove2 (fib_index,
+                                                  &rpfx,
+                                                  FIB_SOURCE_CLI, &rpaths[j]);
+                  else
+                    fib_table_entry_path_add2 (fib_index,
+                                               &rpfx,
+                                               FIB_SOURCE_CLI,
+                                               FIB_ENTRY_FLAG_NONE,
+                                               &rpaths[j]);
+
+                  /*
+                   * Put the table Id back: this path is visited again for
+                   * every further 'count' iteration and prefix, and each
+                   * visit must translate the Id, not an already translated
+                   * FIB index.
+                   */
+                  rpaths[j].frp_fib_index = via_table_id;
+                }
+
+              /* step to the next destination for 'count N' bulk tests */
+              if (FIB_PROTOCOL_IP4 == prefixs[0].fp_proto)
+                {
+                  dst.ip4.as_u32 =
+                    clib_host_to_net_u32 (incr +
+                                          clib_net_to_host_u32 (dst.
+                                                                ip4.as_u32));
+                }
+              else
+                {
+                  int bucket = (incr < 64 ? 0 : 1);
+                  dst.ip6.as_u64[bucket] =
+                    clib_host_to_net_u64 (incr +
+                                          clib_net_to_host_u64 (dst.ip6.as_u64
+                                                                [bucket]));
+
+                }
+            }
+          t[1] = vlib_time_now (vm);
+          if (count > 1)
+            vlib_cli_output (vm, "%.6e routes/sec", count / (t[1] - t[0]));
+        }
+      else
+        {
+          error = clib_error_return (0, "Don't understand what you want...");
+          goto done;
+        }
+    }
+
+
+done:
+  vec_free (dpos);
+  vec_free (prefixs);
+  vec_free (rpaths);
+  unformat_free (line_input);
+  return error;
+}
+
+/**
+ * @brief Shared CLI handler for creating or deleting an IP table.
+ *
+ * Parses "[add|del] <table-id> [name <name>]" from one line of input.  A
+ * table id is mandatory and table 0 (the default table) is refused.
+ *
+ * @param vm          vlib main (not used).
+ * @param main_input  CLI input; a single line is consumed from it.
+ * @param cmd         the CLI command (not used).
+ * @param fproto      FIB protocol the table is created/deleted for.
+ * @return 0 on success (or when no line could be read), otherwise an error.
+ */
+clib_error_t *
+vnet_ip_table_cmd (vlib_main_t * vm,
+                   unformat_input_t * main_input,
+                   vlib_cli_command_t * cmd, fib_protocol_t fproto)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+  u32 table_id, is_add;
+  u8 *name = NULL;
+
+  is_add = 1;
+  table_id = ~0;
+
+  /* Get a line of input. */
+  if (!unformat_user (main_input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%d", &table_id))
+        ;
+      else if (unformat (line_input, "del"))
+        is_add = 0;
+      else if (unformat (line_input, "add"))
+        is_add = 1;
+      else if (unformat (line_input, "name %s", &name))
+        ;
+      else
+        {
+          error = unformat_parse_error (line_input);
+          goto done;
+        }
+    }
+
+  if (~0 == table_id)
+    {
+      error = clib_error_return (0, "No table id");
+      goto done;
+    }
+  else if (0 == table_id)
+    {
+      error = clib_error_return (0, "Can't change the default table");
+      goto done;
+    }
+  else
+    {
+      if (is_add)
+        {
+          ip_table_create (fproto, table_id, 0, name);
+          /*
+           * NOTE(review): whether ip_table_create() copies `name` or keeps
+           * the pointer is not visible from here, so the vector is
+           * deliberately not freed on this path -- TODO confirm, and free it
+           * here too if the callee copies.
+           */
+          name = NULL;
+        }
+      else
+        {
+          ip_table_delete (fproto, table_id, 0);
+        }
+    }
+
+done:
+  /* Release a parsed name that was never handed to ip_table_create()
+     (parse error, missing/default table id, or delete). */
+  vec_free (name);
+  unformat_free (line_input);
+  return error;
+}
+
+clib_error_t *
+vnet_ip4_table_cmd (vlib_main_t * vm,
+ unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+ return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP4));
+}
+
+clib_error_t *
+vnet_ip6_table_cmd (vlib_main_t * vm,
+ unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+ return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6));
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = {
+ .path = "ip",
+ .short_help = "Internet protocol (IP) commands",
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_ip6_command, static) = {
+ .path = "ip6",
+ .short_help = "Internet protocol version 6 (IPv6) commands",
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_show_ip_command, static) = {
+ .path = "show ip",
+ .short_help = "Internet protocol (IP) show commands",
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
+ .path = "show ip6",
+ .short_help = "Internet protocol version 6 (IPv6) show commands",
+};
+/* *INDENT-ON* */
+
+/*?
+ * This command is used to add or delete IPv4 or IPv6 routes. All
+ * IP Addresses ('<em><dst-ip-addr>/<width></em>',
+ * '<em><next-hop-ip-addr></em>' and '<em><adj-hop-ip-addr></em>')
+ * can be IPv4 or IPv6, but all must be of the same form in a single
+ * command. To display the current set of routes, use the commands
+ * '<em>show ip fib</em>' and '<em>show ip6 fib</em>'.
+ *
+ * @cliexpar
+ * Example of how to add a straight forward static route:
+ * @cliexcmd{ip route add 6.0.1.2/32 via 6.0.0.1 GigabitEthernet2/0/0}
+ * Example of how to delete a straight forward static route:
+ * @cliexcmd{ip route del 6.0.1.2/32 via 6.0.0.1 GigabitEthernet2/0/0}
+ * Mainly for route add/del performance testing, one can add or delete
+ * multiple routes by adding 'count N' to the previous item:
+ * @cliexcmd{ip route add count 10 7.0.0.0/24 via 6.0.0.1 GigabitEthernet2/0/0}
+ * Add multiple routes for the same destination to create equal-cost multipath:
+ * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.1 GigabitEthernet2/0/0}
+ * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.2 GigabitEthernet2/0/0}
+ * For unequal-cost multipath, specify the desired weights. This
+ * combination of weights results in 3/4 of the traffic following the
+ * second path, 1/4 following the first path:
+ * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.1 GigabitEthernet2/0/0 weight 1}
+ * @cliexcmd{ip route add 7.0.0.1/32 via 6.0.0.2 GigabitEthernet2/0/0 weight 3}
+ * To add a route to a particular FIB table (VRF), use:
+ * @cliexcmd{ip route add 172.16.24.0/24 table 7 via GigabitEthernet2/0/0}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_route_command, static) = {
+ .path = "ip route",
+ .short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table <table-id>] [via <next-hop-ip-addr> [<interface>] [weight <weight>]] | [via arp <interface> <adj-hop-ip-addr>] | [via drop|punt|local<id>|arp|classify <classify-idx>] [lookup in table <out-table-id>]",
+ .function = vnet_ip_route_cmd,
+ .is_mp_safe = 1,
+};
+
+/* *INDENT-ON* */
+/*?
+ * This command is used to add or delete IPv4 Tables. All
+ * Tables must be explicitly added before they can be used. Creating a
+ * table will add both unicast and multicast FIBs
+ *
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip4_table_command, static) =
+{
+  /* CLI registration: "ip table" is dispatched to vnet_ip4_table_cmd. */
+  .path = "ip table",
+  .function = vnet_ip4_table_cmd,
+  .short_help = "ip table [add|del] <table-id>",
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-ON* */
+/*?
+ * This command is used to add or delete IPv6 Tables. All
+ * Tables must be explicitly added before they can be used. Creating a
+ * table will add both unicast and multicast FIBs.
+ *
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip6_table_command, static) = {
+  /* CLI registration: "ip6 table" is dispatched to vnet_ip6_table_cmd. */
+  .path = "ip6 table",
+  .short_help = "ip6 table [add|del] <table-id>",
+  .function = vnet_ip6_table_cmd,
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/*
+ * Shared handler behind the "set interface ip table" and
+ * "set interface ip6 table" CLIs.  Parses "<interface> <table-id>" from
+ * input and calls ip_table_bind() for protocol fproto.
+ * Returns 0 on success, otherwise a clib error describing what went wrong.
+ */
+static clib_error_t *
+ip_table_bind_cmd (vlib_main_t * vm,
+                   unformat_input_t * input,
+                   vlib_cli_command_t * cmd,
+                   fib_protocol_t fproto)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 sw_if_index = ~0;
+  u32 table_id;
+  int rv;
+
+  /* The interface comes first ... */
+  if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+    return clib_error_return (0, "unknown interface `%U'",
+                              format_unformat_error, input);
+
+  /* ... followed by the numeric table id. */
+  if (!unformat (input, "%d", &table_id))
+    return clib_error_return (0, "expected table id `%U'",
+                              format_unformat_error, input);
+
+  rv = ip_table_bind (fproto, sw_if_index, table_id, 0);
+
+  /* Translate the API return code into a CLI error message. */
+  if (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE == rv)
+    return clib_error_return (0, "IP addresses are still present on %U",
+                              format_vnet_sw_if_index_name,
+                              vnm, sw_if_index);
+
+  if (VNET_API_ERROR_NO_SUCH_FIB == rv)
+    return clib_error_return (0, "no such table %d", table_id);
+
+  if (0 != rv)
+    return clib_error_return (0, "unknown error");
+
+  return 0;
+}
+
+/* IPv4 flavour of the interface-to-table bind CLI handler. */
+static clib_error_t *
+ip4_table_bind_cmd (vlib_main_t * vm,
+                    unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  clib_error_t *result;
+
+  result = ip_table_bind_cmd (vm, input, cmd, FIB_PROTOCOL_IP4);
+  return result;
+}
+
+/* IPv6 flavour of the interface-to-table bind CLI handler. */
+static clib_error_t *
+ip6_table_bind_cmd (vlib_main_t * vm,
+                    unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  clib_error_t *result;
+
+  result = ip_table_bind_cmd (vm, input, cmd, FIB_PROTOCOL_IP6);
+  return result;
+}
+
+/*?
+ * Place the indicated interface into the supplied IPv4 FIB table (also known
+ * as a VRF). The FIB table must already exist; if it does not, this command
+ * fails with a "no such table" error. To
+ * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
+ * FIB table will only be displayed if a route has been added to the table, or
+ * an IP Address is assigned to an interface in the table (which adds a route
+ * automatically).
+ *
+ * @note IP addresses added after setting the interface IP table are added to
+ * the indicated FIB table. If an IP address is added prior to changing the
+ * table then this is an error. The control plane must remove these addresses
+ * first and then change the table. VPP will not automatically move the
+ * addresses from the old to the new table as it does not know the validity
+ * of such a change.
+ *
+ * @cliexpar
+ * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
+ * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
+{
+  /* CLI registration: handled by ip4_table_bind_cmd. */
+  .path = "set interface ip table",
+  .short_help = "set interface ip table <interface> <table-id>",
+  .function = ip4_table_bind_cmd,
+};
+/* *INDENT-ON* */
+
+/*?
+ * Place the indicated interface into the supplied IPv6 FIB table (also known
+ * as a VRF). The FIB table must already exist; if it does not, this command
+ * fails with a "no such table" error. To
+ * display the current IPv6 FIB table, use the command '<em>show ip6 fib</em>'.
+ * FIB table will only be displayed if a route has been added to the table, or
+ * an IP Address is assigned to an interface in the table (which adds a route
+ * automatically).
+ *
+ * @note IP addresses added after setting the interface IP table are added to
+ * the indicated FIB table. If an IP address is added prior to changing the
+ * table then this is an error. The control plane must remove these addresses
+ * first and then change the table. VPP will not automatically move the
+ * addresses from the old to the new table as it does not know the validity
+ * of such a change.
+ *
+ * @cliexpar
+ * Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id):
+ * @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2}
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) =
+{
+  /* CLI registration: handled by ip6_table_bind_cmd. */
+  .path = "set interface ip6 table",
+  .short_help = "set interface ip6 table <interface> <table-id>",
+  .function = ip6_table_bind_cmd,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "ip mroute": add, update or delete IPv4/IPv6 multicast
+ * routes (mfib entries) and their paths.
+ *
+ * Tokens accepted on the command line, in any order:
+ *   add | del               operation (add is the default)
+ *   table <id>              table to operate on (default 0)
+ *   scount <n>, gcount <n>  repeat for n consecutive sources / groups
+ *   <src-addr> <grp-addr>   an (S,G) prefix
+ *   <grp-addr>[/<len>]      a (*,G) prefix
+ *   via <interface>         path through an interface
+ *   via local               path flagged FIB_ROUTE_PATH_LOCAL
+ *   interface flags / entry flags, as parsed by unformat_mfib_itf_flags
+ *   and unformat_mfib_entry_flags
+ *
+ * Returns 0 on success, otherwise a clib error (parse error or unknown
+ * table).  When scount or gcount is greater than one, the achieved
+ * routes/sec rate is printed.
+ */
+clib_error_t *
+vnet_ip_mroute_cmd (vlib_main_t * vm,
+                    unformat_input_t * main_input, vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+  fib_route_path_t rpath;
+  u32 table_id, is_del;
+  vnet_main_t *vnm;
+  mfib_prefix_t pfx;
+  u32 fib_index;
+  mfib_itf_flags_t iflags = 0;
+  mfib_entry_flags_t eflags = 0;
+  u32 gcount, scount, ss, gg;
+  u32 pfx_len, host_bits;
+  u64 incr;
+  f64 timet[2];
+
+  gcount = scount = 1;
+  vnm = vnet_get_main ();
+  is_del = 0;
+  table_id = 0;
+  memset (&pfx, 0, sizeof (pfx));
+  memset (&rpath, 0, sizeof (rpath));
+  rpath.frp_sw_if_index = ~0;
+
+  /* Get a line of input. */
+  if (!unformat_user (main_input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "table %d", &table_id))
+        ;
+      else if (unformat (line_input, "del"))
+        is_del = 1;
+      else if (unformat (line_input, "add"))
+        is_del = 0;
+      else if (unformat (line_input, "scount %d", &scount))
+        ;
+      else if (unformat (line_input, "gcount %d", &gcount))
+        ;
+      /* (S,G): two addresses of the same family, both fully specified. */
+      else if (unformat (line_input, "%U %U",
+                         unformat_ip4_address,
+                         &pfx.fp_src_addr.ip4,
+                         unformat_ip4_address, &pfx.fp_grp_addr.ip4))
+        {
+          pfx.fp_proto = FIB_PROTOCOL_IP4;
+          pfx.fp_len = 64;
+        }
+      else if (unformat (line_input, "%U %U",
+                         unformat_ip6_address,
+                         &pfx.fp_src_addr.ip6,
+                         unformat_ip6_address, &pfx.fp_grp_addr.ip6))
+        {
+          pfx.fp_proto = FIB_PROTOCOL_IP6;
+          pfx.fp_len = 256;
+        }
+      /*
+       * (*,G/len): the length is parsed into a u32 temporary because
+       * "%d" stores a 32-bit value and the declared width of fp_len is
+       * not visible from this function.
+       */
+      else if (unformat (line_input, "%U/%d",
+                         unformat_ip4_address,
+                         &pfx.fp_grp_addr.ip4, &pfx_len))
+        {
+          memset (&pfx.fp_src_addr.ip4, 0, sizeof (pfx.fp_src_addr.ip4));
+          pfx.fp_proto = FIB_PROTOCOL_IP4;
+          pfx.fp_len = pfx_len;
+        }
+      else if (unformat (line_input, "%U/%d",
+                         unformat_ip6_address,
+                         &pfx.fp_grp_addr.ip6, &pfx_len))
+        {
+          memset (&pfx.fp_src_addr.ip6, 0, sizeof (pfx.fp_src_addr.ip6));
+          pfx.fp_proto = FIB_PROTOCOL_IP6;
+          pfx.fp_len = pfx_len;
+        }
+      /* (*,G) with a host-length group. */
+      else if (unformat (line_input, "%U",
+                         unformat_ip4_address, &pfx.fp_grp_addr.ip4))
+        {
+          memset (&pfx.fp_src_addr.ip4, 0, sizeof (pfx.fp_src_addr.ip4));
+          pfx.fp_proto = FIB_PROTOCOL_IP4;
+          pfx.fp_len = 32;
+        }
+      else if (unformat (line_input, "%U",
+                         unformat_ip6_address, &pfx.fp_grp_addr.ip6))
+        {
+          memset (&pfx.fp_src_addr.ip6, 0, sizeof (pfx.fp_src_addr.ip6));
+          pfx.fp_proto = FIB_PROTOCOL_IP6;
+          pfx.fp_len = 128;
+        }
+      else if (unformat (line_input, "via %U",
+                         unformat_vnet_sw_interface, vnm,
+                         &rpath.frp_sw_if_index))
+        {
+          rpath.frp_weight = 1;
+        }
+      else if (unformat (line_input, "via local"))
+        {
+          rpath.frp_sw_if_index = ~0;
+          rpath.frp_weight = 1;
+          rpath.frp_flags |= FIB_ROUTE_PATH_LOCAL;
+        }
+      else if (unformat (line_input, "%U", unformat_mfib_itf_flags, &iflags))
+        ;
+      else if (unformat (line_input, "%U",
+                         unformat_mfib_entry_flags, &eflags))
+        ;
+      else
+        {
+          error = unformat_parse_error (line_input);
+          goto done;
+        }
+    }
+
+  if (~0 == table_id)
+    {
+      /*
+       * if no table_id is passed we will manipulate the default
+       */
+      fib_index = 0;
+    }
+  else
+    {
+      fib_index = mfib_table_find (pfx.fp_proto, table_id);
+
+      if (~0 == fib_index)
+        {
+          error = clib_error_return (0, "Nonexistent table id %d", table_id);
+          goto done;
+        }
+    }
+
+  timet[0] = vlib_time_now (vm);
+
+  /*
+   * Step between consecutive groups when gcount > 1: 2^(host bits left
+   * after the prefix).  The shift count is reduced modulo the address
+   * width so that /32, /128 and the (S,G) lengths (64, 256) step by one
+   * address instead of shifting by the full width of the type, which is
+   * undefined behaviour in C.
+   */
+  if (FIB_PROTOCOL_IP4 == pfx.fp_proto)
+    host_bits = (32 - (pfx.fp_len % 32)) % 32;
+  else
+    host_bits = (128 - (pfx.fp_len % 128)) % 128;
+
+  /* A 64-bit one is always shifted by fewer than 64 bits. */
+  incr = ((u64) 1) << (host_bits % 64);
+
+  for (ss = 0; ss < scount; ss++)
+    {
+      for (gg = 0; gg < gcount; gg++)
+        {
+          if (is_del && 0 == rpath.frp_weight)
+            {
+              /* no path provided => route delete */
+              mfib_table_entry_delete (fib_index, &pfx, MFIB_SOURCE_CLI);
+            }
+          else if (eflags)
+            {
+              mfib_table_entry_update (fib_index, &pfx, MFIB_SOURCE_CLI,
+                                       MFIB_RPF_ID_NONE, eflags);
+            }
+          else
+            {
+              if (is_del)
+                mfib_table_entry_path_remove (fib_index,
+                                              &pfx, MFIB_SOURCE_CLI, &rpath);
+              else
+                mfib_table_entry_path_update (fib_index,
+                                              &pfx, MFIB_SOURCE_CLI, &rpath,
+                                              iflags);
+            }
+
+          /* Advance to the next group prefix. */
+          if (FIB_PROTOCOL_IP4 == pfx.fp_proto)
+            {
+              pfx.fp_grp_addr.ip4.as_u32 =
+                clib_host_to_net_u32 ((u32) incr +
+                                      clib_net_to_host_u32
+                                      (pfx.fp_grp_addr.ip4.as_u32));
+            }
+          else
+            {
+              /*
+               * as_u64[1] is the low-order half of the address (the
+               * source-address step below relies on the same layout);
+               * steps of 2^64 and above go to as_u64[0].
+               */
+              int bucket = (host_bits < 64 ? 1 : 0);
+              pfx.fp_grp_addr.ip6.as_u64[bucket] =
+                clib_host_to_net_u64 (incr +
+                                      clib_net_to_host_u64
+                                      (pfx.fp_grp_addr.ip6.as_u64[bucket]));
+            }
+        }
+
+      /* Advance to the next source address. */
+      if (FIB_PROTOCOL_IP4 == pfx.fp_proto)
+        {
+          pfx.fp_src_addr.ip4.as_u32 =
+            clib_host_to_net_u32 (1 +
+                                  clib_net_to_host_u32
+                                  (pfx.fp_src_addr.ip4.as_u32));
+        }
+      else
+        {
+          pfx.fp_src_addr.ip6.as_u64[1] =
+            clib_host_to_net_u64 (1 +
+                                  clib_net_to_host_u64
+                                  (pfx.fp_src_addr.ip6.as_u64[1]));
+        }
+    }
+
+  timet[1] = vlib_time_now (vm);
+
+  if (scount > 1 || gcount > 1)
+    vlib_cli_output (vm, "%.6e routes/sec",
+                     (scount * gcount) / (timet[1] - timet[0]));
+
+done:
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*?
+ * This command is used to add or delete IPv4 or IPv6 multicast routes. All
+ * IP Addresses ('<em><dst-ip-addr>/<width></em>',
+ * '<em><next-hop-ip-addr></em>' and '<em><adj-hop-ip-addr></em>')
+ * can be IPv4 or IPv6, but all must be of the same form in a single
+ * command. To display the current set of routes, use the commands
+ * '<em>show ip mfib</em>' and '<em>show ip6 mfib</em>'.
+ * The full set of supported flags for interfaces and routes is shown via
+ * '<em>show mfib itf flags</em>' and '<em>show mfib route flags</em>'
+ * respectively.
+ * @cliexpar
+ * Example of how to add a forwarding interface to a route (and create the
+ * route if it does not exist)
+ * @cliexcmd{ip mroute add 232.1.1.1 via GigabitEthernet2/0/0 Forward}
+ * Example of how to add an accepting interface to a route (and create the
+ * route if it does not exist)
+ * @cliexcmd{ip mroute add 232.1.1.1 via GigabitEthernet2/0/1 Accept}
+ * Example of changing the route's flags to send signals via the API
+ * @cliexcmd{ip mroute add 232.1.1.1 Signal}
+
+ ?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_mroute_command, static) =
+{
+  /*
+   * CLI registration: "ip mroute" is dispatched to vnet_ip_mroute_cmd.
+   * The help text lists the syntax that handler parses: an optional
+   * source, a group with optional length, a table, an interface or
+   * local path, and interface/entry flags.
+   */
+  .path = "ip mroute",
+  .short_help = "ip mroute [add|del] [<src-ip-addr>] <grp-ip-addr>[/<width>] [table <table-id>] [via <interface> | via local] [<itf-flags>] [<entry-flags>]",
+  .function = vnet_ip_mroute_cmd,
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/*
+ * The next two routines address a long-standing scripting problem:
+ * probing a v4 or v6 neighbor needs to appear to be synchronous,
+ * or dependent route-adds will simply fail.
+ */
+/*
+ * Probe IPv6 neighbor a on sw_if_index and wait for it to resolve.
+ *
+ * Sends up to retry_count probes, waiting up to one second after each for
+ * the resolution event registered for the calling process.  Must be called
+ * from process context.
+ *
+ * Returns 0 once resolved, the error from ip6_probe_neighbor() if a probe
+ * could not be sent, or a "Resolution failed" error when every attempt
+ * timed out (including retry_count <= 0).
+ */
+static clib_error_t *
+ip6_probe_neighbor_wait (vlib_main_t * vm, ip6_address_t * a, u32 sw_if_index,
+                         int retry_count)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *e = 0;
+  int i;
+  int resolved = 0;
+  uword event_type;
+  uword *event_data = 0;
+
+  ASSERT (vlib_in_process_context (vm));
+
+  if (retry_count > 0)
+    vnet_register_ip6_neighbor_resolution_event
+      (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index,
+       1 /* event */ , 0 /* data */ );
+
+  for (i = 0; i < retry_count; i++)
+    {
+      /* The interface may be down, etc. */
+      e = ip6_probe_neighbor (vm, a, sw_if_index);
+
+      /* Leave through the common exit so event_data is released. */
+      if (e)
+        goto done;
+
+      vlib_process_wait_for_event_or_clock (vm, 1.0);
+      event_type = vlib_process_get_events (vm, &event_data);
+      switch (event_type)
+        {
+        case 1:                /* resolved... */
+          vlib_cli_output (vm, "Resolved %U", format_ip6_address, a);
+          resolved = 1;
+          goto done;
+
+        case ~0:               /* timeout */
+          break;
+
+        default:
+          clib_warning ("unknown event_type %d", event_type);
+        }
+      vec_reset_length (event_data);
+    }
+
+done:
+
+  /* vec_reset_length() only zeroes the length; free the storage. */
+  vec_free (event_data);
+
+  if (e)
+    return e;
+
+  if (!resolved)
+    return clib_error_return (0, "Resolution failed for %U",
+                              format_ip6_address, a);
+  return 0;
+}
+
+/*
+ * Probe (ARP for) IPv4 neighbor a on sw_if_index and wait for it to
+ * resolve.
+ *
+ * Sends up to retry_count probes, waiting up to one second after each for
+ * the resolution event registered for the calling process.  Must be called
+ * from process context.
+ *
+ * Returns 0 once resolved, the error from ip4_probe_neighbor() if a probe
+ * could not be sent, or a "Resolution failed" error when every attempt
+ * timed out (including retry_count <= 0).
+ */
+static clib_error_t *
+ip4_probe_neighbor_wait (vlib_main_t * vm, ip4_address_t * a, u32 sw_if_index,
+                         int retry_count)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *e = 0;
+  int i;
+  int resolved = 0;
+  uword event_type;
+  uword *event_data = 0;
+
+  ASSERT (vlib_in_process_context (vm));
+
+  if (retry_count > 0)
+    vnet_register_ip4_arp_resolution_event
+      (vnm, a, vlib_get_current_process (vm)->node_runtime.node_index,
+       1 /* event */ , 0 /* data */ );
+
+  for (i = 0; i < retry_count; i++)
+    {
+      /* The interface may be down, etc. */
+      e = ip4_probe_neighbor (vm, a, sw_if_index);
+
+      /* Leave through the common exit so event_data is released. */
+      if (e)
+        goto done;
+
+      vlib_process_wait_for_event_or_clock (vm, 1.0);
+      event_type = vlib_process_get_events (vm, &event_data);
+      switch (event_type)
+        {
+        case 1:                /* resolved... */
+          vlib_cli_output (vm, "Resolved %U", format_ip4_address, a);
+          resolved = 1;
+          goto done;
+
+        case ~0:               /* timeout */
+          break;
+
+        default:
+          clib_warning ("unknown event_type %d", event_type);
+        }
+      vec_reset_length (event_data);
+    }
+
+done:
+
+  /* vec_reset_length() only zeroes the length; free the storage. */
+  vec_free (event_data);
+
+  if (e)
+    return e;
+
+  if (!resolved)
+    return clib_error_return (0, "Resolution failed for %U",
+                              format_ip4_address, a);
+  return 0;
+}
+
+/*
+ * CLI handler for "ip probe-neighbor".
+ *
+ * Parses one line holding an interface, exactly one IPv4 or IPv6 address
+ * and an optional "retry <n>" (default 3), then hands over to the matching
+ * ip4/ip6 probe-and-wait routine.  Returns 0 on success or a clib error.
+ */
+static clib_error_t *
+probe_neighbor_address (vlib_main_t * vm,
+                        unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  unformat_input_t line_storage, *line = &line_storage;
+  vnet_main_t *vnm = vnet_get_main ();
+  clib_error_t *err = 0;
+  ip4_address_t v4_addr;
+  ip6_address_t v6_addr;
+  u32 sw_if_index = ~0;
+  int n_retries = 3;
+  int n_addresses = 0;
+  int want_ip6 = 0;
+
+  /* Nothing on the line: nothing to do. */
+  if (!unformat_user (input, unformat_line_input, line))
+    return 0;
+
+  while (unformat_check_input (line) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat_user (line, unformat_vnet_sw_interface, vnm,
+                         &sw_if_index))
+        continue;
+
+      if (unformat (line, "retry %d", &n_retries))
+        continue;
+
+      if (unformat (line, "%U", unformat_ip4_address, &v4_addr))
+        {
+          n_addresses++;
+          continue;
+        }
+
+      if (unformat (line, "%U", unformat_ip6_address, &v6_addr))
+        {
+          n_addresses++;
+          want_ip6 = 1;
+          continue;
+        }
+
+      err = clib_error_return (0, "unknown input '%U'",
+                               format_unformat_error, line);
+      goto done;
+    }
+
+  /* Validate what was collected, then probe. */
+  if (sw_if_index == ~0)
+    err = clib_error_return (0, "Interface required, not set.");
+  else if (n_addresses == 0)
+    err = clib_error_return (0, "ip address required, not set.");
+  else if (n_addresses > 1)
+    err = clib_error_return (0, "Multiple ip addresses not supported.");
+  else if (want_ip6)
+    err = ip6_probe_neighbor_wait (vm, &v6_addr, sw_if_index, n_retries);
+  else
+    err = ip4_probe_neighbor_wait (vm, &v4_addr, sw_if_index, n_retries);
+
+done:
+  unformat_free (line);
+
+  return err;
+}
+
+/*?
+ * The '<em>ip probe-neighbor</em>' command ARPs for IPv4 addresses or
+ * attempts IPv6 neighbor discovery depending on the supplied IP address
+ * format.
+ *
+ * @note This command will not immediately affect the indicated FIB; it
+ * is not suitable for use in establishing a FIB entry prior to adding
+ * recursive FIB entries. As in: don't use it in a script to probe a
+ * gateway prior to adding a default route. It won't work. Instead,
+ * configure a static ARP cache entry [see '<em>set ip arp</em>'], or
+ * a static IPv6 neighbor [see '<em>set ip6 neighbor</em>'].
+ *
+ * @cliexpar
+ * Example of probe for an IPv4 address:
+ * @cliexcmd{ip probe-neighbor GigabitEthernet2/0/0 172.16.1.2}
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ip_probe_neighbor_command, static) =
+{
+  /* CLI registration: handled by probe_neighbor_address. */
+  .path = "ip probe-neighbor",
+  .short_help = "ip probe-neighbor <interface> <ip4-addr> | <ip6-addr> [retry nn]",
+  .function = probe_neighbor_address,
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/lookup.h b/src/vnet/ip/lookup.h
new file mode 100644
index 00000000..28a4bd8f
--- /dev/null
+++ b/src/vnet/ip/lookup.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/ip_lookup.h: ip (4 or 6) lookup structures, adjacencies, ...
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @file
+ * Definitions for all things IP (v4|v6) unicast and multicast lookup related.
+ *
+ * - Adjacency definitions and registration.
+ * - Callbacks on route add.
+ * - Callbacks on interface address change.
+ */
+#ifndef included_ip_lookup_h
+#define included_ip_lookup_h
+
+#include <vnet/vnet.h>
+#include <vlib/buffer.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/fib_node.h>
+#include <vnet/adj/adj.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/feature/feature.h>
+
+/** Flow hash configuration */
+#define IP_FLOW_HASH_SRC_ADDR (1 << 0)          /* source address */
+#define IP_FLOW_HASH_DST_ADDR (1 << 1)          /* destination address */
+#define IP_FLOW_HASH_PROTO (1 << 2)             /* protocol */
+#define IP_FLOW_HASH_SRC_PORT (1 << 3)          /* source port */
+#define IP_FLOW_HASH_DST_PORT (1 << 4)          /* destination port */
+#define IP_FLOW_HASH_REVERSE_SRC_DST (1 << 5)   /* the "reverse" bit */
+
+/** Default: 5-tuple without the "reverse" bit (evaluates to 0x1F) */
+#define IP_FLOW_HASH_DEFAULT \
+  (IP_FLOW_HASH_SRC_ADDR | IP_FLOW_HASH_DST_ADDR | IP_FLOW_HASH_PROTO | \
+   IP_FLOW_HASH_SRC_PORT | IP_FLOW_HASH_DST_PORT)
+
+/* X-macro pairing each flow hash bit with a short name. */
+#define foreach_flow_hash_bit \
+  _(src, IP_FLOW_HASH_SRC_ADDR) \
+  _(dst, IP_FLOW_HASH_DST_ADDR) \
+  _(sport, IP_FLOW_HASH_SRC_PORT) \
+  _(dport, IP_FLOW_HASH_DST_PORT) \
+  _(proto, IP_FLOW_HASH_PROTO) \
+  _(reverse, IP_FLOW_HASH_REVERSE_SRC_DST)
+
+/**
+ * A flow hash configuration is a mask of the flow hash options
+ */
+typedef u32 flow_hash_config_t;
+
+/* An all zeros address */
+extern const ip46_address_t zero_addr;
+
+
+/**
+ * One IP address assigned to a software interface; the element type of
+ * ip_lookup_main_t.if_address_pool.
+ */
+typedef struct
+{
+ /** Key for mhash; in fact, just a byte offset into mhash key vector. */
+ u32 address_key;
+
+ /** Interface which has this address. */
+ u32 sw_if_index;
+
+ /** Address (prefix) length for this interface. */
+ u16 address_length;
+
+ /** Reserved for future use (possibly primary vs. secondary). */
+ u16 flags;
+
+ /** Next and previous pointers for doubly linked list of
+ addresses per software interface. */
+ u32 next_this_sw_interface;
+ u32 prev_this_sw_interface;
+} ip_interface_address_t;
+
+/**
+ * Next-node choices for local delivery: drop, punt, UDP lookup or ICMP.
+ * IP_LOCAL_N_NEXT is the number of choices, not a choice itself.
+ * NOTE(review): presumably these are the values stored in
+ * ip_lookup_main_t.local_next_by_ip_protocol - confirm against the users.
+ */
+typedef enum
+{
+ IP_LOCAL_NEXT_DROP,
+ IP_LOCAL_NEXT_PUNT,
+ IP_LOCAL_NEXT_UDP_LOOKUP,
+ IP_LOCAL_NEXT_ICMP,
+ IP_LOCAL_N_NEXT,
+} ip_local_next_t;
+
+struct ip_lookup_main_t;
+
+/**
+ * Lookup state kept per address family (see is_ip6): the interface
+ * address pool and its indices, feature arc indices, fib result sizing
+ * and per-IP-protocol dispatch tables.
+ */
+typedef struct ip_lookup_main_t
+{
+ /** Pool of addresses that are assigned to interfaces. */
+ ip_interface_address_t *if_address_pool;
+
+ /** Hash table mapping address to index in interface address pool. */
+ mhash_t address_to_if_address_index;
+
+ /** Head of doubly linked list of interface addresses for each software interface.
+ ~0 means this interface has no address. */
+ u32 *if_address_pool_index_by_sw_if_index;
+
+ /** First table index to use for this interface, ~0 => none */
+ u32 *classify_table_index_by_sw_if_index;
+
+ /** Feature arc indices (multicast, unicast and output arcs) */
+ u8 mcast_feature_arc_index;
+ u8 ucast_feature_arc_index;
+ u8 output_feature_arc_index;
+
+ /** Number of bytes in a fib result. Must be at least
+ sizeof (uword). First word is always adjacency index. */
+ u32 fib_result_n_bytes, fib_result_n_words;
+
+ /** 1 for ip6; 0 for ip4. */
+ u32 is_ip6;
+
+ /** Either format_ip4_address_and_length or format_ip6_address_and_length. */
+ format_function_t *format_address_and_length;
+
+ /** Table mapping ip protocol to ip[46]-local node next index. */
+ u8 local_next_by_ip_protocol[256];
+
+ /** IP_BUILTIN_PROTOCOL_{TCP,UDP,ICMP,OTHER} by protocol in IP header. */
+ u8 builtin_protocol_by_ip_protocol[256];
+} ip_lookup_main_t;
+
+clib_error_t *ip_interface_address_add_del (ip_lookup_main_t * lm,
+ u32 sw_if_index,
+ void *address,
+ u32 address_length,
+ u32 is_del, u32 * result_index);
+
+u8 *format_ip_flow_hash_config (u8 * s, va_list * args);
+
+/**
+ * Look up the interface address record for the address addr_fib points to.
+ * Returns the pool element, or 0 when the address is not in the hash.
+ */
+always_inline ip_interface_address_t *
+ip_get_interface_address (ip_lookup_main_t * lm, void *addr_fib)
+{
+  uword *slot = mhash_get (&lm->address_to_if_address_index, addr_fib);
+
+  if (!slot)
+    return 0;
+
+  return pool_elt_at_index (lm->if_address_pool, slot[0]);
+}
+
+/**
+ * Return the memory holding the address for interface address record a,
+ * found by translating a->address_key through the address mhash.
+ */
+always_inline void *
+ip_interface_address_get_address (ip_lookup_main_t * lm,
+                                  ip_interface_address_t * a)
+{
+  void *address;
+
+  address = mhash_key_to_mem (&lm->address_to_if_address_index,
+                              a->address_key);
+  return address;
+}
+
+/* *INDENT-OFF* */
+/**
+ * Walk the addresses configured on a software interface.
+ *
+ * lm          - ip_lookup_main_t holding the address pool and list heads.
+ * a           - ip_interface_address_t pointer set to each address in turn.
+ * sw_if_index - interface whose address list is walked.
+ * loop        - for an unnumbered interface: non-zero walks the list of
+ *               the interface named by unnumbered_sw_if_index, zero visits
+ *               nothing.
+ * body        - statement(s) run once per address.
+ *
+ * The next list index is read before body runs.
+ */
+#define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \
+do { \
+ vnet_main_t *_vnm = vnet_get_main(); \
+ u32 _sw_if_index = sw_if_index; \
+ vnet_sw_interface_t *_swif; \
+ _swif = vnet_get_sw_interface (_vnm, _sw_if_index); \
+ \
+ /* \
+ * Loop => honor unnumbered interface addressing. \
+ */ \
+ if (_swif->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED) \
+ { \
+ if (loop) \
+ _sw_if_index = _swif->unnumbered_sw_if_index; \
+ else \
+ /* the interface is unnumbered, but the caller does not want \
+ * unnumbered interfaces considered/honoured */ \
+ break; \
+ } \
+ u32 _ia = ((vec_len((lm)->if_address_pool_index_by_sw_if_index) \
+ > (_sw_if_index)) ? \
+ vec_elt ((lm)->if_address_pool_index_by_sw_if_index, \
+ (_sw_if_index)) : \
+ (u32)~0); \
+ ip_interface_address_t * _a; \
+ while (_ia != ~0) \
+ { \
+ _a = pool_elt_at_index ((lm)->if_address_pool, _ia); \
+ _ia = _a->next_this_sw_interface; \
+ (a) = _a; \
+ body; \
+ } \
+} while (0)
+/* *INDENT-ON* */
+
+void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index);
+
+#endif /* included_ip_lookup_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ping.c b/src/vnet/ip/ping.c
new file mode 100755
index 00000000..0fa537f6
--- /dev/null
+++ b/src/vnet/ip/ping.c
@@ -0,0 +1,928 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stddef.h>
+#include <vnet/ip/ping.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/fib_entry.h>
+#include <vlib/vlib.h>
+
+/**
+ * @file
+ * @brief IPv4 and IPv6 ICMP Ping.
+ *
+ * This file contains code to support sending IPv4 or IPv6 ICMP ECHO_REQUEST
+ * packets to network hosts.
+ *
+ */
+
+
+/*
+ * Trace formatter for the echo-reply nodes: prints the ICMP id and
+ * sequence number in host byte order, followed by " (unknown)" when the
+ * trace record's bound field is zero.
+ */
+u8 *
+format_icmp_echo_trace (u8 * s, va_list * va)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+  icmp_echo_trace_t *trace = va_arg (*va, icmp_echo_trace_t *);
+  char *suffix = trace->bound ? "" : " (unknown)";
+
+  return format (s, "ICMP echo id %d seq %d%s",
+                 clib_net_to_host_u16 (trace->id),
+                 clib_net_to_host_u16 (trace->seq), suffix);
+}
+
+/*
+ * If we can find the ping run by an ICMP ID, then we send the signal
+ * to the CLI process referenced by that ping run, alongside with
+ * a freshly made copy of the packet.
+ * I opted for a packet copy to keep the main packet processing path
+ * the same as for all the other nodes.
+ *
+ */
+
+/*
+ * Hand an ICMP echo reply to the CLI process that owns the matching ping
+ * run.
+ *
+ * event_type selects the header layout (PING_RESPONSE_IP4 or
+ * PING_RESPONSE_IP6); b0 is the received reply.  The ICMP id is looked up
+ * in ping_run_by_icmp_id.  On a match, a copy of the packet, stamped with
+ * the current time in its vnet_buffer "unused" metadata, is signalled to
+ * the run's CLI process.
+ *
+ * Returns 1 when a ping run was found and signalled, 0 otherwise
+ * (unknown event type or unknown ICMP id).
+ */
+static int
+signal_ip46_icmp_reply_event (u8 event_type, vlib_buffer_t * b0)
+{
+  ping_main_t *pm = &ping_main;
+  u16 net_icmp_id = 0;
+  u32 bi0_copy = 0;
+
+  switch (event_type)
+    {
+    case PING_RESPONSE_IP4:
+      {
+        icmp4_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+        net_icmp_id = h0->icmp_echo.id;
+      }
+      break;
+    case PING_RESPONSE_IP6:
+      {
+        icmp6_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+        net_icmp_id = h0->icmp_echo.id;
+      }
+      break;
+    default:
+      return 0;
+    }
+
+  uword *p = hash_get (pm->ping_run_by_icmp_id,
+                       clib_net_to_host_u16 (net_icmp_id));
+  if (!p)
+    return 0;
+
+  ping_run_t *pr = vec_elt_at_index (pm->ping_runs, p[0]);
+  vlib_main_t *vm = vlib_mains[pr->cli_thread_index];
+  if (vlib_buffer_alloc (vm, &bi0_copy, 1) == 1)
+    {
+      vlib_buffer_t *b0_copy = vlib_get_buffer (vm, bi0_copy);
+      f64 nowts = vlib_time_now (vm);
+
+      clib_memcpy (vlib_buffer_get_current (b0_copy),
+                   vlib_buffer_get_current (b0), b0->current_length);
+      /* Pass the timestamp to the cli_process thanks to the vnet_buffer unused metadata field */
+      clib_memcpy (vnet_buffer (b0_copy)->unused, &nowts, sizeof (nowts));
+    }
+  else
+    {
+      /*
+       * Allocation failed: signal the event with buffer index 0 and do
+       * NOT touch that buffer - it is not ours, so stamping a timestamp
+       * into its metadata would corrupt somebody else's packet.
+       * NOTE(review): the event receiver is outside this block; confirm
+       * it treats index 0 as "no packet copy".
+       */
+      bi0_copy = 0;
+    }
+  vlib_process_signal_event_mt (vm, pr->cli_process_id, event_type, bi0_copy);
+  return 1;
+}
+
+/*
+ * Process ICMPv6 echo replies
+ */
+/*
+ * Node function for ICMPv6 echo replies.  Each buffer in the frame is
+ * offered to signal_ip46_icmp_reply_event(); buffers that matched a ping
+ * run go to the DROP next, all others to the PUNT next.
+ * Returns the number of buffers in the frame.
+ */
+static uword
+ip6_icmp_echo_reply_node_fn (vlib_main_t * vm,
+                             vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 *bi_cursor = vlib_frame_vector_args (frame);      /* buffer indices */
+  u32 n_remaining;
+
+  for (n_remaining = frame->n_vectors; n_remaining > 0;
+       n_remaining--, bi_cursor++)
+    {
+      u32 bi0 = bi_cursor[0];
+      vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+      u32 next0 = ICMP6_ECHO_REPLY_NEXT_PUNT;
+
+      if (signal_ip46_icmp_reply_event (PING_RESPONSE_IP6, b0))
+        next0 = ICMP6_ECHO_REPLY_NEXT_DROP;
+
+      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+        {
+          icmp6_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+          icmp_echo_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+
+          tr->id = h0->icmp_echo.id;
+          tr->seq = h0->icmp_echo.seq;
+          tr->bound = (next0 == ICMP6_ECHO_REPLY_NEXT_DROP);
+        }
+
+      /* push this pkt to the next graph node */
+      vlib_set_next_frame_buffer (vm, node, next0, bi0);
+    }
+
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_icmp_echo_reply_node, static) =
+{
+  /* Graph node registration for the ICMPv6 echo-reply handler. */
+  .name = "ip6-icmp-echo-reply",
+  .function = ip6_icmp_echo_reply_node_fn,
+  .format_trace = format_icmp_echo_trace,
+  .vector_size = sizeof (u32),
+  .n_next_nodes = ICMP6_ECHO_REPLY_N_NEXT,
+  .next_nodes = {
+    [ICMP6_ECHO_REPLY_NEXT_PUNT] = "error-punt",
+    [ICMP6_ECHO_REPLY_NEXT_DROP] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+/*
+ * Process ICMPv4 echo replies
+ */
+/*
+ * Node function for ICMPv4 echo replies.  Each buffer in the frame is
+ * offered to signal_ip46_icmp_reply_event(); buffers that matched a ping
+ * run go to the DROP next, all others to the PUNT next.
+ * Returns the number of buffers in the frame.
+ */
+static uword
+ip4_icmp_echo_reply_node_fn (vlib_main_t * vm,
+                             vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 *bi_cursor = vlib_frame_vector_args (frame);      /* buffer indices */
+  u32 n_remaining;
+
+  for (n_remaining = frame->n_vectors; n_remaining > 0;
+       n_remaining--, bi_cursor++)
+    {
+      u32 bi0 = bi_cursor[0];
+      vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+      u32 next0 = ICMP4_ECHO_REPLY_NEXT_PUNT;
+
+      if (signal_ip46_icmp_reply_event (PING_RESPONSE_IP4, b0))
+        next0 = ICMP4_ECHO_REPLY_NEXT_DROP;
+
+      if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+        {
+          icmp4_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+          icmp_echo_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+
+          tr->id = h0->icmp_echo.id;
+          tr->seq = h0->icmp_echo.seq;
+          tr->bound = (next0 == ICMP4_ECHO_REPLY_NEXT_DROP);
+        }
+
+      /* push this pkt to the next graph node */
+      vlib_set_next_frame_buffer (vm, node, next0, bi0);
+    }
+
+  return frame->n_vectors;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip4_icmp_echo_reply_node, static) =
+{
+  /* Graph node registration for the ICMPv4 echo-reply handler. */
+  .name = "ip4-icmp-echo-reply",
+  .function = ip4_icmp_echo_reply_node_fn,
+  .format_trace = format_icmp_echo_trace,
+  .vector_size = sizeof (u32),
+  .n_next_nodes = ICMP4_ECHO_REPLY_N_NEXT,
+  .next_nodes = {
+    [ICMP4_ECHO_REPLY_NEXT_PUNT] = "error-punt",
+    [ICMP4_ECHO_REPLY_NEXT_DROP] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+/* String tables initialised from the IP6_LOOKUP_NEXT_NODES and
+ * IP4_LOOKUP_NEXT_NODES macros, which are defined elsewhere. */
+char *ip6_lookup_next_nodes[] = IP6_LOOKUP_NEXT_NODES;
+char *ip4_lookup_next_nodes[] = IP4_LOOKUP_NEXT_NODES;
+
+/* Fill in the ICMP ECHO structure, return the safety-checked and possibly shrunk data_len */
+/*
+ * Fill in an ICMP echo request: sequence number and id (converted to
+ * network byte order) plus a payload of incrementing bytes 0, 1, ... 255,
+ * 0, ...  The payload is capped at PING_MAXIMUM_DATA_SIZE bytes.
+ * Returns the payload length actually written.
+ */
+static u16
+init_icmp46_echo_request (icmp46_echo_request_t * icmp46_echo,
+                          u16 seq_host, u16 id_host, u16 data_len)
+{
+  int pos;
+
+  /* Shrink an oversized request to the supported maximum. */
+  if (data_len > PING_MAXIMUM_DATA_SIZE)
+    data_len = PING_MAXIMUM_DATA_SIZE;
+
+  icmp46_echo->id = clib_host_to_net_u16 (id_host);
+  icmp46_echo->seq = clib_host_to_net_u16 (seq_host);
+
+  pos = 0;
+  while (pos < data_len)
+    {
+      icmp46_echo->data[pos] = pos % 256;
+      pos++;
+    }
+
+  return data_len;
+}
+
+/*
+ * Build one ICMPv6 echo request and hand a burst of copies to ip6-lookup.
+ *
+ * vm          - vlib main used for buffer and frame handling.
+ * im          - ip6 main, used to find the source address.
+ * table_id    - FIB table used when no source interface is given.
+ * pa6         - destination address.
+ * sw_if_index - source interface, or ~0 to use the interface the
+ *               destination resolves through in table_id.
+ * seq_host, id_host - ICMP sequence number and id, host byte order.
+ * data_len    - requested payload length (capped by
+ *               init_icmp46_echo_request).
+ * burst       - number of packets to send; 0 is treated as 1 and values
+ *               above VLIB_FRAME_SIZE are clamped to it.
+ * verbose     - currently unused.
+ *
+ * Returns SEND_PING_OK when at least the first packet was enqueued.  If
+ * copying a later packet of the burst fails, the burst is cut short and
+ * SEND_PING_OK is still returned.  Otherwise one of
+ * SEND_PING_ALLOC_FAIL, SEND_PING_NO_TABLE, SEND_PING_NO_INTERFACE or
+ * SEND_PING_NO_SRC_ADDRESS is returned and nothing is sent.
+ */
+static send_ip46_ping_result_t
+send_ip6_ping (vlib_main_t * vm, ip6_main_t * im,
+               u32 table_id, ip6_address_t * pa6,
+               u32 sw_if_index, u16 seq_host, u16 id_host, u16 data_len,
+               u32 burst, u8 verbose)
+{
+  icmp6_echo_request_header_t *h0;
+  u32 bi0 = 0;
+  int bogus_length = 0;
+  vlib_buffer_t *p0;
+  vlib_frame_t *f;
+  u32 *to_next;
+  u32 n_enqueued;
+  vlib_buffer_free_list_t *fl;
+
+  if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+    return SEND_PING_ALLOC_FAIL;
+
+  p0 = vlib_get_buffer (vm, bi0);
+  fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+  vlib_buffer_init_for_free_list (p0, fl);
+  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (p0);
+
+  /*
+   * if the user did not provide a source interface, use any interface
+   * that the destination resolves via.
+   */
+  if (~0 == sw_if_index)
+    {
+      fib_node_index_t fib_entry_index;
+      u32 fib_index;
+
+      fib_index = ip6_fib_index_from_table_id (table_id);
+
+      if (~0 == fib_index)
+        {
+          vlib_buffer_free (vm, &bi0, 1);
+          return SEND_PING_NO_TABLE;
+        }
+
+      fib_entry_index = ip6_fib_table_lookup (fib_index, pa6, 128);
+      sw_if_index = fib_entry_get_resolving_interface (fib_entry_index);
+      /*
+       * Set the TX interface to force ip-lookup to use its table ID
+       */
+      vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index;
+    }
+  else
+    {
+      /*
+       * force an IP lookup in the table bound to the user's chosen
+       * source interface.
+       */
+      vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+        ip6_fib_table_get_index_for_sw_if_index (sw_if_index);
+    }
+
+  if (~0 == sw_if_index)
+    {
+      vlib_buffer_free (vm, &bi0, 1);
+      return SEND_PING_NO_INTERFACE;
+    }
+
+  vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index;
+
+  h0 = vlib_buffer_get_current (p0);
+
+  /* Fill in ip6 header fields */
+  h0->ip6.ip_version_traffic_class_and_flow_label =
+    clib_host_to_net_u32 (0x6 << 28);
+  h0->ip6.payload_length = 0;   /* Set below */
+  h0->ip6.protocol = IP_PROTOCOL_ICMP6;
+  h0->ip6.hop_limit = 255;
+  h0->ip6.dst_address = *pa6;
+  h0->ip6.src_address = *pa6;
+
+  /* Fill in the correct source now */
+  ip6_address_t *a = ip6_interface_first_address (im, sw_if_index);
+  if (!a)
+    {
+      vlib_buffer_free (vm, &bi0, 1);
+      return SEND_PING_NO_SRC_ADDRESS;
+    }
+  h0->ip6.src_address = a[0];
+
+  /* Fill in icmp fields */
+  h0->icmp.type = ICMP6_echo_request;
+  h0->icmp.code = 0;
+  h0->icmp.checksum = 0;
+
+  data_len =
+    init_icmp46_echo_request (&h0->icmp_echo, seq_host, id_host, data_len);
+  h0->icmp_echo.time_sent = vlib_time_now (vm);
+
+  /* Fix up the lengths */
+  h0->ip6.payload_length =
+    clib_host_to_net_u16 (data_len + sizeof (icmp46_header_t));
+
+  p0->current_length = clib_net_to_host_u16 (h0->ip6.payload_length) +
+    STRUCT_OFFSET_OF (icmp6_echo_request_header_t, icmp);
+
+  /* Calculate the ICMP checksum */
+  h0->icmp.checksum = 0;
+  h0->icmp.checksum =
+    ip6_tcp_udp_icmp_compute_checksum (vm, 0, &h0->ip6, &bogus_length);
+
+  /* Enqueue the packet right now */
+  f = vlib_get_frame_to_node (vm, ip6_lookup_node.index);
+  to_next = vlib_frame_vector_args (f);
+  to_next[0] = bi0;
+  n_enqueued = 1;
+
+  /* ASSERT is compiled out in release images, so clamp as well. */
+  ASSERT (burst <= VLIB_FRAME_SIZE);
+  if (burst > VLIB_FRAME_SIZE)
+    burst = VLIB_FRAME_SIZE;
+
+  /*
+   * Counting up (rather than pre-decrementing burst) keeps a burst of 0
+   * from wrapping around and overrunning the frame.
+   */
+  while (n_enqueued < burst)
+    {
+      vlib_buffer_t *c0 = vlib_buffer_copy (vm, p0);
+
+      /* Copy failed (presumably out of buffers): send what we have. */
+      if (c0 == 0)
+        break;
+
+      to_next[n_enqueued] = vlib_get_buffer_index (vm, c0);
+      n_enqueued++;
+    }
+  f->n_vectors = n_enqueued;
+  vlib_put_frame_to_node (vm, ip6_lookup_node.index, f);
+
+  return SEND_PING_OK;
+}
+
+/*
+ * Build a single ICMPv4 echo request and enqueue up to `burst` copies of it
+ * to ip4_lookup_node.
+ *
+ * vm          - vlib main of the caller.
+ * im          - IPv4 main; its lookup_main holds the interface addresses.
+ * table_id    - FIB table ID; only consulted when sw_if_index is ~0.
+ * pa4         - destination address.
+ * sw_if_index - source interface, or ~0 to pick the interface through which
+ *               the destination resolves in table_id.
+ * seq_host, id_host, data_len
+ *             - handed to init_icmp46_echo_request(); its return value is
+ *               taken as the payload length for the IP length field.
+ * burst       - number of identical requests to send, 1..VLIB_FRAME_SIZE.
+ * verbose     - non-zero prints the selected source address.
+ *
+ * Returns SEND_PING_OK once the frame has been handed over, otherwise
+ * SEND_PING_ALLOC_FAIL, SEND_PING_NO_TABLE, SEND_PING_NO_INTERFACE or
+ * SEND_PING_NO_SRC_ADDRESS.  On every failure after the allocation the
+ * request buffer is freed again.
+ *
+ * Note: if a burst copy cannot be allocated the burst is cut short; the
+ * frame then carries only the buffers that were really created and the
+ * function still returns SEND_PING_OK.
+ */
+static send_ip46_ping_result_t
+send_ip4_ping (vlib_main_t * vm,
+               ip4_main_t * im,
+               u32 table_id,
+               ip4_address_t * pa4,
+               u32 sw_if_index,
+               u16 seq_host, u16 id_host, u16 data_len, u32 burst, u8 verbose)
+{
+  icmp4_echo_request_header_t *h0;
+  u32 bi0 = 0;
+  ip_lookup_main_t *lm = &im->lookup_main;
+  vlib_buffer_t *p0;
+  vlib_frame_t *f;
+  u32 *to_next;
+  u32 if_add_index0;
+  u32 n_enqueued;
+  vlib_buffer_free_list_t *fl;
+
+  if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+    return SEND_PING_ALLOC_FAIL;
+
+  p0 = vlib_get_buffer (vm, bi0);
+  fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+  vlib_buffer_init_for_free_list (p0, fl);
+  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (p0);
+
+  /*
+   * If the user did not provide a source interface, use the interface
+   * through which the destination resolves.
+   */
+  if (~0 == sw_if_index)
+    {
+      fib_node_index_t fib_entry_index;
+      u32 fib_index;
+
+      fib_index = ip4_fib_index_from_table_id (table_id);
+
+      if (~0 == fib_index)
+        {
+          vlib_buffer_free (vm, &bi0, 1);
+          return SEND_PING_NO_TABLE;
+        }
+
+      fib_entry_index =
+        ip4_fib_table_lookup (ip4_fib_get (fib_index), pa4, 32);
+      sw_if_index = fib_entry_get_resolving_interface (fib_entry_index);
+      /*
+       * Set the TX interface to force ip-lookup to use the user's table ID
+       */
+      vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index;
+    }
+  else
+    {
+      /*
+       * Force an IP lookup in the table bound to the user's chosen
+       * source interface.
+       */
+      vnet_buffer (p0)->sw_if_index[VLIB_TX] =
+        ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+    }
+
+  if (~0 == sw_if_index)
+    {
+      vlib_buffer_free (vm, &bi0, 1);
+      return SEND_PING_NO_INTERFACE;
+    }
+
+  /*
+   * Without an address on the interface there is nothing sensible to put
+   * into the source field; fail the same way send_ip6_ping does instead of
+   * emitting a packet whose source equals its destination.
+   */
+  if_add_index0 = lm->if_address_pool_index_by_sw_if_index[sw_if_index];
+  if (PREDICT_FALSE (~0 == if_add_index0))
+    {
+      vlib_buffer_free (vm, &bi0, 1);
+      return SEND_PING_NO_SRC_ADDRESS;
+    }
+
+  vnet_buffer (p0)->sw_if_index[VLIB_RX] = sw_if_index;
+
+  h0 = vlib_buffer_get_current (p0);
+
+  /* Fill in ip4 header fields */
+  h0->ip4.checksum = 0;
+  h0->ip4.ip_version_and_header_length = 0x45;
+  h0->ip4.tos = 0;
+  h0->ip4.length = 0;           /* Set below */
+  h0->ip4.fragment_id = 0;
+  h0->ip4.flags_and_fragment_offset = 0;
+  h0->ip4.ttl = 0xff;
+  h0->ip4.protocol = IP_PROTOCOL_ICMP;
+  h0->ip4.dst_address = *pa4;
+
+  /* Source is the address found for the interface above */
+  {
+    ip_interface_address_t *if_add =
+      pool_elt_at_index (lm->if_address_pool, if_add_index0);
+    ip4_address_t *if_ip = ip_interface_address_get_address (lm, if_add);
+    h0->ip4.src_address = *if_ip;
+    if (verbose)
+      {
+        vlib_cli_output (vm, "Source address: %U",
+                         format_ip4_address, &h0->ip4.src_address);
+      }
+  }
+
+  /* Fill in icmp fields */
+  h0->icmp.type = ICMP4_echo_request;
+  h0->icmp.code = 0;
+  h0->icmp.checksum = 0;
+
+  data_len =
+    init_icmp46_echo_request (&h0->icmp_echo, seq_host, id_host, data_len);
+  h0->icmp_echo.time_sent = vlib_time_now (vm);
+
+  /* Fix up the lengths */
+  h0->ip4.length =
+    clib_host_to_net_u16 (data_len + sizeof (icmp46_header_t) +
+                          sizeof (ip4_header_t));
+
+  p0->current_length = clib_net_to_host_u16 (h0->ip4.length);
+
+  /* Calculate the IP and ICMP checksums */
+  h0->ip4.checksum = ip4_header_checksum (&(h0->ip4));
+  h0->icmp.checksum =
+    ~ip_csum_fold (ip_incremental_checksum (0, &(h0->icmp),
+                                            p0->current_length -
+                                            sizeof (ip4_header_t)));
+
+  /* Enqueue the packet right now */
+  f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+  to_next = vlib_frame_vector_args (f);
+  to_next[0] = bi0;
+
+  ASSERT (burst <= VLIB_FRAME_SIZE);
+
+  /*
+   * The original buffer is slot 0; add copies until the burst is complete
+   * or a copy cannot be made.  vlib_buffer_copy yields 0 on allocation
+   * failure, which must never be converted into a buffer index.
+   */
+  n_enqueued = 1;
+  while (n_enqueued < burst)
+    {
+      vlib_buffer_t *c0 = vlib_buffer_copy (vm, p0);
+      if (PREDICT_FALSE (0 == c0))
+        break;
+      to_next[n_enqueued] = vlib_get_buffer_index (vm, c0);
+      n_enqueued++;
+    }
+  f->n_vectors = n_enqueued;
+  vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+
+  return SEND_PING_OK;
+}
+
+
+/*
+ * Print one CLI line for a received ICMPv6 echo reply.
+ *
+ * bi0 is the index of the buffer holding the reply; its current data is
+ * read as an icmp6_echo_request_header_t (IPv6 header first).  The round
+ * trip time is the f64 found in the buffer's opaque `unused` area minus the
+ * time_sent value carried in the echo payload.
+ * NOTE(review): the f64 in `unused` is presumably the receive timestamp
+ * stored by the echo-reply node - confirm against that node.
+ */
+static void
+print_ip6_icmp_reply (vlib_main_t * vm, u32 bi0)
+{
+  vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+  icmp6_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+  f64 rtt = 0;
+
+  /* memcpy instead of a cast: the opaque area is not typed as f64 */
+  clib_memcpy (&rtt, vnet_buffer (b0)->unused, sizeof (rtt));
+  rtt -= h0->icmp_echo.time_sent;
+
+  /* Header fields are read from the wire, hence net-to-host conversion */
+  vlib_cli_output (vm,
+                   "%d bytes from %U: icmp_seq=%d ttl=%d time=%.4f ms",
+                   clib_net_to_host_u16 (h0->ip6.payload_length),
+                   format_ip6_address,
+                   &h0->ip6.src_address,
+                   clib_net_to_host_u16 (h0->icmp_echo.seq),
+                   h0->ip6.hop_limit, rtt * 1000.0);
+}
+
+/*
+ * Print one CLI line for a received ICMPv4 echo reply.
+ *
+ * bi0 is the index of the buffer holding the reply; its current data is
+ * read as an icmp4_echo_request_header_t (IPv4 header first).  The round
+ * trip time is the f64 found in the buffer's opaque `unused` area minus the
+ * time_sent value carried in the echo payload.
+ * NOTE(review): the f64 in `unused` is presumably the receive timestamp
+ * stored by the echo-reply node - confirm against that node.
+ */
+static void
+print_ip4_icmp_reply (vlib_main_t * vm, u32 bi0)
+{
+  vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
+  icmp4_echo_request_header_t *h0 = vlib_buffer_get_current (b0);
+  f64 rtt = 0;
+  u32 rcvd_icmp_len;
+
+  /* memcpy instead of a cast: the opaque area is not typed as f64 */
+  clib_memcpy (&rtt, vnet_buffer (b0)->unused, sizeof (rtt));
+  rtt -= h0->icmp_echo.time_sent;
+
+  /*
+   * ICMP length = IP total length minus the IP header; the low nibble of
+   * the first header byte is the header length in 32-bit words.
+   */
+  rcvd_icmp_len =
+    clib_net_to_host_u16 (h0->ip4.length) -
+    (4 * (0xF & h0->ip4.ip_version_and_header_length));
+
+  vlib_cli_output (vm,
+                   "%d bytes from %U: icmp_seq=%d ttl=%d time=%.4f ms",
+                   rcvd_icmp_len,
+                   format_ip4_address,
+                   &h0->ip4.src_address,
+                   clib_net_to_host_u16 (h0->icmp_echo.seq),
+                   h0->ip4.ttl, rtt * 1000.0);
+}
+
+
+/*
+ * Run one complete ping session inside the calling CLI process.
+ *
+ * A request is sent for every non-NULL address among pa4 / pa6, so passing
+ * both pings a dual-stack host over IPv4 and IPv6 in the same run.  Each of
+ * the ping_repeat rounds sends ping_burst requests per address family and
+ * then waits up to ping_interval for reply events, printing every reply as
+ * it arrives.  Any event other than a reply or a timeout is treated as a
+ * keypress and aborts the run.  A loss summary is printed at the end.
+ */
+
+static void
+run_ping_ip46_address (vlib_main_t * vm, u32 table_id, ip4_address_t * pa4,
+                       ip6_address_t * pa6, u32 sw_if_index,
+                       f64 ping_interval, u32 ping_repeat, u32 data_len,
+                       u32 ping_burst, u32 verbose)
+{
+  static u32 rand_seed = 0;
+
+  ping_main_t *pm = &ping_main;
+  uword cli_proc = vlib_current_process (vm);
+  ping_run_t *run = 0;
+  u32 run_index = 0;
+  u32 sent = 0;
+  u32 received = 0;
+  u16 icmp_id;
+  float loss;
+  int i;
+
+  if (PREDICT_FALSE (0 == rand_seed))
+    rand_seed = random_default_seed ();
+
+  /* Start from a random ICMP id and step forward until a free one is found */
+  for (icmp_id = random_u32 (&rand_seed) & 0xffff;
+       hash_get (pm->ping_run_by_icmp_id, icmp_id); icmp_id++)
+    {
+      vlib_cli_output (vm, "ICMP ID collision at %d, incrementing", icmp_id);
+    }
+
+  /* Register this run so that a reply can be matched to it by ICMP id */
+  pool_get (pm->ping_runs, run);
+  run_index = run - pm->ping_runs;
+  run->cli_process_id = cli_proc;
+  run->cli_thread_index = vlib_get_thread_index ();
+  run->icmp_id = icmp_id;
+  hash_set (pm->ping_run_by_icmp_id, icmp_id, run_index);
+
+  for (i = 1; i <= ping_repeat; i++)
+    {
+      f64 remaining;
+      f64 round_start = vlib_time_now (vm);
+
+      /* Re-fetch: a ping in another process may have moved pm->ping_runs */
+      run = vec_elt_at_index (pm->ping_runs, run_index);
+      run->curr_seq = i;
+
+      if (pa6)
+        {
+          send_ip46_ping_result_t rv6 =
+            send_ip6_ping (vm, pm->ip6_main, table_id, pa6, sw_if_index,
+                           i, icmp_id, data_len, ping_burst, verbose);
+          if (SEND_PING_OK == rv6)
+            sent += ping_burst;
+        }
+      if (pa4)
+        {
+          send_ip46_ping_result_t rv4 =
+            send_ip4_ping (vm, pm->ip4_main, table_id, pa4, sw_if_index,
+                           i, icmp_id, data_len, ping_burst, verbose);
+          if (SEND_PING_OK == rv4)
+            sent += ping_burst;
+        }
+
+      /* Collect replies until this round's interval has elapsed */
+      for (;;)
+        {
+          uword event_type, *event_data = 0;
+          int k;
+
+          /* An abort pushes i past ping_repeat, which ends the wait too */
+          if (!(i <= ping_repeat))
+            break;
+          remaining = round_start + ping_interval - vlib_time_now (vm);
+          if (!(remaining > 0.0))
+            break;
+
+          vlib_process_wait_for_event_or_clock (vm, remaining);
+          event_type = vlib_process_get_events (vm, &event_data);
+          switch (event_type)
+            {
+            case ~0:
+              /* no events => timeout */
+              break;
+
+            case PING_RESPONSE_IP6:
+            case PING_RESPONSE_IP4:
+              /* Each event datum is the buffer index of one reply */
+              for (k = 0; k < vec_len (event_data); k++)
+                {
+                  u32 bi0 = event_data[k];
+                  if (PING_RESPONSE_IP6 == event_type)
+                    print_ip6_icmp_reply (vm, bi0);
+                  else
+                    print_ip4_icmp_reply (vm, bi0);
+                  received++;
+                  if (0 != bi0)
+                    {
+                      vlib_buffer_free (vm, &bi0, 1);
+                    }
+                }
+              break;
+
+            default:
+              /* someone pressed a key, abort */
+              vlib_cli_output (vm, "Aborted due to a keypress.");
+              i = 1 + ping_repeat;
+              break;
+            }
+          vec_free (event_data);
+        }
+    }
+
+  vlib_cli_output (vm, "\n");
+
+  if (0 == sent)
+    loss = 0;
+  else
+    loss = 100.0 * ((float) sent - (float) received) / (float) sent;
+
+  vlib_cli_output (vm,
+                   "Statistics: %u sent, %u received, %f%% packet loss\n",
+                   sent, received, loss);
+
+  /* Re-fetch: a ping in another process may have moved pm->ping_runs */
+  run = vec_elt_at_index (pm->ping_runs, run_index);
+  hash_unset (pm->ping_run_by_icmp_id, icmp_id);
+  pool_put (pm->ping_runs, run);
+}
+
+
+
+
+
+/*
+ * CLI handler for the "ping" command.
+ *
+ * Parses the destination (a bare IPv4 or IPv6 address, or one introduced by
+ * the "ipv4" / "ipv6" keyword), an optional second destination of the other
+ * address family, and then any of the options source, size, table-id,
+ * interval, repeat, burst and verbose.  On success the ping is run
+ * synchronously through run_ping_ip46_address().
+ *
+ * vm    - vlib main.
+ * input - unparsed remainder of the command line.
+ * cmd   - the CLI command being executed (not used here).
+ *
+ * Returns 0 on success or a clib error describing the first parse or range
+ * problem; in the error case no ping is sent.
+ */
+static clib_error_t *
+ping_ip_address (vlib_main_t * vm,
+                 unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  ip4_address_t a4;
+  ip6_address_t a6;
+  clib_error_t *error = 0;
+  u32 ping_repeat = 5;
+  u32 ping_burst = 1;
+  u8 ping_ip4, ping_ip6;
+  vnet_main_t *vnm = vnet_get_main ();
+  u32 data_len = PING_DEFAULT_DATA_LEN;
+  u32 verbose = 0;
+  f64 ping_interval = PING_DEFAULT_INTERVAL;
+  u32 sw_if_index, table_id;
+
+  table_id = 0;
+  ping_ip4 = ping_ip6 = 0;
+  sw_if_index = ~0;
+
+  if (unformat (input, "%U", unformat_ip4_address, &a4))
+    {
+      ping_ip4 = 1;
+    }
+  else if (unformat (input, "%U", unformat_ip6_address, &a6))
+    {
+      ping_ip6 = 1;
+    }
+  else if (unformat (input, "ipv4"))
+    {
+      if (unformat (input, "%U", unformat_ip4_address, &a4))
+        {
+          ping_ip4 = 1;
+        }
+      else
+        {
+          /*
+           * Stop here: continuing would either run a ping without any
+           * destination or overwrite (and leak) this error further down.
+           */
+          error =
+            clib_error_return (0,
+                               "expecting IPv4 address but got `%U'",
+                               format_unformat_error, input);
+          goto done;
+        }
+    }
+  else if (unformat (input, "ipv6"))
+    {
+      if (unformat (input, "%U", unformat_ip6_address, &a6))
+        {
+          ping_ip6 = 1;
+        }
+      else
+        {
+          /* Same reasoning as for the IPv4 keyword above */
+          error =
+            clib_error_return (0,
+                               "expecting IPv6 address but got `%U'",
+                               format_unformat_error, input);
+          goto done;
+        }
+    }
+  else
+    {
+      error =
+        clib_error_return (0,
+                           "expecting IP4/IP6 address `%U'. Usage: ping <addr> [source <intf>] [size <datasz>] [repeat <count>] [verbose]",
+                           format_unformat_error, input);
+      goto done;
+    }
+
+  /* allow for the second AF in the same ping */
+  if (!ping_ip4 && (unformat (input, "ipv4")))
+    {
+      if (unformat (input, "%U", unformat_ip4_address, &a4))
+        {
+          ping_ip4 = 1;
+        }
+    }
+  else if (!ping_ip6 && (unformat (input, "ipv6")))
+    {
+      if (unformat (input, "%U", unformat_ip6_address, &a6))
+        {
+          ping_ip6 = 1;
+        }
+    }
+
+  /* parse the rest of the parameters in a cycle */
+  while (!unformat_eof (input, NULL))
+    {
+      if (unformat (input, "source"))
+        {
+          if (!unformat_user
+              (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
+            {
+              error =
+                clib_error_return (0,
+                                   "unknown interface `%U'",
+                                   format_unformat_error, input);
+              goto done;
+            }
+        }
+      else if (unformat (input, "size"))
+        {
+          if (!unformat (input, "%u", &data_len))
+            {
+              error =
+                clib_error_return (0,
+                                   "expecting size but got `%U'",
+                                   format_unformat_error, input);
+              goto done;
+            }
+          if (data_len > PING_MAXIMUM_DATA_SIZE)
+            {
+              error =
+                clib_error_return (0,
+                                   "%d is bigger than maximum allowed payload size %d",
+                                   data_len, PING_MAXIMUM_DATA_SIZE);
+              goto done;
+            }
+        }
+      else if (unformat (input, "table-id"))
+        {
+          if (!unformat (input, "%u", &table_id))
+            {
+              error =
+                clib_error_return (0,
+                                   "expecting table-id but got `%U'",
+                                   format_unformat_error, input);
+              goto done;
+            }
+        }
+      else if (unformat (input, "interval"))
+        {
+          if (!unformat (input, "%f", &ping_interval))
+            {
+              error =
+                clib_error_return (0,
+                                   "expecting interval (floating point number) got `%U'",
+                                   format_unformat_error, input);
+              goto done;
+            }
+        }
+      else if (unformat (input, "repeat"))
+        {
+          if (!unformat (input, "%u", &ping_repeat))
+            {
+              error =
+                clib_error_return (0,
+                                   "expecting repeat count but got `%U'",
+                                   format_unformat_error, input);
+              goto done;
+            }
+        }
+      else if (unformat (input, "burst"))
+        {
+          if (!unformat (input, "%u", &ping_burst))
+            {
+              error =
+                clib_error_return (0,
+                                   "expecting burst count but got `%U'",
+                                   format_unformat_error, input);
+              goto done;
+            }
+        }
+      else if (unformat (input, "verbose"))
+        {
+          verbose = 1;
+        }
+      else
+        {
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, input);
+          goto done;
+        }
+    }
+
+  /* One frame carries the whole burst, so it cannot exceed a frame */
+  if (ping_burst < 1 || ping_burst > VLIB_FRAME_SIZE)
+    {
+      error = clib_error_return (0, "burst size must be between 1 and %u",
+                                 VLIB_FRAME_SIZE);
+      goto done;
+    }
+
+  run_ping_ip46_address (vm, table_id, ping_ip4 ? &a4 : NULL,
+                         ping_ip6 ? &a6 : NULL, sw_if_index, ping_interval,
+                         ping_repeat, data_len, ping_burst, verbose);
+done:
+  return error;
+}
+
+/*?
+ * This command sends an ICMP ECHO_REQUEST to network hosts. The address
+ * can be an IPv4 or IPv6 address (or both at the same time).
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to ping an IPv4 address:
+ * @cliexstart{ping 172.16.1.2 source GigabitEthernet2/0/0 repeat 2}
+ * 64 bytes from 172.16.1.2: icmp_seq=1 ttl=64 time=.1090 ms
+ * 64 bytes from 172.16.1.2: icmp_seq=2 ttl=64 time=.0914 ms
+ *
+ * Statistics: 2 sent, 2 received, 0% packet loss
+ * @cliexend
+ *
+ * Example of how to ping both an IPv4 address and an IPv6 address at the same time:
+ * @cliexstart{ping 172.16.1.2 ipv6 fe80::24a5:f6ff:fe9c:3a36 source GigabitEthernet2/0/0 repeat 2 verbose}
+ * Adjacency index: 10, sw_if_index: 1
+ * Adj: ip6-discover-neighbor
+ * Adj Interface: 0
+ * Forced set interface: 1
+ * Adjacency index: 0, sw_if_index: 4294967295
+ * Adj: ip4-miss
+ * Adj Interface: 0
+ * Forced set interface: 1
+ * Source address: 172.16.1.1
+ * 64 bytes from 172.16.1.2: icmp_seq=1 ttl=64 time=.1899 ms
+ * Adjacency index: 10, sw_if_index: 1
+ * Adj: ip6-discover-neighbor
+ * Adj Interface: 0
+ * Forced set interface: 1
+ * Adjacency index: 0, sw_if_index: 4294967295
+ * Adj: ip4-miss
+ * Adj Interface: 0
+ * Forced set interface: 1
+ * Source address: 172.16.1.1
+ * 64 bytes from 172.16.1.2: icmp_seq=2 ttl=64 time=.0910 ms
+ *
+ * Statistics: 4 sent, 2 received, 50% packet loss
+ * @cliexend
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+/*
+ * Registration of the "ping" CLI command, handled by ping_ip_address().
+ * The short help lists every option the handler parses, including burst.
+ */
+VLIB_CLI_COMMAND (ping_command, static) =
+{
+  .path = "ping",
+  .function = ping_ip_address,
+  .short_help = "ping {<ip-addr> | ipv4 <ip4-addr> | ipv6 <ip6-addr>}"
+  " [ipv4 <ip4-addr> | ipv6 <ip6-addr>] [source <interface>]"
+  " [size <pktsize>] [interval <sec>] [repeat <cnt>] [table-id <id>]"
+  " [burst <count>] [verbose]",
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/*
+ * Init function of the ping CLI: remember the IPv4 / IPv6 mains in
+ * ping_main and register the echo-reply nodes as handlers for the ICMPv6
+ * and ICMPv4 echo-reply types.  Always returns 0 (no error).
+ */
+static clib_error_t *
+ping_cli_init (vlib_main_t * vm)
+{
+  ping_main.ip6_main = &ip6_main;
+  ping_main.ip4_main = &ip4_main;
+
+  icmp6_register_type (vm, ICMP6_echo_reply,
+                       ip6_icmp_echo_reply_node.index);
+  ip4_icmp_register_type (vm, ICMP4_echo_reply,
+                          ip4_icmp_echo_reply_node.index);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (ping_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ping.h b/src/vnet/ip/ping.h
new file mode 100644
index 00000000..b1b71f68
--- /dev/null
+++ b/src/vnet/ip/ping.h
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_ping_h
+#define included_vnet_ping_h
+
+
+#include <vnet/ip/ip.h>
+
+#include <vnet/ip/lookup.h>
+/*
+ * Event type codes received by the CLI process that runs a ping.
+ * run_ping_ip46_address() switches on them after waiting for events and
+ * treats each event datum as the buffer index of one reply.
+ */
+typedef enum
+{
+ PING_RESPONSE_IP6 = 42, /* event data are ICMPv6 echo replies */
+ PING_RESPONSE_IP4, /* event data are ICMPv4 echo replies */
+} ping_response_type_t;
+/*
+ * Result codes of send_ip4_ping() / send_ip6_ping().  The caller only
+ * counts a request as sent when the result is SEND_PING_OK.
+ */
+typedef enum
+{
+ SEND_PING_OK = 0, /* request frame was handed to the lookup node */
+ SEND_PING_ALLOC_FAIL, /* no buffer could be allocated for the request */
+ SEND_PING_NO_INTERFACE, /* no source interface given and none resolved */
+ SEND_PING_NO_TABLE, /* table_id does not map to a FIB index */
+ SEND_PING_NO_SRC_ADDRESS, /* chosen interface has no usable source address */
+} send_ip46_ping_result_t;
+
+/*
+ * Currently running ping command.
+ *
+ * One element is taken from ping_main.ping_runs when a ping starts and is
+ * returned to the pool when it finishes.
+ */
+typedef struct ping_run_t
+{
+ u16 icmp_id; /* ICMP identifier chosen for this run */
+ u16 curr_seq; /* loop counter of the round currently in progress */
+ uword cli_process_id; /* vlib_current_process() of the CLI process */
+ uword cli_thread_index; /* vlib_get_thread_index() when the run started */
+} ping_run_t;
+/*
+ * Global state of the ping feature: pointers to the IP mains and the
+ * bookkeeping of all pings currently in progress.
+ */
+typedef struct ping_main_t
+{
+ ip6_main_t *ip6_main; /* set to &ip6_main by ping_cli_init() */
+ ip4_main_t *ip4_main; /* set to &ip4_main by ping_cli_init() */
+ ping_run_t *ping_runs; /* pool of runs, managed with pool_get / pool_put */
+ /* hash table to find back the CLI process for a reply */
+ // uword *cli_proc_by_icmp_id;
+ ping_run_t *ping_run_by_icmp_id; /* hash: ICMP id -> index into ping_runs. NOTE(review): used only through hash_get/hash_set/hash_unset, so the ping_run_t * type looks accidental - confirm */
+} ping_main_t;
+/*
+ * The ping state instance used by the ping CLI code.
+ * NOTE(review): this is a definition, not an extern declaration, placed in
+ * a header; every file including ping.h gets its own tentative definition.
+ * Confirm this links as intended (e.g. with -fno-common).
+ */
+ping_main_t ping_main;
+/* Defaults the ping CLI uses when "size" / "interval" are not given. */
+#define PING_DEFAULT_DATA_LEN 60
+#define PING_DEFAULT_INTERVAL 1.0
+/*
+ * Largest "size" value the ping CLI accepts: the data size of one buffer
+ * minus an IPv6 header, the ICMP header and the fixed part of the echo
+ * request that precedes its data[] member.
+ */
+#define PING_MAXIMUM_DATA_SIZE (VLIB_BUFFER_DATA_SIZE - sizeof(ip6_header_t) - sizeof(icmp46_header_t) - offsetof(icmp46_echo_request_t, data))
+
+/* *INDENT-OFF* */
+/*
+ * Echo request body that follows the ICMP header, shared by the IPv4 and
+ * IPv6 request layouts below.
+ */
+typedef CLIB_PACKED (struct {
+ u16 id; /* ICMP identifier; filled in by init_icmp46_echo_request() */
+ u16 seq; /* sequence number; byte-swapped before being printed */
+ f64 time_sent; /* vlib_time_now() at send time, stored as a raw f64 */
+ u8 data[0]; /* start of the variable-length payload */
+}) icmp46_echo_request_t;
+
+/*
+ * Packed layout of an ICMPv6 echo packet as built and parsed by the ping
+ * code: IPv6 header, ICMP header, then the echo body.
+ */
+typedef CLIB_PACKED (struct {
+ ip6_header_t ip6;
+ icmp46_header_t icmp;
+ icmp46_echo_request_t icmp_echo;
+}) icmp6_echo_request_header_t;
+/*
+ * Packed layout of an ICMPv4 echo packet as built and parsed by the ping
+ * code: IPv4 header (built without options, 0x45), ICMP header, then the
+ * echo body.
+ */
+typedef CLIB_PACKED (struct {
+ ip4_header_t ip4;
+ icmp46_header_t icmp;
+ icmp46_echo_request_t icmp_echo;
+}) icmp4_echo_request_header_t;
+
+/* *INDENT-ON* */
+
+/*
+ * Per-packet trace record for ICMP echo handling.
+ * NOTE(review): the code that fills and formats this record is not visible
+ * here; the field meanings below are inferred from their names - confirm.
+ */
+typedef struct
+{
+ u16 id; /* presumably the ICMP identifier of the traced packet */
+ u16 seq; /* presumably the ICMP sequence number of the traced packet */
+ u8 bound; /* presumably whether the id matched a running ping - confirm */
+} icmp_echo_trace_t;
+
+
+
+/*
+ * Next-node slots for ICMPv6 echo-reply processing.
+ * NOTE(review): presumably the next indices of ip6_icmp_echo_reply_node;
+ * the node registration is not visible here - confirm.
+ */
+typedef enum
+{
+ ICMP6_ECHO_REPLY_NEXT_DROP,
+ ICMP6_ECHO_REPLY_NEXT_PUNT,
+ ICMP6_ECHO_REPLY_N_NEXT, /* number of slots above; keep last */
+} icmp6_echo_reply_next_t;
+/*
+ * Next-node slots for ICMPv4 echo-reply processing.
+ * NOTE(review): presumably the next indices of ip4_icmp_echo_reply_node;
+ * the node registration is not visible here - confirm.
+ */
+typedef enum
+{
+ ICMP4_ECHO_REPLY_NEXT_DROP,
+ ICMP4_ECHO_REPLY_NEXT_PUNT,
+ ICMP4_ECHO_REPLY_N_NEXT, /* number of slots above; keep last */
+} icmp4_echo_reply_next_t;
+
+#endif /* included_vnet_ping_h */
diff --git a/src/vnet/ip/ports.def b/src/vnet/ip/ports.def
new file mode 100644
index 00000000..cdb754f5
--- /dev/null
+++ b/src/vnet/ip/ports.def
@@ -0,0 +1,757 @@
+/*
+ * ip/ports.def: tcp/udp port definitions
+ *
+ * Eliot Dresselhaus
+ * August, 2005
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+PORT NUMBERS
+
+(last updated 18 October 2005)
+
+The port numbers are divided into three ranges: the Well Known Ports,
+the Registered Ports, and the Dynamic and/or Private Ports.
+
+The Well Known Ports are those from 0 through 1023.
+
+The Registered Ports are those from 1024 through 49151
+
+The Dynamic and/or Private Ports are those from 49152 through 65535
+
+
+************************************************************************
+* PLEASE NOTE THE FOLLOWING: *
+* *
+* 1. UNASSIGNED PORT NUMBERS SHOULD NOT BE USED. THE IANA WILL ASSIGN *
+* THE NUMBER FOR THE PORT AFTER YOUR APPLICATION HAS BEEN APPROVED. *
+* *
+* 2. ASSIGNMENT OF A PORT NUMBER DOES NOT IN ANY WAY IMPLY AN *
+* ENDORSEMENT OF AN APPLICATION OR PRODUCT, AND THE FACT THAT NETWORK *
+* TRAFFIC IS FLOWING TO OR FROM A REGISTERED PORT DOES NOT MEAN THAT *
+* IT IS "GOOD" TRAFFIC. FIREWALL AND SYSTEM ADMINISTRATORS SHOULD *
+* CHOOSE HOW TO CONFIGURE THEIR SYSTEMS BASED ON THEIR KNOWLEDGE OF *
+* THE TRAFFIC IN QUESTION, NOT WHETHER THERE IS A PORT NUMBER *
+* REGISTERED OR NOT. *
+************************************************************************
+
+
+WELL KNOWN PORT NUMBERS
+
+The Well Known Ports are assigned by the IANA and on most systems can
+only be used by system (or root) processes or by programs executed by
+privileged users.
+
+Ports are used in the TCP [RFC793] to name the ends of logical
+connections which carry long term conversations. For the purpose of
+providing services to unknown callers, a service contact port is
+defined. This list specifies the port used by the server process as
+its contact port. The contact port is sometimes called the
+"well-known port".
+
+To the extent possible, these same port assignments are used with the
+UDP [RFC768].
+
+The range for assigned ports managed by the IANA is 0-1023.
+*/
+ip_port (TCPMUX, 1)
+ip_port (COMPRESS_NET_MANAGEMENT, 2)
+ip_port (COMPRESS_NET, 3)
+ip_port (RJE, 5)
+ip_port (ECHO, 7)
+ip_port (DISCARD, 9)
+ip_port (SYSTAT, 11)
+ip_port (DAYTIME, 13)
+ip_port (QOTD, 17)
+ip_port (MSP, 18)
+ip_port (CHARGEN, 19)
+ip_port (FTP_DATA, 20)
+ip_port (FTP, 21)
+ip_port (SSH, 22)
+ip_port (TELNET, 23)
+ip_port (SMTP, 25)
+ip_port (NSW_FE, 27)
+ip_port (MSG_ICP, 29)
+ip_port (MSG_AUTH, 31)
+ip_port (DSP, 33)
+ip_port (TIME, 37)
+ip_port (RAP, 38)
+ip_port (RLP, 39)
+ip_port (GRAPHICS, 41)
+ip_port (NAME, 42)
+ip_port (NAMESERVER, 42)
+ip_port (NICNAME, 43)
+ip_port (MPM_FLAGS, 44)
+ip_port (MPM, 45)
+ip_port (MPM_SND, 46)
+ip_port (NI_FTP, 47)
+ip_port (AUDITD, 48)
+ip_port (TACACS, 49)
+ip_port (RE_MAIL_CK, 50)
+ip_port (LA_MAINT, 51)
+ip_port (XNS_TIME, 52)
+ip_port (DNS, 53)
+ip_port (XNS_CH, 54)
+ip_port (ISI_GL, 55)
+ip_port (XNS_AUTH, 56)
+ip_port (XNS_MAIL, 58)
+ip_port (NI_MAIL, 61)
+ip_port (ACAS, 62)
+ip_port (WHOIS_PLUS_PLUS, 63)
+ip_port (COVIA, 64)
+ip_port (TACACS_DS, 65)
+ip_port (ORACLE_SQL_NET, 66)
+ip_port (BOOTPS, 67)
+ip_port (BOOTPC, 68)
+ip_port (TFTP, 69)
+ip_port (GOPHER, 70)
+ip_port (NETRJS_1, 71)
+ip_port (NETRJS_2, 72)
+ip_port (NETRJS_3, 73)
+ip_port (NETRJS_4, 74)
+ip_port (DEOS, 76)
+ip_port (VETTCP, 78)
+ip_port (FINGER, 79)
+ip_port (WWW, 80)
+ip_port (HOSTS2_NS, 81)
+ip_port (XFER, 82)
+ip_port (MIT_ML_DEV, 83)
+ip_port (CTF, 84)
+ip_port (MIT_ML_DEV1, 85)
+ip_port (MFCOBOL, 86)
+ip_port (KERBEROS, 88)
+ip_port (SU_MIT_TG, 89)
+ip_port (DNSIX, 90)
+ip_port (MIT_DOV, 91)
+ip_port (NPP, 92)
+ip_port (DCP, 93)
+ip_port (OBJCALL, 94)
+ip_port (SUPDUP, 95)
+ip_port (DIXIE, 96)
+ip_port (SWIFT_RVF, 97)
+ip_port (TACNEWS, 98)
+ip_port (METAGRAM, 99)
+ip_port (NEWACCT, 100)
+ip_port (HOSTNAME, 101)
+ip_port (ISO_TSAP, 102)
+ip_port (GPPITNP, 103)
+ip_port (ACR_NEMA, 104)
+ip_port (CSO, 105)
+ip_port (CSNET_NS, 105)
+ip_port (3COM_TSMUX, 106)
+ip_port (RTELNET, 107)
+ip_port (SNAGAS, 108)
+ip_port (POP2, 109)
+ip_port (POP3, 110)
+ip_port (SUNRPC, 111)
+ip_port (MCIDAS, 112)
+ip_port (IDENT, 113)
+ip_port (SFTP, 115)
+ip_port (ANSANOTIFY, 116)
+ip_port (UUCP_PATH, 117)
+ip_port (SQLSERV, 118)
+ip_port (NNTP, 119)
+ip_port (CFDPTKT, 120)
+ip_port (ERPC, 121)
+ip_port (SMAKYNET, 122)
+ip_port (NTP, 123)
+ip_port (ANSATRADER, 124)
+ip_port (LOCUS_MAP, 125)
+ip_port (NXEDIT, 126)
+ip_port (LOCUS_CON, 127)
+ip_port (GSS_XLICEN, 128)
+ip_port (PWDGEN, 129)
+ip_port (CISCO_FNA, 130)
+ip_port (CISCO_TNA, 131)
+ip_port (CISCO_SYS, 132)
+ip_port (STATSRV, 133)
+ip_port (INGRES_NET, 134)
+ip_port (EPMAP, 135)
+ip_port (PROFILE, 136)
+ip_port (NETBIOS_NS, 137)
+ip_port (NETBIOS_DGM, 138)
+ip_port (NETBIOS_SSN, 139)
+ip_port (EMFIS_DATA, 140)
+ip_port (EMFIS_CNTL, 141)
+ip_port (BL_IDM, 142)
+ip_port (IMAP, 143)
+ip_port (UMA, 144)
+ip_port (UAAC, 145)
+ip_port (ISO_TP0, 146)
+ip_port (ISO_IP, 147)
+ip_port (JARGON, 148)
+ip_port (AED_512, 149)
+ip_port (SQL_NET, 150)
+ip_port (HEMS, 151)
+ip_port (BFTP, 152)
+ip_port (SGMP, 153)
+ip_port (NETSC_PROD, 154)
+ip_port (NETSC_DEV, 155)
+ip_port (SQLSRV, 156)
+ip_port (KNET_CMP, 157)
+ip_port (PCMAIL_SRV, 158)
+ip_port (NSS_ROUTING, 159)
+ip_port (SGMP_TRAPS, 160)
+ip_port (SNMP, 161)
+ip_port (SNMPTRAP, 162)
+ip_port (CMIP_MAN, 163)
+ip_port (CMIP_AGENT, 164)
+ip_port (XNS_COURIER, 165)
+ip_port (S_NET, 166)
+ip_port (NAMP, 167)
+ip_port (RSVD, 168)
+ip_port (SEND, 169)
+ip_port (PRINT_SRV, 170)
+ip_port (MULTIPLEX, 171)
+ip_port (CL1, 172)
+ip_port (XYPLEX_MUX, 173)
+ip_port (MAILQ, 174)
+ip_port (VMNET, 175)
+ip_port (GENRAD_MUX, 176)
+ip_port (XDMCP, 177)
+ip_port (NEXTSTEP, 178)
+ip_port (BGP, 179)
+ip_port (RIS, 180)
+ip_port (UNIFY, 181)
+ip_port (AUDIT, 182)
+ip_port (OCBINDER, 183)
+ip_port (OCSERVER, 184)
+ip_port (REMOTE_KIS, 185)
+ip_port (KIS, 186)
+ip_port (ACI, 187)
+ip_port (MUMPS, 188)
+ip_port (QFT, 189)
+ip_port (GACP, 190)
+ip_port (PROSPERO, 191)
+ip_port (OSU_NMS, 192)
+ip_port (SRMP, 193)
+ip_port (IRC, 194)
+ip_port (DN6_NLM_AUD, 195)
+ip_port (DN6_SMM_RED, 196)
+ip_port (DLS, 197)
+ip_port (DLS_MON, 198)
+ip_port (SMUX, 199)
+ip_port (SRC, 200)
+ip_port (AT_RTMP, 201)
+ip_port (AT_NBP, 202)
+ip_port (AT_3, 203)
+ip_port (AT_ECHO, 204)
+ip_port (AT_5, 205)
+ip_port (AT_ZIS, 206)
+ip_port (AT_7, 207)
+ip_port (AT_8, 208)
+ip_port (QMTP, 209)
+ip_port (Z39_50, 210)
+ip_port (TI914CG, 211)
+ip_port (ANET, 212)
+ip_port (IPX, 213)
+ip_port (VMPWSCS, 214)
+ip_port (SOFTPC, 215)
+ip_port (CAILIC, 216)
+ip_port (DBASE, 217)
+ip_port (MPP, 218)
+ip_port (UARPS, 219)
+ip_port (IMAP3, 220)
+ip_port (FLN_SPX, 221)
+ip_port (RSH_SPX, 222)
+ip_port (CDC, 223)
+ip_port (MASQDIALER, 224)
+ip_port (DIRECT, 242)
+ip_port (SUR_MEAS, 243)
+ip_port (INBUSINESS, 244)
+ip_port (LINK, 245)
+ip_port (DSP3270, 246)
+ip_port (SUBNTBCST_TFTP, 247)
+ip_port (BHFHS, 248)
+ip_port (RAP1, 256)
+ip_port (SET, 257)
+ip_port (YAK_CHAT, 258)
+ip_port (ESRO_GEN, 259)
+ip_port (OPENPORT, 260)
+ip_port (NSIIOPS, 261)
+ip_port (ARCISDMS, 262)
+ip_port (HDAP, 263)
+ip_port (BGMP, 264)
+ip_port (X_BONE_CTL, 265)
+ip_port (SST, 266)
+ip_port (TD_SERVICE, 267)
+ip_port (TD_REPLICA, 268)
+ip_port (HTTP_MGMT, 280)
+ip_port (PERSONAL_LINK, 281)
+ip_port (CABLEPORT_AX, 282)
+ip_port (RESCAP, 283)
+ip_port (CORERJD, 284)
+ip_port (FXP, 286)
+ip_port (K_BLOCK, 287)
+ip_port (NOVASTORBAKCUP, 308)
+ip_port (ENTRUSTTIME, 309)
+ip_port (BHMDS, 310)
+ip_port (ASIP_WEBADMIN, 311)
+ip_port (VSLMP, 312)
+ip_port (MAGENTA_LOGIC, 313)
+ip_port (OPALIS_ROBOT, 314)
+ip_port (DPSI, 315)
+ip_port (DECAUTH, 316)
+ip_port (ZANNET, 317)
+ip_port (PKIX_TIMESTAMP, 318)
+ip_port (PTP_EVENT, 319)
+ip_port (PTP_GENERAL, 320)
+ip_port (PIP, 321)
+ip_port (RTSPS, 322)
+ip_port (TEXAR, 333)
+ip_port (PDAP, 344)
+ip_port (PAWSERV, 345)
+ip_port (ZSERV, 346)
+ip_port (FATSERV, 347)
+ip_port (CSI_SGWP, 348)
+ip_port (MFTP, 349)
+ip_port (MATIP_TYPE_A, 350)
+ip_port (MATIP_TYPE_B, 351)
+ip_port (BHOETTY, 351)
+ip_port (DTAG_STE_SB, 352)
+ip_port (BHOEDAP4, 352)
+ip_port (NDSAUTH, 353)
+ip_port (BH611, 354)
+ip_port (DATEX_ASN, 355)
+ip_port (CLOANTO_NET_1, 356)
+ip_port (BHEVENT, 357)
+ip_port (SHRINKWRAP, 358)
+ip_port (NSRMP, 359)
+ip_port (SCOI2ODIALOG, 360)
+ip_port (SEMANTIX, 361)
+ip_port (SRSSEND, 362)
+ip_port (RSVP_TUNNEL, 363)
+ip_port (AURORA_CMGR, 364)
+ip_port (DTK, 365)
+ip_port (ODMR, 366)
+ip_port (MORTGAGEWARE, 367)
+ip_port (QBIKGDP, 368)
+ip_port (RPC2PORTMAP, 369)
+ip_port (CODAAUTH2, 370)
+ip_port (CLEARCASE, 371)
+ip_port (ULISTPROC, 372)
+ip_port (LEGENT_1, 373)
+ip_port (LEGENT_2, 374)
+ip_port (HASSLE, 375)
+ip_port (NIP, 376)
+ip_port (TNETOS, 377)
+ip_port (DSETOS, 378)
+ip_port (IS99C, 379)
+ip_port (IS99S, 380)
+ip_port (HP_COLLECTOR, 381)
+ip_port (HP_MANAGED_NODE, 382)
+ip_port (HP_ALARM_MGR, 383)
+ip_port (ARNS, 384)
+ip_port (IBM_APP, 385)
+ip_port (ASA, 386)
+ip_port (AURP, 387)
+ip_port (UNIDATA_LDM, 388)
+ip_port (LDAP, 389)
+ip_port (UIS, 390)
+ip_port (SYNOTICS_RELAY, 391)
+ip_port (SYNOTICS_BROKER, 392)
+ip_port (META5, 393)
+ip_port (EMBL_NDT, 394)
+ip_port (NETCP, 395)
+ip_port (NETWARE_IP, 396)
+ip_port (MPTN, 397)
+ip_port (KRYPTOLAN, 398)
+ip_port (ISO_TSAP_C2, 399)
+ip_port (WORK_SOL, 400)
+ip_port (UPS, 401)
+ip_port (GENIE, 402)
+ip_port (DECAP, 403)
+ip_port (NCED, 404)
+ip_port (NCLD, 405)
+ip_port (IMSP, 406)
+ip_port (TIMBUKTU, 407)
+ip_port (PRM_SM, 408)
+ip_port (PRM_NM, 409)
+ip_port (DECLADEBUG, 410)
+ip_port (RMT, 411)
+ip_port (SYNOPTICS_TRAP, 412)
+ip_port (SMSP, 413)
+ip_port (INFOSEEK, 414)
+ip_port (BNET, 415)
+ip_port (SILVERPLATTER, 416)
+ip_port (ONMUX, 417)
+ip_port (HYPER_G, 418)
+ip_port (ARIEL1, 419)
+ip_port (SMPTE, 420)
+ip_port (ARIEL2, 421)
+ip_port (ARIEL3, 422)
+ip_port (OPC_JOB_START, 423)
+ip_port (OPC_JOB_TRACK, 424)
+ip_port (ICAD_EL, 425)
+ip_port (SMARTSDP, 426)
+ip_port (SVRLOC, 427)
+ip_port (OCS_CMU, 428)
+ip_port (OCS_AMU, 429)
+ip_port (UTMPSD, 430)
+ip_port (UTMPCD, 431)
+ip_port (IASD, 432)
+ip_port (NNSP, 433)
+ip_port (MOBILEIP_AGENT, 434)
+ip_port (MOBILIP_MN, 435)
+ip_port (DNA_CML, 436)
+ip_port (COMSCM, 437)
+ip_port (DSFGW, 438)
+ip_port (DASP, 439)
+ip_port (SGCP, 440)
+ip_port (DECVMS_SYSMGT, 441)
+ip_port (CVC_HOSTD, 442)
+ip_port (HTTPS, 443)
+ip_port (SNPP, 444)
+ip_port (MICROSOFT_DS, 445)
+ip_port (DDM_RDB, 446)
+ip_port (DDM_DFM, 447)
+ip_port (DDM_SSL, 448)
+ip_port (AS_SERVERMAP, 449)
+ip_port (TSERVER, 450)
+ip_port (SFS_SMP_NET, 451)
+ip_port (SFS_CONFIG, 452)
+ip_port (CREATIVESERVER, 453)
+ip_port (CONTENTSERVER, 454)
+ip_port (CREATIVEPARTNR, 455)
+ip_port (MACON_TCP, 456)
+ip_port (SCOHELP, 457)
+ip_port (APPLEQTC, 458)
+ip_port (AMPR_RCMD, 459)
+ip_port (SKRONK, 460)
+ip_port (DATASURFSRV, 461)
+ip_port (DATASURFSRVSEC, 462)
+ip_port (ALPES, 463)
+ip_port (KPASSWD, 464)
+ip_port (URD, 465)
+ip_port (DIGITAL_VRC, 466)
+ip_port (MYLEX_MAPD, 467)
+ip_port (PHOTURIS, 468)
+ip_port (RCP, 469)
+ip_port (SCX_PROXY, 470)
+ip_port (MONDEX, 471)
+ip_port (LJK_LOGIN, 472)
+ip_port (HYBRID_POP, 473)
+ip_port (TN_TL_W1, 474)
+ip_port (TCPNETHASPSRV, 475)
+ip_port (TN_TL_FD1, 476)
+ip_port (SS7NS, 477)
+ip_port (SPSC, 478)
+ip_port (IAFSERVER, 479)
+ip_port (IAFDBASE, 480)
+ip_port (PH, 481)
+ip_port (BGS_NSI, 482)
+ip_port (ULPNET, 483)
+ip_port (INTEGRA_SME, 484)
+ip_port (POWERBURST, 485)
+ip_port (AVIAN, 486)
+ip_port (SAFT, 487)
+ip_port (GSS_HTTP, 488)
+ip_port (NEST_PROTOCOL, 489)
+ip_port (MICOM_PFS, 490)
+ip_port (GO_LOGIN, 491)
+ip_port (TICF_1, 492)
+ip_port (TICF_2, 493)
+ip_port (POV_RAY, 494)
+ip_port (INTECOURIER, 495)
+ip_port (PIM_RP_DISC, 496)
+ip_port (DANTZ, 497)
+ip_port (SIAM, 498)
+ip_port (ISO_ILL, 499)
+ip_port (ISAKMP, 500)
+ip_port (STMF, 501)
+ip_port (ASA_APPL_PROTO, 502)
+ip_port (INTRINSA, 503)
+ip_port (CITADEL, 504)
+ip_port (MAILBOX_LM, 505)
+ip_port (OHIMSRV, 506)
+ip_port (CRS, 507)
+ip_port (XVTTP, 508)
+ip_port (SNARE, 509)
+ip_port (FCP, 510)
+ip_port (PASSGO, 511)
+ip_port (EXEC, 512)
+ip_port (LOGIN, 513)
+ip_port (SHELL, 514)
+ip_port (PRINTER, 515)
+ip_port (VIDEOTEX, 516)
+ip_port (TALK, 517)
+ip_port (NTALK, 518)
+ip_port (UTIME, 519)
+ip_port (EFS, 520)
+ip_port (RIPNG, 521)
+ip_port (ULP, 522)
+ip_port (IBM_DB2, 523)
+ip_port (NCP, 524)
+ip_port (TIMED, 525)
+ip_port (TEMPO, 526)
+ip_port (STX, 527)
+ip_port (CUSTIX, 528)
+ip_port (IRC_SERV, 529)
+ip_port (COURIER, 530)
+ip_port (CONFERENCE, 531)
+ip_port (NETNEWS, 532)
+ip_port (NETWALL, 533)
+ip_port (MM_ADMIN, 534)
+ip_port (IIOP, 535)
+ip_port (OPALIS_RDV, 536)
+ip_port (NMSP, 537)
+ip_port (GDOMAP, 538)
+ip_port (APERTUS_LDP, 539)
+ip_port (UUCP, 540)
+ip_port (UUCP_RLOGIN, 541)
+ip_port (COMMERCE, 542)
+ip_port (KLOGIN, 543)
+ip_port (KSHELL, 544)
+ip_port (APPLEQTCSRVR, 545)
+ip_port (DHCPV6_CLIENT, 546)
+ip_port (DHCPV6_SERVER, 547)
+ip_port (AFPOVERTCP, 548)
+ip_port (IDFP, 549)
+ip_port (NEW_RWHO, 550)
+ip_port (CYBERCASH, 551)
+ip_port (DEVSHR_NTS, 552)
+ip_port (PIRP, 553)
+ip_port (RTSP, 554)
+ip_port (DSF, 555)
+ip_port (REMOTEFS, 556)
+ip_port (OPENVMS_SYSIPC, 557)
+ip_port (SDNSKMP, 558)
+ip_port (TEEDTAP, 559)
+ip_port (RMONITOR, 560)
+ip_port (MONITOR, 561)
+ip_port (CHSHELL, 562)
+ip_port (NNTPS, 563)
+ip_port (9PFS, 564)
+ip_port (WHOAMI, 565)
+ip_port (STREETTALK, 566)
+ip_port (BANYAN_RPC, 567)
+ip_port (MS_SHUTTLE, 568)
+ip_port (MS_ROME, 569)
+ip_port (METER, 570)
+ip_port (METER1, 571)
+ip_port (SONAR, 572)
+ip_port (BANYAN_VIP, 573)
+ip_port (FTP_AGENT, 574)
+ip_port (VEMMI, 575)
+ip_port (IPCD, 576)
+ip_port (VNAS, 577)
+ip_port (IPDD, 578)
+ip_port (DECBSRV, 579)
+ip_port (SNTP_HEARTBEAT, 580)
+ip_port (BDP, 581)
+ip_port (SCC_SECURITY, 582)
+ip_port (PHILIPS_VC, 583)
+ip_port (KEYSERVER, 584)
+ip_port (IMAP4_SSL, 585)
+ip_port (PASSWORD_CHG, 586)
+ip_port (SUBMISSION, 587)
+ip_port (CAL, 588)
+ip_port (EYELINK, 589)
+ip_port (TNS_CML, 590)
+ip_port (HTTP_ALT, 591)
+ip_port (EUDORA_SET, 592)
+ip_port (HTTP_RPC_EPMAP, 593)
+ip_port (TPIP, 594)
+ip_port (CAB_PROTOCOL, 595)
+ip_port (SMSD, 596)
+ip_port (PTCNAMESERVICE, 597)
+ip_port (SCO_WEBSRVRMG3, 598)
+ip_port (ACP, 599)
+ip_port (IPCSERVER, 600)
+ip_port (SYSLOG_CONN, 601)
+ip_port (XMLRPC_BEEP, 602)
+ip_port (IDXP, 603)
+ip_port (TUNNEL, 604)
+ip_port (SOAP_BEEP, 605)
+ip_port (URM, 606)
+ip_port (NQS, 607)
+ip_port (SIFT_UFT, 608)
+ip_port (NPMP_TRAP, 609)
+ip_port (NPMP_LOCAL, 610)
+ip_port (NPMP_GUI, 611)
+ip_port (HMMP_IND, 612)
+ip_port (HMMP_OP, 613)
+ip_port (SSHELL, 614)
+ip_port (SCO_INETMGR, 615)
+ip_port (SCO_SYSMGR, 616)
+ip_port (SCO_DTMGR, 617)
+ip_port (DEI_ICDA, 618)
+ip_port (COMPAQ_EVM, 619)
+ip_port (SCO_WEBSRVRMGR, 620)
+ip_port (ESCP_IP, 621)
+ip_port (COLLABORATOR, 622)
+ip_port (ASF_RMCP, 623)
+ip_port (CRYPTOADMIN, 624)
+ip_port (DEC_DLM, 625)
+ip_port (ASIA, 626)
+ip_port (PASSGO_TIVOLI, 627)
+ip_port (QMQP, 628)
+ip_port (3COM_AMP3, 629)
+ip_port (RDA, 630)
+ip_port (IPP, 631)
+ip_port (BMPP, 632)
+ip_port (SERVSTAT, 633)
+ip_port (GINAD, 634)
+ip_port (RLZDBASE, 635)
+ip_port (LDAPS, 636)
+ip_port (LANSERVER, 637)
+ip_port (MCNS_SEC, 638)
+ip_port (MSDP, 639)
+ip_port (ENTRUST_SPS, 640)
+ip_port (REPCMD, 641)
+ip_port (ESRO_EMSDP, 642)
+ip_port (SANITY, 643)
+ip_port (DWR, 644)
+ip_port (PSSC, 645)
+ip_port (LDP, 646)
+ip_port (DHCP_FAILOVER, 647)
+ip_port (RRP, 648)
+ip_port (CADVIEW_3D, 649)
+ip_port (OBEX, 650)
+ip_port (IEEE_MMS, 651)
+ip_port (HELLO_PORT, 652)
+ip_port (REPSCMD, 653)
+ip_port (AODV, 654)
+ip_port (TINC, 655)
+ip_port (SPMP, 656)
+ip_port (RMC, 657)
+ip_port (TENFOLD, 658)
+ip_port (MAC_SRVR_ADMIN, 660)
+ip_port (HAP, 661)
+ip_port (PFTP, 662)
+ip_port (PURENOISE, 663)
+ip_port (ASF_SECURE_RMCP, 664)
+ip_port (SUN_DR, 665)
+ip_port (MDQS, 666)
+ip_port (DOOM, 666)
+ip_port (DISCLOSE, 667)
+ip_port (MECOMM, 668)
+ip_port (MEREGISTER, 669)
+ip_port (VACDSM_SWS, 670)
+ip_port (VACDSM_APP, 671)
+ip_port (VPPS_QUA, 672)
+ip_port (CIMPLEX, 673)
+ip_port (ACAP, 674)
+ip_port (DCTP, 675)
+ip_port (VPPS_VIA, 676)
+ip_port (VPP, 677)
+ip_port (GGF_NCP, 678)
+ip_port (MRM, 679)
+ip_port (ENTRUST_AAAS, 680)
+ip_port (ENTRUST_AAMS, 681)
+ip_port (XFR, 682)
+ip_port (CORBA_IIOP, 683)
+ip_port (CORBA_IIOP_SSL, 684)
+ip_port (MDC_PORTMAPPER, 685)
+ip_port (HCP_WISMAR, 686)
+ip_port (ASIPREGISTRY, 687)
+ip_port (REALM_RUSD, 688)
+ip_port (NMAP, 689)
+ip_port (VATP, 690)
+ip_port (MSEXCH_ROUTING, 691)
+ip_port (HYPERWAVE_ISP, 692)
+ip_port (CONNENDP, 693)
+ip_port (HA_CLUSTER, 694)
+ip_port (IEEE_MMS_SSL, 695)
+ip_port (RUSHD, 696)
+ip_port (UUIDGEN, 697)
+ip_port (OLSR, 698)
+ip_port (ACCESSNETWORK, 699)
+ip_port (EPP, 700)
+ip_port (LMP, 701)
+ip_port (IRIS_BEEP, 702)
+ip_port (ELCSD, 704)
+ip_port (AGENTX, 705)
+ip_port (SILC, 706)
+ip_port (BORLAND_DSJ, 707)
+ip_port (ENTRUST_KMSH, 709)
+ip_port (ENTRUST_ASH, 710)
+ip_port (CISCO_TDP, 711)
+ip_port (TBRPF, 712)
+ip_port (NETVIEWDM1, 729)
+ip_port (NETVIEWDM2, 730)
+ip_port (NETVIEWDM3, 731)
+ip_port (NETGW, 741)
+ip_port (NETRCS, 742)
+ip_port (FLEXLM, 744)
+ip_port (FUJITSU_DEV, 747)
+ip_port (RIS_CM, 748)
+ip_port (KERBEROS_ADM, 749)
+ip_port (RFILE, 750)
+ip_port (PUMP, 751)
+ip_port (QRH, 752)
+ip_port (RRH, 753)
+ip_port (TELL, 754)
+ip_port (NLOGIN, 758)
+ip_port (CON, 759)
+ip_port (NS, 760)
+ip_port (RXE, 761)
+ip_port (QUOTAD, 762)
+ip_port (CYCLESERV, 763)
+ip_port (OMSERV, 764)
+ip_port (WEBSTER, 765)
+ip_port (PHONEBOOK, 767)
+ip_port (VID, 769)
+ip_port (CADLOCK, 770)
+ip_port (RTIP, 771)
+ip_port (CYCLESERV2, 772)
+ip_port (SUBMIT, 773)
+ip_port (RPASSWD, 774)
+ip_port (ENTOMB, 775)
+ip_port (WPAGES, 776)
+ip_port (MULTILING_HTTP, 777)
+ip_port (WPGS, 780)
+ip_port (MDBS_DAEMON, 800)
+ip_port (DEVICE, 801)
+ip_port (FCP_UDP, 810)
+ip_port (ITM_MCELL_S, 828)
+ip_port (PKIX_3_CA_RA, 829)
+ip_port (DHCP_FAILOVER2, 847)
+ip_port (GDOI, 848)
+ip_port (ISCSI, 860)
+ip_port (RSYNC, 873)
+ip_port (ICLCNET_LOCATE, 886)
+ip_port (ICLCNET_SVINFO, 887)
+ip_port (ACCESSBUILDER, 888)
+ip_port (CDDBP, 888)
+ip_port (OMGINITIALREFS, 900)
+ip_port (SMPNAMERES, 901)
+ip_port (IDEAFARM_CHAT, 902)
+ip_port (IDEAFARM_CATCH, 903)
+ip_port (XACT_BACKUP, 911)
+ip_port (APEX_MESH, 912)
+ip_port (APEX_EDGE, 913)
+ip_port (FTPS_DATA, 989)
+ip_port (FTPS, 990)
+ip_port (NAS, 991)
+ip_port (TELNETS, 992)
+ip_port (IMAPS, 993)
+ip_port (IRCS, 994)
+ip_port (POP3S, 995)
+ip_port (VSINET, 996)
+ip_port (MAITRD, 997)
+ip_port (BUSBOY, 998)
+ip_port (GARCON, 999)
+ip_port (PUPROUTER, 999)
+ip_port (CADLOCK2, 1000)
+ip_port (SURF, 1010)
+
diff --git a/src/vnet/ip/protocols.def b/src/vnet/ip/protocols.def
new file mode 100644
index 00000000..77fab31d
--- /dev/null
+++ b/src/vnet/ip/protocols.def
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Emacs editing mode -*-C-*-
+
+From http://www.iana.org/assignments/protocol-numbers
+
+PROTOCOL NUMBERS
+
+(last updated 18 October 2004)
+
+In the Internet Protocol version 4 (IPv4) [RFC791] there is a field,
+called "Protocol", to identify the next level protocol. This is an 8
+bit field. In Internet Protocol version 6 (IPv6) [RFC1883] this field
+is called the "Next Header" field.
+*/
+ip_protocol (0, IP6_HOP_BY_HOP_OPTIONS)
+ip_protocol (1, ICMP)
+ip_protocol (2, IGMP)
+ip_protocol (3, GGP)
+ip_protocol (4, IP_IN_IP)
+ip_protocol (5, ST)
+ip_protocol (6, TCP)
+ip_protocol (7, CBT)
+ip_protocol (8, EGP)
+ip_protocol (9, IGP)
+ip_protocol (10, BBN_RCC_MON)
+ip_protocol (11, NVP_II)
+ip_protocol (12, PUP)
+ip_protocol (13, ARGUS)
+ip_protocol (14, EMCON)
+ip_protocol (15, XNET)
+ip_protocol (16, CHAOS)
+ip_protocol (17, UDP)
+ip_protocol (18, MUX)
+ip_protocol (19, DCN_MEAS)
+ip_protocol (20, HMP)
+ip_protocol (21, PRM)
+ip_protocol (22, XNS_IDP)
+ip_protocol (23, TRUNK_1)
+ip_protocol (24, TRUNK_2)
+ip_protocol (25, LEAF_1)
+ip_protocol (26, LEAF_2)
+ip_protocol (27, RDP)
+ip_protocol (28, IRTP)
+ip_protocol (29, ISO_TP4)
+ip_protocol (30, NETBLT)
+ip_protocol (31, MFE_NSP)
+ip_protocol (32, MERIT_INP)
+ip_protocol (33, SEP)
+ip_protocol (34, 3PC)
+ip_protocol (35, IDPR)
+ip_protocol (36, XTP)
+ip_protocol (37, DDP)
+ip_protocol (38, IDPR_CMTP)
+ip_protocol (39, TP)
+ip_protocol (40, IL)
+ip_protocol (41, IPV6)
+ip_protocol (42, SDRP)
+ip_protocol (43, IPV6_ROUTE)
+ip_protocol (44, IPV6_FRAGMENTATION)
+ip_protocol (45, IDRP)
+ip_protocol (46, RSVP)
+ip_protocol (47, GRE)
+ip_protocol (48, MHRP)
+ip_protocol (49, BNA)
+ip_protocol (50, IPSEC_ESP)
+ip_protocol (51, IPSEC_AH)
+ip_protocol (52, I_NLSP)
+ip_protocol (53, SWIPE)
+ip_protocol (54, NARP)
+ip_protocol (55, MOBILE)
+ip_protocol (56, TLSP)
+ip_protocol (57, SKIP)
+ip_protocol (58, ICMP6)
+ip_protocol (59, IP6_NONXT)
+ip_protocol (60, IP6_DESTINATION_OPTIONS)
+ip_protocol (62, CFTP)
+ip_protocol (64, SAT_EXPAK)
+ip_protocol (65, KRYPTOLAN)
+ip_protocol (66, RVD)
+ip_protocol (67, IPPC)
+ip_protocol (69, SAT_MON)
+ip_protocol (70, VISA)
+ip_protocol (71, IPCV)
+ip_protocol (72, CPNX)
+ip_protocol (73, CPHB)
+ip_protocol (74, WSN)
+ip_protocol (75, PVP)
+ip_protocol (76, BR_SAT_MON)
+ip_protocol (77, SUN_ND)
+ip_protocol (78, WB_MON)
+ip_protocol (79, WB_EXPAK)
+ip_protocol (80, ISO_IP)
+ip_protocol (81, VMTP)
+ip_protocol (82, SECURE_VMTP)
+ip_protocol (83, VINES)
+ip_protocol (84, TTP)
+ip_protocol (85, NSFNET_IGP)
+ip_protocol (86, DGP)
+ip_protocol (87, TCF)
+ip_protocol (88, EIGRP)
+ip_protocol (89, OSPF)
+ip_protocol (90, SPRITE_RPC)
+ip_protocol (91, LARP)
+ip_protocol (92, MTP)
+ip_protocol (93, AX)
+ip_protocol (94, IPIP)
+ip_protocol (95, MICP)
+ip_protocol (96, SCC_SP)
+ip_protocol (97, ETHERIP)
+ip_protocol (98, ENCAP)
+ip_protocol (100, GMTP)
+ip_protocol (101, IFMP)
+ip_protocol (102, PNNI)
+ip_protocol (103, PIM)
+ip_protocol (104, ARIS)
+ip_protocol (105, SCPS)
+ip_protocol (106, QNX)
+ip_protocol (107, A)
+ip_protocol (108, IPCOMP)
+ip_protocol (109, SNP)
+ip_protocol (110, COMPAQ_PEER)
+ip_protocol (111, IPX_IN_IP)
+ip_protocol (112, VRRP)
+ip_protocol (113, PGM)
+ip_protocol (115, L2TP)
+ip_protocol (116, DDX)
+ip_protocol (117, IATP)
+ip_protocol (118, STP)
+ip_protocol (119, SRP)
+ip_protocol (120, UTI)
+ip_protocol (121, SMP)
+ip_protocol (122, SM)
+ip_protocol (123, PTP)
+ip_protocol (124, ISIS)
+ip_protocol (125, FIRE)
+ip_protocol (126, CRTP)
+ip_protocol (127, CRUDP)
+ip_protocol (128, SSCOPMCE)
+ip_protocol (129, IPLT)
+ip_protocol (130, SPS)
+ip_protocol (131, PIPE)
+ip_protocol (132, SCTP)
+ip_protocol (133, FC)
+ip_protocol (134, RSVP_E2E_IGNORE)
+ip_protocol (135, MOBILITY)
+ip_protocol (136, UDP_LITE)
+ip_protocol (137, MPLS_IN_IP)
+ip_protocol (255, RESERVED)
+
diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c
new file mode 100644
index 00000000..0869954c
--- /dev/null
+++ b/src/vnet/ip/punt.c
@@ -0,0 +1,830 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Local TCP/IP stack punt infrastructure.
+ *
+ * Provides a set of VPP nodes together with the relevant APIs and CLI
+ * commands in order to adjust and dispatch packets from the VPP data plane
+ * to the local TCP/IP stack
+ */
+
+#include <vnet/ip/ip.h>
+#include <vlib/vlib.h>
+#include <vnet/pg/pg.h>
+#include <vnet/udp/udp.h>
+#include <vnet/tcp/tcp.h>
+#include <vnet/ip/punt.h>
+#include <vppinfra/sparse_vec.h>
+#include <vlib/unix/unix.h>
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/uio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+/* X-macro listing the next nodes of the UDP punt nodes as
+ * (enum suffix, graph node name) pairs. */
+#define foreach_punt_next \
+ _ (PUNT, "error-punt")
+
+/* Next-node indices generated from foreach_punt_next (PUNT_NEXT_<suffix>);
+ * PUNT_N_NEXT is the number of entries. */
+typedef enum
+{
+#define _(s,n) PUNT_NEXT_##s,
+ foreach_punt_next
+#undef _
+ PUNT_N_NEXT,
+} punt_next_t;
+
+/* Next-node indices of the "punt-socket-rx" node; they index the next_nodes
+ * table of its registration. PUNT_SOCKET_RX_N_NEXT is the entry count. */
+enum punt_socket_rx_next_e
+{
+ PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT,
+ PUNT_SOCKET_RX_NEXT_IP4_LOOKUP,
+ PUNT_SOCKET_RX_NEXT_IP6_LOOKUP,
+ PUNT_SOCKET_RX_N_NEXT
+};
+
+/* Forward declarations of the graph node registrations defined further
+ * down in this file, so that code above them can read their .index. */
+vlib_node_registration_t udp4_punt_node;
+vlib_node_registration_t udp6_punt_node;
+vlib_node_registration_t udp4_punt_socket_node;
+vlib_node_registration_t udp6_punt_socket_node;
+static vlib_node_registration_t punt_socket_rx_node;
+
+/* Global punt state shared by the nodes, CLI and API entry points below. */
+punt_main_t punt_main;
+
+/* Return the punt server socket path held in the global punt state. */
+char *
+vnet_punt_get_server_pathname (void)
+{
+  return punt_main.sun_path;
+}
+
+/** @brief IPv4/IPv6 UDP punt node main loop.
+
+ This is the main loop inline function for IPv4/IPv6 UDP punt
+ transition node. Every buffer of the frame is moved back by the size
+ of the IP and UDP headers, tagged with the PUNT_ERROR_UDP_PORT error
+ and enqueued to the PUNT_NEXT_PUNT next node.
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param from_frame vlib_frame_t whose contents should be dispatched
+ @param is_ip4 non-zero when called for the IPv4 node, zero for IPv6
+
+ @returns the number of buffers in from_frame
+*/
+always_inline uword
+udp46_punt_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * from_frame, int is_ip4)
+{
+ u32 n_left_from, *from, *to_next;
+ word advance;
+
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+
+ /* udp[46]_lookup hands us the data payload, not the IP header */
+ /* The advance is negative: it steps back over the UDP and IP headers. */
+ if (is_ip4)
+ advance = -(sizeof (ip4_header_t) + sizeof (udp_header_t));
+ else
+ advance = -(sizeof (ip6_header_t) + sizeof (udp_header_t));
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, PUNT_NEXT_PUNT, to_next, n_left_to_next);
+
+ /* Copy buffer indices one at a time while both frames have room. */
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ vlib_buffer_advance (b0, advance);
+ /* Record why the buffer is being punted. */
+ b0->error = node->errors[PUNT_ERROR_UDP_PORT];
+ }
+
+ vlib_put_next_frame (vm, node, PUNT_NEXT_PUNT, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+/* Error strings built from punt_error.def: one string per punt_error(n,s)
+ * entry, in file order. Used as .error_strings by the nodes in this file. */
+static char *punt_error_strings[] = {
+#define punt_error(n,s) s,
+#include "punt_error.def"
+#undef punt_error
+};
+
+/** @brief IPv4 UDP punt node.
+ @node ip4-udp-punt
+
+ This is the IPv4 UDP punt transition node. It is registered as a next
+ node for the "ip4-udp-lookup" handling UDP port(s) requested for punt.
+ The buffer's current data pointer is adjusted to the original packet
+ IPv4 header. All buffers are dispatched to "error-punt".
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param from_frame vlib_frame_t whose contents should be dispatched
+
+ @returns the value returned by udp46_punt_inline()
+
+ @par Graph mechanics: next index usage
+
+ @em Sets:
+ - <code>vnet_buffer(b)->current_data</code>
+ - <code>vnet_buffer(b)->current_len</code>
+
+ <em>Next Index:</em>
+ - Dispatches the packet to the "error-punt" node
+*/
+static uword
+udp4_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_punt_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+/** @brief IPv6 UDP punt node.
+ @node ip6-udp-punt
+
+ This is the IPv6 UDP punt transition node. It is registered as a next
+ node for the "ip6-udp-lookup" handling UDP port(s) requested for punt.
+ The buffer's current data pointer is adjusted to the original packet
+ IPv6 header. All buffers are dispatched to "error-punt".
+
+ @param vm vlib_main_t corresponding to the current thread
+ @param node vlib_node_runtime_t
+ @param from_frame vlib_frame_t whose contents should be dispatched
+
+ @returns the value returned by udp46_punt_inline()
+
+ @par Graph mechanics: next index usage
+
+ @em Sets:
+ - <code>vnet_buffer(b)->current_data</code>
+ - <code>vnet_buffer(b)->current_len</code>
+
+ <em>Next Index:</em>
+ - Dispatches the packet to the "error-punt" node
+*/
+static uword
+udp6_punt (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+/* *INDENT-OFF* */
+/* Registration of the "ip4-udp-punt" graph node, served by udp4_punt(). */
+VLIB_REGISTER_NODE (udp4_punt_node) = {
+ .function = udp4_punt,
+ .name = "ip4-udp-punt",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+
+ .n_next_nodes = PUNT_N_NEXT,
+ /* Next-node names come from the foreach_punt_next list. */
+ .next_nodes = {
+#define _(s,n) [PUNT_NEXT_##s] = n,
+ foreach_punt_next
+#undef _
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (udp4_punt_node, udp4_punt);
+
+/* Registration of the "ip6-udp-punt" graph node, served by udp6_punt(). */
+VLIB_REGISTER_NODE (udp6_punt_node) = {
+ .function = udp6_punt,
+ .name = "ip6-udp-punt",
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+
+ .n_next_nodes = PUNT_N_NEXT,
+ /* Next-node names come from the foreach_punt_next list. */
+ .next_nodes = {
+#define _(s,n) [PUNT_NEXT_##s] = n,
+ foreach_punt_next
+#undef _
+ },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (udp6_punt_node, udp6_punt);;
+
+/* *INDENT-ON* */
+
+/*
+ * Look up the client socket address registered for a UDP destination port.
+ *
+ * is_ip4 selects the IPv4 or the IPv6 client table. Returns a pointer to
+ * the stored address, or 0 when the sparse vector has no entry for port.
+ */
+static struct sockaddr_un *
+punt_socket_get (bool is_ip4, u16 port)
+{
+  punt_client_t *clients;
+  u16 slot;
+
+  if (is_ip4)
+    clients = punt_main.clients_by_dst_port4;
+  else
+    clients = punt_main.clients_by_dst_port6;
+
+  slot = sparse_vec_index (clients, port);
+  if (slot != SPARSE_VEC_INVALID_INDEX)
+    return &vec_elt (clients, slot).caddr;
+
+  return 0;
+}
+
+/*
+ * Record the client socket that receives packets punted for a UDP port.
+ *
+ * is_ip4          selects the IPv4 or the IPv6 client table.
+ * protocol        currently unused.
+ * port            UDP destination port used as the sparse vector index.
+ * client_pathname client socket path; sizeof (sun_path) bytes are copied,
+ *                 so the caller must provide a buffer at least that large.
+ *
+ * An existing entry for the same port is overwritten.
+ */
+static void
+punt_socket_register (bool is_ip4, u8 protocol, u16 port,
+                      char *client_pathname)
+{
+  punt_main_t *pm = &punt_main;
+  punt_client_t c, *n;
+  punt_client_t **clients = is_ip4 ? &pm->clients_by_dst_port4 :
+    &pm->clients_by_dst_port6;
+  punt_client_t *v = *clients;
+
+  memset (&c, 0, sizeof (c));
+  memcpy (c.caddr.sun_path, client_pathname, sizeof (c.caddr.sun_path));
+  c.caddr.sun_family = AF_UNIX;
+  c.port = port;
+  n = sparse_vec_validate (v, port);
+  n[0] = c;
+
+  /*
+   * sparse_vec_validate () may reallocate the vector it is handed when it
+   * inserts a new member, so publish the possibly updated pointer back to
+   * the table; otherwise punt_main would keep pointing at the old memory.
+   */
+  *clients = v;
+}
+
+/* $$$$ Just leaves the mapping in place for now */
+/* Placeholder: all three arguments are ignored and the client entry stored
+ * by punt_socket_register () is not removed. */
+static void
+punt_socket_unregister (bool is_ip4, u8 protocol, u16 port)
+{
+ return;
+}
+
+/**
+ * @brief Forward punted UDP packets to the registered client sockets.
+ *
+ * For every buffer of the frame the data pointer is moved back over the
+ * UDP and IP headers, the client registered for the UDP destination port
+ * is looked up, and a punt_packetdesc_t followed by the packet data
+ * (starting sizeof (ethernet_header_t) bytes before the IP header) is
+ * sent to that client with sendmsg () on the punt socket. A packet with
+ * no registered client, or whose send fails or is short, is counted as
+ * PUNT_ERROR_SOCKET_TX_ERROR. All buffers of the frame are freed before
+ * returning.
+ *
+ * @param vm vlib_main_t corresponding to the current thread
+ * @param node vlib_node_runtime_t
+ * @param frame vlib_frame_t whose buffers should be sent
+ * @param is_ip4 true for the IPv4 node, false for the IPv6 node
+ *
+ * @returns the number of buffers in the frame
+ */
+always_inline uword
+udp46_punt_socket_inline (vlib_main_t * vm,
+                          vlib_node_runtime_t * node,
+                          vlib_frame_t * frame, bool is_ip4)
+{
+  u32 *buffers = vlib_frame_args (frame);
+  uword n_packets = frame->n_vectors;
+  struct iovec *iovecs = 0;
+  punt_main_t *pm = &punt_main;
+  uword i;
+
+  u32 node_index = is_ip4 ? udp4_punt_socket_node.index :
+    udp6_punt_socket_node.index;
+
+  for (i = 0; i < n_packets; i++)
+    {
+      struct iovec *iov;
+      vlib_buffer_t *b;
+      uword l;
+      punt_packetdesc_t packetdesc;
+
+      b = vlib_get_buffer (vm, buffers[i]);
+
+      /* Reverse UDP Punt advance */
+      udp_header_t *udp;
+      if (is_ip4)
+        {
+          vlib_buffer_advance (b, -(sizeof (ip4_header_t) +
+                                    sizeof (udp_header_t)));
+          ip4_header_t *ip = vlib_buffer_get_current (b);
+          udp = (udp_header_t *) (ip + 1);
+        }
+      else
+        {
+          vlib_buffer_advance (b, -(sizeof (ip6_header_t) +
+                                    sizeof (udp_header_t)));
+          ip6_header_t *ip = vlib_buffer_get_current (b);
+          udp = (udp_header_t *) (ip + 1);
+        }
+
+      u16 port = clib_net_to_host_u16 (udp->dst_port);
+
+      /*
+       * Find registered client
+       * If no registered client, drop packet and count
+       */
+      struct sockaddr_un *caddr;
+      caddr = punt_socket_get (is_ip4, port);
+      if (!caddr)
+        {
+          vlib_node_increment_counter (vm, node_index,
+                                       PUNT_ERROR_SOCKET_TX_ERROR, 1);
+          /* Only this packet is skipped; the rest of the frame is still
+             delivered. The buffer is freed with the others below. */
+          continue;
+        }
+
+      /* Re-set iovecs if present. */
+      if (iovecs)
+        _vec_len (iovecs) = 0;
+
+      /* Add packet descriptor */
+      packetdesc.sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+      packetdesc.action = 0;
+      vec_add2 (iovecs, iov, 1);
+      iov->iov_base = &packetdesc;
+      iov->iov_len = sizeof (packetdesc);
+
+      /** VLIB buffer chain -> Unix iovec(s). */
+      vlib_buffer_advance (b, -(sizeof (ethernet_header_t)));
+      vec_add2 (iovecs, iov, 1);
+      iov->iov_base = b->data + b->current_data;
+      iov->iov_len = l = b->current_length;
+
+      if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+        {
+          do
+            {
+              b = vlib_get_buffer (vm, b->next_buffer);
+
+              vec_add2 (iovecs, iov, 1);
+
+              iov->iov_base = b->data + b->current_data;
+              iov->iov_len = b->current_length;
+              l += b->current_length;
+            }
+          while (b->flags & VLIB_BUFFER_NEXT_PRESENT);
+        }
+
+      struct msghdr msg = {
+        .msg_name = caddr,
+        .msg_namelen = sizeof (*caddr),
+        .msg_iov = iovecs,
+        .msg_iovlen = vec_len (iovecs),
+      };
+
+      /*
+       * Compare as signed: sendmsg () returns -1 on failure, and against
+       * an unsigned length that -1 would be converted to a huge value and
+       * the failure would never be counted.
+       */
+      if (sendmsg (pm->socket_fd, &msg, 0) < (ssize_t) l)
+        vlib_node_increment_counter (vm, node_index,
+                                     PUNT_ERROR_SOCKET_TX_ERROR, 1);
+    }
+
+  /* The iovec scratch vector is per call; release it. */
+  vec_free (iovecs);
+
+  /* Chained buffers are handled above, so free whole chains rather than
+     only the head buffers. */
+  vlib_buffer_free (vm, buffers, n_packets);
+
+  return n_packets;
+}
+
+/* Node function of "ip4-udp-punt-socket": runs udp46_punt_socket_inline ()
+ * with is_ip4 set to true and returns its result. */
+static uword
+udp4_punt_socket (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_punt_socket_inline (vm, node, from_frame, true /* is_ip4 */ );
+}
+
+/* Node function of "ip6-udp-punt-socket": runs udp46_punt_socket_inline ()
+ * with is_ip4 set to false and returns its result. */
+static uword
+udp6_punt_socket (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * from_frame)
+{
+ return udp46_punt_socket_inline (vm, node, from_frame, false /* is_ip4 */ );
+}
+
+
+/* *INDENT-OFF* */
+/* Registration of the "ip4-udp-punt-socket" graph node. It declares no
+ * next nodes and is flagged as a drop node. */
+VLIB_REGISTER_NODE (udp4_punt_socket_node) = {
+ .function = udp4_punt_socket,
+ .name = "ip4-udp-punt-socket",
+ .flags = VLIB_NODE_FLAG_IS_DROP,
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+};
+/* Registration of the "ip6-udp-punt-socket" graph node. It declares no
+ * next nodes and is flagged as a drop node. */
+VLIB_REGISTER_NODE (udp6_punt_socket_node) = {
+ .function = udp6_punt_socket,
+ .name = "ip6-udp-punt-socket",
+ .flags = VLIB_NODE_FLAG_IS_DROP,
+ /* Takes a vector of packets. */
+ .vector_size = sizeof (u32),
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+};
+/* *INDENT-ON* */
+
+/* Per-packet trace record of the punt-socket-rx node; both fields are
+ * copied from the packet descriptor read from the socket. */
+typedef struct
+{
+ enum punt_action_e action;
+ u32 sw_if_index;
+} punt_trace_t;
+
+/*
+ * Trace formatter for punt_trace_t records.
+ *
+ * Consumes a vlib_main_t *, a vlib_node_t * and a punt_trace_t * from va,
+ * in that order, and appends the interface name and the action to s.
+ */
+static u8 *
+format_punt_trace (u8 * s, va_list * va)
+{
+  punt_trace_t *trace;
+
+  /* The first two arguments are part of the calling convention but are
+     not needed here. */
+  (void) va_arg (*va, vlib_main_t *);
+  (void) va_arg (*va, vlib_node_t *);
+  trace = va_arg (*va, punt_trace_t *);
+
+  return format (s, "%U Action: %d", format_vnet_sw_if_index_name,
+                 vnet_get_main (), trace->sw_if_index, trace->action);
+}
+
+/**
+ * @brief Read one packet from a punt socket and inject it into the graph.
+ *
+ * A single readv () fills a punt_packetdesc_t and one freshly allocated
+ * buffer. The descriptor's action selects the next node
+ * (interface-output, ip4-lookup or ip6-lookup) and how sw_if_index is
+ * applied to the buffer. On any failure the matching punt error counter
+ * is incremented and nothing is enqueued.
+ *
+ * @param vm vlib_main_t corresponding to the current thread
+ * @param node vlib_node_runtime_t of the punt-socket-rx node
+ * @param fd file descriptor to read from
+ *
+ * @returns 1 when a packet was enqueued, 0 otherwise
+ */
+static uword
+punt_socket_rx_fd (vlib_main_t * vm, vlib_node_runtime_t * node, u32 fd)
+{
+  const uword buffer_size = VLIB_BUFFER_DATA_SIZE;
+  u32 n_trace = vlib_get_trace_count (vm, node);
+  u32 next = node->cached_next_index;
+  u32 n_left_to_next, next_index;
+  u32 *to_next;
+  u32 error = PUNT_ERROR_NONE;
+  vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
+
+  /* $$$$ Only dealing with one buffer at the time for now */
+
+  u32 bi;
+  vlib_buffer_t *b;
+  punt_packetdesc_t packetdesc;
+  ssize_t size;
+  struct iovec io[2];
+
+  if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+    {
+      error = PUNT_ERROR_NOBUFFER;
+      goto error;
+    }
+
+  /* Scatter the datagram: descriptor first, packet data into the buffer. */
+  b = vlib_get_buffer (vm, bi);
+  io[0].iov_base = &packetdesc;
+  io[0].iov_len = sizeof (packetdesc);
+  io[1].iov_base = b->data;
+  io[1].iov_len = buffer_size;
+
+  size = readv (fd, io, 2);
+  /* We need at least the packet descriptor plus a header */
+  if (size <= (int) (sizeof (packetdesc) + sizeof (ip4_header_t)))
+    {
+      vlib_buffer_free (vm, &bi, 1);
+      error = PUNT_ERROR_READV;
+      goto error;
+    }
+
+  b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED;
+  b->current_length = size - sizeof (packetdesc);
+
+  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
+
+  switch (packetdesc.action)
+    {
+    case PUNT_L2:
+      vnet_buffer (b)->sw_if_index[VLIB_TX] = packetdesc.sw_if_index;
+      next_index = PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT;
+      break;
+
+    case PUNT_IP4_ROUTED:
+      vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index;
+      vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
+      next_index = PUNT_SOCKET_RX_NEXT_IP4_LOOKUP;
+      break;
+
+    case PUNT_IP6_ROUTED:
+      vnet_buffer (b)->sw_if_index[VLIB_RX] = packetdesc.sw_if_index;
+      vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
+      next_index = PUNT_SOCKET_RX_NEXT_IP6_LOOKUP;
+      break;
+
+    default:
+      error = PUNT_ERROR_ACTION;
+      vlib_buffer_free (vm, &bi, 1);
+      goto error;
+    }
+
+  if (PREDICT_FALSE (n_trace > 0))
+    {
+      punt_trace_t *t;
+      vlib_trace_buffer (vm, node, next_index, b, 1 /* follow_chain */ );
+      vlib_set_trace_count (vm, node, --n_trace);
+      t = vlib_add_trace (vm, node, b, sizeof (*t));
+      t->sw_if_index = packetdesc.sw_if_index;
+      t->action = packetdesc.action;
+    }
+
+  to_next[0] = bi;
+  to_next++;
+  n_left_to_next--;
+
+  vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next, n_left_to_next,
+                                   bi, next_index);
+  vlib_put_next_frame (vm, node, next, n_left_to_next);
+  return 1;
+
+error:
+  /* The next frame was acquired at the top of the function; hand it back
+     unchanged so every vlib_get_next_frame () has a matching put. */
+  vlib_put_next_frame (vm, node, next, n_left_to_next);
+  vlib_node_increment_counter (vm, punt_socket_rx_node.index, error, 1);
+  return 0;
+}
+
+/**
+ * @brief Node function of "punt-socket-rx".
+ *
+ * Drains punt_main.ready_fds, calling punt_socket_rx_fd () once for each
+ * queued file descriptor.
+ *
+ * @param vm vlib_main_t corresponding to the current thread
+ * @param node vlib_node_runtime_t
+ * @param frame unused
+ *
+ * @returns the number of packets enqueued by punt_socket_rx_fd ()
+ */
+static uword
+punt_socket_rx (vlib_main_t * vm,
+                vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  punt_main_t *pm = &punt_main;
+  u32 total_count = 0;
+
+  /*
+   * Always consume slot 0 until the vector is empty. Deleting slot i and
+   * then advancing i would step over the element that vec_del1 () moves
+   * into the freed slot, leaving part of the queue unserviced.
+   */
+  while (vec_len (pm->ready_fds) > 0)
+    {
+      total_count += punt_socket_rx_fd (vm, node, pm->ready_fds[0]);
+      vec_del1 (pm->ready_fds, 0);
+    }
+  return total_count;
+}
+
+/* Registration of the "punt-socket-rx" input node, which runs in interrupt
+ * state and feeds interface-output, ip4-lookup or ip6-lookup. */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (punt_socket_rx_node, static) = {
+  .function = punt_socket_rx,
+  .name = "punt-socket-rx",
+  .type = VLIB_NODE_TYPE_INPUT,
+  .state = VLIB_NODE_STATE_INTERRUPT,
+  .vector_size = 1,
+  .n_errors = PUNT_N_ERROR,
+  .error_strings = punt_error_strings,
+  .n_next_nodes = PUNT_SOCKET_RX_N_NEXT,
+  .next_nodes = {
+    [PUNT_SOCKET_RX_NEXT_INTERFACE_OUTPUT] = "interface-output",
+    [PUNT_SOCKET_RX_NEXT_IP4_LOOKUP] = "ip4-lookup",
+    [PUNT_SOCKET_RX_NEXT_IP6_LOOKUP] = "ip6-lookup",
+  },
+  .format_trace = format_punt_trace,
+};
+/* *INDENT-ON* */
+
+/*
+ * Read-ready callback installed on the punt socket.
+ *
+ * Marks the punt-socket-rx node as having a pending interrupt and appends
+ * the file descriptor to punt_main.ready_fds. Always returns 0.
+ */
+static clib_error_t *
+punt_socket_read_ready (clib_file_t * uf)
+{
+  /* Schedule the rx node */
+  vlib_node_set_interrupt_pending (vlib_get_main (),
+                                   punt_socket_rx_node.index);
+
+  /* Queue the descriptor the rx node should read from. */
+  vec_add1 (punt_main.ready_fds, uf->file_descriptor);
+
+  return 0;
+}
+
+/**
+ * @brief Register a client socket for packets punted on a UDP port.
+ *
+ * @param vm vlib_main_t corresponding to the current thread
+ * @param header_version packet descriptor version the client expects;
+ *        must equal PUNT_PACKETDESC_VERSION
+ * @param is_ip4 true for IPv4, false for IPv6
+ * @param protocol L4 protocol; only IP_PROTOCOL_UDP is accepted
+ * @param port UDP destination port; (u16) ~0 is rejected
+ * @param client_pathname path of the client's socket
+ *
+ * @returns 0 on success, a clib_error_t describing the problem otherwise
+ */
+clib_error_t *
+vnet_punt_socket_add (vlib_main_t * vm, u32 header_version,
+                      bool is_ip4, u8 protocol, u16 port,
+                      char *client_pathname)
+{
+  u32 target_node;
+
+  if (!punt_main.is_configured)
+    return clib_error_return (0, "socket is not configured");
+
+  if (header_version != PUNT_PACKETDESC_VERSION)
+    return clib_error_return (0, "Invalid packet descriptor version");
+
+  /* For now we only support UDP punt */
+  if (protocol != IP_PROTOCOL_UDP)
+    return clib_error_return (0,
+                              "only UDP protocol (%d) is supported, got %d",
+                              IP_PROTOCOL_UDP, protocol);
+
+  if (port == (u16) ~ 0)
+    return clib_error_return (0, "UDP port number required");
+
+  /* Register client */
+  punt_socket_register (is_ip4, protocol, port, client_pathname);
+
+  /* Steer the port to the punt-socket node of the matching address family. */
+  if (is_ip4)
+    target_node = udp4_punt_socket_node.index;
+  else
+    target_node = udp6_punt_socket_node.index;
+
+  udp_register_dst_port (vm, port, target_node, is_ip4);
+
+  return 0;
+}
+
+/**
+ * @brief Remove the punt-socket registration of a UDP port.
+ *
+ * @param vm vlib_main_t corresponding to the current thread
+ * @param is_ip4 true for IPv4, false for IPv6
+ * @param l4_protocol L4 protocol, passed through to the unregister helper
+ * @param port UDP destination port to unregister
+ *
+ * @returns 0 on success, an error when the punt socket is not configured
+ */
+clib_error_t *
+vnet_punt_socket_del (vlib_main_t * vm, bool is_ip4, u8 l4_protocol, u16 port)
+{
+  if (punt_main.is_configured)
+    {
+      punt_socket_unregister (is_ip4, l4_protocol, port);
+      udp_unregister_dst_port (vm, port, is_ip4);
+      return 0;
+    }
+
+  return clib_error_return (0, "socket is not configured");
+}
+
+/**
+ * @brief Request IP traffic punt to the local TCP/IP stack.
+ *
+ * @em Note
+ * - UDP and TCP are the only protocols supported in the current implementation
+ * - A specific port can only be added for UDP; for TCP only the
+ *   "all ports" form (port == (u16) ~0) is accepted
+ * - Deleting a specific port is not supported
+ *
+ * @param vm vlib_main_t corresponding to the current thread
+ * @param ipv IP protocol version.
+ * 4 - IPv4, 6 - IPv6, ~0 for both IPv6 and IPv4
+ * @param protocol 8-bits L4 protocol value
+ * UDP is 17
+ * TCP is 6
+ * @param port 16-bits L4 (TCP/IP) port number when applicable (UDP only),
+ * or (u16) ~0 to add/delete the punt of all 'unknown' ports
+ * @param is_add true to add the punt, false to delete it
+ *
+ * @returns 0 on success, a clib_error_t describing the failure otherwise
+ */
+clib_error_t *
+vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port,
+ bool is_add)
+{
+
+ /* Only UDP and TCP are supported */
+ if (protocol != IP_PROTOCOL_UDP && protocol != IP_PROTOCOL_TCP)
+ return clib_error_return (0,
+ "only UDP (%d) and TCP (%d) protocols are supported, got %d",
+ IP_PROTOCOL_UDP, IP_PROTOCOL_TCP, protocol);
+
+ if (ipv != (u8) ~ 0 && ipv != 4 && ipv != 6)
+ return clib_error_return (0, "IP version must be 4 or 6, got %d", ipv);
+
+ /* port == ~0 means "all": toggle the punt of unknown ports */
+ if (port == (u16) ~ 0)
+ {
+ if ((ipv == 4) || (ipv == (u8) ~ 0))
+ {
+ if (protocol == IP_PROTOCOL_UDP)
+ udp_punt_unknown (vm, 1, is_add);
+ else if (protocol == IP_PROTOCOL_TCP)
+ tcp_punt_unknown (vm, 1, is_add);
+ }
+
+ if ((ipv == 6) || (ipv == (u8) ~ 0))
+ {
+ if (protocol == IP_PROTOCOL_UDP)
+ udp_punt_unknown (vm, 0, is_add);
+ else if (protocol == IP_PROTOCOL_TCP)
+ tcp_punt_unknown (vm, 0, is_add);
+ }
+
+ return 0;
+ }
+
+ /* Specific port: only adding a UDP port is implemented */
+ else if (is_add)
+ {
+ if (protocol == IP_PROTOCOL_TCP)
+ return clib_error_return (0, "punt TCP ports is not supported yet");
+
+ if (ipv == 4 || ipv == (u8) ~ 0)
+ udp_register_dst_port (vm, port, udp4_punt_node.index, 1);
+
+ if (ipv == 6 || ipv == (u8) ~ 0)
+ udp_register_dst_port (vm, port, udp6_punt_node.index, 0);
+
+ return 0;
+ }
+ else
+ return clib_error_return (0, "punt delete is not supported yet");
+}
+
+/**
+ * @brief Handler of the "set punt" CLI command.
+ *
+ * Tokens are processed left to right: "udp" / "tcp" select the protocol
+ * and "del" switches to delete mode for the tokens that follow; "all" or
+ * a port number then calls vnet_punt_add_del () for both IPv4 and IPv6.
+ * Errors returned by vnet_punt_add_del () are reported and parsing
+ * continues.
+ *
+ * @param vm vlib_main_t corresponding to the current thread
+ * @param input command arguments
+ * @param cmd unused
+ *
+ * @returns 0 once all input is consumed, or an error for a port number
+ *          above 65535 or an unrecognised token
+ */
+static clib_error_t *
+punt_cli (vlib_main_t * vm,
+          unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  u32 port;
+  bool is_add = true;
+  u32 protocol = ~0;
+  clib_error_t *error;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "del"))
+        is_add = false;
+      else if (unformat (input, "all"))
+        {
+          /* punt both IPv6 and IPv4 when used in CLI */
+          error = vnet_punt_add_del (vm, ~0, protocol, ~0, is_add);
+          if (error)
+            clib_error_report (error);
+        }
+      else if (unformat (input, "%d", &port))
+        {
+          /* The port is passed on as a u16; refuse values that would be
+             silently truncated to a different port. */
+          if (port > 0xffff)
+            return clib_error_return (0, "invalid port number %d", port);
+
+          /* punt both IPv6 and IPv4 when used in CLI */
+          error = vnet_punt_add_del (vm, ~0, protocol, port, is_add);
+          if (error)
+            clib_error_report (error);
+        }
+      else if (unformat (input, "udp"))
+        protocol = IP_PROTOCOL_UDP;
+      else if (unformat (input, "tcp"))
+        protocol = IP_PROTOCOL_TCP;
+      else
+        /* Nothing matched, so no input was consumed: bail out instead of
+           looping on the same token forever. */
+        return clib_error_return (0, "parse error: '%U'",
+                                  format_unformat_error, input);
+    }
+
+  return 0;
+}
+
+/*?
+ * The set of '<em>set punt</em>' commands allows specific IP traffic to
+ * be punted to the host TCP/IP stack
+ *
+ * @em Note
+ * - Individual ports can only be punted for UDP; for TCP only
+ *   '<em>all</em>' is accepted
+ * - All TCP traffic is currently punted to the host by default
+ *
+ * @cliexpar
+ * @parblock
+ * Example of how to request NTP traffic to be punted
+ * @cliexcmd{set punt udp 123}
+ *
+ * Example of how to request all 'unknown' UDP traffic to be punted
+ * @cliexcmd{set punt udp all}
+ *
+ * Example of how to stop all 'unknown' UDP traffic to be punted
+ * @cliexcmd{set punt udp del all}
+ * @endparblock
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (punt_command, static) = {
+ .path = "set punt",
+ .short_help = "set punt [udp|tcp] [del] <all | port-num1 [port-num2 ...]>",
+ .function = punt_cli,
+};
+/* *INDENT-ON* */
+
+/**
+ * @brief Initialise the global punt state.
+ *
+ * Creates the IPv6 and IPv4 client sparse vectors, indexed by a value as
+ * wide as the UDP destination port field, marks the punt socket as not
+ * configured and caches the "interface-output" node.
+ *
+ * @param vm vlib_main_t corresponding to the current thread
+ *
+ * @returns 0
+ */
+clib_error_t *
+punt_init (vlib_main_t * vm)
+{
+  punt_main_t *pm = &punt_main;
+
+  pm->clients_by_dst_port6 =
+    sparse_vec_new (sizeof (pm->clients_by_dst_port6[0]),
+                    BITS (((udp_header_t *) 0)->dst_port));
+
+  pm->clients_by_dst_port4 =
+    sparse_vec_new (sizeof (pm->clients_by_dst_port4[0]),
+                    BITS (((udp_header_t *) 0)->dst_port));
+
+  pm->is_configured = false;
+
+  pm->interface_output_node =
+    vlib_get_node_by_name (vm, (u8 *) "interface-output");
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (punt_init);
+
+/**
+ * @brief Handler of the "punt" startup configuration section.
+ *
+ * Accepts "socket <path>". When a path is given, a non-blocking AF_UNIX
+ * datagram socket is created and bound to it, registered with the file
+ * poller using punt_socket_read_ready () as read callback, and the punt
+ * socket is marked as configured. Without a path nothing is done.
+ *
+ * @param vm vlib_main_t corresponding to the current thread
+ * @param input configuration input
+ *
+ * @returns 0 on success, a clib_error_t for unknown input or when the
+ *          socket cannot be created or bound
+ */
+static clib_error_t *
+punt_config (vlib_main_t * vm, unformat_input_t * input)
+{
+  punt_main_t *pm = &punt_main;
+  char *socket_path = 0;
+  struct sockaddr_un addr;
+  clib_error_t *error = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "socket %s", &socket_path))
+        /* Bounded by the size of sockaddr_un.sun_path, leaving room for
+           the terminating NUL. */
+        strncpy (pm->sun_path, socket_path, sizeof (addr.sun_path) - 1);
+      else
+        {
+          error = clib_error_return (0, "unknown input `%U'",
+                                     format_unformat_error, input);
+          goto done;
+        }
+    }
+
+  if (socket_path == 0)
+    return 0;
+
+  /* UNIX domain socket */
+  if ((pm->socket_fd = socket (AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0)) == -1)
+    {
+      error = clib_error_return (0, "socket error");
+      goto done;
+    }
+
+  memset (&addr, 0, sizeof (addr));
+  addr.sun_family = AF_UNIX;
+  if (*socket_path == '\0')
+    {
+      /* Path starting with a NUL byte: copy the name after it. */
+      *addr.sun_path = '\0';
+      strncpy (addr.sun_path + 1, socket_path + 1,
+               sizeof (addr.sun_path) - 2);
+    }
+  else
+    {
+      strncpy (addr.sun_path, socket_path, sizeof (addr.sun_path) - 1);
+      /* Remove a stale socket file so that bind () can succeed. */
+      unlink (socket_path);
+    }
+
+  if (bind (pm->socket_fd, (struct sockaddr *) &addr, sizeof (addr)) == -1)
+    {
+      /* Do not leak the descriptor when the socket cannot be bound. */
+      close (pm->socket_fd);
+      pm->socket_fd = -1;
+      error = clib_error_return (0, "bind error");
+      goto done;
+    }
+
+  /* Register socket */
+  clib_file_main_t *fm = &file_main;
+  clib_file_t template = { 0 };
+  template.read_function = punt_socket_read_ready;
+  template.file_descriptor = pm->socket_fd;
+  pm->clib_file_index = clib_file_add (fm, &template);
+
+  pm->is_configured = true;
+
+done:
+  /* The path parsed by "%s" is an allocated vector; it has been copied
+     where needed, so release it on every exit path. */
+  vec_free (socket_path);
+  return error;
+}
+
+VLIB_CONFIG_FUNCTION (punt_config, "punt");
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/punt.h b/src/vnet/ip/punt.h
new file mode 100644
index 00000000..9defa881
--- /dev/null
+++ b/src/vnet/ip/punt.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Definitions for punt infrastructure.
+ */
+#ifndef included_punt_h
+#define included_punt_h
+
+#include <sys/un.h>
+/*
+ * Punt error codes, generated from punt_error.def: each punt_error (n, s)
+ * entry expands to the enumerator PUNT_ERROR_<n>; the string s is discarded
+ * by this expansion.
+ */
+typedef enum
+{
+#define punt_error(n,s) PUNT_ERROR_##n,
+#include <vnet/ip/punt_error.def>
+#undef punt_error
+ PUNT_N_ERROR, /* one past the last generated code, i.e. the entry count */
+} punt_error_t;
+
+
+/*
+ * Punt API prototypes.  The definitions are not part of this header; the
+ * per-function notes are read off the names and parameters only and should
+ * be confirmed against the implementation.
+ */
+
+/* Presumably adds (is_add true) or removes a punt entry for the given IP
+ * version, protocol and port; returns a clib_error_t pointer. */
+clib_error_t *vnet_punt_add_del (vlib_main_t * vm, u8 ipv,
+ u8 protocol, u16 port, bool is_add);
+/* Presumably registers a punt-socket client, identified by client_pathname,
+ * for the given protocol/port; header_version likely relates to
+ * PUNT_PACKETDESC_VERSION -- confirm. */
+clib_error_t *vnet_punt_socket_add (vlib_main_t * vm, u32 header_version,
+ bool is_ip4, u8 protocol, u16 port,
+ char *client_pathname);
+/* Presumably removes the punt-socket registration for l4_protocol/port. */
+clib_error_t *vnet_punt_socket_del (vlib_main_t * vm, bool is_ip4,
+ u8 l4_protocol, u16 port);
+/* Presumably returns the server socket path (see punt_main_t.sun_path). */
+char *vnet_punt_get_server_pathname (void);
+
+/*
+ * Values for the action field of punt_packetdesc_t.  Numbering starts at 0
+ * and is part of the descriptor seen by clients, so do not reorder.
+ * NOTE(review): per-value meaning below is inferred from the names.
+ */
+enum punt_action_e
+{
+ PUNT_L2 = 0, /* presumably: handle as a layer-2 frame */
+ PUNT_IP4_ROUTED, /* presumably: route as IPv4 */
+ PUNT_IP6_ROUTED, /* presumably: route as IPv6 */
+};
+
+/*
+ * Packet descriptor header. Version 1
+ * If this header changes, the version must also change to notify clients.
+ *
+ * The struct is declared packed, so no padding is inserted between fields.
+ * NOTE(review): the action field has enum type, whose width is chosen by
+ * the compiler -- confirm it matches what clients expect.
+ */
+#define PUNT_PACKETDESC_VERSION 1
+typedef struct __attribute__ ((packed))
+{
+ u32 sw_if_index; /* RX or TX interface */
+ enum punt_action_e action; /* one of the punt_action_e values */
+} punt_packetdesc_t;
+
+/*
+ * Client registration
+ *
+ * One entry per registered client: a 16-bit port together with the
+ * client's UNIX-domain socket address.
+ */
+typedef struct
+{
+ u16 port; /* port this client is registered for */
+ struct sockaddr_un caddr; /* client's AF_UNIX address */
+} punt_client_t;
+
+/*
+ * Global punt state; the single instance is punt_main.
+ */
+typedef struct
+{
+ /* Descriptor of the AF_UNIX datagram socket opened by punt_config. */
+ int socket_fd;
+ /* Path given with the "socket" option of the "punt" config section.
+  * Note the buffer is sized by the whole struct sockaddr_un, not just by
+  * its sun_path member. */
+ char sun_path[sizeof (struct sockaddr_un)];
+ /* Sparse vector created in punt_init with sparse_vec_new, sized by the
+  * bit width of the UDP header's dst_port field. */
+ punt_client_t *clients_by_dst_port4;
+ /* Presumably the IPv6 counterpart of clients_by_dst_port4. */
+ punt_client_t *clients_by_dst_port6;
+ /* Index returned by clib_file_add for socket_fd. */
+ u32 clib_file_index;
+ /* False after punt_init; set true once punt_config has bound and
+  * registered the socket. */
+ bool is_configured;
+ /* Node looked up by the name "interface-output" in punt_init. */
+ vlib_node_t *interface_output_node;
+ /* Usage of the next two fields is not visible in this part of the patch. */
+ u32 *ready_fds;
+ u32 *rx_buffers;
+} punt_main_t;
+extern punt_main_t punt_main;
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/punt_error.def b/src/vnet/ip/punt_error.def
new file mode 100644
index 00000000..13afa2c7
--- /dev/null
+++ b/src/vnet/ip/punt_error.def
@@ -0,0 +1,27 @@
+/*
+ * punt_error.def: punt errors
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * X-macro list: the including file defines punt_error (n, s) before the
+ * #include.  punt.h expands each entry to the enumerator PUNT_ERROR_<n>;
+ * s is the human-readable description.  Entry order determines the
+ * enumerator values.
+ */
+punt_error (NONE, "no error")
+punt_error (UDP_PORT, "udp port punt")
+punt_error (SOCKET_RX, "Socket RX")
+punt_error (SOCKET_TX, "Socket TX")
+punt_error (SOCKET_RX_ERROR, "Socket RX error")
+punt_error (SOCKET_TX_ERROR, "Socket TX error")
+punt_error (NOBUFFER, "buffer allocation failure")
+punt_error (READV, "socket read failure")
+punt_error (ACTION, "invalid packet descriptor")
+